aboutsummaryrefslogtreecommitdiff
path: root/arch/i386/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/i386/kernel')
-rw-r--r--arch/i386/kernel/Makefile5
-rw-r--r--arch/i386/kernel/acpi/earlyquirk.c21
-rw-r--r--arch/i386/kernel/alternative.c63
-rw-r--r--arch/i386/kernel/apic.c22
-rw-r--r--arch/i386/kernel/apm.c3
-rw-r--r--arch/i386/kernel/asm-offsets.c39
-rw-r--r--arch/i386/kernel/cpu/amd.c5
-rw-r--r--arch/i386/kernel/cpu/common.c249
-rw-r--r--arch/i386/kernel/cpu/intel.c12
-rw-r--r--arch/i386/kernel/cpu/intel_cacheinfo.c11
-rw-r--r--arch/i386/kernel/cpu/mtrr/Makefile4
-rw-r--r--arch/i386/kernel/cpu/mtrr/amd.c2
-rw-r--r--arch/i386/kernel/cpu/mtrr/centaur.c9
-rw-r--r--arch/i386/kernel/cpu/mtrr/cyrix.c25
-rw-r--r--arch/i386/kernel/cpu/mtrr/generic.c78
-rw-r--r--arch/i386/kernel/cpu/mtrr/if.c31
-rw-r--r--arch/i386/kernel/cpu/mtrr/main.c71
-rw-r--r--arch/i386/kernel/cpu/mtrr/mtrr.h25
-rw-r--r--arch/i386/kernel/cpu/proc.c3
-rw-r--r--arch/i386/kernel/cpuid.c1
-rw-r--r--arch/i386/kernel/e820.c894
-rw-r--r--arch/i386/kernel/efi.c17
-rw-r--r--arch/i386/kernel/entry.S331
-rw-r--r--arch/i386/kernel/head.S66
-rw-r--r--arch/i386/kernel/hpet.c7
-rw-r--r--arch/i386/kernel/i8259.c5
-rw-r--r--arch/i386/kernel/io_apic.c65
-rw-r--r--arch/i386/kernel/ldt.c4
-rw-r--r--arch/i386/kernel/mca.c13
-rw-r--r--arch/i386/kernel/module.c11
-rw-r--r--arch/i386/kernel/mpparse.c2
-rw-r--r--arch/i386/kernel/msr.c3
-rw-r--r--arch/i386/kernel/nmi.c42
-rw-r--r--arch/i386/kernel/paravirt.c569
-rw-r--r--arch/i386/kernel/pci-dma.c6
-rw-r--r--arch/i386/kernel/process.c81
-rw-r--r--arch/i386/kernel/ptrace.c18
-rw-r--r--arch/i386/kernel/quirks.c46
-rw-r--r--arch/i386/kernel/setup.c852
-rw-r--r--arch/i386/kernel/signal.c6
-rw-r--r--arch/i386/kernel/smp.c6
-rw-r--r--arch/i386/kernel/smpboot.c69
-rw-r--r--arch/i386/kernel/sysenter.c4
-rw-r--r--arch/i386/kernel/time.c15
-rw-r--r--arch/i386/kernel/time_hpet.c15
-rw-r--r--arch/i386/kernel/topology.c8
-rw-r--r--arch/i386/kernel/traps.c118
-rw-r--r--arch/i386/kernel/tsc.c1
-rw-r--r--arch/i386/kernel/vm86.c121
-rw-r--r--arch/i386/kernel/vmlinux.lds.S147
50 files changed, 2694 insertions, 1527 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 1a884b6e6e5..1e8988e558c 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -6,7 +6,7 @@ extra-y := head.o init_task.o vmlinux.lds
obj-y := process.o signal.o entry.o traps.o irq.o \
ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
- pci-dma.o i386_ksyms.o i387.o bootflag.o \
+ pci-dma.o i386_ksyms.o i387.o bootflag.o e820.o\
quirks.o i8237.o topology.o alternative.o i8253.o tsc.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
@@ -40,6 +40,9 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_HPET_TIMER) += hpet.o
obj-$(CONFIG_K8_NB) += k8.o
+# Make sure this is linked after any other paravirt_ops structs: see head.S
+obj-$(CONFIG_PARAVIRT) += paravirt.o
+
EXTRA_AFLAGS := -traditional
obj-$(CONFIG_SCx200) += scx200.o
diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c
index c9841692bb7..4b60af7f91d 100644
--- a/arch/i386/kernel/acpi/earlyquirk.c
+++ b/arch/i386/kernel/acpi/earlyquirk.c
@@ -10,6 +10,7 @@
#include <asm/pci-direct.h>
#include <asm/acpi.h>
#include <asm/apic.h>
+#include <asm/irq.h>
#ifdef CONFIG_ACPI
@@ -49,6 +50,24 @@ static int __init check_bridge(int vendor, int device)
return 0;
}
+static void check_intel(void)
+{
+ u16 vendor, device;
+
+ vendor = read_pci_config_16(0, 0, 0, PCI_VENDOR_ID);
+
+ if (vendor != PCI_VENDOR_ID_INTEL)
+ return;
+
+ device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID);
+#ifdef CONFIG_SMP
+ if (device == PCI_DEVICE_ID_INTEL_E7320_MCH ||
+ device == PCI_DEVICE_ID_INTEL_E7520_MCH ||
+ device == PCI_DEVICE_ID_INTEL_E7525_MCH)
+ quirk_intel_irqbalance();
+#endif
+}
+
void __init check_acpi_pci(void)
{
int num, slot, func;
@@ -60,6 +79,8 @@ void __init check_acpi_pci(void)
if (!early_pci_allowed())
return;
+ check_intel();
+
/* Poor man's PCI discovery */
for (num = 0; num < 32; num++) {
for (slot = 0; slot < 32; slot++) {
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c
index 535f9794fba..9eca21b49f6 100644
--- a/arch/i386/kernel/alternative.c
+++ b/arch/i386/kernel/alternative.c
@@ -124,6 +124,20 @@ static unsigned char** find_nop_table(void)
#endif /* CONFIG_X86_64 */
+static void nop_out(void *insns, unsigned int len)
+{
+ unsigned char **noptable = find_nop_table();
+
+ while (len > 0) {
+ unsigned int noplen = len;
+ if (noplen > ASM_NOP_MAX)
+ noplen = ASM_NOP_MAX;
+ memcpy(insns, noptable[noplen], noplen);
+ insns += noplen;
+ len -= noplen;
+ }
+}
+
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[];
extern u8 *__smp_locks[], *__smp_locks_end[];
@@ -138,10 +152,9 @@ extern u8 __smp_alt_begin[], __smp_alt_end[];
void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
- unsigned char **noptable = find_nop_table();
struct alt_instr *a;
u8 *instr;
- int diff, i, k;
+ int diff;
DPRINTK("%s: alt table %p -> %p\n", __FUNCTION__, start, end);
for (a = start; a < end; a++) {
@@ -159,13 +172,7 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
#endif
memcpy(instr, a->replacement, a->replacementlen);
diff = a->instrlen - a->replacementlen;
- /* Pad the rest with nops */
- for (i = a->replacementlen; diff > 0; diff -= k, i += k) {
- k = diff;
- if (k > ASM_NOP_MAX)
- k = ASM_NOP_MAX;
- memcpy(a->instr + i, noptable[k], k);
- }
+ nop_out(instr + a->replacementlen, diff);
}
}
@@ -209,7 +216,6 @@ static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end)
static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end)
{
- unsigned char **noptable = find_nop_table();
u8 **ptr;
for (ptr = start; ptr < end; ptr++) {
@@ -217,7 +223,7 @@ static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end
continue;
if (*ptr > text_end)
continue;
- **ptr = noptable[1][0];
+ nop_out(*ptr, 1);
};
}
@@ -343,6 +349,40 @@ void alternatives_smp_switch(int smp)
#endif
+#ifdef CONFIG_PARAVIRT
+void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end)
+{
+ struct paravirt_patch *p;
+
+ for (p = start; p < end; p++) {
+ unsigned int used;
+
+ used = paravirt_ops.patch(p->instrtype, p->clobbers, p->instr,
+ p->len);
+#ifdef CONFIG_DEBUG_PARAVIRT
+ {
+ int i;
+ /* Deliberately clobber regs using "not %reg" to find bugs. */
+ for (i = 0; i < 3; i++) {
+ if (p->len - used >= 2 && (p->clobbers & (1 << i))) {
+ memcpy(p->instr + used, "\xf7\xd0", 2);
+ p->instr[used+1] |= i;
+ used += 2;
+ }
+ }
+ }
+#endif
+ /* Pad the rest with nops */
+ nop_out(p->instr + used, p->len - used);
+ }
+
+ /* Sync to be conservative, in case we patched following instructions */
+ sync_core();
+}
+extern struct paravirt_patch __start_parainstructions[],
+ __stop_parainstructions[];
+#endif /* CONFIG_PARAVIRT */
+
void __init alternative_instructions(void)
{
unsigned long flags;
@@ -390,5 +430,6 @@ void __init alternative_instructions(void)
alternatives_smp_switch(0);
}
#endif
+ apply_paravirt(__start_parainstructions, __stop_parainstructions);
local_irq_restore(flags);
}
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 2fd4b7d927c..776d9be26af 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -647,23 +647,30 @@ static struct {
static int lapic_suspend(struct sys_device *dev, pm_message_t state)
{
unsigned long flags;
+ int maxlvt;
if (!apic_pm_state.active)
return 0;
+ maxlvt = get_maxlvt();
+
apic_pm_state.apic_id = apic_read(APIC_ID);
apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
apic_pm_state.apic_ldr = apic_read(APIC_LDR);
apic_pm_state.apic_dfr = apic_read(APIC_DFR);
apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
- apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
+ if (maxlvt >= 4)
+ apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
- apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
+#ifdef CONFIG_X86_MCE_P4THERMAL
+ if (maxlvt >= 5)
+ apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
+#endif
local_irq_save(flags);
disable_local_APIC();
@@ -675,10 +682,13 @@ static int lapic_resume(struct sys_device *dev)
{
unsigned int l, h;
unsigned long flags;
+ int maxlvt;
if (!apic_pm_state.active)
return 0;
+ maxlvt = get_maxlvt();
+
local_irq_save(flags);
/*
@@ -700,8 +710,12 @@ static int lapic_resume(struct sys_device *dev)
apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
- apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
- apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
+#ifdef CONFIG_X86_MCE_P4THERMAL
+ if (maxlvt >= 5)
+ apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
+#endif
+ if (maxlvt >= 4)
+ apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
apic_write(APIC_TMICT, apic_pm_state.apic_tmict);
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c
index a60358fe9a4..a97847da9ed 100644
--- a/arch/i386/kernel/apm.c
+++ b/arch/i386/kernel/apm.c
@@ -231,6 +231,7 @@
#include <asm/uaccess.h>
#include <asm/desc.h>
#include <asm/i8253.h>
+#include <asm/paravirt.h>
#include "io_ports.h"
@@ -2235,7 +2236,7 @@ static int __init apm_init(void)
dmi_check_system(apm_dmi_table);
- if (apm_info.bios.version == 0) {
+ if (apm_info.bios.version == 0 || paravirt_enabled()) {
printk(KERN_INFO "apm: BIOS not found.\n");
return -ENODEV;
}
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c
index c80271f8f08..1b2f3cd3327 100644
--- a/arch/i386/kernel/asm-offsets.c
+++ b/arch/i386/kernel/asm-offsets.c
@@ -15,6 +15,7 @@
#include <asm/processor.h>
#include <asm/thread_info.h>
#include <asm/elf.h>
+#include <asm/pda.h>
#define DEFINE(sym, val) \
asm volatile("\n->" #sym " %0 " #val : : "i" (val))
@@ -51,13 +52,35 @@ void foo(void)
OFFSET(TI_exec_domain, thread_info, exec_domain);
OFFSET(TI_flags, thread_info, flags);
OFFSET(TI_status, thread_info, status);
- OFFSET(TI_cpu, thread_info, cpu);
OFFSET(TI_preempt_count, thread_info, preempt_count);
OFFSET(TI_addr_limit, thread_info, addr_limit);
OFFSET(TI_restart_block, thread_info, restart_block);
OFFSET(TI_sysenter_return, thread_info, sysenter_return);
BLANK();
+ OFFSET(GDS_size, Xgt_desc_struct, size);
+ OFFSET(GDS_address, Xgt_desc_struct, address);
+ OFFSET(GDS_pad, Xgt_desc_struct, pad);
+ BLANK();
+
+ OFFSET(PT_EBX, pt_regs, ebx);
+ OFFSET(PT_ECX, pt_regs, ecx);
+ OFFSET(PT_EDX, pt_regs, edx);
+ OFFSET(PT_ESI, pt_regs, esi);
+ OFFSET(PT_EDI, pt_regs, edi);
+ OFFSET(PT_EBP, pt_regs, ebp);
+ OFFSET(PT_EAX, pt_regs, eax);
+ OFFSET(PT_DS, pt_regs, xds);
+ OFFSET(PT_ES, pt_regs, xes);
+ OFFSET(PT_GS, pt_regs, xgs);
+ OFFSET(PT_ORIG_EAX, pt_regs, orig_eax);
+ OFFSET(PT_EIP, pt_regs, eip);
+ OFFSET(PT_CS, pt_regs, xcs);
+ OFFSET(PT_EFLAGS, pt_regs, eflags);
+ OFFSET(PT_OLDESP, pt_regs, esp);
+ OFFSET(PT_OLDSS, pt_regs, xss);
+ BLANK();
+
OFFSET(EXEC_DOMAIN_handler, exec_domain, handler);
OFFSET(RT_SIGFRAME_sigcontext, rt_sigframe, uc.uc_mcontext);
BLANK();
@@ -74,4 +97,18 @@ void foo(void)
DEFINE(VDSO_PRELINK, VDSO_PRELINK);
OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
+
+ BLANK();
+ OFFSET(PDA_cpu, i386_pda, cpu_number);
+ OFFSET(PDA_pcurrent, i386_pda, pcurrent);
+
+#ifdef CONFIG_PARAVIRT
+ BLANK();
+ OFFSET(PARAVIRT_enabled, paravirt_ops, paravirt_enabled);
+ OFFSET(PARAVIRT_irq_disable, paravirt_ops, irq_disable);
+ OFFSET(PARAVIRT_irq_enable, paravirt_ops, irq_enable);
+ OFFSET(PARAVIRT_irq_enable_sysexit, paravirt_ops, irq_enable_sysexit);
+ OFFSET(PARAVIRT_iret, paravirt_ops, iret);
+ OFFSET(PARAVIRT_read_cr0, paravirt_ops, read_cr0);
+#endif
}
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index e4758095d87..41cfea57232 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -104,10 +104,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
f_vide();
rdtscl(d2);
d = d2-d;
-
- /* Knock these two lines out if it debugs out ok */
- printk(KERN_INFO "AMD K6 stepping B detected - ");
- /* -- cut here -- */
+
if (d > 20*K6_BUG_LOOP)
printk("system stability may be impaired when more than 32 MB are used.\n");
else
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index d9f3e3c31f0..1b34c56f812 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -18,14 +18,15 @@
#include <asm/apic.h>
#include <mach_apic.h>
#endif
+#include <asm/pda.h>
#include "cpu.h"
DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr);
EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr);
-DEFINE_PER_CPU(unsigned char, cpu_16bit_stack[CPU_16BIT_STACK_SIZE]);
-EXPORT_PER_CPU_SYMBOL(cpu_16bit_stack);
+struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly;
+EXPORT_SYMBOL(_cpu_pda);
static int cachesize_override __cpuinitdata = -1;
static int disable_x86_fxsr __cpuinitdata;
@@ -235,29 +236,14 @@ static int __cpuinit have_cpuid_p(void)
return flag_is_changeable_p(X86_EFLAGS_ID);
}
-/* Do minimum CPU detection early.
- Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
- The others are not touched to avoid unwanted side effects.
-
- WARNING: this function is only called on the BP. Don't add code here
- that is supposed to run on all CPUs. */
-static void __init early_cpu_detect(void)
+void __init cpu_detect(struct cpuinfo_x86 *c)
{
- struct cpuinfo_x86 *c = &boot_cpu_data;
-
- c->x86_cache_alignment = 32;
-
- if (!have_cpuid_p())
- return;
-
/* Get vendor name */
cpuid(0x00000000, &c->cpuid_level,
(int *)&c->x86_vendor_id[0],
(int *)&c->x86_vendor_id[8],
(int *)&c->x86_vendor_id[4]);
- get_cpu_vendor(c, 1);
-
c->x86 = 4;
if (c->cpuid_level >= 0x00000001) {
u32 junk, tfms, cap0, misc;
@@ -274,6 +260,26 @@ static void __init early_cpu_detect(void)
}
}
+/* Do minimum CPU detection early.
+ Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment.
+ The others are not touched to avoid unwanted side effects.
+
+ WARNING: this function is only called on the BP. Don't add code here
+ that is supposed to run on all CPUs. */
+static void __init early_cpu_detect(void)
+{
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ c->x86_cache_alignment = 32;
+
+ if (!have_cpuid_p())
+ return;
+
+ cpu_detect(c);
+
+ get_cpu_vendor(c, 1);
+}
+
static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
{
u32 tfms, xlvl;
@@ -308,6 +314,8 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
#else
c->apicid = (ebx >> 24) & 0xFF;
#endif
+ if (c->x86_capability[0] & (1<<19))
+ c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
} else {
/* Have CPUID level 0 only - unheard of */
c->x86 = 4;
@@ -372,6 +380,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
c->x86_vendor_id[0] = '\0'; /* Unset */
c->x86_model_id[0] = '\0'; /* Unset */
c->x86_max_cores = 1;
+ c->x86_clflush_size = 32;
memset(&c->x86_capability, 0, sizeof c->x86_capability);
if (!have_cpuid_p()) {
@@ -591,42 +600,24 @@ void __init early_cpu_init(void)
disable_pse = 1;
#endif
}
-/*
- * cpu_init() initializes state that is per-CPU. Some data is already
- * initialized (naturally) in the bootstrap process, such as the GDT
- * and IDT. We reload them nevertheless, this function acts as a
- * 'CPU state barrier', nothing should get across.
- */
-void __cpuinit cpu_init(void)
+
+/* Make sure %gs is initialized properly in idle threads */
+struct pt_regs * __devinit idle_regs(struct pt_regs *regs)
{
- int cpu = smp_processor_id();
- struct tss_struct * t = &per_cpu(init_tss, cpu);
- struct thread_struct *thread = &current->thread;
- struct desc_struct *gdt;
- __u32 stk16_off = (__u32)&per_cpu(cpu_16bit_stack, cpu);
- struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+ memset(regs, 0, sizeof(struct pt_regs));
+ regs->xgs = __KERNEL_PDA;
+ return regs;
+}
- if (cpu_test_and_set(cpu, cpu_initialized)) {
- printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
- for (;;) local_irq_enable();
- }
- printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+static __cpuinit int alloc_gdt(int cpu)
+{
+ struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+ struct desc_struct *gdt;
+ struct i386_pda *pda;
- if (cpu_has_vme || cpu_has_tsc || cpu_has_de)
- clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
- if (tsc_disable && cpu_has_tsc) {
- printk(KERN_NOTICE "Disabling TSC...\n");
- /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/
- clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
- set_in_cr4(X86_CR4_TSD);
- }
+ gdt = (struct desc_struct *)cpu_gdt_descr->address;
+ pda = cpu_pda(cpu);
- /* The CPU hotplug case */
- if (cpu_gdt_descr->address) {
- gdt = (struct desc_struct *)cpu_gdt_descr->address;
- memset(gdt, 0, PAGE_SIZE);
- goto old_gdt;
- }
/*
* This is a horrible hack to allocate the GDT. The problem
* is that cpu_init() is called really early for the boot CPU
@@ -634,43 +625,130 @@ void __cpuinit cpu_init(void)
* CPUs, when bootmem will have gone away
*/
if (NODE_DATA(0)->bdata->node_bootmem_map) {
- gdt = (struct desc_struct *)alloc_bootmem_pages(PAGE_SIZE);
- /* alloc_bootmem_pages panics on failure, so no check */
+ BUG_ON(gdt != NULL || pda != NULL);
+
+ gdt = alloc_bootmem_pages(PAGE_SIZE);
+ pda = alloc_bootmem(sizeof(*pda));
+ /* alloc_bootmem(_pages) panics on failure, so no check */
+
memset(gdt, 0, PAGE_SIZE);
+ memset(pda, 0, sizeof(*pda));
} else {
- gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
- if (unlikely(!gdt)) {
- printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu);
- for (;;)
- local_irq_enable();
+ /* GDT and PDA might already have been allocated if
+ this is a CPU hotplug re-insertion. */
+ if (gdt == NULL)
+ gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL);
+
+ if (pda == NULL)
+ pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu));
+
+ if (unlikely(!gdt || !pda)) {
+ free_pages((unsigned long)gdt, 0);
+ kfree(pda);
+ return 0;
}
}
-old_gdt:
+
+ cpu_gdt_descr->address = (unsigned long)gdt;
+ cpu_pda(cpu) = pda;
+
+ return 1;
+}
+
+/* Initial PDA used by boot CPU */
+struct i386_pda boot_pda = {
+ ._pda = &boot_pda,
+ .cpu_number = 0,
+ .pcurrent = &init_task,
+};
+
+static inline void set_kernel_gs(void)
+{
+ /* Set %gs for this CPU's PDA. Memory clobber is to create a
+ barrier with respect to any PDA operations, so the compiler
+ doesn't move any before here. */
+ asm volatile ("mov %0, %%gs" : : "r" (__KERNEL_PDA) : "memory");
+}
+
+/* Initialize the CPU's GDT and PDA. The boot CPU does this for
+ itself, but secondaries find this done for them. */
+__cpuinit int init_gdt(int cpu, struct task_struct *idle)
+{
+ struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+ struct desc_struct *gdt;
+ struct i386_pda *pda;
+
+ /* For non-boot CPUs, the GDT and PDA should already have been
+ allocated. */
+ if (!alloc_gdt(cpu)) {
+ printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu);
+ return 0;
+ }
+
+ gdt = (struct desc_struct *)cpu_gdt_descr->address;
+ pda = cpu_pda(cpu);
+
+ BUG_ON(gdt == NULL || pda == NULL);
+
/*
* Initialize the per-CPU GDT with the boot GDT,
* and set up the GDT descriptor:
*/
memcpy(gdt, cpu_gdt_table, GDT_SIZE);
+ cpu_gdt_descr->size = GDT_SIZE - 1;
- /* Set up GDT entry for 16bit stack */
- *(__u64 *)(&gdt[GDT_ENTRY_ESPFIX_SS]) |=
- ((((__u64)stk16_off) << 16) & 0x000000ffffff0000ULL) |
- ((((__u64)stk16_off) << 32) & 0xff00000000000000ULL) |
- (CPU_16BIT_STACK_SIZE - 1);
+ pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a,
+ (u32 *)&gdt[GDT_ENTRY_PDA].b,
+ (unsigned long)pda, sizeof(*pda) - 1,
+ 0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */
- cpu_gdt_descr->size = GDT_SIZE - 1;
- cpu_gdt_descr->address = (unsigned long)gdt;
+ memset(pda, 0, sizeof(*pda));
+ pda->_pda = pda;
+ pda->cpu_number = cpu;
+ pda->pcurrent = idle;
+
+ return 1;
+}
+
+/* Common CPU init for both boot and secondary CPUs */
+static void __cpuinit _cpu_init(int cpu, struct task_struct *curr)
+{
+ struct tss_struct * t = &per_cpu(init_tss, cpu);
+ struct thread_struct *thread = &curr->thread;
+ struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu);
+ /* Reinit these anyway, even if they've already been done (on
+ the boot CPU, this will transition from the boot gdt+pda to
+ the real ones). */
load_gdt(cpu_gdt_descr);
+ set_kernel_gs();
+
+ if (cpu_test_and_set(cpu, cpu_initialized)) {
+ printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
+ for (;;) local_irq_enable();
+ }
+
+ printk(KERN_INFO "Initializing CPU#%d\n", cpu);
+
+ if (cpu_has_vme || cpu_has_tsc || cpu_has_de)
+ clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+ if (tsc_disable && cpu_has_tsc) {
+ printk(KERN_NOTICE "Disabling TSC...\n");
+ /**** FIX-HPA: DOES THIS REALLY BELONG HERE? ****/
+ clear_bit(X86_FEATURE_TSC, boot_cpu_data.x86_capability);
+ set_in_cr4(X86_CR4_TSD);
+ }
+
load_idt(&idt_descr);
/*
* Set up and load the per-CPU TSS and LDT
*/
atomic_inc(&init_mm.mm_count);
- current->active_mm = &init_mm;
- BUG_ON(current->mm);
- enter_lazy_tlb(&init_mm, current);
+ curr->active_mm = &init_mm;
+ if (curr->mm)
+ BUG();
+ enter_lazy_tlb(&init_mm, curr);
load_esp0(t, thread);
set_tss_desc(cpu,t);
@@ -682,8 +760,8 @@ old_gdt:
__set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
#endif
- /* Clear %fs and %gs. */
- asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0));
+ /* Clear %fs. */
+ asm volatile ("mov %0, %%fs" : : "r" (0));
/* Clear all 6 debug registers: */
set_debugreg(0, 0);
@@ -701,6 +779,37 @@ old_gdt:
mxcsr_feature_mask_init();
}
+/* Entrypoint to initialize secondary CPU */
+void __cpuinit secondary_cpu_init(void)
+{
+ int cpu = smp_processor_id();
+ struct task_struct *curr = current;
+
+ _cpu_init(cpu, curr);
+}
+
+/*
+ * cpu_init() initializes state that is per-CPU. Some data is already
+ * initialized (naturally) in the bootstrap process, such as the GDT
+ * and IDT. We reload them nevertheless, this function acts as a
+ * 'CPU state barrier', nothing should get across.
+ */
+void __cpuinit cpu_init(void)
+{
+ int cpu = smp_processor_id();
+ struct task_struct *curr = current;
+
+ /* Set up the real GDT and PDA, so we can transition from the
+ boot versions. */
+ if (!init_gdt(cpu, curr)) {
+ /* failed to allocate something; not much we can do... */
+ for (;;)
+ local_irq_enable();
+ }
+
+ _cpu_init(cpu, curr);
+}
+
#ifdef CONFIG_HOTPLUG_CPU
void __cpuinit cpu_uninit(void)
{
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index 94a95aa5227..56fe2658495 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -107,7 +107,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
* N