aboutsummaryrefslogtreecommitdiff
path: root/arch/x86_64/kernel
diff options
context:
space:
mode:
author David Woodhouse <dwmw2@shinybook.infradead.org> 2005-08-09 16:51:35 +0100
committer David Woodhouse <dwmw2@shinybook.infradead.org> 2005-08-09 16:51:35 +0100
commit c973b112c76c9d8fd042991128f218a738cc8d0a (patch)
tree e813b0da5d0a0e19e06de6462d145a29ad683026 /arch/x86_64/kernel
parent c5fbc3966f48279dbebfde10248c977014aa9988 (diff)
parent 00dd1e433967872f3997a45d5adf35056fdf2f56 (diff)
Merge with /shiny/git/linux-2.6/.git
Diffstat (limited to 'arch/x86_64/kernel')
-rw-r--r-- arch/x86_64/kernel/entry.S 22
-rw-r--r-- arch/x86_64/kernel/genapic.c 33
-rw-r--r-- arch/x86_64/kernel/genapic_flat.c 142
-rw-r--r-- arch/x86_64/kernel/head.S 16
-rw-r--r-- arch/x86_64/kernel/irq.c 19
-rw-r--r-- arch/x86_64/kernel/machine_kexec.c 101
-rw-r--r-- arch/x86_64/kernel/mce.c 109
-rw-r--r-- arch/x86_64/kernel/mpparse.c 34
-rw-r--r-- arch/x86_64/kernel/setup.c 11
-rw-r--r-- arch/x86_64/kernel/setup64.c 18
-rw-r--r-- arch/x86_64/kernel/smp.c 66
-rw-r--r-- arch/x86_64/kernel/smpboot.c 94
-rw-r--r-- arch/x86_64/kernel/traps.c 3
13 files changed, 397 insertions, 271 deletions
diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 28817490fdc..096d470e280 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -76,7 +76,7 @@
.macro FAKE_STACK_FRAME child_rip
/* push in order ss, rsp, eflags, cs, rip */
- xorq %rax, %rax
+ xorl %eax, %eax
pushq %rax /* ss */
CFI_ADJUST_CFA_OFFSET 8
pushq %rax /* rsp */
@@ -423,7 +423,7 @@ ENTRY(stub_rt_sigreturn)
testl $3,CS(%rdi)
je 1f
swapgs
-1: addl $1,%gs:pda_irqcount # RED-PEN should check preempt count
+1: incl %gs:pda_irqcount # RED-PEN should check preempt count
movq %gs:pda_irqstackptr,%rax
cmoveq %rax,%rsp
pushq %rdi # save old stack
@@ -436,7 +436,7 @@ ENTRY(common_interrupt)
ret_from_intr:
popq %rdi
cli
- subl $1,%gs:pda_irqcount
+ decl %gs:pda_irqcount
#ifdef CONFIG_DEBUG_INFO
movq RBP(%rdi),%rbp
#endif
@@ -494,7 +494,7 @@ retint_signal:
sti
SAVE_REST
movq $-1,ORIG_RAX(%rsp)
- xorq %rsi,%rsi # oldset
+ xorl %esi,%esi # oldset
movq %rsp,%rdi # &pt_regs
call do_notify_resume
RESTORE_REST
@@ -752,7 +752,7 @@ child_rip:
movq %rsi, %rdi
call *%rax
# exit
- xorq %rdi, %rdi
+ xorl %edi, %edi
call do_exit
/*
@@ -918,3 +918,15 @@ ENTRY(machine_check)
ENTRY(call_debug)
zeroentry do_call_debug
+ENTRY(call_softirq)
+ movq %gs:pda_irqstackptr,%rax # candidate stack pointer for the call below
+ pushq %r15 # r15 is reused below to hold the old rsp
+ movq %rsp,%r15 # remember the stack we entered on
+ incl %gs:pda_irqcount # sets ZF when the count becomes zero
+ cmove %rax,%rsp # switch to pda_irqstackptr only if ZF is set
+ call __do_softirq
+ movq %r15,%rsp # back to the stack we entered on
+ decl %gs:pda_irqcount # undo the incl above
+ popq %r15
+ ret
+
diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c
index 69b9c25a8fc..30c843a5efd 100644
--- a/arch/x86_64/kernel/genapic.c
+++ b/arch/x86_64/kernel/genapic.c
@@ -31,6 +31,7 @@ u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
extern struct genapic apic_cluster;
extern struct genapic apic_flat;
+extern struct genapic apic_physflat;
struct genapic *genapic = &apic_flat;
@@ -44,12 +45,7 @@ void __init clustered_apic_check(void)
u8 clusters, max_cluster;
u8 id;
u8 cluster_cnt[NUM_APIC_CLUSTERS];
-
- if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
- /* AMD always uses flat mode right now */
- genapic = &apic_flat;
- goto print;
- }
+ int num_cpus = 0;
#if defined(CONFIG_ACPI_BUS)
/*
@@ -64,15 +60,34 @@ void __init clustered_apic_check(void)
#endif
memset(cluster_cnt, 0, sizeof(cluster_cnt));
-
for (i = 0; i < NR_CPUS; i++) {
id = bios_cpu_apicid[i];
- if (id != BAD_APICID)
- cluster_cnt[APIC_CLUSTERID(id)]++;
+ if (id == BAD_APICID)
+ continue;
+ num_cpus++;
+ cluster_cnt[APIC_CLUSTERID(id)]++;
}
+ /* Don't use clustered mode on AMD platforms; default to physflat. */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+ genapic = &apic_physflat;
+#ifndef CONFIG_HOTPLUG_CPU
+ /* In the CPU hotplug case we cannot use broadcast mode
+ because that opens a race when a CPU is removed.
+ Stay at physflat mode in this case.
+ It is bad to do this unconditionally though. Once
+ we have ACPI platform support for CPU hotplug
+ we should detect hotplug capability from ACPI tables and
+ only do this when really needed. -AK */
+ if (num_cpus <= 8)
+ genapic = &apic_flat;
+#endif
+ goto print;
+ }
+
clusters = 0;
max_cluster = 0;
+
for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
if (cluster_cnt[i] > 0) {
++clusters;
diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c
index 28284696508..adc96282a9e 100644
--- a/arch/x86_64/kernel/genapic_flat.c
+++ b/arch/x86_64/kernel/genapic_flat.c
@@ -2,13 +2,11 @@
* Copyright 2004 James Cleverdon, IBM.
* Subject to the GNU Public License, v.2
*
- * Flat APIC subarch code. Maximum 8 CPUs, logical delivery.
+ * Flat APIC subarch code.
*
* Hacked for x86-64 by James Cleverdon from i386 architecture code by
* Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
* James Cleverdon.
- * Ashok Raj <ashok.raj@intel.com>
- * Removed IPI broadcast shortcut to support CPU hotplug
*/
#include <linux/config.h>
#include <linux/threads.h>
@@ -20,47 +18,6 @@
#include <asm/smp.h>
#include <asm/ipi.h>
-/*
- * The following permit choosing broadcast IPI shortcut v.s sending IPI only
- * to online cpus via the send_IPI_mask varient.
- * The mask version is my preferred option, since it eliminates a lot of
- * other extra code that would need to be written to cleanup intrs sent
- * to a CPU while offline.
- *
- * Sending broadcast introduces lots of trouble in CPU hotplug situations.
- * These IPI's are delivered to cpu's irrespective of their offline status
- * and could pickup stale intr data when these CPUS are turned online.
- *
- * Not using broadcast is a cleaner approach IMO, but Andi Kleen disagrees with
- * the idea of not using broadcast IPI's anymore. Hence the run time check
- * is introduced, on his request so we can choose an alternate mechanism.
- *
- * Initial wacky performance tests that collect cycle counts show
- * no increase in using mask v.s broadcast version. In fact they seem
- * identical in terms of cycle counts.
- *
- * if we need to use broadcast, we need to do the following.
- *
- * cli;
- * hold call_lock;
- * clear any pending IPI, just ack and clear all pending intr
- * set cpu_online_map;
- * release call_lock;
- * sti;
- *
- * The complicated dummy irq processing shown above is not required if
- * we didnt sent IPI's to wrong CPU's in the first place.
- *
- * - Ashok Raj <ashok.raj@intel.com>
- */
-#ifdef CONFIG_HOTPLUG_CPU
-#define DEFAULT_SEND_IPI (1)
-#else
-#define DEFAULT_SEND_IPI (0)
-#endif
-
-static int no_broadcast=DEFAULT_SEND_IPI;
-
static cpumask_t flat_target_cpus(void)
{
return cpu_online_map;
@@ -119,37 +76,15 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
local_irq_restore(flags);
}
-static inline void __local_flat_send_IPI_allbutself(int vector)
-{
- if (no_broadcast) {
- cpumask_t mask = cpu_online_map;
- int this_cpu = get_cpu();
-
- cpu_clear(this_cpu, mask);
- flat_send_IPI_mask(mask, vector);
- put_cpu();
- }
- else
- __send_IPI_shortcut(APIC_DEST_ALLBUT, vector, APIC_DEST_LOGICAL);
-}
-
-static inline void __local_flat_send_IPI_all(int vector)
-{
- if (no_broadcast)
- flat_send_IPI_mask(cpu_online_map, vector);
- else
- __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
-}
-
static void flat_send_IPI_allbutself(int vector)
{
if (((num_online_cpus()) - 1) >= 1)
- __local_flat_send_IPI_allbutself(vector);
+ __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
}
static void flat_send_IPI_all(int vector)
{
- __local_flat_send_IPI_all(vector);
+ __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
}
static int flat_apic_id_registered(void)
@@ -170,16 +105,6 @@ static unsigned int phys_pkg_id(int index_msb)
return ((ebx >> 24) & 0xFF) >> index_msb;
}
-static __init int no_ipi_broadcast(char *str)
-{
- get_option(&str, &no_broadcast);
- printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" :
- "IPI Broadcast");
- return 1;
-}
-
-__setup("no_ipi_broadcast", no_ipi_broadcast);
-
struct genapic apic_flat = {
.name = "flat",
.int_delivery_mode = dest_LowestPrio,
@@ -195,11 +120,62 @@ struct genapic apic_flat = {
.phys_pkg_id = phys_pkg_id,
};
-static int __init print_ipi_mode(void)
+/*
+ * Physflat mode is used when there are more than 8 CPUs on an AMD system.
+ * We cannot use logical delivery in this case because the mask
+ * overflows, so use physical mode.
+ */
+
+static cpumask_t physflat_target_cpus(void)
+{
+ return cpumask_of_cpu(0);
+}
+
+static void physflat_send_IPI_mask(cpumask_t cpumask, int vector)
+{
+ send_IPI_mask_sequence(cpumask, vector);
+}
+
+static void physflat_send_IPI_allbutself(int vector)
+{
+ cpumask_t allbutme = cpu_online_map;
+ int me = get_cpu();
+ cpu_clear(me, allbutme);
+ physflat_send_IPI_mask(allbutme, vector);
+ put_cpu();
+}
+
+static void physflat_send_IPI_all(int vector)
{
- printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" :
- "Shortcut");
- return 0;
+ physflat_send_IPI_mask(cpu_online_map, vector);
}
-late_initcall(print_ipi_mode);
+static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+ int cpu;
+
+ /*
+ * We're using fixed IRQ delivery, can only return one phys APIC ID.
+ * May as well be the first.
+ */
+ cpu = first_cpu(cpumask);
+ if ((unsigned)cpu < NR_CPUS)
+ return x86_cpu_to_apicid[cpu];
+ else
+ return BAD_APICID;
+}
+
+struct genapic apic_physflat = {
+ .name = "physical flat",
+ .int_delivery_mode = dest_LowestPrio,
+ .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
+ .int_delivery_dest = APIC_DEST_PHYSICAL | APIC_DM_LOWEST,
+ .target_cpus = physflat_target_cpus,
+ .apic_id_registered = flat_apic_id_registered,
+ .init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/
+ .send_IPI_all = physflat_send_IPI_all,
+ .send_IPI_allbutself = physflat_send_IPI_allbutself,
+ .send_IPI_mask = physflat_send_IPI_mask,
+ .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
+ .phys_pkg_id = phys_pkg_id,
+};
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index 8d765aa77a2..98ff5eb32b9 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -137,14 +137,14 @@ startup_64:
wrmsr
/* Setup cr0 */
- xorq %rax, %rax
- btsq $31, %rax /* Enable paging */
- btsq $0, %rax /* Enable protected mode */
- btsq $1, %rax /* Enable MP */
- btsq $4, %rax /* Enable ET */
- btsq $5, %rax /* Enable NE */
- btsq $16, %rax /* Enable WP */
- btsq $18, %rax /* Enable AM */
+#define CR0_PM 1 /* protected mode */
+#define CR0_MP (1<<1)
+#define CR0_ET (1<<4)
+#define CR0_NE (1<<5)
+#define CR0_WP (1<<16)
+#define CR0_AM (1<<18)
+#define CR0_PAGING (1<<31)
+ movl $CR0_PM|CR0_MP|CR0_ET|CR0_NE|CR0_WP|CR0_AM|CR0_PAGING,%eax
/* Make changes effective */
movq %rax, %cr0
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index cc3fb85f514..849a20aec7c 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -135,3 +135,22 @@ void fixup_irqs(cpumask_t map)
local_irq_disable();
}
#endif
+
+extern void call_softirq(void);
+/* Run pending softirqs through call_softirq() unless in_interrupt() is true. */
+asmlinkage void do_softirq(void)
+{
+ __u32 pending;
+ unsigned long flags;
+
+ if (in_interrupt()) /* bail out early; nothing is run in this case */
+ return;
+
+ local_irq_save(flags);
+ pending = local_softirq_pending();
+ /* Switch to interrupt stack (done inside call_softirq) */
+ if (pending)
+ call_softirq();
+ local_irq_restore(flags);
+}
+EXPORT_SYMBOL(do_softirq);
diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c
index 60d1eff4156..89fab51e20f 100644
--- a/arch/x86_64/kernel/machine_kexec.c
+++ b/arch/x86_64/kernel/machine_kexec.c
@@ -8,43 +8,26 @@
#include <linux/mm.h>
#include <linux/kexec.h>
-#include <linux/delay.h>
#include <linux/string.h>
#include <linux/reboot.h>
-#include <asm/pda.h>
#include <asm/pgtable.h>
-#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>
#include <asm/io.h>
-#include <asm/apic.h>
-#include <asm/cpufeature.h>
-#include <asm/hw_irq.h>
-
-#define LEVEL0_SIZE (1UL << 12UL)
-#define LEVEL1_SIZE (1UL << 21UL)
-#define LEVEL2_SIZE (1UL << 30UL)
-#define LEVEL3_SIZE (1UL << 39UL)
-#define LEVEL4_SIZE (1UL << 48UL)
-
-#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE)
-#define L2_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define L3_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-
-static void init_level2_page(u64 *level2p, unsigned long addr)
+
+static void init_level2_page(pmd_t *level2p, unsigned long addr)
{
unsigned long end_addr;
addr &= PAGE_MASK;
- end_addr = addr + LEVEL2_SIZE;
+ end_addr = addr + PUD_SIZE;
while (addr < end_addr) {
- *(level2p++) = addr | L1_ATTR;
- addr += LEVEL1_SIZE;
+ set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
+ addr += PMD_SIZE;
}
}
-static int init_level3_page(struct kimage *image, u64 *level3p,
+static int init_level3_page(struct kimage *image, pud_t *level3p,
unsigned long addr, unsigned long last_addr)
{
unsigned long end_addr;
@@ -52,32 +35,32 @@ static int init_level3_page(struct kimage *image, u64 *level3p,
result = 0;
addr &= PAGE_MASK;
- end_addr = addr + LEVEL3_SIZE;
+ end_addr = addr + PGDIR_SIZE;
while ((addr < last_addr) && (addr < end_addr)) {
struct page *page;
- u64 *level2p;
+ pmd_t *level2p;
page = kimage_alloc_control_pages(image, 0);
if (!page) {
result = -ENOMEM;
goto out;
}
- level2p = (u64 *)page_address(page);
+ level2p = (pmd_t *)page_address(page);
init_level2_page(level2p, addr);
- *(level3p++) = __pa(level2p) | L2_ATTR;
- addr += LEVEL2_SIZE;
+ set_pud(level3p++, __pud(__pa(level2p) | _KERNPG_TABLE));
+ addr += PUD_SIZE;
}
/* clear the unused entries */
while (addr < end_addr) {
- *(level3p++) = 0;
- addr += LEVEL2_SIZE;
+ pud_clear(level3p++);
+ addr += PUD_SIZE;
}
out:
return result;
}
-static int init_level4_page(struct kimage *image, u64 *level4p,
+static int init_level4_page(struct kimage *image, pgd_t *level4p,
unsigned long addr, unsigned long last_addr)
{
unsigned long end_addr;
@@ -85,28 +68,28 @@ static int init_level4_page(struct kimage *image, u64 *level4p,
result = 0;
addr &= PAGE_MASK;
- end_addr = addr + LEVEL4_SIZE;
+ end_addr = addr + (PTRS_PER_PGD * PGDIR_SIZE);
while ((addr < last_addr) && (addr < end_addr)) {
struct page *page;
- u64 *level3p;
+ pud_t *level3p;
page = kimage_alloc_control_pages(image, 0);
if (!page) {
result = -ENOMEM;
goto out;
}
- level3p = (u64 *)page_address(page);
+ level3p = (pud_t *)page_address(page);
result = init_level3_page(image, level3p, addr, last_addr);
if (result) {
goto out;
}
- *(level4p++) = __pa(level3p) | L3_ATTR;
- addr += LEVEL3_SIZE;
+ set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
+ addr += PGDIR_SIZE;
}
/* clear the unused entries */
while (addr < end_addr) {
- *(level4p++) = 0;
- addr += LEVEL3_SIZE;
+ pgd_clear(level4p++);
+ addr += PGDIR_SIZE;
}
out:
return result;
@@ -115,52 +98,50 @@ out:
static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
{
- u64 *level4p;
- level4p = (u64 *)__va(start_pgtable);
+ pgd_t *level4p;
+ level4p = (pgd_t *)__va(start_pgtable);
return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT);
}
static void set_idt(void *newidt, u16 limit)
{
- unsigned char curidt[10];
+ struct desc_ptr curidt;
/* x86-64 supports unaliged loads & stores */
- (*(u16 *)(curidt)) = limit;
- (*(u64 *)(curidt +2)) = (unsigned long)(newidt);
+ curidt.size = limit;
+ curidt.address = (unsigned long)newidt;
__asm__ __volatile__ (
- "lidt %0\n"
- : "=m" (curidt)
+ "lidtq %0\n"
+ : : "m" (curidt)
);
};
static void set_gdt(void *newgdt, u16 limit)
{
- unsigned char curgdt[10];
+ struct desc_ptr curgdt;
/* x86-64 supports unaligned loads & stores */
- (*(u16 *)(curgdt)) = limit;
- (*(u64 *)(curgdt +2)) = (unsigned long)(newgdt);
+ curgdt.size = limit;
+ curgdt.address = (unsigned long)newgdt;
__asm__ __volatile__ (
- "lgdt %0\n"
- : "=m" (curgdt)
+ "lgdtq %0\n"
+ : : "m" (curgdt)
);
};
static void load_segments(void)
{
__asm__ __volatile__ (
- "\tmovl $"STR(__KERNEL_DS)",%eax\n"
- "\tmovl %eax,%ds\n"
- "\tmovl %eax,%es\n"
- "\tmovl %eax,%ss\n"
- "\tmovl %eax,%fs\n"
- "\tmovl %eax,%gs\n"
+ "\tmovl %0,%%ds\n"
+ "\tmovl %0,%%es\n"
+ "\tmovl %0,%%ss\n"
+ "\tmovl %0,%%fs\n"
+ "\tmovl %0,%%gs\n"
+ : : "a" (__KERNEL_DS)
);
-#undef STR
-#undef __STR
}
typedef NORET_TYPE void (*relocate_new_kernel_t)(unsigned long indirection_page,
@@ -178,7 +159,7 @@ int machine_kexec_prepare(struct kimage *image)
/* Calculate the offsets */
start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
- control_code_buffer = start_pgtable + 4096UL;
+ control_code_buffer = start_pgtable + PAGE_SIZE;
/* Setup the identity mapped 64bit page table */
result = init_pgtable(image, start_pgtable);
@@ -214,7 +195,7 @@ NORET_TYPE void machine_kexec(struct kimage *image)
/* Calculate the offsets */
page_list = image->head;
start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
- control_code_buffer = start_pgtable + 4096UL;
+ control_code_buffer = start_pgtable + PAGE_SIZE;
/* Set the low half of the page table to my identity mapped
* page table for kexec. Leave the high half pointing at the
diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 21e70625a49..8aa56736cde 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -15,6 +15,8 @@
#include <linux/sysdev.h>
#include <linux/miscdevice.h>
#include <linux/fs.h>
+#include <linux/cpu.h>
+#include <linux/percpu.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/mce.h>
@@ -34,6 +36,7 @@ static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
static unsigned long console_logged;
static int notify_user;
static int rip_msr;
+static int mce_bootlog;
/*
* Lockless MCE logging infrastructure.
@@ -195,10 +198,11 @@ void do_machine_check(struct pt_regs * regs, long error_code)
rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
mce_get_rip(&m, regs);
- if (error_code != -1)
+ if (error_code >= 0)
rdtscll(m.tsc);
wrmsrl(MSR_IA32_MC0_STATUS + i*4, 0);
- mce_log(&m);
+ if (error_code != -2)
+ mce_log(&m);
/* Did this bank cause the exception? */
/* Assume that the bank with uncorrectable errors did it,
@@ -313,7 +317,7 @@ static void mce_init(void *dummy)
/* Log the machine checks left over from the previous reset.
This also clears all registers */
- do_machine_check(NULL, -1);
+ do_machine_check(NULL, mce_bootlog ? -1 : -2);
set_in_cr4(X86_CR4_MCE);
@@ -474,11 +478,17 @@ static int __init mcheck_disable(char *str)
}
/* mce=off disables machine check. Note you can reenable it later
- using sysfs */
+ using sysfs.
+ mce=bootlog Log MCEs from before booting. Disabled by default to work
+ around buggy BIOS that leave bogus MCEs. */
static int __init mcheck_enable(char *str)
{
+ if (*str == '=')
+ str++;
if (!strcmp(str, "off"))
mce_dont_init = 1;
+ else if (!strcmp(str, "bootlog"))
+ mce_bootlog = 1;
else
printk("mce= argument %s ignored. Please use /sys", str);
return 0;
@@ -514,10 +524,7 @@ static struct sysdev_class mce_sysclass = {
set_kset_name("machinecheck"),
};
-static struct sys_device device_mce = {
- .id = 0,
- .cls = &mce_sysclass,
-};
+static DEFINE_PER_CPU(struct sys_device, device_mce);
/* Why are there no generic functions for this? */
#define ACCESSOR(name, var, start) \
@@ -542,27 +549,83 @@ ACCESSOR(bank4ctl,bank[4],mce_restart())
ACCESSOR(tolerant,tolerant,)
ACCESSOR(check_interval,check_interval,mce_restart())
-static __cpuinit int mce_init_device(void)
+/* Per cpu sysdev init. All of the cpus still share the same ctl bank */
+static __cpuinit int mce_create_device(unsigned int cpu)
{
int err;
+ if (!mce_available(&cpu_data[cpu]))
+ return -EIO;
+
+ per_cpu(device_mce,cpu).id = cpu;
+ per_cpu(device_mce,cpu).cls = &mce_sysclass;
+
+ err = sysdev_register(&per_cpu(device_mce,cpu));
+
+ if (!err) {
+ sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank0ctl);
+ sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank1ctl);
+ sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank2ctl);
+ sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank3ctl);
+ sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank4ctl);
+ sysdev_create_file(&per_cpu(device_mce,cpu), &attr_tolerant);
+ sysdev_create_file(&per_cpu(device_mce,cpu), &attr_check_interval);
+ }
+ return err;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static __cpuinit void mce_remove_device(unsigned int cpu)
+{
+ sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank0ctl);
+ sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank1ctl);
+ sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank2ctl);
+ sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank3ctl);
+ sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank4ctl);
+ sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant);
+ sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval);
+ sysdev_unregister(&per_cpu(device_mce,cpu));
+}
+#endif
+
+/* Get notified when a cpu comes on/off. Be hotplug friendly. */
+static __cpuinit int
+mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+ unsigned int cpu = (unsigned long)hcpu;
+
+ switch (action) {
+ case CPU_ONLINE:
+ mce_create_device(cpu);
+ break;
+#ifdef CONFIG_HOTPLUG_CPU
+ case CPU_DEAD:
+ mce_remove_device(cpu);
+ break;
+#endif
+ }
+ return NOTIFY_OK;
+}
+
+static struct notifier_block mce_cpu_notifier = {
+ .notifier_call = mce_cpu_callback,
+};
+
+static __init int mce_init_device(void)
+{
+ int err;
+ int i = 0;
+
if (!mce_available(&boot_cpu_data))
return -EIO;
err = sysdev_class_register(&mce_sysclass);
- if (!err)
- err = sysdev_register(&device_mce);
- if (!err) {
- /* could create per CPU objects, but it is not worth it. */
- sysdev_create_file(&device_mce, &attr_bank0ctl);
- sysdev_create_file(&device_mce, &attr_bank1ctl);
- sysdev_create_file(&device_mce, &attr_bank2ctl);
- sysdev_create_file(&device_mce, &attr_bank3ctl);
- sysdev_create_file(&device_mce, &attr_bank4ctl);
- sysdev_create_file(&device_mce, &attr_tolerant);
- sysdev_create_file(&device_mce, &attr_check_interval);
- }
-
+
+ for_each_online_cpu(i) {
+ mce_create_device(i);
+ }
+
+ register_cpu_notifier(&mce_cpu_notifier);
misc_register(&mce_log_device);
return err;
-
}
+
device_initcall(mce_init_device);
diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index 9c5aa2a790c..79c362d03e2 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -109,7 +109,7 @@ static int __init mpf_checksum(unsigned char *mp, int len)
static void __init MP_processor_info (struct mpc_config_processor *m)
{
- int ver;
+ int ver, cpu;
static int found_bsp=0;
if (!(m->mpc_cpuflag & CPU_ENABLED))
@@ -131,7 +131,7 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
return;
}
- num_processors++;
+ cpu = num_processors++;
if (m->mpc_apicid > MAX_APICS) {
printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
@@ -155,13 +155,18 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
* in same order as logical cpu numbers. Hence the first
* entry is BSP, and so on.
*/
+ cpu = 0;
+
bios_cpu_apicid[0] = m->mpc_apicid;
x86_cpu_to_apicid[0] = m->mpc_apicid;
found_bsp = 1;
- } else {
- bios_cpu_apicid[num_processors - found_bsp] = m->mpc_apicid;
- x86_cpu_to_apicid[num_processors - found_bsp] = m->mpc_apicid;
- }
+ } else
+ cpu = num_processors - found_bsp;
+ bios_cpu_apicid[cpu] = m->mpc_apicid;
+ x86_cpu_to_apicid[cpu] = m->mpc_apicid;
+
+ cpu_set(cpu, cpu_possible_map);
+ cpu_set(cpu, cpu_present_map);
}
static void __init MP_bus_info (struct mpc_config_bus *m)
@@ -965,8 +970,21 @@ int mp_register_gsi(u32 gsi, int edge_level, int active_high_low)
* due to unused I/O APIC pins.
*/
int irq = gsi;
- gsi = pci_irq++;
- gsi_to_irq[irq] = gsi;
+ if (gsi < MAX_GSI_NUM) {
+ if (gsi > 15)
+ gsi = pci_irq++;
+#ifdef CONFIG_ACPI_BUS
+ /*
+ * Don't assign IRQ used by ACPI SCI
+ */
+ if (gsi == acpi_fadt.sci_int)
+ gsi = pci_irq++;
+#endif
+ gsi_to_irq[irq] = gsi;
+ } else {
+ printk(KERN_ERR "GSI %u is too high\n", gsi);
+ return gsi;
+ }
}
io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 5fd03225058..116a491e296 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -645,15 +645,15 @@ void __init setup_arch(char **cmdline_p)
}
}
#endif
-
- sparse_init();
-
#ifdef CONFIG_KEXEC
if (crashk_res.start != crashk_res.end) {
reserve_bootmem(crashk_res.start,
crashk_res.end - crashk_res.start + 1);
}
#endif
+
+ sparse_init();
+
paging_init();
check_ioapic();
@@ -765,8 +765,6 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
int cpu = smp_processor_id();
int node = 0;
unsigned bits;
- if (c->x86_num_cores == 1)
- return;
bits = 0;
while ((1 << bits) < c->x86_num_cores)
@@ -1081,8 +1079,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
else
mtrr_ap_init();
#ifdef CONFIG_NUMA
- if (c != &boot_cpu_data)
- numa_add_cpu(c - cpu_data);
+ numa_add_cpu(smp_processor_id());
#endif
}
diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index f2b9c6bc999..34082c1cc41 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -12,6 +12,7 @@
#include <linux/string.h>
#include <linux/bootmem.h>
#include <linux/bitops.h>
+#include <linux/module.h>
#include <asm/bootsetup.h>
#include <asm/pda.h>
#include <asm/pgtable.h>
@@ -22,10 +23,8 @@
#include <asm/smp.h>
#include <asm/i387.h>
#include <asm/percpu.h>
-#include <asm/mtrr.h>
#include <asm/proto.h>
-#include <asm/mman.h>
-#include <asm/numa.h>
+#include <asm/sections.h>
char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
@@ -33,11 +32,6 @@ cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
struct x8664_pda cpu_pda[NR_CPUS] __cacheline_aligned;
-extern struct task_struct init_task;
-
-extern unsigned char __per_cpu_start[], __per_cpu_end[];
-
-extern struct desc_ptr cpu_gdt_descr[];
struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table };
char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
@@ -101,7 +95,7 @@ void __init setup_per_cpu_areas(void)
#endif
for (i = 0; i < NR_CPUS; i++) {
- unsigned char *ptr;
+ char *ptr;
if (!NODE_DATA(cpu_to_node(i))) {
printk("cpu with no node %d, num_online_nodes %d\n",
@@ -190,11 +184,7 @@ void __cpuinit check_efer(void)
*/
void __cpuinit cpu_init (void)
{
-#ifdef CONFIG_SMP
int cpu = stack_smp_processor_id();
-#else
- int cpu = smp_processor_id();
-#endif
struct tss_struct *t = &per_cpu(init_tss, cpu);
unsigned long v;
char *estacks = NULL;
@@ -214,7 +204,7 @@ void __cpuinit cpu_init (void)
printk("Initializing CPU#%d\n", cpu);
- clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+ clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
/*
* Initialize the per-CPU GDT with the boot GDT,
diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index ccae392886a..e5958220d6b 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -129,10 +129,9 @@ asmlinkage void smp_invalidate_interrupt (void)
} else
leave_mm(cpu);
}
+out:
ack_APIC_irq();
cpu_clear(cpu, flush_cpumask);
-
-out:
put_cpu_no_resched();
}
@@ -294,6 +293,69 @@ void unlock_ipi_call_lock(void)
}
/*
+ * this function sends a 'generic call function' IPI to one other CPU
+ * in the system.
+ */
+static void __smp_call_function_single (int cpu, void (*func) (void *info), void *info,
+ int nonatomic, int wait)
+{
+ struct call_data_struct data;
+ int cpus = 1; /* exactly one target CPU is expected to respond */
+
+ data.func = func;
+ data.info = info;
+ atomic_set(&data.started, 0);
+ data.wait = wait;
+ if (wait)
+ atomic_set(&data.finished, 0);
+
+ call_data = &data;
+ wmb();
+ /* Send the IPI to the single target CPU and wait for it to respond */
+ send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
+
+ /* Wait for response */
+ while (atomic_read(&data.started) != cpus)
+ cpu_relax();
+
+ if (!wait)
+ return;
+
+ while (atomic_read(&data.finished) != cpus) /* only reached when wait is set */
+ cpu_relax();
+}
+
+/*
+ * smp_call_function_single - Run a function on another CPU
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @nonatomic: Currently unused.
+ * @wait: If true, wait until function has completed on other CPUs.
+ *
+ * Returns 0 on success, else a negative status code.
+ *
+ * Does not return until the remote CPU is nearly ready to execute <func>
+ * or is or has executed.
+ */
+
+int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
+ int nonatomic, int wait)
+{
+ /* prevent preemption and reschedule on another processor */