From 54264911ce8c3f4a9e7fc193bc78a85e04df7fa0 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Wed, 27 Jul 2005 11:43:34 -0700
Subject: [PATCH] x86_64: fix SMP boot lockup on some machines

Fixes boot-up lockups on some machines where the CPU APIC IDs don't
start at 0.

Signed-off-by: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/smpboot.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index b969ee12872..e66edfa1f3b 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -229,7 +229,7 @@ static __cpuinit void sync_master(void *arg)
 {
 	unsigned long flags, i;
 
-	if (smp_processor_id() != boot_cpu_id)
+	if (smp_processor_id() != 0)
 		return;
 
 	go[MASTER] = 0;
-- 
cgit v1.2.3-18-g5258

From 44456d37b59d8e541936ed26d8b6e08d27e88ac1 Mon Sep 17 00:00:00 2001
From: Olaf Hering
Date: Wed, 27 Jul 2005 11:45:17 -0700
Subject: [PATCH] turn many #if $undefined_string into
 #ifdef $undefined_string

Turn many #if $undefined_string into #ifdef $undefined_string to fix
some warnings after -Wundef was added to the global CFLAGS.

Signed-off-by: Olaf Hering
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/smpboot.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index e66edfa1f3b..e773a794ec4 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -285,7 +285,7 @@ static __cpuinit void sync_tsc(void)
 	int i, done = 0;
 	long delta, adj, adjust_latency = 0;
 	unsigned long flags, rt, master_time_stamp, bound;
-#if DEBUG_TSC_SYNC
+#ifdef DEBUG_TSC_SYNC
 	static struct syncdebug {
 		long rt;	/* roundtrip time */
 		long master;	/* master's timestamp */
@@ -321,7 +321,7 @@ static __cpuinit void sync_tsc(void)
 			rdtscll(t);
 			wrmsrl(MSR_IA32_TSC, t + adj);
 		}
-#if DEBUG_TSC_SYNC
+#ifdef DEBUG_TSC_SYNC
 		t[i].rt = rt;
 		t[i].master = master_time_stamp;
 		t[i].diff = delta;
@@ -331,7 +331,7 @@ static __cpuinit void sync_tsc(void)
 	}
 	spin_unlock_irqrestore(&tsc_sync_lock, flags);
 
-#if DEBUG_TSC_SYNC
+#ifdef DEBUG_TSC_SYNC
 	for (i = 0; i < NUM_ROUNDS; ++i)
 		printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n",
 		       t[i].rt, t[i].master, t[i].diff, t[i].lat);
@@ -537,7 +537,7 @@ void __cpuinit start_secondary(void)
 extern volatile unsigned long init_rsp;
 extern void (*initial_code)(void);
 
-#if APIC_DEBUG
+#ifdef APIC_DEBUG
 static void inquire_remote_apic(int apicid)
 {
 	unsigned i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 };
@@ -841,7 +841,7 @@ do_rest:
 	else
 		/* trampoline code not run */
 		printk("Not responding.\n");
-#if APIC_DEBUG
+#ifdef APIC_DEBUG
 	inquire_remote_apic(apicid);
 #endif
 }
-- 
cgit v1.2.3-18-g5258

From 2a16a3007d5b5896ff62d149bb4dca1a29fd78c4 Mon Sep 17 00:00:00 2001
From: Alexander Nyberg
Date: Thu, 28 Jul 2005 21:15:20 -0700
Subject: [PATCH] x86_64: cpu hotplug changes kill nmi watchdog

When the x86_64 cpu hotplug changes went in, they added a check in
default_do_nmi() which kills NMI delivery on any CPU but the BSP. The
NMI watchdog is brought up quite some time before the CPU's online bit
is set in cpu_online_map, so this won't work very well: the NMI
watchdogs on CPUs other than the BSP are never reprogrammed and those
CPUs receive no NMIs.

Why was this check added? How does an offlined cpu receive an NMI?
Signed-off-by: Alexander Nyberg
Cc: Andi Kleen
Cc: Andrew Morton
Cc: Zwane Mwaikambo
Cc:
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/traps.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/traps.c b/arch/x86_64/kernel/traps.c
index 10273663000..6ead433a388 100644
--- a/arch/x86_64/kernel/traps.c
+++ b/arch/x86_64/kernel/traps.c
@@ -594,9 +594,6 @@ asmlinkage void default_do_nmi(struct pt_regs *regs)
 	if (!cpu)
 		reason = get_nmi_reason();
 
-	if (!cpu_online(cpu))
-		return;
-
 	if (!(reason & 0xc0)) {
 		if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 0, SIGINT)
 								== NOTIFY_STOP)
-- 
cgit v1.2.3-18-g5258

From 5df3574ec0eac0eb8d758e8e9b1ad95d909a9e1f Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Thu, 28 Jul 2005 21:15:22 -0700
Subject: [PATCH] x86_64: Always ack IPIs even on errors

Signed-off-by: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/smp.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index ccae392886a..6ee74db5230 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -129,10 +129,9 @@ asmlinkage void smp_invalidate_interrupt (void)
 		} else
 			leave_mm(cpu);
 	}
 
+out:
 	ack_APIC_irq();
 	cpu_clear(cpu, flush_cpumask);
-
-out:
 	put_cpu_no_resched();
 }
-- 
cgit v1.2.3-18-g5258

From 61b1b2d0239da82c0bed8adaa1d070c6551d4afd Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Thu, 28 Jul 2005 21:15:27 -0700
Subject: [PATCH] x86_64: Move cpu_present/possible_map parsing earlier

Various code needs this information now, before the actual SMP bootup.
Instead of computing it on the fly while booting the other CPUs, set it
up during the initial MPtable/MADT parsing.

Signed-off-by: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/mpparse.c | 17 +++++++++-----
 arch/x86_64/kernel/smpboot.c | 55 +++++++++++++++++++-------------------------
 2 files changed, 35 insertions(+), 37 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index 9c5aa2a790c..08abf9f5b15 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -109,7 +109,7 @@ static int __init mpf_checksum(unsigned char *mp, int len)
 
 static void __init MP_processor_info (struct mpc_config_processor *m)
 {
-	int ver;
+	int ver, cpu;
 	static int found_bsp=0;
 
 	if (!(m->mpc_cpuflag & CPU_ENABLED))
@@ -131,7 +131,7 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
 		return;
 	}
 
-	num_processors++;
+	cpu = num_processors++;
 
 	if (m->mpc_apicid > MAX_APICS) {
 		printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
@@ -155,13 +155,18 @@ static void __init MP_processor_info (struct mpc_config_processor *m)
 	 * in same order as logical cpu numbers. Hence the first
 	 * entry is BSP, and so on.
 	 */
+		cpu = 0;
+
 		bios_cpu_apicid[0] = m->mpc_apicid;
 		x86_cpu_to_apicid[0] = m->mpc_apicid;
 		found_bsp = 1;
-	} else {
-		bios_cpu_apicid[num_processors - found_bsp] = m->mpc_apicid;
-		x86_cpu_to_apicid[num_processors - found_bsp] = m->mpc_apicid;
-	}
+	} else
+		cpu = num_processors - found_bsp;
+	bios_cpu_apicid[cpu] = m->mpc_apicid;
+	x86_cpu_to_apicid[cpu] = m->mpc_apicid;
+
+	cpu_set(cpu, cpu_possible_map);
+	cpu_set(cpu, cpu_present_map);
 }
 
 static void __init MP_bus_info (struct mpc_config_bus *m)
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index e773a794ec4..db27e1844ed 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -112,24 +112,6 @@ struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
 #define get_idle_for_cpu(x)	(idle_thread_array[(x)])
 #define set_idle_for_cpu(x,p)	(idle_thread_array[(x)] = (p))
 
-/*
- * cpu_possible_map should be static, it cannot change as cpu's
- * are onlined, or offlined. The reason is per-cpu data-structures
- * are allocated by some modules at init time, and dont expect to
- * do this dynamically on cpu arrival/departure.
- * cpu_present_map on the other hand can change dynamically.
- * In case when cpu_hotplug is not compiled, then we resort to current
- * behaviour, which is cpu_possible == cpu_present.
- * If cpu-hotplug is supported, then we need to preallocate for all
- * those NR_CPUS, hence cpu_possible_map represents entire NR_CPUS range.
- * - Ashok Raj
- */
-#ifdef CONFIG_HOTPLUG_CPU
-#define fixup_cpu_possible_map(x)	cpu_set((x), cpu_possible_map)
-#else
-#define fixup_cpu_possible_map(x)
-#endif
-
 /*
  * Currently trivial. Write the real->protected mode
  * bootstrap into the page concerned. The caller
@@ -924,6 +906,27 @@ static __init void enforce_max_cpus(unsigned max_cpus)
 	}
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * cpu_possible_map should be static, it cannot change as cpu's
+ * are onlined, or offlined. The reason is per-cpu data-structures
+ * are allocated by some modules at init time, and dont expect to
+ * do this dynamically on cpu arrival/departure.
+ * cpu_present_map on the other hand can change dynamically.
+ * In case when cpu_hotplug is not compiled, then we resort to current
+ * behaviour, which is cpu_possible == cpu_present.
+ * If cpu-hotplug is supported, then we need to preallocate for all
+ * those NR_CPUS, hence cpu_possible_map represents entire NR_CPUS range.
+ * - Ashok Raj
+ */
+static void prefill_possible_map(void)
+{
+	int i;
+	for (i = 0; i < NR_CPUS; i++)
+		cpu_set(i, cpu_possible_map);
+}
+#endif
+
 /*
  * Various sanity checks.
  */
@@ -987,25 +990,15 @@ static int __init smp_sanity_check(unsigned max_cpus)
  */
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
-	int i;
-
 	nmi_watchdog_default();
 	current_cpu_data = boot_cpu_data;
 	current_thread_info()->cpu = 0;  /* needed? */
 
 	enforce_max_cpus(max_cpus);
 
-	/*
-	 * Fill in cpu_present_mask
-	 */
-	for (i = 0; i < NR_CPUS; i++) {
-		int apicid = cpu_present_to_apicid(i);
-		if (physid_isset(apicid, phys_cpu_present_map)) {
-			cpu_set(i, cpu_present_map);
-			cpu_set(i, cpu_possible_map);
-		}
-		fixup_cpu_possible_map(i);
-	}
+#ifdef CONFIG_HOTPLUG_CPU
+	prefill_possible_map();
+#endif
 
 	if (smp_sanity_check(max_cpus) < 0) {
 		printk(KERN_INFO "SMP disabled\n");
-- 
cgit v1.2.3-18-g5258

From 3019e8ebe6281843488250ee3a539106806da93a Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Thu, 28 Jul 2005 21:15:28 -0700
Subject: [PATCH] x86_64: Minor clean up to CPU setup - use smp_processor_id
 instead of custom hack

Does not change any semantics because numa_add_cpu checks for CPU 0
anyway.

Signed-off-by: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/setup.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 5fd03225058..0c7ec94004a 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -1081,8 +1081,7 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 	else
 		mtrr_ap_init();
 #ifdef CONFIG_NUMA
-	if (c != &boot_cpu_data)
-		numa_add_cpu(c - cpu_data);
+	numa_add_cpu(smp_processor_id());
 #endif
 }
-- 
cgit v1.2.3-18-g5258

From de04f3220b9789cc40fd6b1d151dfa0a7e3b03b9 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Thu, 28 Jul 2005 21:15:29 -0700
Subject: [PATCH] x86_64: Clarify "Booting processor ..." message

No need to print kernel addresses there, and clarify that the second
number is the APIC ID.

Signed-off-by: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/smpboot.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index db27e1844ed..001c2d57248 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -755,8 +755,9 @@ do_rest:
 	initial_code = start_secondary;
 	clear_ti_thread_flag(c_idle.idle->thread_info, TIF_FORK);
 
-	printk(KERN_INFO "Booting processor %d/%d rip %lx rsp %lx\n", cpu, apicid,
-	       start_rip, init_rsp);
+	printk(KERN_INFO "Booting processor %d/%d APIC 0x%x\n", cpu,
+	       cpus_weight(cpu_present_map),
+	       apicid);
 
 	/*
 	 * This grunge runs the startup process for
-- 
cgit v1.2.3-18-g5258

From a940199f206dcf51c65fae27e2ce412f2c5a2b22 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Thu, 28 Jul 2005 21:15:30 -0700
Subject: [PATCH] x86_64: Some cleanup in setup64.c

Minor cleanup. Move things into their include files, remove obsolete
includes, fix indentation, remove obsolete special cases etc.

I also added the per-cpu section to asm-generic/sections.h and fixed
init/main.c to use it.
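As an aside, the per-cpu section markers in question are the linker
symbols that bracket the .data.percpu template. A minimal sketch of
what the asm-generic/sections.h side presumably gained (that hunk is
outside this arch/x86_64 log, so treat the exact contents as assumed,
not quoted):

	/* Linker-script symbols bracketing the .data.percpu template;
	 * previously declared ad hoc in setup64.c, per the diff below. */
	extern char __per_cpu_start[], __per_cpu_end[];

	/* Typical use, as in setup_per_cpu_areas(): the size of the
	 * initial per-cpu image copied out once per possible CPU. */
	static inline unsigned long per_cpu_load_size(void)
	{
		return __per_cpu_end - __per_cpu_start;
	}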
Signed-off-by: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/setup64.c | 18 ++++--------------
 1 file changed, 4 insertions(+), 14 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/setup64.c b/arch/x86_64/kernel/setup64.c
index f2b9c6bc999..34082c1cc41 100644
--- a/arch/x86_64/kernel/setup64.c
+++ b/arch/x86_64/kernel/setup64.c
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -22,10 +23,8 @@
 #include
 #include
 #include
-#include
 #include
-#include
-#include
+#include
 
 char x86_boot_params[BOOT_PARAM_SIZE] __initdata = {0,};
 
@@ -33,11 +32,6 @@ cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
 
 struct x8664_pda cpu_pda[NR_CPUS] __cacheline_aligned;
 
-extern struct task_struct init_task;
-
-extern unsigned char __per_cpu_start[], __per_cpu_end[];
-
-extern struct desc_ptr cpu_gdt_descr[];
 struct desc_ptr idt_descr = { 256 * 16, (unsigned long) idt_table };
 
 char boot_cpu_stack[IRQSTACKSIZE] __attribute__((section(".bss.page_aligned")));
@@ -101,7 +95,7 @@ void __init setup_per_cpu_areas(void)
 #endif
 
 	for (i = 0; i < NR_CPUS; i++) {
-		unsigned char *ptr;
+		char *ptr;
 
 		if (!NODE_DATA(cpu_to_node(i))) {
 			printk("cpu with no node %d, num_online_nodes %d\n",
@@ -190,11 +184,7 @@ void __cpuinit check_efer(void)
  */
 void __cpuinit cpu_init (void)
 {
-#ifdef CONFIG_SMP
 	int cpu = stack_smp_processor_id();
-#else
-	int cpu = smp_processor_id();
-#endif
 	struct tss_struct *t = &per_cpu(init_tss, cpu);
 	unsigned long v;
 	char *estacks = NULL;
@@ -214,7 +204,7 @@ void __cpuinit cpu_init (void)
 
 	printk("Initializing CPU#%d\n", cpu);
 
-	clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
+	clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
 
 	/*
 	 * Initialize the per-CPU GDT with the boot GDT,
-- 
cgit v1.2.3-18-g5258

From 91c6d400940e97f2809c04fcf383517f7650dbd4 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Thu, 28 Jul 2005 21:15:39 -0700
Subject: [PATCH] x86_64: Create per CPU machine check sysfs directories

This patch will create machinecheck sysdev directories per CPU. All of
the cpus still share the same ctl banks. When compiled with
CONFIG_HOTPLUG_CPU, it will also bring up/down sysdev directories as
cpus go up/down.

I have tested the patch with the CONFIG_HOTPLUG_CPU option enabled on
the 2.6.13-rc1 kernel.

Minor changes by AK: remove useless unload function

Signed-off-by: Jacob Shin
Signed-off-by: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/mce.c | 93 ++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 74 insertions(+), 19 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 21e70625a49..3b267c91bb0 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -15,6 +15,8 @@
 #include
 #include
 #include
+#include
+#include
 #include
 #include
 #include
@@ -514,10 +516,7 @@ static struct sysdev_class mce_sysclass = {
 	set_kset_name("machinecheck"),
 };
 
-static struct sys_device device_mce = {
-	.id	= 0,
-	.cls	= &mce_sysclass,
-};
+static DEFINE_PER_CPU(struct sys_device, device_mce);
 
 /* Why are there no generic functions for this? */
 #define ACCESSOR(name, var, start) \
@@ -542,27 +541,83 @@ ACCESSOR(bank4ctl,bank[4],mce_restart())
 ACCESSOR(tolerant,tolerant,)
 ACCESSOR(check_interval,check_interval,mce_restart())
 
-static __cpuinit int mce_init_device(void)
+/* Per cpu sysdev init. All of the cpus still share the same ctl bank */
+static __cpuinit int mce_create_device(unsigned int cpu)
 {
 	int err;
+	if (!mce_available(&cpu_data[cpu]))
+		return -EIO;
+
+	per_cpu(device_mce,cpu).id = cpu;
+	per_cpu(device_mce,cpu).cls = &mce_sysclass;
+
+	err = sysdev_register(&per_cpu(device_mce,cpu));
+
+	if (!err) {
+		sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank0ctl);
+		sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank1ctl);
+		sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank2ctl);
+		sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank3ctl);
+		sysdev_create_file(&per_cpu(device_mce,cpu), &attr_bank4ctl);
+		sysdev_create_file(&per_cpu(device_mce,cpu), &attr_tolerant);
+		sysdev_create_file(&per_cpu(device_mce,cpu), &attr_check_interval);
+	}
+	return err;
+}
+
+#ifdef CONFIG_HOTPLUG_CPU
+static __cpuinit void mce_remove_device(unsigned int cpu)
+{
+	sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank0ctl);
+	sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank1ctl);
+	sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank2ctl);
+	sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank3ctl);
+	sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_bank4ctl);
+	sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_tolerant);
+	sysdev_remove_file(&per_cpu(device_mce,cpu), &attr_check_interval);
+	sysdev_unregister(&per_cpu(device_mce,cpu));
+}
+#endif
+
+/* Get notified when a cpu comes on/off. Be hotplug friendly. */
+static __cpuinit int
+mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
+{
+	unsigned int cpu = (unsigned long)hcpu;
+
+	switch (action) {
+	case CPU_ONLINE:
+		mce_create_device(cpu);
+		break;
+#ifdef CONFIG_HOTPLUG_CPU
+	case CPU_DEAD:
+		mce_remove_device(cpu);
+		break;
+#endif
+	}
+	return NOTIFY_OK;
+}
+
+static struct notifier_block mce_cpu_notifier = {
+	.notifier_call = mce_cpu_callback,
+};
+
+static __init int mce_init_device(void)
+{
+	int err;
+	int i = 0;
+
 	if (!mce_available(&boot_cpu_data))
 		return -EIO;
 	err = sysdev_class_register(&mce_sysclass);
-	if (!err)
-		err = sysdev_register(&device_mce);
-	if (!err) {
-		/* could create per CPU objects, but it is not worth it. */
-		sysdev_create_file(&device_mce, &attr_bank0ctl);
-		sysdev_create_file(&device_mce, &attr_bank1ctl);
-		sysdev_create_file(&device_mce, &attr_bank2ctl);
-		sysdev_create_file(&device_mce, &attr_bank3ctl);
-		sysdev_create_file(&device_mce, &attr_bank4ctl);
-		sysdev_create_file(&device_mce, &attr_tolerant);
-		sysdev_create_file(&device_mce, &attr_check_interval);
-	}
-
+
+	for_each_online_cpu(i) {
+		mce_create_device(i);
+	}
+
+	register_cpu_notifier(&mce_cpu_notifier);
 	misc_register(&mce_log_device);
 	return err;
-
 }
+
 device_initcall(mce_init_device);
-- 
cgit v1.2.3-18-g5258

From 37a47e65fa58d413a31f27ee88f6cb98583b6157 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Thu, 28 Jul 2005 21:15:41 -0700
Subject: [PATCH] x86_64: Remove the broadcast options that were added for
 CPU hotplug

Will be obsolete with physflat.
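For context, a side-by-side sketch of the two delivery styles being
traded here, using only helpers that appear in the surrounding diffs
(the call sites are illustrative, not quoted from the tree):

	/* Shortcut broadcast: one ICR write reaching every APIC,
	 * including CPUs halfway through coming online or going
	 * offline -- the behaviour this patch reinstates. */
	__send_IPI_shortcut(APIC_DEST_ALLBUT, vector, APIC_DEST_LOGICAL);

	/* Masked delivery: one directed IPI per CPU in the mask, so
	 * CPUs outside cpu_online_map never see it (the hotplug-safe
	 * variant being removed). */
	cpumask_t mask = cpu_online_map;
	cpu_clear(smp_processor_id(), mask);
	flat_send_IPI_mask(mask, vector);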
Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86_64/kernel/genapic_flat.c | 88 +-------------------------------------- 1 file changed, 2 insertions(+), 86 deletions(-) (limited to 'arch/x86_64/kernel') diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c index 28284696508..fdfa15f5d2e 100644 --- a/arch/x86_64/kernel/genapic_flat.c +++ b/arch/x86_64/kernel/genapic_flat.c @@ -7,8 +7,6 @@ * Hacked for x86-64 by James Cleverdon from i386 architecture code by * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and * James Cleverdon. - * Ashok Raj - * Removed IPI broadcast shortcut to support CPU hotplug */ #include #include @@ -20,47 +18,6 @@ #include #include -/* - * The following permit choosing broadcast IPI shortcut v.s sending IPI only - * to online cpus via the send_IPI_mask varient. - * The mask version is my preferred option, since it eliminates a lot of - * other extra code that would need to be written to cleanup intrs sent - * to a CPU while offline. - * - * Sending broadcast introduces lots of trouble in CPU hotplug situations. - * These IPI's are delivered to cpu's irrespective of their offline status - * and could pickup stale intr data when these CPUS are turned online. - * - * Not using broadcast is a cleaner approach IMO, but Andi Kleen disagrees with - * the idea of not using broadcast IPI's anymore. Hence the run time check - * is introduced, on his request so we can choose an alternate mechanism. - * - * Initial wacky performance tests that collect cycle counts show - * no increase in using mask v.s broadcast version. In fact they seem - * identical in terms of cycle counts. - * - * if we need to use broadcast, we need to do the following. - * - * cli; - * hold call_lock; - * clear any pending IPI, just ack and clear all pending intr - * set cpu_online_map; - * release call_lock; - * sti; - * - * The complicated dummy irq processing shown above is not required if - * we didnt sent IPI's to wrong CPU's in the first place. 
- * - Ashok Raj
- */
-#ifdef CONFIG_HOTPLUG_CPU
-#define DEFAULT_SEND_IPI	(1)
-#else
-#define DEFAULT_SEND_IPI	(0)
-#endif
-
-static int no_broadcast=DEFAULT_SEND_IPI;
-
 static cpumask_t flat_target_cpus(void)
 {
 	return cpu_online_map;
 }
@@ -119,37 +76,15 @@ static void flat_send_IPI_mask(cpumask_t cpumask, int vector)
 	local_irq_restore(flags);
 }
 
-static inline void __local_flat_send_IPI_allbutself(int vector)
-{
-	if (no_broadcast) {
-		cpumask_t mask = cpu_online_map;
-		int this_cpu = get_cpu();
-
-		cpu_clear(this_cpu, mask);
-		flat_send_IPI_mask(mask, vector);
-		put_cpu();
-	}
-	else
-		__send_IPI_shortcut(APIC_DEST_ALLBUT, vector, APIC_DEST_LOGICAL);
-}
-
-static inline void __local_flat_send_IPI_all(int vector)
-{
-	if (no_broadcast)
-		flat_send_IPI_mask(cpu_online_map, vector);
-	else
-		__send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
-}
-
 static void flat_send_IPI_allbutself(int vector)
 {
 	if (((num_online_cpus()) - 1) >= 1)
-		__local_flat_send_IPI_allbutself(vector);
+		__send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL);
 }
 
 static void flat_send_IPI_all(int vector)
 {
-	__local_flat_send_IPI_all(vector);
+	__send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
 }
 
 static int flat_apic_id_registered(void)
@@ -170,16 +105,6 @@ static unsigned int phys_pkg_id(int index_msb)
 	return ((ebx >> 24) & 0xFF) >> index_msb;
 }
 
-static __init int no_ipi_broadcast(char *str)
-{
-	get_option(&str, &no_broadcast);
-	printk ("Using %s mode\n", no_broadcast ? "No IPI Broadcast" :
-		"IPI Broadcast");
-	return 1;
-}
-
-__setup("no_ipi_broadcast", no_ipi_broadcast);
-
 struct genapic apic_flat =  {
 	.name = "flat",
 	.int_delivery_mode = dest_LowestPrio,
@@ -194,12 +119,3 @@ struct genapic apic_flat =  {
 	.cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,
 };
-
-static int __init print_ipi_mode(void)
-{
-	printk ("Using IPI %s mode\n", no_broadcast ? "No-Shortcut" :
-		"Shortcut");
-	return 0;
-}
-
-late_initcall(print_ipi_mode);
-- 
cgit v1.2.3-18-g5258

From f8d311939f9d2b2a5e935df8dceb98b7cbe08d43 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Thu, 28 Jul 2005 21:15:42 -0700
Subject: [PATCH] x86_64: Support more than 8 cores on AMD systems

Use physical mode instead of logical mode to address more CPUs. This is
also used in the CPU hotplug case to avoid a race.

Signed-off-by: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/genapic.c      | 33 +++++++++++++++------
 arch/x86_64/kernel/genapic_flat.c | 62 ++++++++++++++++++++++++++++++++++++++-
 2 files changed, 85 insertions(+), 10 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/genapic.c b/arch/x86_64/kernel/genapic.c
index 69b9c25a8fc..30c843a5efd 100644
--- a/arch/x86_64/kernel/genapic.c
+++ b/arch/x86_64/kernel/genapic.c
@@ -31,6 +31,7 @@ u8 x86_cpu_to_log_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
 
 extern struct genapic apic_cluster;
 extern struct genapic apic_flat;
+extern struct genapic apic_physflat;
 
 struct genapic *genapic = &apic_flat;
 
@@ -44,12 +45,7 @@ void __init clustered_apic_check(void)
 	u8 clusters, max_cluster;
 	u8 id;
 	u8 cluster_cnt[NUM_APIC_CLUSTERS];
-
-	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
-		/* AMD always uses flat mode right now */
-		genapic = &apic_flat;
-		goto print;
-	}
+	int num_cpus = 0;
 
 #if defined(CONFIG_ACPI_BUS)
 	/*
@@ -64,15 +60,34 @@ void __init clustered_apic_check(void)
 #endif
 
 	memset(cluster_cnt, 0, sizeof(cluster_cnt));
-
 	for (i = 0; i < NR_CPUS; i++) {
 		id = bios_cpu_apicid[i];
-		if (id != BAD_APICID)
-			cluster_cnt[APIC_CLUSTERID(id)]++;
+		if (id == BAD_APICID)
+			continue;
+		num_cpus++;
+		cluster_cnt[APIC_CLUSTERID(id)]++;
 	}
 
+	/* Don't use clustered mode on AMD platforms. */
+	if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
+		genapic = &apic_physflat;
+#ifndef CONFIG_CPU_HOTPLUG
+		/* In the CPU hotplug case we cannot use broadcast mode
+		   because that opens a race when a CPU is removed.
+		   Stay at physflat mode in this case.
+		   It is bad to do this unconditionally though. Once
+		   we have ACPI platform support for CPU hotplug
+		   we should detect hotplug capablity from ACPI tables and
+		   only do this when really needed. -AK */
+		if (num_cpus <= 8)
+			genapic = &apic_flat;
+#endif
+		goto print;
+	}
+
 	clusters = 0;
 	max_cluster = 0;
+
 	for (i = 0; i < NUM_APIC_CLUSTERS; i++) {
 		if (cluster_cnt[i] > 0) {
 			++clusters;
diff --git a/arch/x86_64/kernel/genapic_flat.c b/arch/x86_64/kernel/genapic_flat.c
index fdfa15f5d2e..adc96282a9e 100644
--- a/arch/x86_64/kernel/genapic_flat.c
+++ b/arch/x86_64/kernel/genapic_flat.c
@@ -2,7 +2,7 @@
 * Copyright 2004 James Cleverdon, IBM.
 * Subject to the GNU Public License, v.2
 *
- * Flat APIC subarch code. Maximum 8 CPUs, logical delivery.
+ * Flat APIC subarch code.
 *
 * Hacked for x86-64 by James Cleverdon from i386 architecture code by
 * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
@@ -119,3 +119,63 @@ struct genapic apic_flat =  {
 	.cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
 	.phys_pkg_id = phys_pkg_id,
 };
+
+/*
+ * Physflat mode is used when there are more than 8 CPUs on a AMD system.
+ * We cannot use logical delivery in this case because the mask
+ * overflows, so use physical mode.
+ */
+
+static cpumask_t physflat_target_cpus(void)
+{
+	return cpumask_of_cpu(0);
+}
+
+static void physflat_send_IPI_mask(cpumask_t cpumask, int vector)
+{
+	send_IPI_mask_sequence(cpumask, vector);
+}
+
+static void physflat_send_IPI_allbutself(int vector)
+{
+	cpumask_t allbutme = cpu_online_map;
+	int me = get_cpu();
+	cpu_clear(me, allbutme);
+	physflat_send_IPI_mask(allbutme, vector);
+	put_cpu();
+}
+
+static void physflat_send_IPI_all(int vector)
+{
+	physflat_send_IPI_mask(cpu_online_map, vector);
+}
+
+static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
+{
+	int cpu;
+
+	/*
+	 * We're using fixed IRQ delivery, can only return one phys APIC ID.
+	 * May as well be the first.
+	 */
+	cpu = first_cpu(cpumask);
+	if ((unsigned)cpu < NR_CPUS)
+		return x86_cpu_to_apicid[cpu];
+	else
+		return BAD_APICID;
+}
+
+struct genapic apic_physflat =  {
+	.name = "physical flat",
+	.int_delivery_mode = dest_LowestPrio,
+	.int_dest_mode = (APIC_DEST_PHYSICAL != 0),
+	.int_delivery_dest = APIC_DEST_PHYSICAL | APIC_DM_LOWEST,
+	.target_cpus = physflat_target_cpus,
+	.apic_id_registered = flat_apic_id_registered,
+	.init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/
+	.send_IPI_all = physflat_send_IPI_all,
+	.send_IPI_allbutself = physflat_send_IPI_allbutself,
+	.send_IPI_mask = physflat_send_IPI_mask,
+	.cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
+	.phys_pkg_id = phys_pkg_id,
+};
-- 
cgit v1.2.3-18-g5258

From 3829ee6b1be03d5aa3005fe7d19f30088b539836 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Thu, 28 Jul 2005 21:15:48 -0700
Subject: [PATCH] x86_64: Small assembly improvements

Save a byte here and there. Ultimately useless, but these things always
catch my eye when reading the code, so just fix them for now. I already
got at least one patch fixing them, which gives a good excuse.

Signed-off-by: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/entry.S | 10 +++++-----
 arch/x86_64/kernel/head.S  | 16 ++++++++--------
 2 files changed, 13 insertions(+), 13 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 28817490fdc..0696e025dce 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -76,7 +76,7 @@
 
 	.macro FAKE_STACK_FRAME child_rip
 	/* push in order ss, rsp, eflags, cs, rip */
-	xorq %rax, %rax
+	xorl %eax, %eax
 	pushq %rax /* ss */
 	CFI_ADJUST_CFA_OFFSET	8
 	pushq %rax /* rsp */
@@ -423,7 +423,7 @@ ENTRY(stub_rt_sigreturn)
 	testl $3,CS(%rdi)
 	je 1f
 	swapgs
-1:	addl $1,%gs:pda_irqcount	# RED-PEN should check preempt count
+1:	incl %gs:pda_irqcount	# RED-PEN should check preempt count
 	movq %gs:pda_irqstackptr,%rax
 	cmoveq %rax,%rsp
 	pushq %rdi			# save old stack
@@ -436,7 +436,7 @@ ENTRY(common_interrupt)
 ret_from_intr:
 	popq %rdi
 	cli
-	subl $1,%gs:pda_irqcount
+	decl %gs:pda_irqcount
 #ifdef CONFIG_DEBUG_INFO
 	movq RBP(%rdi),%rbp
 #endif
@@ -494,7 +494,7 @@ retint_signal:
 	sti
 	SAVE_REST
 	movq $-1,ORIG_RAX(%rsp)
-	xorq %rsi,%rsi		# oldset
+	xorl %esi,%esi		# oldset
 	movq %rsp,%rdi		# &pt_regs
 	call do_notify_resume
 	RESTORE_REST
@@ -752,7 +752,7 @@ child_rip:
 	movq %rsi, %rdi
 	call *%rax
 	# exit
-	xorq %rdi, %rdi
+	xorl %edi, %edi
 	call do_exit
 
 /*
diff --git a/arch/x86_64/kernel/head.S b/arch/x86_64/kernel/head.S
index 8d765aa77a2..98ff5eb32b9 100644
--- a/arch/x86_64/kernel/head.S
+++ b/arch/x86_64/kernel/head.S
@@ -137,14 +137,14 @@ startup_64:
 	wrmsr
 
 	/* Setup cr0 */
-	xorq	%rax, %rax
-	btsq	$31, %rax	/* Enable paging */
-	btsq	$0, %rax	/* Enable protected mode */
-	btsq	$1, %rax	/* Enable MP */
-	btsq	$4, %rax	/* Enable ET */
-	btsq	$5, %rax	/* Enable NE */
-	btsq	$16, %rax	/* Enable WP */
-	btsq	$18, %rax	/* Enable AM */
+#define CR0_PM		1		/* protected mode */
+#define CR0_MP		(1<<1)
+#define CR0_ET		(1<<4)
+#define CR0_NE		(1<<5)
+#define CR0_WP		(1<<16)
+#define CR0_AM		(1<<18)
+#define CR0_PAGING	(1<<31)
+	movl $CR0_PM|CR0_MP|CR0_ET|CR0_NE|CR0_WP|CR0_AM|CR0_PAGING,%eax
 	/* Make changes effective */
 	movq	%rax, %cr0
-- 
cgit v1.2.3-18-g5258

From ed6b676ca8b50e0b538e61c283d52fd04f007abf Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Thu, 28 Jul 2005 21:15:49 -0700
Subject: [PATCH] x86_64: Switch to the interrupt stack when running a softirq
 in local_bh_enable()

This avoids some potential stack overflows with very deep softirq call
chains. i386 does this too.

TODO: add CFI annotation

Signed-off-by: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/entry.S | 12 ++++++++++++
 arch/x86_64/kernel/irq.c   | 19 +++++++++++++++++++
 2 files changed, 31 insertions(+)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/entry.S b/arch/x86_64/kernel/entry.S
index 0696e025dce..096d470e280 100644
--- a/arch/x86_64/kernel/entry.S
+++ b/arch/x86_64/kernel/entry.S
@@ -918,3 +918,15 @@ ENTRY(machine_check)
 
 ENTRY(call_debug)
 	zeroentry do_call_debug
+ENTRY(call_softirq)
+	movq %gs:pda_irqstackptr,%rax
+	pushq %r15
+	movq %rsp,%r15
+	incl %gs:pda_irqcount
+	cmove %rax,%rsp
+	call __do_softirq
+	movq %r15,%rsp
+	decl %gs:pda_irqcount
+	popq %r15
+	ret
+
diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c
index cc3fb85f514..849a20aec7c 100644
--- a/arch/x86_64/kernel/irq.c
+++ b/arch/x86_64/kernel/irq.c
@@ -135,3 +135,22 @@ void fixup_irqs(cpumask_t map)
 	local_irq_disable();
 }
 #endif
+
+extern void call_softirq(void);
+
+asmlinkage void do_softirq(void)
+{
+	__u32 pending;
+	unsigned long flags;
+
+	if (in_interrupt())
+		return;
+
+	local_irq_save(flags);
+	pending = local_softirq_pending();
+	/* Switch to interrupt stack */
+	if (pending)
+		call_softirq();
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(do_softirq);
-- 
cgit v1.2.3-18-g5258

From 07291d431ce6c6660c35dd4e5e980a406dc3629a Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Thu, 28 Jul 2005 21:15:50 -0700
Subject: [PATCH] x86_64: Fix SRAT handling on non dual core systems

Signed-off-by: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/setup.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 0c7ec94004a..0aa526298e9 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -765,8 +765,6 @@ static void __init amd_detect_cmp(struct cpuinfo_x86 *c)
 	int cpu = smp_processor_id();
 	int node = 0;
 	unsigned bits;
-	if (c->x86_num_cores == 1)
-		return;
 
 	bits = 0;
 	while ((1 << bits) < c->x86_num_cores)
-- 
cgit v1.2.3-18-g5258

From ef6e525393dbbab7e1dc42594d35a1dfd688d1d7 Mon Sep 17 00:00:00 2001
From: Nishanth Aravamudan
Date: Thu, 28 Jul 2005 21:15:53 -0700
Subject: [PATCH] x86_64: Use msleep in smpboot.c

Replace schedule_timeout() with msleep() to guarantee the task delays
as expected.

Signed-off-by: Nishanth Aravamudan
Signed-off-by: Domen Puncer
Signed-off-by: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/smpboot.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 001c2d57248..6d23354443c 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -1183,8 +1183,7 @@ void __cpu_die(unsigned int cpu)
 			printk ("CPU %d is now offline\n", cpu);
 			return;
 		}
-		current->state = TASK_UNINTERRUPTIBLE;
-		schedule_timeout(HZ/10);
+		msleep(100);
 	}
 	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
 }
-- 
cgit v1.2.3-18-g5258

From 36c4fd23cc06f81d68ee968c4c1bf1cebb3dcea5 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman"
Date: Fri, 29 Jul 2005 13:02:09 -0600
Subject: [PATCH] x86_64 machine_kexec: Cleanup inline assembly.
In copying code from i386 to x86_64 without enough scrutiny, I wound up
with inline assembly with the wrong constraints. Use input constraints
instead of output constraints. So that I know the assembler will do the
right thing, specify the size of the operand: lidtq and lgdtq instead
of just lidt and lgdt.

Make load_segments use an input constraint, and delete the macro fun.
Without having to reload %cs like I do on i386, this code is noticeably
simpler.

Signed-off-by: Eric W. Biederman
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/machine_kexec.c | 34 ++++++++++++++++------------------
 1 file changed, 16 insertions(+), 18 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c
index 60d1eff4156..717f7db4b53 100644
--- a/arch/x86_64/kernel/machine_kexec.c
+++ b/arch/x86_64/kernel/machine_kexec.c
@@ -122,45 +122,43 @@ static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
 
 static void set_idt(void *newidt, u16 limit)
 {
-	unsigned char curidt[10];
+	struct desc_ptr curidt;
 
 	/* x86-64 supports unaliged loads & stores */
-	(*(u16 *)(curidt)) = limit;
-	(*(u64 *)(curidt +2)) = (unsigned long)(newidt);
+	curidt.size    = limit;
+	curidt.address = (unsigned long)newidt;
 
 	__asm__ __volatile__ (
-		"lidt %0\n"
-		: "=m" (curidt)
+		"lidtq %0\n"
+		: : "m" (curidt)
 		);
 };
 
 static void set_gdt(void *newgdt, u16 limit)
 {
-	unsigned char curgdt[10];
+	struct desc_ptr curgdt;
 
 	/* x86-64 supports unaligned loads & stores */
-	(*(u16 *)(curgdt)) = limit;
-	(*(u64 *)(curgdt +2)) = (unsigned long)(newgdt);
+	curgdt.size    = limit;
+	curgdt.address = (unsigned long)newgdt;
 
 	__asm__ __volatile__ (
-		"lgdt %0\n"
-		: "=m" (curgdt)
+		"lgdtq %0\n"
+		: : "m" (curgdt)
		);
 };
 
 static void load_segments(void)
 {
 	__asm__ __volatile__ (
-		"\tmovl $"STR(__KERNEL_DS)",%eax\n"
-		"\tmovl %eax,%ds\n"
-		"\tmovl %eax,%es\n"
-		"\tmovl %eax,%ss\n"
-		"\tmovl %eax,%fs\n"
-		"\tmovl %eax,%gs\n"
+		"\tmovl %0,%%ds\n"
+		"\tmovl %0,%%es\n"
+		"\tmovl %0,%%ss\n"
+		"\tmovl %0,%%fs\n"
+		"\tmovl %0,%%gs\n"
+		: : "a" (__KERNEL_DS)
 		);
-#undef STR
-#undef __STR
 }
 
 typedef NORET_TYPE void (*relocate_new_kernel_t)(unsigned long indirection_page,
-- 
cgit v1.2.3-18-g5258

From 8bf2755664bfe03a414e7ec02d9f16a3d5beb60d Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman"
Date: Fri, 29 Jul 2005 13:25:28 -0600
Subject: [PATCH] x86_64 machine_kexec: Use standard pagetable helpers

Use the standard hardware page table manipulation macros. This is
possible now that Linux works with all 4 levels of the page tables.

Signed-off-by: Eric W. Biederman
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/machine_kexec.c | 67 ++++++++++++++------------------------
 1 file changed, 25 insertions(+), 42 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/machine_kexec.c b/arch/x86_64/kernel/machine_kexec.c
index 717f7db4b53..89fab51e20f 100644
--- a/arch/x86_64/kernel/machine_kexec.c
+++ b/arch/x86_64/kernel/machine_kexec.c
@@ -8,43 +8,26 @@
 
 #include
 #include
-#include
 #include
 #include
-#include
 #include
 #include
 #include
-#include
-#include
-#include
-
-#define LEVEL0_SIZE (1UL << 12UL)
-#define LEVEL1_SIZE (1UL << 21UL)
-#define LEVEL2_SIZE (1UL << 30UL)
-#define LEVEL3_SIZE (1UL << 39UL)
-#define LEVEL4_SIZE (1UL << 48UL)
-
-#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE)
-#define L2_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-#define L3_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
-
-static void init_level2_page(u64 *level2p, unsigned long addr)
+
+static void init_level2_page(pmd_t *level2p, unsigned long addr)
 {
 	unsigned long end_addr;
 
 	addr &= PAGE_MASK;
-	end_addr = addr + LEVEL2_SIZE;
+	end_addr = addr + PUD_SIZE;
 	while (addr < end_addr) {
-		*(level2p++) = addr | L1_ATTR;
-		addr += LEVEL1_SIZE;
+		set_pmd(level2p++, __pmd(addr | __PAGE_KERNEL_LARGE_EXEC));
+		addr += PMD_SIZE;
 	}
 }
 
-static int init_level3_page(struct kimage *image, u64 *level3p,
+static int init_level3_page(struct kimage *image, pud_t *level3p,
 	unsigned long addr, unsigned long last_addr)
 {
 	unsigned long end_addr;
@@ -52,32 +35,32 @@ static int init_level3_page(struct kimage *image, u64 *level3p,
 
 	result = 0;
 	addr &= PAGE_MASK;
-	end_addr = addr + LEVEL3_SIZE;
+	end_addr = addr + PGDIR_SIZE;
 	while ((addr < last_addr) && (addr < end_addr)) {
 		struct page *page;
-		u64 *level2p;
+		pmd_t *level2p;
 
 		page = kimage_alloc_control_pages(image, 0);
 		if (!page) {
 			result = -ENOMEM;
 			goto out;
 		}
-		level2p = (u64 *)page_address(page);
+		level2p = (pmd_t *)page_address(page);
 		init_level2_page(level2p, addr);
-		*(level3p++) = __pa(level2p) | L2_ATTR;
-		addr += LEVEL2_SIZE;
+		set_pud(level3p++, __pud(__pa(level2p) | _KERNPG_TABLE));
+		addr += PUD_SIZE;
 	}
 	/* clear the unused entries */
 	while (addr < end_addr) {
-		*(level3p++) = 0;
-		addr += LEVEL2_SIZE;
+		pud_clear(level3p++);
+		addr += PUD_SIZE;
 	}
 out:
 	return result;
 }
 
-static int init_level4_page(struct kimage *image, u64 *level4p,
+static int init_level4_page(struct kimage *image, pgd_t *level4p,
 	unsigned long addr, unsigned long last_addr)
 {
 	unsigned long end_addr;
@@ -85,28 +68,28 @@ static int init_level4_page(struct kimage *image, u64 *level4p,
 
 	result = 0;
 	addr &= PAGE_MASK;
-	end_addr = addr + LEVEL4_SIZE;
+	end_addr = addr + (PTRS_PER_PGD * PGDIR_SIZE);
 	while ((addr < last_addr) && (addr < end_addr)) {
 		struct page *page;
-		u64 *level3p;
+		pud_t *level3p;
 
 		page = kimage_alloc_control_pages(image, 0);
 		if (!page) {
 			result = -ENOMEM;
 			goto out;
 		}
-		level3p = (u64 *)page_address(page);
+		level3p = (pud_t *)page_address(page);
 		result = init_level3_page(image, level3p, addr, last_addr);
 		if (result) {
 			goto out;
 		}
-		*(level4p++) = __pa(level3p) | L3_ATTR;
-		addr += LEVEL3_SIZE;
+		set_pgd(level4p++, __pgd(__pa(level3p) | _KERNPG_TABLE));
+		addr += PGDIR_SIZE;
 	}
 	/* clear the unused entries */
 	while (addr < end_addr) {
-		*(level4p++) = 0;
-		addr += LEVEL3_SIZE;
+		pgd_clear(level4p++);
+		addr += PGDIR_SIZE;
 	}
out:
 	return result;
@@ -115,8 +98,8 @@ out:
 
 static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
 {
-	u64 *level4p;
-	level4p = (u64 *)__va(start_pgtable);
+	pgd_t *level4p;
+	level4p = (pgd_t *)__va(start_pgtable);
 	return init_level4_page(image, level4p, 0, end_pfn << PAGE_SHIFT);
 }
 
@@ -159,7 +142,7 @@ int machine_kexec_prepare(struct kimage *image)
 
 	/* Calculate the offsets */
 	start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
-	control_code_buffer = start_pgtable + 4096UL;
+	control_code_buffer = start_pgtable + PAGE_SIZE;
 
 	/* Setup the identity mapped 64bit page table */
 	result = init_pgtable(image, start_pgtable);
@@ -212,7 +195,7 @@ NORET_TYPE void machine_kexec(struct kimage *image)
 	/* Calculate the offsets */
 	page_list = image->head;
 	start_pgtable = page_to_pfn(image->control_code_page) << PAGE_SHIFT;
-	control_code_buffer = start_pgtable + 4096UL;
+	control_code_buffer = start_pgtable + PAGE_SIZE;
 
 	/* Set the low half of the page table to my identity mapped
 	 * page table for kexec. Leave the high half pointing at the
-- 
cgit v1.2.3-18-g5258

From 3d483f47579461a4715db33c68ef8752e5a97a2d Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman"
Date: Fri, 29 Jul 2005 14:03:29 -0700
Subject: [PATCH] Fix sync_tsc hang

sync_tsc was using smp_call_function to ask the boot processor to
report its TSC value. smp_call_function performs an
IPI_send_allbutself, which is a broadcast IPI. There is a window during
processor startup after the target cpu has started but before it has
initialized its interrupt vectors, so it cannot yet properly process an
interrupt. Receiving an interrupt during that window will triple fault
the cpu and do other nasty things.

Why cli does not protect us from that is beyond me.

The simple fix is to match ia64 and provide a smp_call_function_single,
which avoids the broadcast and is more efficient.

This certainly fixes the problem of getting stuck on boot, which was
very easy to trigger on my SMP Hyperthreaded Xeon, and I think it fixes
it for the right reasons.

Minor changes by AK

Signed-off-by: Eric W. Biederman
Signed-off-by: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/smp.c     | 63 ++++++++++++++++++++++++++++++++++++++++
 arch/x86_64/kernel/smpboot.c | 21 ++++++++-------
 2 files changed, 75 insertions(+), 9 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c
index 6ee74db5230..e5958220d6b 100644
--- a/arch/x86_64/kernel/smp.c
+++ b/arch/x86_64/kernel/smp.c
@@ -292,6 +292,69 @@ void unlock_ipi_call_lock(void)
 	spin_unlock_irq(&call_lock);
 }
 
+/*
+ * this function sends a 'generic call function' IPI to one other CPU
+ * in the system.
+ */
+static void __smp_call_function_single (int cpu, void (*func) (void *info), void *info,
+				int nonatomic, int wait)
+{
+	struct call_data_struct data;
+	int cpus = 1;
+
+	data.func = func;
+	data.info = info;
+	atomic_set(&data.started, 0);
+	data.wait = wait;
+	if (wait)
+		atomic_set(&data.finished, 0);
+
+	call_data = &data;
+	wmb();
+	/* Send a message to all other CPUs and wait for them to respond */
+	send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR);
+
+	/* Wait for response */
+	while (atomic_read(&data.started) != cpus)
+		cpu_relax();
+
+	if (!wait)
+		return;
+
+	while (atomic_read(&data.finished) != cpus)
+		cpu_relax();
+}
+
+/*
+ * smp_call_function_single - Run a function on another CPU
+ * @func: The function to run. This must be fast and non-blocking.
+ * @info: An arbitrary pointer to pass to the function.
+ * @nonatomic: Currently unused.
+ * @wait: If true, wait until function has completed on other CPUs.
+ *
+ * Retrurns 0 on success, else a negative status code.
+ *
+ * Does not return until the remote CPU is nearly ready to execute
+ * or is or has executed.
+ */
+
+int smp_call_function_single (int cpu, void (*func) (void *info), void *info,
+	int nonatomic, int wait)
+{
+	/* prevent preemption and reschedule on another processor */
+	int me = get_cpu();
+	if (cpu == me) {
+		WARN_ON(1);
+		put_cpu();
+		return -EBUSY;
+	}
+	spin_lock_bh(&call_lock);
+	__smp_call_function_single(cpu, func, info, nonatomic, wait);
+	spin_unlock_bh(&call_lock);
+	put_cpu();
+	return 0;
+}
+
 /*
  * this function sends a 'generic call function' IPI to all other CPUs
  * in the system.
diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c
index 6d23354443c..6e4807d64d4 100644
--- a/arch/x86_64/kernel/smpboot.c
+++ b/arch/x86_64/kernel/smpboot.c
@@ -211,9 +211,6 @@ static __cpuinit void sync_master(void *arg)
 {
 	unsigned long flags, i;
 
-	if (smp_processor_id() != 0)
-		return;
-
 	go[MASTER] = 0;
 
 	local_irq_save(flags);
@@ -262,7 +259,7 @@ get_delta(long *rt, long *master)
 	return tcenter - best_tm;
 }
 
-static __cpuinit void sync_tsc(void)
+static __cpuinit void sync_tsc(unsigned int master)
 {
 	int i, done = 0;
 	long delta, adj, adjust_latency = 0;
@@ -276,9 +273,17 @@ static __cpuinit void sync_tsc(void)
 	} t[NUM_ROUNDS] __cpuinitdata;
 #endif
 
+	printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n",
+		smp_processor_id(), master);
+
 	go[MASTER] = 1;
 
-	smp_call_function(sync_master, NULL, 1, 0);
+	/* It is dangerous to broadcast IPI as cpus are coming up,
+	 * as they may not be ready to accept them.  So since
+	 * we only need to send the ipi to the boot cpu direct
+	 * the message, and avoid the race.
+	 */
+	smp_call_function_single(master, sync_master, NULL, 1, 0);
 
 	while (go[MASTER])	/* wait for master to be ready */
 		no_cpu_relax();
@@ -322,16 +327,14 @@ static __cpuinit void sync_tsc(void)
 	printk(KERN_INFO
 	       "CPU %d: synchronized TSC with CPU %u (last diff %ld cycles, "
 	       "maxerr %lu cycles)\n",
-	       smp_processor_id(), boot_cpu_id, delta, rt);
+	       smp_processor_id(), master, delta, rt);
 }
 
 static void __cpuinit tsc_sync_wait(void)
 {
 	if (notscsync || !cpu_has_tsc)
 		return;
-	printk(KERN_INFO "CPU %d: Syncing TSC to CPU %u.\n", smp_processor_id(),
-			boot_cpu_id);
-	sync_tsc();
+	sync_tsc(boot_cpu_id);
 }
 
 static __init int notscsync_setup(char *s)
-- 
cgit v1.2.3-18-g5258

From 6a1caa21d66bcb9ba9892435a0a54fc32cd9eeab Mon Sep 17 00:00:00 2001
From: "Natalie.Protasevich@unisys.com"
Date: Sat, 30 Jul 2005 11:25:32 -0700
Subject: [PATCH] x86_64: avoid wasting IRQs patch update

The patch adds a boundary check for MAX_GSI_NUM. As with the
corresponding update for i386, the patch addresses a problem with the
ACPI SCI IRQ: it corrects the code so that the SCI IRQ is skipped and a
duplicate entry is avoided.

The VIA chipset uses a 4-bit IRQ register for internal interrupt
routing, and therefore cannot handle IRQ numbers above 15 assigned to
its devices. The patch corrects this problem by allowing PCI IRQs below 16.
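To illustrate the resulting policy, here is a standalone sketch of the
remapping in the fallback path of mp_register_gsi() (not kernel code;
the starting pci_irq value and the SCI number are assumed for the
example, and the CONFIG_ACPI_BUS-conditional SCI check is folded in):

	#include <stdio.h>

	#define MAX_GSI_NUM 4096
	static unsigned int pci_irq = 16;	/* next free IRQ; assumed start */
	static unsigned int sci_int = 9;	/* ACPI SCI GSI; assumed */

	/* GSIs 0-15 keep their number (so 4-bit-limited routers still
	 * work), higher GSIs take the next free IRQ, and the SCI's
	 * number is never handed out. */
	static unsigned int map_gsi(unsigned int gsi)
	{
		if (gsi >= MAX_GSI_NUM) {
			printf("GSI %u is too high\n", gsi);
			return gsi;
		}
		if (gsi > 15)
			gsi = pci_irq++;
		if (gsi == sci_int)	/* don't reuse the SCI's IRQ */
			gsi = pci_irq++;
		return gsi;
	}

	int main(void)
	{
		unsigned int gsis[] = { 5, 20, 21, 9 };
		for (int i = 0; i < 4; i++)
			printf("GSI %u -> IRQ %u\n", gsis[i], map_gsi(gsis[i]));
		return 0;	/* prints 5->5, 20->16, 21->17, 9->18 */
	}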
Signed-off-by: Natalie Protasevich
Acked-by: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/mpparse.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/mpparse.c b/arch/x86_64/kernel/mpparse.c
index 08abf9f5b15..79c362d03e2 100644
--- a/arch/x86_64/kernel/mpparse.c
+++ b/arch/x86_64/kernel/mpparse.c
@@ -970,8 +970,21 @@ int mp_register_gsi(u32 gsi, int edge_level, int active_high_low)
 		 * due to unused I/O APIC pins.
 		 */
 		int irq = gsi;
-		gsi = pci_irq++;
-		gsi_to_irq[irq] = gsi;
+		if (gsi < MAX_GSI_NUM) {
+			if (gsi > 15)
+				gsi = pci_irq++;
+#ifdef CONFIG_ACPI_BUS
+			/*
+			 * Don't assign IRQ used by ACPI SCI
+			 */
+			if (gsi == acpi_fadt.sci_int)
+				gsi = pci_irq++;
+#endif
+			gsi_to_irq[irq] = gsi;
+		} else {
+			printk(KERN_ERR "GSI %u is too high\n", gsi);
+			return gsi;
+		}
 	}
 
 	io_apic_set_pci_routing(ioapic, ioapic_pin, gsi,
-- 
cgit v1.2.3-18-g5258

From 0d317fb72fe3cf0f611608cf3a3015bbe6cd2a66 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman"
Date: Sat, 6 Aug 2005 13:47:36 -0600
Subject: [PATCH] x86_64 bootmem: sparse_mem/kexec merge bug.

When the sparse mem changes and the kexec changes were merged into
setup.c, they came in in the wrong order. This patch changes the order
so that we don't run sparse_init, which uses the bootmem allocator,
until all of the reserve_bootmem calls have been made.

Signed-off-by: Eric W. Biederman
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/setup.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 0aa526298e9..116a491e296 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -645,15 +645,15 @@ void __init setup_arch(char **cmdline_p)
 		}
 	}
 #endif
-
-	sparse_init();
-
 #ifdef CONFIG_KEXEC
 	if (crashk_res.start != crashk_res.end) {
 		reserve_bootmem(crashk_res.start,
 			crashk_res.end - crashk_res.start + 1);
 	}
 #endif
+
+	sparse_init();
+
 	paging_init();
 
 	check_ioapic();
-- 
cgit v1.2.3-18-g5258

From d5172f263f76ca6d588f533c0989b22521fcbac2 Mon Sep 17 00:00:00 2001
From: Andi Kleen
Date: Sun, 7 Aug 2005 09:42:07 -0700
Subject: [PATCH] x86_64: ignore machine checks from boot time

Don't log machine check events left over from boot. Too many BIOSes
leave bogus events in there. This unfortunately also makes it
impossible to log events that caused a reboot. For people with a
non-broken BIOS there is mce=bootlog.

Signed-off-by: Andi Kleen
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 arch/x86_64/kernel/mce.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'arch/x86_64/kernel')

diff --git a/arch/x86_64/kernel/mce.c b/arch/x86_64/kernel/mce.c
index 3b267c91bb0..8aa56736cde 100644
--- a/arch/x86_64/kernel/mce.c
+++ b/arch/x86_64/kernel/mce.c
@@ -36,6 +36,7 @@ static unsigned long bank[NR_BANKS] = { [0 ... NR_BANKS-1] = ~0UL };
 static unsigned long console_logged;
 static int notify_user;
 static int rip_msr;
+static int mce_bootlog;
 
 /*
  * Lockless MCE logging infrastructure.
@@ -197,10 +198,11 @@ void do_machine_check(struct pt_regs * regs, long error_code)
 			rdmsrl(MSR_IA32_MC0_ADDR + i*4, m.addr);
 
 		mce_get_rip(&m, regs);
-		if (error_code != -1)
+		if (error_code >= 0)
 			rdtscll(m.tsc);
 		wrmsrl(MSR_IA32_MC0_STATUS + i*4, 0);
-		mce_log(&m);
+		if (error_code != -2)
+			mce_log(&m);
 
 		/* Did this bank cause the exception? */
 		/* Assume that the bank with uncorrectable errors did it,
@@ -315,7 +317,7 @@ static void mce_init(void *dummy)
 
 	/* Log the machine checks left over from the previous reset.
 	   This also clears all registers */
-	do_machine_check(NULL, -1);
+	do_machine_check(NULL, mce_bootlog ? -1 : -2);
 
 	set_in_cr4(X86_CR4_MCE);
 
@@ -476,11 +478,17 @@ static int __init mcheck_disable(char *str)
 }
 
 /* mce=off disables machine check. Note you can reenable it later
-   using sysfs */
+   using sysfs.
+   mce=bootlog Log MCEs from before booting. Disabled by default to work
+   around buggy BIOS that leave bogus MCEs.  */
 static int __init mcheck_enable(char *str)
 {
+	if (*str == '=')
+		str++;
 	if (!strcmp(str, "off"))
 		mce_dont_init = 1;
+	else if (!strcmp(str, "bootlog"))
+		mce_bootlog = 1;
 	else
 		printk("mce= argument %s ignored. Please use /sys", str);
 	return 0;
-- 
cgit v1.2.3-18-g5258
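As a usage note for the last patch: the default boot now discards
pre-existing MCE records, booting with mce=bootlog on the kernel
command line restores the old boot-time logging, and mce=off still
disables machine checks entirely. A standalone sketch of the option
semantics, mirroring mcheck_enable() above (the harness around it is
illustrative only; in the kernel the text after "mce" would arrive via
the command-line setup machinery):

	#include <stdio.h>
	#include <string.h>

	static int mce_dont_init, mce_bootlog;

	/* same decision logic as mcheck_enable() in the patch above */
	static int mcheck_enable(const char *str)
	{
		if (*str == '=')
			str++;
		if (!strcmp(str, "off"))
			mce_dont_init = 1;
		else if (!strcmp(str, "bootlog"))
			mce_bootlog = 1;
		else
			printf("mce= argument %s ignored. Please use /sys\n", str);
		return 0;
	}

	int main(void)
	{
		mcheck_enable("=bootlog");	/* as for "mce=bootlog" */
		printf("mce_dont_init=%d mce_bootlog=%d\n",
		       mce_dont_init, mce_bootlog);
		return 0;	/* prints mce_dont_init=0 mce_bootlog=1 */
	}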