Diffstat (limited to 'arch/powerpc/kernel/smp.c')
| -rw-r--r-- | arch/powerpc/kernel/smp.c | 365 |
1 file changed, 210 insertions, 155 deletions
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 46695febc09..1007fb802e6 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -35,6 +35,8 @@
 #include <asm/ptrace.h>
 #include <linux/atomic.h>
 #include <asm/irq.h>
+#include <asm/hw_irq.h>
+#include <asm/kvm_ppc.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #include <asm/prom.h>
@@ -43,12 +45,13 @@
 #include <asm/machdep.h>
 #include <asm/cputhreads.h>
 #include <asm/cputable.h>
-#include <asm/system.h>
 #include <asm/mpic.h>
 #include <asm/vdso_datapage.h>
 #ifdef CONFIG_PPC64
 #include <asm/paca.h>
 #endif
+#include <asm/vdso.h>
+#include <asm/debug.h>
 
 #ifdef DEBUG
 #include <asm/udbg.h>
@@ -57,27 +60,9 @@
 #define DBG(fmt...)
 #endif
 
-
-/* Store all idle threads, this can be reused instead of creating
-* a new thread. Also avoids complicated thread destroy functionality
-* for idle threads.
-*/
 #ifdef CONFIG_HOTPLUG_CPU
-/*
- * Needed only for CONFIG_HOTPLUG_CPU because __cpuinitdata is
- * removed after init for !CONFIG_HOTPLUG_CPU.
- */
-static DEFINE_PER_CPU(struct task_struct *, idle_thread_array);
-#define get_idle_for_cpu(x) (per_cpu(idle_thread_array, x))
-#define set_idle_for_cpu(x, p) (per_cpu(idle_thread_array, x) = (p))
-
 /* State of each CPU during hotplug phases */
 static DEFINE_PER_CPU(int, cpu_state) = { 0 };
-
-#else
-static struct task_struct *idle_thread_array[NR_CPUS] __cpuinitdata ;
-#define get_idle_for_cpu(x) (idle_thread_array[(x)])
-#define set_idle_for_cpu(x, p) (idle_thread_array[(x)] = (p))
 #endif
 
 struct thread_info *secondary_ti;
@@ -98,8 +83,30 @@ int smt_enabled_at_boot = 1;
 
 static void (*crash_ipi_function_ptr)(struct pt_regs *) = NULL;
 
+/*
+ * Returns 1 if the specified cpu should be brought up during boot.
+ * Used to inhibit booting threads if they've been disabled or
+ * limited on the command line
+ */
+int smp_generic_cpu_bootable(unsigned int nr)
+{
+	/* Special case - we inhibit secondary thread startup
+	 * during boot if the user requests it.
+	 */
+	if (system_state == SYSTEM_BOOTING && cpu_has_feature(CPU_FTR_SMT)) {
+		if (!smt_enabled_at_boot && cpu_thread_in_core(nr) != 0)
+			return 0;
+		if (smt_enabled_at_boot
+		    && cpu_thread_in_core(nr) >= smt_enabled_at_boot)
+			return 0;
+	}
+
+	return 1;
+}
+
+
 #ifdef CONFIG_PPC64
-int __devinit smp_generic_kick_cpu(int nr)
+int smp_generic_kick_cpu(int nr)
 {
 	BUG_ON(nr < 0 || nr >= NR_CPUS);
 
@@ -119,7 +126,7 @@ int __devinit smp_generic_kick_cpu(int nr)
 	 * Ok it's not there, so it might be soft-unplugged, let's
 	 * try to bring it back
 	 */
-	per_cpu(cpu_state, nr) = CPU_UP_PREPARE;
+	generic_set_cpu_up(nr);
 	smp_wmb();
 	smp_send_reschedule(nr);
 #endif /* CONFIG_HOTPLUG_CPU */
@@ -140,9 +147,9 @@ static irqreturn_t reschedule_action(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static irqreturn_t call_function_single_action(int irq, void *data)
+static irqreturn_t tick_broadcast_ipi_action(int irq, void *data)
 {
-	generic_smp_call_function_single_interrupt();
+	tick_broadcast_ipi_handler();
 	return IRQ_HANDLED;
 }
 
@@ -163,14 +170,14 @@ static irqreturn_t debug_ipi_action(int irq, void *data)
 
 static irq_handler_t smp_ipi_action[] = {
 	[PPC_MSG_CALL_FUNCTION] = call_function_action,
 	[PPC_MSG_RESCHEDULE] = reschedule_action,
-	[PPC_MSG_CALL_FUNC_SINGLE] = call_function_single_action,
+	[PPC_MSG_TICK_BROADCAST] = tick_broadcast_ipi_action,
 	[PPC_MSG_DEBUGGER_BREAK] = debug_ipi_action,
 };
 
 const char *smp_ipi_name[] = {
 	[PPC_MSG_CALL_FUNCTION] = "ipi call function",
 	[PPC_MSG_RESCHEDULE] = "ipi reschedule",
-	[PPC_MSG_CALL_FUNC_SINGLE] = "ipi call function single",
+	[PPC_MSG_TICK_BROADCAST] = "ipi tick-broadcast",
 	[PPC_MSG_DEBUGGER_BREAK] = "ipi debugger",
 };
 
@@ -188,8 +195,8 @@ int smp_request_message_ipi(int virq, int msg)
 	}
 #endif
 	err = request_irq(virq, smp_ipi_action[msg],
-			  IRQF_PERCPU | IRQF_NO_THREAD,
-			  smp_ipi_name[msg], 0);
+			  IRQF_PERCPU | IRQF_NO_THREAD | IRQF_NO_SUSPEND,
+			  smp_ipi_name[msg], NULL);
 	WARN(err < 0, "unable to request_irq %d for %s (rc %d)\n",
 	     virq, smp_ipi_name[msg], err);
 
@@ -215,11 +222,24 @@ void smp_muxed_ipi_message_pass(int cpu, int msg)
 	struct cpu_messages *info = &per_cpu(ipi_message, cpu);
 	char *message = (char *)&info->messages;
 
+	/*
+	 * Order previous accesses before accesses in the IPI handler.
+	 */
+	smp_mb();
 	message[msg] = 1;
-	mb();
+	/*
+	 * cause_ipi functions are required to include a full barrier
+	 * before doing whatever causes the IPI.
+	 */
 	smp_ops->cause_ipi(cpu, info->data);
 }
 
+#ifdef __BIG_ENDIAN__
+#define IPI_MESSAGE(A) (1 << (24 - 8 * (A)))
+#else
+#define IPI_MESSAGE(A) (1 << (8 * (A)))
+#endif
+
 irqreturn_t smp_ipi_demux(void)
 {
 	struct cpu_messages *info = &__get_cpu_var(ipi_message);
@@ -228,20 +248,15 @@
 	mb();	/* order any irq clear */
 
 	do {
-		all = xchg_local(&info->messages, 0);
-
-#ifdef __BIG_ENDIAN
-		if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNCTION)))
+		all = xchg(&info->messages, 0);
+		if (all & IPI_MESSAGE(PPC_MSG_CALL_FUNCTION))
 			generic_smp_call_function_interrupt();
-		if (all & (1 << (24 - 8 * PPC_MSG_RESCHEDULE)))
+		if (all & IPI_MESSAGE(PPC_MSG_RESCHEDULE))
 			scheduler_ipi();
-		if (all & (1 << (24 - 8 * PPC_MSG_CALL_FUNC_SINGLE)))
-			generic_smp_call_function_single_interrupt();
-		if (all & (1 << (24 - 8 * PPC_MSG_DEBUGGER_BREAK)))
+		if (all & IPI_MESSAGE(PPC_MSG_TICK_BROADCAST))
+			tick_broadcast_ipi_handler();
+		if (all & IPI_MESSAGE(PPC_MSG_DEBUGGER_BREAK))
 			debug_ipi_action(0, NULL);
-#else
-#error Unsupported ENDIAN
-#endif
 	} while (info->messages);
 
 	return IRQ_HANDLED;
@@ -267,7 +282,7 @@ EXPORT_SYMBOL_GPL(smp_send_reschedule);
 
 void arch_send_call_function_single_ipi(int cpu)
 {
-	do_message_pass(cpu, PPC_MSG_CALL_FUNC_SINGLE);
+	do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
 }
 
 void arch_send_call_function_ipi_mask(const struct cpumask *mask)
@@ -278,6 +293,16 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask)
 		do_message_pass(cpu, PPC_MSG_CALL_FUNCTION);
 }
 
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+void tick_broadcast(const struct cpumask *mask)
+{
+	unsigned int cpu;
+
+	for_each_cpu(cpu, mask)
+		do_message_pass(cpu, PPC_MSG_TICK_BROADCAST);
+}
+#endif
+
 #if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
 void smp_send_debugger_break(void)
 {
@@ -321,7 +346,7 @@ void smp_send_stop(void)
 
 struct thread_info *current_set[NR_CPUS];
 
-static void __devinit smp_store_cpu_info(int id)
+static void smp_store_cpu_info(int id)
 {
 	per_cpu(cpu_pvr, id) = mfspr(SPRN_PVR);
 #ifdef CONFIG_PPC_FSL_BOOK3E
@@ -356,21 +381,17 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 	cpumask_set_cpu(boot_cpuid, cpu_sibling_mask(boot_cpuid));
 	cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
 
-	if (smp_ops)
-		if (smp_ops->probe)
-			max_cpus = smp_ops->probe();
-		else
-			max_cpus = NR_CPUS;
-	else
-		max_cpus = 1;
+	if (smp_ops && smp_ops->probe)
+		smp_ops->probe();
 }
 
-void __devinit smp_prepare_boot_cpu(void)
+void smp_prepare_boot_cpu(void)
 {
 	BUG_ON(smp_processor_id() != boot_cpuid);
 #ifdef CONFIG_PPC64
 	paca[boot_cpuid].__current = current;
 #endif
+	set_numa_node(numa_cpu_lookup_table[boot_cpuid]);
 	current_set[boot_cpuid] = task_thread_info(current);
 }
 
@@ -423,79 +444,60 @@ void generic_set_cpu_dead(unsigned int cpu)
 	per_cpu(cpu_state, cpu) = CPU_DEAD;
 }
 
+/*
+ * The cpu_state should be set to CPU_UP_PREPARE in kick_cpu(), otherwise
+ * the cpu_state is always CPU_DEAD after calling generic_set_cpu_dead(),
+ * which makes the delay in generic_cpu_die() not happen.
+ */
+void generic_set_cpu_up(unsigned int cpu)
+{
+	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
+}
+
 int generic_check_cpu_restart(unsigned int cpu)
 {
 	return per_cpu(cpu_state, cpu) == CPU_UP_PREPARE;
 }
-#endif
 
-struct create_idle {
-	struct work_struct work;
-	struct task_struct *idle;
-	struct completion done;
-	int cpu;
-};
-
-static void __cpuinit do_fork_idle(struct work_struct *work)
+static bool secondaries_inhibited(void)
 {
-	struct create_idle *c_idle =
-		container_of(work, struct create_idle, work);
-
-	c_idle->idle = fork_idle(c_idle->cpu);
-	complete(&c_idle->done);
+	return kvm_hv_mode_active();
 }
 
-static int __cpuinit create_idle(unsigned int cpu)
-{
-	struct thread_info *ti;
-	struct create_idle c_idle = {
-		.cpu = cpu,
-		.done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
-	};
-	INIT_WORK_ONSTACK(&c_idle.work, do_fork_idle);
+#else /* HOTPLUG_CPU */
 
-	c_idle.idle = get_idle_for_cpu(cpu);
+#define secondaries_inhibited() 0
 
-	/* We can't use kernel_thread since we must avoid to
-	 * reschedule the child. We use a workqueue because
-	 * we want to fork from a kernel thread, not whatever
-	 * userspace process happens to be trying to online us.
-	 */
-	if (!c_idle.idle) {
-		schedule_work(&c_idle.work);
-		wait_for_completion(&c_idle.done);
-	} else
-		init_idle(c_idle.idle, cpu);
-	if (IS_ERR(c_idle.idle)) {
-		pr_err("Failed fork for CPU %u: %li", cpu, PTR_ERR(c_idle.idle));
-		return PTR_ERR(c_idle.idle);
-	}
-	ti = task_thread_info(c_idle.idle);
+#endif
+
+static void cpu_idle_thread_init(unsigned int cpu, struct task_struct *idle)
+{
+	struct thread_info *ti = task_thread_info(idle);
 
 #ifdef CONFIG_PPC64
-	paca[cpu].__current = c_idle.idle;
+	paca[cpu].__current = idle;
 	paca[cpu].kstack = (unsigned long)ti + THREAD_SIZE - STACK_FRAME_OVERHEAD;
 #endif
 	ti->cpu = cpu;
-	current_set[cpu] = ti;
-
-	return 0;
+	secondary_ti = current_set[cpu] = ti;
}
 
-int __cpuinit __cpu_up(unsigned int cpu)
+int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
 	int rc, c;
 
+	/*
+	 * Don't allow secondary threads to come online if inhibited
+	 */
+	if (threads_per_core > 1 && secondaries_inhibited() &&
+	    cpu_thread_in_subcore(cpu))
+		return -EBUSY;
+
 	if (smp_ops == NULL ||
 	    (smp_ops->cpu_bootable && !smp_ops->cpu_bootable(cpu)))
 		return -EINVAL;
 
-	/* Make sure we have an idle thread */
-	rc = create_idle(cpu);
-	if (rc)
-		return rc;
-
-	secondary_ti = current_set[cpu];
+	cpu_idle_thread_init(cpu, tidle);
 
 	/* Make sure callin-map entry is 0 (can be leftover a CPU
 	 * hotplug
@@ -557,7 +559,7 @@ int cpu_to_core_id(int cpu)
 {
 	struct device_node *np;
-	const int *reg;
+	const __be32 *reg;
 	int id = -1;
 
 	np = of_get_cpu_node(cpu, NULL);
@@ -568,7 +570,7 @@ int cpu_to_core_id(int cpu)
 	if (!reg)
 		goto out;
 
-	id = *reg;
+	id = be32_to_cpup(reg);
 out:
 	of_node_put(np);
 	return id;
@@ -587,6 +589,33 @@ int cpu_first_thread_of_core(int core)
 }
 EXPORT_SYMBOL_GPL(cpu_first_thread_of_core);
 
+static void traverse_siblings_chip_id(int cpu, bool add, int chipid)
+{
+	const struct cpumask *mask;
+	struct device_node *np;
+	int i, plen;
+	const __be32 *prop;
+
+	mask = add ? cpu_online_mask : cpu_present_mask;
+	for_each_cpu(i, mask) {
+		np = of_get_cpu_node(i, NULL);
+		if (!np)
+			continue;
+		prop = of_get_property(np, "ibm,chip-id", &plen);
+		if (prop && plen == sizeof(int) &&
+		    of_read_number(prop, 1) == chipid) {
+			if (add) {
+				cpumask_set_cpu(cpu, cpu_core_mask(i));
+				cpumask_set_cpu(i, cpu_core_mask(cpu));
+			} else {
+				cpumask_clear_cpu(cpu, cpu_core_mask(i));
+				cpumask_clear_cpu(i, cpu_core_mask(cpu));
+			}
+		}
+		of_node_put(np);
+	}
+}
+
 /* Must be called when no change can occur to cpu_present_mask,
  * i.e. during cpu online or offline.
@@ -609,11 +638,51 @@ static struct device_node *cpu_to_l2cache(int cpu)
 	return cache;
 }
 
+static void traverse_core_siblings(int cpu, bool add)
+{
+	struct device_node *l2_cache, *np;
+	const struct cpumask *mask;
+	int i, chip, plen;
+	const __be32 *prop;
+
+	/* First see if we have ibm,chip-id properties in cpu nodes */
+	np = of_get_cpu_node(cpu, NULL);
+	if (np) {
+		chip = -1;
+		prop = of_get_property(np, "ibm,chip-id", &plen);
+		if (prop && plen == sizeof(int))
+			chip = of_read_number(prop, 1);
+		of_node_put(np);
+		if (chip >= 0) {
+			traverse_siblings_chip_id(cpu, add, chip);
+			return;
+		}
+	}
+
+	l2_cache = cpu_to_l2cache(cpu);
+	mask = add ? cpu_online_mask : cpu_present_mask;
+	for_each_cpu(i, mask) {
+		np = cpu_to_l2cache(i);
+		if (!np)
+			continue;
+		if (np == l2_cache) {
+			if (add) {
+				cpumask_set_cpu(cpu, cpu_core_mask(i));
+				cpumask_set_cpu(i, cpu_core_mask(cpu));
+			} else {
+				cpumask_clear_cpu(cpu, cpu_core_mask(i));
+				cpumask_clear_cpu(i, cpu_core_mask(cpu));
+			}
+		}
+		of_node_put(np);
+	}
+	of_node_put(l2_cache);
+}
+
 /* Activate a secondary processor. */
-void __devinit start_secondary(void *unused)
+void start_secondary(void *unused)
 {
 	unsigned int cpu = smp_processor_id();
-	struct device_node *l2_cache;
 	int i, base;
 
 	atomic_inc(&init_mm.mm_count);
@@ -634,14 +703,13 @@ void __devinit start_secondary(void *unused)
 #ifdef CONFIG_PPC64
 	if (system_state == SYSTEM_RUNNING)
 		vdso_data->processorCount++;
+
+	vdso_getcpu_init();
 #endif
-	ipi_call_lock();
-	notify_cpu_starting(cpu);
-	set_cpu_online(cpu, true);
 	/* Update sibling maps */
 	base = cpu_first_thread_sibling(cpu);
 	for (i = 0; i < threads_per_core; i++) {
-		if (cpu_is_offline(base + i))
+		if (cpu_is_offline(base + i) && (cpu != base + i))
 			continue;
 		cpumask_set_cpu(cpu, cpu_sibling_mask(base + i));
 		cpumask_set_cpu(base + i, cpu_sibling_mask(cpu));
@@ -653,23 +721,21 @@ void __devinit start_secondary(void *unused)
 		cpumask_set_cpu(cpu, cpu_core_mask(base + i));
 		cpumask_set_cpu(base + i, cpu_core_mask(cpu));
 	}
-	l2_cache = cpu_to_l2cache(cpu);
-	for_each_online_cpu(i) {
-		struct device_node *np = cpu_to_l2cache(i);
-		if (!np)
-			continue;
-		if (np == l2_cache) {
-			cpumask_set_cpu(cpu, cpu_core_mask(i));
-			cpumask_set_cpu(i, cpu_core_mask(cpu));
-		}
-		of_node_put(np);
-	}
-	of_node_put(l2_cache);
-	ipi_call_unlock();
+	traverse_core_siblings(cpu, true);
+
+	/*
+	 * numa_node_id() works after this.
+	 */
+	set_numa_node(numa_cpu_lookup_table[cpu]);
+	set_numa_mem(local_memory_node(numa_cpu_lookup_table[cpu]));
+
+	smp_wmb();
+	notify_cpu_starting(cpu);
+	set_cpu_online(cpu, true);
 
 	local_irq_enable();
 
-	cpu_idle();
+	cpu_startup_entry(CPUHP_ONLINE);
 
 	BUG();
 }
@@ -679,6 +745,28 @@ int setup_profiling_timer(unsigned int multiplier)
 	return 0;
 }
 
+#ifdef CONFIG_SCHED_SMT
+/* cpumask of CPUs with asymetric SMT dependancy */
+static int powerpc_smt_flags(void)
+{
+	int flags = SD_SHARE_CPUCAPACITY | SD_SHARE_PKG_RESOURCES;
+
+	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
+		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
+		flags |= SD_ASYM_PACKING;
+	}
+	return flags;
+}
+#endif
+
+static struct sched_domain_topology_level powerpc_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+	{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
+#endif
+	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
+	{ NULL, },
+};
+
 void __init smp_cpus_done(unsigned int max_cpus)
 {
 	cpumask_var_t old_mask;
@@ -703,21 +791,13 @@ void __init smp_cpus_done(unsigned int max_cpus)
 
 	dump_numa_cpu_topology();
 
-}
+	set_sched_topology(powerpc_topology);
 
-int arch_sd_sibling_asym_packing(void)
-{
-	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
-		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
-		return SD_ASYM_PACKING;
-	}
-	return 0;
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
 int __cpu_disable(void)
 {
-	struct device_node *l2_cache;
 	int cpu = smp_processor_id();
 	int base, i;
 	int err;
@@ -737,20 +817,7 @@ int __cpu_disable(void)
 		cpumask_clear_cpu(cpu, cpu_core_mask(base + i));
 		cpumask_clear_cpu(base + i, cpu_core_mask(cpu));
 	}
-
-	l2_cache = cpu_to_l2cache(cpu);
-	for_each_present_cpu(i) {
-		struct device_node *np = cpu_to_l2cache(i);
-		if (!np)
-			continue;
-		if (np == l2_cache) {
-			cpumask_clear_cpu(cpu, cpu_core_mask(i));
-			cpumask_clear_cpu(i, cpu_core_mask(cpu));
-		}
-		of_node_put(np);
-	}
-	of_node_put(l2_cache);
-
+	traverse_core_siblings(cpu, false);
 	return 0;
 }
 
@@ -761,18 +828,6 @@ void __cpu_die(unsigned int cpu)
 		smp_ops->cpu_die(cpu);
 }
 
-static DEFINE_MUTEX(powerpc_cpu_hotplug_driver_mutex);
-
-void cpu_hotplug_driver_lock()
-{
-	mutex_lock(&powerpc_cpu_hotplug_driver_mutex);
-}
-
-void cpu_hotplug_driver_unlock()
-{
-	mutex_unlock(&powerpc_cpu_hotplug_driver_mutex);
-}
-
 void cpu_die(void)
 {
 	if (ppc_md.cpu_die)
