diff options
Diffstat (limited to 'kernel/cpu.c')
| -rw-r--r-- | kernel/cpu.c | 300 |
1 files changed, 195 insertions, 105 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c index 2060c6e5702..a343bde710b 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -10,30 +10,42 @@ #include <linux/sched.h> #include <linux/unistd.h> #include <linux/cpu.h> +#include <linux/oom.h> +#include <linux/rcupdate.h> #include <linux/export.h> +#include <linux/bug.h> #include <linux/kthread.h> #include <linux/stop_machine.h> #include <linux/mutex.h> #include <linux/gfp.h> #include <linux/suspend.h> +#include <linux/lockdep.h> +#include <trace/events/power.h> + +#include "smpboot.h" #ifdef CONFIG_SMP /* Serializes the updates to cpu_online_mask, cpu_present_mask */ static DEFINE_MUTEX(cpu_add_remove_lock); /* - * The following two API's must be used when attempting - * to serialize the updates to cpu_online_mask, cpu_present_mask. + * The following two APIs (cpu_maps_update_begin/done) must be used when + * attempting to serialize the updates to cpu_online_mask & cpu_present_mask. + * The APIs cpu_notifier_register_begin/done() must be used to protect CPU + * hotplug callback (un)registration performed using __register_cpu_notifier() + * or __unregister_cpu_notifier(). */ void cpu_maps_update_begin(void) { mutex_lock(&cpu_add_remove_lock); } +EXPORT_SYMBOL(cpu_notifier_register_begin); void cpu_maps_update_done(void) { mutex_unlock(&cpu_add_remove_lock); } +EXPORT_SYMBOL(cpu_notifier_register_done); static RAW_NOTIFIER_HEAD(cpu_chain); @@ -52,17 +64,30 @@ static struct { * an ongoing cpu hotplug operation. */ int refcount; + +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif } cpu_hotplug = { .active_writer = NULL, .lock = __MUTEX_INITIALIZER(cpu_hotplug.lock), .refcount = 0, +#ifdef CONFIG_DEBUG_LOCK_ALLOC + .dep_map = {.name = "cpu_hotplug.lock" }, +#endif }; +/* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */ +#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map) +#define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map) +#define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map) + void get_online_cpus(void) { might_sleep(); if (cpu_hotplug.active_writer == current) return; + cpuhp_lock_acquire_read(); mutex_lock(&cpu_hotplug.lock); cpu_hotplug.refcount++; mutex_unlock(&cpu_hotplug.lock); @@ -75,9 +100,14 @@ void put_online_cpus(void) if (cpu_hotplug.active_writer == current) return; mutex_lock(&cpu_hotplug.lock); + + if (WARN_ON(!cpu_hotplug.refcount)) + cpu_hotplug.refcount++; /* try to fix things up */ + if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer)) wake_up_process(cpu_hotplug.active_writer); mutex_unlock(&cpu_hotplug.lock); + cpuhp_lock_release(); } EXPORT_SYMBOL_GPL(put_online_cpus); @@ -104,10 +134,11 @@ EXPORT_SYMBOL_GPL(put_online_cpus); * get_online_cpus() not an api which is called all that often. * */ -static void cpu_hotplug_begin(void) +void cpu_hotplug_begin(void) { cpu_hotplug.active_writer = current; + cpuhp_lock_acquire(); for (;;) { mutex_lock(&cpu_hotplug.lock); if (likely(!cpu_hotplug.refcount)) @@ -118,16 +149,35 @@ static void cpu_hotplug_begin(void) } } -static void cpu_hotplug_done(void) +void cpu_hotplug_done(void) { cpu_hotplug.active_writer = NULL; mutex_unlock(&cpu_hotplug.lock); + cpuhp_lock_release(); } -#else /* #if CONFIG_HOTPLUG_CPU */ -static void cpu_hotplug_begin(void) {} -static void cpu_hotplug_done(void) {} -#endif /* #else #if CONFIG_HOTPLUG_CPU */ +/* + * Wait for currently running CPU hotplug operations to complete (if any) and + * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects + * the 'cpu_hotplug_disabled' flag. The same lock is also acquired by the + * hotplug path before performing hotplug operations. So acquiring that lock + * guarantees mutual exclusion from any currently running hotplug operations. + */ +void cpu_hotplug_disable(void) +{ + cpu_maps_update_begin(); + cpu_hotplug_disabled = 1; + cpu_maps_update_done(); +} + +void cpu_hotplug_enable(void) +{ + cpu_maps_update_begin(); + cpu_hotplug_disabled = 0; + cpu_maps_update_done(); +} + +#endif /* CONFIG_HOTPLUG_CPU */ /* Need to know about CPUs going up/down? */ int __ref register_cpu_notifier(struct notifier_block *nb) @@ -139,6 +189,11 @@ int __ref register_cpu_notifier(struct notifier_block *nb) return ret; } +int __ref __register_cpu_notifier(struct notifier_block *nb) +{ + return raw_notifier_chain_register(&cpu_chain, nb); +} + static int __cpu_notify(unsigned long val, void *v, int nr_to_call, int *nr_calls) { @@ -162,6 +217,7 @@ static void cpu_notify_nofail(unsigned long val, void *v) BUG_ON(cpu_notify(val, v)); } EXPORT_SYMBOL(register_cpu_notifier); +EXPORT_SYMBOL(__register_cpu_notifier); void __ref unregister_cpu_notifier(struct notifier_block *nb) { @@ -171,16 +227,64 @@ void __ref unregister_cpu_notifier(struct notifier_block *nb) } EXPORT_SYMBOL(unregister_cpu_notifier); +void __ref __unregister_cpu_notifier(struct notifier_block *nb) +{ + raw_notifier_chain_unregister(&cpu_chain, nb); +} +EXPORT_SYMBOL(__unregister_cpu_notifier); + +/** + * clear_tasks_mm_cpumask - Safely clear tasks' mm_cpumask for a CPU + * @cpu: a CPU id + * + * This function walks all processes, finds a valid mm struct for each one and + * then clears a corresponding bit in mm's cpumask. While this all sounds + * trivial, there are various non-obvious corner cases, which this function + * tries to solve in a safe manner. + * + * Also note that the function uses a somewhat relaxed locking scheme, so it may + * be called only for an already offlined CPU. + */ +void clear_tasks_mm_cpumask(int cpu) +{ + struct task_struct *p; + + /* + * This function is called after the cpu is taken down and marked + * offline, so its not like new tasks will ever get this cpu set in + * their mm mask. -- Peter Zijlstra + * Thus, we may use rcu_read_lock() here, instead of grabbing + * full-fledged tasklist_lock. + */ + WARN_ON(cpu_online(cpu)); + rcu_read_lock(); + for_each_process(p) { + struct task_struct *t; + + /* + * Main thread might exit, but other threads may still have + * a valid mm. Find one. + */ + t = find_lock_task_mm(p); + if (!t) + continue; + cpumask_clear_cpu(cpu, mm_cpumask(t->mm)); + task_unlock(t); + } + rcu_read_unlock(); +} + static inline void check_for_tasks(int cpu) { struct task_struct *p; + cputime_t utime, stime; write_lock_irq(&tasklist_lock); for_each_process(p) { + task_cputime(p, &utime, &stime); if (task_cpu(p) == cpu && p->state == TASK_RUNNING && - (p->utime || p->stime)) - printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d " - "(state = %ld, flags = %x)\n", + (utime || stime)) + pr_warn("Task %s (pid = %d) is on cpu %d (state = %ld, flags = %x)\n", p->comm, task_pid_nr(p), cpu, p->state, p->flags); } @@ -204,6 +308,8 @@ static int __ref take_cpu_down(void *_param) return err; cpu_notify(CPU_DYING | param->mod, param->hcpu); + /* Park the stopper thread */ + kthread_park(current); return 0; } @@ -230,16 +336,37 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen) if (err) { nr_calls--; __cpu_notify(CPU_DOWN_FAILED | mod, hcpu, nr_calls, NULL); - printk("%s: attempt to take down CPU %u failed\n", - __func__, cpu); + pr_warn("%s: attempt to take down CPU %u failed\n", + __func__, cpu); goto out_release; } + /* + * By now we've cleared cpu_active_mask, wait for all preempt-disabled + * and RCU users of this state to go away such that all new such users + * will observe it. + * + * For CONFIG_PREEMPT we have preemptible RCU and its sync_rcu() might + * not imply sync_sched(), so explicitly call both. + * + * Do sync before park smpboot threads to take care the rcu boost case. + */ +#ifdef CONFIG_PREEMPT + synchronize_sched(); +#endif + synchronize_rcu(); + + smpboot_park_threads(cpu); + + /* + * So now all preempt/rcu users must observe !cpu_active(). + */ + err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu)); if (err) { /* CPU didn't die: tell everyone. Can't complain. */ + smpboot_unpark_threads(cpu); cpu_notify_nofail(CPU_DOWN_FAILED | mod, hcpu); - goto out_release; } BUG_ON(cpu_online(cpu)); @@ -290,81 +417,75 @@ EXPORT_SYMBOL(cpu_down); #endif /*CONFIG_HOTPLUG_CPU*/ /* Requires cpu_add_remove_lock to be held */ -static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen) +static int _cpu_up(unsigned int cpu, int tasks_frozen) { int ret, nr_calls = 0; void *hcpu = (void *)(long)cpu; unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0; - - if (cpu_online(cpu) || !cpu_present(cpu)) - return -EINVAL; + struct task_struct *idle; cpu_hotplug_begin(); + + if (cpu_online(cpu) || !cpu_present(cpu)) { + ret = -EINVAL; + goto out; + } + + idle = idle_thread_get(cpu); + if (IS_ERR(idle)) { + ret = PTR_ERR(idle); + goto out; + } + + ret = smpboot_create_threads(cpu); + if (ret) + goto out; + ret = __cpu_notify(CPU_UP_PREPARE | mod, hcpu, -1, &nr_calls); if (ret) { nr_calls--; - printk(KERN_WARNING "%s: attempt to bring up CPU %u failed\n", - __func__, cpu); + pr_warn("%s: attempt to bring up CPU %u failed\n", + __func__, cpu); goto out_notify; } /* Arch-specific enabling code. */ - ret = __cpu_up(cpu); + ret = __cpu_up(cpu, idle); if (ret != 0) goto out_notify; BUG_ON(!cpu_online(cpu)); + /* Wake the per cpu threads */ + smpboot_unpark_threads(cpu); + /* Now call notifier in preparation. */ cpu_notify(CPU_ONLINE | mod, hcpu); out_notify: if (ret != 0) __cpu_notify(CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL); +out: cpu_hotplug_done(); return ret; } -int __cpuinit cpu_up(unsigned int cpu) +int cpu_up(unsigned int cpu) { int err = 0; -#ifdef CONFIG_MEMORY_HOTPLUG - int nid; - pg_data_t *pgdat; -#endif - if (!cpu_possible(cpu)) { - printk(KERN_ERR "can't online cpu %d because it is not " - "configured as may-hotadd at boot time\n", cpu); + pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n", + cpu); #if defined(CONFIG_IA64) - printk(KERN_ERR "please check additional_cpus= boot " - "parameter\n"); + pr_err("please check additional_cpus= boot parameter\n"); #endif return -EINVAL; } -#ifdef CONFIG_MEMORY_HOTPLUG - nid = cpu_to_node(cpu); - if (!node_online(nid)) { - err = mem_online_node(nid); - if (err) - return err; - } - - pgdat = NODE_DATA(nid); - if (!pgdat) { - printk(KERN_ERR - "Can't online cpu %d due to NULL pgdat\n", cpu); - return -ENOMEM; - } - - if (pgdat->node_zonelists->_zonerefs->zone == NULL) { - mutex_lock(&zonelists_mutex); - build_all_zonelists(NULL); - mutex_unlock(&zonelists_mutex); - } -#endif + err = try_online_node(cpu_to_node(cpu)); + if (err) + return err; cpu_maps_update_begin(); @@ -384,14 +505,6 @@ EXPORT_SYMBOL_GPL(cpu_up); #ifdef CONFIG_PM_SLEEP_SMP static cpumask_var_t frozen_cpus; -void __weak arch_disable_nonboot_cpus_begin(void) -{ -} - -void __weak arch_disable_nonboot_cpus_end(void) -{ -} - int disable_nonboot_cpus(void) { int cpu, first_cpu, error = 0; @@ -403,30 +516,28 @@ int disable_nonboot_cpus(void) * with the userspace trying to use the CPU hotplug at the same time */ cpumask_clear(frozen_cpus); - arch_disable_nonboot_cpus_begin(); - printk("Disabling non-boot CPUs ...\n"); + pr_info("Disabling non-boot CPUs ...\n"); for_each_online_cpu(cpu) { if (cpu == first_cpu) continue; + trace_suspend_resume(TPS("CPU_OFF"), cpu, true); error = _cpu_down(cpu, 1); + trace_suspend_resume(TPS("CPU_OFF"), cpu, false); if (!error) cpumask_set_cpu(cpu, frozen_cpus); else { - printk(KERN_ERR "Error taking CPU%d down: %d\n", - cpu, error); + pr_err("Error taking CPU%d down: %d\n", cpu, error); break; } } - arch_disable_nonboot_cpus_end(); - if (!error) { BUG_ON(num_online_cpus() > 1); /* Make sure the CPUs won't be enabled by someone else */ cpu_hotplug_disabled = 1; } else { - printk(KERN_ERR "Non-boot CPUs are not disabled\n"); + pr_err("Non-boot CPUs are not disabled\n"); } cpu_maps_update_done(); return error; @@ -450,17 +561,19 @@ void __ref enable_nonboot_cpus(void) if (cpumask_empty(frozen_cpus)) goto out; - printk(KERN_INFO "Enabling non-boot CPUs ...\n"); + pr_info("Enabling non-boot CPUs ...\n"); arch_enable_nonboot_cpus_begin(); for_each_cpu(cpu, frozen_cpus) { + trace_suspend_resume(TPS("CPU_ON"), cpu, true); error = _cpu_up(cpu, 1); + trace_suspend_resume(TPS("CPU_ON"), cpu, false); if (!error) { - printk(KERN_INFO "CPU%d is up\n", cpu); + pr_info("CPU%d is up\n", cpu); continue; } - printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error); + pr_warn("Error taking CPU%d up: %d\n", cpu, error); } arch_enable_nonboot_cpus_end(); @@ -479,36 +592,6 @@ static int __init alloc_frozen_cpus(void) core_initcall(alloc_frozen_cpus); /* - * Prevent regular CPU hotplug from racing with the freezer, by disabling CPU - * hotplug when tasks are about to be frozen. Also, don't allow the freezer - * to continue until any currently running CPU hotplug operation gets - * completed. - * To modify the 'cpu_hotplug_disabled' flag, we need to acquire the - * 'cpu_add_remove_lock'. And this same lock is also taken by the regular - * CPU hotplug path and released only after it is complete. Thus, we - * (and hence the freezer) will block here until any currently running CPU - * hotplug operation gets completed. - */ -void cpu_hotplug_disable_before_freeze(void) -{ - cpu_maps_update_begin(); - cpu_hotplug_disabled = 1; - cpu_maps_update_done(); -} - - -/* - * When tasks have been thawed, re-enable regular CPU hotplug (which had been - * disabled while beginning to freeze tasks). - */ -void cpu_hotplug_enable_after_thaw(void) -{ - cpu_maps_update_begin(); - cpu_hotplug_disabled = 0; - cpu_maps_update_done(); -} - -/* * When callbacks for CPU hotplug notifications are being executed, we must * ensure that the state of the system with respect to the tasks being frozen * or not, as reported by the notification, remains unchanged *throughout the @@ -527,12 +610,12 @@ cpu_hotplug_pm_callback(struct notifier_block *nb, case PM_SUSPEND_PREPARE: case PM_HIBERNATION_PREPARE: - cpu_hotplug_disable_before_freeze(); + cpu_hotplug_disable(); break; case PM_POST_SUSPEND: case PM_POST_HIBERNATION: - cpu_hotplug_enable_after_thaw(); + cpu_hotplug_enable(); break; default: @@ -545,6 +628,11 @@ cpu_hotplug_pm_callback(struct notifier_block *nb, static int __init cpu_hotplug_pm_sync_init(void) { + /* + * cpu_hotplug_pm_callback has higher priority than x86 + * bsp_pm_callback which depends on cpu_hotplug_pm_callback + * to disable cpu hotplug to avoid cpu hotplug race. + */ pm_notifier(cpu_hotplug_pm_callback, 0); return 0; } @@ -560,7 +648,7 @@ core_initcall(cpu_hotplug_pm_sync_init); * It must be called by the arch code on the new cpu, before the new cpu * enables interrupts and before the "boot" cpu returns from __cpu_up(). */ -void __cpuinit notify_cpu_starting(unsigned int cpu) +void notify_cpu_starting(unsigned int cpu) { unsigned long val = CPU_STARTING; @@ -640,10 +728,12 @@ void set_cpu_present(unsigned int cpu, bool present) void set_cpu_online(unsigned int cpu, bool online) { - if (online) + if (online) { cpumask_set_cpu(cpu, to_cpumask(cpu_online_bits)); - else + cpumask_set_cpu(cpu, to_cpumask(cpu_active_bits)); + } else { cpumask_clear_cpu(cpu, to_cpumask(cpu_online_bits)); + } } void set_cpu_active(unsigned int cpu, bool active) |
