diff options
author | Ingo Molnar <mingo@elte.hu> | 2011-06-04 12:13:06 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2011-06-04 12:13:06 +0200 |
commit | 710054ba25c0d1f8f41c22ce13ba336503fb5318 (patch) | |
tree | f9b09b722bf511841539173d946f90a20fc2e59a /kernel | |
parent | 74c355fbdfedd3820046dba4f537876cea54c207 (diff) | |
parent | b273fa9716aa1564bee88ceee62f9042981cdc81 (diff) |
Merge branch 'perf/urgent' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/urgent
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cpuset.c | 4 | ||||
-rw-r--r-- | kernel/events/core.c | 8 | ||||
-rw-r--r-- | kernel/fork.c | 42 | ||||
-rw-r--r-- | kernel/jump_label.c | 18 | ||||
-rw-r--r-- | kernel/kthread.c | 4 | ||||
-rw-r--r-- | kernel/pm_qos_params.c | 37 | ||||
-rw-r--r-- | kernel/rcutree.c | 208 | ||||
-rw-r--r-- | kernel/rcutree.h | 30 | ||||
-rw-r--r-- | kernel/rcutree_plugin.h | 33 | ||||
-rw-r--r-- | kernel/rcutree_trace.c | 12 | ||||
-rw-r--r-- | kernel/sched.c | 56 | ||||
-rw-r--r-- | kernel/sched_fair.c | 5 | ||||
-rw-r--r-- | kernel/sched_rt.c | 10 | ||||
-rw-r--r-- | kernel/sched_stats.h | 4 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 31 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 10 | ||||
-rw-r--r-- | kernel/trace/trace.h | 15 | ||||
-rw-r--r-- | kernel/trace/trace_events.c | 7 | ||||
-rw-r--r-- | kernel/trace/trace_output.c | 27 | ||||
-rw-r--r-- | kernel/watchdog.c | 9 |
20 files changed, 337 insertions, 233 deletions
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 1ceeb049c82..9c9b7545c81 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2190,7 +2190,7 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk) rcu_read_lock(); cs = task_cs(tsk); if (cs) - cpumask_copy(&tsk->cpus_allowed, cs->cpus_allowed); + do_set_cpus_allowed(tsk, cs->cpus_allowed); rcu_read_unlock(); /* @@ -2217,7 +2217,7 @@ int cpuset_cpus_allowed_fallback(struct task_struct *tsk) * Like above we can temporary set any mask and rely on * set_cpus_allowed_ptr() as synchronization point. */ - cpumask_copy(&tsk->cpus_allowed, cpu_possible_mask); + do_set_cpus_allowed(tsk, cpu_possible_mask); cpu = cpumask_any(cpu_active_mask); } diff --git a/kernel/events/core.c b/kernel/events/core.c index 8a15944bf9d..9efe7108cca 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -5028,6 +5028,14 @@ static int __perf_event_overflow(struct perf_event *event, int nmi, else perf_event_output(event, nmi, data, regs); + if (event->fasync && event->pending_kill) { + if (nmi) { + event->pending_wakeup = 1; + irq_work_queue(&event->pending); + } else + perf_event_wakeup(event); + } + return ret; } diff --git a/kernel/fork.c b/kernel/fork.c index ca406d91671..0276c30401a 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -484,20 +484,6 @@ static void mm_init_aio(struct mm_struct *mm) #endif } -int mm_init_cpumask(struct mm_struct *mm, struct mm_struct *oldmm) -{ -#ifdef CONFIG_CPUMASK_OFFSTACK - if (!alloc_cpumask_var(&mm->cpu_vm_mask_var, GFP_KERNEL)) - return -ENOMEM; - - if (oldmm) - cpumask_copy(mm_cpumask(mm), mm_cpumask(oldmm)); - else - memset(mm_cpumask(mm), 0, cpumask_size()); -#endif - return 0; -} - static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p) { atomic_set(&mm->mm_users, 1); @@ -538,17 +524,8 @@ struct mm_struct * mm_alloc(void) return NULL; memset(mm, 0, sizeof(*mm)); - mm = mm_init(mm, current); - if (!mm) - return NULL; - - if (mm_init_cpumask(mm, NULL)) { - mm_free_pgd(mm); - free_mm(mm); - return NULL; - } - - return mm; + mm_init_cpumask(mm); + return mm_init(mm, current); } /* @@ -559,7 +536,6 @@ struct mm_struct * mm_alloc(void) void __mmdrop(struct mm_struct *mm) { BUG_ON(mm == &init_mm); - free_cpumask_var(mm->cpu_vm_mask_var); mm_free_pgd(mm); destroy_context(mm); mmu_notifier_mm_destroy(mm); @@ -753,6 +729,7 @@ struct mm_struct *dup_mm(struct task_struct *tsk) goto fail_nomem; memcpy(mm, oldmm, sizeof(*mm)); + mm_init_cpumask(mm); /* Initializing for Swap token stuff */ mm->token_priority = 0; @@ -765,9 +742,6 @@ struct mm_struct *dup_mm(struct task_struct *tsk) if (!mm_init(mm, tsk)) goto fail_nomem; - if (mm_init_cpumask(mm, oldmm)) - goto fail_nocpumask; - if (init_new_context(tsk, mm)) goto fail_nocontext; @@ -794,9 +768,6 @@ fail_nomem: return NULL; fail_nocontext: - free_cpumask_var(mm->cpu_vm_mask_var); - -fail_nocpumask: /* * If init_new_context() failed, we cannot use mmput() to free the mm * because it calls destroy_context() @@ -1591,6 +1562,13 @@ void __init proc_caches_init(void) fs_cachep = kmem_cache_create("fs_cache", sizeof(struct fs_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); + /* + * FIXME! The "sizeof(struct mm_struct)" currently includes the + * whole struct cpumask for the OFFSTACK case. We could change + * this to *only* allocate as much of it as required by the + * maximum number of CPU's we can ever have. The cpumask_allocation + * is at the end of the structure, exactly for that reason. + */ mm_cachep = kmem_cache_create("mm_struct", sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL); diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 74d1c099fbd..fa27e750dbc 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c @@ -105,9 +105,12 @@ static int __jump_label_text_reserved(struct jump_entry *iter_start, } static void __jump_label_update(struct jump_label_key *key, - struct jump_entry *entry, int enable) + struct jump_entry *entry, + struct jump_entry *stop, int enable) { - for (; entry->key == (jump_label_t)(unsigned long)key; entry++) { + for (; (entry < stop) && + (entry->key == (jump_label_t)(unsigned long)key); + entry++) { /* * entry->code set to 0 invalidates module init text sections * kernel_text_address() verifies we are not in core kernel @@ -181,7 +184,11 @@ static void __jump_label_mod_update(struct jump_label_key *key, int enable) struct jump_label_mod *mod = key->next; while (mod) { - __jump_label_update(key, mod->entries, enable); + struct module *m = mod->mod; + + __jump_label_update(key, mod->entries, + m->jump_entries + m->num_jump_entries, + enable); mod = mod->next; } } @@ -245,7 +252,8 @@ static int jump_label_add_module(struct module *mod) key->next = jlm; if (jump_label_enabled(key)) - __jump_label_update(key, iter, JUMP_LABEL_ENABLE); + __jump_label_update(key, iter, iter_stop, + JUMP_LABEL_ENABLE); } return 0; @@ -371,7 +379,7 @@ static void jump_label_update(struct jump_label_key *key, int enable) /* if there are no users, entry can be NULL */ if (entry) - __jump_label_update(key, entry, enable); + __jump_label_update(key, entry, __stop___jump_table, enable); #ifdef CONFIG_MODULES __jump_label_mod_update(key, enable); diff --git a/kernel/kthread.c b/kernel/kthread.c index 3b34d2732bc..4ba7cccb499 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -202,8 +202,8 @@ void kthread_bind(struct task_struct *p, unsigned int cpu) return; } - p->cpus_allowed = cpumask_of_cpu(cpu); - p->rt.nr_cpus_allowed = 1; + /* It's safe because the task is inactive. */ + do_set_cpus_allowed(p, cpumask_of(cpu)); p->flags |= PF_THREAD_BOUND; } EXPORT_SYMBOL(kthread_bind); diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c index fd8d1e035df..6824ca7d4d0 100644 --- a/kernel/pm_qos_params.c +++ b/kernel/pm_qos_params.c @@ -54,11 +54,17 @@ enum pm_qos_type { PM_QOS_MIN /* return the smallest value */ }; +/* + * Note: The lockless read path depends on the CPU accessing + * target_value atomically. Atomic access is only guaranteed on all CPU + * types linux supports for 32 bit quantites + */ struct pm_qos_object { struct plist_head requests; struct blocking_notifier_head *notifiers; struct miscdevice pm_qos_power_miscdev; char *name; + s32 target_value; /* Do not change to 64 bit */ s32 default_value; enum pm_qos_type type; }; @@ -71,7 +77,8 @@ static struct pm_qos_object cpu_dma_pm_qos = { .requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests, pm_qos_lock), .notifiers = &cpu_dma_lat_notifier, .name = "cpu_dma_latency", - .default_value = 2000 * USEC_PER_SEC, + .target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, + .default_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE, .type = PM_QOS_MIN, }; @@ -80,7 +87,8 @@ static struct pm_qos_object network_lat_pm_qos = { .requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests, pm_qos_lock), .notifiers = &network_lat_notifier, .name = "network_latency", - .default_value = 2000 * USEC_PER_SEC, + .target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, + .default_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE, .type = PM_QOS_MIN }; @@ -90,7 +98,8 @@ static struct pm_qos_object network_throughput_pm_qos = { .requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests, pm_qos_lock), .notifiers = &network_throughput_notifier, .name = "network_throughput", - .default_value = 0, + .target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, + .default_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE, .type = PM_QOS_MAX, }; @@ -136,6 +145,16 @@ static inline int pm_qos_get_value(struct pm_qos_object *o) } } +static inline s32 pm_qos_read_value(struct pm_qos_object *o) +{ + return o->target_value; +} + +static inline void pm_qos_set_value(struct pm_qos_object *o, s32 value) +{ + o->target_value = value; +} + static void update_target(struct pm_qos_object *o, struct plist_node *node, int del, int value) { @@ -160,6 +179,7 @@ static void update_target(struct pm_qos_object *o, struct plist_node *node, plist_add(node, &o->requests); } curr_value = pm_qos_get_value(o); + pm_qos_set_value(o, curr_value); spin_unlock_irqrestore(&pm_qos_lock, flags); if (prev_value != curr_value) @@ -194,18 +214,11 @@ static int find_pm_qos_object_by_minor(int minor) * pm_qos_request - returns current system wide qos expectation * @pm_qos_class: identification of which qos value is requested * - * This function returns the current target value in an atomic manner. + * This function returns the current target value. */ int pm_qos_request(int pm_qos_class) { - unsigned long flags; - int value; - - spin_lock_irqsave(&pm_qos_lock, flags); - value = pm_qos_get_value(pm_qos_array[pm_qos_class]); - spin_unlock_irqrestore(&pm_qos_lock, flags); - - return value; + return pm_qos_read_value(pm_qos_array[pm_qos_class]); } EXPORT_SYMBOL_GPL(pm_qos_request); diff --git a/kernel/rcutree.c b/kernel/rcutree.c index f07d2f03181..89419ff92e9 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -36,7 +36,7 @@ #include <linux/interrupt.h> #include <linux/sched.h> #include <linux/nmi.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <linux/bitops.h> #include <linux/module.h> #include <linux/completion.h> @@ -95,7 +95,6 @@ static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); DEFINE_PER_CPU(int, rcu_cpu_kthread_cpu); DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); -static DEFINE_PER_CPU(wait_queue_head_t, rcu_cpu_wq); DEFINE_PER_CPU(char, rcu_cpu_has_work); static char rcu_kthreads_spawnable; @@ -163,7 +162,7 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch); #ifdef CONFIG_NO_HZ DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = { .dynticks_nesting = 1, - .dynticks = 1, + .dynticks = ATOMIC_INIT(1), }; #endif /* #ifdef CONFIG_NO_HZ */ @@ -322,13 +321,25 @@ void rcu_enter_nohz(void) unsigned long flags; struct rcu_dynticks *rdtp; - smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ local_irq_save(flags); rdtp = &__get_cpu_var(rcu_dynticks); - rdtp->dynticks++; - rdtp->dynticks_nesting--; - WARN_ON_ONCE(rdtp->dynticks & 0x1); + if (--rdtp->dynticks_nesting) { + local_irq_restore(flags); + return; + } + /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ + smp_mb__before_atomic_inc(); /* See above. */ + atomic_inc(&rdtp->dynticks); + smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */ + WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); local_irq_restore(flags); + + /* If the interrupt queued a callback, get out of dyntick mode. */ + if (in_irq() && + (__get_cpu_var(rcu_sched_data).nxtlist || + __get_cpu_var(rcu_bh_data).nxtlist || + rcu_preempt_needs_cpu(smp_processor_id()))) + set_need_resched(); } /* @@ -344,11 +355,16 @@ void rcu_exit_nohz(void) local_irq_save(flags); rdtp = &__get_cpu_var(rcu_dynticks); - rdtp->dynticks++; - rdtp->dynticks_nesting++; - WARN_ON_ONCE(!(rdtp->dynticks & 0x1)); + if (rdtp->dynticks_nesting++) { + local_irq_restore(flags); + return; + } + smp_mb__before_atomic_inc(); /* Force ordering w/previous sojourn. */ + atomic_inc(&rdtp->dynticks); + /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ + smp_mb__after_atomic_inc(); /* See above. */ + WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); local_irq_restore(flags); - smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ } /** @@ -362,11 +378,15 @@ void rcu_nmi_enter(void) { struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); - if (rdtp->dynticks & 0x1) + if (rdtp->dynticks_nmi_nesting == 0 && + (atomic_read(&rdtp->dynticks) & 0x1)) return; - rdtp->dynticks_nmi++; - WARN_ON_ONCE(!(rdtp->dynticks_nmi & 0x1)); - smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ + rdtp->dynticks_nmi_nesting++; + smp_mb__before_atomic_inc(); /* Force delay from prior write. */ + atomic_inc(&rdtp->dynticks); + /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ + smp_mb__after_atomic_inc(); /* See above. */ + WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1)); } /** @@ -380,11 +400,14 @@ void rcu_nmi_exit(void) { struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); - if (rdtp->dynticks & 0x1) + if (rdtp->dynticks_nmi_nesting == 0 || + --rdtp->dynticks_nmi_nesting != 0) return; - smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ - rdtp->dynticks_nmi++; - WARN_ON_ONCE(rdtp->dynticks_nmi & 0x1); + /* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */ + smp_mb__before_atomic_inc(); /* See above. */ + atomic_inc(&rdtp->dynticks); + smp_mb__after_atomic_inc(); /* Force delay to next write. */ + WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); } /** @@ -395,13 +418,7 @@ void rcu_nmi_exit(void) */ void rcu_irq_enter(void) { - struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); - - if (rdtp->dynticks_nesting++) - return; - rdtp->dynticks++; - WARN_ON_ONCE(!(rdtp->dynticks & 0x1)); - smp_mb(); /* CPUs seeing ++ must see later RCU read-side crit sects */ + rcu_exit_nohz(); } /** @@ -413,18 +430,7 @@ void rcu_irq_enter(void) */ void rcu_irq_exit(void) { - struct rcu_dynticks *rdtp = &__get_cpu_var(rcu_dynticks); - - if (--rdtp->dynticks_nesting) - return; - smp_mb(); /* CPUs seeing ++ must see prior RCU read-side crit sects */ - rdtp->dynticks++; - WARN_ON_ONCE(rdtp->dynticks & 0x1); - - /* If the interrupt queued a callback, get out of dyntick mode. */ - if (__this_cpu_read(rcu_sched_data.nxtlist) || - __this_cpu_read(rcu_bh_data.nxtlist)) - set_need_resched(); + rcu_enter_nohz(); } #ifdef CONFIG_SMP @@ -436,19 +442,8 @@ void rcu_irq_exit(void) */ static int dyntick_save_progress_counter(struct rcu_data *rdp) { - int ret; - int snap; - int snap_nmi; - - snap = rdp->dynticks->dynticks; - snap_nmi = rdp->dynticks->dynticks_nmi; - smp_mb(); /* Order sampling of snap with end of grace period. */ - rdp->dynticks_snap = snap; - rdp->dynticks_nmi_snap = snap_nmi; - ret = ((snap & 0x1) == 0) && ((snap_nmi & 0x1) == 0); - if (ret) - rdp->dynticks_fqs++; - return ret; + rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks); + return 0; } /* @@ -459,16 +454,11 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp) */ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) { - long curr; - long curr_nmi; - long snap; - long snap_nmi; + unsigned long curr; + unsigned long snap; - curr = rdp->dynticks->dynticks; - snap = rdp->dynticks_snap; - curr_nmi = rdp->dynticks->dynticks_nmi; - snap_nmi = rdp->dynticks_nmi_snap; - smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ + curr = (unsigned long)atomic_add_return(0, &rdp->dynticks->dynticks); + snap = (unsigned long)rdp->dynticks_snap; /* * If the CPU passed through or entered a dynticks idle phase with @@ -478,8 +468,7 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp) * read-side critical section that started before the beginning * of the current RCU grace period. */ - if ((curr != snap || (curr & 0x1) == 0) && - (curr_nmi != snap_nmi || (curr_nmi & 0x1) == 0)) { + if ((curr & 0x1) == 0 || ULONG_CMP_GE(curr, snap + 2)) { rdp->dynticks_fqs++; return 1; } @@ -908,6 +897,12 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags) unsigned long gp_duration; WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); + + /* + * Ensure that all grace-period and pre-grace-period activity + * is seen before the assignment to rsp->completed. + */ + smp_mb(); /* See above block comment. */ gp_duration = jiffies - rsp->gp_start; if (gp_duration > rsp->gp_max) rsp->gp_max = gp_duration; @@ -1455,25 +1450,11 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) */ static void rcu_process_callbacks(void) { - /* - * Memory references from any prior RCU read-side critical sections - * executed by the interrupted code must be seen before any RCU - * grace-period manipulations below. - */ - smp_mb(); /* See above block comment. */ - __rcu_process_callbacks(&rcu_sched_state, &__get_cpu_var(rcu_sched_data)); __rcu_process_callbacks(&rcu_bh_state, &__get_cpu_var(rcu_bh_data)); rcu_preempt_process_callbacks(); - /* - * Memory references from any later RCU read-side critical sections - * executed by the interrupted code must be seen after any RCU - * grace-period manipulations above. - */ - smp_mb(); /* See above block comment. */ - /* If we are last CPU on way to dyntick-idle mode, accelerate it. */ rcu_needs_cpu_flush(); } @@ -1494,7 +1475,7 @@ static void invoke_rcu_cpu_kthread(void) local_irq_restore(flags); return; } - wake_up(&__get_cpu_var(rcu_cpu_wq)); + wake_up_process(__this_cpu_read(rcu_cpu_kthread_task)); local_irq_restore(flags); } @@ -1544,13 +1525,10 @@ static void rcu_cpu_kthread_setrt(int cpu, int to_rt) */ static void rcu_cpu_kthread_timer(unsigned long arg) { - unsigned long flags; struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, arg); struct rcu_node *rnp = rdp->mynode; - raw_spin_lock_irqsave(&rnp->lock, flags); - rnp->wakemask |= rdp->grpmask; - raw_spin_unlock_irqrestore(&rnp->lock, flags); + atomic_or(rdp->grpmask, &rnp->wakemask); invoke_rcu_node_kthread(rnp); } @@ -1617,14 +1595,12 @@ static int rcu_cpu_kthread(void *arg) unsigned long flags; int spincnt = 0; unsigned int *statusp = &per_cpu(rcu_cpu_kthread_status, cpu); - wait_queue_head_t *wqp = &per_cpu(rcu_cpu_wq, cpu); char work; char *workp = &per_cpu(rcu_cpu_has_work, cpu); for (;;) { *statusp = RCU_KTHREAD_WAITING; - wait_event_interruptible(*wqp, - *workp != 0 || kthread_should_stop()); + rcu_wait(*workp != 0 || kthread_should_stop()); local_bh_disable(); if (rcu_cpu_kthread_should_stop(cpu)) { local_bh_enable(); @@ -1675,7 +1651,6 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); per_cpu(rcu_cpu_kthread_task, cpu) = t; - wake_up_process(t); sp.sched_priority = RCU_KTHREAD_PRIO; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); return 0; @@ -1698,11 +1673,10 @@ static int rcu_node_kthread(void *arg) for (;;) { rnp->node_kthread_status = RCU_KTHREAD_WAITING; - wait_event_interruptible(rnp->node_wq, rnp->wakemask != 0); + rcu_wait(atomic_read(&rnp->wakemask) != 0); rnp->node_kthread_status = RCU_KTHREAD_RUNNING; raw_spin_lock_irqsave(&rnp->lock, flags); - mask = rnp->wakemask; - rnp->wakemask = 0; + mask = atomic_xchg(&rnp->wakemask, 0); rcu_initiate_boost(rnp, flags); /* releases rnp->lock. */ for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1) { if ((mask & 0x1) == 0) @@ -1783,13 +1757,14 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, raw_spin_lock_irqsave(&rnp->lock, flags); rnp->node_kthread_task = t; raw_spin_unlock_irqrestore(&rnp->lock, flags); - wake_up_process(t); sp.sched_priority = 99; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); } return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); } +static void rcu_wake_one_boost_kthread(struct rcu_node *rnp); + /* * Spawn all kthreads -- called as soon as the scheduler is running. */ @@ -1797,24 +1772,31 @@ static int __init rcu_spawn_kthreads(void) { int cpu; struct rcu_node *rnp; + struct task_struct *t; rcu_kthreads_spawnable = 1; for_each_possible_cpu(cpu) { - init_waitqueue_head(&per_cpu(rcu_cpu_wq, cpu)); per_cpu(rcu_cpu_has_work, cpu) = 0; - if (cpu_online(cpu)) + if (cpu_online(cpu)) { (void)rcu_spawn_one_cpu_kthread(cpu); + t = per_cpu(rcu_cpu_kthread_task, cpu); + if (t) + wake_up_process(t); + } } rnp = rcu_get_root(rcu_state); - init_waitqueue_head(&rnp->node_wq); - rcu_init_boost_waitqueue(rnp); (void)rcu_spawn_one_node_kthread(rcu_state, rnp); - if (NUM_RCU_NODES > 1) + if (rnp->node_kthread_task) + wake_up_process(rnp->node_kthread_task); + if (NUM_RCU_NODES > 1) { rcu_for_each_leaf_node(rcu_state, rnp) { - init_waitqueue_head(&rnp->node_wq); - rcu_init_boost_waitqueue(rnp); (void)rcu_spawn_one_node_kthread(rcu_state, rnp); + t = rnp->node_kthread_task; + if (t) + wake_up_process(t); + rcu_wake_one_boost_kthread(rnp); } + } return 0; } early_initcall(rcu_spawn_kthreads); @@ -2218,14 +2200,14 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) raw_spin_unlock_irqrestore(&rsp->onofflock, flags); } -static void __cpuinit rcu_online_cpu(int cpu) +static void __cpuinit rcu_prepare_cpu(int cpu) { rcu_init_percpu_data(cpu, &rcu_sched_state, 0); rcu_init_percpu_data(cpu, &rcu_bh_state, 0); rcu_preempt_init_percpu_data(cpu); } -static void __cpuinit rcu_online_kthreads(int cpu) +static void __cpuinit rcu_prepare_kthreads(int cpu) { struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); struct rcu_node *rnp = rdp->mynode; @@ -2239,6 +2221,31 @@ static void __cpuinit rcu_online_kthreads(int cpu) } /* + * kthread_create() creates threads in TASK_UNINTERRUPTIBLE state, + * but the RCU threads are woken on demand, and if demand is low this + * could be a while triggering the hung task watchdog. + * + * In order to avoid this, poke all tasks once the CPU is fully + * up and running. + */ +static void __cpuinit rcu_online_kthreads(int cpu) +{ + struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); + struct rcu_node *rnp = rdp->mynode; + struct task_struct *t; + + t = per_cpu(rcu_cpu_kthread_task, cpu); + if (t) + wake_up_process(t); + + t = rnp->node_kthread_task; + if (t) + wake_up_process(t); + + rcu_wake_one_boost_kthread(rnp); +} + +/* * Handle CPU online/offline notification events. */ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, @@ -2251,10 +2258,11 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - rcu_online_cpu(cpu); - rcu_online_kthreads(cpu); + rcu_prepare_cpu(cpu); + rcu_prepare_kthreads(cpu); break; case CPU_ONLINE: + rcu_online_kthreads(cpu); case CPU_DOWN_FAILED: rcu_node_kthread_setaffinity(rnp, -1); rcu_cpu_kthread_setrt(cpu, 1); diff --git a/kernel/rcutree.h b/kernel/rcutree.h index 257664815d5..7b9a08b4aae 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -84,11 +84,9 @@ * Dynticks per-CPU state. */ struct rcu_dynticks { - int dynticks_nesting; /* Track nesting level, sort of. */ - int dynticks; /* Even value for dynticks-idle, else odd. */ - int dynticks_nmi; /* Even value for either dynticks-idle or */ - /* not in nmi handler, else odd. So this */ - /* remains even for nmi from irq handler. */ + int dynticks_nesting; /* Track irq/process nesting level. */ + int dynticks_nmi_nesting; /* Track NMI nesting level. */ + atomic_t dynticks; /* Even value for dynticks-idle, else odd. */ }; /* RCU's kthread states for tracing. */ @@ -121,7 +119,9 @@ struct rcu_node { /* elements that need to drain to allow the */ /* current expedited grace period to */ /* complete (only for TREE_PREEMPT_RCU). */ - unsigned long wakemask; /* CPUs whose kthread needs to be awakened. */ + atomic_t wakemask; /* CPUs whose kthread needs to be awakened. */ + /* Since this has meaning only for leaf */ + /* rcu_node structures, 32 bits suffices. */ unsigned long qsmaskinit; /* Per-GP initial value for qsmask & expmask. */ unsigned long grpmask; /* Mask to apply to parent qsmask. */ @@ -159,9 +159,6 @@ struct rcu_node { struct task_struct *boost_kthread_task; /* kthread that takes care of priority */ /* boosting for this rcu_node structure. */ - wait_queue_head_t boost_wq; - /* Wait queue on which to park the boost */ - /* kthread. */ unsigned int boost_kthread_status; /* State of boost_kthread_task for tracing. */ unsigned long n_tasks_boosted; @@ -188,9 +185,6 @@ struct rcu_node { /* kthread that takes care of this rcu_node */ /* structure, for example, awakening the */ /* per-CPU kthreads as needed. */ - wait_queue_head_t node_wq; - /* Wait queue on which to park the per-node */ - /* kthread. */ unsigned int node_kthread_status; /* State of node_kthread_task for tracing. */ } ____cacheline_internodealigned_in_smp; @@ -284,7 +278,6 @@ struct rcu_data { /* 3) dynticks interface. */ struct rcu_dynticks *dynticks; /* Shared per-CPU dynticks state. */ int dynticks_snap; /* Per-GP tracking for dynticks. */ - int dynticks_nmi_snap; /* Per-GP tracking for dynticks_nmi. */ #endif /* #ifdef CONFIG_NO_HZ */ /* 4) reasons this CPU needed to be kicked by force_quiescent_state */ @@ -337,6 +330,16 @@ struct rcu_data { /* scheduling clock irq */ /* before ratting on them. */ +#define rcu_wait(cond) \ +do { \ + for (;;) { \ + set_current_state(TASK_INTERRUPTIBLE); \ + if (cond) \ + break; \ + schedule(); \ + } \ + __set_current_state(TASK_RUNNING); \ +} while (0) /* * RCU global state, including node hierarchy. This hierarchy is @@ -446,7 +449,6 @@ static void __cpuinit rcu_preempt_init_percpu_data(int cpu); static void rcu_preempt_send_cbs_to_online(void); static void __init __rcu_init_preempt(void); static void rcu_needs_cpu_flush(void); -static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp); static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags); static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, cpumask_var_t cm); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 3f6559a5f5c..c8bff3099a8 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1196,8 +1196,7 @@ static int rcu_boost_kthread(void *arg) for (;;) { rnp->boost_kthread_status = RCU_KTHREAD_WAITING; - wait_event_interruptible(rnp->boost_wq, rnp->boost_tasks || - rnp->exp_tasks); + rcu_wait(rnp->boost_tasks || rnp->exp_tasks); rnp->boost_kthread_status = RCU_KTHREAD_RUNNING; more2boost = rcu_boost(rnp); if (more2boost) @@ -1275,14 +1274,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) } /* - * Initialize the RCU-boost waitqueue. - */ -static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp) -{ - init_waitqueue_head(&rnp->boost_wq); -} - -/* * Create an RCU-boost kthread for the specified node if one does not * already exist. We only create this kthread for preemptible RCU. * Returns zero if all is well, a negated errno otherwise. @@ -1306,12 +1297,17 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, raw_spin_lock_irqsave(&rnp->lock, flags); rnp->boost_kthread_task = t; raw_spin_unlock_irqrestore(&rnp->lock, flags); - wake_up_process(t); sp.sched_priority = RCU_KTHREAD_PRIO; sched_setscheduler_nocheck(t, SCHED_FIFO, &sp); return 0; } +static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp) +{ + if (rnp->boost_kthread_task) + wake_up_process(rnp->boost_kthread_task); +} + #else /* #ifdef CONFIG_RCU_BOOST */ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) @@ -1328,10 +1324,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp) { } -static void __init rcu_init_boost_waitqueue(struct rcu_node *rnp) -{ -} - static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, struct rcu_node *rnp, int rnp_index) @@ -1339,6 +1331,10 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, return 0; } +static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp) +{ +} + #endif /* #else #ifdef CONFIG_RCU_BOOST */ #ifndef CONFIG_SMP @@ -1520,7 +1516,6 @@ int rcu_needs_cpu(int cpu) { int c = 0; int snap; - int snap_nmi; int thatcpu; /* Check for being in the holdoff period. */ @@ -1531,10 +1526,10 @@ int rcu_needs_cpu(int cpu) for_each_online_cpu(thatcpu) { if (thatcpu == cpu) continue; - snap = per_cpu(rcu_dynticks, thatcpu).dynticks; - snap_nmi = per_cpu(rcu_dynticks, thatcpu).dynticks_nmi; + snap = atomic_add_return(0, &per_cpu(rcu_dynticks, + thatcpu).dynticks); smp_mb(); /* Order sampling of snap with end of grace period. */ - if (((snap & 0x1) != 0) || ((snap_nmi & 0x1) != 0)) { + if ((snap & 0x1) != 0) { per_cpu(rcu_dyntick_drain, cpu) = 0; per_cpu(rcu_dyntick_holdoff, cpu) = jiffies - 1; return rcu |