diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cpu.c | 3 | ||||
-rw-r--r-- | kernel/exit.c | 5 | ||||
-rw-r--r-- | kernel/hrtimer.c | 17 | ||||
-rw-r--r-- | kernel/sched.c | 17 | ||||
-rw-r--r-- | kernel/sched_debug.c | 41 | ||||
-rw-r--r-- | kernel/sched_fair.c | 17 | ||||
-rw-r--r-- | kernel/softirq.c | 7 | ||||
-rw-r--r-- | kernel/time/tick-sched.c | 4 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace.c | 19 | ||||
-rw-r--r-- | kernel/workqueue.c | 45 |
11 files changed, 142 insertions, 35 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c index 86d49045dae..5a732c5ef08 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -499,3 +499,6 @@ const unsigned long cpu_bit_bitmap[BITS_PER_LONG+1][BITS_TO_LONGS(NR_CPUS)] = { #endif }; EXPORT_SYMBOL_GPL(cpu_bit_bitmap); + +const DECLARE_BITMAP(cpu_all_bits, NR_CPUS) = CPU_BITS_ALL; +EXPORT_SYMBOL(cpu_all_bits); diff --git a/kernel/exit.c b/kernel/exit.c index 80137a5d946..ae2b92be5fa 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -141,6 +141,11 @@ static void __exit_signal(struct task_struct *tsk) if (sig) { flush_sigqueue(&sig->shared_pending); taskstats_tgid_free(sig); + /* + * Make sure ->signal can't go away under rq->lock, + * see account_group_exec_runtime(). + */ + task_rq_unlock_wait(tsk); __cleanup_signal(sig); } } diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 2b465dfde42..95d3949f2ae 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -1209,6 +1209,7 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base) enum hrtimer_restart (*fn)(struct hrtimer *); struct hrtimer *timer; int restart; + int emulate_hardirq_ctx = 0; timer = list_entry(cpu_base->cb_pending.next, struct hrtimer, cb_entry); @@ -1217,10 +1218,24 @@ static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base) timer_stats_account_hrtimer(timer); fn = timer->function; + /* + * A timer might have been added to the cb_pending list + * when it was migrated during a cpu-offline operation. + * Emulate hardirq context for such timers. + */ + if (timer->cb_mode == HRTIMER_CB_IRQSAFE_PERCPU || + timer->cb_mode == HRTIMER_CB_IRQSAFE_UNLOCKED) + emulate_hardirq_ctx = 1; + __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0); spin_unlock_irq(&cpu_base->lock); - restart = fn(timer); + if (unlikely(emulate_hardirq_ctx)) { + local_irq_disable(); + restart = fn(timer); + local_irq_enable(); + } else + restart = fn(timer); spin_lock_irq(&cpu_base->lock); diff --git a/kernel/sched.c b/kernel/sched.c index 82cc839c921..50a21f96467 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -399,7 +399,7 @@ struct cfs_rq { */ struct sched_entity *curr, *next, *last; - unsigned long nr_spread_over; + unsigned int nr_spread_over; #ifdef CONFIG_FAIR_GROUP_SCHED struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */ @@ -969,6 +969,14 @@ static struct rq *task_rq_lock(struct task_struct *p, unsigned long *flags) } } +void task_rq_unlock_wait(struct task_struct *p) +{ + struct rq *rq = task_rq(p); + + smp_mb(); /* spin-unlock-wait is not a full memory barrier */ + spin_unlock_wait(&rq->lock); +} + static void __task_rq_unlock(struct rq *rq) __releases(rq->lock) { @@ -6877,15 +6885,17 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu) struct sched_domain *tmp; /* Remove the sched domains which do not contribute to scheduling. */ - for (tmp = sd; tmp; tmp = tmp->parent) { + for (tmp = sd; tmp; ) { struct sched_domain *parent = tmp->parent; if (!parent) break; + if (sd_parent_degenerate(tmp, parent)) { tmp->parent = parent->parent; if (parent->parent) parent->parent->child = tmp; - } + } else + tmp = tmp->parent; } if (sd && sd_degenerate(sd)) { @@ -7674,6 +7684,7 @@ static int __build_sched_domains(const cpumask_t *cpu_map, error: free_sched_groups(cpu_map, tmpmask); SCHED_CPUMASK_FREE((void *)allmasks); + kfree(rd); return -ENOMEM; #endif } diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 5ae17762ec3..48ecc51e770 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -144,7 +144,7 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) last = __pick_last_entity(cfs_rq); if (last) max_vruntime = last->vruntime; - min_vruntime = rq->cfs.min_vruntime; + min_vruntime = cfs_rq->min_vruntime; rq0_min_vruntime = per_cpu(runqueues, 0).cfs.min_vruntime; spin_unlock_irqrestore(&rq->lock, flags); SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "MIN_vruntime", @@ -161,26 +161,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) SPLIT_NS(spread0)); SEQ_printf(m, " .%-30s: %ld\n", "nr_running", cfs_rq->nr_running); SEQ_printf(m, " .%-30s: %ld\n", "load", cfs_rq->load.weight); -#ifdef CONFIG_SCHEDSTATS -#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n); - - P(yld_exp_empty); - P(yld_act_empty); - P(yld_both_empty); - P(yld_count); - P(sched_switch); - P(sched_count); - P(sched_goidle); - - P(ttwu_count); - P(ttwu_local); - - P(bkl_count); - -#undef P -#endif - SEQ_printf(m, " .%-30s: %ld\n", "nr_spread_over", + SEQ_printf(m, " .%-30s: %d\n", "nr_spread_over", cfs_rq->nr_spread_over); #ifdef CONFIG_FAIR_GROUP_SCHED #ifdef CONFIG_SMP @@ -260,6 +242,25 @@ static void print_cpu(struct seq_file *m, int cpu) #undef P #undef PN +#ifdef CONFIG_SCHEDSTATS +#define P(n) SEQ_printf(m, " .%-30s: %d\n", #n, rq->n); + + P(yld_exp_empty); + P(yld_act_empty); + P(yld_both_empty); + P(yld_count); + + P(sched_switch); + P(sched_count); + P(sched_goidle); + + P(ttwu_count); + P(ttwu_local); + + P(bkl_count); + +#undef P +#endif print_cfs_stats(m, cpu); print_rt_stats(m, cpu); diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 51aa3e102ac..98345e45b05 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -716,6 +716,15 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup) __enqueue_entity(cfs_rq, se); } +static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) +{ + if (cfs_rq->last == se) + cfs_rq->last = NULL; + + if (cfs_rq->next == se) + cfs_rq->next = NULL; +} + static void dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) { @@ -738,11 +747,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) #endif } - if (cfs_rq->last == se) - cfs_rq->last = NULL; - - if (cfs_rq->next == se) - cfs_rq->next = NULL; + clear_buddies(cfs_rq, se); if (se != cfs_rq->curr) __dequeue_entity(cfs_rq, se); @@ -977,6 +982,8 @@ static void yield_task_fair(struct rq *rq) if (unlikely(cfs_rq->nr_running == 1)) return; + clear_buddies(cfs_rq, se); + if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) { update_rq_clock(rq); /* diff --git a/kernel/softirq.c b/kernel/softirq.c index 7110daeb9a9..e7c69a720d6 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -269,10 +269,11 @@ void irq_enter(void) { int cpu = smp_processor_id(); - if (idle_cpu(cpu) && !in_interrupt()) + if (idle_cpu(cpu) && !in_interrupt()) { + __irq_enter(); tick_check_idle(cpu); - - __irq_enter(); + } else + __irq_enter(); } #ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 5bbb1044f84..342fc9ccab4 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -568,6 +568,9 @@ static void tick_nohz_switch_to_nohz(void) */ static void tick_nohz_kick_tick(int cpu) { +#if 0 + /* Switch back to 2.6.27 behaviour */ + struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu); ktime_t delta, now; @@ -584,6 +587,7 @@ static void tick_nohz_kick_tick(int cpu) return; tick_nohz_restart(ts, now); +#endif } #else diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 3f338063864..2f76193c348 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1060,7 +1060,7 @@ rb_reserve_next_event(struct ring_buffer_per_cpu *cpu_buffer, /* Did the write stamp get updated already? */ if (unlikely(ts < cpu_buffer->write_stamp)) - goto again; + delta = 0; if (test_time_stamp(delta)) { diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9f3b478f917..697eda36b86 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1755,7 +1755,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter) return TRACE_TYPE_HANDLED; SEQ_PUT_FIELD_RET(s, entry->pid); - SEQ_PUT_FIELD_RET(s, iter->cpu); + SEQ_PUT_FIELD_RET(s, entry->cpu); SEQ_PUT_FIELD_RET(s, iter->ts); switch (entry->type) { @@ -2676,7 +2676,7 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, { unsigned long val; char buf[64]; - int ret; + int ret, cpu; struct trace_array *tr = filp->private_data; if (cnt >= sizeof(buf)) @@ -2704,6 +2704,14 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, goto out; } + /* disable all cpu buffers */ + for_each_tracing_cpu(cpu) { + if (global_trace.data[cpu]) + atomic_inc(&global_trace.data[cpu]->disabled); + if (max_tr.data[cpu]) + atomic_inc(&max_tr.data[cpu]->disabled); + } + if (val != global_trace.entries) { ret = ring_buffer_resize(global_trace.buffer, val); if (ret < 0) { @@ -2735,6 +2743,13 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, if (tracing_disabled) cnt = -ENOMEM; out: + for_each_tracing_cpu(cpu) { + if (global_trace.data[cpu]) + atomic_dec(&global_trace.data[cpu]->disabled); + if (max_tr.data[cpu]) + atomic_dec(&max_tr.data[cpu]->disabled); + } + max_tr.entries = global_trace.entries; mutex_unlock(&trace_types_lock); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f928f2a87b9..d4dc69ddebd 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -970,6 +970,51 @@ undo: return ret; } +#ifdef CONFIG_SMP +struct work_for_cpu { + struct work_struct work; + long (*fn)(void *); + void *arg; + long ret; +}; + +static void do_work_for_cpu(struct work_struct *w) +{ + struct work_for_cpu *wfc = container_of(w, struct work_for_cpu, work); + + wfc->ret = wfc->fn(wfc->arg); +} + +/** + * work_on_cpu - run a function in user context on a particular cpu + * @cpu: the cpu to run on + * @fn: the function to run + * @arg: the function arg + * + * This will return -EINVAL in the cpu is not online, or the return value + * of @fn otherwise. + */ +long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) +{ + struct work_for_cpu wfc; + + INIT_WORK(&wfc.work, do_work_for_cpu); + wfc.fn = fn; + wfc.arg = arg; + get_online_cpus(); + if (unlikely(!cpu_online(cpu))) + wfc.ret = -EINVAL; + else { + schedule_work_on(cpu, &wfc.work); + flush_work(&wfc.work); + } + put_online_cpus(); + + return wfc.ret; +} +EXPORT_SYMBOL_GPL(work_on_cpu); +#endif /* CONFIG_SMP */ + void __init init_workqueues(void) { cpu_populated_map = cpu_online_map; |