From e159489baa717dbae70f9903770a6a4990865887 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Sun, 9 Jan 2011 23:32:15 +0100
Subject: workqueue: relax lockdep annotation on flush_work()

Currently, the lockdep annotation in flush_work() requires exclusive
access to the workqueue the target work is queued on and triggers a
warning if a work is trying to flush another work on the same
workqueue; however, this is no longer true as workqueues can now
execute multiple works concurrently.

This patch adds lock_map_acquire_read(), makes process_one_work() hold
read access to the workqueue while executing a work, and makes
start_flush_work() check for write access if the concurrency level is
one or the workqueue has a rescuer (as only one execution resource -
the rescuer - is guaranteed to be available under memory pressure),
and read access if higher.

This better represents what's going on and removes spurious lockdep
warnings which are triggered by the fake dependency chain created
through flush_work().

* Peter pointed out that flushing another work from a WQ_MEM_RECLAIM
  wq breaks the forward progress guarantee under memory pressure.
  The condition check is updated accordingly.

Signed-off-by: Tejun Heo
Reported-by: "Rafael J. Wysocki"
Tested-by: "Rafael J. Wysocki"
Cc: Peter Zijlstra
Cc: stable@kernel.org
---
 kernel/workqueue.c | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 8ee6ec82f88..930c2390b77 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1840,7 +1840,7 @@ __acquires(&gcwq->lock)
 	spin_unlock_irq(&gcwq->lock);
 
 	work_clear_pending(work);
-	lock_map_acquire(&cwq->wq->lockdep_map);
+	lock_map_acquire_read(&cwq->wq->lockdep_map);
 	lock_map_acquire(&lockdep_map);
 	trace_workqueue_execute_start(work);
 	f(work);
@@ -2384,8 +2384,18 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr,
 	insert_wq_barrier(cwq, barr, work, worker);
 	spin_unlock_irq(&gcwq->lock);
 
-	lock_map_acquire(&cwq->wq->lockdep_map);
+	/*
+	 * If @max_active is 1 or rescuer is in use, flushing another work
+	 * item on the same workqueue may lead to deadlock.  Make sure the
+	 * flusher is not running on the same workqueue by verifying write
+	 * access.
+	 */
+	if (cwq->wq->saved_max_active == 1 || cwq->wq->flags & WQ_RESCUER)
+		lock_map_acquire(&cwq->wq->lockdep_map);
+	else
+		lock_map_acquire_read(&cwq->wq->lockdep_map);
 	lock_map_release(&cwq->wq->lockdep_map);
+
 	return true;
 already_gone:
 	spin_unlock_irq(&gcwq->lock);
--
cgit v1.2.3-70-g09d2


From 42c025f3de9042d9c9abd9a6f6205d1a0f4bcadf Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Tue, 11 Jan 2011 15:58:49 +0100
Subject: workqueue: note the nested NOT_RUNNING test in worker_clr_flags() isn't a noop

The nested NOT_RUNNING test in worker_clr_flags() is slightly
misleading in that if NOT_RUNNING were a single flag the nested test
would always be %true and thus a noop.  Add a comment noting that the
test isn't a noop.

Signed-off-by: Tejun Heo
Cc: Hillf Danton
Cc: Andrew Morton
---
 kernel/workqueue.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 930c2390b77..11869faa681 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -768,7 +768,11 @@ static inline void worker_clr_flags(struct worker *worker, unsigned int flags)
 
 	worker->flags &= ~flags;
 
-	/* if transitioning out of NOT_RUNNING, increment nr_running */
+	/*
+	 * If transitioning out of NOT_RUNNING, increment nr_running.
Note + * that the nested NOT_RUNNING is not a noop. NOT_RUNNING is mask + * of multiple flags, not a single flag. + */ if ((flags & WORKER_NOT_RUNNING) && (oflags & WORKER_NOT_RUNNING)) if (!(worker->flags & WORKER_NOT_RUNNING)) atomic_inc(get_gcwq_nr_running(gcwq->cpu)); -- cgit v1.2.3-70-g09d2 From 977dda7c9b540f48b228174346d8b31542c1e99f Mon Sep 17 00:00:00 2001 From: Paul Turner Date: Fri, 14 Jan 2011 17:57:50 -0800 Subject: sched: Update effective_load() to use global share weights Previously effective_load would approximate the global load weight present on a group taking advantage of: entity_weight = tg->shares ( lw / global_lw ), where entity_weight was provided by tg_shares_up. This worked (approximately) for an 'empty' (at tg level) cpu since we would place boost load representative of what a newly woken task would receive. However, now that load is instantaneously updated this assumption is no longer true and the load calculation is rather incorrect in this case. Fix this (and improve the general case) by re-writing effective_load to take advantage of the new shares distribution code. Signed-off-by: Paul Turner Signed-off-by: Peter Zijlstra LKML-Reference: <20110115015817.069769529@google.com> Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'kernel') diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index c62ebae65cf..414145cf534 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1362,27 +1362,27 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg) return wl; for_each_sched_entity(se) { - long S, rw, s, a, b; + long lw, w; - S = se->my_q->tg->shares; - s = se->load.weight; - rw = se->my_q->load.weight; + tg = se->my_q->tg; + w = se->my_q->load.weight; - a = S*(rw + wl); - b = S*rw + s*wg; + /* use this cpu's instantaneous contribution */ + lw = atomic_read(&tg->load_weight); + lw -= se->my_q->load_contribution; + lw += w + wg; - wl = s*(a-b); + wl += w; - if (likely(b)) - wl /= b; + if (lw > 0 && wl < lw) + wl = (wl * tg->shares) / lw; + else + wl = tg->shares; - /* - * Assume the group is already running and will - * thus already be accounted for in the weight. - * - * That is, moving shares between CPUs, does not - * alter the group weight. - */ + /* zero point is MIN_SHARES */ + if (wl < MIN_SHARES) + wl = MIN_SHARES; + wl -= se->load.weight; wg = 0; } -- cgit v1.2.3-70-g09d2 From efe25c2c7b3a5d17b0c70987a758d8fe7af8e3d1 Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Tue, 11 Jan 2011 15:41:54 +0530 Subject: sched: Reinstate group names in /proc/sched_debug Displaying of group names in /proc/sched_debug was dropped in autogroup patches. Add group names while displaying cfs_rq and tasks information. 
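As an aside on the effective_load() rewrite above: the new per-level arithmetic can be checked standalone. Below is a simplified userspace sketch of one loop iteration (illustration only; all numbers are made up and the final subtraction of the entity's old weight is omitted for brevity):

	#include <stdio.h>

	#define MIN_SHARES 2

	/* Simplified model of one iteration of the new effective_load():
	 * lw approximates the group's global load weight with this cpu's
	 * contribution replaced by its instantaneous value plus the change. */
	static long level_weight(long shares, long tg_load_weight,
				 long load_contribution, long rq_weight,
				 long wl, long wg)
	{
		long lw = tg_load_weight - load_contribution + rq_weight + wg;

		wl += rq_weight;

		if (lw > 0 && wl < lw)
			wl = (wl * shares) / lw;
		else
			wl = shares;

		/* zero point is MIN_SHARES */
		if (wl < MIN_SHARES)
			wl = MIN_SHARES;
		return wl;
	}

	int main(void)
	{
		/* shares=1024, global load 3072, this cpu contributes 1024,
		 * boosting by wl=512: the group would receive 512 */
		printf("wl = %ld\n", level_weight(1024, 3072, 1024, 1024, 512, 0));
		return 0;
	}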
Signed-off-by: Bharata B Rao Signed-off-by: Peter Zijlstra LKML-Reference: <20110111101153.GE4772@in.ibm.com> Signed-off-by: Ingo Molnar --- kernel/sched_debug.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'kernel') diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 1dfae3d014b..4d36f3726da 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -16,6 +16,8 @@ #include #include +static DEFINE_SPINLOCK(sched_debug_lock); + /* * This allows printing both to /proc/sched_debug and * to the console @@ -86,6 +88,23 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group } #endif +#ifdef CONFIG_CGROUP_SCHED +static char group_path[PATH_MAX]; + +static char *task_group_path(struct task_group *tg) +{ + /* + * May be NULL if the underlying cgroup isn't fully-created yet + */ + if (!tg->css.cgroup) { + group_path[0] = '\0'; + return group_path; + } + cgroup_path(tg->css.cgroup, group_path, PATH_MAX); + return group_path; +} +#endif + static void print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) { @@ -108,6 +127,9 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p) SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld", 0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L); #endif +#ifdef CONFIG_CGROUP_SCHED + SEQ_printf(m, " %s", task_group_path(task_group(p))); +#endif SEQ_printf(m, "\n"); } @@ -144,7 +166,11 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) struct sched_entity *last; unsigned long flags; +#ifdef CONFIG_FAIR_GROUP_SCHED + SEQ_printf(m, "\ncfs_rq[%d]:%s\n", cpu, task_group_path(cfs_rq->tg)); +#else SEQ_printf(m, "\ncfs_rq[%d]:\n", cpu); +#endif SEQ_printf(m, " .%-30s: %Ld.%06ld\n", "exec_clock", SPLIT_NS(cfs_rq->exec_clock)); @@ -191,7 +217,11 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq) void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq) { +#ifdef CONFIG_RT_GROUP_SCHED + SEQ_printf(m, "\nrt_rq[%d]:%s\n", cpu, task_group_path(rt_rq->tg)); +#else SEQ_printf(m, "\nrt_rq[%d]:\n", cpu); +#endif #define P(x) \ SEQ_printf(m, " .%-30s: %Ld\n", #x, (long long)(rt_rq->x)) @@ -212,6 +242,7 @@ extern __read_mostly int sched_clock_running; static void print_cpu(struct seq_file *m, int cpu) { struct rq *rq = cpu_rq(cpu); + unsigned long flags; #ifdef CONFIG_X86 { @@ -266,10 +297,14 @@ static void print_cpu(struct seq_file *m, int cpu) #undef P #endif + spin_lock_irqsave(&sched_debug_lock, flags); print_cfs_stats(m, cpu); print_rt_stats(m, cpu); + rcu_read_lock(); print_rq(m, rq, cpu); + rcu_read_unlock(); + spin_unlock_irqrestore(&sched_debug_lock, flags); } static const char *sched_tunable_scaling_names[] = { -- cgit v1.2.3-70-g09d2 From 8ecedd7a06d27a31dbb36fab88e2ba6e6edd43ca Mon Sep 17 00:00:00 2001 From: Bharata B Rao Date: Tue, 11 Jan 2011 15:42:57 +0530 Subject: sched: Display autogroup names in /proc/sched_debug Add autogroup name to cfs_rq and tasks information to /proc/sched_debug. 
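As a rough userspace analogue of the lookup the diff below introduces, consider this sketch (illustration only; the types and the autogroup_id parameter are stand-ins, not the kernel's): try the group's path, falling back to an empty string when the underlying cgroup is not fully created yet.

	#include <stdio.h>

	#define PATH_MAX 4096
	static char group_path[PATH_MAX];

	struct cgroup { const char *path; };
	struct task_group { struct cgroup *cgroup; };

	static char *task_group_path(struct task_group *tg)
	{
		if (!tg->cgroup) {		/* not fully created yet */
			group_path[0] = '\0';
			return group_path;
		}
		snprintf(group_path, sizeof(group_path), "%s", tg->cgroup->path);
		return group_path;
	}

	int main(void)
	{
		struct cgroup cg = { "/mygroup" };
		struct task_group a = { &cg }, b = { NULL };

		printf("cfs_rq[0]:%s\n", task_group_path(&a));	/* /mygroup */
		printf("cfs_rq[1]:%s\n", task_group_path(&b));	/* empty */
		return 0;
	}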
Signed-off-by: Bharata B Rao Signed-off-by: Peter Zijlstra LKML-Reference: <20110111101257.GF4772@in.ibm.com> Signed-off-by: Ingo Molnar --- kernel/sched_autogroup.c | 5 +++++ kernel/sched_debug.c | 3 +++ 2 files changed, 8 insertions(+) (limited to 'kernel') diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c index 32a723b8f84..938d52f80a2 100644 --- a/kernel/sched_autogroup.c +++ b/kernel/sched_autogroup.c @@ -231,6 +231,11 @@ void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m) #ifdef CONFIG_SCHED_DEBUG static inline int autogroup_path(struct task_group *tg, char *buf, int buflen) { + int enabled = ACCESS_ONCE(sysctl_sched_autogroup_enabled); + + if (!enabled || !tg->autogroup) + return 0; + return snprintf(buf, buflen, "%s-%ld", "/autogroup", tg->autogroup->id); } #endif /* CONFIG_SCHED_DEBUG */ diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index 4d36f3726da..e4d37259d49 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -93,6 +93,9 @@ static char group_path[PATH_MAX]; static char *task_group_path(struct task_group *tg) { + if (autogroup_path(tg, group_path, PATH_MAX)) + return group_path; + /* * May be NULL if the underlying cgroup isn't fully-created yet */ -- cgit v1.2.3-70-g09d2 From f44937718ce3b8360f72f6c68c9481712517a867 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Thu, 13 Jan 2011 04:54:50 +0100 Subject: sched, autogroup: Fix CONFIG_RT_GROUP_SCHED sched_setscheduler() failure If CONFIG_RT_GROUP_SCHED is set, __sched_setscheduler() fails due to autogroup not allocating rt_runtime. Free unused/unusable rt_se and rt_rq, redirect RT tasks to the root task group, and tell __sched_setscheduler() that it's ok. Reported-and-tested-by: Bharata B Rao Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra LKML-Reference: <1294890890.8089.39.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- kernel/sched.c | 3 ++- kernel/sched_autogroup.c | 27 +++++++++++++++++++++++++++ kernel/sched_autogroup.h | 4 ++++ 3 files changed, 33 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index a0eb0941fa8..6cbff6bd1a6 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4871,7 +4871,8 @@ recheck: * assigned. */ if (rt_bandwidth_enabled() && rt_policy(policy) && - task_group(p)->rt_bandwidth.rt_runtime == 0) { + task_group(p)->rt_bandwidth.rt_runtime == 0 && + !task_group_is_autogroup(task_group(p))) { __task_rq_unlock(rq); raw_spin_unlock_irqrestore(&p->pi_lock, flags); return -EPERM; diff --git a/kernel/sched_autogroup.c b/kernel/sched_autogroup.c index 938d52f80a2..9fb65628315 100644 --- a/kernel/sched_autogroup.c +++ b/kernel/sched_autogroup.c @@ -27,6 +27,11 @@ static inline void autogroup_destroy(struct kref *kref) { struct autogroup *ag = container_of(kref, struct autogroup, kref); +#ifdef CONFIG_RT_GROUP_SCHED + /* We've redirected RT tasks to the root task group... 
*/ + ag->tg->rt_se = NULL; + ag->tg->rt_rq = NULL; +#endif sched_destroy_group(ag->tg); } @@ -55,6 +60,10 @@ static inline struct autogroup *autogroup_task_get(struct task_struct *p) return ag; } +#ifdef CONFIG_RT_GROUP_SCHED +static void free_rt_sched_group(struct task_group *tg); +#endif + static inline struct autogroup *autogroup_create(void) { struct autogroup *ag = kzalloc(sizeof(*ag), GFP_KERNEL); @@ -72,6 +81,19 @@ static inline struct autogroup *autogroup_create(void) init_rwsem(&ag->lock); ag->id = atomic_inc_return(&autogroup_seq_nr); ag->tg = tg; +#ifdef CONFIG_RT_GROUP_SCHED + /* + * Autogroup RT tasks are redirected to the root task group + * so we don't have to move tasks around upon policy change, + * or flail around trying to allocate bandwidth on the fly. + * A bandwidth exception in __sched_setscheduler() allows + * the policy change to proceed. Thereafter, task_group() + * returns &root_task_group, so zero bandwidth is required. + */ + free_rt_sched_group(tg); + tg->rt_se = root_task_group.rt_se; + tg->rt_rq = root_task_group.rt_rq; +#endif tg->autogroup = ag; return ag; @@ -106,6 +128,11 @@ task_wants_autogroup(struct task_struct *p, struct task_group *tg) return true; } +static inline bool task_group_is_autogroup(struct task_group *tg) +{ + return tg != &root_task_group && tg->autogroup; +} + static inline struct task_group * autogroup_task_group(struct task_struct *p, struct task_group *tg) { diff --git a/kernel/sched_autogroup.h b/kernel/sched_autogroup.h index 5358e241cb2..7b859ffe5da 100644 --- a/kernel/sched_autogroup.h +++ b/kernel/sched_autogroup.h @@ -15,6 +15,10 @@ autogroup_task_group(struct task_struct *p, struct task_group *tg); static inline void autogroup_init(struct task_struct *init_task) { } static inline void autogroup_free(struct task_group *tg) { } +static inline bool task_group_is_autogroup(struct task_group *tg) +{ + return 0; +} static inline struct task_group * autogroup_task_group(struct task_struct *p, struct task_group *tg) -- cgit v1.2.3-70-g09d2 From fce2097983d914ea8c2338efc6f6e9bb737f6ae4 Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Fri, 14 Jan 2011 15:57:39 +0800 Subject: sched: Replace rq->bkl_count with rq->rq_sched_info.bkl_count Now rq->rq_sched_info.bkl_count is not used for rq, scroll rq->bkl_count into it. Thus we can save some space for rq. 
Signed-off-by: Yong Zhang Signed-off-by: Peter Zijlstra LKML-Reference: <1294991859-13246-1-git-send-email-yong.zhang0@gmail.com> Signed-off-by: Ingo Molnar --- kernel/sched.c | 5 +---- kernel/sched_debug.c | 4 +++- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 6cbff6bd1a6..0a169a85eb3 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -553,9 +553,6 @@ struct rq { /* try_to_wake_up() stats */ unsigned int ttwu_count; unsigned int ttwu_local; - - /* BKL stats */ - unsigned int bkl_count; #endif }; @@ -3887,7 +3884,7 @@ static inline void schedule_debug(struct task_struct *prev) schedstat_inc(this_rq(), sched_count); #ifdef CONFIG_SCHEDSTATS if (unlikely(prev->lock_depth >= 0)) { - schedstat_inc(this_rq(), bkl_count); + schedstat_inc(this_rq(), rq_sched_info.bkl_count); schedstat_inc(prev, sched_info.bkl_count); } #endif diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c index e4d37259d49..eb6cb8edd07 100644 --- a/kernel/sched_debug.c +++ b/kernel/sched_debug.c @@ -296,9 +296,11 @@ static void print_cpu(struct seq_file *m, int cpu) P(ttwu_count); P(ttwu_local); - P(bkl_count); + SEQ_printf(m, " .%-30s: %d\n", "bkl_count", + rq->rq_sched_info.bkl_count); #undef P +#undef P64 #endif spin_lock_irqsave(&sched_debug_lock, flags); print_cfs_stats(m, cpu); -- cgit v1.2.3-70-g09d2 From d7d8294415f0ce4254827d4a2a5ee88b00be52a8 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Wed, 5 Jan 2011 05:41:17 +0100 Subject: sched: Fix signed unsigned comparison in check_preempt_tick() Signed unsigned comparison may lead to superfluous resched if leftmost is right of the current task, wasting a few cycles, and inadvertently _lengthening_ the current task's slice. Reported-by: Venkatesh Pallipadi Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra LKML-Reference: <1294202477.9384.5.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 414145cf534..77e9166d7bb 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1062,6 +1062,9 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) struct sched_entity *se = __pick_next_entity(cfs_rq); s64 delta = curr->vruntime - se->vruntime; + if (delta < 0) + return; + if (delta > ideal_runtime) resched_task(rq_of(cfs_rq)->curr); } -- cgit v1.2.3-70-g09d2 From 068c5cc5ac7414a8e9eb7856b4bf3cc4d4744267 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 19 Jan 2011 12:26:11 +0100 Subject: sched, cgroup: Use exit hook to avoid use-after-free crash By not notifying the controller of the on-exit move back to init_css_set, we fail to move the task out of the previous cgroup's cfs_rq. This leads to an opportunity for a cgroup-destroy to come in and free the cgroup (there are no active tasks left in it after all) to which the not-quite dead task is still enqueued. 
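The shape of the fix is easy to see in a small userspace analogue (illustration only; the structs and function below are stand-ins, not the kernel code): the exit hook detaches the task from its old group and reparents it to a group that persists, so the old group can then be freed safely.

	#include <stdio.h>
	#include <stdlib.h>

	struct group { int nr_tasks; };
	struct task  { struct group *grp; int exiting; };

	static struct group root_group;	/* analogue of the root task group */

	/* analogue of the new cpu_cgroup_exit() hook */
	static void task_exit(struct task *t)
	{
		/* copy_process() failure path: the task never ran, skip it */
		if (!t->exiting)
			return;
		t->grp->nr_tasks--;	/* dequeue from the old group ... */
		t->grp = &root_group;	/* ... and point at one that persists */
	}

	int main(void)
	{
		struct group *g = calloc(1, sizeof(*g));
		struct task t = { .grp = g, .exiting = 1 };

		g->nr_tasks = 1;
		task_exit(&t);	/* without this, g could be freed while the
				 * not-quite-dead task still points at it */
		if (g->nr_tasks == 0)
			free(g);	/* safe: t no longer references g */
		printf("moved to root group: %s\n",
		       t.grp == &root_group ? "yes" : "no");
		return 0;
	}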
Reported-by: Miklos Vajna Fixed-by: Mike Galbraith Signed-off-by: Peter Zijlstra Cc: Cc: Mike Galbraith Signed-off-by: Ingo Molnar LKML-Reference: <1293206353.29444.205.camel@laptop> --- kernel/sched.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 0a169a85eb3..fa5272a0932 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -606,6 +606,9 @@ static inline struct task_group *task_group(struct task_struct *p) struct task_group *tg; struct cgroup_subsys_state *css; + if (p->flags & PF_EXITING) + return &root_task_group; + css = task_subsys_state_check(p, cpu_cgroup_subsys_id, lockdep_is_held(&task_rq(p)->lock)); tg = container_of(css, struct task_group, css); @@ -8880,6 +8883,20 @@ cpu_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, } } +static void +cpu_cgroup_exit(struct cgroup_subsys *ss, struct task_struct *task) +{ + /* + * cgroup_exit() is called in the copy_process() failure path. + * Ignore this case since the task hasn't ran yet, this avoids + * trying to poke a half freed task state from generic code. + */ + if (!(task->flags & PF_EXITING)) + return; + + sched_move_task(task); +} + #ifdef CONFIG_FAIR_GROUP_SCHED static int cpu_shares_write_u64(struct cgroup *cgrp, struct cftype *cftype, u64 shareval) @@ -8952,6 +8969,7 @@ struct cgroup_subsys cpu_cgroup_subsys = { .destroy = cpu_cgroup_destroy, .can_attach = cpu_cgroup_can_attach, .attach = cpu_cgroup_attach, + .exit = cpu_cgroup_exit, .populate = cpu_cgroup_populate, .subsys_id = cpu_cgroup_subsys_id, .early_init = 1, -- cgit v1.2.3-70-g09d2 From dbe08d82ce3967ccdf459f7951d02589cf967300 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Wed, 19 Jan 2011 19:22:07 +0100 Subject: perf: Fix find_get_context() vs perf_event_exit_task() race find_get_context() must not install the new perf_event_context if the task has already passed perf_event_exit_task(). If nothing else, this means the memory leak. Initially ctx->refcount == 2, it is supposed that perf_event_exit_task_context() should participate and do the necessary put_ctx(). find_lively_task_by_vpid() checks PF_EXITING but this buys nothing, by the time we call find_get_context() this task can be already dead. To the point, cmpxchg() can succeed when the task has already done the last schedule(). Change find_get_context() to populate task->perf_event_ctxp[] under task->perf_event_mutex, this way we can trust PF_EXITING because perf_event_exit_task() takes the same mutex. Also, change perf_event_exit_task_context() to use rcu_dereference(). Probably this is not strictly needed, but with or without this change find_get_context() can race with setup_new_exec()->perf_event_exit_task(), rcu_dereference() looks better. Signed-off-by: Oleg Nesterov Acked-by: Peter Zijlstra Cc: Alan Stern Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Prasad Cc: Roland McGrath LKML-Reference: <20110119182207.GB12183@redhat.com> Signed-off-by: Ingo Molnar --- kernel/perf_event.c | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) (limited to 'kernel') diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 84522c79698..4ec55ef5810 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -2201,13 +2201,6 @@ find_lively_task_by_vpid(pid_t vpid) if (!task) return ERR_PTR(-ESRCH); - /* - * Can't attach events to a dying task. - */ - err = -ESRCH; - if (task->flags & PF_EXITING) - goto errout; - /* Reuse ptrace permission checks for now. 
 */
 	err = -EACCES;
 	if (!ptrace_may_access(task, PTRACE_MODE_READ))
 		goto errout;
@@ -2268,14 +2261,27 @@ retry:
 
 		get_ctx(ctx);
 
-		if (cmpxchg(&task->perf_event_ctxp[ctxn], NULL, ctx)) {
-			/*
-			 * We raced with some other task; use
-			 * the context they set.
-			 */
+		err = 0;
+		mutex_lock(&task->perf_event_mutex);
+		/*
+		 * If it has already passed perf_event_exit_task(),
+		 * we must see PF_EXITING, it takes this mutex too.
+		 */
+		if (task->flags & PF_EXITING)
+			err = -ESRCH;
+		else if (task->perf_event_ctxp[ctxn])
+			err = -EAGAIN;
+		else
+			rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx);
+		mutex_unlock(&task->perf_event_mutex);
+
+		if (unlikely(err)) {
 			put_task_struct(task);
 			kfree(ctx);
-			goto retry;
+
+			if (err == -EAGAIN)
+				goto retry;
+			goto errout;
 		}
 	}
@@ -6127,7 +6133,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn)
 	 * scheduled, so we are now safe from rescheduling changing
 	 * our context.
 	 */
-	child_ctx = child->perf_event_ctxp[ctxn];
+	child_ctx = rcu_dereference(child->perf_event_ctxp[ctxn]);
 	task_ctx_sched_out(child_ctx, EVENT_ALL);
 
 	/*
--
cgit v1.2.3-70-g09d2


From 8550d7cb6ed6c89add49c3b6ad4c753ab8a3d7f9 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov
Date: Wed, 19 Jan 2011 19:22:28 +0100
Subject: perf: Fix perf_event_init_task()/perf_event_free_task() interaction

perf_event_init_task() should clear child->perf_event_ctxp[] before
anything else. Otherwise, if perf_event_init_context(perf_hw_context)
fails, perf_event_free_task() can free perf_event_ctxp[perf_sw_context]
copied from parent->perf_event_ctxp[] by dup_task_struct().

Also move the initialization of perf_event_mutex and perf_event_list
from perf_event_init_context() to perf_event_init_task().

Signed-off-by: Oleg Nesterov
Acked-by: Peter Zijlstra
Cc: Alan Stern
Cc: Arnaldo Carvalho de Melo
Cc: Frederic Weisbecker
Cc: Paul Mackerras
Cc: Prasad
Cc: Roland McGrath
LKML-Reference: <20110119182228.GC12183@redhat.com>
Signed-off-by: Ingo Molnar
---
 kernel/perf_event.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 4ec55ef5810..244ca3acb0e 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -6446,11 +6446,6 @@ int perf_event_init_context(struct task_struct *child, int ctxn)
 	unsigned long flags;
 	int ret = 0;
 
-	child->perf_event_ctxp[ctxn] = NULL;
-
-	mutex_init(&child->perf_event_mutex);
-	INIT_LIST_HEAD(&child->perf_event_list);
-
 	if (likely(!parent->perf_event_ctxp[ctxn]))
 		return 0;
 
@@ -6539,6 +6534,10 @@ int perf_event_init_task(struct task_struct *child)
 {
 	int ctxn, ret;
 
+	memset(child->perf_event_ctxp, 0, sizeof(child->perf_event_ctxp));
+	mutex_init(&child->perf_event_mutex);
+	INIT_LIST_HEAD(&child->perf_event_list);
+
 	for_each_task_context_nr(ctxn) {
 		ret = perf_event_init_context(child, ctxn);
 		if (ret)
--
cgit v1.2.3-70-g09d2


From 2d0640b47da74cff7c11642c798d40de861ed524 Mon Sep 17 00:00:00 2001
From: Stephen Boyd
Date: Tue, 18 Jan 2011 22:46:34 -0800
Subject: hrtimers: Notify hrtimer users of switches to NOHZ mode

When NOHZ=y and high res timers are disabled (via cmdline or Kconfig),
tick_nohz_switch_to_nohz() will notify the user about switching into
NOHZ mode. Nothing is printed for the case where HIGH_RES_TIMERS=y.

Fix this for the HIGH_RES_TIMERS=y case by duplicating the printk from
the low res NOHZ path in the high res NOHZ path.

This confused me since I was thinking 'dmesg | grep -i NOHZ' would
tell me if NOHZ was enabled, but if I have hrtimers there is nothing.
Signed-off-by: Stephen Boyd Acked-by: Thomas Gleixner Cc: Peter Zijlstra LKML-Reference: <1295419594-13085-1-git-send-email-sboyd@codeaurora.org> Signed-off-by: Ingo Molnar --- kernel/time/tick-sched.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 3e216e01bbd..c55ea243347 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -642,8 +642,7 @@ static void tick_nohz_switch_to_nohz(void) } local_irq_enable(); - printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", - smp_processor_id()); + printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); } /* @@ -795,8 +794,10 @@ void tick_setup_sched_timer(void) } #ifdef CONFIG_NO_HZ - if (tick_nohz_enabled) + if (tick_nohz_enabled) { ts->nohz_mode = NOHZ_MODE_HIGHRES; + printk(KERN_INFO "Switched to NOHz mode on CPU #%d\n", smp_processor_id()); + } #endif } #endif /* HIGH_RES_TIMERS */ -- cgit v1.2.3-70-g09d2 From 2ce802f62ba32a7d95748ac92bf351f76affb6ff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 20 Jan 2011 12:06:35 +0100 Subject: lockdep: Move early boot local IRQ enable/disable status to init/main.c During early boot, local IRQ is disabled until IRQ subsystem is properly initialized. During this time, no one should enable local IRQ and some operations which usually are not allowed with IRQ disabled, e.g. operations which might sleep or require communications with other processors, are allowed. lockdep tracked this with early_boot_irqs_off/on() callbacks. As other subsystems need this information too, move it to init/main.c and make it generally available. While at it, toggle the boolean to early_boot_irqs_disabled instead of enabled so that it can be initialized with %false and %true indicates the exceptional condition. 
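A minimal userspace sketch of the resulting scheme (illustration only, not the kernel code): one globally visible boolean replaces the on/off callback pair, and any interested code can test it directly.

	#include <stdbool.h>
	#include <stdio.h>

	bool early_boot_irqs_disabled;	/* false by default, as in init/main.c */

	/* analogue of the lockdep check in trace_hardirqs_on_caller() */
	static void trace_hardirqs_on(void)
	{
		if (early_boot_irqs_disabled)
			printf("WARN: enabling IRQs during early boot\n");
	}

	int main(void)
	{
		early_boot_irqs_disabled = true;	/* start_kernel() */
		trace_hardirqs_on();			/* would warn */
		early_boot_irqs_disabled = false;	/* before local_irq_enable() */
		trace_hardirqs_on();			/* silent */
		return 0;
	}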
Signed-off-by: Tejun Heo Acked-by: Peter Zijlstra Acked-by: Pekka Enberg Cc: Linus Torvalds LKML-Reference: <20110120110635.GB6036@htj.dyndns.org> Signed-off-by: Ingo Molnar --- arch/x86/xen/enlighten.c | 2 +- include/linux/kernel.h | 2 ++ include/linux/lockdep.h | 8 -------- init/main.c | 13 +++++++++++-- kernel/lockdep.c | 18 +----------------- kernel/trace/trace_irqsoff.c | 8 -------- 6 files changed, 15 insertions(+), 36 deletions(-) (limited to 'kernel') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 7e8d3bc80af..50542efe45f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1194,7 +1194,7 @@ asmlinkage void __init xen_start_kernel(void) per_cpu(xen_vcpu, 0) = &HYPERVISOR_shared_info->vcpu_info[0]; local_irq_disable(); - early_boot_irqs_off(); + early_boot_irqs_disabled = true; memblock_init(); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5a9d9059520..d07d8057e44 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -243,6 +243,8 @@ extern int test_taint(unsigned flag); extern unsigned long get_taint(void); extern int root_mountflags; +extern bool early_boot_irqs_disabled; + /* Values used for system_state */ extern enum system_states { SYSTEM_BOOTING, diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 71c09b26c75..f638fd78d10 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -436,16 +436,8 @@ do { \ #endif /* CONFIG_LOCKDEP */ #ifdef CONFIG_TRACE_IRQFLAGS -extern void early_boot_irqs_off(void); -extern void early_boot_irqs_on(void); extern void print_irqtrace_events(struct task_struct *curr); #else -static inline void early_boot_irqs_off(void) -{ -} -static inline void early_boot_irqs_on(void) -{ -} static inline void print_irqtrace_events(struct task_struct *curr) { } diff --git a/init/main.c b/init/main.c index 00799c1d462..33c37c379e9 100644 --- a/init/main.c +++ b/init/main.c @@ -96,6 +96,15 @@ static inline void mark_rodata_ro(void) { } extern void tc_init(void); #endif +/* + * Debug helper: via this flag we know that we are in 'early bootup code' + * where only the boot processor is running with IRQ disabled. This means + * two things - IRQ must not be enabled before the flag is cleared and some + * operations which are not allowed with IRQ disabled are allowed while the + * flag is set. + */ +bool early_boot_irqs_disabled __read_mostly; + enum system_states system_state __read_mostly; EXPORT_SYMBOL(system_state); @@ -554,7 +563,7 @@ asmlinkage void __init start_kernel(void) cgroup_init_early(); local_irq_disable(); - early_boot_irqs_off(); + early_boot_irqs_disabled = true; /* * Interrupts are still disabled. Do necessary setups, then @@ -621,7 +630,7 @@ asmlinkage void __init start_kernel(void) if (!irqs_disabled()) printk(KERN_CRIT "start_kernel(): bug: interrupts were " "enabled early\n"); - early_boot_irqs_on(); + early_boot_irqs_disabled = false; local_irq_enable(); /* Interrupts are enabled now so all GFP allocations are safe. 
 */

diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 42ba65dff7d..0d2058da80f 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2291,22 +2291,6 @@ mark_held_locks(struct task_struct *curr, enum mark_type mark)
 	return 1;
 }
 
-/*
- * Debugging helper: via this flag we know that we are in
- * 'early bootup code', and will warn about any invalid irqs-on event:
- */
-static int early_boot_irqs_enabled;
-
-void early_boot_irqs_off(void)
-{
-	early_boot_irqs_enabled = 0;
-}
-
-void early_boot_irqs_on(void)
-{
-	early_boot_irqs_enabled = 1;
-}
-
 /*
  * Hardirqs will be enabled:
  */
@@ -2319,7 +2303,7 @@ void trace_hardirqs_on_caller(unsigned long ip)
 	if (unlikely(!debug_locks || current->lockdep_recursion))
 		return;
 
-	if (DEBUG_LOCKS_WARN_ON(unlikely(!early_boot_irqs_enabled)))
+	if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled)))
 		return;
 
 	if (unlikely(curr->hardirqs_enabled)) {
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 5cf8c602b88..92b6e1e12d9 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -453,14 +453,6 @@ void time_hardirqs_off(unsigned long a0, unsigned long a1)
  * Stubs:
  */
 
-void early_boot_irqs_off(void)
-{
-}
-
-void early_boot_irqs_on(void)
-{
-}
-
 void trace_softirqs_on(unsigned long ip)
 {
 }
--
cgit v1.2.3-70-g09d2


From bd924e8cbd4b73ffb7d707a774c04f7e2cae88ed Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Thu, 20 Jan 2011 12:07:13 +0100
Subject: smp: Allow on_each_cpu() to be called while early_boot_irqs_disabled is set

percpu may end up calling vfree() during early boot which in turn may
call on_each_cpu() for TLB flushes. What on_each_cpu() does can be
done safely while IRQs are disabled during early boot, but it assumed
that it is always called with local IRQs enabled, which ended up
enabling local IRQs prematurely during boot and triggering a couple
of warnings.

This patch updates on_each_cpu() and smp_call_function_many() such
that on_each_cpu() can be used safely while early_boot_irqs_disabled
is set.

Signed-off-by: Tejun Heo
Acked-by: Peter Zijlstra
Acked-by: Pekka Enberg
Cc: Linus Torvalds
LKML-Reference: <20110120110713.GC6036@htj.dyndns.org>
Signed-off-by: Ingo Molnar
Reported-by: Ingo Molnar
---
 kernel/smp.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/smp.c b/kernel/smp.c
index 4ec30e06998..4b83cd6815e 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -430,7 +430,7 @@ void smp_call_function_many(const struct cpumask *mask,
 	 * can't happen.
 	 */
 	WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
-		     && !oops_in_progress);
+		     && !oops_in_progress && !early_boot_irqs_disabled);
 
 	/* So, what's a CPU they want? Ignoring this one. */
 	cpu = cpumask_first_and(mask, cpu_online_mask);
@@ -533,17 +533,20 @@ void ipi_call_unlock_irq(void)
 #endif /* USE_GENERIC_SMP_HELPERS */
 
 /*
- * Call a function on all processors
+ * Call a function on all processors.  May be used during early boot while
+ * early_boot_irqs_disabled is set.  Use local_irq_save/restore() instead
+ * of local_irq_disable/enable().
*/ int on_each_cpu(void (*func) (void *info), void *info, int wait) { + unsigned long flags; int ret = 0; preempt_disable(); ret = smp_call_function(func, info, wait); - local_irq_disable(); + local_irq_save(flags); func(info); - local_irq_enable(); + local_irq_restore(flags); preempt_enable(); return ret; } -- cgit v1.2.3-70-g09d2 From 6dc19899958e420a931274b94019e267e2396d3e Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 20 Jan 2011 14:44:33 -0800 Subject: kernel/smp.c: fix smp_call_function_many() SMP race I noticed a failure where we hit the following WARN_ON in generic_smp_call_function_interrupt: if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) continue; data->csd.func(data->csd.info); refs = atomic_dec_return(&data->refs); WARN_ON(refs < 0); <------------------------- We atomically tested and cleared our bit in the cpumask, and yet the number of cpus left (ie refs) was 0. How can this be? It turns out commit 54fdade1c3332391948ec43530c02c4794a38172 ("generic-ipi: make struct call_function_data lockless") is at fault. It removes locking from smp_call_function_many and in doing so creates a rather complicated race. The problem comes about because: - The smp_call_function_many interrupt handler walks call_function.queue without any locking. - We reuse a percpu data structure in smp_call_function_many. - We do not wait for any RCU grace period before starting the next smp_call_function_many. Imagine a scenario where CPU A does two smp_call_functions back to back, and CPU B does an smp_call_function in between. We concentrate on how CPU C handles the calls: CPU A CPU B CPU C CPU D smp_call_function smp_call_function_interrupt walks call_function.queue sees data from CPU A on list smp_call_function smp_call_function_interrupt walks call_function.queue sees (stale) CPU A on list smp_call_function int clears last ref on A list_del_rcu, unlock smp_call_function reuses percpu *data A data->cpumask sees and clears bit in cpumask might be using old or new fn! decrements refs below 0 set data->refs (too late!) The important thing to note is since the interrupt handler walks a potentially stale call_function.queue without any locking, then another cpu can view the percpu *data structure at any time, even when the owner is in the process of initialising it. The following test case hits the WARN_ON 100% of the time on my PowerPC box (having 128 threads does help :) #include #include #define ITERATIONS 100 static void do_nothing_ipi(void *dummy) { } static void do_ipis(struct work_struct *dummy) { int i; for (i = 0; i < ITERATIONS; i++) smp_call_function(do_nothing_ipi, NULL, 1); printk(KERN_DEBUG "cpu %d finished\n", smp_processor_id()); } static struct work_struct work[NR_CPUS]; static int __init testcase_init(void) { int cpu; for_each_online_cpu(cpu) { INIT_WORK(&work[cpu], do_ipis); schedule_work_on(cpu, &work[cpu]); } return 0; } static void __exit testcase_exit(void) { } module_init(testcase_init) module_exit(testcase_exit) MODULE_LICENSE("GPL"); MODULE_AUTHOR("Anton Blanchard"); I tried to fix it by ordering the read and the write of ->cpumask and ->refs. In doing so I missed a critical case but Paul McKenney was able to spot my bug thankfully :) To ensure we arent viewing previous iterations the interrupt handler needs to read ->refs then ->cpumask then ->refs _again_. Thanks to Milton Miller and Paul McKenney for helping to debug this issue. 
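The ordering discipline the fix relies on is the classic message-passing pattern; a minimal userspace rendition using C11 atomics in place of smp_wmb()/smp_rmb() (illustration of the idea, not a transcription of the kernel code; build with cc -pthread):

	#include <stdatomic.h>
	#include <pthread.h>
	#include <stdio.h>

	static int payload;		/* plays the role of ->cpumask */
	static atomic_int published;	/* plays the role of ->refs */

	static void *writer(void *arg)
	{
		payload = 42;			/* first write */
		/* release = barrier + store, like smp_wmb(); atomic_set() */
		atomic_store_explicit(&published, 1, memory_order_release);
		return NULL;
	}

	static void *reader(void *arg)
	{
		/* acquire = load + barrier; seeing 'published' set
		 * guarantees the payload write is visible too */
		if (atomic_load_explicit(&published, memory_order_acquire))
			printf("payload=%d\n", payload);  /* always 42 */
		return NULL;
	}

	int main(void)
	{
		pthread_t w, r;
		pthread_create(&w, NULL, writer, NULL);
		pthread_create(&r, NULL, reader, NULL);
		pthread_join(w, NULL);
		pthread_join(r, NULL);
		return 0;
	}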
[miltonm@bga.com: add WARN_ON and BUG_ON, remove extra read of refs
 before initial read of mask that doesn't help (also noted by Peter
 Zijlstra), adjust comments, hopefully clarify scenario]
[miltonm@bga.com: remove excess tests]
Signed-off-by: Anton Blanchard
Signed-off-by: Milton Miller
Cc: Ingo Molnar
Cc: "Paul E. McKenney"
Cc: [2.6.32+]
Signed-off-by: Andrew Morton
Signed-off-by: Linus Torvalds
---
 kernel/smp.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

(limited to 'kernel')

diff --git a/kernel/smp.c b/kernel/smp.c
index 4ec30e06998..17c6e586023 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -195,6 +195,24 @@ void generic_smp_call_function_interrupt(void)
 	list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
 		int refs;
 
+		/*
+		 * Since we walk the list without any locks, we might
+		 * see an entry that was completed, removed from the
+		 * list and is in the process of being reused.
+		 *
+		 * We must check that the cpu is in the cpumask before
+		 * checking the refs, and both must be set before
+		 * executing the callback on this cpu.
+		 */
+
+		if (!cpumask_test_cpu(cpu, data->cpumask))
+			continue;
+
+		smp_rmb();
+
+		if (atomic_read(&data->refs) == 0)
+			continue;
+
 		if (!cpumask_test_and_clear_cpu(cpu, data->cpumask))
 			continue;
 
@@ -203,6 +221,8 @@ void generic_smp_call_function_interrupt(void)
 		refs = atomic_dec_return(&data->refs);
 		WARN_ON(refs < 0);
 		if (!refs) {
+			WARN_ON(!cpumask_empty(data->cpumask));
+
 			raw_spin_lock(&call_function.lock);
 			list_del_rcu(&data->csd.list);
 			raw_spin_unlock(&call_function.lock);
@@ -454,11 +474,21 @@ void smp_call_function_many(const struct cpumask *mask,
 
 	data = &__get_cpu_var(cfd_data);
 	csd_lock(&data->csd);
+	BUG_ON(atomic_read(&data->refs) || !cpumask_empty(data->cpumask));
 
 	data->csd.func = func;
 	data->csd.info = info;
 	cpumask_and(data->cpumask, mask, cpu_online_mask);
 	cpumask_clear_cpu(this_cpu, data->cpumask);
+
+	/*
+	 * To ensure the interrupt handler gets a complete view
+	 * we order the cpumask and refs writes and order the read
+	 * of them in the interrupt handler.  In addition we may
+	 * only clear our own cpu bit from the mask.
+	 */
+	smp_wmb();
+
 	atomic_set(&data->refs, cpumask_weight(data->cpumask));
 
 	raw_spin_lock_irqsave(&call_function.lock, flags);
--
cgit v1.2.3-70-g09d2


From 225c8e010f2d17a62aef131e24c6e7c111f36f9b Mon Sep 17 00:00:00 2001
From: Milton Miller
Date: Thu, 20 Jan 2011 14:44:34 -0800
Subject: kernel/smp.c: consolidate writes in smp_call_function_interrupt()

We have to test the cpu mask in the interrupt handler before checking the
refs, otherwise we can start to follow an entry before it's deleted and
find it partially initialized for the next trip.  Presently we also clear
the cpumask bit before executing the called function, which implies
getting write access to the line.  After the function is called we then
decrement refs, and if they go to zero we then unlock the structure.

However, this implies getting write access to the call function data
before and after the function is called.  If we can assert that no
smp_call_function execution function is allowed to enable interrupts,
then we can move both writes to after the function is called, hopefully
allowing both writes with one cache line bounce.

On a 256 thread system with a kernel compiled for 1024 threads, the time
to execute the testcase in the "smp_call_function_many race" changelog was
reduced by about 30-40ms out of about 545 ms.
I decided to keep this as WARN because its now a buggy function, even though the stack trace is of no value -- a simple printk would give us the information needed. Raw data: Without patch: ipi_test startup took 1219366ns complete 539819014ns total 541038380ns ipi_test startup took 1695754ns complete 543439872ns total 545135626ns ipi_test startup took 7513568ns complete 539606362ns total 547119930ns ipi_test startup took 13304064ns complete 533898562ns total 547202626ns ipi_test startup took 8668192ns complete 544264074ns total 552932266ns ipi_test startup took 4977626ns complete 548862684ns total 553840310ns ipi_test startup took 2144486ns complete 541292318ns total 543436804ns ipi_test startup took 21245824ns complete 530280180ns total 551526004ns With patch: ipi_test startup took 5961748ns complete 500859628ns total 506821376ns ipi_test startup took 8975996ns complete 495098924ns total 504074920ns ipi_test startup took 19797750ns complete 492204740ns total 512002490ns ipi_test startup took 14824796ns complete 487495878ns total 502320674ns ipi_test startup took 11514882ns complete 494439372ns total 505954254ns ipi_test startup took 8288084ns complete 502570774ns total 510858858ns ipi_test startup took 6789954ns complete 493388112ns total 500178066ns #include #include #include /* sched clock */ #define ITERATIONS 100 static void do_nothing_ipi(void *dummy) { } static void do_ipis(struct work_struct *dummy) { int i; for (i = 0; i < ITERATIONS; i++) smp_call_function(do_nothing_ipi, NULL, 1); printk(KERN_DEBUG "cpu %d finished\n", smp_processor_id()); } static struct work_struct work[NR_CPUS]; static int __init testcase_init(void) { int cpu; u64 start, started, done; start = local_clock(); for_each_online_cpu(cpu) { INIT_WORK(&work[cpu], do_ipis); schedule_work_on(cpu, &work[cpu]); } started = local_clock(); for_each_online_cpu(cpu) flush_work(&work[cpu]); done = local_clock(); pr_info("ipi_test startup took %lldns complete %lldns total %lldns\n", started-start, done-started, done-start); return 0; } static void __exit testcase_exit(void) { } module_init(testcase_init) module_exit(testcase_exit) MODULE_LICENSE("GPL"); MODULE_AUTHOR("Anton Blanchard"); Signed-off-by: Milton Miller Cc: Anton Blanchard Cc: Ingo Molnar Cc: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/smp.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/smp.c b/kernel/smp.c index 17c6e586023..2fe66f7c617 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -194,6 +194,7 @@ void generic_smp_call_function_interrupt(void) */ list_for_each_entry_rcu(data, &call_function.queue, csd.list) { int refs; + void (*func) (void *info); /* * Since we walk the list without any locks, we might @@ -213,24 +214,32 @@ void generic_smp_call_function_interrupt(void) if (atomic_read(&data->refs) == 0) continue; - if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) - continue; - + func = data->csd.func; /* for later warn */ data->csd.func(data->csd.info); + /* + * If the cpu mask is not still set then it enabled interrupts, + * we took another smp interrupt, and executed the function + * twice on this cpu. In theory that copy decremented refs. 
+ */ + if (!cpumask_test_and_clear_cpu(cpu, data->cpumask)) { + WARN(1, "%pS enabled interrupts and double executed\n", + func); + continue; + } + refs = atomic_dec_return(&data->refs); WARN_ON(refs < 0); - if (!refs) { - WARN_ON(!cpumask_empty(data->cpumask)); - - raw_spin_lock(&call_function.lock); - list_del_rcu(&data->csd.list); - raw_spin_unlock(&call_function.lock); - } if (refs) continue; + WARN_ON(!cpumask_empty(data->cpumask)); + + raw_spin_lock(&call_function.lock); + list_del_rcu(&data->csd.list); + raw_spin_unlock(&call_function.lock); + csd_unlock(&data->csd); } -- cgit v1.2.3-70-g09d2 From 1c77ff22f539ceaa64ea43d6a26d867e84602cb7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 19 Jan 2011 19:41:35 +0100 Subject: genirq: Remove __do_IRQ All architectures are finally converted. Remove the cruft. Signed-off-by: Thomas Gleixner Cc: Richard Henderson Cc: Mike Frysinger Cc: David Howells Cc: Tony Luck Cc: Greg Ungerer Cc: Michal Simek Acked-by: David Howells Cc: Kyle McMartin Acked-by: Benjamin Herrenschmidt Cc: Chen Liqin Cc: "David S. Miller" Cc: Chris Metcalf Cc: Jeff Dike --- Documentation/feature-removal-schedule.txt | 8 --- arch/alpha/Kconfig | 3 - arch/blackfin/Kconfig | 3 - arch/frv/Kconfig | 4 -- arch/ia64/Kconfig | 3 - arch/m68knommu/Kconfig | 4 -- arch/microblaze/Kconfig | 3 - arch/mips/Kconfig | 3 - arch/mn10300/Kconfig | 3 - arch/parisc/Kconfig | 4 -- arch/powerpc/Kconfig | 4 -- arch/score/Kconfig | 3 - arch/sparc/Kconfig | 4 -- arch/tile/Kconfig | 3 - arch/um/Kconfig.um | 3 - include/linux/irqdesc.h | 14 ---- kernel/irq/Kconfig | 3 - kernel/irq/handle.c | 111 ----------------------------- 18 files changed, 183 deletions(-) (limited to 'kernel') diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index 8c594c45b6a..b959659c5df 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -357,14 +357,6 @@ Who: Dave Jones , Matthew Garrett ----------------------------- -What: __do_IRQ all in one fits nothing interrupt handler -When: 2.6.32 -Why: __do_IRQ was kept for easy migration to the type flow handlers. - More than two years of migration time is enough. 
-Who: Thomas Gleixner - ------------------------------ - What: fakephp and associated sysfs files in /sys/bus/pci/slots/ When: 2011 Why: In 2.6.27, the semantics of /sys/bus/pci/slots was redefined to diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index fc95ee1bcf6..943fe6930f7 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -68,9 +68,6 @@ config GENERIC_IOMAP bool default n -config GENERIC_HARDIRQS_NO__DO_IRQ - def_bool y - config GENERIC_HARDIRQS bool default y diff --git a/arch/blackfin/Kconfig b/arch/blackfin/Kconfig index 0a221d48152..a37b2be23f1 100644 --- a/arch/blackfin/Kconfig +++ b/arch/blackfin/Kconfig @@ -50,9 +50,6 @@ config GENERIC_HARDIRQS config GENERIC_IRQ_PROBE def_bool y -config GENERIC_HARDIRQS_NO__DO_IRQ - def_bool y - config GENERIC_GPIO def_bool y diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index f6bcb039cd6..e504edeb3d8 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -33,10 +33,6 @@ config GENERIC_HARDIRQS bool default y -config GENERIC_HARDIRQS_NO__DO_IRQ - bool - default y - config TIME_LOW_RES bool default y diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index e0f5b6d7f84..be1faf99181 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -684,9 +684,6 @@ source "lib/Kconfig" config GENERIC_HARDIRQS def_bool y -config GENERIC_HARDIRQS_NO__DO_IRQ - def_bool y - config GENERIC_IRQ_PROBE bool default y diff --git a/arch/m68knommu/Kconfig b/arch/m68knommu/Kconfig index 704e7b92334..7379cb0ce1a 100644 --- a/arch/m68knommu/Kconfig +++ b/arch/m68knommu/Kconfig @@ -52,10 +52,6 @@ config GENERIC_HARDIRQS bool default y -config GENERIC_HARDIRQS_NO__DO_IRQ - bool - default y - config GENERIC_CALIBRATE_DELAY bool default y diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 5f5018a71a3..5a637849379 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -52,9 +52,6 @@ config GENERIC_TIME_VSYSCALL config GENERIC_CLOCKEVENTS def_bool y -config GENERIC_HARDIRQS_NO__DO_IRQ - def_bool y - config GENERIC_GPIO def_bool y diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 548e6cc3bc2..f5ecc0566bc 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -793,9 +793,6 @@ config SCHED_OMIT_FRAME_POINTER bool default y -config GENERIC_HARDIRQS_NO__DO_IRQ - def_bool y - # # Select some configuration options automatically based on user selections. # diff --git a/arch/mn10300/Kconfig b/arch/mn10300/Kconfig index 8ed41cf2b08..4638269152f 100644 --- a/arch/mn10300/Kconfig +++ b/arch/mn10300/Kconfig @@ -34,9 +34,6 @@ config RWSEM_GENERIC_SPINLOCK config RWSEM_XCHGADD_ALGORITHM bool -config GENERIC_HARDIRQS_NO__DO_IRQ - def_bool y - config GENERIC_CALIBRATE_DELAY def_bool y diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 0888675c98d..4b94ac4dc60 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -12,7 +12,6 @@ config PARISC select HAVE_IRQ_WORK select HAVE_PERF_EVENTS select GENERIC_ATOMIC64 if !64BIT - select GENERIC_HARDIRQS_NO__DO_IRQ help The PA-RISC microprocessor is designed by Hewlett-Packard and used in many of their workstations & servers (HP9000 700 and 800 series, @@ -79,9 +78,6 @@ config IRQ_PER_CPU bool default y -config GENERIC_HARDIRQS_NO__DO_IRQ - def_bool y - # unless you want to implement ACPI on PA-RISC ... 
;-) config PM bool diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 959f38ccb9a..e0b185dd718 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -40,10 +40,6 @@ config GENERIC_HARDIRQS bool default y -config GENERIC_HARDIRQS_NO__DO_IRQ - bool - default y - config HAVE_SETUP_PER_CPU_AREA def_bool PPC64 diff --git a/arch/score/Kconfig b/arch/score/Kconfig index 4293fdcb539..4f159acfbe3 100644 --- a/arch/score/Kconfig +++ b/arch/score/Kconfig @@ -53,9 +53,6 @@ config GENERIC_CLOCKEVENTS config SCHED_NO_NO_OMIT_FRAME_POINTER def_bool y -config GENERIC_HARDIRQS_NO__DO_IRQ - def_bool y - config GENERIC_SYSCALL_TABLE def_bool y diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 45d9c87d083..989bb6415ea 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -107,10 +107,6 @@ config NEED_PER_CPU_EMBED_FIRST_CHUNK config NEED_PER_CPU_PAGE_FIRST_CHUNK def_bool y if SPARC64 -config GENERIC_HARDIRQS_NO__DO_IRQ - bool - def_bool y if SPARC64 - config MMU bool default y diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig index 4e8b82bca9e..c16b98c2435 100644 --- a/arch/tile/Kconfig +++ b/arch/tile/Kconfig @@ -10,9 +10,6 @@ config GENERIC_CSUM config GENERIC_HARDIRQS def_bool y -config GENERIC_HARDIRQS_NO__DO_IRQ - def_bool y - config GENERIC_IRQ_PROBE def_bool y diff --git a/arch/um/Kconfig.um b/arch/um/Kconfig.um index f8d1d0d47fe..90a438acbfa 100644 --- a/arch/um/Kconfig.um +++ b/arch/um/Kconfig.um @@ -120,9 +120,6 @@ config SMP If you don't know what to do, say N. -config GENERIC_HARDIRQS_NO__DO_IRQ - def_bool y - config NR_CPUS int "Maximum number of CPUs (2-32)" range 2 32 diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 6a64c6fa81a..c1a95b7b58d 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -100,13 +100,6 @@ static inline struct irq_desc *move_irq_desc(struct irq_desc *desc, int node) #define get_irq_desc_data(desc) ((desc)->irq_data.handler_data) #define get_irq_desc_msi(desc) ((desc)->irq_data.msi_desc) -/* - * Monolithic do_IRQ implementation. - */ -#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ -extern unsigned int __do_IRQ(unsigned int irq); -#endif - /* * Architectures call this to let the generic IRQ layer * handle an interrupt. If the descriptor is attached to an @@ -115,14 +108,7 @@ extern unsigned int __do_IRQ(unsigned int irq); */ static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *desc) { -#ifdef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ desc->handle_irq(irq, desc); -#else - if (likely(desc->handle_irq)) - desc->handle_irq(irq, desc); - else - __do_IRQ(irq); -#endif } static inline void generic_handle_irq(unsigned int irq) diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 31d766bf5d2..8e42fec7686 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -9,9 +9,6 @@ menu "IRQ subsystem" config GENERIC_HARDIRQS def_bool y -config GENERIC_HARDIRQS_NO__DO_IRQ - def_bool y - # Select this to disable the deprecated stuff config GENERIC_HARDIRQS_NO_DEPRECATED def_bool n diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index e2347eb6330..3540a719012 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -118,114 +118,3 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) return retval; } - -#ifndef CONFIG_GENERIC_HARDIRQS_NO__DO_IRQ - -#ifdef CONFIG_ENABLE_WARN_DEPRECATED -# warning __do_IRQ is deprecated. 
Please convert to proper flow handlers -#endif - -/** - * __do_IRQ - original all in one highlevel IRQ handler - * @irq: the interrupt number - * - * __do_IRQ handles all normal device IRQ's (the special - * SMP cross-CPU interrupts have their own specific - * handlers). - * - * This is the original x86 implementation which is used for every - * interrupt type. - */ -unsigned int __do_IRQ(unsigned int irq) -{ - struct irq_desc *desc = irq_to_desc(irq); - struct irqaction *action; - unsigned int status; - - kstat_incr_irqs_this_cpu(irq, desc); - - if (CHECK_IRQ_PER_CPU(desc->status)) { - irqreturn_t action_ret; - - /* - * No locking required for CPU-local interrupts: - */ - if (desc->irq_data.chip->ack) - desc->irq_data.chip->ack(irq); - if (likely(!(desc->status & IRQ_DISABLED))) { - action_ret = handle_IRQ_event(irq, desc->action); - if (!noirqdebug) - note_interrupt(irq, desc, action_ret); - } - desc->irq_data.chip->end(irq); - return 1; - } - - raw_spin_lock(&desc->lock); - if (desc->irq_data.chip->ack) - desc->irq_data.chip->ack(irq); - /* - * REPLAY is when Linux resends an IRQ that was dropped earlier - * WAITING is used by probe to mark irqs that are being tested - */ - status = desc->status & ~(IRQ_REPLAY | IRQ_WAITING); - status |= IRQ_PENDING; /* we _want_ to handle it */ - - /* - * If the IRQ is disabled for whatever reason, we cannot - * use the action we have. - */ - action = NULL; - if (likely(!(status & (IRQ_DISABLED | IRQ_INPROGRESS)))) { - action = desc->action; - status &= ~IRQ_PENDING; /* we commit to handling */ - status |= IRQ_INPROGRESS; /* we are handling it */ - } - desc->status = status; - - /* - * If there is no IRQ handler or it was disabled, exit early. - * Since we set PENDING, if another processor is handling - * a different instance of this same irq, the other processor - * will take care of it. - */ - if (unlikely(!action)) - goto out; - - /* - * Edge triggered interrupts need to remember - * pending events. - * This applies to any hw interrupts that allow a second - * instance of the same irq to arrive while we are in do_IRQ - * or in the handler. But the code here only handles the _second_ - * instance of the irq, not the third or fourth. So it is mostly - * useful for irq hardware that does not mask cleanly in an - * SMP environment. - */ - for (;;) { - irqreturn_t action_ret; - - raw_spin_unlock(&desc->lock); - - action_ret = handle_IRQ_event(irq, action); - if (!noirqdebug) - note_interrupt(irq, desc, action_ret); - - raw_spin_lock(&desc->lock); - if (likely(!(desc->status & IRQ_PENDING))) - break; - desc->status &= ~IRQ_PENDING; - } - desc->status &= ~IRQ_INPROGRESS; - -out: - /* - * The ->end() handler has to deal with interrupts which got - * disabled while the handler was running. - */ - desc->irq_data.chip->end(irq); - raw_spin_unlock(&desc->lock); - - return 1; -} -#endif -- cgit v1.2.3-70-g09d2 From 547e9fd7d328af261f184bf66effc5033c886498 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 19 Jan 2011 12:51:39 +0100 Subject: perf: Annotate cpuctx->ctx.mutex to avoid a lockdep splat Lockdep spotted: loop_1b_instruc/1899 is trying to acquire lock: (event_mutex){+.+.+.}, at: [] perf_trace_init+0x3b/0x2f7 but task is already holding lock: (&ctx->mutex){+.+.+.}, at: [] perf_event_init_context+0xc0/0x218 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #3 (&ctx->mutex){+.+.+.}: -> #2 (cpu_hotplug.lock){+.+.+.}: -> #1 (module_mutex){+.+...}: -> #0 (event_mutex){+.+.+.}: But because the deadlock would be cpuhotplug (cpu-event) vs fork (task-event) it cannot, in fact, happen. We can annotate this by giving the perf_event_context used for the cpuctx a different lock class from those used by tasks. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- kernel/perf_event.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel') diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 244ca3acb0e..c5fa717cf09 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -5380,6 +5380,8 @@ free_dev: goto out; } +static struct lock_class_key cpuctx_mutex; + int perf_pmu_register(struct pmu *pmu, char *name, int type) { int cpu, ret; @@ -5428,6 +5430,7 @@ skip_type: cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu); __perf_event_init_context(&cpuctx->ctx); + lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex); cpuctx->ctx.type = cpu_context; cpuctx->ctx.pmu = pmu; cpuctx->jiffies_interval = 1; -- cgit v1.2.3-70-g09d2 From 806839b22cbda90176d7f8d421889bddd7826e93 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 21 Jan 2011 18:45:47 +0100 Subject: perf: perf_event_exit_task_context: s/rcu_dereference/rcu_dereference_raw/ In theory, almost every user of task->child->perf_event_ctxp[] is wrong. find_get_context() can install the new context at any moment, we need read_barrier_depends(). dbe08d82ce3967ccdf459f7951d02589cf967300 "perf: Fix find_get_context() vs perf_event_exit_task() race" added rcu_dereference() into perf_event_exit_task_context() to make the precedent, but this makes __rcu_dereference_check() unhappy. Use rcu_dereference_raw() to shut up the warning. Reported-by: Ingo Molnar Signed-off-by: Oleg Nesterov Cc: acme@redhat.com Cc: paulus@samba.org Cc: stern@rowland.harvard.edu Cc: a.p.zijlstra@chello.nl Cc: fweisbec@gmail.com Cc: roland@redhat.com Cc: prasad@linux.vnet.ibm.com Cc: Paul E. McKenney LKML-Reference: <20110121174547.GA8796@redhat.com> Signed-off-by: Ingo Molnar --- kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/perf_event.c b/kernel/perf_event.c index c5fa717cf09..126a302c481 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -6136,7 +6136,7 @@ static void perf_event_exit_task_context(struct task_struct *child, int ctxn) * scheduled, so we are now safe from rescheduling changing * our context. */ - child_ctx = rcu_dereference(child->perf_event_ctxp[ctxn]); + child_ctx = rcu_dereference_raw(child->perf_event_ctxp[ctxn]); task_ctx_sched_out(child_ctx, EVENT_ALL); /* -- cgit v1.2.3-70-g09d2 From e94965ed5beb23c6fabf7ed31f625e66d7ff28de Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Wed, 15 Dec 2010 14:00:19 -0800 Subject: module: show version information for built-in modules in sysfs Currently only drivers that are built as modules have their versions shown in /sys/module//version, but this information might also be useful for built-in drivers as well. This especially important for drivers that do not define any parameters - such drivers, if built-in, are completely invisible from userspace. This patch changes MODULE_VERSION() macro so that in case when we are compiling built-in module, version information is stored in a separate section. 
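The section trick generalizes; as a userspace illustration (gcc/GNU ld on Linux; the macro and struct names here are hypothetical stand-ins), records dropped into a named section can be walked via linker-provided __start_/__stop_ bounds:

	#include <stdio.h>

	struct modver_record { const char *module_name; const char *version; };

	#define BUILTIN_VERSION(mod, ver)				\
		static const struct modver_record __modver_##mod	\
		__attribute__((__section__("__modver"), used)) =	\
			{ #mod, ver }

	BUILTIN_VERSION(foo, "1.0");
	BUILTIN_VERSION(bar, "2.3");

	/* GNU ld provides these bounds for any section whose name is a
	 * valid C identifier */
	extern const struct modver_record __start___modver[],
					  __stop___modver[];

	int main(void)
	{
		const struct modver_record *v;

		for (v = __start___modver; v < __stop___modver; v++)
			printf("%s: version %s\n", v->module_name, v->version);
		return 0;
	}

The kernel performs the same walk over the real __modver records at boot, as the diff below shows.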
Signed-off-by: Dmitry Torokhov Signed-off-by: Rusty Russell --- include/asm-generic/vmlinux.lds.h | 7 +++++ include/linux/module.h | 27 ++++++++++++++++ kernel/params.c | 65 ++++++++++++++++++++++++++++++++------- 3 files changed, 88 insertions(+), 11 deletions(-) (limited to 'kernel') diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 68649336c4a..6ebb81030d2 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -364,6 +364,13 @@ VMLINUX_SYMBOL(__start___param) = .; \ *(__param) \ VMLINUX_SYMBOL(__stop___param) = .; \ + } \ + \ + /* Built-in module versions. */ \ + __modver : AT(ADDR(__modver) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start___modver) = .; \ + *(__modver) \ + VMLINUX_SYMBOL(__stop___modver) = .; \ . = ALIGN((align)); \ VMLINUX_SYMBOL(__end_rodata) = .; \ } \ diff --git a/include/linux/module.h b/include/linux/module.h index 8b17fd8c790..50efcd3ae85 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -58,6 +58,12 @@ struct module_attribute { void (*free)(struct module *); }; +struct module_version_attribute { + struct module_attribute mattr; + const char *module_name; + const char *version; +}; + struct module_kobject { struct kobject kobj; @@ -161,7 +167,28 @@ extern struct module __this_module; Using this automatically adds a checksum of the .c files and the local headers in "srcversion". */ + +#ifdef MODULE #define MODULE_VERSION(_version) MODULE_INFO(version, _version) +#else +#define MODULE_VERSION(_version) \ + extern ssize_t __modver_version_show(struct module_attribute *, \ + struct module *, char *); \ + static struct module_version_attribute __modver_version_attr \ + __used \ + __attribute__ ((__section__ ("__modver"),aligned(sizeof(void *)))) \ + = { \ + .mattr = { \ + .attr = { \ + .name = "version", \ + .mode = S_IRUGO, \ + }, \ + .show = __modver_version_show, \ + }, \ + .module_name = KBUILD_MODNAME, \ + .version = _version, \ + } +#endif /* Optional firmware file (or files) needed by the module * format is simply firmware file name. Multiple firmware diff --git a/kernel/params.c b/kernel/params.c index 08107d18175..0da1411222b 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -719,9 +719,7 @@ void destroy_params(const struct kernel_param *params, unsigned num) params[i].ops->free(params[i].arg); } -static void __init kernel_add_sysfs_param(const char *name, - struct kernel_param *kparam, - unsigned int name_skip) +static struct module_kobject * __init locate_module_kobject(const char *name) { struct module_kobject *mk; struct kobject *kobj; @@ -729,10 +727,7 @@ static void __init kernel_add_sysfs_param(const char *name, kobj = kset_find_obj(module_kset, name); if (kobj) { - /* We already have one. Remove params so we can add more. */ mk = to_module_kobject(kobj); - /* We need to remove it before adding parameters.
*/ - sysfs_remove_group(&mk->kobj, &mk->mp->grp); } else { mk = kzalloc(sizeof(struct module_kobject), GFP_KERNEL); BUG_ON(!mk); @@ -743,15 +738,36 @@ static void __init kernel_add_sysfs_param(const char *name, "%s", name); if (err) { kobject_put(&mk->kobj); - printk(KERN_ERR "Module '%s' failed add to sysfs, " - "error number %d\n", name, err); - printk(KERN_ERR "The system will be unstable now.\n"); - return; + printk(KERN_ERR + "Module '%s' failed add to sysfs, error number %d\n", + name, err); + printk(KERN_ERR + "The system will be unstable now.\n"); + return NULL; } - /* So that exit path is even. */ + + /* So that we hold reference in both cases. */ kobject_get(&mk->kobj); } + return mk; +} + +static void __init kernel_add_sysfs_param(const char *name, + struct kernel_param *kparam, + unsigned int name_skip) +{ + struct module_kobject *mk; + int err; + + mk = locate_module_kobject(name); + if (!mk) + return; + + /* We need to remove old parameters before adding more. */ + if (mk->mp) + sysfs_remove_group(&mk->kobj, &mk->mp->grp); + /* These should not fail at boot. */ err = add_sysfs_param(mk, kparam, kparam->name + name_skip); BUG_ON(err); @@ -796,6 +812,32 @@ static void __init param_sysfs_builtin(void) } } +ssize_t __modver_version_show(struct module_attribute *mattr, + struct module *mod, char *buf) +{ + struct module_version_attribute *vattr = + container_of(mattr, struct module_version_attribute, mattr); + + return sprintf(buf, "%s\n", vattr->version); +} + +extern struct module_version_attribute __start___modver[], __stop___modver[]; + +static void __init version_sysfs_builtin(void) +{ + const struct module_version_attribute *vattr; + struct module_kobject *mk; + int err; + + for (vattr = __start___modver; vattr < __stop___modver; vattr++) { + mk = locate_module_kobject(vattr->module_name); + if (mk) { + err = sysfs_create_file(&mk->kobj, &vattr->mattr.attr); + kobject_uevent(&mk->kobj, KOBJ_ADD); + kobject_put(&mk->kobj); + } + } +} /* module-related sysfs stuff */ @@ -875,6 +917,7 @@ static int __init param_sysfs_init(void) } module_sysfs_initialized = 1; + version_sysfs_builtin(); param_sysfs_builtin(); return 0; -- cgit v1.2.3-70-g09d2 From 3ff6dcac735704824c1dff64dc6863c390d364cc Mon Sep 17 00:00:00 2001 From: Yong Zhang Date: Mon, 24 Jan 2011 15:33:52 +0800 Subject: sched: Fix poor interactivity on UP systems due to group scheduler nice tune bug Michael Witten and Christian Kujau reported that the autogroup scheduling feature hurts interactivity on their UP systems. It turns out that this is an older bug in the group scheduling code, and the wider appeal provided by the autogroup feature exposed it more prominently. On UP with FAIR_GROUP_SCHED enabled, tuning shares only affects tg->shares but is not reflected in tg->se->load. The reason is that update_cfs_shares() does nothing on UP. So introduce update_cfs_shares() for UP && FAIR_GROUP_SCHED. This issue was found when autogroup scheduling was enabled, but it is an older bug that also exists on cgroup.cpu on UP.
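The shares arithmetic that this patch factors into calc_cfs_shares() (visible in the diff below) is simple enough to model outside the kernel. The sketch here is a standalone approximation, not kernel code: the tg and cfs_rq fields are flattened into plain parameters, atomic_read() is dropped, and MIN_SHARES is assumed to be 2, its value in this kernel series.

#include <stdio.h>

#define MIN_SHARES 2

/* Mirror of the calc_cfs_shares() math: a group's per-cpu entity gets
 * tg_shares scaled by this cpu's portion of the group's global weight,
 * clamped to [MIN_SHARES, tg_shares]. */
static long calc_cfs_shares_model(long tg_shares, long tg_load_weight,
				  long cfs_rq_load_contribution,
				  long cfs_rq_load, long weight_delta)
{
	long load = cfs_rq_load + weight_delta;
	long load_weight = tg_load_weight - cfs_rq_load_contribution + load;
	long shares = tg_shares * load;

	if (load_weight)
		shares /= load_weight;

	if (shares < MIN_SHARES)
		shares = MIN_SHARES;
	if (shares > tg_shares)
		shares = tg_shares;
	return shares;
}

int main(void)
{
	/* A group holding half of its global weight on this cpu should
	 * get half of its shares: prints 512. */
	printf("%ld\n", calc_cfs_shares_model(1024, 2048, 1024, 1024, 0));
	return 0;
}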
Reported-and-Tested-by: Michael Witten Reported-and-Tested-by: Christian Kujau Signed-off-by: Yong Zhang Acked-by: Pekka Enberg Acked-by: Mike Galbraith Acked-by: Peter Zijlstra Cc: Linus Torvalds LKML-Reference: <20110124073352.GA24186@windriver.com> Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 78 ++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 53 insertions(+), 25 deletions(-) (limited to 'kernel') diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 77e9166d7bb..354769979c0 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -699,7 +699,8 @@ account_entity_dequeue(struct cfs_rq *cfs_rq, struct sched_entity *se) cfs_rq->nr_running--; } -#if defined CONFIG_SMP && defined CONFIG_FAIR_GROUP_SCHED +#ifdef CONFIG_FAIR_GROUP_SCHED +# ifdef CONFIG_SMP static void update_cfs_rq_load_contribution(struct cfs_rq *cfs_rq, int global_update) { @@ -762,6 +763,51 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) list_del_leaf_cfs_rq(cfs_rq); } +static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, + long weight_delta) +{ + long load_weight, load, shares; + + load = cfs_rq->load.weight + weight_delta; + + load_weight = atomic_read(&tg->load_weight); + load_weight -= cfs_rq->load_contribution; + load_weight += load; + + shares = (tg->shares * load); + if (load_weight) + shares /= load_weight; + + if (shares < MIN_SHARES) + shares = MIN_SHARES; + if (shares > tg->shares) + shares = tg->shares; + + return shares; +} + +static void update_entity_shares_tick(struct cfs_rq *cfs_rq) +{ + if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) { + update_cfs_load(cfs_rq, 0); + update_cfs_shares(cfs_rq, 0); + } +} +# else /* CONFIG_SMP */ +static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) +{ +} + +static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, + long weight_delta) +{ + return tg->shares; +} + +static inline void update_entity_shares_tick(struct cfs_rq *cfs_rq) +{ +} +# endif /* CONFIG_SMP */ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, unsigned long weight) { @@ -782,7 +828,7 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) { struct task_group *tg; struct sched_entity *se; - long load_weight, load, shares; + long shares; if (!cfs_rq) return; @@ -791,32 +837,14 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) se = tg->se[cpu_of(rq_of(cfs_rq))]; if (!se) return; - - load = cfs_rq->load.weight + weight_delta; - - load_weight = atomic_read(&tg->load_weight); - load_weight -= cfs_rq->load_contribution; - load_weight += load; - - shares = (tg->shares * load); - if (load_weight) - shares /= load_weight; - - if (shares < MIN_SHARES) - shares = MIN_SHARES; - if (shares > tg->shares) - shares = tg->shares; +#ifndef CONFIG_SMP + if (likely(se->load.weight == tg->shares)) + return; +#endif + shares = calc_cfs_shares(cfs_rq, tg, weight_delta); reweight_entity(cfs_rq_of(se), se, shares); } - -static void update_entity_shares_tick(struct cfs_rq *cfs_rq) -{ - if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) { - update_cfs_load(cfs_rq, 0); - update_cfs_shares(cfs_rq, 0); - } -} #else /* CONFIG_FAIR_GROUP_SCHED */ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) { -- cgit v1.2.3-70-g09d2 From 8c6a98b22b750c9eb52653ba643faa17db8d3881 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft Date: Mon, 24 Jan 2011 09:31:38 -0800 Subject: Input: sysrq - ensure sysrq_enabled and 
__sysrq_enabled are consistent Currently sysrq_enabled and __sysrq_enabled are initialised separately and inconsistently, leading to sysrq being actually enabled but reported as not enabled in sysfs. The first change to the sysfs configurable synchronises these two: static int __read_mostly sysrq_enabled = 1; static int __sysrq_enabled; Add a common define to carry the default for these, preventing them from becoming out of sync again. Default this to 1 to mirror previous behaviour. Signed-off-by: Andy Whitcroft Cc: stable@kernel.org Signed-off-by: Dmitry Torokhov --- drivers/char/sysrq.c | 2 +- include/linux/sysrq.h | 3 +++ kernel/sysctl.c | 3 ++- 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c index c556ed9db13..8e0dd254eb1 100644 --- a/drivers/char/sysrq.c +++ b/drivers/char/sysrq.c @@ -46,7 +46,7 @@ #include /* Whether we react on sysrq keys or just ignore them */ -static int __read_mostly sysrq_enabled = 1; +static int __read_mostly sysrq_enabled = SYSRQ_DEFAULT_ENABLE; static bool __read_mostly sysrq_always_enabled; static bool sysrq_on(void) diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h index 387fa7d05c9..7faf933cced 100644 --- a/include/linux/sysrq.h +++ b/include/linux/sysrq.h @@ -17,6 +17,9 @@ #include #include +/* Enable/disable SYSRQ support by default (0==no, 1==yes). */ +#define SYSRQ_DEFAULT_ENABLE 1 + /* Possible values of bitmask for enabling sysrq functions */ /* 0x0001 is reserved for enable everything */ #define SYSRQ_ENABLE_LOG 0x0002 diff --git a/kernel/sysctl.c b/kernel/sysctl.c index c33a1edb799..3afce4dc9ba 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -169,7 +169,8 @@ static int proc_taint(struct ctl_table *table, int write, #endif #ifdef CONFIG_MAGIC_SYSRQ -static int __sysrq_enabled; /* Note: sysrq code ises it's own private copy */ +/* Note: sysrq code uses it's own private copy */ +static int __sysrq_enabled = SYSRQ_DEFAULT_ENABLE; static int sysrq_sysctl_handler(ctl_table *table, int write, void __user *buffer, size_t *lenp, -- cgit v1.2.3-70-g09d2 From ac751efa6a0d70f2c9daef5c7e3a92270f5c2dff Mon Sep 17 00:00:00 2001 From: Torben Hohn Date: Tue, 25 Jan 2011 15:07:35 -0800 Subject: console: rename acquire/release_console_sem() to console_lock/unlock() The -rt patches change the console_semaphore to console_mutex. As a result, a quite large chunk of the patches changes all acquire/release_console_sem() to acquire/release_console_mutex(). This commit makes things use more neutral function names which don't make implications about the underlying lock. The only real change is the return value of console_trylock which is inverted from try_acquire_console_sem(). This patch also paves the way to switching console_sem from a semaphore to a mutex.
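Since the inverted console_trylock() return value is the one behavioral change, the call-site pattern is worth spelling out before the bulk conversion below. The following is a self-contained userspace sketch, not kernel code: console_trylock()/console_unlock() are stand-ins that model only the return convention described above (1 on success, per Geert), and suspend_path() is an invented caller in the style of the OMAP suspend hooks converted in this patch.

#include <stdio.h>

static int console_locked;

/* Stand-in with the new convention: returns 1 when the lock is taken. */
static int console_trylock(void)
{
	if (console_locked)
		return 0;
	console_locked = 1;
	return 1;
}

static void console_unlock(void)
{
	console_locked = 0;
}

static void suspend_path(void)
{
	/* Old code read: if (try_acquire_console_sem()) goto out;
	 * because try_acquire_console_sem() returned 0 on success.
	 * With console_trylock() returning 1 on success, the test
	 * inverts at every converted call site. */
	if (!console_trylock())
		return;
	printf("console work done under the lock\n");
	console_unlock();
}

int main(void)
{
	suspend_path();
	return 0;
}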
[akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: make console_trylock return 1 on success, per Geert] Signed-off-by: Torben Hohn Cc: Thomas Gleixner Cc: Greg KH Cc: Ingo Molnar Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mach-omap2/pm24xx.c | 4 +- arch/arm/mach-omap2/pm34xx.c | 4 +- arch/arm/mach-omap2/serial.c | 4 +- arch/parisc/kernel/pdc_cons.c | 4 +- drivers/char/bfin_jtag_comm.c | 8 +-- drivers/gpu/drm/nouveau/nouveau_drv.c | 8 +-- drivers/gpu/drm/radeon/radeon_device.c | 10 +-- drivers/staging/msm/msm_fb.c | 8 +-- drivers/staging/olpc_dcon/olpc_dcon.c | 10 +-- drivers/staging/sm7xx/smtcfb.c | 8 +-- drivers/tty/serial/sb1250-duart.c | 2 +- drivers/tty/tty_io.c | 4 +- drivers/tty/vt/selection.c | 4 +- drivers/tty/vt/vc_screen.c | 16 ++--- drivers/tty/vt/vt.c | 124 ++++++++++++++++----------------- drivers/tty/vt/vt_ioctl.c | 60 ++++++++-------- drivers/video/arkfb.c | 12 ++-- drivers/video/aty/aty128fb.c | 12 ++-- drivers/video/aty/atyfb_base.c | 10 +-- drivers/video/aty/radeon_pm.c | 10 +-- drivers/video/chipsfb.c | 8 +-- drivers/video/console/fbcon.c | 42 +++++------ drivers/video/da8xx-fb.c | 8 +-- drivers/video/fbmem.c | 12 ++-- drivers/video/fbsysfs.c | 20 +++--- drivers/video/geode/gxfb_core.c | 8 +-- drivers/video/geode/lxfb_core.c | 8 +-- drivers/video/i810/i810_main.c | 8 +-- drivers/video/jz4740_fb.c | 8 +-- drivers/video/mx3fb.c | 8 +-- drivers/video/nvidia/nvidia.c | 8 +-- drivers/video/ps3fb.c | 16 ++--- drivers/video/s3fb.c | 16 ++--- drivers/video/savage/savagefb_driver.c | 8 +-- drivers/video/sh_mobile_hdmi.c | 8 +-- drivers/video/sh_mobile_lcdcfb.c | 4 +- drivers/video/sm501fb.c | 8 +-- drivers/video/tmiofb.c | 10 +-- drivers/video/via/viafbdev.c | 8 +-- drivers/video/vt8623fb.c | 12 ++-- drivers/video/xen-fbfront.c | 4 +- fs/proc/consoles.c | 4 +- include/linux/console.h | 6 +- kernel/printk.c | 100 ++++++++++++++------------ 44 files changed, 336 insertions(+), 328 deletions(-) (limited to 'kernel') diff --git a/arch/arm/mach-omap2/pm24xx.c b/arch/arm/mach-omap2/pm24xx.c index 9e5dc8ed51e..97feb3ab6a6 100644 --- a/arch/arm/mach-omap2/pm24xx.c +++ b/arch/arm/mach-omap2/pm24xx.c @@ -134,7 +134,7 @@ static void omap2_enter_full_retention(void) /* Block console output in case it is on one of the OMAP UARTs */ if (!is_suspending()) - if (try_acquire_console_sem()) + if (!console_trylock()) goto no_sleep; omap_uart_prepare_idle(0); @@ -151,7 +151,7 @@ static void omap2_enter_full_retention(void) omap_uart_resume_idle(0); if (!is_suspending()) - release_console_sem(); + console_unlock(); no_sleep: if (omap2_pm_debug) { diff --git a/arch/arm/mach-omap2/pm34xx.c b/arch/arm/mach-omap2/pm34xx.c index 8cbbeade4b8..a4aa1920a75 100644 --- a/arch/arm/mach-omap2/pm34xx.c +++ b/arch/arm/mach-omap2/pm34xx.c @@ -398,7 +398,7 @@ void omap_sram_idle(void) if (!is_suspending()) if (per_next_state < PWRDM_POWER_ON || core_next_state < PWRDM_POWER_ON) - if (try_acquire_console_sem()) + if (!console_trylock()) goto console_still_active; /* PER */ @@ -481,7 +481,7 @@ void omap_sram_idle(void) } if (!is_suspending()) - release_console_sem(); + console_unlock(); console_still_active: /* Disable IO-PAD and IO-CHAIN wakeup */ diff --git a/arch/arm/mach-omap2/serial.c b/arch/arm/mach-omap2/serial.c index 302da7403a1..32e91a9c8b6 100644 --- a/arch/arm/mach-omap2/serial.c +++ b/arch/arm/mach-omap2/serial.c @@ -812,7 +812,7 @@ void __init omap_serial_init_port(struct omap_board_data *bdata) oh->dev_attr = uart; - 
acquire_console_sem(); /* in case the earlycon is on the UART */ + console_lock(); /* in case the earlycon is on the UART */ /* * Because of early UART probing, UART did not get idled @@ -838,7 +838,7 @@ void __init omap_serial_init_port(struct omap_board_data *bdata) omap_uart_block_sleep(uart); uart->timeout = DEFAULT_TIMEOUT; - release_console_sem(); + console_unlock(); if ((cpu_is_omap34xx() && uart->padconf) || (uart->wk_en && uart->wk_mask)) { diff --git a/arch/parisc/kernel/pdc_cons.c b/arch/parisc/kernel/pdc_cons.c index 11bdd68e576..fc770be465f 100644 --- a/arch/parisc/kernel/pdc_cons.c +++ b/arch/parisc/kernel/pdc_cons.c @@ -169,11 +169,11 @@ static int __init pdc_console_tty_driver_init(void) struct console *tmp; - acquire_console_sem(); + console_lock(); for_each_console(tmp) if (tmp == &pdc_cons) break; - release_console_sem(); + console_unlock(); if (!tmp) { printk(KERN_INFO "PDC console driver not registered anymore, not creating %s\n", pdc_cons.name); diff --git a/drivers/char/bfin_jtag_comm.c b/drivers/char/bfin_jtag_comm.c index e397df3ad98..16402445f2b 100644 --- a/drivers/char/bfin_jtag_comm.c +++ b/drivers/char/bfin_jtag_comm.c @@ -183,16 +183,16 @@ bfin_jc_circ_write(const unsigned char *buf, int count) } #ifndef CONFIG_BFIN_JTAG_COMM_CONSOLE -# define acquire_console_sem() -# define release_console_sem() +# define console_lock() +# define console_unlock() #endif static int bfin_jc_write(struct tty_struct *tty, const unsigned char *buf, int count) { int i; - acquire_console_sem(); + console_lock(); i = bfin_jc_circ_write(buf, count); - release_console_sem(); + console_unlock(); wake_up_process(bfin_jc_kthread); return i; } diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c index 13bb672a16f..f658a04eecf 100644 --- a/drivers/gpu/drm/nouveau/nouveau_drv.c +++ b/drivers/gpu/drm/nouveau/nouveau_drv.c @@ -234,9 +234,9 @@ nouveau_pci_suspend(struct pci_dev *pdev, pm_message_t pm_state) pci_set_power_state(pdev, PCI_D3hot); } - acquire_console_sem(); + console_lock(); nouveau_fbcon_set_suspend(dev, 1); - release_console_sem(); + console_unlock(); nouveau_fbcon_restore_accel(dev); return 0; @@ -359,9 +359,9 @@ nouveau_pci_resume(struct pci_dev *pdev) nv_crtc->lut.depth = 0; } - acquire_console_sem(); + console_lock(); nouveau_fbcon_set_suspend(dev, 0); - release_console_sem(); + console_unlock(); nouveau_fbcon_zfill_all(dev); diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index 26091d602b8..0d478932b1a 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -891,9 +891,9 @@ int radeon_suspend_kms(struct drm_device *dev, pm_message_t state) pci_disable_device(dev->pdev); pci_set_power_state(dev->pdev, PCI_D3hot); } - acquire_console_sem(); + console_lock(); radeon_fbdev_set_suspend(rdev, 1); - release_console_sem(); + console_unlock(); return 0; } @@ -905,11 +905,11 @@ int radeon_resume_kms(struct drm_device *dev) if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; - acquire_console_sem(); + console_lock(); pci_set_power_state(dev->pdev, PCI_D0); pci_restore_state(dev->pdev); if (pci_enable_device(dev->pdev)) { - release_console_sem(); + console_unlock(); return -1; } pci_set_master(dev->pdev); @@ -920,7 +920,7 @@ int radeon_resume_kms(struct drm_device *dev) radeon_restore_bios_scratch_regs(rdev); radeon_fbdev_set_suspend(rdev, 0); - release_console_sem(); + console_unlock(); /* reset hpd state */ radeon_hpd_init(rdev); diff --git 
a/drivers/staging/msm/msm_fb.c b/drivers/staging/msm/msm_fb.c index 23fa049b51f..a2f29d46405 100644 --- a/drivers/staging/msm/msm_fb.c +++ b/drivers/staging/msm/msm_fb.c @@ -347,7 +347,7 @@ static int msm_fb_suspend(struct platform_device *pdev, pm_message_t state) if ((!mfd) || (mfd->key != MFD_KEY)) return 0; - acquire_console_sem(); + console_lock(); fb_set_suspend(mfd->fbi, 1); ret = msm_fb_suspend_sub(mfd); @@ -358,7 +358,7 @@ static int msm_fb_suspend(struct platform_device *pdev, pm_message_t state) pdev->dev.power.power_state = state; } - release_console_sem(); + console_unlock(); return ret; } #else @@ -431,11 +431,11 @@ static int msm_fb_resume(struct platform_device *pdev) if ((!mfd) || (mfd->key != MFD_KEY)) return 0; - acquire_console_sem(); + console_lock(); ret = msm_fb_resume_sub(mfd); pdev->dev.power.power_state = PMSG_ON; fb_set_suspend(mfd->fbi, 1); - release_console_sem(); + console_unlock(); return ret; } diff --git a/drivers/staging/olpc_dcon/olpc_dcon.c b/drivers/staging/olpc_dcon/olpc_dcon.c index 9f26dc9408b..56a283d1a74 100644 --- a/drivers/staging/olpc_dcon/olpc_dcon.c +++ b/drivers/staging/olpc_dcon/olpc_dcon.c @@ -373,17 +373,17 @@ static void dcon_source_switch(struct work_struct *work) * * For now, we just hope.. */ - acquire_console_sem(); + console_lock(); ignore_fb_events = 1; if (fb_blank(fbinfo, FB_BLANK_UNBLANK)) { ignore_fb_events = 0; - release_console_sem(); + console_unlock(); printk(KERN_ERR "olpc-dcon: Failed to enter CPU mode\n"); dcon_pending = DCON_SOURCE_DCON; return; } ignore_fb_events = 0; - release_console_sem(); + console_unlock(); /* And turn off the DCON */ pdata->set_dconload(1); @@ -435,12 +435,12 @@ static void dcon_source_switch(struct work_struct *work) } } - acquire_console_sem(); + console_lock(); ignore_fb_events = 1; if (fb_blank(fbinfo, FB_BLANK_POWERDOWN)) printk(KERN_ERR "olpc-dcon: couldn't blank fb!\n"); ignore_fb_events = 0; - release_console_sem(); + console_unlock(); printk(KERN_INFO "olpc-dcon: The DCON has control\n"); break; diff --git a/drivers/staging/sm7xx/smtcfb.c b/drivers/staging/sm7xx/smtcfb.c index 0bc113c44d3..d007e4a12c1 100644 --- a/drivers/staging/sm7xx/smtcfb.c +++ b/drivers/staging/sm7xx/smtcfb.c @@ -1044,9 +1044,9 @@ static int __maybe_unused smtcfb_suspend(struct pci_dev *pdev, pm_message_t msg) /* when doing suspend, call fb apis and pci apis */ if (msg.event == PM_EVENT_SUSPEND) { - acquire_console_sem(); + console_lock(); fb_set_suspend(&sfb->fb, 1); - release_console_sem(); + console_unlock(); retv = pci_save_state(pdev); pci_disable_device(pdev); retv = pci_choose_state(pdev, msg); @@ -1105,9 +1105,9 @@ static int __maybe_unused smtcfb_resume(struct pci_dev *pdev) smtcfb_setmode(sfb); - acquire_console_sem(); + console_lock(); fb_set_suspend(&sfb->fb, 0); - release_console_sem(); + console_unlock(); return 0; } diff --git a/drivers/tty/serial/sb1250-duart.c b/drivers/tty/serial/sb1250-duart.c index a2f2b325449..602d9845c52 100644 --- a/drivers/tty/serial/sb1250-duart.c +++ b/drivers/tty/serial/sb1250-duart.c @@ -829,7 +829,7 @@ static void __init sbd_probe_duarts(void) #ifdef CONFIG_SERIAL_SB1250_DUART_CONSOLE /* * Serial console stuff. Very basic, polling driver for doing serial - * console output. The console_sem is held by the caller, so we + * console output. The console_lock is held by the caller, so we * shouldn't be interrupted for more console activity. 
*/ static void sbd_console_putchar(struct uart_port *uport, int ch) diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 464d09d9787..6158eae0f64 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -3256,7 +3256,7 @@ static ssize_t show_cons_active(struct device *dev, struct console *c; ssize_t count = 0; - acquire_console_sem(); + console_lock(); for (c = console_drivers; c; c = c->next) { if (!c->device) continue; @@ -3271,7 +3271,7 @@ static ssize_t show_cons_active(struct device *dev, while (i--) count += sprintf(buf + count, "%s%d%c", cs[i]->name, cs[i]->index, i ? ' ':'\n'); - release_console_sem(); + console_unlock(); return count; } diff --git a/drivers/tty/vt/selection.c b/drivers/tty/vt/selection.c index ebae344ce91..c956ed6c83a 100644 --- a/drivers/tty/vt/selection.c +++ b/drivers/tty/vt/selection.c @@ -316,9 +316,9 @@ int paste_selection(struct tty_struct *tty) /* always called with BTM from vt_ioctl */ WARN_ON(!tty_locked()); - acquire_console_sem(); + console_lock(); poke_blanked_console(); - release_console_sem(); + console_unlock(); ld = tty_ldisc_ref(tty); if (!ld) { diff --git a/drivers/tty/vt/vc_screen.c b/drivers/tty/vt/vc_screen.c index eab3a1ff99e..a672ed192d3 100644 --- a/drivers/tty/vt/vc_screen.c +++ b/drivers/tty/vt/vc_screen.c @@ -202,7 +202,7 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) /* Select the proper current console and verify * sanity of the situation under the console lock. */ - acquire_console_sem(); + console_lock(); attr = (currcons & 128); currcons = (currcons & 127); @@ -336,9 +336,9 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) * the pagefault handling code may want to call printk(). */ - release_console_sem(); + console_unlock(); ret = copy_to_user(buf, con_buf_start, orig_count); - acquire_console_sem(); + console_lock(); if (ret) { read += (orig_count - ret); @@ -354,7 +354,7 @@ vcs_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) if (read) ret = read; unlock_out: - release_console_sem(); + console_unlock(); mutex_unlock(&con_buf_mtx); return ret; } @@ -379,7 +379,7 @@ vcs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) /* Select the proper current console and verify * sanity of the situation under the console lock. */ - acquire_console_sem(); + console_lock(); attr = (currcons & 128); currcons = (currcons & 127); @@ -414,9 +414,9 @@ vcs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) /* Temporarily drop the console lock so that we can read * in the write data from userspace safely. 
*/ - release_console_sem(); + console_unlock(); ret = copy_from_user(con_buf, buf, this_round); - acquire_console_sem(); + console_lock(); if (ret) { this_round -= ret; @@ -542,7 +542,7 @@ vcs_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) vcs_scr_updated(vc); unlock_out: - release_console_sem(); + console_unlock(); mutex_unlock(&con_buf_mtx); diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 76407eca9ab..b230bd3f056 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -1003,9 +1003,9 @@ static int vt_resize(struct tty_struct *tty, struct winsize *ws) struct vc_data *vc = tty->driver_data; int ret; - acquire_console_sem(); + console_lock(); ret = vc_do_resize(tty, vc, ws->ws_col, ws->ws_row); - release_console_sem(); + console_unlock(); return ret; } @@ -1271,7 +1271,7 @@ static void default_attr(struct vc_data *vc) vc->vc_color = vc->vc_def_color; } -/* console_sem is held */ +/* console_lock is held */ static void csi_m(struct vc_data *vc) { int i; @@ -1415,7 +1415,7 @@ int mouse_reporting(void) return vc_cons[fg_console].d->vc_report_mouse; } -/* console_sem is held */ +/* console_lock is held */ static void set_mode(struct vc_data *vc, int on_off) { int i; @@ -1485,7 +1485,7 @@ static void set_mode(struct vc_data *vc, int on_off) } } -/* console_sem is held */ +/* console_lock is held */ static void setterm_command(struct vc_data *vc) { switch(vc->vc_par[0]) { @@ -1545,7 +1545,7 @@ static void setterm_command(struct vc_data *vc) } } -/* console_sem is held */ +/* console_lock is held */ static void csi_at(struct vc_data *vc, unsigned int nr) { if (nr > vc->vc_cols - vc->vc_x) @@ -1555,7 +1555,7 @@ static void csi_at(struct vc_data *vc, unsigned int nr) insert_char(vc, nr); } -/* console_sem is held */ +/* console_lock is held */ static void csi_L(struct vc_data *vc, unsigned int nr) { if (nr > vc->vc_rows - vc->vc_y) @@ -1566,7 +1566,7 @@ static void csi_L(struct vc_data *vc, unsigned int nr) vc->vc_need_wrap = 0; } -/* console_sem is held */ +/* console_lock is held */ static void csi_P(struct vc_data *vc, unsigned int nr) { if (nr > vc->vc_cols - vc->vc_x) @@ -1576,7 +1576,7 @@ static void csi_P(struct vc_data *vc, unsigned int nr) delete_char(vc, nr); } -/* console_sem is held */ +/* console_lock is held */ static void csi_M(struct vc_data *vc, unsigned int nr) { if (nr > vc->vc_rows - vc->vc_y) @@ -1587,7 +1587,7 @@ static void csi_M(struct vc_data *vc, unsigned int nr) vc->vc_need_wrap = 0; } -/* console_sem is held (except via vc_init->reset_terminal */ +/* console_lock is held (except via vc_init->reset_terminal */ static void save_cur(struct vc_data *vc) { vc->vc_saved_x = vc->vc_x; @@ -1603,7 +1603,7 @@ static void save_cur(struct vc_data *vc) vc->vc_saved_G1 = vc->vc_G1_charset; } -/* console_sem is held */ +/* console_lock is held */ static void restore_cur(struct vc_data *vc) { gotoxy(vc, vc->vc_saved_x, vc->vc_saved_y); @@ -1625,7 +1625,7 @@ enum { ESnormal, ESesc, ESsquare, ESgetpars, ESgotpars, ESfunckey, EShash, ESsetG0, ESsetG1, ESpercent, ESignore, ESnonstd, ESpalette }; -/* console_sem is held (except via vc_init()) */ +/* console_lock is held (except via vc_init()) */ static void reset_terminal(struct vc_data *vc, int do_clear) { vc->vc_top = 0; @@ -1685,7 +1685,7 @@ static void reset_terminal(struct vc_data *vc, int do_clear) csi_J(vc, 2); } -/* console_sem is held */ +/* console_lock is held */ static void do_con_trol(struct tty_struct *tty, struct vc_data *vc, int c) { /* @@ -2119,7 +2119,7 @@ static int 
is_double_width(uint32_t ucs) return bisearch(ucs, double_width, ARRAY_SIZE(double_width) - 1); } -/* acquires console_sem */ +/* acquires console_lock */ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int count) { #ifdef VT_BUF_VRAM_ONLY @@ -2147,11 +2147,11 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co might_sleep(); - acquire_console_sem(); + console_lock(); vc = tty->driver_data; if (vc == NULL) { printk(KERN_ERR "vt: argh, driver_data is NULL !\n"); - release_console_sem(); + console_unlock(); return 0; } @@ -2159,7 +2159,7 @@ static int do_con_write(struct tty_struct *tty, const unsigned char *buf, int co if (!vc_cons_allocated(currcons)) { /* could this happen? */ printk_once("con_write: tty %d not allocated\n", currcons+1); - release_console_sem(); + console_unlock(); return 0; } @@ -2375,7 +2375,7 @@ rescan_last_byte: } FLUSH console_conditional_schedule(); - release_console_sem(); + console_unlock(); notify_update(vc); return n; #undef FLUSH @@ -2388,11 +2388,11 @@ rescan_last_byte: * us to do the switches asynchronously (needed when we want * to switch due to a keyboard interrupt). Synchronization * with other console code and prevention of re-entrancy is - * ensured with console_sem. + * ensured with console_lock. */ static void console_callback(struct work_struct *ignored) { - acquire_console_sem(); + console_lock(); if (want_console >= 0) { if (want_console != fg_console && @@ -2422,7 +2422,7 @@ static void console_callback(struct work_struct *ignored) } notify_update(vc_cons[fg_console].d); - release_console_sem(); + console_unlock(); } int set_console(int nr) @@ -2603,7 +2603,7 @@ static struct console vt_console_driver = { */ /* - * Generally a bit racy with respect to console_sem(). + * Generally a bit racy with respect to console_lock();. * * There are some functions which don't need it. 
* @@ -2629,17 +2629,17 @@ int tioclinux(struct tty_struct *tty, unsigned long arg) switch (type) { case TIOCL_SETSEL: - acquire_console_sem(); + console_lock(); ret = set_selection((struct tiocl_selection __user *)(p+1), tty); - release_console_sem(); + console_unlock(); break; case TIOCL_PASTESEL: ret = paste_selection(tty); break; case TIOCL_UNBLANKSCREEN: - acquire_console_sem(); + console_lock(); unblank_screen(); - release_console_sem(); + console_unlock(); break; case TIOCL_SELLOADLUT: ret = sel_loadlut(p); @@ -2688,10 +2688,10 @@ int tioclinux(struct tty_struct *tty, unsigned long arg) } break; case TIOCL_BLANKSCREEN: /* until explicitly unblanked, not only poked */ - acquire_console_sem(); + console_lock(); ignore_poke = 1; do_blank_screen(0); - release_console_sem(); + console_unlock(); break; case TIOCL_BLANKEDSCREEN: ret = console_blanked; @@ -2790,11 +2790,11 @@ static void con_flush_chars(struct tty_struct *tty) return; /* if we race with con_close(), vt may be null */ - acquire_console_sem(); + console_lock(); vc = tty->driver_data; if (vc) set_cursor(vc); - release_console_sem(); + console_unlock(); } /* @@ -2805,7 +2805,7 @@ static int con_open(struct tty_struct *tty, struct file *filp) unsigned int currcons = tty->index; int ret = 0; - acquire_console_sem(); + console_lock(); if (tty->driver_data == NULL) { ret = vc_allocate(currcons); if (ret == 0) { @@ -2813,7 +2813,7 @@ static int con_open(struct tty_struct *tty, struct file *filp) /* Still being freed */ if (vc->port.tty) { - release_console_sem(); + console_unlock(); return -ERESTARTSYS; } tty->driver_data = vc; @@ -2827,11 +2827,11 @@ static int con_open(struct tty_struct *tty, struct file *filp) tty->termios->c_iflag |= IUTF8; else tty->termios->c_iflag &= ~IUTF8; - release_console_sem(); + console_unlock(); return ret; } } - release_console_sem(); + console_unlock(); return ret; } @@ -2844,9 +2844,9 @@ static void con_shutdown(struct tty_struct *tty) { struct vc_data *vc = tty->driver_data; BUG_ON(vc == NULL); - acquire_console_sem(); + console_lock(); vc->port.tty = NULL; - release_console_sem(); + console_unlock(); tty_shutdown(tty); } @@ -2893,13 +2893,13 @@ static int __init con_init(void) struct vc_data *vc; unsigned int currcons = 0, i; - acquire_console_sem(); + console_lock(); if (conswitchp) display_desc = conswitchp->con_startup(); if (!display_desc) { fg_console = 0; - release_console_sem(); + console_unlock(); return 0; } @@ -2946,7 +2946,7 @@ static int __init con_init(void) printable = 1; printk("\n"); - release_console_sem(); + console_unlock(); #ifdef CONFIG_VT_CONSOLE register_console(&vt_console_driver); @@ -3037,7 +3037,7 @@ static int bind_con_driver(const struct consw *csw, int first, int last, if (!try_module_get(owner)) return -ENODEV; - acquire_console_sem(); + console_lock(); /* check if driver is registered */ for (i = 0; i < MAX_NR_CON_DRIVER; i++) { @@ -3122,7 +3122,7 @@ static int bind_con_driver(const struct consw *csw, int first, int last, retval = 0; err: - release_console_sem(); + console_unlock(); module_put(owner); return retval; }; @@ -3171,7 +3171,7 @@ int unbind_con_driver(const struct consw *csw, int first, int last, int deflt) if (!try_module_get(owner)) return -ENODEV; - acquire_console_sem(); + console_lock(); /* check if driver is registered and if it is unbindable */ for (i = 0; i < MAX_NR_CON_DRIVER; i++) { @@ -3185,7 +3185,7 @@ int unbind_con_driver(const struct consw *csw, int first, int last, int deflt) } if (retval) { - release_console_sem(); + console_unlock(); goto 
err; } @@ -3204,12 +3204,12 @@ int unbind_con_driver(const struct consw *csw, int first, int last, int deflt) } if (retval) { - release_console_sem(); + console_unlock(); goto err; } if (!con_is_bound(csw)) { - release_console_sem(); + console_unlock(); goto err; } @@ -3238,7 +3238,7 @@ int unbind_con_driver(const struct consw *csw, int first, int last, int deflt) if (!con_is_bound(csw)) con_driver->flag &= ~CON_DRIVER_FLAG_INIT; - release_console_sem(); + console_unlock(); /* ignore return value, binding should not fail */ bind_con_driver(defcsw, first, last, deflt); err: @@ -3538,7 +3538,7 @@ int register_con_driver(const struct consw *csw, int first, int last) if (!try_module_get(owner)) return -ENODEV; - acquire_console_sem(); + console_lock(); for (i = 0; i < MAX_NR_CON_DRIVER; i++) { con_driver = &registered_con_driver[i]; @@ -3592,7 +3592,7 @@ } err: - release_console_sem(); + console_unlock(); module_put(owner); return retval; } @@ -3613,7 +3613,7 @@ int unregister_con_driver(const struct consw *csw) { int i, retval = -ENODEV; - acquire_console_sem(); + console_lock(); /* cannot unregister a bound driver */ if (con_is_bound(csw)) @@ -3639,7 +3639,7 @@ } } err: - release_console_sem(); + console_unlock(); return retval; } EXPORT_SYMBOL(unregister_con_driver); @@ -3934,9 +3934,9 @@ int con_set_cmap(unsigned char __user *arg) { int rc; - acquire_console_sem(); + console_lock(); rc = set_get_cmap (arg,1); - release_console_sem(); + console_unlock(); return rc; } @@ -3945,9 +3945,9 @@ int con_get_cmap(unsigned char __user *arg) { int rc; - acquire_console_sem(); + console_lock(); rc = set_get_cmap (arg,0); - release_console_sem(); + console_unlock(); return rc; } @@ -3994,12 +3994,12 @@ static int con_font_get(struct vc_data *vc, struct console_font_op *op) } else font.data = NULL; - acquire_console_sem(); + console_lock(); if (vc->vc_sw->con_font_get) rc = vc->vc_sw->con_font_get(vc, &font); else rc = -ENOSYS; - release_console_sem(); + console_unlock(); if (rc) goto out; @@ -4076,12 +4076,12 @@ static int con_font_set(struct vc_data *vc, struct console_font_op *op) font.data = memdup_user(op->data, size); if (IS_ERR(font.data)) return PTR_ERR(font.data); - acquire_console_sem(); + console_lock(); if (vc->vc_sw->con_font_set) rc = vc->vc_sw->con_font_set(vc, &font, op->flags); else rc = -ENOSYS; - release_console_sem(); + console_unlock(); kfree(font.data); return rc; } @@ -4103,12 +4103,12 @@ static int con_font_default(struct vc_data *vc, struct console_font_op *op) else name[MAX_FONT_NAME - 1] = 0; - acquire_console_sem(); + console_lock(); if (vc->vc_sw->con_font_default) rc = vc->vc_sw->con_font_default(vc, &font, s); else rc = -ENOSYS; - release_console_sem(); + console_unlock(); if (!rc) { op->width = font.width; op->height = font.height; @@ -4124,7 +4124,7 @@ static int con_font_copy(struct vc_data *vc, struct console_font_op *op) if (vc->vc_mode != KD_TEXT) return -EINVAL; - acquire_console_sem(); + console_lock(); if (!vc->vc_sw->con_font_copy) rc = -ENOSYS; else if (con < 0 || !vc_cons_allocated(con)) @@ -4133,7 +4133,7 @@ rc = 0; else rc = vc->vc_sw->con_font_copy(vc, con); - release_console_sem(); + console_unlock(); return rc; } diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index 6b68a0fb461..1235ebda6e1 100644 --- a/drivers/tty/vt/vt_ioctl.c +++
b/drivers/tty/vt/vt_ioctl.c @@ -649,12 +649,12 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, /* * explicitly blank/unblank the screen if switching modes */ - acquire_console_sem(); + console_lock(); if (arg == KD_TEXT) do_unblank_screen(1); else do_blank_screen(1); - release_console_sem(); + console_unlock(); break; case KDGETMODE: @@ -893,7 +893,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, ret = -EINVAL; goto out; } - acquire_console_sem(); + console_lock(); vc->vt_mode = tmp; /* the frsig is ignored, so we set it to 0 */ vc->vt_mode.frsig = 0; @@ -901,7 +901,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, vc->vt_pid = get_pid(task_pid(current)); /* no switch is required -- saw@shade.msu.ru */ vc->vt_newvt = -1; - release_console_sem(); + console_unlock(); break; } @@ -910,9 +910,9 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, struct vt_mode tmp; int rc; - acquire_console_sem(); + console_lock(); memcpy(&tmp, &vc->vt_mode, sizeof(struct vt_mode)); - release_console_sem(); + console_unlock(); rc = copy_to_user(up, &tmp, sizeof(struct vt_mode)); if (rc) @@ -965,9 +965,9 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, ret = -ENXIO; else { arg--; - acquire_console_sem(); + console_lock(); ret = vc_allocate(arg); - release_console_sem(); + console_unlock(); if (ret) break; set_console(arg); @@ -990,7 +990,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, ret = -ENXIO; else { vsa.console--; - acquire_console_sem(); + console_lock(); ret = vc_allocate(vsa.console); if (ret == 0) { struct vc_data *nvc; @@ -1003,7 +1003,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, put_pid(nvc->vt_pid); nvc->vt_pid = get_pid(task_pid(current)); } - release_console_sem(); + console_unlock(); if (ret) break; /* Commence switch and lock */ @@ -1044,7 +1044,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, /* * Switching-from response */ - acquire_console_sem(); + console_lock(); if (vc->vt_newvt >= 0) { if (arg == 0) /* @@ -1063,7 +1063,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, vc->vt_newvt = -1; ret = vc_allocate(newvt); if (ret) { - release_console_sem(); + console_unlock(); break; } /* @@ -1083,7 +1083,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, if (arg != VT_ACKACQ) ret = -EINVAL; } - release_console_sem(); + console_unlock(); break; /* @@ -1096,20 +1096,20 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, } if (arg == 0) { /* deallocate all unused consoles, but leave 0 */ - acquire_console_sem(); + console_lock(); for (i=1; iv_cols)) ret = -EFAULT; else { - acquire_console_sem(); + console_lock(); for (i = 0; i < MAX_NR_CONSOLES; i++) { vc = vc_cons[i].d; @@ -1135,7 +1135,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, vc_resize(vc_cons[i].d, cc, ll); } } - release_console_sem(); + console_unlock(); } break; } @@ -1187,14 +1187,14 @@ int vt_ioctl(struct tty_struct *tty, struct file * file, for (i = 0; i < MAX_NR_CONSOLES; i++) { if (!vc_cons[i].d) continue; - acquire_console_sem(); + console_lock(); if (vlin) vc_cons[i].d->vc_scan_lines = vlin; if (clin) vc_cons[i].d->vc_font.height = clin; vc_cons[i].d->vc_resize_user = 1; vc_resize(vc_cons[i].d, cc, ll); - release_console_sem(); + console_unlock(); } break; } @@ -1367,7 +1367,7 @@ void vc_SAK(struct work_struct *work) struct vc_data *vc; struct tty_struct *tty; - acquire_console_sem(); + console_lock(); vc = vc_con->d; if (vc) { tty = vc->port.tty; @@ -1379,7 +1379,7 @@ void vc_SAK(struct 
work_struct *work) __do_SAK(tty); reset_vc(vc); } - release_console_sem(); + console_unlock(); } #ifdef CONFIG_COMPAT @@ -1737,10 +1737,10 @@ int vt_move_to_console(unsigned int vt, int alloc) { int prev; - acquire_console_sem(); + console_lock(); /* Graphics mode - up to X */ if (disable_vt_switch) { - release_console_sem(); + console_unlock(); return 0; } prev = fg_console; @@ -1748,7 +1748,7 @@ int vt_move_to_console(unsigned int vt, int alloc) if (alloc && vc_allocate(vt)) { /* we can't have a free VC for now. Too bad, * we don't want to mess the screen for now. */ - release_console_sem(); + console_unlock(); return -ENOSPC; } @@ -1758,10 +1758,10 @@ int vt_move_to_console(unsigned int vt, int alloc) * Let the calling function know so it can decide * what to do. */ - release_console_sem(); + console_unlock(); return -EIO; } - release_console_sem(); + console_unlock(); tty_lock(); if (vt_waitactive(vt + 1)) { pr_debug("Suspend: Can't switch VCs."); @@ -1781,8 +1781,8 @@ int vt_move_to_console(unsigned int vt, int alloc) */ void pm_set_vt_switch(int do_switch) { - acquire_console_sem(); + console_lock(); disable_vt_switch = !do_switch; - release_console_sem(); + console_unlock(); } EXPORT_SYMBOL(pm_set_vt_switch); diff --git a/drivers/video/arkfb.c b/drivers/video/arkfb.c index d583bea608f..391ac939f01 100644 --- a/drivers/video/arkfb.c +++ b/drivers/video/arkfb.c @@ -23,7 +23,7 @@ #include #include #include -#include /* Why should fb driver call console functions? because acquire_console_sem() */ +#include /* Why should fb driver call console functions? because console_lock() */ #include