From 9985b0bab332289f14837eff3c6e0bcc658b58f7 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 5 Jun 2008 12:57:11 -0700 Subject: sched: prevent bound kthreads from changing cpus_allowed Kthreads that have called kthread_bind() are bound to specific cpus, so other tasks should not be able to change their cpus_allowed from under them. Otherwise, it is possible to move kthreads, such as the migration or software watchdog threads, so they are not allowed access to the cpu they work on. Cc: Peter Zijlstra Cc: Paul Menage Cc: Paul Jackson Signed-off-by: David Rientjes Signed-off-by: Ingo Molnar --- kernel/kthread.c | 1 + 1 file changed, 1 insertion(+) (limited to 'kernel/kthread.c') diff --git a/kernel/kthread.c b/kernel/kthread.c index bd1b9ea024e..97747cdd37c 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -180,6 +180,7 @@ void kthread_bind(struct task_struct *k, unsigned int cpu) set_task_cpu(k, cpu); k->cpus_allowed = cpumask_of_cpu(cpu); k->rt.nr_cpus_allowed = 1; + k->flags |= PF_THREAD_BOUND; } EXPORT_SYMBOL(kthread_bind); -- cgit v1.2.3-70-g09d2 From ebb12db51f6c13b30752fcf506baad4c617b153c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 11 Jun 2008 22:04:29 +0200 Subject: Freezer: Introduce PF_FREEZER_NOSIG The freezer currently attempts to distinguish kernel threads from user space tasks by checking if their mm pointer is unset and it does not send fake signals to kernel threads. However, there are kernel threads, mostly related to networking, that behave like user space tasks and may want to be sent a fake signal to be frozen. Introduce the new process flag PF_FREEZER_NOSIG that will be set by default for all kernel threads and make the freezer only send fake signals to the tasks having PF_FREEZER_NOSIG unset. Provide the set_freezable_with_signal() function to be called by the kernel threads that want to be sent a fake signal for freezing. This patch should not change the freezer's observable behavior. Signed-off-by: Rafael J. Wysocki Signed-off-by: Andi Kleen Acked-by: Pavel Machek Signed-off-by: Len Brown --- include/linux/freezer.h | 10 +++++ include/linux/sched.h | 1 + kernel/kthread.c | 2 +- kernel/power/process.c | 97 +++++++++++++++++++++---------------------------- 4 files changed, 54 insertions(+), 56 deletions(-) (limited to 'kernel/kthread.c') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index 08934995c7a..deddeedf325 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -127,6 +127,15 @@ static inline void set_freezable(void) current->flags &= ~PF_NOFREEZE; } +/* + * Tell the freezer that the current task should be frozen by it and that it + * should send a fake signal to the task to freeze it. + */ +static inline void set_freezable_with_signal(void) +{ + current->flags &= ~(PF_NOFREEZE | PF_FREEZER_NOSIG); +} + /* * Freezer-friendly wrappers around wait_event_interruptible() and * wait_event_interruptible_timeout(), originally defined in @@ -174,6 +183,7 @@ static inline void freezer_do_not_count(void) {} static inline void freezer_count(void) {} static inline int freezer_should_skip(struct task_struct *p) { return 0; } static inline void set_freezable(void) {} +static inline void set_freezable_with_signal(void) {} #define wait_event_freezable(wq, condition) \ wait_event_interruptible(wq, condition) diff --git a/include/linux/sched.h b/include/linux/sched.h index 21349173d14..ba2f859c6e4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1494,6 +1494,7 @@ static inline void put_task_struct(struct task_struct *t) #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezeable */ +#define PF_FREEZER_NOSIG 0x80000000 /* Freezer won't send signals to it */ /* * Only the _current_ task can read/write to tsk->flags, but other diff --git a/kernel/kthread.c b/kernel/kthread.c index 97747cdd37c..ac3fb732664 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -235,7 +235,7 @@ int kthreadd(void *unused) set_user_nice(tsk, KTHREAD_NICE_LEVEL); set_cpus_allowed(tsk, CPU_MASK_ALL); - current->flags |= PF_NOFREEZE; + current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG; for (;;) { set_current_state(TASK_INTERRUPTIBLE); diff --git a/kernel/power/process.c b/kernel/power/process.c index f1d0b345c9b..5fb87652f21 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -19,9 +19,6 @@ */ #define TIMEOUT (20 * HZ) -#define FREEZER_KERNEL_THREADS 0 -#define FREEZER_USER_SPACE 1 - static inline int freezeable(struct task_struct * p) { if ((p == current) || @@ -84,63 +81,53 @@ static void fake_signal_wake_up(struct task_struct *p) spin_unlock_irqrestore(&p->sighand->siglock, flags); } -static int has_mm(struct task_struct *p) +static inline bool should_send_signal(struct task_struct *p) { - return (p->mm && !(p->flags & PF_BORROWED_MM)); + return !(p->flags & PF_FREEZER_NOSIG); } /** * freeze_task - send a freeze request to given task * @p: task to send the request to - * @with_mm_only: if set, the request will only be sent if the task has its - * own mm - * Return value: 0, if @with_mm_only is set and the task has no mm of its - * own or the task is frozen, 1, otherwise + * @sig_only: if set, the request will only be sent if the task has the + * PF_FREEZER_NOSIG flag unset + * Return value: 'false', if @sig_only is set and the task has + * PF_FREEZER_NOSIG set or the task is frozen, 'true', otherwise * - * The freeze request is sent by seting the tasks's TIF_FREEZE flag and + * The freeze request is sent by setting the tasks's TIF_FREEZE flag and * either sending a fake signal to it or waking it up, depending on whether - * or not it has its own mm (ie. it is a user land task). If @with_mm_only - * is set and the task has no mm of its own (ie. it is a kernel thread), - * its TIF_FREEZE flag should not be set. - * - * The task_lock() is necessary to prevent races with exit_mm() or - * use_mm()/unuse_mm() from occuring. + * or not it has PF_FREEZER_NOSIG set. If @sig_only is set and the task + * has PF_FREEZER_NOSIG set (ie. it is a typical kernel thread), its + * TIF_FREEZE flag will not be set. */ -static int freeze_task(struct task_struct *p, int with_mm_only) +static bool freeze_task(struct task_struct *p, bool sig_only) { - int ret = 1; + /* + * We first check if the task is freezing and next if it has already + * been frozen to avoid the race with frozen_process() which first marks + * the task as frozen and next clears its TIF_FREEZE. + */ + if (!freezing(p)) { + rmb(); + if (frozen(p)) + return false; - task_lock(p); - if (freezing(p)) { - if (has_mm(p)) { - if (!signal_pending(p)) - fake_signal_wake_up(p); - } else { - if (with_mm_only) - ret = 0; - else - wake_up_state(p, TASK_INTERRUPTIBLE); - } + if (!sig_only || should_send_signal(p)) + set_freeze_flag(p); + else + return false; + } + + if (should_send_signal(p)) { + if (!signal_pending(p)) + fake_signal_wake_up(p); + } else if (sig_only) { + return false; } else { - rmb(); - if (frozen(p)) { - ret = 0; - } else { - if (has_mm(p)) { - set_freeze_flag(p); - fake_signal_wake_up(p); - } else { - if (with_mm_only) { - ret = 0; - } else { - set_freeze_flag(p); - wake_up_state(p, TASK_INTERRUPTIBLE); - } - } - } + wake_up_state(p, TASK_INTERRUPTIBLE); } - task_unlock(p); - return ret; + + return true; } static void cancel_freezing(struct task_struct *p) @@ -156,7 +143,7 @@ static void cancel_freezing(struct task_struct *p) } } -static int try_to_freeze_tasks(int freeze_user_space) +static int try_to_freeze_tasks(bool sig_only) { struct task_struct *g, *p; unsigned long end_time; @@ -175,7 +162,7 @@ static int try_to_freeze_tasks(int freeze_user_space) if (frozen(p) || !freezeable(p)) continue; - if (!freeze_task(p, freeze_user_space)) + if (!freeze_task(p, sig_only)) continue; /* @@ -235,13 +222,13 @@ int freeze_processes(void) int error; printk("Freezing user space processes ... "); - error = try_to_freeze_tasks(FREEZER_USER_SPACE); + error = try_to_freeze_tasks(true); if (error) goto Exit; printk("done.\n"); printk("Freezing remaining freezable tasks ... "); - error = try_to_freeze_tasks(FREEZER_KERNEL_THREADS); + error = try_to_freeze_tasks(false); if (error) goto Exit; printk("done."); @@ -251,7 +238,7 @@ int freeze_processes(void) return error; } -static void thaw_tasks(int thaw_user_space) +static void thaw_tasks(bool nosig_only) { struct task_struct *g, *p; @@ -260,7 +247,7 @@ static void thaw_tasks(int thaw_user_space) if (!freezeable(p)) continue; - if (!p->mm == thaw_user_space) + if (nosig_only && should_send_signal(p)) continue; thaw_process(p); @@ -271,8 +258,8 @@ static void thaw_tasks(int thaw_user_space) void thaw_processes(void) { printk("Restarting tasks ... "); - thaw_tasks(FREEZER_KERNEL_THREADS); - thaw_tasks(FREEZER_USER_SPACE); + thaw_tasks(true); + thaw_tasks(false); schedule(); printk("done.\n"); } -- cgit v1.2.3-70-g09d2 From 8df185a95c9b84fc0c3c02224e64fdc5b83bae34 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 8 Jul 2008 15:55:48 -0700 Subject: kthread: reduce stack pressure in create_kthread and kthreadd * Replace: set_cpus_allowed(..., CPU_MASK_ALL) with: set_cpus_allowed_ptr(..., CPU_MASK_ALL_PTR) to remove excessive stack requirements when NR_CPUS=4096. Signed-off-by: Mike Travis Cc: Andrew Morton Signed-off-by: Ingo Molnar --- kernel/kthread.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/kthread.c') diff --git a/kernel/kthread.c b/kernel/kthread.c index ac3fb732664..6111c27491b 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -106,7 +106,7 @@ static void create_kthread(struct kthread_create_info *create) */ sched_setscheduler(create->result, SCHED_NORMAL, ¶m); set_user_nice(create->result, KTHREAD_NICE_LEVEL); - set_cpus_allowed(create->result, CPU_MASK_ALL); + set_cpus_allowed_ptr(create->result, CPU_MASK_ALL_PTR); } complete(&create->done); } @@ -233,7 +233,7 @@ int kthreadd(void *unused) set_task_comm(tsk, "kthreadd"); ignore_signals(tsk); set_user_nice(tsk, KTHREAD_NICE_LEVEL); - set_cpus_allowed(tsk, CPU_MASK_ALL); + set_cpus_allowed_ptr(tsk, CPU_MASK_ALL_PTR); current->flags |= PF_NOFREEZE | PF_FREEZER_NOSIG; -- cgit v1.2.3-70-g09d2 From 85ba2d862e521375a8ee01526c5c46b1f24bb4af Mon Sep 17 00:00:00 2001 From: Roland McGrath Date: Fri, 25 Jul 2008 19:45:58 -0700 Subject: tracehook: wait_task_inactive This extends wait_task_inactive() with a new argument so it can be used in a "soft" mode where it will check for the task changing state unexpectedly and back off. There is no change to existing callers. This lays the groundwork to allow robust, noninvasive tracing that can try to sample a blocked thread but back off safely if it wakes up. Signed-off-by: Roland McGrath Cc: Oleg Nesterov Reviewed-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/ia64/kernel/perfmon.c | 4 ++-- include/linux/sched.h | 8 ++++++-- kernel/kthread.c | 2 +- kernel/ptrace.c | 2 +- kernel/sched.c | 29 +++++++++++++++++++++++++++-- 5 files changed, 37 insertions(+), 8 deletions(-) (limited to 'kernel/kthread.c') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 19d4493c619..fc8f3509df2 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2626,7 +2626,7 @@ pfm_task_incompatible(pfm_context_t *ctx, struct task_struct *task) /* * make sure the task is off any CPU */ - wait_task_inactive(task); + wait_task_inactive(task, 0); /* more to come... */ @@ -4774,7 +4774,7 @@ recheck: UNPROTECT_CTX(ctx, flags); - wait_task_inactive(task); + wait_task_inactive(task, 0); PROTECT_CTX(ctx, flags); diff --git a/include/linux/sched.h b/include/linux/sched.h index a95d84d0da9..f59318a0099 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1882,9 +1882,13 @@ extern void set_task_comm(struct task_struct *tsk, char *from); extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP -extern void wait_task_inactive(struct task_struct * p); +extern unsigned long wait_task_inactive(struct task_struct *, long match_state); #else -#define wait_task_inactive(p) do { } while (0) +static inline unsigned long wait_task_inactive(struct task_struct *p, + long match_state) +{ + return 1; +} #endif #define next_task(p) list_entry(rcu_dereference((p)->tasks.next), struct task_struct, tasks) diff --git a/kernel/kthread.c b/kernel/kthread.c index 6111c27491b..96cff2f8710 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -176,7 +176,7 @@ void kthread_bind(struct task_struct *k, unsigned int cpu) return; } /* Must have done schedule() in kthread() before we set_task_cpu */ - wait_task_inactive(k); + wait_task_inactive(k, 0); set_task_cpu(k, cpu); k->cpus_allowed = cpumask_of_cpu(cpu); k->rt.nr_cpus_allowed = 1; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 8392a9da645..082b3fcb32a 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -107,7 +107,7 @@ int ptrace_check_attach(struct task_struct *child, int kill) read_unlock(&tasklist_lock); if (!ret && !kill) - wait_task_inactive(child); + ret = wait_task_inactive(child, TASK_TRACED) ? 0 : -ESRCH; /* All systems go.. */ return ret; diff --git a/kernel/sched.c b/kernel/sched.c index fde1a102635..0236958addc 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1867,16 +1867,24 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req) /* * wait_task_inactive - wait for a thread to unschedule. * + * If @match_state is nonzero, it's the @p->state value just checked and + * not expected to change. If it changes, i.e. @p might have woken up, + * then return zero. When we succeed in waiting for @p to be off its CPU, + * we return a positive number (its total switch count). If a second call + * a short while later returns the same number, the caller can be sure that + * @p has remained unscheduled the whole time. + * * The caller must ensure that the task *will* unschedule sometime soon, * else this function might spin for a *long* time. This function can't * be called with interrupts off, or it may introduce deadlock with * smp_call_function() if an IPI is sent by the same process we are * waiting to become inactive. */ -void wait_task_inactive(struct task_struct *p) +unsigned long wait_task_inactive(struct task_struct *p, long match_state) { unsigned long flags; int running, on_rq; + unsigned long ncsw; struct rq *rq; for (;;) { @@ -1899,8 +1907,11 @@ void wait_task_inactive(struct task_struct *p) * return false if the runqueue has changed and p * is actually now running somewhere else! */ - while (task_running(rq, p)) + while (task_running(rq, p)) { + if (match_state && unlikely(p->state != match_state)) + return 0; cpu_relax(); + } /* * Ok, time to look more closely! We need the rq @@ -1910,8 +1921,20 @@ void wait_task_inactive(struct task_struct *p) rq = task_rq_lock(p, &flags); running = task_running(rq, p); on_rq = p->se.on_rq; + ncsw = 0; + if (!match_state || p->state == match_state) { + ncsw = p->nivcsw + p->nvcsw; + if (unlikely(!ncsw)) + ncsw = 1; + } task_rq_unlock(rq, &flags); + /* + * If it changed from the expected state, bail out now. + */ + if (unlikely(!ncsw)) + break; + /* * Was it really running after all now that we * checked with the proper locks actually held? @@ -1944,6 +1967,8 @@ void wait_task_inactive(struct task_struct *p) */ break; } + + return ncsw; } /*** -- cgit v1.2.3-70-g09d2