Diffstat (limited to 'kernel/exit.c')
-rw-r--r--  kernel/exit.c  320
1 file changed, 81 insertions(+), 239 deletions(-)
diff --git a/kernel/exit.c b/kernel/exit.c
index 346616c0092..e5c4668f179 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -20,6 +20,7 @@
#include <linux/tsacct_kern.h>
#include <linux/file.h>
#include <linux/fdtable.h>
+#include <linux/freezer.h>
#include <linux/binfmts.h>
#include <linux/nsproxy.h>
#include <linux/pid_namespace.h>
@@ -31,7 +32,6 @@
#include <linux/mempolicy.h>
#include <linux/taskstats_kern.h>
#include <linux/delayacct.h>
-#include <linux/freezer.h>
#include <linux/cgroup.h>
#include <linux/syscalls.h>
#include <linux/signal.h>
@@ -72,20 +72,9 @@ static void __unhash_process(struct task_struct *p, bool group_dead)
list_del_rcu(&p->tasks);
list_del_init(&p->sibling);
__this_cpu_dec(process_counts);
- /*
- * If we are the last child process in a pid namespace to be
- * reaped, notify the reaper sleeping in zap_pid_ns_processes().
- */
- if (IS_ENABLED(CONFIG_PID_NS)) {
- struct task_struct *parent = p->real_parent;
-
- if ((task_active_pid_ns(parent)->child_reaper == parent) &&
- list_empty(&parent->children) &&
- (parent->flags & PF_EXITING))
- wake_up_process(parent);
- }
}
list_del_rcu(&p->thread_group);
+ list_del_rcu(&p->thread_node);
}
/*
@@ -97,6 +86,7 @@ static void __exit_signal(struct task_struct *tsk)
bool group_dead = thread_group_leader(tsk);
struct sighand_struct *sighand;
struct tty_struct *uninitialized_var(tty);
+ cputime_t utime, stime;
sighand = rcu_dereference_check(tsk->sighand,
lockdep_tasklist_lock_is_held());
@@ -135,9 +125,10 @@ static void __exit_signal(struct task_struct *tsk)
* We won't ever get here for the group leader, since it
* will have been the last reference on the signal_struct.
*/
- sig->utime += tsk->utime;
- sig->stime += tsk->stime;
- sig->gtime += tsk->gtime;
+ task_cputime(tsk, &utime, &stime);
+ sig->utime += utime;
+ sig->stime += stime;
+ sig->gtime += task_gtime(tsk);
sig->min_flt += tsk->min_flt;
sig->maj_flt += tsk->maj_flt;
sig->nvcsw += tsk->nvcsw;
@@ -322,149 +313,7 @@ kill_orphaned_pgrp(struct task_struct *tsk, struct task_struct *parent)
}
}
-/**
- * reparent_to_kthreadd - Reparent the calling kernel thread to kthreadd
- *
- * If a kernel thread is launched as a result of a system call, or if
- * it ever exits, it should generally reparent itself to kthreadd so it
- * isn't in the way of other processes and is correctly cleaned up on exit.
- *
- * The various task state such as scheduling policy and priority may have
- * been inherited from a user process, so we reset them to sane values here.
- *
- * NOTE that reparent_to_kthreadd() gives the caller full capabilities.
- */
-static void reparent_to_kthreadd(void)
-{
- write_lock_irq(&tasklist_lock);
-
- ptrace_unlink(current);
- /* Reparent to init */
- current->real_parent = current->parent = kthreadd_task;
- list_move_tail(&current->sibling, &current->real_parent->children);
-
- /* Set the exit signal to SIGCHLD so we signal init on exit */
- current->exit_signal = SIGCHLD;
-
- if (task_nice(current) < 0)
- set_user_nice(current, 0);
- /* cpus_allowed? */
- /* rt_priority? */
- /* signals? */
- memcpy(current->signal->rlim, init_task.signal->rlim,
- sizeof(current->signal->rlim));
-
- atomic_inc(&init_cred.usage);
- commit_creds(&init_cred);
- write_unlock_irq(&tasklist_lock);
-}
-
-void __set_special_pids(struct pid *pid)
-{
- struct task_struct *curr = current->group_leader;
-
- if (task_session(curr) != pid)
- change_pid(curr, PIDTYPE_SID, pid);
-
- if (task_pgrp(curr) != pid)
- change_pid(curr, PIDTYPE_PGID, pid);
-}
-
-static void set_special_pids(struct pid *pid)
-{
- write_lock_irq(&tasklist_lock);
- __set_special_pids(pid);
- write_unlock_irq(&tasklist_lock);
-}
-
-/*
- * Let kernel threads use this to say that they allow a certain signal.
- * Must not be used if kthread was cloned with CLONE_SIGHAND.
- */
-int allow_signal(int sig)
-{
- if (!valid_signal(sig) || sig < 1)
- return -EINVAL;
-
- spin_lock_irq(&current->sighand->siglock);
- /* This is only needed for daemonize()'ed kthreads */
- sigdelset(&current->blocked, sig);
- /*
- * Kernel threads handle their own signals. Let the signal code
- * know it'll be handled, so that it doesn't get converted to
- * SIGKILL or just silently dropped.
- */
- current->sighand->action[(sig)-1].sa.sa_handler = (void __user *)2;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
- return 0;
-}
-
-EXPORT_SYMBOL(allow_signal);
-
-int disallow_signal(int sig)
-{
- if (!valid_signal(sig) || sig < 1)
- return -EINVAL;
-
- spin_lock_irq(&current->sighand->siglock);
- current->sighand->action[(sig)-1].sa.sa_handler = SIG_IGN;
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
- return 0;
-}
-
-EXPORT_SYMBOL(disallow_signal);
-
-/*
- * Put all the gunge required to become a kernel thread without
- * attached user resources in one place where it belongs.
- */
-
-void daemonize(const char *name, ...)
-{
- va_list args;
- sigset_t blocked;
-
- va_start(args, name);
- vsnprintf(current->comm, sizeof(current->comm), name, args);
- va_end(args);
-
- /*
- * If we were started as result of loading a module, close all of the
- * user space pages. We don't need them, and if we didn't close them
- * they would be locked into memory.
- */
- exit_mm(current);
- /*
- * We don't want to get frozen, in case system-wide hibernation
- * or suspend transition begins right now.
- */
- current->flags |= (PF_NOFREEZE | PF_KTHREAD);
-
- if (current->nsproxy != &init_nsproxy) {
- get_nsproxy(&init_nsproxy);
- switch_task_namespaces(current, &init_nsproxy);
- }
- set_special_pids(&init_struct_pid);
- proc_clear_tty(current);
-
- /* Block and flush all signals */
- sigfillset(&blocked);
- sigprocmask(SIG_BLOCK, &blocked, NULL);
- flush_signals(current);
-
- /* Become as one with the init task */
-
- daemonize_fs_struct();
- daemonize_descriptors();
-
- reparent_to_kthreadd();
-}
-
-EXPORT_SYMBOL(daemonize);
-
-#ifdef CONFIG_MM_OWNER
+#ifdef CONFIG_MEMCG
/*
* A task is exiting. If it owned this mm, find a new owner for the mm.
*/
@@ -507,14 +356,18 @@ retry:
}
/*
- * Search through everything else. We should not get
- * here often
+ * Search through everything else; we should not get here often.
*/
- do_each_thread(g, c) {
- if (c->mm == mm)
- goto assign_new_owner;
- } while_each_thread(g, c);
-
+ for_each_process(g) {
+ if (g->flags & PF_KTHREAD)
+ continue;
+ for_each_thread(g, c) {
+ if (c->mm == mm)
+ goto assign_new_owner;
+ if (c->mm)
+ break;
+ }
+ }
read_unlock(&tasklist_lock);
/*
* We found no owner yet mm_users > 1: this implies that we are
@@ -546,7 +399,7 @@ assign_new_owner:
task_unlock(c);
put_task_struct(c);
}
-#endif /* CONFIG_MM_OWNER */
+#endif /* CONFIG_MEMCG */
/*
* Turn us into a lazy TLB process if we
@@ -587,7 +440,7 @@ static void exit_mm(struct task_struct * tsk)
set_task_state(tsk, TASK_UNINTERRUPTIBLE);
if (!self.task) /* see coredump_finish() */
break;
- schedule();
+ freezable_schedule();
}
__set_task_state(tsk, TASK_RUNNING);
down_read(&mm->mmap_sem);
@@ -682,7 +535,7 @@ static void reparent_leader(struct task_struct *father, struct task_struct *p,
if (same_thread_group(p->real_parent, father))
return;
- /* We don't want people slaying init. */
+ /* We don't want people slaying init. */
p->exit_signal = SIGCHLD;
/* If it has exited notify the new parent about this child's death. */
@@ -751,7 +604,6 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
* jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
*/
forget_original_parent(tsk);
- exit_task_namespaces(tsk);
write_lock_irq(&tasklist_lock);
if (group_dead)
@@ -897,8 +749,10 @@ void do_exit(long code)
exit_shm(tsk);
exit_files(tsk);
exit_fs(tsk);
+ if (group_dead)
+ disassociate_ctty(1);
+ exit_task_namespaces(tsk);
exit_task_work(tsk);
- check_stack_usage();
exit_thread();
/*
@@ -909,21 +763,17 @@ void do_exit(long code)
*/
perf_event_exit_task(tsk);
- cgroup_exit(tsk, 1);
-
- if (group_dead)
- disassociate_ctty(1);
+ cgroup_exit(tsk);
module_put(task_thread_info(tsk)->exec_domain->module);
- proc_exit_connector(tsk);
-
/*
* FIXME: do that only when needed, using sched_exit tracepoint
*/
- ptrace_put_breakpoints(tsk);
+ flush_ptrace_hw_breakpoint(tsk);
exit_notify(tsk, group_dead);
+ proc_exit_connector(tsk);
#ifdef CONFIG_NUMA
task_lock(tsk);
mpol_put(tsk->mempolicy);
@@ -937,7 +787,7 @@ void do_exit(long code)
/*
* Make sure we are holding no locks:
*/
- debug_check_no_locks_held(tsk);
+ debug_check_no_locks_held();
/*
* We can do this unlocked here. The futex code uses this flag
* just to verify whether the pi state cleanup has been done
@@ -949,13 +799,14 @@ void do_exit(long code)
exit_io_context(tsk);
if (tsk->splice_pipe)
- __free_pipe_info(tsk->splice_pipe);
+ free_pipe_info(tsk->splice_pipe);
if (tsk->task_frag.page)
put_page(tsk->task_frag.page);
validate_creds_for_do_exit(tsk);
+ check_stack_usage();
preempt_disable();
if (tsk->nr_dirtied)
__this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied);
@@ -1150,17 +1001,13 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
return wait_noreap_copyout(wo, p, pid, uid, why, status);
}
+ traced = ptrace_reparented(p);
/*
- * Try to move the task's state to DEAD
- * only one thread is allowed to do this:
+ * Move the task's state to DEAD/TRACE; only one thread can do this.
*/
- state = xchg(&p->exit_state, EXIT_DEAD);
- if (state != EXIT_ZOMBIE) {
- BUG_ON(state != EXIT_DEAD);
+ state = traced && thread_group_leader(p) ? EXIT_TRACE : EXIT_DEAD;
+ if (cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) != EXIT_ZOMBIE)
return 0;
- }
-
- traced = ptrace_reparented(p);
/*
* It can be ptraced but not reparented, check
* thread_group_leader() to filter out sub-threads.
@@ -1186,17 +1033,17 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
* as other threads in the parent group can be right
* here reaping other children at the same time.
*
- * We use thread_group_times() to get times for the thread
+ * We use thread_group_cputime_adjusted() to get times for the thread
* group, which consolidates times for all threads in the
* group including the group leader.
*/
- thread_group_times(p, &tgutime, &tgstime);
+ thread_group_cputime_adjusted(p, &tgutime, &tgstime);
spin_lock_irq(&p->real_parent->sighand->siglock);
psig = p->real_parent->signal;
sig = p->signal;
psig->cutime += tgutime + sig->cutime;
psig->cstime += tgstime + sig->cstime;
- psig->cgtime += p->gtime + sig->gtime + sig->cgtime;
+ psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime;
psig->cmin_flt +=
p->min_flt + sig->min_flt + sig->cmin_flt;
psig->cmaj_flt +=
@@ -1221,7 +1068,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
/*
* Now we are sure this task is interesting, and no other
- * thread can reap it because we set its state to EXIT_DEAD.
+ * thread can reap it because we set its state to DEAD/TRACE.
*/
read_unlock(&tasklist_lock);
@@ -1258,22 +1105,19 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
if (!retval)
retval = pid;
- if (traced) {
+ if (state == EXIT_TRACE) {
write_lock_irq(&tasklist_lock);
/* We dropped tasklist, ptracer could die and untrace */
ptrace_unlink(p);
- /*
- * If this is not a sub-thread, notify the parent.
- * If parent wants a zombie, don't release it now.
- */
- if (thread_group_leader(p) &&
- !do_notify_parent(p, p->exit_signal)) {
- p->exit_state = EXIT_ZOMBIE;
- p = NULL;
- }
+
+ /* If parent wants a zombie, don't release it now */
+ state = EXIT_ZOMBIE;
+ if (do_notify_parent(p, p->exit_signal))
+ state = EXIT_DEAD;
+ p->exit_state = state;
write_unlock_irq(&tasklist_lock);
}
- if (p != NULL)
+ if (state == EXIT_DEAD)
release_task(p);
return retval;
@@ -1450,7 +1294,12 @@ static int wait_task_continued(struct wait_opts *wo, struct task_struct *p)
static int wait_consider_task(struct wait_opts *wo, int ptrace,
struct task_struct *p)
{
- int ret = eligible_child(wo, p);
+ int ret;
+
+ if (unlikely(p->exit_state == EXIT_DEAD))
+ return 0;
+
+ ret = eligible_child(wo, p);
if (!ret)
return ret;
@@ -1468,33 +1317,44 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace,
return 0;
}
- /* dead body doesn't have much to contribute */
- if (unlikely(p->exit_state == EXIT_DEAD)) {
+ if (unlikely(p->exit_state == EXIT_TRACE)) {
/*
- * But do not ignore this task until the tracer does
- * wait_task_zombie()->do_notify_parent().
+ * ptrace == 0 means we are the natural parent. In this case
+ * we should clear notask_error; the debugger will notify us.
*/
- if (likely(!ptrace) && unlikely(ptrace_reparented(p)))
+ if (likely(!ptrace))
wo->notask_error = 0;
return 0;
}
- /* slay zombie? */
- if (p->exit_state == EXIT_ZOMBIE) {
+ if (likely(!ptrace) && unlikely(p->ptrace)) {
/*
- * A zombie ptracee is only visible to its ptracer.
- * Notification and reaping will be cascaded to the real
- * parent when the ptracer detaches.
+ * If it is traced by its real parent's group, just pretend
+ * the caller is ptrace_do_wait() and reap this child if it
+ * is zombie.
+ *
+ * This also hides group stop state from real parent; otherwise
+ * a single stop can be reported twice as group and ptrace stop.
+ * If a ptracer wants to distinguish these two events for its
+ * own children it should create a separate process which takes
+ * the role of real parent.
*/
- if (likely(!ptrace) && unlikely(p->ptrace)) {
- /* it will become visible, clear notask_error */
- wo->notask_error = 0;
- return 0;
- }
+ if (!ptrace_reparented(p))
+ ptrace = 1;
+ }
+ /* slay zombie? */
+ if (p->exit_state == EXIT_ZOMBIE) {
/* we don't reap group leaders with subthreads */
- if (!delay_group_leader(p))
- return wait_task_zombie(wo, p);
+ if (!delay_group_leader(p)) {
+ /*
+ * A zombie ptracee is only visible to its ptracer.
+ * Notification and reaping will be cascaded to the
+ * real parent when the ptracer detaches.
+ */
+ if (unlikely(ptrace) || likely(!p->ptrace))
+ return wait_task_zombie(wo, p);
+ }
/*
* Allow access to stopped/continued state via zombie by
@@ -1520,19 +1380,6 @@ static int wait_consider_task(struct wait_opts *wo, int ptrace,
wo->notask_error = 0;
} else {
/*
- * If @p is ptraced by a task in its real parent's group,
- * hide group stop/continued state when looking at @p as
- * the real parent; otherwise, a single stop can be
- * reported twice as group and ptrace stops.
- *
- * If a ptracer wants to distinguish the two events for its
- * own children, it should create a separate process which
- * takes the role of real parent.
- */
- if (likely(!ptrace) && p->ptrace && !ptrace_reparented(p))
- return 0;
-
- /*
* @p is alive and it's gonna stop, continue or exit, so
* there always is something to wait for.
*/
@@ -1731,9 +1578,6 @@ SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
}
put_pid(pid);
-
- /* avoid REGPARM breakage on x86: */
- asmlinkage_protect(5, ret, which, upid, infop, options, ru);
return ret;
}
@@ -1771,8 +1615,6 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
ret = do_wait(&wo);
put_pid(pid);
- /* avoid REGPARM breakage on x86: */
- asmlinkage_protect(4, ret, upid, stat_addr, options, ru);
return ret;
}
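
Note on the wait_task_zombie() hunks above: replacing xchg() with cmpxchg() means a waiter claims the child only if its exit_state is still EXIT_ZOMBIE, so exactly one thread moves it to EXIT_DEAD (or EXIT_TRACE for a ptrace-reparented leader) and every other racing waiter simply returns 0 instead of tripping the old BUG_ON(). A minimal user-space sketch of that single-winner pattern, using C11 atomics as a stand-in for the kernel's cmpxchg(); the struct and enum values here are made-up placeholders, not the kernel's definitions:

/*
 * Illustrative sketch only -- not part of the commit. Models the
 * single-winner EXIT_ZOMBIE -> EXIT_DEAD/EXIT_TRACE transition with
 * C11 atomics; struct task and the state values are stand-ins.
 */
#include <stdatomic.h>
#include <stdio.h>

enum { EXIT_ZOMBIE = 1, EXIT_DEAD = 2, EXIT_TRACE = 3 };

struct task { _Atomic int exit_state; };

/* Returns 1 iff this caller atomically claimed the zombie. */
static int claim_task(struct task *p, int new_state)
{
	int expected = EXIT_ZOMBIE;

	/*
	 * Equivalent in spirit to
	 *   cmpxchg(&p->exit_state, EXIT_ZOMBIE, state) == EXIT_ZOMBIE
	 * in the diff: only one concurrent caller can observe
	 * EXIT_ZOMBIE and swap it; everyone else sees the new state
	 * and backs off.
	 */
	return atomic_compare_exchange_strong(&p->exit_state, &expected,
					      new_state);
}

int main(void)
{
	struct task t = { EXIT_ZOMBIE };

	printf("first waiter:  %d\n", claim_task(&t, EXIT_DEAD));  /* 1 */
	printf("second waiter: %d\n", claim_task(&t, EXIT_TRACE)); /* 0 */
	return 0;
}

Built with any C11 compiler, the first call wins and prints 1; the second finds the state already changed and prints 0, mirroring the "only one thread can do this" comment in the patch.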