From 0771dfefc9e538f077d0b43b6dec19a5a67d0e70 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Mar 2006 01:16:22 -0800 Subject: [PATCH] lightweight robust futexes: core Add the core infrastructure for robust futexes: structure definitions, the new syscalls and the do_exit() based cleanup mechanism. Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Arjan van de Ven Acked-by: Ulrich Drepper Cc: Michael Kerrisk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 8037405e136..aecb48ca737 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -852,6 +853,8 @@ fastcall NORET_TYPE void do_exit(long code) exit_itimers(tsk->signal); acct_process(code); } + if (unlikely(tsk->robust_list)) + exit_robust_list(tsk); exit_mm(tsk); exit_sem(tsk); -- cgit v1.2.3-70-g09d2 From 34f192c6527f20c47ccec239e7d51a27691b93fc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Mar 2006 01:16:24 -0800 Subject: [PATCH] lightweight robust futexes: compat 32-bit syscall compatibility support. (This patch also moves all futex related compat functionality into kernel/futex_compat.c.) Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner Signed-off-by: Arjan van de Ven Acked-by: Ulrich Drepper Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compat.h | 18 +++++++ include/linux/sched.h | 3 ++ kernel/Makefile | 3 ++ kernel/compat.c | 23 -------- kernel/exit.c | 5 ++ kernel/futex_compat.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 171 insertions(+), 23 deletions(-) create mode 100644 kernel/futex_compat.c (limited to 'kernel/exit.c') diff --git a/include/linux/compat.h b/include/linux/compat.h index 24d659cdbaf..6d3a654be1a 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -147,6 +147,24 @@ typedef struct compat_sigevent { } _sigev_un; } compat_sigevent_t; +struct compat_robust_list { + compat_uptr_t next; +}; + +struct compat_robust_list_head { + struct compat_robust_list list; + compat_long_t futex_offset; + compat_uptr_t list_op_pending; +}; + +extern void compat_exit_robust_list(struct task_struct *curr); + +asmlinkage long +compat_sys_set_robust_list(struct compat_robust_list_head __user *head, + compat_size_t len); +asmlinkage long +compat_sys_get_robust_list(int pid, compat_uptr_t *head_ptr, + compat_size_t __user *len_ptr); long compat_sys_semctl(int first, int second, int third, void __user *uptr); long compat_sys_msgsnd(int first, int second, int third, void __user *uptr); diff --git a/include/linux/sched.h b/include/linux/sched.h index fd4848f2d75..20b4f0372e4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -874,6 +874,9 @@ struct task_struct { int cpuset_mem_spread_rotor; #endif struct robust_list_head __user *robust_list; +#ifdef CONFIG_COMPAT + struct compat_robust_list_head __user *compat_robust_list; +#endif atomic_t fs_excl; /* holding fs exclusive resources */ struct rcu_head rcu; diff --git a/kernel/Makefile b/kernel/Makefile index ff1c11dc12c..58908f9d156 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -12,6 +12,9 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ obj-$(CONFIG_DEBUG_MUTEXES) += mutex-debug.o obj-$(CONFIG_FUTEX) += futex.o +ifeq ($(CONFIG_COMPAT),y) +obj-$(CONFIG_FUTEX) += futex_compat.o +endif obj-$(CONFIG_GENERIC_ISA_DMA) += 
dma.o obj-$(CONFIG_SMP) += cpu.o spinlock.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o diff --git a/kernel/compat.c b/kernel/compat.c index b9bdd1271f4..c1601a84f8d 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -17,7 +17,6 @@ #include #include #include /* for MAX_SCHEDULE_TIMEOUT */ -#include /* for FUTEX_WAIT */ #include #include #include @@ -239,28 +238,6 @@ asmlinkage long compat_sys_sigprocmask(int how, compat_old_sigset_t __user *set, return ret; } -#ifdef CONFIG_FUTEX -asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, int val, - struct compat_timespec __user *utime, u32 __user *uaddr2, - int val3) -{ - struct timespec t; - unsigned long timeout = MAX_SCHEDULE_TIMEOUT; - int val2 = 0; - - if ((op == FUTEX_WAIT) && utime) { - if (get_compat_timespec(&t, utime)) - return -EFAULT; - timeout = timespec_to_jiffies(&t) + 1; - } - if (op >= FUTEX_REQUEUE) - val2 = (int) (unsigned long) utime; - - return do_futex((unsigned long)uaddr, op, val, timeout, - (unsigned long)uaddr2, val2, val3); -} -#endif - asmlinkage long compat_sys_setrlimit(unsigned int resource, struct compat_rlimit __user *rlim) { diff --git a/kernel/exit.c b/kernel/exit.c index aecb48ca737..a8c7efc7a68 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -855,6 +856,10 @@ fastcall NORET_TYPE void do_exit(long code) } if (unlikely(tsk->robust_list)) exit_robust_list(tsk); +#ifdef CONFIG_COMPAT + if (unlikely(tsk->compat_robust_list)) + compat_exit_robust_list(tsk); +#endif exit_mm(tsk); exit_sem(tsk); diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c new file mode 100644 index 00000000000..c153559ef28 --- /dev/null +++ b/kernel/futex_compat.c @@ -0,0 +1,142 @@ +/* + * linux/kernel/futex_compat.c + * + * Futex compatibililty routines. + * + * Copyright 2006, Red Hat, Inc., Ingo Molnar + */ + +#include +#include +#include + +#include + +/* + * Walk curr->robust_list (very carefully, it's a userspace list!) + * and mark any locks found there dead, and notify any waiters. + * + * We silently return on any sign of list-walking problem. 
+ */ +void compat_exit_robust_list(struct task_struct *curr) +{ + struct compat_robust_list_head __user *head = curr->compat_robust_list; + struct robust_list __user *entry, *pending; + compat_uptr_t uentry, upending; + unsigned int limit = ROBUST_LIST_LIMIT; + compat_long_t futex_offset; + + /* + * Fetch the list head (which was registered earlier, via + * sys_set_robust_list()): + */ + if (get_user(uentry, &head->list.next)) + return; + entry = compat_ptr(uentry); + /* + * Fetch the relative futex offset: + */ + if (get_user(futex_offset, &head->futex_offset)) + return; + /* + * Fetch any possibly pending lock-add first, and handle it + * if it exists: + */ + if (get_user(upending, &head->list_op_pending)) + return; + pending = compat_ptr(upending); + if (upending) + handle_futex_death((void *)pending + futex_offset, curr); + + while (compat_ptr(uentry) != &head->list) { + /* + * A pending lock might already be on the list, so + * dont process it twice: + */ + if (entry != pending) + if (handle_futex_death((void *)entry + futex_offset, + curr)) + return; + + /* + * Fetch the next entry in the list: + */ + if (get_user(uentry, (compat_uptr_t *)&entry->next)) + return; + entry = compat_ptr(uentry); + /* + * Avoid excessively long or circular lists: + */ + if (!--limit) + break; + + cond_resched(); + } +} + +asmlinkage long +compat_sys_set_robust_list(struct compat_robust_list_head __user *head, + compat_size_t len) +{ + if (unlikely(len != sizeof(*head))) + return -EINVAL; + + current->compat_robust_list = head; + + return 0; +} + +asmlinkage long +compat_sys_get_robust_list(int pid, compat_uptr_t *head_ptr, + compat_size_t __user *len_ptr) +{ + struct compat_robust_list_head *head; + unsigned long ret; + + if (!pid) + head = current->compat_robust_list; + else { + struct task_struct *p; + + ret = -ESRCH; + read_lock(&tasklist_lock); + p = find_task_by_pid(pid); + if (!p) + goto err_unlock; + ret = -EPERM; + if ((current->euid != p->euid) && (current->euid != p->uid) && + !capable(CAP_SYS_PTRACE)) + goto err_unlock; + head = p->compat_robust_list; + read_unlock(&tasklist_lock); + } + + if (put_user(sizeof(*head), len_ptr)) + return -EFAULT; + return put_user(ptr_to_compat(head), head_ptr); + +err_unlock: + read_unlock(&tasklist_lock); + + return ret; +} + +asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, int val, + struct compat_timespec __user *utime, u32 __user *uaddr2, + int val3) +{ + struct timespec t; + unsigned long timeout = MAX_SCHEDULE_TIMEOUT; + int val2 = 0; + + if ((op == FUTEX_WAIT) && utime) { + if (get_compat_timespec(&t, utime)) + return -EFAULT; + timeout = timespec_to_jiffies(&t) + 1; + } + if (op >= FUTEX_REQUEUE) + val2 = (int) (unsigned long) utime; + + return do_futex((unsigned long)uaddr, op, val, timeout, + (unsigned long)uaddr2, val2, val3); +} -- cgit v1.2.3-70-g09d2 From fef23e7fbb11a0a78cd61935f7056bc2b237995a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 28 Mar 2006 16:10:58 -0800 Subject: [PATCH] exec: allow init to exec from any thread. After looking at the problem of init calling exec some more I figured out an easy way to make the code work. The actual symptom without this patch is that all threads will die except pid == 1 and the thread calling exec. The thread calling exec will wait forever for pid == 1 to die. Since pid == 1 does not install a handler for SIGKILL it will never die. This modifies the tests for init from current->pid == 1 to the equivalent current == child_reaper.
And then it causes exec in the ugly case to modify child_reaper. The only weird symptom is that you wind up with an init process that doesn't have the oldest start time on the box. Signed-off-by: Eric W. Biederman Cc: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 13 ++++++++++++- kernel/exit.c | 2 +- kernel/signal.c | 2 +- 3 files changed, 14 insertions(+), 3 deletions(-) (limited to 'kernel/exit.c') diff --git a/fs/exec.c b/fs/exec.c index c7397c46ad6..d0ecea0781f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -660,12 +660,23 @@ static int de_thread(struct task_struct *tsk) struct dentry *proc_dentry1, *proc_dentry2; unsigned long ptrace; + leader = current->group_leader; + /* + * If our leader is the child_reaper become + * the child_reaper and resend SIGKILL signal. + */ + if (unlikely(leader == child_reaper)) { + write_lock(&tasklist_lock); + child_reaper = current; + zap_other_threads(current); + write_unlock(&tasklist_lock); + } + /* * Wait for the thread group leader to be a zombie. * It should already be zombie at this point, most * of the time. */ - leader = current->group_leader; while (leader->exit_state != EXIT_ZOMBIE) yield(); diff --git a/kernel/exit.c b/kernel/exit.c index a8c7efc7a68..223a8802b66 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -807,7 +807,7 @@ fastcall NORET_TYPE void do_exit(long code) panic("Aiee, killing interrupt handler!"); if (unlikely(!tsk->pid)) panic("Attempted to kill the idle task!"); - if (unlikely(tsk->pid == 1)) + if (unlikely(tsk == child_reaper)) panic("Attempted to kill init!"); if (unlikely(current->ptrace & PT_TRACE_EXIT)) { diff --git a/kernel/signal.c b/kernel/signal.c index 75f7341b0c3..dc8f91bf9f8 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1990,7 +1990,7 @@ relock: continue; /* Init gets no signals it doesn't want. */ - if (current->pid == 1) + if (current == child_reaper) continue; if (sig_kernel_stop(signr)) { -- cgit v1.2.3-70-g09d2 From d799f03597cabc6112acb518fc8ab4487aa4f953 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:04 -0800 Subject: [PATCH] choose_new_parent: remove unused arg, sanitize exit_state check 'child_reaper' arg is not used in choose_new_parent(). "->exit_state >= EXIT_ZOMBIE" check is a leftover, was valid when EXIT_ZOMBIE lived in ->state var. Signed-off-by: Oleg Nesterov Acked-by: Eric Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 223a8802b66..e04a59405e9 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -538,13 +538,13 @@ static void exit_mm(struct task_struct * tsk) mmput(mm); } -static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_reaper) +static inline void choose_new_parent(task_t *p, task_t *reaper) { /* * Make sure we're not reparenting to ourselves and that * the parent is not a zombie. 
*/ - BUG_ON(p == reaper || reaper->exit_state >= EXIT_ZOMBIE); + BUG_ON(p == reaper || reaper->exit_state); p->real_parent = reaper; } @@ -645,7 +645,7 @@ static void forget_original_parent(struct task_struct * father, if (father == p->real_parent) { /* reparent with a reaper, real father it's us */ - choose_new_parent(p, reaper, child_reaper); + choose_new_parent(p, reaper); reparent_thread(p, father, 0); } else { /* reparent ptraced task to its real parent */ @@ -666,7 +666,7 @@ static void forget_original_parent(struct task_struct * father, } list_for_each_safe(_p, _n, &father->ptrace_children) { p = list_entry(_p,struct task_struct,ptrace_list); - choose_new_parent(p, reaper, child_reaper); + choose_new_parent(p, reaper); reparent_thread(p, father, 1); } } -- cgit v1.2.3-70-g09d2 From 8fafabd86f1b75ed3cc6a6ffbe6c3e53e3d8457d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:05 -0800 Subject: [PATCH] remove add_parent()'s parent argument add_parent(p, parent) is always called with parent == p->parent, and it makes no sense to do it differently. This patch removes this argument. No changes in affected .o files. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/kernel/irixsig.c | 4 ++-- fs/exec.c | 4 ++-- include/linux/sched.h | 4 ++-- kernel/exit.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'kernel/exit.c') diff --git a/arch/mips/kernel/irixsig.c b/arch/mips/kernel/irixsig.c index 08273a2a501..8150f071f80 100644 --- a/arch/mips/kernel/irixsig.c +++ b/arch/mips/kernel/irixsig.c @@ -603,7 +603,7 @@ repeat: /* move to end of parent's list to avoid starvation */ write_lock_irq(&tasklist_lock); remove_parent(p); - add_parent(p, p->parent); + add_parent(p); write_unlock_irq(&tasklist_lock); retval = ru ? 
getrusage(p, RUSAGE_BOTH, ru) : 0; if (retval) @@ -643,7 +643,7 @@ repeat: write_lock_irq(&tasklist_lock); remove_parent(p); p->parent = p->real_parent; - add_parent(p, p->parent); + add_parent(p); do_notify_parent(p, SIGCHLD); write_unlock_irq(&tasklist_lock); } else diff --git a/fs/exec.c b/fs/exec.c index db0769447d3..9046ad2b061 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -725,8 +725,8 @@ static int de_thread(struct task_struct *tsk) current->group_leader = current; leader->group_leader = leader; - add_parent(current, current->parent); - add_parent(leader, leader->parent); + add_parent(current); + add_parent(leader); if (ptrace) { current->ptrace = ptrace; __ptrace_link(current, parent); diff --git a/include/linux/sched.h b/include/linux/sched.h index 5f5ab98bbb6..b4b14c32b28 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1184,7 +1184,7 @@ extern void wait_task_inactive(task_t * p); #endif #define remove_parent(p) list_del_init(&(p)->sibling) -#define add_parent(p, parent) list_add_tail(&(p)->sibling,&(parent)->children) +#define add_parent(p) list_add_tail(&(p)->sibling,&(p)->parent->children) #define REMOVE_LINKS(p) do { \ if (thread_group_leader(p)) \ @@ -1195,7 +1195,7 @@ extern void wait_task_inactive(task_t * p); #define SET_LINKS(p) do { \ if (thread_group_leader(p)) \ list_add_tail(&(p)->tasks,&init_task.tasks); \ - add_parent(p, (p)->parent); \ + add_parent(p); \ } while (0) #define next_task(p) list_entry((p)->tasks.next, struct task_struct, tasks) diff --git a/kernel/exit.c b/kernel/exit.c index e04a59405e9..df26c33037d 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1281,7 +1281,7 @@ bail_ref: /* move to end of parent's list to avoid starvation */ remove_parent(p); - add_parent(p, p->parent); + add_parent(p); write_unlock_irq(&tasklist_lock); -- cgit v1.2.3-70-g09d2 From 9b678ece42893b53aae5ed7cb8d7cb261cacb72c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:05 -0800 Subject: [PATCH] don't use REMOVE_LINKS/SET_LINKS for reparenting There are places where the kernel uses REMOVE_LINKS/SET_LINKS while changing a process's ->parent. Use add_parent/remove_parent instead; they don't abuse the global process list. Signed-off-by: Oleg Nesterov Cc: "Eric W. 
Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 4 ++-- kernel/ptrace.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index df26c33037d..5b5e8b67680 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -238,10 +238,10 @@ static void reparent_to_init(void) ptrace_unlink(current); /* Reparent to init */ - REMOVE_LINKS(current); + remove_parent(current); current->parent = child_reaper; current->real_parent = child_reaper; - SET_LINKS(current); + add_parent(current); /* Set the exit signal to SIGCHLD so we signal init on exit */ current->exit_signal = SIGCHLD; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index d95a72c9279..86a7f6c60cb 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -35,9 +35,9 @@ void __ptrace_link(task_t *child, task_t *new_parent) if (child->parent == new_parent) return; list_add(&child->ptrace_list, &child->parent->ptrace_children); - REMOVE_LINKS(child); + remove_parent(child); child->parent = new_parent; - SET_LINKS(child); + add_parent(child); } /* @@ -77,9 +77,9 @@ void __ptrace_unlink(task_t *child) child->ptrace = 0; if (!list_empty(&child->ptrace_list)) { list_del_init(&child->ptrace_list); - REMOVE_LINKS(child); + remove_parent(child); child->parent = child->real_parent; - SET_LINKS(child); + add_parent(child); } ptrace_untrace(child); -- cgit v1.2.3-70-g09d2 From c97d98931ac52ef110b62d9b75c6a6f2bfbc1898 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:06 -0800 Subject: [PATCH] kill SET_LINKS/REMOVE_LINKS Both SET_LINKS() and SET_LINKS/REMOVE_LINKS() have exactly one caller, and these callers already check thread_group_leader(). This patch kills these macros; they mix two different things: setting a process's parent and registering it in the init_task.tasks list. Callers are updated to do these actions by hand. Signed-off-by: Oleg Nesterov Cc: "Eric W. 
Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 12 ------------ kernel/exit.c | 4 +++- kernel/fork.c | 4 +++- 3 files changed, 6 insertions(+), 14 deletions(-) (limited to 'kernel/exit.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index b4b14c32b28..1f16fb1fea2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1186,18 +1186,6 @@ extern void wait_task_inactive(task_t * p); #define remove_parent(p) list_del_init(&(p)->sibling) #define add_parent(p) list_add_tail(&(p)->sibling,&(p)->parent->children) -#define REMOVE_LINKS(p) do { \ - if (thread_group_leader(p)) \ - list_del_init(&(p)->tasks); \ - remove_parent(p); \ - } while (0) - -#define SET_LINKS(p) do { \ - if (thread_group_leader(p)) \ - list_add_tail(&(p)->tasks,&init_task.tasks); \ - add_parent(p); \ - } while (0) - #define next_task(p) list_entry((p)->tasks.next, struct task_struct, tasks) #define prev_task(p) list_entry((p)->tasks.prev, struct task_struct, tasks) diff --git a/kernel/exit.c b/kernel/exit.c index 5b5e8b67680..f436a6bd3fb 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -54,11 +54,13 @@ static void __unhash_process(struct task_struct *p) if (thread_group_leader(p)) { detach_pid(p, PIDTYPE_PGID); detach_pid(p, PIDTYPE_SID); + + list_del_init(&p->tasks); if (p->pid) __get_cpu_var(process_counts)--; } - REMOVE_LINKS(p); + remove_parent(p); } void release_task(struct task_struct * p) diff --git a/kernel/fork.c b/kernel/fork.c index c49bd193b05..74c67629ee6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1181,7 +1181,7 @@ static task_t *copy_process(unsigned long clone_flags, */ p->ioprio = current->ioprio; - SET_LINKS(p); + add_parent(p); if (unlikely(p->ptrace & PT_PTRACED)) __ptrace_link(p, current->parent); @@ -1191,6 +1191,8 @@ static task_t *copy_process(unsigned long clone_flags, p->signal->session = current->signal->session; attach_pid(p, PIDTYPE_PGID, process_group(p)); attach_pid(p, PIDTYPE_SID, p->signal->session); + + list_add_tail(&p->tasks, &init_task.tasks); if (p->pid) __get_cpu_var(process_counts)++; } -- cgit v1.2.3-70-g09d2 From 73b9ebfe126a4a886ee46cbab637374d7024668a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:07 -0800 Subject: [PATCH] pidhash: don't count idle threads fork_idle() does unhash_process() just after copy_process(). By contrast, boot_cpu's idle thread explicitly registers itself for each pid_type with nr = 0. copy_process() already checks p->pid != 0 before process_counts++, so I think we can just skip attach_pid() calls and job control inits for idle threads and kill unhash_process(). We don't need to clean up ->proc_dentry in fork_idle() because with this patch idle threads are never hashed in kernel/pid.c:pid_hash[]. We don't need to hash pid == 0 in pidmap_init(). free_pidmap() is never called with a pid == 0 arg, so it will never be reused. So it is still possible to use pid == 0 in any PIDTYPE_xxx namespace from kernel/pid.c's POV. However with this patch we don't hash pid == 0 for the PIDTYPE_PID case. We still have PIDTYPE_PGID/PIDTYPE_SID entries with pid == 0: /sbin/init and kernel threads which don't call daemonize(). Signed-off-by: Oleg Nesterov Cc: "Eric W. 
Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/smp.c | 1 - include/linux/sched.h | 2 -- kernel/exit.c | 18 +----------------- kernel/fork.c | 35 ++++++++++++++++++----------------- kernel/pid.c | 10 +--------- 5 files changed, 20 insertions(+), 46 deletions(-) (limited to 'kernel/exit.c') diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c index c8d8d0ac1a7..511116aebaf 100644 --- a/arch/um/kernel/smp.c +++ b/arch/um/kernel/smp.c @@ -143,7 +143,6 @@ void smp_prepare_cpus(unsigned int maxcpus) idle = idle_thread(cpu); init_idle(idle, cpu); - unhash_process(idle); waittime = 200000000; while (waittime-- && !cpu_isset(cpu, cpu_callin_map)) diff --git a/include/linux/sched.h b/include/linux/sched.h index 1f16fb1fea2..ddc0df7f8bf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1214,8 +1214,6 @@ static inline int thread_group_empty(task_t *p) #define delay_group_leader(p) \ (thread_group_leader(p) && !thread_group_empty(p)) -extern void unhash_process(struct task_struct *p); - /* * Protects ->fs, ->files, ->mm, ->ptrace, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. Also diff --git a/kernel/exit.c b/kernel/exit.c index f436a6bd3fb..a94e1c31131 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -56,8 +56,7 @@ static void __unhash_process(struct task_struct *p) detach_pid(p, PIDTYPE_SID); list_del_init(&p->tasks); - if (p->pid) - __get_cpu_var(process_counts)--; + __get_cpu_var(process_counts)--; } remove_parent(p); @@ -118,21 +117,6 @@ repeat: goto repeat; } -/* we are using it only for SMP init */ - -void unhash_process(struct task_struct *p) -{ - struct dentry *proc_dentry; - - spin_lock(&p->proc_lock); - proc_dentry = proc_pid_unhash(p); - write_lock_irq(&tasklist_lock); - __unhash_process(p); - write_unlock_irq(&tasklist_lock); - spin_unlock(&p->proc_lock); - proc_pid_flush(proc_dentry); -} - /* * This checks not only the pgrp, but falls back on the pid if no * satisfactory pgrp is found. 
I dunno - gdb doesn't work correctly diff --git a/kernel/fork.c b/kernel/fork.c index 74c67629ee6..0c32e28cdc5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1181,25 +1181,26 @@ static task_t *copy_process(unsigned long clone_flags, */ p->ioprio = current->ioprio; - add_parent(p); - if (unlikely(p->ptrace & PT_PTRACED)) - __ptrace_link(p, current->parent); - - if (thread_group_leader(p)) { - p->signal->tty = current->signal->tty; - p->signal->pgrp = process_group(current); - p->signal->session = current->signal->session; - attach_pid(p, PIDTYPE_PGID, process_group(p)); - attach_pid(p, PIDTYPE_SID, p->signal->session); - - list_add_tail(&p->tasks, &init_task.tasks); - if (p->pid) + if (likely(p->pid)) { + add_parent(p); + if (unlikely(p->ptrace & PT_PTRACED)) + __ptrace_link(p, current->parent); + + if (thread_group_leader(p)) { + p->signal->tty = current->signal->tty; + p->signal->pgrp = process_group(current); + p->signal->session = current->signal->session; + attach_pid(p, PIDTYPE_PGID, process_group(p)); + attach_pid(p, PIDTYPE_SID, p->signal->session); + + list_add_tail(&p->tasks, &init_task.tasks); __get_cpu_var(process_counts)++; + } + attach_pid(p, PIDTYPE_TGID, p->tgid); + attach_pid(p, PIDTYPE_PID, p->pid); + nr_threads++; } - attach_pid(p, PIDTYPE_TGID, p->tgid); - attach_pid(p, PIDTYPE_PID, p->pid); - nr_threads++; total_forks++; spin_unlock(¤t->sighand->siglock); write_unlock_irq(&tasklist_lock); @@ -1263,7 +1264,7 @@ task_t * __devinit fork_idle(int cpu) if (!task) return ERR_PTR(-ENOMEM); init_idle(task, cpu); - unhash_process(task); + return task; } diff --git a/kernel/pid.c b/kernel/pid.c index 7781d999905..a9f2dfd006d 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -247,16 +247,8 @@ void __init pidhash_init(void) void __init pidmap_init(void) { - int i; - pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL); + /* Reserve PID 0. We never call free_pidmap(0) */ set_bit(0, pidmap_array->page); atomic_dec(&pidmap_array->nr_free); - - /* - * Allocate PID 0, and hash it via all PID types: - */ - - for (i = 0; i < PIDTYPE_MAX; i++) - attach_pid(current, i, 0); } -- cgit v1.2.3-70-g09d2 From 6ac781b11ade6e3451f6b460991c8b2b87e58280 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:09 -0800 Subject: [PATCH] reparent_thread: use remove_parent/add_parent Use remove_parent/add_parent instead of open coding. No changes in kernel/exit.o Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index a94e1c31131..98eec590ecb 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -555,9 +555,9 @@ static void reparent_thread(task_t *p, task_t *father, int traced) * anyway, so let go of it. */ p->ptrace = 0; - list_del_init(&p->sibling); + remove_parent(p); p->parent = p->real_parent; - list_add_tail(&p->sibling, &p->parent->children); + add_parent(p); /* If we'd notified the old parent about this child's death, * also notify the new parent. -- cgit v1.2.3-70-g09d2 From 1f09f9749cdde4e69f95d62d96d2e03f50b3353c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:11 -0800 Subject: [PATCH] release_task: replace open-coded ptrace_unlink() Use ptrace_unlink() instead of open-coding. No changes in kernel/exit.o Signed-off-by: Oleg Nesterov Cc: "Eric W. 
Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 98eec590ecb..77c35efad88 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -68,13 +68,12 @@ void release_task(struct task_struct * p) task_t *leader; struct dentry *proc_dentry; -repeat: +repeat: atomic_dec(&p->user->processes); spin_lock(&p->proc_lock); proc_dentry = proc_pid_unhash(p); write_lock_irq(&tasklist_lock); - if (unlikely(p->ptrace)) - __ptrace_unlink(p); + ptrace_unlink(p); BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); __exit_signal(p); /* -- cgit v1.2.3-70-g09d2 From 6a14c5c9da0b4c34b5be783403c54f0396fcfe77 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:18 -0800 Subject: [PATCH] move __exit_signal() to kernel/exit.c __exit_signal() is private to release_task() now. I think it is better to make it static in kernel/exit.c and export flush_sigqueue() instead - this function is much more simple and straightforward. Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - include/linux/signal.h | 2 ++ kernel/exit.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++ kernel/signal.c | 65 +------------------------------------------------- 4 files changed, 66 insertions(+), 65 deletions(-) (limited to 'kernel/exit.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index 921148277da..a913fca9e70 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1151,7 +1151,6 @@ extern void exit_thread(void); extern void exit_files(struct task_struct *); extern void __cleanup_signal(struct signal_struct *); extern void cleanup_sighand(struct task_struct *); -extern void __exit_signal(struct task_struct *); extern void exit_itimers(struct signal_struct *); extern NORET_TYPE void do_group_exit(int); diff --git a/include/linux/signal.h b/include/linux/signal.h index b7d093520bb..162a8fd10b2 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -249,6 +249,8 @@ static inline void init_sigpending(struct sigpending *sig) INIT_LIST_HEAD(&sig->list); } +extern void flush_sigqueue(struct sigpending *queue); + /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */ static inline int valid_signal(unsigned long sig) { diff --git a/kernel/exit.c b/kernel/exit.c index 77c35efad88..3823ec89d7b 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -62,6 +63,68 @@ static void __unhash_process(struct task_struct *p) remove_parent(p); } +/* + * This function expects the tasklist_lock write-locked. 
+ */ +static void __exit_signal(struct task_struct *tsk) +{ + struct signal_struct *sig = tsk->signal; + struct sighand_struct *sighand; + + BUG_ON(!sig); + BUG_ON(!atomic_read(&sig->count)); + + rcu_read_lock(); + sighand = rcu_dereference(tsk->sighand); + spin_lock(&sighand->siglock); + + posix_cpu_timers_exit(tsk); + if (atomic_dec_and_test(&sig->count)) + posix_cpu_timers_exit_group(tsk); + else { + /* + * If there is any task waiting for the group exit + * then notify it: + */ + if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count) { + wake_up_process(sig->group_exit_task); + sig->group_exit_task = NULL; + } + if (tsk == sig->curr_target) + sig->curr_target = next_thread(tsk); + /* + * Accumulate here the counters for all threads but the + * group leader as they die, so they can be added into + * the process-wide totals when those are taken. + * The group leader stays around as a zombie as long + * as there are other threads. When it gets reaped, + * the exit.c code will add its counts into these totals. + * We won't ever get here for the group leader, since it + * will have been the last reference on the signal_struct. + */ + sig->utime = cputime_add(sig->utime, tsk->utime); + sig->stime = cputime_add(sig->stime, tsk->stime); + sig->min_flt += tsk->min_flt; + sig->maj_flt += tsk->maj_flt; + sig->nvcsw += tsk->nvcsw; + sig->nivcsw += tsk->nivcsw; + sig->sched_time += tsk->sched_time; + sig = NULL; /* Marker for below. */ + } + + tsk->signal = NULL; + cleanup_sighand(tsk); + spin_unlock(&sighand->siglock); + rcu_read_unlock(); + + clear_tsk_thread_flag(tsk,TIF_SIGPENDING); + flush_sigqueue(&tsk->pending); + if (sig) { + flush_sigqueue(&sig->shared_pending); + __cleanup_signal(sig); + } +} + void release_task(struct task_struct * p) { int zap_leader; diff --git a/kernel/signal.c b/kernel/signal.c index b29c868bd5e..6ea49f742a2 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -22,7 +22,6 @@ #include #include #include -#include #include #include #include @@ -295,7 +294,7 @@ static void __sigqueue_free(struct sigqueue *q) kmem_cache_free(sigqueue_cachep, q); } -static void flush_sigqueue(struct sigpending *queue) +void flush_sigqueue(struct sigpending *queue) { struct sigqueue *q; @@ -321,68 +320,6 @@ void flush_signals(struct task_struct *t) spin_unlock_irqrestore(&t->sighand->siglock, flags); } -/* - * This function expects the tasklist_lock write-locked. - */ -void __exit_signal(struct task_struct *tsk) -{ - struct signal_struct *sig = tsk->signal; - struct sighand_struct *sighand; - - BUG_ON(!sig); - BUG_ON(!atomic_read(&sig->count)); - - rcu_read_lock(); - sighand = rcu_dereference(tsk->sighand); - spin_lock(&sighand->siglock); - - posix_cpu_timers_exit(tsk); - if (atomic_dec_and_test(&sig->count)) - posix_cpu_timers_exit_group(tsk); - else { - /* - * If there is any task waiting for the group exit - * then notify it: - */ - if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count) { - wake_up_process(sig->group_exit_task); - sig->group_exit_task = NULL; - } - if (tsk == sig->curr_target) - sig->curr_target = next_thread(tsk); - /* - * Accumulate here the counters for all threads but the - * group leader as they die, so they can be added into - * the process-wide totals when those are taken. - * The group leader stays around as a zombie as long - * as there are other threads. When it gets reaped, - * the exit.c code will add its counts into these totals. 
- * We won't ever get here for the group leader, since it - * will have been the last reference on the signal_struct. - */ - sig->utime = cputime_add(sig->utime, tsk->utime); - sig->stime = cputime_add(sig->stime, tsk->stime); - sig->min_flt += tsk->min_flt; - sig->maj_flt += tsk->maj_flt; - sig->nvcsw += tsk->nvcsw; - sig->nivcsw += tsk->nivcsw; - sig->sched_time += tsk->sched_time; - sig = NULL; /* Marker for below. */ - } - - tsk->signal = NULL; - cleanup_sighand(tsk); - spin_unlock(&sighand->siglock); - rcu_read_unlock(); - - clear_tsk_thread_flag(tsk,TIF_SIGPENDING); - flush_sigqueue(&tsk->pending); - if (sig) { - flush_sigqueue(&sig->shared_pending); - __cleanup_signal(sig); - } -} - /* * Flush all handlers for a task. */ -- cgit v1.2.3-70-g09d2 From 35f5cad8c4bab94ecc5acdc4055df5ea12dc76f8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:19 -0800 Subject: [PATCH] revert "Optimize sys_times for a single thread process" This patch reverts 'CONFIG_SMP && thread_group_empty()' optimization in sys_times(). The reason is that the next patch breaks memory ordering which is needed for that optimization. tasklist_lock in sys_times() will be eliminated completely by further patch. Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 6 +---- kernel/sys.c | 86 ++++++++++++++++++----------------------------------------- 2 files changed, 27 insertions(+), 65 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 3823ec89d7b..6b2e4cf3e14 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -139,11 +139,7 @@ repeat: ptrace_unlink(p); BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); __exit_signal(p); - /* - * Note that the fastpath in sys_times depends on __exit_signal having - * updated the counters before a task is removed from the tasklist of - * the process by __unhash_process. - */ + __unhash_process(p); /* diff --git a/kernel/sys.c b/kernel/sys.c index c93d37f71ae..84371fdc660 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1202,69 +1202,35 @@ asmlinkage long sys_times(struct tms __user * tbuf) */ if (tbuf) { struct tms tmp; + struct task_struct *tsk = current; + struct task_struct *t; cputime_t utime, stime, cutime, cstime; -#ifdef CONFIG_SMP - if (thread_group_empty(current)) { - /* - * Single thread case without the use of any locks. - * - * We may race with release_task if two threads are - * executing. However, release task first adds up the - * counters (__exit_signal) before removing the task - * from the process tasklist (__unhash_process). - * __exit_signal also acquires and releases the - * siglock which results in the proper memory ordering - * so that the list modifications are always visible - * after the counters have been updated. - * - * If the counters have been updated by the second thread - * but the thread has not yet been removed from the list - * then the other branch will be executing which will - * block on tasklist_lock until the exit handling of the - * other task is finished. - * - * This also implies that the sighand->siglock cannot - * be held by another processor. So we can also - * skip acquiring that lock. 
- */ - utime = cputime_add(current->signal->utime, current->utime); - stime = cputime_add(current->signal->utime, current->stime); - cutime = current->signal->cutime; - cstime = current->signal->cstime; - } else -#endif - { - - /* Process with multiple threads */ - struct task_struct *tsk = current; - struct task_struct *t; - - read_lock(&tasklist_lock); - utime = tsk->signal->utime; - stime = tsk->signal->stime; - t = tsk; - do { - utime = cputime_add(utime, t->utime); - stime = cputime_add(stime, t->stime); - t = next_thread(t); - } while (t != tsk); + read_lock(&tasklist_lock); + utime = tsk->signal->utime; + stime = tsk->signal->stime; + t = tsk; + do { + utime = cputime_add(utime, t->utime); + stime = cputime_add(stime, t->stime); + t = next_thread(t); + } while (t != tsk); + + /* + * While we have tasklist_lock read-locked, no dying thread + * can be updating current->signal->[us]time. Instead, + * we got their counts included in the live thread loop. + * However, another thread can come in right now and + * do a wait call that updates current->signal->c[us]time. + * To make sure we always see that pair updated atomically, + * we take the siglock around fetching them. + */ + spin_lock_irq(&tsk->sighand->siglock); + cutime = tsk->signal->cutime; + cstime = tsk->signal->cstime; + spin_unlock_irq(&tsk->sighand->siglock); + read_unlock(&tasklist_lock); - /* - * While we have tasklist_lock read-locked, no dying thread - * can be updating current->signal->[us]time. Instead, - * we got their counts included in the live thread loop. - * However, another thread can come in right now and - * do a wait call that updates current->signal->c[us]time. - * To make sure we always see that pair updated atomically, - * we take the siglock around fetching them. - */ - spin_lock_irq(&tsk->sighand->siglock); - cutime = tsk->signal->cutime; - cstime = tsk->signal->cstime; - spin_unlock_irq(&tsk->sighand->siglock); - read_unlock(&tasklist_lock); - } tmp.tms_utime = cputime_to_clock_t(utime); tmp.tms_stime = cputime_to_clock_t(stime); tmp.tms_cutime = cputime_to_clock_t(cutime); -- cgit v1.2.3-70-g09d2 From 5876700cd399112ecfa70df36203c8c6660d84f8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:20 -0800 Subject: [PATCH] do __unhash_process() under ->siglock This patch moves the __unhash_process() call from release_task() to __exit_signal(), so __detach_pid() is called with ->siglock held. This means we don't need tasklist_lock to iterate over the thread group anymore: copy_process() was already changed to do attach_pid() under ->siglock. Eric's "pidhash-kill-switch_exec_pids.patch" from -mm changed de_thread() so it doesn't touch PIDTYPE_TGID. NOTE: de_thread() still needs some attention. It still changes task->pid locklessly. Taking ->sighand.siglock here allows us to do more tasklist_lock removals. Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 6b2e4cf3e14..44d6c6e3896 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -112,6 +112,8 @@ static void __exit_signal(struct task_struct *tsk) sig = NULL; /* Marker for below. 
*/ } + __unhash_process(tsk); + tsk->signal = NULL; cleanup_sighand(tsk); spin_unlock(&sighand->siglock); @@ -140,8 +142,6 @@ repeat: BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children)); __exit_signal(p); - __unhash_process(p); - /* * If we are the last non-leader member of the thread * group, and the leader is zombie, then notify the -- cgit v1.2.3-70-g09d2 From aacc90944d4b1f2fcec73a8127eb60a3a701ce1c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:23 -0800 Subject: [PATCH] do_group_exit: don't take tasklist_lock do_group_exit() takes tasklist_lock for zap_other_threads(); this is unneeded now. Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/exit.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'kernel/exit.c') diff --git a/kernel/exit.c b/kernel/exit.c index 44d6c6e3896..aea23e713cf 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -985,7 +985,6 @@ do_group_exit(int exit_code) else if (!thread_group_empty(current)) { struct signal_struct *const sig = current->signal; struct sighand_struct *const sighand = current->sighand; - read_lock(&tasklist_lock); spin_lock_irq(&sighand->siglock); if (sig->flags & SIGNAL_GROUP_EXIT) /* Another thread got here before we took the lock. */ @@ -995,7 +994,6 @@ do_group_exit(int exit_code) zap_other_threads(current); } spin_unlock_irq(&sighand->siglock); - read_unlock(&tasklist_lock); } do_exit(exit_code); -- cgit v1.2.3-70-g09d2 From 47e65328a7b1cdfc4e3102e50d60faf94ebba7d3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:25 -0800 Subject: [PATCH] pids: kill PIDTYPE_TGID This patch kills the PIDTYPE_TGID pid_type, thus saving one hash table in kernel/pid.c and speeding up subthread create/destroy a bit. It is also a preparation for the further tref/pids rework. This patch adds 'struct list_head thread_group' to 'struct task_struct' instead. We don't detach the group leader from the PIDTYPE_PID namespace until another thread inherits its ->pid == ->tgid, so we are safe wrt a premature free_pidmap(->tgid) call. Currently there are no users of find_task_by_pid_type(PIDTYPE_TGID). Should the need arise, we can use find_task_by_pid()->group_leader. Signed-off-by: Oleg Nesterov Acked-By: Eric Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid.h | 1 - include/linux/sched.h | 11 ++++++++--- kernel/exit.c | 10 +--------- kernel/fork.c | 4 +++- 4 files changed, 12 insertions(+), 14 deletions(-) (limited to 'kernel/exit.c') diff --git a/include/linux/pid.h b/include/linux/pid.h index 099e70ecf7c..5b9082cc600 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -4,7 +4,6 @@ enum pid_type { PIDTYPE_PID, - PIDTYPE_TGID, PIDTYPE_PGID, PIDTYPE_SID, PIDTYPE_MAX diff --git a/include/linux/sched.h b/include/linux/sched.h index a913fca9e70..99855f694eb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -752,6 +752,7 @@ struct task_struct { /* PID/PID hash table linkage. 
*/ struct pid pids[PIDTYPE_MAX]; + struct list_head thread_group; struct completion *vfork_done; /* for vfork() */ int __user *set_child_tid; /* CLONE_CHILD_SETTID */ @@ -1192,13 +1193,17 @@ extern void wait_task_inactive(task_t * p); #define while_each_thread(g, t) \ while ((t = next_thread(t)) != g) -extern task_t * FASTCALL(next_thread(const task_t *p)); - #define thread_group_leader(p) (p->pid == p->tgid) +static inline task_t *next_thread(task_t *p) +{ + return list_entry(rcu_dereference(p->thread_group.next), + task_t, thread_group); +} + static inline int thread_group_empty(task_t *p) { - return list_empty(&p->pids[PIDTYPE_TGID].pid_list); + return list_empty(&p->thread_group); } #define delay_group_leader(p) \ diff --git a/kernel/exit.c b/kernel/exit.c index aea23e713cf..22399caf757 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -51,7 +51,6 @@ static void __unhash_process(struct task_struct *p) { nr_threads--; detach_pid(p, PIDTYPE_PID); - detach_pid(p, PIDTYPE_TGID); if (thread_group_leader(p)) { detach_pid(p, PIDTYPE_PGID); detach_pid(p, PIDTYPE_SID); @@ -59,7 +58,7 @@ static void __unhash_process(struct task_struct *p) list_del_init(&p->tasks); __get_cpu_var(process_counts)--; } - + list_del_rcu(&p->thread_group); remove_parent(p); } @@ -964,13 +963,6 @@ asmlinkage long sys_exit(int error_code) do_exit((error_code&0xff)<<8); } -task_t fastcall *next_thread(const task_t *p) -{ - return pid_task(p->pids[PIDTYPE_TGID].pid_list.next, PIDTYPE_TGID); -} - -EXPORT_SYMBOL(next_thread); - /* * Take down every thread in the group. This is called by fatal signals * as well as by sys_exit_group (below). diff --git a/kernel/fork.c b/kernel/fork.c index 12cdd9fc9d0..bc551efb5fd 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1112,6 +1112,7 @@ static task_t *copy_process(unsigned long clone_flags, * We dont wake it up yet. */ p->group_leader = p; + INIT_LIST_HEAD(&p->thread_group); INIT_LIST_HEAD(&p->ptrace_children); INIT_LIST_HEAD(&p->ptrace_list); @@ -1165,7 +1166,9 @@ static task_t *copy_process(unsigned long clone_flags, retval = -EAGAIN; goto bad_fork_cleanup_namespace; } + p->group_leader = current->group_leader; + list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); if (current->signal->group_stop_count > 0) { /* @@ -1213,7 +1216,6 @@ static task_t *copy_process(unsigned long clone_flags, list_add_tail(&p->tasks, &init_task.tasks); __get_cpu_var(process_counts)++; } - attach_pid(p, PIDTYPE_TGID, p->tgid); attach_pid(p, PIDTYPE_PID, p->pid); nr_threads++; } -- cgit v1.2.3-70-g09d2 From a7e5328a06a2beee3a2bbfaf87ce2a7bbe937de1 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:27 -0800 Subject: [PATCH] cleanup __exit_signal->cleanup_sighand path Move 'tsk->sighand = NULL' from cleanup_sighand() to __exit_signal(). This makes the exit path more understandable and allows us to do cleanup_sighand() outside of ->siglock protected section. Signed-off-by: Oleg Nesterov Cc: "Eric W. 
Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- kernel/exit.c | 3 ++- kernel/fork.c | 8 ++------ 3 files changed, 5 insertions(+), 8 deletions(-) (limited to 'kernel/exit.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index 99855f694eb..d04186d8cc6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1151,7 +1151,7 @@ extern void exit_thread(void); extern void exit_files(struct task_struct *); extern void __cleanup_signal(struct signal_struct *); -extern void cleanup_sighand(struct task_struct *); +extern void __cleanup_sighand(struct sighand_struct *); extern void exit_itimers(struct signal_struct *); extern NORET_TYPE void do_group_exit(int); diff --git a/kernel/exit.c b/kernel/exit.c index 22399caf757..bc0ec674d3f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -114,10 +114,11 @@ static void __exit_signal(struct task_struct *tsk) __unhash_process(tsk); tsk->signal = NULL; - cleanup_sighand(tsk); + tsk->sighand = NULL; spin_unlock(&sighand->siglock); rcu_read_unlock(); + __cleanup_sighand(sighand); clear_tsk_thread_flag(tsk,TIF_SIGPENDING); flush_sigqueue(&tsk->pending); if (sig) { diff --git a/kernel/fork.c b/kernel/fork.c index aa50c848fae..b3f7a1bb5e5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -803,12 +803,8 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t return 0; } -void cleanup_sighand(struct task_struct *tsk) +void __cleanup_sighand(struct sighand_struct *sighand) { - struct sighand_struct * sighand = tsk->sighand; - - /* Ok, we're done with the signal handlers */ - tsk->sighand = NULL; if (atomic_dec_and_test(&sighand->count)) kmem_cache_free(sighand_cachep, sighand); } @@ -1233,7 +1229,7 @@ bad_fork_cleanup_mm: bad_fork_cleanup_signal: cleanup_signal(p); bad_fork_cleanup_sighand: - cleanup_sighand(p); + __cleanup_sighand(p->sighand); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: -- cgit v1.2.3-70-g09d2
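
The kernel-side pieces above are only half of the robust-futex story: nothing in do_exit()/exit_robust_list() happens unless user space has registered a struct robust_list_head for the thread via the new set_robust_list syscall. The sketch below shows what that registration looks like from user space; it assumes the exported <linux/futex.h> header and the SYS_set_robust_list syscall number are available on the build host, and it is illustrative only (in practice glibc performs this registration when it sets up each thread) rather than part of the patches above.

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/futex.h>	/* struct robust_list_head; assumed to be the exported header added by these patches */

/*
 * One robust-list head for the (single) thread of this example.
 * An empty list is represented by the head pointing back at itself,
 * which matches the "entry != &head->list" termination test in
 * exit_robust_list()/compat_exit_robust_list() above.
 */
static struct robust_list_head robust_head = {
	.list		 = { .next = &robust_head.list },
	.futex_offset	 = 0,	/* offset from a list entry to its futex word */
	.list_op_pending = NULL,
};

int main(void)
{
#ifdef SYS_set_robust_list
	/* Register the list; from now on the kernel walks it in do_exit(). */
	if (syscall(SYS_set_robust_list, &robust_head, sizeof(robust_head)) != 0) {
		perror("set_robust_list");
		return 1;
	}
	printf("robust list registered at %p\n", (void *)&robust_head);
#else
	printf("SYS_set_robust_list is not known to this system's headers\n");
#endif
	return 0;
}

The matching get_robust_list call lets one task inspect another task's registration, subject to the euid/CAP_SYS_PTRACE permission check visible in compat_sys_get_robust_list() above.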