From 910dea7fdda22f0ee83d26d459e460c79ed94557 Mon Sep 17 00:00:00 2001 From: Eric Sesterhenn Date: Sun, 26 Mar 2006 18:29:26 +0200 Subject: BUG_ON() Conversion in kernel/fork.c this changes if() BUG(); constructs to BUG_ON() which is cleaner, contains unlikely() and can be better optimized away. Signed-off-by: Eric Sesterhenn Signed-off-by: Adrian Bunk --- kernel/fork.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index a02063903aa..d93ab2ba729 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -769,8 +769,7 @@ int unshare_files(void) struct files_struct *files = current->files; int rc; - if(!files) - BUG(); + BUG_ON(!files); /* This can race but the race causes us to copy when we don't need to and drop the copy */ -- cgit v1.2.3-70-g09d2 From 05cfb614ddbf3181540ce09d44d96486f8ba8d6a Mon Sep 17 00:00:00 2001 From: Roman Zippel Date: Sun, 26 Mar 2006 01:38:12 -0800 Subject: [PATCH] hrtimers: remove data field The nanosleep cleanup allows to remove the data field of hrtimer. The callback function can use container_of() to get its own data. Since the hrtimer structure is anyway embedded in other structures, this adds no overhead. Signed-off-by: Roman Zippel Signed-off-by: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 2 +- include/linux/hrtimer.h | 5 +---- include/linux/sched.h | 1 + include/linux/timer.h | 3 ++- kernel/fork.c | 2 +- kernel/hrtimer.c | 12 +++++------- kernel/itimer.c | 15 +++++++-------- kernel/posix-timers.c | 9 ++++----- 8 files changed, 22 insertions(+), 27 deletions(-) (limited to 'kernel/fork.c') diff --git a/fs/exec.c b/fs/exec.c index 995cba3c62b..c7397c46ad6 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -632,7 +632,7 @@ static int de_thread(struct task_struct *tsk) * synchronize with any firing (by calling del_timer_sync) * before we can safely let the old group leader die. 
*/ - sig->real_timer.data = current; + sig->tsk = current; spin_unlock_irq(lock); if (hrtimer_cancel(&sig->real_timer)) hrtimer_restart(&sig->real_timer); diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index f57cc7bd700..93830158348 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -45,9 +45,7 @@ struct hrtimer_base; * @expires: the absolute expiry time in the hrtimers internal * representation. The time is related to the clock on * which the timer is based. - * @state: state of the timer * @function: timer expiry callback function - * @data: argument for the callback function * @base: pointer to the timer base (per cpu and per clock) * * The hrtimer structure must be initialized by init_hrtimer_#CLOCKTYPE() @@ -55,8 +53,7 @@ struct hrtimer_base; struct hrtimer { struct rb_node node; ktime_t expires; - int (*function)(void *); - void *data; + int (*function)(struct hrtimer *); struct hrtimer_base *base; }; diff --git a/include/linux/sched.h b/include/linux/sched.h index e0054c1b9a0..036d14d2bf9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -402,6 +402,7 @@ struct signal_struct { /* ITIMER_REAL timer for the process */ struct hrtimer real_timer; + struct task_struct *tsk; ktime_t it_real_incr; /* ITIMER_PROF and ITIMER_VIRTUAL timers for the process */ diff --git a/include/linux/timer.h b/include/linux/timer.h index ee5a09e806e..b5caabca553 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -96,6 +96,7 @@ static inline void add_timer(struct timer_list *timer) extern void init_timers(void); extern void run_local_timers(void); -extern int it_real_fn(void *); +struct hrtimer; +extern int it_real_fn(struct hrtimer *); #endif diff --git a/kernel/fork.c b/kernel/fork.c index a02063903aa..4bd6486aa67 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -848,7 +848,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, 
HRTIMER_REL); sig->it_real_incr.tv64 = 0; sig->real_timer.function = it_real_fn; - sig->real_timer.data = tsk; + sig->tsk = tsk; sig->it_virt_expires = cputime_zero; sig->it_virt_incr = cputime_zero; diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 44108de4f02..0237a556eb1 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c @@ -613,21 +613,19 @@ static inline void run_hrtimer_queue(struct hrtimer_base *base) while ((node = base->first)) { struct hrtimer *timer; - int (*fn)(void *); + int (*fn)(struct hrtimer *); int restart; - void *data; timer = rb_entry(node, struct hrtimer, node); if (base->softirq_time.tv64 <= timer->expires.tv64) break; fn = timer->function; - data = timer->data; set_curr_timer(base, timer); __remove_hrtimer(timer, base); spin_unlock_irq(&base->lock); - restart = fn(data); + restart = fn(timer); spin_lock_irq(&base->lock); @@ -664,9 +662,10 @@ struct sleep_hrtimer { int expired; }; -static int nanosleep_wakeup(void *data) +static int nanosleep_wakeup(struct hrtimer *timer) { - struct sleep_hrtimer *t = data; + struct sleep_hrtimer *t = + container_of(timer, struct sleep_hrtimer, timer); t->expired = 1; wake_up_process(t->task); @@ -677,7 +676,6 @@ static int nanosleep_wakeup(void *data) static int __sched do_nanosleep(struct sleep_hrtimer *t, enum hrtimer_mode mode) { t->timer.function = nanosleep_wakeup; - t->timer.data = t; t->task = current; t->expired = 0; diff --git a/kernel/itimer.c b/kernel/itimer.c index af2ec6b4392..204ed7939e7 100644 --- a/kernel/itimer.c +++ b/kernel/itimer.c @@ -128,17 +128,16 @@ asmlinkage long sys_getitimer(int which, struct itimerval __user *value) /* * The timer is automagically restarted, when interval != 0 */ -int it_real_fn(void *data) +int it_real_fn(struct hrtimer *timer) { - struct task_struct *tsk = (struct task_struct *) data; + struct signal_struct *sig = + container_of(timer, struct signal_struct, real_timer); - send_group_sig_info(SIGALRM, SEND_SIG_PRIV, tsk); - - if 
(tsk->signal->it_real_incr.tv64 != 0) { - hrtimer_forward(&tsk->signal->real_timer, - tsk->signal->real_timer.base->softirq_time, - tsk->signal->it_real_incr); + send_group_sig_info(SIGALRM, SEND_SIG_PRIV, sig->tsk); + if (sig->it_real_incr.tv64 != 0) { + hrtimer_forward(timer, timer->base->softirq_time, + sig->it_real_incr); return HRTIMER_RESTART; } return HRTIMER_NORESTART; diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 7c5f44787c8..ac6dc874442 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c @@ -145,7 +145,7 @@ static int common_timer_set(struct k_itimer *, int, struct itimerspec *, struct itimerspec *); static int common_timer_del(struct k_itimer *timer); -static int posix_timer_fn(void *data); +static int posix_timer_fn(struct hrtimer *data); static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags); @@ -334,14 +334,14 @@ EXPORT_SYMBOL_GPL(posix_timer_event); * This code is for CLOCK_REALTIME* and CLOCK_MONOTONIC* timers. */ -static int posix_timer_fn(void *data) +static int posix_timer_fn(struct hrtimer *timer) { - struct k_itimer *timr = data; - struct hrtimer *timer = &timr->it.real.timer; + struct k_itimer *timr; unsigned long flags; int si_private = 0; int ret = HRTIMER_NORESTART; + timr = container_of(timer, struct k_itimer, it.real.timer); spin_lock_irqsave(&timr->it_lock, flags); if (timr->it.real.interval.tv64 != 0) @@ -725,7 +725,6 @@ common_timer_set(struct k_itimer *timr, int flags, mode = flags & TIMER_ABSTIME ? HRTIMER_ABS : HRTIMER_REL; hrtimer_init(&timr->it.real.timer, timr->it_clock, mode); - timr->it.real.timer.data = timr; timr->it.real.timer.function = posix_timer_fn; timer->expires = timespec_to_ktime(new_setting->it_value); -- cgit v1.2.3-70-g09d2 From 8f17d3a5049d32392b79925c73a0cf99ce6d5af0 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 27 Mar 2006 01:16:27 -0800 Subject: [PATCH] lightweight robust futexes updates - fix: initialize the robust list(s) to NULL in copy_process. 
- doc update - cleanup: rename _inuser to _inatomic - __user cleanups and other small cleanups Signed-off-by: Ingo Molnar Cc: Thomas Gleixner Cc: Arjan van de Ven Cc: Ulrich Drepper Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/robust-futex-ABI.txt | 2 -- Documentation/robust-futexes.txt | 2 +- include/asm-frv/futex.h | 2 +- include/asm-generic/futex.h | 2 +- include/asm-i386/futex.h | 2 +- include/asm-mips/futex.h | 2 +- include/asm-powerpc/futex.h | 2 +- include/asm-x86_64/futex.h | 2 +- include/linux/futex.h | 2 +- kernel/fork.c | 5 ++++- kernel/futex.c | 20 +++++++++----------- kernel/futex_compat.c | 7 +++---- 12 files changed, 24 insertions(+), 26 deletions(-) (limited to 'kernel/fork.c') diff --git a/Documentation/robust-futex-ABI.txt b/Documentation/robust-futex-ABI.txt index def5d873528..8529a17ffaa 100644 --- a/Documentation/robust-futex-ABI.txt +++ b/Documentation/robust-futex-ABI.txt @@ -142,8 +142,6 @@ On insertion: of the 'lock word', to the linked list starting at 'head', and 4) clear the 'list_op_pending' word. - XXX I am particularly unsure of the following -pj XXX - On removal: 1) set the 'list_op_pending' word to the address of the 'lock word' to be removed, diff --git a/Documentation/robust-futexes.txt b/Documentation/robust-futexes.txt index 7aecc67b136..df82d75245a 100644 --- a/Documentation/robust-futexes.txt +++ b/Documentation/robust-futexes.txt @@ -213,6 +213,6 @@ robust-mutex testcases. All other architectures should build just fine too - but they wont have the new syscalls yet. -Architectures need to implement the new futex_atomic_cmpxchg_inuser() +Architectures need to implement the new futex_atomic_cmpxchg_inatomic() inline function before writing up the syscalls (that function returns -ENOSYS right now). 
diff --git a/include/asm-frv/futex.h b/include/asm-frv/futex.h index 9a0e9026ba5..08b3d1da358 100644 --- a/include/asm-frv/futex.h +++ b/include/asm-frv/futex.h @@ -10,7 +10,7 @@ extern int futex_atomic_op_inuser(int encoded_op, int __user *uaddr); static inline int -futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) { return -ENOSYS; } diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h index 514bd401cd7..df893c16031 100644 --- a/include/asm-generic/futex.h +++ b/include/asm-generic/futex.h @@ -50,7 +50,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) } static inline int -futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) { return -ENOSYS; } diff --git a/include/asm-i386/futex.h b/include/asm-i386/futex.h index 41184a31885..7b8ceefd010 100644 --- a/include/asm-i386/futex.h +++ b/include/asm-i386/futex.h @@ -105,7 +105,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) } static inline int -futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) { if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; diff --git a/include/asm-mips/futex.h b/include/asm-mips/futex.h index c5fb2d6d918..a554089991f 100644 --- a/include/asm-mips/futex.h +++ b/include/asm-mips/futex.h @@ -100,7 +100,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) } static inline int -futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) { return -ENOSYS; } diff --git a/include/asm-powerpc/futex.h b/include/asm-powerpc/futex.h index 80ed9854e42..f1b3c00bc1c 100644 --- a/include/asm-powerpc/futex.h +++ b/include/asm-powerpc/futex.h @@ -82,7 +82,7 @@ static inline int 
futex_atomic_op_inuser (int encoded_op, int __user *uaddr) } static inline int -futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) { return -ENOSYS; } diff --git a/include/asm-x86_64/futex.h b/include/asm-x86_64/futex.h index 7d9eb1a8454..9804bf07b09 100644 --- a/include/asm-x86_64/futex.h +++ b/include/asm-x86_64/futex.h @@ -95,7 +95,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr) } static inline int -futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval) +futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) { if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) return -EFAULT; diff --git a/include/linux/futex.h b/include/linux/futex.h index 20face6b798..55fff96ae85 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -100,7 +100,7 @@ long do_futex(unsigned long uaddr, int op, int val, unsigned long timeout, unsigned long uaddr2, int val2, int val3); -extern int handle_futex_death(unsigned int *uaddr, struct task_struct *curr); +extern int handle_futex_death(u32 __user *uaddr, struct task_struct *curr); #ifdef CONFIG_FUTEX extern void exit_robust_list(struct task_struct *curr); diff --git a/kernel/fork.c b/kernel/fork.c index e0a2b449dea..c49bd193b05 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1061,7 +1061,10 @@ static task_t *copy_process(unsigned long clone_flags, * Clear TID on mm_release()? */ p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? 
child_tidptr: NULL; - + p->robust_list = NULL; +#ifdef CONFIG_COMPAT + p->compat_robust_list = NULL; +#endif /* * sigaltstack should be cleared when sharing the same VM */ diff --git a/kernel/futex.c b/kernel/futex.c index feb724b2554..9c9b2b6b22d 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -913,15 +913,15 @@ err_unlock: * Process a futex-list entry, check whether it's owned by the * dying task, and do notification if so: */ -int handle_futex_death(unsigned int *uaddr, struct task_struct *curr) +int handle_futex_death(u32 __user *uaddr, struct task_struct *curr) { - unsigned int futex_val; + u32 uval; -repeat: - if (get_user(futex_val, uaddr)) +retry: + if (get_user(uval, uaddr)) return -1; - if ((futex_val & FUTEX_TID_MASK) == curr->pid) { + if ((uval & FUTEX_TID_MASK) == curr->pid) { /* * Ok, this dying thread is truly holding a futex * of interest. Set the OWNER_DIED bit atomically @@ -932,12 +932,11 @@ repeat: * thread-death.) The rest of the cleanup is done in * userspace. */ - if (futex_atomic_cmpxchg_inuser(uaddr, futex_val, - futex_val | FUTEX_OWNER_DIED) != - futex_val) - goto repeat; + if (futex_atomic_cmpxchg_inatomic(uaddr, uval, + uval | FUTEX_OWNER_DIED) != uval) + goto retry; - if (futex_val & FUTEX_WAITERS) + if (uval & FUTEX_WAITERS) futex_wake((unsigned long)uaddr, 1); } return 0; @@ -985,7 +984,6 @@ void exit_robust_list(struct task_struct *curr) if (handle_futex_death((void *)entry + futex_offset, curr)) return; - /* * Fetch the next entry in the list: */ diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index c153559ef28..9c077cf9aa8 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -121,9 +121,9 @@ err_unlock: return ret; } -asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, int val, +asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val, struct compat_timespec __user *utime, u32 __user *uaddr2, - int val3) + u32 val3) { struct timespec t; unsigned long timeout = MAX_SCHEDULE_TIMEOUT; @@ 
-137,6 +137,5 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, int val, if (op >= FUTEX_REQUEUE) val2 = (int) (unsigned long) utime; - return do_futex((unsigned long)uaddr, op, val, timeout, - (unsigned long)uaddr2, val2, val3); + return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3); } -- cgit v1.2.3-70-g09d2 From c97d98931ac52ef110b62d9b75c6a6f2bfbc1898 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:06 -0800 Subject: [PATCH] kill SET_LINKS/REMOVE_LINKS Both SET_LINKS() and REMOVE_LINKS() have exactly one caller, and these callers already check thread_group_leader(). This patch kills these macros, they mix two different things: setting process's parent and registering it in init_task.tasks list. Callers are updated to do these actions by hand. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 12 ------------ kernel/exit.c | 4 +++- kernel/fork.c | 4 +++- 3 files changed, 6 insertions(+), 14 deletions(-) (limited to 'kernel/fork.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index b4b14c32b28..1f16fb1fea2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1186,18 +1186,6 @@ extern void wait_task_inactive(task_t * p); #define remove_parent(p) list_del_init(&(p)->sibling) #define add_parent(p) list_add_tail(&(p)->sibling,&(p)->parent->children) -#define REMOVE_LINKS(p) do { \ - if (thread_group_leader(p)) \ - list_del_init(&(p)->tasks); \ - remove_parent(p); \ - } while (0) - -#define SET_LINKS(p) do { \ - if (thread_group_leader(p)) \ - list_add_tail(&(p)->tasks,&init_task.tasks); \ - add_parent(p); \ - } while (0) - #define next_task(p) list_entry((p)->tasks.next, struct task_struct, tasks) #define prev_task(p) list_entry((p)->tasks.prev, struct task_struct, tasks) diff --git a/kernel/exit.c b/kernel/exit.c index 5b5e8b67680..f436a6bd3fb 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ 
-54,11 +54,13 @@ static void __unhash_process(struct task_struct *p) if (thread_group_leader(p)) { detach_pid(p, PIDTYPE_PGID); detach_pid(p, PIDTYPE_SID); + + list_del_init(&p->tasks); if (p->pid) __get_cpu_var(process_counts)--; } - REMOVE_LINKS(p); + remove_parent(p); } void release_task(struct task_struct * p) diff --git a/kernel/fork.c b/kernel/fork.c index c49bd193b05..74c67629ee6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1181,7 +1181,7 @@ static task_t *copy_process(unsigned long clone_flags, */ p->ioprio = current->ioprio; - SET_LINKS(p); + add_parent(p); if (unlikely(p->ptrace & PT_PTRACED)) __ptrace_link(p, current->parent); @@ -1191,6 +1191,8 @@ static task_t *copy_process(unsigned long clone_flags, p->signal->session = current->signal->session; attach_pid(p, PIDTYPE_PGID, process_group(p)); attach_pid(p, PIDTYPE_SID, p->signal->session); + + list_add_tail(&p->tasks, &init_task.tasks); if (p->pid) __get_cpu_var(process_counts)++; } -- cgit v1.2.3-70-g09d2 From 73b9ebfe126a4a886ee46cbab637374d7024668a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:07 -0800 Subject: [PATCH] pidhash: don't count idle threads fork_idle() does unhash_process() just after copy_process(). In contrast, boot_cpu's idle thread explicitly registers itself for each pid_type with nr = 0. copy_process() already checks p->pid != 0 before process_counts++, I think we can just skip attach_pid() calls and job control inits for idle threads and kill unhash_process(). We don't need to cleanup ->proc_dentry in fork_idle() because with this patch idle threads are never hashed in kernel/pid.c:pid_hash[]. We don't need to hash pid == 0 in pidmap_init(). free_pidmap() is never called with pid == 0 arg, so it will never be reused. So it is still possible to use pid == 0 in any PIDTYPE_xxx namespace from kernel/pid.c's POV. However with this patch we don't hash pid == 0 for PIDTYPE_PID case. 
We still have PIDTYPE_PGID/PIDTYPE_SID entries with pid == 0: /sbin/init and kernel threads which don't call daemonize(). Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/um/kernel/smp.c | 1 - include/linux/sched.h | 2 -- kernel/exit.c | 18 +----------------- kernel/fork.c | 35 ++++++++++++++++++----------------- kernel/pid.c | 10 +--------- 5 files changed, 20 insertions(+), 46 deletions(-) (limited to 'kernel/fork.c') diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c index c8d8d0ac1a7..511116aebaf 100644 --- a/arch/um/kernel/smp.c +++ b/arch/um/kernel/smp.c @@ -143,7 +143,6 @@ void smp_prepare_cpus(unsigned int maxcpus) idle = idle_thread(cpu); init_idle(idle, cpu); - unhash_process(idle); waittime = 200000000; while (waittime-- && !cpu_isset(cpu, cpu_callin_map)) diff --git a/include/linux/sched.h b/include/linux/sched.h index 1f16fb1fea2..ddc0df7f8bf 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1214,8 +1214,6 @@ static inline int thread_group_empty(task_t *p) #define delay_group_leader(p) \ (thread_group_leader(p) && !thread_group_empty(p)) -extern void unhash_process(struct task_struct *p); - /* * Protects ->fs, ->files, ->mm, ->ptrace, ->group_info, ->comm, keyring * subscriptions and synchronises with wait4(). Also used in procfs. 
Also diff --git a/kernel/exit.c b/kernel/exit.c index f436a6bd3fb..a94e1c31131 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -56,8 +56,7 @@ static void __unhash_process(struct task_struct *p) detach_pid(p, PIDTYPE_SID); list_del_init(&p->tasks); - if (p->pid) - __get_cpu_var(process_counts)--; + __get_cpu_var(process_counts)--; } remove_parent(p); @@ -118,21 +117,6 @@ repeat: goto repeat; } -/* we are using it only for SMP init */ - -void unhash_process(struct task_struct *p) -{ - struct dentry *proc_dentry; - - spin_lock(&p->proc_lock); - proc_dentry = proc_pid_unhash(p); - write_lock_irq(&tasklist_lock); - __unhash_process(p); - write_unlock_irq(&tasklist_lock); - spin_unlock(&p->proc_lock); - proc_pid_flush(proc_dentry); -} - /* * This checks not only the pgrp, but falls back on the pid if no * satisfactory pgrp is found. I dunno - gdb doesn't work correctly diff --git a/kernel/fork.c b/kernel/fork.c index 74c67629ee6..0c32e28cdc5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1181,25 +1181,26 @@ static task_t *copy_process(unsigned long clone_flags, */ p->ioprio = current->ioprio; - add_parent(p); - if (unlikely(p->ptrace & PT_PTRACED)) - __ptrace_link(p, current->parent); - - if (thread_group_leader(p)) { - p->signal->tty = current->signal->tty; - p->signal->pgrp = process_group(current); - p->signal->session = current->signal->session; - attach_pid(p, PIDTYPE_PGID, process_group(p)); - attach_pid(p, PIDTYPE_SID, p->signal->session); - - list_add_tail(&p->tasks, &init_task.tasks); - if (p->pid) + if (likely(p->pid)) { + add_parent(p); + if (unlikely(p->ptrace & PT_PTRACED)) + __ptrace_link(p, current->parent); + + if (thread_group_leader(p)) { + p->signal->tty = current->signal->tty; + p->signal->pgrp = process_group(current); + p->signal->session = current->signal->session; + attach_pid(p, PIDTYPE_PGID, process_group(p)); + attach_pid(p, PIDTYPE_SID, p->signal->session); + + list_add_tail(&p->tasks, &init_task.tasks); __get_cpu_var(process_counts)++; 
+ } + attach_pid(p, PIDTYPE_TGID, p->tgid); + attach_pid(p, PIDTYPE_PID, p->pid); + nr_threads++; } - attach_pid(p, PIDTYPE_TGID, p->tgid); - attach_pid(p, PIDTYPE_PID, p->pid); - nr_threads++; total_forks++; spin_unlock(&current->sighand->siglock); write_unlock_irq(&tasklist_lock); @@ -1263,7 +1264,7 @@ task_t * __devinit fork_idle(int cpu) if (!task) return ERR_PTR(-ENOMEM); init_idle(task, cpu); - unhash_process(task); + return task; } diff --git a/kernel/pid.c b/kernel/pid.c index 7781d999905..a9f2dfd006d 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -247,16 +247,8 @@ void __init pidhash_init(void) void __init pidmap_init(void) { - int i; - pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL); + /* Reserve PID 0. We never call free_pidmap(0) */ set_bit(0, pidmap_array->page); atomic_dec(&pidmap_array->nr_free); - - /* - * Allocate PID 0, and hash it via all PID types: - */ - - for (i = 0; i < PIDTYPE_MAX; i++) - attach_pid(current, i, 0); } -- cgit v1.2.3-70-g09d2 From aa1757f90bea3f598b6e5d04d922a6a60200f1da Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:12 -0800 Subject: [PATCH] convert sighand_cache to use SLAB_DESTROY_BY_RCU This patch borrows Hugh's clever 'struct anon_vma' trick. Without tasklist_lock held we can't trust task->sighand until we locked it and re-checked that it is still the same. But this means we don't need to defer 'kmem_cache_free(sighand)'. We can return the memory to slab immediately, all we need is to be sure that sighand->siglock can't disappear inside rcu protected section. To do so we need to initialize ->siglock inside ctor function, SLAB_DESTROY_BY_RCU does the rest. 
Signed-off-by: Oleg Nesterov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/exec.c | 3 +-- include/linux/sched.h | 8 -------- kernel/fork.c | 21 +++++++++++---------- kernel/signal.c | 2 +- 4 files changed, 13 insertions(+), 21 deletions(-) (limited to 'kernel/fork.c') diff --git a/fs/exec.c b/fs/exec.c index 9046ad2b061..950ebd43cdc 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -768,7 +768,6 @@ no_thread_group: /* * Move our state over to newsighand and switch it in. */ - spin_lock_init(&newsighand->siglock); atomic_set(&newsighand->count, 1); memcpy(newsighand->action, oldsighand->action, sizeof(newsighand->action)); @@ -785,7 +784,7 @@ no_thread_group: write_unlock_irq(&tasklist_lock); if (atomic_dec_and_test(&oldsighand->count)) - sighand_free(oldsighand); + kmem_cache_free(sighand_cachep, oldsighand); } BUG_ON(!thread_group_leader(current)); diff --git a/include/linux/sched.h b/include/linux/sched.h index ddc0df7f8bf..bbcfc873bd9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -355,16 +355,8 @@ struct sighand_struct { atomic_t count; struct k_sigaction action[_NSIG]; spinlock_t siglock; - struct rcu_head rcu; }; -extern void sighand_free_cb(struct rcu_head *rhp); - -static inline void sighand_free(struct sighand_struct *sp) -{ - call_rcu(&sp->rcu, sighand_free_cb); -} - /* * NOTE! 
"signal_struct" does not have it's own * locking, because a shared signal_struct always diff --git a/kernel/fork.c b/kernel/fork.c index 0c32e28cdc5..33ffb5bf0db 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -786,14 +786,6 @@ int unshare_files(void) EXPORT_SYMBOL(unshare_files); -void sighand_free_cb(struct rcu_head *rhp) -{ - struct sighand_struct *sp; - - sp = container_of(rhp, struct sighand_struct, rcu); - kmem_cache_free(sighand_cachep, sp); -} - static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk) { struct sighand_struct *sig; @@ -806,7 +798,6 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t rcu_assign_pointer(tsk->sighand, sig); if (!sig) return -ENOMEM; - spin_lock_init(&sig->siglock); atomic_set(&sig->count, 1); memcpy(sig->action, current->sighand->action, sizeof(sig->action)); return 0; @@ -1356,11 +1347,21 @@ long do_fork(unsigned long clone_flags, #define ARCH_MIN_MMSTRUCT_ALIGN 0 #endif +static void sighand_ctor(void *data, kmem_cache_t *cachep, unsigned long flags) +{ + struct sighand_struct *sighand = data; + + if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) + spin_lock_init(&sighand->siglock); +} + void __init proc_caches_init(void) { sighand_cachep = kmem_cache_create("sighand_cache", sizeof(struct sighand_struct), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU, + sighand_ctor, NULL); signal_cachep = kmem_cache_create("signal_cache", sizeof(struct signal_struct), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); diff --git a/kernel/signal.c b/kernel/signal.c index dc8f91bf9f8..b0b1ca9daa3 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -330,7 +330,7 @@ void __exit_sighand(struct task_struct *tsk) /* Ok, we're done with the signal handlers */ tsk->sighand = NULL; if (atomic_dec_and_test(&sighand->count)) - sighand_free(sighand); + kmem_cache_free(sighand_cachep, sighand); } void 
exit_sighand(struct task_struct *tsk) -- cgit v1.2.3-70-g09d2 From 7001510d0cbf51ad202dd2d0744f54104285cbb9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:14 -0800 Subject: [PATCH] copy_process: cleanup bad_fork_cleanup_sighand The only caller of exit_sighand(tsk) is copy_process's error path. We can call __exit_sighand() directly and kill exit_sighand(). This 'tsk' was not yet registered in pid_hash[] or init_task.tasks, it has no external references, nobody can see it, and IF (clone_flags & CLONE_SIGHAND) At least 'current' has a reference to ->sighand, this means atomic_dec_and_test(sighand->count) can't be true. ELSE Nobody can see this ->sighand, this means we can free it without any locking. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Acked-by: "Paul E. McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 - kernel/fork.c | 3 ++- kernel/signal.c | 14 -------------- 3 files changed, 2 insertions(+), 16 deletions(-) (limited to 'kernel/fork.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index ca1fd31aae9..69c2a1e1529 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1151,7 +1151,6 @@ extern void exit_thread(void); extern void exit_files(struct task_struct *); extern void exit_signal(struct task_struct *); extern void __exit_signal(struct task_struct *); -extern void exit_sighand(struct task_struct *); extern void __exit_sighand(struct task_struct *); extern void exit_itimers(struct signal_struct *); diff --git a/kernel/fork.c b/kernel/fork.c index 33ffb5bf0db..8a46ad52be8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1208,7 +1208,8 @@ bad_fork_cleanup_mm: bad_fork_cleanup_signal: exit_signal(p); bad_fork_cleanup_sighand: - exit_sighand(p); + if (p->sighand) + __exit_sighand(p); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: diff --git a/kernel/signal.c b/kernel/signal.c index c5b65aa4c2b..1d7f4463c32 100644 --- 
a/kernel/signal.c +++ b/kernel/signal.c @@ -336,20 +336,6 @@ void __exit_sighand(struct task_struct *tsk) kmem_cache_free(sighand_cachep, sighand); } -void exit_sighand(struct task_struct *tsk) -{ - write_lock_irq(&tasklist_lock); - rcu_read_lock(); - if (tsk->sighand != NULL) { - struct sighand_struct *sighand = rcu_dereference(tsk->sighand); - spin_lock(&sighand->siglock); - __exit_sighand(tsk); - spin_unlock(&sighand->siglock); - } - rcu_read_unlock(); - write_unlock_irq(&tasklist_lock); -} - /* * This function expects the tasklist_lock write-locked. */ -- cgit v1.2.3-70-g09d2 From 6b3934ef52712ece50605dfc72e55d00c580831a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:16 -0800 Subject: [PATCH] copy_process: cleanup bad_fork_cleanup_signal __exit_signal() does important cleanups atomically under ->siglock. It is also called from copy_process's error path. This is not good, for example we can't move __unhash_process() under ->siglock for that reason. We should not mix these 2 paths, just look at ugly 'if (p->sighand)' under 'bad_fork_cleanup_sighand:' label. For copy_process() case it is sufficient to just backout copy_signal(), nothing more. Again, nobody can see this task yet. For CLONE_THREAD case we just decrement signal->count, otherwise nobody can see this ->signal and we can free it lockless. This patch assumes it is safe to do exit_thread_group_keys() without tasklist_lock. Signed-off-by: Oleg Nesterov Cc: "Eric W. 
Biederman" Acked-by: David Howells Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- include/linux/slab.h | 1 - kernel/fork.c | 23 +++++++++++++++++++---- kernel/signal.c | 15 +-------------- 4 files changed, 21 insertions(+), 20 deletions(-) (limited to 'kernel/fork.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index 69c2a1e1529..7dd430b697a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1149,7 +1149,7 @@ extern void flush_thread(void); extern void exit_thread(void); extern void exit_files(struct task_struct *); -extern void exit_signal(struct task_struct *); +extern void __cleanup_signal(struct signal_struct *); extern void __exit_signal(struct task_struct *); extern void __exit_sighand(struct task_struct *); extern void exit_itimers(struct signal_struct *); diff --git a/include/linux/slab.h b/include/linux/slab.h index 15e1d9736b1..3af03b19c98 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -210,7 +210,6 @@ extern kmem_cache_t *names_cachep; extern kmem_cache_t *files_cachep; extern kmem_cache_t *filp_cachep; extern kmem_cache_t *fs_cachep; -extern kmem_cache_t *signal_cachep; extern kmem_cache_t *sighand_cachep; extern kmem_cache_t *bio_cachep; diff --git a/kernel/fork.c b/kernel/fork.c index 8a46ad52be8..0aff28cdbad 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -84,7 +84,7 @@ static kmem_cache_t *task_struct_cachep; #endif /* SLAB cache for signal_struct structures (tsk->signal) */ -kmem_cache_t *signal_cachep; +static kmem_cache_t *signal_cachep; /* SLAB cache for sighand_struct structures (tsk->sighand) */ kmem_cache_t *sighand_cachep; @@ -872,6 +872,22 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts return 0; } +void __cleanup_signal(struct signal_struct *sig) +{ + exit_thread_group_keys(sig); + kmem_cache_free(signal_cachep, sig); +} + +static inline void cleanup_signal(struct task_struct *tsk) +{ 
+ struct signal_struct *sig = tsk->signal; + + atomic_dec(&sig->live); + + if (atomic_dec_and_test(&sig->count)) + __cleanup_signal(sig); +} + static inline void copy_flags(unsigned long clone_flags, struct task_struct *p) { unsigned long new_flags = p->flags; @@ -1206,10 +1222,9 @@ bad_fork_cleanup_mm: if (p->mm) mmput(p->mm); bad_fork_cleanup_signal: - exit_signal(p); + cleanup_signal(p); bad_fork_cleanup_sighand: - if (p->sighand) - __exit_sighand(p); + __exit_sighand(p); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: diff --git a/kernel/signal.c b/kernel/signal.c index 1d7f4463c32..54e9ef673e6 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -395,23 +395,10 @@ void __exit_signal(struct task_struct *tsk) clear_tsk_thread_flag(tsk,TIF_SIGPENDING); flush_sigqueue(&tsk->pending); if (sig) { - /* - * We are cleaning up the signal_struct here. - */ - exit_thread_group_keys(sig); - kmem_cache_free(signal_cachep, sig); + __cleanup_signal(sig); } } -void exit_signal(struct task_struct *tsk) -{ - atomic_dec(&tsk->signal->live); - - write_lock_irq(&tasklist_lock); - __exit_signal(tsk); - write_unlock_irq(&tasklist_lock); -} - /* * Flush all handlers for a task. */ -- cgit v1.2.3-70-g09d2 From c81addc9d3a0ebff2155e0cd86f90820ab97147e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:17 -0800 Subject: [PATCH] rename __exit_sighand to cleanup_sighand Cosmetic, rename __exit_sighand to cleanup_sighand and move it close to copy_sighand(). This matches copy_signal/cleanup_signal naming, and I think it is easier to follow. Signed-off-by: Oleg Nesterov Cc: "Eric W. Biederman" Acked-by: "Paul E. 
McKenney" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- kernel/fork.c | 12 +++++++++++- kernel/signal.c | 19 ++----------------- 3 files changed, 14 insertions(+), 19 deletions(-) (limited to 'kernel/fork.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index 7dd430b697a..921148277da 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1150,8 +1150,8 @@ extern void exit_thread(void); extern void exit_files(struct task_struct *); extern void __cleanup_signal(struct signal_struct *); +extern void cleanup_sighand(struct task_struct *); extern void __exit_signal(struct task_struct *); -extern void __exit_sighand(struct task_struct *); extern void exit_itimers(struct signal_struct *); extern NORET_TYPE void do_group_exit(int); diff --git a/kernel/fork.c b/kernel/fork.c index 0aff28cdbad..12cdd9fc9d0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -803,6 +803,16 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t return 0; } +void cleanup_sighand(struct task_struct *tsk) +{ + struct sighand_struct * sighand = tsk->sighand; + + /* Ok, we're done with the signal handlers */ + tsk->sighand = NULL; + if (atomic_dec_and_test(&sighand->count)) + kmem_cache_free(sighand_cachep, sighand); +} + static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk) { struct signal_struct *sig; @@ -1224,7 +1234,7 @@ bad_fork_cleanup_mm: bad_fork_cleanup_signal: cleanup_signal(p); bad_fork_cleanup_sighand: - __exit_sighand(p); + cleanup_sighand(p); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: diff --git a/kernel/signal.c b/kernel/signal.c index ca1fa854e46..b29c868bd5e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -310,9 +310,7 @@ static void flush_sigqueue(struct sigpending *queue) /* * Flush all pending signals for a task. 
*/ - -void -flush_signals(struct task_struct *t) +void flush_signals(struct task_struct *t) { unsigned long flags; @@ -323,19 +321,6 @@ flush_signals(struct task_struct *t) spin_unlock_irqrestore(&t->sighand->siglock, flags); } -/* - * This function expects the tasklist_lock write-locked. - */ -void __exit_sighand(struct task_struct *tsk) -{ - struct sighand_struct * sighand = tsk->sighand; - - /* Ok, we're done with the signal handlers */ - tsk->sighand = NULL; - if (atomic_dec_and_test(&sighand->count)) - kmem_cache_free(sighand_cachep, sighand); -} - /* * This function expects the tasklist_lock write-locked. */ @@ -386,7 +371,7 @@ void __exit_signal(struct task_struct *tsk) } tsk->signal = NULL; - __exit_sighand(tsk); + cleanup_sighand(tsk); spin_unlock(&sighand->siglock); rcu_read_unlock(); -- cgit v1.2.3-70-g09d2 From 47e65328a7b1cdfc4e3102e50d60faf94ebba7d3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:25 -0800 Subject: [PATCH] pids: kill PIDTYPE_TGID This patch kills PIDTYPE_TGID pid_type thus saving one hash table in kernel/pid.c and speeding up subthreads create/destroy a bit. It is also a preparation for the further tref/pids rework. This patch adds 'struct list_head thread_group' to 'struct task_struct' instead. We don't detach group leader from PIDTYPE_PID namespace until another thread inherits it's ->pid == ->tgid, so we are safe wrt premature free_pidmap(->tgid) call. Currently there are no users of find_task_by_pid_type(PIDTYPE_TGID). Should the need arise, we can use find_task_by_pid()->group_leader. 
Signed-off-by: Oleg Nesterov Acked-By: Eric Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid.h | 1 - include/linux/sched.h | 11 ++++++++--- kernel/exit.c | 10 +--------- kernel/fork.c | 4 +++- 4 files changed, 12 insertions(+), 14 deletions(-) (limited to 'kernel/fork.c') diff --git a/include/linux/pid.h b/include/linux/pid.h index 099e70ecf7c..5b9082cc600 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -4,7 +4,6 @@ enum pid_type { PIDTYPE_PID, - PIDTYPE_TGID, PIDTYPE_PGID, PIDTYPE_SID, PIDTYPE_MAX diff --git a/include/linux/sched.h b/include/linux/sched.h index a913fca9e70..99855f694eb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -752,6 +752,7 @@ struct task_struct { /* PID/PID hash table linkage. */ struct pid pids[PIDTYPE_MAX]; + struct list_head thread_group; struct completion *vfork_done; /* for vfork() */ int __user *set_child_tid; /* CLONE_CHILD_SETTID */ @@ -1192,13 +1193,17 @@ extern void wait_task_inactive(task_t * p); #define while_each_thread(g, t) \ while ((t = next_thread(t)) != g) -extern task_t * FASTCALL(next_thread(const task_t *p)); - #define thread_group_leader(p) (p->pid == p->tgid) +static inline task_t *next_thread(task_t *p) +{ + return list_entry(rcu_dereference(p->thread_group.next), + task_t, thread_group); +} + static inline int thread_group_empty(task_t *p) { - return list_empty(&p->pids[PIDTYPE_TGID].pid_list); + return list_empty(&p->thread_group); } #define delay_group_leader(p) \ diff --git a/kernel/exit.c b/kernel/exit.c index aea23e713cf..22399caf757 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -51,7 +51,6 @@ static void __unhash_process(struct task_struct *p) { nr_threads--; detach_pid(p, PIDTYPE_PID); - detach_pid(p, PIDTYPE_TGID); if (thread_group_leader(p)) { detach_pid(p, PIDTYPE_PGID); detach_pid(p, PIDTYPE_SID); @@ -59,7 +58,7 @@ static void __unhash_process(struct task_struct *p) list_del_init(&p->tasks); __get_cpu_var(process_counts)--; } - 
+ list_del_rcu(&p->thread_group); remove_parent(p); } @@ -964,13 +963,6 @@ asmlinkage long sys_exit(int error_code) do_exit((error_code&0xff)<<8); } -task_t fastcall *next_thread(const task_t *p) -{ - return pid_task(p->pids[PIDTYPE_TGID].pid_list.next, PIDTYPE_TGID); -} - -EXPORT_SYMBOL(next_thread); - /* * Take down every thread in the group. This is called by fatal signals * as well as by sys_exit_group (below). diff --git a/kernel/fork.c b/kernel/fork.c index 12cdd9fc9d0..bc551efb5fd 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1112,6 +1112,7 @@ static task_t *copy_process(unsigned long clone_flags, * We dont wake it up yet. */ p->group_leader = p; + INIT_LIST_HEAD(&p->thread_group); INIT_LIST_HEAD(&p->ptrace_children); INIT_LIST_HEAD(&p->ptrace_list); @@ -1165,7 +1166,9 @@ static task_t *copy_process(unsigned long clone_flags, retval = -EAGAIN; goto bad_fork_cleanup_namespace; } + p->group_leader = current->group_leader; + list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); if (current->signal->group_stop_count > 0) { /* @@ -1213,7 +1216,6 @@ static task_t *copy_process(unsigned long clone_flags, list_add_tail(&p->tasks, &init_task.tasks); __get_cpu_var(process_counts)++; } - attach_pid(p, PIDTYPE_TGID, p->tgid); attach_pid(p, PIDTYPE_PID, p->pid); nr_threads++; } -- cgit v1.2.3-70-g09d2 From 4a2c7a7837da1b91468e50426066d988050e4d56 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:26 -0800 Subject: [PATCH] make fork() atomic wrt pgrp/session signals Eric W. Biederman wrote: > > Ok. SUSV3/Posix is clear, fork is atomic with respect > to signals. Either a signal comes before or after a > fork but not during. (See the rationale section). > http://www.opengroup.org/onlinepubs/000095399/functions/fork.html > > The tasklist_lock does not stop forks from adding to a process > group. 
The forks stall while the tasklist_lock is held, but a fork > that began before we grabbed the tasklist_lock simply completes > afterwards, and the child does not receive the signal. This also means that SIGSTOP or sig_kernel_coredump() signal can't be delivered to pgrp/session reliably. With this patch copy_process() returns -ERESTARTNOINTR when it detects a pending signal, fork() will be restarted transparently after handling the signals. This patch also deletes now unneeded "group_stop_count > 0" check, copy_process() can no longer succeed while group stop in progress. Signed-off-by: Oleg Nesterov Acked-By: Eric Biederman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index bc551efb5fd..aa50c848fae 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1136,16 +1136,6 @@ static task_t *copy_process(unsigned long clone_flags, !cpu_online(task_cpu(p)))) set_task_cpu(p, smp_processor_id()); - /* - * Check for pending SIGKILL! The new thread should not be allowed - * to slip out of an OOM kill. (or normal SIGKILL.) - */ - if (sigismember(&current->pending.signal, SIGKILL)) { - write_unlock_irq(&tasklist_lock); - retval = -EINTR; - goto bad_fork_cleanup_namespace; - } - /* CLONE_PARENT re-uses the old parent */ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) p->real_parent = current->real_parent; @@ -1154,6 +1144,23 @@ static task_t *copy_process(unsigned long clone_flags, p->parent = p->real_parent; spin_lock(&current->sighand->siglock); + + /* + * Process group and session signals need to be delivered to just the + * parent before the fork or both the parent and the child after the + * fork. Restart if a signal comes in before we add the new process to + * it's process group.
+ * A fatal signal pending means that current will exit, so the new + * thread can't slip out of an OOM kill (or normal SIGKILL). + */ + recalc_sigpending(); + if (signal_pending(current)) { + spin_unlock(&current->sighand->siglock); + write_unlock_irq(&tasklist_lock); + retval = -ERESTARTNOINTR; + goto bad_fork_cleanup_namespace; + } + if (clone_flags & CLONE_THREAD) { /* * Important: if an exit-all has been started then @@ -1170,16 +1177,6 @@ static task_t *copy_process(unsigned long clone_flags, p->group_leader = current->group_leader; list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); - if (current->signal->group_stop_count > 0) { - /* - * There is an all-stop in progress for the group. - * We ourselves will stop as soon as we check signals. - * Make the new thread part of that group stop too. - */ - current->signal->group_stop_count++; - set_tsk_thread_flag(p, TIF_SIGPENDING); - } - if (!cputime_eq(current->signal->it_virt_expires, cputime_zero) || !cputime_eq(current->signal->it_prof_expires, -- cgit v1.2.3-70-g09d2 From a7e5328a06a2beee3a2bbfaf87ce2a7bbe937de1 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 28 Mar 2006 16:11:27 -0800 Subject: [PATCH] cleanup __exit_signal->cleanup_sighand path Move 'tsk->sighand = NULL' from cleanup_sighand() to __exit_signal(). This makes the exit path more understandable and allows us to do cleanup_sighand() outside of ->siglock protected section. Signed-off-by: Oleg Nesterov Cc: "Eric W.
Biederman" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 +- kernel/exit.c | 3 ++- kernel/fork.c | 8 ++------ 3 files changed, 5 insertions(+), 8 deletions(-) (limited to 'kernel/fork.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index 99855f694eb..d04186d8cc6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1151,7 +1151,7 @@ extern void exit_thread(void); extern void exit_files(struct task_struct *); extern void __cleanup_signal(struct signal_struct *); -extern void cleanup_sighand(struct task_struct *); +extern void __cleanup_sighand(struct sighand_struct *); extern void exit_itimers(struct signal_struct *); extern NORET_TYPE void do_group_exit(int); diff --git a/kernel/exit.c b/kernel/exit.c index 22399caf757..bc0ec674d3f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -114,10 +114,11 @@ static void __exit_signal(struct task_struct *tsk) __unhash_process(tsk); tsk->signal = NULL; - cleanup_sighand(tsk); + tsk->sighand = NULL; spin_unlock(&sighand->siglock); rcu_read_unlock(); + __cleanup_sighand(sighand); clear_tsk_thread_flag(tsk,TIF_SIGPENDING); flush_sigqueue(&tsk->pending); if (sig) { diff --git a/kernel/fork.c b/kernel/fork.c index aa50c848fae..b3f7a1bb5e5 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -803,12 +803,8 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t return 0; } -void cleanup_sighand(struct task_struct *tsk) +void __cleanup_sighand(struct sighand_struct *sighand) { - struct sighand_struct * sighand = tsk->sighand; - - /* Ok, we're done with the signal handlers */ - tsk->sighand = NULL; if (atomic_dec_and_test(&sighand->count)) kmem_cache_free(sighand_cachep, sighand); } @@ -1233,7 +1229,7 @@ bad_fork_cleanup_mm: bad_fork_cleanup_signal: cleanup_signal(p); bad_fork_cleanup_sighand: - cleanup_sighand(p); + __cleanup_sighand(p->sighand); bad_fork_cleanup_fs: exit_fs(p); /* blocking */ bad_fork_cleanup_files: -- cgit v1.2.3-70-g09d2