From 910dea7fdda22f0ee83d26d459e460c79ed94557 Mon Sep 17 00:00:00 2001
From: Eric Sesterhenn <snakebyte@gmx.de>
Date: Sun, 26 Mar 2006 18:29:26 +0200
Subject: BUG_ON() Conversion in kernel/fork.c

this changes if() BUG(); constructs to BUG_ON() which is
cleaner, contains unlikely() and can better optimized away.

Signed-off-by: Eric Sesterhenn <snakebyte@gmx.de>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
---
 kernel/fork.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'kernel/fork.c')

diff --git a/kernel/fork.c b/kernel/fork.c
index a02063903aa..d93ab2ba729 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -769,8 +769,7 @@ int unshare_files(void)
 	struct files_struct *files  = current->files;
 	int rc;
 
-	if(!files)
-		BUG();
+	BUG_ON(!files);
 
 	/* This can race but the race causes us to copy when we don't
 	   need to and drop the copy */
-- 
cgit v1.2.3-70-g09d2


From 05cfb614ddbf3181540ce09d44d96486f8ba8d6a Mon Sep 17 00:00:00 2001
From: Roman Zippel <zippel@linux-m68k.org>
Date: Sun, 26 Mar 2006 01:38:12 -0800
Subject: [PATCH] hrtimers: remove data field

The nanosleep cleanup allows to remove the data field of hrtimer.  The
callback function can use container_of() to get it's own data.  Since the
hrtimer structure is anyway embedded in other structures, this adds no
overhead.

Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/exec.c               |  2 +-
 include/linux/hrtimer.h |  5 +----
 include/linux/sched.h   |  1 +
 include/linux/timer.h   |  3 ++-
 kernel/fork.c           |  2 +-
 kernel/hrtimer.c        | 12 +++++-------
 kernel/itimer.c         | 15 +++++++--------
 kernel/posix-timers.c   |  9 ++++-----
 8 files changed, 22 insertions(+), 27 deletions(-)

(limited to 'kernel/fork.c')

diff --git a/fs/exec.c b/fs/exec.c
index 995cba3c62b..c7397c46ad6 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -632,7 +632,7 @@ static int de_thread(struct task_struct *tsk)
 		 * synchronize with any firing (by calling del_timer_sync)
 		 * before we can safely let the old group leader die.
 		 */
-		sig->real_timer.data = current;
+		sig->tsk = current;
 		spin_unlock_irq(lock);
 		if (hrtimer_cancel(&sig->real_timer))
 			hrtimer_restart(&sig->real_timer);
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index f57cc7bd700..93830158348 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -45,9 +45,7 @@ struct hrtimer_base;
  * @expires:	the absolute expiry time in the hrtimers internal
  *		representation. The time is related to the clock on
  *		which the timer is based.
- * @state:	state of the timer
  * @function:	timer expiry callback function
- * @data:	argument for the callback function
  * @base:	pointer to the timer base (per cpu and per clock)
  *
  * The hrtimer structure must be initialized by init_hrtimer_#CLOCKTYPE()
@@ -55,8 +53,7 @@ struct hrtimer_base;
 struct hrtimer {
 	struct rb_node		node;
 	ktime_t			expires;
-	int			(*function)(void *);
-	void			*data;
+	int			(*function)(struct hrtimer *);
 	struct hrtimer_base	*base;
 };
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e0054c1b9a0..036d14d2bf9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -402,6 +402,7 @@ struct signal_struct {
 
 	/* ITIMER_REAL timer for the process */
 	struct hrtimer real_timer;
+	struct task_struct *tsk;
 	ktime_t it_real_incr;
 
 	/* ITIMER_PROF and ITIMER_VIRTUAL timers for the process */
diff --git a/include/linux/timer.h b/include/linux/timer.h
index ee5a09e806e..b5caabca553 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -96,6 +96,7 @@ static inline void add_timer(struct timer_list *timer)
 
 extern void init_timers(void);
 extern void run_local_timers(void);
-extern int it_real_fn(void *);
+struct hrtimer;
+extern int it_real_fn(struct hrtimer *);
 
 #endif
diff --git a/kernel/fork.c b/kernel/fork.c
index a02063903aa..4bd6486aa67 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -848,7 +848,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
 	hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_REL);
 	sig->it_real_incr.tv64 = 0;
 	sig->real_timer.function = it_real_fn;
-	sig->real_timer.data = tsk;
+	sig->tsk = tsk;
 
 	sig->it_virt_expires = cputime_zero;
 	sig->it_virt_incr = cputime_zero;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 44108de4f02..0237a556eb1 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -613,21 +613,19 @@ static inline void run_hrtimer_queue(struct hrtimer_base *base)
 
 	while ((node = base->first)) {
 		struct hrtimer *timer;
-		int (*fn)(void *);
+		int (*fn)(struct hrtimer *);
 		int restart;
-		void *data;
 
 		timer = rb_entry(node, struct hrtimer, node);
 		if (base->softirq_time.tv64 <= timer->expires.tv64)
 			break;
 
 		fn = timer->function;
-		data = timer->data;
 		set_curr_timer(base, timer);
 		__remove_hrtimer(timer, base);
 		spin_unlock_irq(&base->lock);
 
-		restart = fn(data);
+		restart = fn(timer);
 
 		spin_lock_irq(&base->lock);
 
@@ -664,9 +662,10 @@ struct sleep_hrtimer {
 	int expired;
 };
 
-static int nanosleep_wakeup(void *data)
+static int nanosleep_wakeup(struct hrtimer *timer)
 {
-	struct sleep_hrtimer *t = data;
+	struct sleep_hrtimer *t =
+		container_of(timer, struct sleep_hrtimer, timer);
 
 	t->expired = 1;
 	wake_up_process(t->task);
@@ -677,7 +676,6 @@ static int nanosleep_wakeup(void *data)
 static int __sched do_nanosleep(struct sleep_hrtimer *t, enum hrtimer_mode mode)
 {
 	t->timer.function = nanosleep_wakeup;
-	t->timer.data = t;
 	t->task = current;
 	t->expired = 0;
 
diff --git a/kernel/itimer.c b/kernel/itimer.c
index af2ec6b4392..204ed7939e7 100644
--- a/kernel/itimer.c
+++ b/kernel/itimer.c
@@ -128,17 +128,16 @@ asmlinkage long sys_getitimer(int which, struct itimerval __user *value)
 /*
  * The timer is automagically restarted, when interval != 0
  */
-int it_real_fn(void *data)
+int it_real_fn(struct hrtimer *timer)
 {
-	struct task_struct *tsk = (struct task_struct *) data;
+	struct signal_struct *sig =
+	    container_of(timer, struct signal_struct, real_timer);
 
-	send_group_sig_info(SIGALRM, SEND_SIG_PRIV, tsk);
-
-	if (tsk->signal->it_real_incr.tv64 != 0) {
-		hrtimer_forward(&tsk->signal->real_timer,
-				tsk->signal->real_timer.base->softirq_time,
-				tsk->signal->it_real_incr);
+	send_group_sig_info(SIGALRM, SEND_SIG_PRIV, sig->tsk);
 
+	if (sig->it_real_incr.tv64 != 0) {
+		hrtimer_forward(timer, timer->base->softirq_time,
+				sig->it_real_incr);
 		return HRTIMER_RESTART;
 	}
 	return HRTIMER_NORESTART;
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 7c5f44787c8..ac6dc874442 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -145,7 +145,7 @@ static int common_timer_set(struct k_itimer *, int,
 			    struct itimerspec *, struct itimerspec *);
 static int common_timer_del(struct k_itimer *timer);
 
-static int posix_timer_fn(void *data);
+static int posix_timer_fn(struct hrtimer *data);
 
 static struct k_itimer *lock_timer(timer_t timer_id, unsigned long *flags);
 
@@ -334,14 +334,14 @@ EXPORT_SYMBOL_GPL(posix_timer_event);
 
  * This code is for CLOCK_REALTIME* and CLOCK_MONOTONIC* timers.
  */
-static int posix_timer_fn(void *data)
+static int posix_timer_fn(struct hrtimer *timer)
 {
-	struct k_itimer *timr = data;
-	struct hrtimer *timer = &timr->it.real.timer;
+	struct k_itimer *timr;
 	unsigned long flags;
 	int si_private = 0;
 	int ret = HRTIMER_NORESTART;
 
+	timr = container_of(timer, struct k_itimer, it.real.timer);
 	spin_lock_irqsave(&timr->it_lock, flags);
 
 	if (timr->it.real.interval.tv64 != 0)
@@ -725,7 +725,6 @@ common_timer_set(struct k_itimer *timr, int flags,
 
 	mode = flags & TIMER_ABSTIME ? HRTIMER_ABS : HRTIMER_REL;
 	hrtimer_init(&timr->it.real.timer, timr->it_clock, mode);
-	timr->it.real.timer.data = timr;
 	timr->it.real.timer.function = posix_timer_fn;
 
 	timer->expires = timespec_to_ktime(new_setting->it_value);
-- 
cgit v1.2.3-70-g09d2


From 8f17d3a5049d32392b79925c73a0cf99ce6d5af0 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 27 Mar 2006 01:16:27 -0800
Subject: [PATCH] lightweight robust futexes updates

- fix: initialize the robust list(s) to NULL in copy_process.

- doc update

- cleanup: rename _inuser to _inatomic

- __user cleanups and other small cleanups

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Arjan van de Ven <arjan@infradead.org>
Cc: Ulrich Drepper <drepper@redhat.com>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/robust-futex-ABI.txt |  2 --
 Documentation/robust-futexes.txt   |  2 +-
 include/asm-frv/futex.h            |  2 +-
 include/asm-generic/futex.h        |  2 +-
 include/asm-i386/futex.h           |  2 +-
 include/asm-mips/futex.h           |  2 +-
 include/asm-powerpc/futex.h        |  2 +-
 include/asm-x86_64/futex.h         |  2 +-
 include/linux/futex.h              |  2 +-
 kernel/fork.c                      |  5 ++++-
 kernel/futex.c                     | 20 +++++++++-----------
 kernel/futex_compat.c              |  7 +++----
 12 files changed, 24 insertions(+), 26 deletions(-)

(limited to 'kernel/fork.c')

diff --git a/Documentation/robust-futex-ABI.txt b/Documentation/robust-futex-ABI.txt
index def5d873528..8529a17ffaa 100644
--- a/Documentation/robust-futex-ABI.txt
+++ b/Documentation/robust-futex-ABI.txt
@@ -142,8 +142,6 @@ On insertion:
     of the 'lock word', to the linked list starting at 'head', and
  4) clear the 'list_op_pending' word.
 
-	XXX I am particularly unsure of the following -pj XXX
-
 On removal:
  1) set the 'list_op_pending' word to the address of the 'lock word'
     to be removed,
diff --git a/Documentation/robust-futexes.txt b/Documentation/robust-futexes.txt
index 7aecc67b136..df82d75245a 100644
--- a/Documentation/robust-futexes.txt
+++ b/Documentation/robust-futexes.txt
@@ -213,6 +213,6 @@ robust-mutex testcases.
 All other architectures should build just fine too - but they wont have
 the new syscalls yet.
 
-Architectures need to implement the new futex_atomic_cmpxchg_inuser()
+Architectures need to implement the new futex_atomic_cmpxchg_inatomic()
 inline function before writing up the syscalls (that function returns
 -ENOSYS right now).
diff --git a/include/asm-frv/futex.h b/include/asm-frv/futex.h
index 9a0e9026ba5..08b3d1da358 100644
--- a/include/asm-frv/futex.h
+++ b/include/asm-frv/futex.h
@@ -10,7 +10,7 @@
 extern int futex_atomic_op_inuser(int encoded_op, int __user *uaddr);
 
 static inline int
-futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 {
 	return -ENOSYS;
 }
diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h
index 514bd401cd7..df893c16031 100644
--- a/include/asm-generic/futex.h
+++ b/include/asm-generic/futex.h
@@ -50,7 +50,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 }
 
 static inline int
-futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 {
 	return -ENOSYS;
 }
diff --git a/include/asm-i386/futex.h b/include/asm-i386/futex.h
index 41184a31885..7b8ceefd010 100644
--- a/include/asm-i386/futex.h
+++ b/include/asm-i386/futex.h
@@ -105,7 +105,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 }
 
 static inline int
-futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 {
 	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
 		return -EFAULT;
diff --git a/include/asm-mips/futex.h b/include/asm-mips/futex.h
index c5fb2d6d918..a554089991f 100644
--- a/include/asm-mips/futex.h
+++ b/include/asm-mips/futex.h
@@ -100,7 +100,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 }
 
 static inline int
-futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 {
 	return -ENOSYS;
 }
diff --git a/include/asm-powerpc/futex.h b/include/asm-powerpc/futex.h
index 80ed9854e42..f1b3c00bc1c 100644
--- a/include/asm-powerpc/futex.h
+++ b/include/asm-powerpc/futex.h
@@ -82,7 +82,7 @@ static inline int futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 }
 
 static inline int
-futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 {
 	return -ENOSYS;
 }
diff --git a/include/asm-x86_64/futex.h b/include/asm-x86_64/futex.h
index 7d9eb1a8454..9804bf07b09 100644
--- a/include/asm-x86_64/futex.h
+++ b/include/asm-x86_64/futex.h
@@ -95,7 +95,7 @@ futex_atomic_op_inuser (int encoded_op, int __user *uaddr)
 }
 
 static inline int
-futex_atomic_cmpxchg_inuser(int __user *uaddr, int oldval, int newval)
+futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval)
 {
 	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
 		return -EFAULT;
diff --git a/include/linux/futex.h b/include/linux/futex.h
index 20face6b798..55fff96ae85 100644
--- a/include/linux/futex.h
+++ b/include/linux/futex.h
@@ -100,7 +100,7 @@ long do_futex(unsigned long uaddr, int op, int val,
 		unsigned long timeout, unsigned long uaddr2, int val2,
 		int val3);
 
-extern int handle_futex_death(unsigned int *uaddr, struct task_struct *curr);
+extern int handle_futex_death(u32 __user *uaddr, struct task_struct *curr);
 
 #ifdef CONFIG_FUTEX
 extern void exit_robust_list(struct task_struct *curr);
diff --git a/kernel/fork.c b/kernel/fork.c
index e0a2b449dea..c49bd193b05 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1061,7 +1061,10 @@ static task_t *copy_process(unsigned long clone_flags,
 	 * Clear TID on mm_release()?
 	 */
 	p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;
-
+	p->robust_list = NULL;
+#ifdef CONFIG_COMPAT
+	p->compat_robust_list = NULL;
+#endif
 	/*
 	 * sigaltstack should be cleared when sharing the same VM
 	 */
diff --git a/kernel/futex.c b/kernel/futex.c
index feb724b2554..9c9b2b6b22d 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -913,15 +913,15 @@ err_unlock:
  * Process a futex-list entry, check whether it's owned by the
  * dying task, and do notification if so:
  */
-int handle_futex_death(unsigned int *uaddr, struct task_struct *curr)
+int handle_futex_death(u32 __user *uaddr, struct task_struct *curr)
 {
-	unsigned int futex_val;
+	u32 uval;
 
-repeat:
-	if (get_user(futex_val, uaddr))
+retry:
+	if (get_user(uval, uaddr))
 		return -1;
 
-	if ((futex_val & FUTEX_TID_MASK) == curr->pid) {
+	if ((uval & FUTEX_TID_MASK) == curr->pid) {
 		/*
 		 * Ok, this dying thread is truly holding a futex
 		 * of interest. Set the OWNER_DIED bit atomically
@@ -932,12 +932,11 @@ repeat:
 		 * thread-death.) The rest of the cleanup is done in
 		 * userspace.
 		 */
-		if (futex_atomic_cmpxchg_inuser(uaddr, futex_val,
-					 futex_val | FUTEX_OWNER_DIED) !=
-								   futex_val)
-			goto repeat;
+		if (futex_atomic_cmpxchg_inatomic(uaddr, uval,
+					 uval | FUTEX_OWNER_DIED) != uval)
+			goto retry;
 
-		if (futex_val & FUTEX_WAITERS)
+		if (uval & FUTEX_WAITERS)
 			futex_wake((unsigned long)uaddr, 1);
 	}
 	return 0;
@@ -985,7 +984,6 @@ void exit_robust_list(struct task_struct *curr)
 			if (handle_futex_death((void *)entry + futex_offset,
 						curr))
 				return;
-
 		/*
 		 * Fetch the next entry in the list:
 		 */
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index c153559ef28..9c077cf9aa8 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -121,9 +121,9 @@ err_unlock:
 	return ret;
 }
 
-asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, int val,
+asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, u32 val,
 		struct compat_timespec __user *utime, u32 __user *uaddr2,
-		int val3)
+		u32 val3)
 {
 	struct timespec t;
 	unsigned long timeout = MAX_SCHEDULE_TIMEOUT;
@@ -137,6 +137,5 @@ asmlinkage long compat_sys_futex(u32 __user *uaddr, int op, int val,
 	if (op >= FUTEX_REQUEUE)
 		val2 = (int) (unsigned long) utime;
 
-	return do_futex((unsigned long)uaddr, op, val, timeout,
-			(unsigned long)uaddr2, val2, val3);
+	return do_futex(uaddr, op, val, timeout, uaddr2, val2, val3);
 }
-- 
cgit v1.2.3-70-g09d2


From c97d98931ac52ef110b62d9b75c6a6f2bfbc1898 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Tue, 28 Mar 2006 16:11:06 -0800
Subject: [PATCH] kill SET_LINKS/REMOVE_LINKS

Both SET_LINKS() and SET_LINKS/REMOVE_LINKS() have exactly one caller, and
these callers already check thread_group_leader().

This patch kills theese macros, they mix two different things: setting
process's parent and registering it in init_task.tasks list.  Callers are
updated to do these actions by hand.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h | 12 ------------
 kernel/exit.c         |  4 +++-
 kernel/fork.c         |  4 +++-
 3 files changed, 6 insertions(+), 14 deletions(-)

(limited to 'kernel/fork.c')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index b4b14c32b28..1f16fb1fea2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1186,18 +1186,6 @@ extern void wait_task_inactive(task_t * p);
 #define remove_parent(p)	list_del_init(&(p)->sibling)
 #define add_parent(p)		list_add_tail(&(p)->sibling,&(p)->parent->children)
 
-#define REMOVE_LINKS(p) do {					\
-	if (thread_group_leader(p))				\
-		list_del_init(&(p)->tasks);			\
-	remove_parent(p);					\
-	} while (0)
-
-#define SET_LINKS(p) do {					\
-	if (thread_group_leader(p))				\
-		list_add_tail(&(p)->tasks,&init_task.tasks);	\
-	add_parent(p);						\
-	} while (0)
-
 #define next_task(p)	list_entry((p)->tasks.next, struct task_struct, tasks)
 #define prev_task(p)	list_entry((p)->tasks.prev, struct task_struct, tasks)
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 5b5e8b67680..f436a6bd3fb 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -54,11 +54,13 @@ static void __unhash_process(struct task_struct *p)
 	if (thread_group_leader(p)) {
 		detach_pid(p, PIDTYPE_PGID);
 		detach_pid(p, PIDTYPE_SID);
+
+		list_del_init(&p->tasks);
 		if (p->pid)
 			__get_cpu_var(process_counts)--;
 	}
 
-	REMOVE_LINKS(p);
+	remove_parent(p);
 }
 
 void release_task(struct task_struct * p)
diff --git a/kernel/fork.c b/kernel/fork.c
index c49bd193b05..74c67629ee6 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1181,7 +1181,7 @@ static task_t *copy_process(unsigned long clone_flags,
 	 */
 	p->ioprio = current->ioprio;
 
-	SET_LINKS(p);
+	add_parent(p);
 	if (unlikely(p->ptrace & PT_PTRACED))
 		__ptrace_link(p, current->parent);
 
@@ -1191,6 +1191,8 @@ static task_t *copy_process(unsigned long clone_flags,
 		p->signal->session = current->signal->session;
 		attach_pid(p, PIDTYPE_PGID, process_group(p));
 		attach_pid(p, PIDTYPE_SID, p->signal->session);
+
+		list_add_tail(&p->tasks, &init_task.tasks);
 		if (p->pid)
 			__get_cpu_var(process_counts)++;
 	}
-- 
cgit v1.2.3-70-g09d2


From 73b9ebfe126a4a886ee46cbab637374d7024668a Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Tue, 28 Mar 2006 16:11:07 -0800
Subject: [PATCH] pidhash: don't count idle threads

fork_idle() does unhash_process() just after copy_process().  Contrary,
boot_cpu's idle thread explicitely registers itself for each pid_type with nr
= 0.

copy_process() already checks p->pid != 0 before process_counts++, I think we
can just skip attach_pid() calls and job control inits for idle threads and
kill unhash_process().  We don't need to cleanup ->proc_dentry in fork_idle()
because with this patch idle threads are never hashed in
kernel/pid.c:pid_hash[].

We don't need to hash pid == 0 in pidmap_init().  free_pidmap() is never
called with pid == 0 arg, so it will never be reused.  So it is still possible
to use pid == 0 in any PIDTYPE_xxx namespace from kernel/pid.c's POV.

However with this patch we don't hash pid == 0 for PIDTYPE_PID case.  We still
have have PIDTYPE_PGID/PIDTYPE_SID entries with pid == 0: /sbin/init and
kernel threads which don't call daemonize().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/um/kernel/smp.c  |  1 -
 include/linux/sched.h |  2 --
 kernel/exit.c         | 18 +-----------------
 kernel/fork.c         | 35 ++++++++++++++++++-----------------
 kernel/pid.c          | 10 +---------
 5 files changed, 20 insertions(+), 46 deletions(-)

(limited to 'kernel/fork.c')

diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
index c8d8d0ac1a7..511116aebaf 100644
--- a/arch/um/kernel/smp.c
+++ b/arch/um/kernel/smp.c
@@ -143,7 +143,6 @@ void smp_prepare_cpus(unsigned int maxcpus)
 		idle = idle_thread(cpu);
 
 		init_idle(idle, cpu);
-		unhash_process(idle);
 
 		waittime = 200000000;
 		while (waittime-- && !cpu_isset(cpu, cpu_callin_map))
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1f16fb1fea2..ddc0df7f8bf 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1214,8 +1214,6 @@ static inline int thread_group_empty(task_t *p)
 #define delay_group_leader(p) \
 		(thread_group_leader(p) && !thread_group_empty(p))
 
-extern void unhash_process(struct task_struct *p);
-
 /*
  * Protects ->fs, ->files, ->mm, ->ptrace, ->group_info, ->comm, keyring
  * subscriptions and synchronises with wait4().  Also used in procfs.  Also
diff --git a/kernel/exit.c b/kernel/exit.c
index f436a6bd3fb..a94e1c31131 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -56,8 +56,7 @@ static void __unhash_process(struct task_struct *p)
 		detach_pid(p, PIDTYPE_SID);
 
 		list_del_init(&p->tasks);
-		if (p->pid)
-			__get_cpu_var(process_counts)--;
+		__get_cpu_var(process_counts)--;
 	}
 
 	remove_parent(p);
@@ -118,21 +117,6 @@ repeat:
 		goto repeat;
 }
 
-/* we are using it only for SMP init */
-
-void unhash_process(struct task_struct *p)
-{
-	struct dentry *proc_dentry;
-
-	spin_lock(&p->proc_lock);
-	proc_dentry = proc_pid_unhash(p);
-	write_lock_irq(&tasklist_lock);
-	__unhash_process(p);
-	write_unlock_irq(&tasklist_lock);
-	spin_unlock(&p->proc_lock);
-	proc_pid_flush(proc_dentry);
-}
-
 /*
  * This checks not only the pgrp, but falls back on the pid if no
  * satisfactory pgrp is found. I dunno - gdb doesn't work correctly
diff --git a/kernel/fork.c b/kernel/fork.c
index 74c67629ee6..0c32e28cdc5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1181,25 +1181,26 @@ static task_t *copy_process(unsigned long clone_flags,
 	 */
 	p->ioprio = current->ioprio;
 
-	add_parent(p);
-	if (unlikely(p->ptrace & PT_PTRACED))
-		__ptrace_link(p, current->parent);
-
-	if (thread_group_leader(p)) {
-		p->signal->tty = current->signal->tty;
-		p->signal->pgrp = process_group(current);
-		p->signal->session = current->signal->session;
-		attach_pid(p, PIDTYPE_PGID, process_group(p));
-		attach_pid(p, PIDTYPE_SID, p->signal->session);
-
-		list_add_tail(&p->tasks, &init_task.tasks);
-		if (p->pid)
+	if (likely(p->pid)) {
+		add_parent(p);
+		if (unlikely(p->ptrace & PT_PTRACED))
+			__ptrace_link(p, current->parent);
+
+		if (thread_group_leader(p)) {
+			p->signal->tty = current->signal->tty;
+			p->signal->pgrp = process_group(current);
+			p->signal->session = current->signal->session;
+			attach_pid(p, PIDTYPE_PGID, process_group(p));
+			attach_pid(p, PIDTYPE_SID, p->signal->session);
+
+			list_add_tail(&p->tasks, &init_task.tasks);
 			__get_cpu_var(process_counts)++;
+		}
+		attach_pid(p, PIDTYPE_TGID, p->tgid);
+		attach_pid(p, PIDTYPE_PID, p->pid);
+		nr_threads++;
 	}
-	attach_pid(p, PIDTYPE_TGID, p->tgid);
-	attach_pid(p, PIDTYPE_PID, p->pid);
 
-	nr_threads++;
 	total_forks++;
 	spin_unlock(&current->sighand->siglock);
 	write_unlock_irq(&tasklist_lock);
@@ -1263,7 +1264,7 @@ task_t * __devinit fork_idle(int cpu)
 	if (!task)
 		return ERR_PTR(-ENOMEM);
 	init_idle(task, cpu);
-	unhash_process(task);
+
 	return task;
 }
 
diff --git a/kernel/pid.c b/kernel/pid.c
index 7781d999905..a9f2dfd006d 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -247,16 +247,8 @@ void __init pidhash_init(void)
 
 void __init pidmap_init(void)
 {
-	int i;
-
 	pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL);
+	/* Reserve PID 0. We never call free_pidmap(0) */
 	set_bit(0, pidmap_array->page);
 	atomic_dec(&pidmap_array->nr_free);
-
-	/*
-	 * Allocate PID 0, and hash it via all PID types:
-	 */
-
-	for (i = 0; i < PIDTYPE_MAX; i++)
-		attach_pid(current, i, 0);
 }
-- 
cgit v1.2.3-70-g09d2


From aa1757f90bea3f598b6e5d04d922a6a60200f1da Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Tue, 28 Mar 2006 16:11:12 -0800
Subject: [PATCH] convert sighand_cache to use SLAB_DESTROY_BY_RCU

This patch borrows a clever Hugh's 'struct anon_vma' trick.

Without tasklist_lock held we can't trust task->sighand until we locked it
and re-checked that it is still the same.

But this means we don't need to defer 'kmem_cache_free(sighand)'.  We can
return the memory to slab immediately, all we need is to be sure that
sighand->siglock can't dissapear inside rcu protected section.

To do so we need to initialize ->siglock inside ctor function,
SLAB_DESTROY_BY_RCU does the rest.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/exec.c             |  3 +--
 include/linux/sched.h |  8 --------
 kernel/fork.c         | 21 +++++++++++----------
 kernel/signal.c       |  2 +-
 4 files changed, 13 insertions(+), 21 deletions(-)

(limited to 'kernel/fork.c')

diff --git a/fs/exec.c b/fs/exec.c
index 9046ad2b061..950ebd43cdc 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -768,7 +768,6 @@ no_thread_group:
 		/*
 		 * Move our state over to newsighand and switch it in.
 		 */
-		spin_lock_init(&newsighand->siglock);
 		atomic_set(&newsighand->count, 1);
 		memcpy(newsighand->action, oldsighand->action,
 		       sizeof(newsighand->action));
@@ -785,7 +784,7 @@ no_thread_group:
 		write_unlock_irq(&tasklist_lock);
 
 		if (atomic_dec_and_test(&oldsighand->count))
-			sighand_free(oldsighand);
+			kmem_cache_free(sighand_cachep, oldsighand);
 	}
 
 	BUG_ON(!thread_group_leader(current));
diff --git a/include/linux/sched.h b/include/linux/sched.h
index ddc0df7f8bf..bbcfc873bd9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -355,16 +355,8 @@ struct sighand_struct {
 	atomic_t		count;
 	struct k_sigaction	action[_NSIG];
 	spinlock_t		siglock;
-	struct rcu_head		rcu;
 };
 
-extern void sighand_free_cb(struct rcu_head *rhp);
-
-static inline void sighand_free(struct sighand_struct *sp)
-{
-	call_rcu(&sp->rcu, sighand_free_cb);
-}
-
 /*
  * NOTE! "signal_struct" does not have it's own
  * locking, because a shared signal_struct always
diff --git a/kernel/fork.c b/kernel/fork.c
index 0c32e28cdc5..33ffb5bf0db 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -786,14 +786,6 @@ int unshare_files(void)
 
 EXPORT_SYMBOL(unshare_files);
 
-void sighand_free_cb(struct rcu_head *rhp)
-{
-	struct sighand_struct *sp;
-
-	sp = container_of(rhp, struct sighand_struct, rcu);
-	kmem_cache_free(sighand_cachep, sp);
-}
-
 static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
 {
 	struct sighand_struct *sig;
@@ -806,7 +798,6 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t
 	rcu_assign_pointer(tsk->sighand, sig);
 	if (!sig)
 		return -ENOMEM;
-	spin_lock_init(&sig->siglock);
 	atomic_set(&sig->count, 1);
 	memcpy(sig->action, current->sighand->action, sizeof(sig->action));
 	return 0;
@@ -1356,11 +1347,21 @@ long do_fork(unsigned long clone_flags,
 #define ARCH_MIN_MMSTRUCT_ALIGN 0
 #endif
 
+static void sighand_ctor(void *data, kmem_cache_t *cachep, unsigned long flags)
+{
+	struct sighand_struct *sighand = data;
+
+	if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
+					SLAB_CTOR_CONSTRUCTOR)
+		spin_lock_init(&sighand->siglock);
+}
+
 void __init proc_caches_init(void)
 {
 	sighand_cachep = kmem_cache_create("sighand_cache",
 			sizeof(struct sighand_struct), 0,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU,
+			sighand_ctor, NULL);
 	signal_cachep = kmem_cache_create("signal_cache",
 			sizeof(struct signal_struct), 0,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
diff --git a/kernel/signal.c b/kernel/signal.c
index dc8f91bf9f8..b0b1ca9daa3 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -330,7 +330,7 @@ void __exit_sighand(struct task_struct *tsk)
 	/* Ok, we're done with the signal handlers */
 	tsk->sighand = NULL;
 	if (atomic_dec_and_test(&sighand->count))
-		sighand_free(sighand);
+		kmem_cache_free(sighand_cachep, sighand);
 }
 
 void exit_sighand(struct task_struct *tsk)
-- 
cgit v1.2.3-70-g09d2


From 7001510d0cbf51ad202dd2d0744f54104285cbb9 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Tue, 28 Mar 2006 16:11:14 -0800
Subject: [PATCH] copy_process: cleanup bad_fork_cleanup_sighand

The only caller of exit_sighand(tsk) is copy_process's error path.  We can
call __exit_sighand() directly and kill exit_sighand().

This 'tsk' was not yet registered in pid_hash[] or init_task.tasks, it has no
external references, nobody can see it, and

	IF (clone_flags & CLONE_SIGHAND)
		At least 'current' has a reference to ->sighand, this
		means atomic_dec_and_test(sighand->count) can't be true.

	ELSE
		Nobody can see this ->sighand, this means we can free it
		without any locking.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: "Paul E. McKenney" <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h |  1 -
 kernel/fork.c         |  3 ++-
 kernel/signal.c       | 14 --------------
 3 files changed, 2 insertions(+), 16 deletions(-)

(limited to 'kernel/fork.c')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index ca1fd31aae9..69c2a1e1529 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1151,7 +1151,6 @@ extern void exit_thread(void);
 extern void exit_files(struct task_struct *);
 extern void exit_signal(struct task_struct *);
 extern void __exit_signal(struct task_struct *);
-extern void exit_sighand(struct task_struct *);
 extern void __exit_sighand(struct task_struct *);
 extern void exit_itimers(struct signal_struct *);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 33ffb5bf0db..8a46ad52be8 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1208,7 +1208,8 @@ bad_fork_cleanup_mm:
 bad_fork_cleanup_signal:
 	exit_signal(p);
 bad_fork_cleanup_sighand:
-	exit_sighand(p);
+	if (p->sighand)
+		__exit_sighand(p);
 bad_fork_cleanup_fs:
 	exit_fs(p); /* blocking */
 bad_fork_cleanup_files:
diff --git a/kernel/signal.c b/kernel/signal.c
index c5b65aa4c2b..1d7f4463c32 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -336,20 +336,6 @@ void __exit_sighand(struct task_struct *tsk)
 		kmem_cache_free(sighand_cachep, sighand);
 }
 
-void exit_sighand(struct task_struct *tsk)
-{
-	write_lock_irq(&tasklist_lock);
-	rcu_read_lock();
-	if (tsk->sighand != NULL) {
-		struct sighand_struct *sighand = rcu_dereference(tsk->sighand);
-		spin_lock(&sighand->siglock);
-		__exit_sighand(tsk);
-		spin_unlock(&sighand->siglock);
-	}
-	rcu_read_unlock();
-	write_unlock_irq(&tasklist_lock);
-}
-
 /*
  * This function expects the tasklist_lock write-locked.
  */
-- 
cgit v1.2.3-70-g09d2


From 6b3934ef52712ece50605dfc72e55d00c580831a Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Tue, 28 Mar 2006 16:11:16 -0800
Subject: [PATCH] copy_process: cleanup bad_fork_cleanup_signal

__exit_signal() does important cleanups atomically under ->siglock.  It is
also called from copy_process's error path.  This is not good, for example we
can't move __unhash_process() under ->siglock for that reason.

We should not mix these 2 paths, just look at ugly 'if (p->sighand)' under
'bad_fork_cleanup_sighand:' label.  For copy_process() case it is sufficient
to just backout copy_signal(), nothing more.

Again, nobody can see this task yet.  For CLONE_THREAD case we just decrement
signal->count, otherwise nobody can see this ->signal and we can free it
lockless.

This patch assumes it is safe to do exit_thread_group_keys() without
tasklist_lock.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h |  2 +-
 include/linux/slab.h  |  1 -
 kernel/fork.c         | 23 +++++++++++++++++++----
 kernel/signal.c       | 15 +--------------
 4 files changed, 21 insertions(+), 20 deletions(-)

(limited to 'kernel/fork.c')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 69c2a1e1529..7dd430b697a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1149,7 +1149,7 @@ extern void flush_thread(void);
 extern void exit_thread(void);
 
 extern void exit_files(struct task_struct *);
-extern void exit_signal(struct task_struct *);
+extern void __cleanup_signal(struct signal_struct *);
 extern void __exit_signal(struct task_struct *);
 extern void __exit_sighand(struct task_struct *);
 extern void exit_itimers(struct signal_struct *);
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 15e1d9736b1..3af03b19c98 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -210,7 +210,6 @@ extern kmem_cache_t	*names_cachep;
 extern kmem_cache_t	*files_cachep;
 extern kmem_cache_t	*filp_cachep;
 extern kmem_cache_t	*fs_cachep;
-extern kmem_cache_t	*signal_cachep;
 extern kmem_cache_t	*sighand_cachep;
 extern kmem_cache_t	*bio_cachep;
 
diff --git a/kernel/fork.c b/kernel/fork.c
index 8a46ad52be8..0aff28cdbad 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -84,7 +84,7 @@ static kmem_cache_t *task_struct_cachep;
 #endif
 
 /* SLAB cache for signal_struct structures (tsk->signal) */
-kmem_cache_t *signal_cachep;
+static kmem_cache_t *signal_cachep;
 
 /* SLAB cache for sighand_struct structures (tsk->sighand) */
 kmem_cache_t *sighand_cachep;
@@ -872,6 +872,22 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
 	return 0;
 }
 
+void __cleanup_signal(struct signal_struct *sig)
+{
+	exit_thread_group_keys(sig);
+	kmem_cache_free(signal_cachep, sig);
+}
+
+static inline void cleanup_signal(struct task_struct *tsk)
+{
+	struct signal_struct *sig = tsk->signal;
+
+	atomic_dec(&sig->live);
+
+	if (atomic_dec_and_test(&sig->count))
+		__cleanup_signal(sig);
+}
+
 static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
 {
 	unsigned long new_flags = p->flags;
@@ -1206,10 +1222,9 @@ bad_fork_cleanup_mm:
 	if (p->mm)
 		mmput(p->mm);
 bad_fork_cleanup_signal:
-	exit_signal(p);
+	cleanup_signal(p);
 bad_fork_cleanup_sighand:
-	if (p->sighand)
-		__exit_sighand(p);
+	__exit_sighand(p);
 bad_fork_cleanup_fs:
 	exit_fs(p); /* blocking */
 bad_fork_cleanup_files:
diff --git a/kernel/signal.c b/kernel/signal.c
index 1d7f4463c32..54e9ef673e6 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -395,23 +395,10 @@ void __exit_signal(struct task_struct *tsk)
 	clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
 	flush_sigqueue(&tsk->pending);
 	if (sig) {
-		/*
-		 * We are cleaning up the signal_struct here.
-		 */
-		exit_thread_group_keys(sig);
-		kmem_cache_free(signal_cachep, sig);
+		__cleanup_signal(sig);
 	}
 }
 
-void exit_signal(struct task_struct *tsk)
-{
-	atomic_dec(&tsk->signal->live);
-
-	write_lock_irq(&tasklist_lock);
-	__exit_signal(tsk);
-	write_unlock_irq(&tasklist_lock);
-}
-
 /*
  * Flush all handlers for a task.
  */
-- 
cgit v1.2.3-70-g09d2


From c81addc9d3a0ebff2155e0cd86f90820ab97147e Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Tue, 28 Mar 2006 16:11:17 -0800
Subject: [PATCH] rename __exit_sighand to cleanup_sighand

Cosmetic, rename __exit_sighand to cleanup_sighand and move it close to
copy_sighand().

This matches copy_signal/cleanup_signal naming, and I think it is easier to
follow.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Acked-by: "Paul E. McKenney" <paulmck@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h |  2 +-
 kernel/fork.c         | 12 +++++++++++-
 kernel/signal.c       | 19 ++-----------------
 3 files changed, 14 insertions(+), 19 deletions(-)

(limited to 'kernel/fork.c')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 7dd430b697a..921148277da 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1150,8 +1150,8 @@ extern void exit_thread(void);
 
 extern void exit_files(struct task_struct *);
 extern void __cleanup_signal(struct signal_struct *);
+extern void cleanup_sighand(struct task_struct *);
 extern void __exit_signal(struct task_struct *);
-extern void __exit_sighand(struct task_struct *);
 extern void exit_itimers(struct signal_struct *);
 
 extern NORET_TYPE void do_group_exit(int);
diff --git a/kernel/fork.c b/kernel/fork.c
index 0aff28cdbad..12cdd9fc9d0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -803,6 +803,16 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t
 	return 0;
 }
 
+void cleanup_sighand(struct task_struct *tsk)
+{
+	struct sighand_struct * sighand = tsk->sighand;
+
+	/* Ok, we're done with the signal handlers */
+	tsk->sighand = NULL;
+	if (atomic_dec_and_test(&sighand->count))
+		kmem_cache_free(sighand_cachep, sighand);
+}
+
 static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk)
 {
 	struct signal_struct *sig;
@@ -1224,7 +1234,7 @@ bad_fork_cleanup_mm:
 bad_fork_cleanup_signal:
 	cleanup_signal(p);
 bad_fork_cleanup_sighand:
-	__exit_sighand(p);
+	cleanup_sighand(p);
 bad_fork_cleanup_fs:
 	exit_fs(p); /* blocking */
 bad_fork_cleanup_files:
diff --git a/kernel/signal.c b/kernel/signal.c
index ca1fa854e46..b29c868bd5e 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -310,9 +310,7 @@ static void flush_sigqueue(struct sigpending *queue)
 /*
  * Flush all pending signals for a task.
  */
-
-void
-flush_signals(struct task_struct *t)
+void flush_signals(struct task_struct *t)
 {
 	unsigned long flags;
 
@@ -323,19 +321,6 @@ flush_signals(struct task_struct *t)
 	spin_unlock_irqrestore(&t->sighand->siglock, flags);
 }
 
-/*
- * This function expects the tasklist_lock write-locked.
- */
-void __exit_sighand(struct task_struct *tsk)
-{
-	struct sighand_struct * sighand = tsk->sighand;
-
-	/* Ok, we're done with the signal handlers */
-	tsk->sighand = NULL;
-	if (atomic_dec_and_test(&sighand->count))
-		kmem_cache_free(sighand_cachep, sighand);
-}
-
 /*
  * This function expects the tasklist_lock write-locked.
  */
@@ -386,7 +371,7 @@ void __exit_signal(struct task_struct *tsk)
 	}
 
 	tsk->signal = NULL;
-	__exit_sighand(tsk);
+	cleanup_sighand(tsk);
 	spin_unlock(&sighand->siglock);
 	rcu_read_unlock();
 
-- 
cgit v1.2.3-70-g09d2


From 47e65328a7b1cdfc4e3102e50d60faf94ebba7d3 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Tue, 28 Mar 2006 16:11:25 -0800
Subject: [PATCH] pids: kill PIDTYPE_TGID

This patch kills PIDTYPE_TGID pid_type thus saving one hash table in
kernel/pid.c and speeding up subthreads create/destroy a bit.  It is also a
preparation for the further tref/pids rework.

This patch adds 'struct list_head thread_group' to 'struct task_struct'
instead.

We don't detach group leader from PIDTYPE_PID namespace until another
thread inherits it's ->pid == ->tgid, so we are safe wrt premature
free_pidmap(->tgid) call.

Currently there are no users of find_task_by_pid_type(PIDTYPE_TGID).
Should the need arise, we can use find_task_by_pid()->group_leader.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-By: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/pid.h   |  1 -
 include/linux/sched.h | 11 ++++++++---
 kernel/exit.c         | 10 +---------
 kernel/fork.c         |  4 +++-
 4 files changed, 12 insertions(+), 14 deletions(-)

(limited to 'kernel/fork.c')

diff --git a/include/linux/pid.h b/include/linux/pid.h
index 099e70ecf7c..5b9082cc600 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -4,7 +4,6 @@
 enum pid_type
 {
 	PIDTYPE_PID,
-	PIDTYPE_TGID,
 	PIDTYPE_PGID,
 	PIDTYPE_SID,
 	PIDTYPE_MAX
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a913fca9e70..99855f694eb 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -752,6 +752,7 @@ struct task_struct {
 
 	/* PID/PID hash table linkage. */
 	struct pid pids[PIDTYPE_MAX];
+	struct list_head thread_group;
 
 	struct completion *vfork_done;		/* for vfork() */
 	int __user *set_child_tid;		/* CLONE_CHILD_SETTID */
@@ -1192,13 +1193,17 @@ extern void wait_task_inactive(task_t * p);
 #define while_each_thread(g, t) \
 	while ((t = next_thread(t)) != g)
 
-extern task_t * FASTCALL(next_thread(const task_t *p));
-
 #define thread_group_leader(p)	(p->pid == p->tgid)
 
+static inline task_t *next_thread(task_t *p)
+{
+	return list_entry(rcu_dereference(p->thread_group.next),
+				task_t, thread_group);
+}
+
 static inline int thread_group_empty(task_t *p)
 {
-	return list_empty(&p->pids[PIDTYPE_TGID].pid_list);
+	return list_empty(&p->thread_group);
 }
 
 #define delay_group_leader(p) \
diff --git a/kernel/exit.c b/kernel/exit.c
index aea23e713cf..22399caf757 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -51,7 +51,6 @@ static void __unhash_process(struct task_struct *p)
 {
 	nr_threads--;
 	detach_pid(p, PIDTYPE_PID);
-	detach_pid(p, PIDTYPE_TGID);
 	if (thread_group_leader(p)) {
 		detach_pid(p, PIDTYPE_PGID);
 		detach_pid(p, PIDTYPE_SID);
@@ -59,7 +58,7 @@ static void __unhash_process(struct task_struct *p)
 		list_del_init(&p->tasks);
 		__get_cpu_var(process_counts)--;
 	}
-
+	list_del_rcu(&p->thread_group);
 	remove_parent(p);
 }
 
@@ -964,13 +963,6 @@ asmlinkage long sys_exit(int error_code)
 	do_exit((error_code&0xff)<<8);
 }
 
-task_t fastcall *next_thread(const task_t *p)
-{
-	return pid_task(p->pids[PIDTYPE_TGID].pid_list.next, PIDTYPE_TGID);
-}
-
-EXPORT_SYMBOL(next_thread);
-
 /*
  * Take down every thread in the group.  This is called by fatal signals
  * as well as by sys_exit_group (below).
diff --git a/kernel/fork.c b/kernel/fork.c
index 12cdd9fc9d0..bc551efb5fd 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1112,6 +1112,7 @@ static task_t *copy_process(unsigned long clone_flags,
 	 * We dont wake it up yet.
 	 */
 	p->group_leader = p;
+	INIT_LIST_HEAD(&p->thread_group);
 	INIT_LIST_HEAD(&p->ptrace_children);
 	INIT_LIST_HEAD(&p->ptrace_list);
 
@@ -1165,7 +1166,9 @@ static task_t *copy_process(unsigned long clone_flags,
 			retval = -EAGAIN;
 			goto bad_fork_cleanup_namespace;
 		}
+
 		p->group_leader = current->group_leader;
+		list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
 
 		if (current->signal->group_stop_count > 0) {
 			/*
@@ -1213,7 +1216,6 @@ static task_t *copy_process(unsigned long clone_flags,
 			list_add_tail(&p->tasks, &init_task.tasks);
 			__get_cpu_var(process_counts)++;
 		}
-		attach_pid(p, PIDTYPE_TGID, p->tgid);
 		attach_pid(p, PIDTYPE_PID, p->pid);
 		nr_threads++;
 	}
-- 
cgit v1.2.3-70-g09d2


From 4a2c7a7837da1b91468e50426066d988050e4d56 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Tue, 28 Mar 2006 16:11:26 -0800
Subject: [PATCH] make fork() atomic wrt pgrp/session signals

Eric W. Biederman wrote:
>
> Ok. SUSV3/Posix is clear, fork is atomic with respect
> to signals.  Either a signal comes before or after a
> fork but not during. (See the rationale section).
> http://www.opengroup.org/onlinepubs/000095399/functions/fork.html
>
> The tasklist_lock does not stop forks from adding to a process
> group. The forks stall while the tasklist_lock is held, but a fork
> that began before we grabbed the tasklist_lock simply completes
> afterwards, and the child does not receive the signal.

This also means that SIGSTOP or sig_kernel_coredump() signal can't
be delivered to pgrp/session reliably.

With this patch copy_process() returns -ERESTARTNOINTR when it
detects a pending signal, fork() will be restarted transparently
after handling the signals.

This patch also deletes now unneeded "group_stop_count > 0" check,
copy_process() can no longer succeed while group stop in progress.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Acked-By: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/fork.c | 37 +++++++++++++++++--------------------
 1 file changed, 17 insertions(+), 20 deletions(-)

(limited to 'kernel/fork.c')

diff --git a/kernel/fork.c b/kernel/fork.c
index bc551efb5fd..aa50c848fae 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1136,16 +1136,6 @@ static task_t *copy_process(unsigned long clone_flags,
 			!cpu_online(task_cpu(p))))
 		set_task_cpu(p, smp_processor_id());
 
-	/*
-	 * Check for pending SIGKILL! The new thread should not be allowed
-	 * to slip out of an OOM kill. (or normal SIGKILL.)
-	 */
-	if (sigismember(&current->pending.signal, SIGKILL)) {
-		write_unlock_irq(&tasklist_lock);
-		retval = -EINTR;
-		goto bad_fork_cleanup_namespace;
-	}
-
 	/* CLONE_PARENT re-uses the old parent */
 	if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
 		p->real_parent = current->real_parent;
@@ -1154,6 +1144,23 @@ static task_t *copy_process(unsigned long clone_flags,
 	p->parent = p->real_parent;
 
 	spin_lock(&current->sighand->siglock);
+
+	/*
+	 * Process group and session signals need to be delivered to just the
+	 * parent before the fork or both the parent and the child after the
+	 * fork. Restart if a signal comes in before we add the new process to
+	 * it's process group.
+	 * A fatal signal pending means that current will exit, so the new
+	 * thread can't slip out of an OOM kill (or normal SIGKILL).
+ 	 */
+ 	recalc_sigpending();
+	if (signal_pending(current)) {
+		spin_unlock(&current->sighand->siglock);
+		write_unlock_irq(&tasklist_lock);
+		retval = -ERESTARTNOINTR;
+		goto bad_fork_cleanup_namespace;
+	}
+
 	if (clone_flags & CLONE_THREAD) {
 		/*
 		 * Important: if an exit-all has been started then
@@ -1170,16 +1177,6 @@ static task_t *copy_process(unsigned long clone_flags,
 		p->group_leader = current->group_leader;
 		list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
 
-		if (current->signal->group_stop_count > 0) {
-			/*
-			 * There is an all-stop in progress for the group.
-			 * We ourselves will stop as soon as we check signals.
-			 * Make the new thread part of that group stop too.
-			 */
-			current->signal->group_stop_count++;
-			set_tsk_thread_flag(p, TIF_SIGPENDING);
-		}
-
 		if (!cputime_eq(current->signal->it_virt_expires,
 				cputime_zero) ||
 		    !cputime_eq(current->signal->it_prof_expires,
-- 
cgit v1.2.3-70-g09d2


From a7e5328a06a2beee3a2bbfaf87ce2a7bbe937de1 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Tue, 28 Mar 2006 16:11:27 -0800
Subject: [PATCH] cleanup __exit_signal->cleanup_sighand path

Move 'tsk->sighand = NULL' from cleanup_sighand() to __exit_signal().  This
makes the exit path more understandable and allows us to do
cleanup_sighand() outside of ->siglock protected section.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/sched.h | 2 +-
 kernel/exit.c         | 3 ++-
 kernel/fork.c         | 8 ++------
 3 files changed, 5 insertions(+), 8 deletions(-)

(limited to 'kernel/fork.c')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 99855f694eb..d04186d8cc6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1151,7 +1151,7 @@ extern void exit_thread(void);
 
 extern void exit_files(struct task_struct *);
 extern void __cleanup_signal(struct signal_struct *);
-extern void cleanup_sighand(struct task_struct *);
+extern void __cleanup_sighand(struct sighand_struct *);
 extern void exit_itimers(struct signal_struct *);
 
 extern NORET_TYPE void do_group_exit(int);
diff --git a/kernel/exit.c b/kernel/exit.c
index 22399caf757..bc0ec674d3f 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -114,10 +114,11 @@ static void __exit_signal(struct task_struct *tsk)
 	__unhash_process(tsk);
 
 	tsk->signal = NULL;
-	cleanup_sighand(tsk);
+	tsk->sighand = NULL;
 	spin_unlock(&sighand->siglock);
 	rcu_read_unlock();
 
+	__cleanup_sighand(sighand);
 	clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
 	flush_sigqueue(&tsk->pending);
 	if (sig) {
diff --git a/kernel/fork.c b/kernel/fork.c
index aa50c848fae..b3f7a1bb5e5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -803,12 +803,8 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t
 	return 0;
 }
 
-void cleanup_sighand(struct task_struct *tsk)
+void __cleanup_sighand(struct sighand_struct *sighand)
 {
-	struct sighand_struct * sighand = tsk->sighand;
-
-	/* Ok, we're done with the signal handlers */
-	tsk->sighand = NULL;
 	if (atomic_dec_and_test(&sighand->count))
 		kmem_cache_free(sighand_cachep, sighand);
 }
@@ -1233,7 +1229,7 @@ bad_fork_cleanup_mm:
 bad_fork_cleanup_signal:
 	cleanup_signal(p);
 bad_fork_cleanup_sighand:
-	cleanup_sighand(p);
+	__cleanup_sighand(p->sighand);
 bad_fork_cleanup_fs:
 	exit_fs(p); /* blocking */
 bad_fork_cleanup_files:
-- 
cgit v1.2.3-70-g09d2