From af432eb1cc3178ec7109aca2283aafb1c12ccac1 Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Fri, 16 Jan 2009 09:09:38 -0800 Subject: softlockup: fix to allow compiling with !DETECT_HUNG_TASK Fixes the following compile error: kernel/fork.c:1049: error: 'struct task_struct' has no member named 'last_switch_count' kernel/fork.c:1050: error: 'struct task_struct' has no member named 'last_switch_timestamp' Signed-off-by: Mandeep Singh Baines Signed-off-by: Ingo Molnar --- kernel/fork.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 1d68f1255dd..fb944428283 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1041,7 +1041,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->default_timer_slack_ns = current->timer_slack_ns; -#ifdef CONFIG_DETECT_SOFTLOCKUP +#ifdef CONFIG_DETECT_HUNG_TASK p->last_switch_count = 0; p->last_switch_timestamp = 0; #endif -- cgit v1.2.3-70-g09d2 From 17406b82d621930cca8ccc1272cdac9a7dae8e40 Mon Sep 17 00:00:00 2001 From: Mandeep Singh Baines Date: Fri, 6 Feb 2009 15:37:47 -0800 Subject: softlockup: remove timestamp checking from hung_task Impact: saves sizeof(long) bytes per task_struct By guaranteeing that sysctl_hung_task_timeout_secs have elapsed between tasklist scans we can avoid using timestamps. Signed-off-by: Mandeep Singh Baines Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 - kernel/fork.c | 8 +++----- kernel/hung_task.c | 48 +++++++++--------------------------------------- 3 files changed, 12 insertions(+), 45 deletions(-) (limited to 'kernel/fork.c') diff --git a/include/linux/sched.h b/include/linux/sched.h index 2a2811c6239..e0d723fea9f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1241,7 +1241,6 @@ struct task_struct { #endif #ifdef CONFIG_DETECT_HUNG_TASK /* hung task detection */ - unsigned long last_switch_timestamp; unsigned long last_switch_count; #endif /* CPU-specific state of this task */ diff --git a/kernel/fork.c b/kernel/fork.c index fb944428283..bf582f75014 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -639,6 +639,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) tsk->min_flt = tsk->maj_flt = 0; tsk->nvcsw = tsk->nivcsw = 0; +#ifdef CONFIG_DETECT_HUNG_TASK + tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw; +#endif tsk->mm = NULL; tsk->active_mm = NULL; @@ -1041,11 +1044,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->default_timer_slack_ns = current->timer_slack_ns; -#ifdef CONFIG_DETECT_HUNG_TASK - p->last_switch_count = 0; - p->last_switch_timestamp = 0; -#endif - task_io_accounting_init(&p->ioac); acct_clear_integrals(p); diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 3951a80e7cb..0c924de58cb 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -34,7 +34,6 @@ unsigned long __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; * Zero means infinite timeout - no checking done: */ unsigned long __read_mostly sysctl_hung_task_timeout_secs = 120; -static unsigned long __read_mostly hung_task_poll_jiffies; unsigned long __read_mostly sysctl_hung_task_warnings = 10; @@ -69,33 +68,17 @@ static struct notifier_block panic_block = { .notifier_call = hung_task_panic, }; -/* - * Returns seconds, approximately. We don't need nanosecond - * resolution, and we don't need to waste time with a big divide when - * 2^30ns == 1.074s. - */ -static unsigned long get_timestamp(void) -{ - int this_cpu = raw_smp_processor_id(); - - return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */ -} - -static void check_hung_task(struct task_struct *t, unsigned long now, - unsigned long timeout) +static void check_hung_task(struct task_struct *t, unsigned long timeout) { unsigned long switch_count = t->nvcsw + t->nivcsw; if (t->flags & PF_FROZEN) return; - if (switch_count != t->last_switch_count || !t->last_switch_timestamp) { + if (switch_count != t->last_switch_count) { t->last_switch_count = switch_count; - t->last_switch_timestamp = now; return; } - if ((long)(now - t->last_switch_timestamp) < timeout) - return; if (!sysctl_hung_task_warnings) return; sysctl_hung_task_warnings--; @@ -111,7 +94,6 @@ static void check_hung_task(struct task_struct *t, unsigned long now, sched_show_task(t); __debug_show_held_locks(t); - t->last_switch_timestamp = now; touch_nmi_watchdog(); if (sysctl_hung_task_panic) @@ -145,7 +127,6 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) { int max_count = sysctl_hung_task_check_count; int batch_count = HUNG_TASK_BATCHING; - unsigned long now = get_timestamp(); struct task_struct *g, *t; /* @@ -168,19 +149,16 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) } /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ if (t->state == TASK_UNINTERRUPTIBLE) - check_hung_task(t, now, timeout); + check_hung_task(t, timeout); } while_each_thread(g, t); unlock: rcu_read_unlock(); } -static void update_poll_jiffies(void) +static unsigned long timeout_jiffies(unsigned long timeout) { /* timeout of 0 will disable the watchdog */ - if (sysctl_hung_task_timeout_secs == 0) - hung_task_poll_jiffies = MAX_SCHEDULE_TIMEOUT; - else - hung_task_poll_jiffies = sysctl_hung_task_timeout_secs * HZ / 2; + return timeout ? timeout * HZ : MAX_SCHEDULE_TIMEOUT; } /* @@ -197,8 +175,6 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, if (ret || !write) goto out; - update_poll_jiffies(); - wake_up_process(watchdog_task); out: @@ -211,20 +187,14 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, static int watchdog(void *dummy) { set_user_nice(current, 0); - update_poll_jiffies(); for ( ; ; ) { - unsigned long timeout; + unsigned long timeout = sysctl_hung_task_timeout_secs; - while (schedule_timeout_interruptible(hung_task_poll_jiffies)); + while (schedule_timeout_interruptible(timeout_jiffies(timeout))) + timeout = sysctl_hung_task_timeout_secs; - /* - * Need to cache timeout here to avoid timeout being set - * to 0 via sysctl while inside check_hung_*_tasks(). - */ - timeout = sysctl_hung_task_timeout_secs; - if (timeout) - check_hung_uninterruptible_tasks(timeout); + check_hung_uninterruptible_tasks(timeout); } return 0; -- cgit v1.2.3-70-g09d2 From 3e93cd671813e204c258f1e6c797959920cf7772 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 Mar 2009 19:00:13 -0400 Subject: Take fs_struct handling to new file (fs/fs_struct.c) Pure code move; two new helper functions for nfsd and daemonize (unshare_fs_struct() and daemonize_fs_struct() resp.; for now - the same code as used to be in callers). unshare_fs_struct() exported (for nfsd, as copy_fs_struct()/exit_fs() used to be), copy_fs_struct() and exit_fs() don't need exports anymore. Signed-off-by: Al Viro --- fs/Makefile | 2 +- fs/fs_struct.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++ fs/internal.h | 6 ++ fs/namei.c | 7 --- fs/namespace.c | 68 ---------------------- fs/nfsd/nfssvc.c | 7 +-- include/linux/fs_struct.h | 2 + kernel/exit.c | 31 +--------- kernel/fork.c | 29 +--------- 9 files changed, 155 insertions(+), 138 deletions(-) create mode 100644 fs/fs_struct.c (limited to 'kernel/fork.c') diff --git a/fs/Makefile b/fs/Makefile index 6e82a307bcd..b5cd8e18dd9 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -11,7 +11,7 @@ obj-y := open.o read_write.o file_table.o super.o \ attr.o bad_inode.o file.o filesystems.o namespace.o \ seq_file.o xattr.o libfs.o fs-writeback.o \ pnode.o drop_caches.o splice.o sync.o utimes.o \ - stack.o + stack.o fs_struct.o ifeq ($(CONFIG_BLOCK),y) obj-y += buffer.o bio.o block_dev.o direct-io.o mpage.o ioprio.o diff --git a/fs/fs_struct.c b/fs/fs_struct.c new file mode 100644 index 00000000000..36e0a123bbf --- /dev/null +++ b/fs/fs_struct.c @@ -0,0 +1,141 @@ +#include +#include +#include +#include +#include + +/* + * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. + * It can block. + */ +void set_fs_root(struct fs_struct *fs, struct path *path) +{ + struct path old_root; + + write_lock(&fs->lock); + old_root = fs->root; + fs->root = *path; + path_get(path); + write_unlock(&fs->lock); + if (old_root.dentry) + path_put(&old_root); +} + +/* + * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values. + * It can block. + */ +void set_fs_pwd(struct fs_struct *fs, struct path *path) +{ + struct path old_pwd; + + write_lock(&fs->lock); + old_pwd = fs->pwd; + fs->pwd = *path; + path_get(path); + write_unlock(&fs->lock); + + if (old_pwd.dentry) + path_put(&old_pwd); +} + +void chroot_fs_refs(struct path *old_root, struct path *new_root) +{ + struct task_struct *g, *p; + struct fs_struct *fs; + int count = 0; + + read_lock(&tasklist_lock); + do_each_thread(g, p) { + task_lock(p); + fs = p->fs; + if (fs) { + write_lock(&fs->lock); + if (fs->root.dentry == old_root->dentry + && fs->root.mnt == old_root->mnt) { + path_get(new_root); + fs->root = *new_root; + count++; + } + if (fs->pwd.dentry == old_root->dentry + && fs->pwd.mnt == old_root->mnt) { + path_get(new_root); + fs->pwd = *new_root; + count++; + } + write_unlock(&fs->lock); + } + task_unlock(p); + } while_each_thread(g, p); + read_unlock(&tasklist_lock); + while (count--) + path_put(old_root); +} + +void put_fs_struct(struct fs_struct *fs) +{ + /* No need to hold fs->lock if we are killing it */ + if (atomic_dec_and_test(&fs->count)) { + path_put(&fs->root); + path_put(&fs->pwd); + kmem_cache_free(fs_cachep, fs); + } +} + +void exit_fs(struct task_struct *tsk) +{ + struct fs_struct * fs = tsk->fs; + + if (fs) { + task_lock(tsk); + tsk->fs = NULL; + task_unlock(tsk); + put_fs_struct(fs); + } +} + +struct fs_struct *copy_fs_struct(struct fs_struct *old) +{ + struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); + /* We don't need to lock fs - think why ;-) */ + if (fs) { + atomic_set(&fs->count, 1); + rwlock_init(&fs->lock); + fs->umask = old->umask; + read_lock(&old->lock); + fs->root = old->root; + path_get(&old->root); + fs->pwd = old->pwd; + path_get(&old->pwd); + read_unlock(&old->lock); + } + return fs; +} + +int unshare_fs_struct(void) +{ + struct fs_struct *fsp = copy_fs_struct(current->fs); + if (!fsp) + return -ENOMEM; + exit_fs(current); + current->fs = fsp; + return 0; +} +EXPORT_SYMBOL_GPL(unshare_fs_struct); + +/* to be mentioned only in INIT_TASK */ +struct fs_struct init_fs = { + .count = ATOMIC_INIT(1), + .lock = __RW_LOCK_UNLOCKED(init_fs.lock), + .umask = 0022, +}; + +void daemonize_fs_struct(void) +{ + struct fs_struct *fs; + + exit_fs(current); /* current->fs->count--; */ + fs = &init_fs; + current->fs = fs; + atomic_inc(&fs->count); +} diff --git a/fs/internal.h b/fs/internal.h index 53af885f173..477a105f8df 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -11,6 +11,7 @@ struct super_block; struct linux_binprm; +struct path; /* * block_dev.c @@ -60,3 +61,8 @@ extern void umount_tree(struct vfsmount *, int, struct list_head *); extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); extern void __init mnt_init(void); + +/* + * fs_struct.c + */ +extern void chroot_fs_refs(struct path *, struct path *); diff --git a/fs/namei.c b/fs/namei.c index d040ce11785..4c65a646013 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2897,10 +2897,3 @@ EXPORT_SYMBOL(vfs_symlink); EXPORT_SYMBOL(vfs_unlink); EXPORT_SYMBOL(dentry_unhash); EXPORT_SYMBOL(generic_readlink); - -/* to be mentioned only in INIT_TASK */ -struct fs_struct init_fs = { - .count = ATOMIC_INIT(1), - .lock = __RW_LOCK_UNLOCKED(init_fs.lock), - .umask = 0022, -}; diff --git a/fs/namespace.c b/fs/namespace.c index f7ec283ccfb..1e56303c718 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2092,74 +2092,6 @@ out1: return retval; } -/* - * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. - * It can block. Requires the big lock held. - */ -void set_fs_root(struct fs_struct *fs, struct path *path) -{ - struct path old_root; - - write_lock(&fs->lock); - old_root = fs->root; - fs->root = *path; - path_get(path); - write_unlock(&fs->lock); - if (old_root.dentry) - path_put(&old_root); -} - -/* - * Replace the fs->{pwdmnt,pwd} with {mnt,dentry}. Put the old values. - * It can block. Requires the big lock held. - */ -void set_fs_pwd(struct fs_struct *fs, struct path *path) -{ - struct path old_pwd; - - write_lock(&fs->lock); - old_pwd = fs->pwd; - fs->pwd = *path; - path_get(path); - write_unlock(&fs->lock); - - if (old_pwd.dentry) - path_put(&old_pwd); -} - -static void chroot_fs_refs(struct path *old_root, struct path *new_root) -{ - struct task_struct *g, *p; - struct fs_struct *fs; - int count = 0; - - read_lock(&tasklist_lock); - do_each_thread(g, p) { - task_lock(p); - fs = p->fs; - if (fs) { - write_lock(&fs->lock); - if (fs->root.dentry == old_root->dentry - && fs->root.mnt == old_root->mnt) { - path_get(new_root); - fs->root = *new_root; - count++; - } - if (fs->pwd.dentry == old_root->dentry - && fs->pwd.mnt == old_root->mnt) { - path_get(new_root); - fs->pwd = *new_root; - count++; - } - write_unlock(&fs->lock); - } - task_unlock(p); - } while_each_thread(g, p); - read_unlock(&tasklist_lock); - while (count--) - path_put(old_root); -} - /* * pivot_root Semantics: * Moves the root file system of the current process to the directory put_old, diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 07e4f5d7baa..144d6991861 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -404,7 +404,6 @@ static int nfsd(void *vrqstp) { struct svc_rqst *rqstp = (struct svc_rqst *) vrqstp; - struct fs_struct *fsp; int err, preverr = 0; /* Lock module and set up kernel thread */ @@ -413,13 +412,11 @@ nfsd(void *vrqstp) /* At this point, the thread shares current->fs * with the init process. We need to create files with a * umask of 0 instead of init's umask. */ - fsp = copy_fs_struct(current->fs); - if (!fsp) { + if (unshare_fs_struct() < 0) { printk("Unable to start nfsd thread: out of memory\n"); goto out; } - exit_fs(current); - current->fs = fsp; + current->fs->umask = 0; /* diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index 18b467dbe27..298cef1c079 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -20,5 +20,7 @@ extern void set_fs_root(struct fs_struct *, struct path *); extern void set_fs_pwd(struct fs_struct *, struct path *); extern struct fs_struct *copy_fs_struct(struct fs_struct *); extern void put_fs_struct(struct fs_struct *); +extern void daemonize_fs_struct(void); +extern int unshare_fs_struct(void); #endif /* _LINUX_FS_STRUCT_H */ diff --git a/kernel/exit.c b/kernel/exit.c index 167e1e3ad7c..ad8375758a7 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -429,7 +429,6 @@ EXPORT_SYMBOL(disallow_signal); void daemonize(const char *name, ...) { va_list args; - struct fs_struct *fs; sigset_t blocked; va_start(args, name); @@ -462,11 +461,7 @@ void daemonize(const char *name, ...) /* Become as one with the init task */ - exit_fs(current); /* current->fs->count--; */ - fs = init_task.fs; - current->fs = fs; - atomic_inc(&fs->count); - + daemonize_fs_struct(); exit_files(current); current->files = init_task.files; atomic_inc(¤t->files->count); @@ -565,30 +560,6 @@ void exit_files(struct task_struct *tsk) } } -void put_fs_struct(struct fs_struct *fs) -{ - /* No need to hold fs->lock if we are killing it */ - if (atomic_dec_and_test(&fs->count)) { - path_put(&fs->root); - path_put(&fs->pwd); - kmem_cache_free(fs_cachep, fs); - } -} - -void exit_fs(struct task_struct *tsk) -{ - struct fs_struct * fs = tsk->fs; - - if (fs) { - task_lock(tsk); - tsk->fs = NULL; - task_unlock(tsk); - put_fs_struct(fs); - } -} - -EXPORT_SYMBOL_GPL(exit_fs); - #ifdef CONFIG_MM_OWNER /* * Task p is exiting and it owned mm, lets find a new owner for it diff --git a/kernel/fork.c b/kernel/fork.c index 47c15840a38..05c02dc586b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -681,38 +681,13 @@ fail_nomem: return retval; } -static struct fs_struct *__copy_fs_struct(struct fs_struct *old) -{ - struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); - /* We don't need to lock fs - think why ;-) */ - if (fs) { - atomic_set(&fs->count, 1); - rwlock_init(&fs->lock); - fs->umask = old->umask; - read_lock(&old->lock); - fs->root = old->root; - path_get(&old->root); - fs->pwd = old->pwd; - path_get(&old->pwd); - read_unlock(&old->lock); - } - return fs; -} - -struct fs_struct *copy_fs_struct(struct fs_struct *old) -{ - return __copy_fs_struct(old); -} - -EXPORT_SYMBOL_GPL(copy_fs_struct); - static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) { if (clone_flags & CLONE_FS) { atomic_inc(¤t->fs->count); return 0; } - tsk->fs = __copy_fs_struct(current->fs); + tsk->fs = copy_fs_struct(current->fs); if (!tsk->fs) return -ENOMEM; return 0; @@ -1545,7 +1520,7 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) if ((unshare_flags & CLONE_FS) && (fs && atomic_read(&fs->count) > 1)) { - *new_fsp = __copy_fs_struct(current->fs); + *new_fsp = copy_fs_struct(current->fs); if (!*new_fsp) return -ENOMEM; } -- cgit v1.2.3-70-g09d2 From 498052bba55ecaff58db6a1436b0e25bfd75a7ff Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 30 Mar 2009 07:20:30 -0400 Subject: New locking/refcounting for fs_struct * all changes of current->fs are done under task_lock and write_lock of old fs->lock * refcount is not atomic anymore (same protection) * its decrements are done when removing reference from current; at the same time we decide whether to free it. * put_fs_struct() is gone * new field - ->in_exec. Set by check_unsafe_exec() if we are trying to do execve() and only subthreads share fs_struct. Cleared when finishing exec (success and failure alike). Makes CLONE_FS fail with -EAGAIN if set. * check_unsafe_exec() may fail with -EAGAIN if another execve() from subthread is in progress. Signed-off-by: Al Viro --- fs/compat.c | 16 +++++++++-- fs/exec.c | 31 +++++++++++++++++---- fs/fs_struct.c | 69 +++++++++++++++++++++++++++++++++-------------- fs/internal.h | 2 +- fs/proc/task_nommu.c | 2 +- include/linux/fs_struct.h | 8 +++--- kernel/fork.c | 37 ++++++++++++++++++------- 7 files changed, 121 insertions(+), 44 deletions(-) (limited to 'kernel/fork.c') diff --git a/fs/compat.c b/fs/compat.c index 55efdfebdf5..baabf203b84 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include @@ -1441,12 +1442,15 @@ int compat_do_execve(char * filename, bprm->cred = prepare_exec_creds(); if (!bprm->cred) goto out_unlock; - check_unsafe_exec(bprm); + + retval = check_unsafe_exec(bprm); + if (retval) + goto out_unlock; file = open_exec(filename); retval = PTR_ERR(file); if (IS_ERR(file)) - goto out_unlock; + goto out_unmark; sched_exec(); @@ -1488,6 +1492,9 @@ int compat_do_execve(char * filename, goto out; /* execve succeeded */ + write_lock(¤t->fs->lock); + current->fs->in_exec = 0; + write_unlock(¤t->fs->lock); current->in_execve = 0; mutex_unlock(¤t->cred_exec_mutex); acct_update_integrals(current); @@ -1506,6 +1513,11 @@ out_file: fput(bprm->file); } +out_unmark: + write_lock(¤t->fs->lock); + current->fs->in_exec = 0; + write_unlock(¤t->fs->lock); + out_unlock: current->in_execve = 0; mutex_unlock(¤t->cred_exec_mutex); diff --git a/fs/exec.c b/fs/exec.c index c5128fbc916..07a059664b7 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1056,16 +1056,18 @@ EXPORT_SYMBOL(install_exec_creds); * - the caller must hold current->cred_exec_mutex to protect against * PTRACE_ATTACH */ -void check_unsafe_exec(struct linux_binprm *bprm) +int check_unsafe_exec(struct linux_binprm *bprm) { struct task_struct *p = current, *t; unsigned long flags; unsigned n_fs, n_sighand; + int res = 0; bprm->unsafe = tracehook_unsafe_exec(p); n_fs = 1; n_sighand = 1; + write_lock(&p->fs->lock); lock_task_sighand(p, &flags); for (t = next_thread(p); t != p; t = next_thread(t)) { if (t->fs == p->fs) @@ -1073,11 +1075,19 @@ void check_unsafe_exec(struct linux_binprm *bprm) n_sighand++; } - if (atomic_read(&p->fs->count) > n_fs || - atomic_read(&p->sighand->count) > n_sighand) + if (p->fs->users > n_fs || + atomic_read(&p->sighand->count) > n_sighand) { bprm->unsafe |= LSM_UNSAFE_SHARE; + } else { + if (p->fs->in_exec) + res = -EAGAIN; + p->fs->in_exec = 1; + } unlock_task_sighand(p, &flags); + write_unlock(&p->fs->lock); + + return res; } /* @@ -1296,12 +1306,15 @@ int do_execve(char * filename, bprm->cred = prepare_exec_creds(); if (!bprm->cred) goto out_unlock; - check_unsafe_exec(bprm); + + retval = check_unsafe_exec(bprm); + if (retval) + goto out_unlock; file = open_exec(filename); retval = PTR_ERR(file); if (IS_ERR(file)) - goto out_unlock; + goto out_unmark; sched_exec(); @@ -1344,6 +1357,9 @@ int do_execve(char * filename, goto out; /* execve succeeded */ + write_lock(¤t->fs->lock); + current->fs->in_exec = 0; + write_unlock(¤t->fs->lock); current->in_execve = 0; mutex_unlock(¤t->cred_exec_mutex); acct_update_integrals(current); @@ -1362,6 +1378,11 @@ out_file: fput(bprm->file); } +out_unmark: + write_lock(¤t->fs->lock); + current->fs->in_exec = 0; + write_unlock(¤t->fs->lock); + out_unlock: current->in_execve = 0; mutex_unlock(¤t->cred_exec_mutex); diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 36e0a123bbf..41cff72b377 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -72,25 +72,27 @@ void chroot_fs_refs(struct path *old_root, struct path *new_root) path_put(old_root); } -void put_fs_struct(struct fs_struct *fs) +void free_fs_struct(struct fs_struct *fs) { - /* No need to hold fs->lock if we are killing it */ - if (atomic_dec_and_test(&fs->count)) { - path_put(&fs->root); - path_put(&fs->pwd); - kmem_cache_free(fs_cachep, fs); - } + path_put(&fs->root); + path_put(&fs->pwd); + kmem_cache_free(fs_cachep, fs); } void exit_fs(struct task_struct *tsk) { - struct fs_struct * fs = tsk->fs; + struct fs_struct *fs = tsk->fs; if (fs) { + int kill; task_lock(tsk); + write_lock(&fs->lock); tsk->fs = NULL; + kill = !--fs->users; + write_unlock(&fs->lock); task_unlock(tsk); - put_fs_struct(fs); + if (kill) + free_fs_struct(fs); } } @@ -99,7 +101,8 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL); /* We don't need to lock fs - think why ;-) */ if (fs) { - atomic_set(&fs->count, 1); + fs->users = 1; + fs->in_exec = 0; rwlock_init(&fs->lock); fs->umask = old->umask; read_lock(&old->lock); @@ -114,28 +117,54 @@ struct fs_struct *copy_fs_struct(struct fs_struct *old) int unshare_fs_struct(void) { - struct fs_struct *fsp = copy_fs_struct(current->fs); - if (!fsp) + struct fs_struct *fs = current->fs; + struct fs_struct *new_fs = copy_fs_struct(fs); + int kill; + + if (!new_fs) return -ENOMEM; - exit_fs(current); - current->fs = fsp; + + task_lock(current); + write_lock(&fs->lock); + kill = !--fs->users; + current->fs = new_fs; + write_unlock(&fs->lock); + task_unlock(current); + + if (kill) + free_fs_struct(fs); + return 0; } EXPORT_SYMBOL_GPL(unshare_fs_struct); /* to be mentioned only in INIT_TASK */ struct fs_struct init_fs = { - .count = ATOMIC_INIT(1), + .users = 1, .lock = __RW_LOCK_UNLOCKED(init_fs.lock), .umask = 0022, }; void daemonize_fs_struct(void) { - struct fs_struct *fs; + struct fs_struct *fs = current->fs; + + if (fs) { + int kill; + + task_lock(current); - exit_fs(current); /* current->fs->count--; */ - fs = &init_fs; - current->fs = fs; - atomic_inc(&fs->count); + write_lock(&init_fs.lock); + init_fs.users++; + write_unlock(&init_fs.lock); + + write_lock(&fs->lock); + current->fs = &init_fs; + kill = !--fs->users; + write_unlock(&fs->lock); + + task_unlock(current); + if (kill) + free_fs_struct(fs); + } } diff --git a/fs/internal.h b/fs/internal.h index 477a105f8df..b4dac4fb6b6 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -44,7 +44,7 @@ extern void __init chrdev_init(void); /* * exec.c */ -extern void check_unsafe_exec(struct linux_binprm *); +extern int check_unsafe_exec(struct linux_binprm *); /* * namespace.c diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 343ea1216bc..6ca01052c5b 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -49,7 +49,7 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) else bytes += kobjsize(mm); - if (current->fs && atomic_read(¤t->fs->count) > 1) + if (current->fs && current->fs->users > 1) sbytes += kobjsize(current->fs); else bytes += kobjsize(current->fs); diff --git a/include/linux/fs_struct.h b/include/linux/fs_struct.h index 298cef1c079..78a05bfcd8e 100644 --- a/include/linux/fs_struct.h +++ b/include/linux/fs_struct.h @@ -4,12 +4,10 @@ #include struct fs_struct { - atomic_t count; /* This usage count is used by check_unsafe_exec() for - * security checking purposes - therefore it may not be - * incremented, except by clone(CLONE_FS). - */ + int users; rwlock_t lock; int umask; + int in_exec; struct path root, pwd; }; @@ -19,7 +17,7 @@ extern void exit_fs(struct task_struct *); extern void set_fs_root(struct fs_struct *, struct path *); extern void set_fs_pwd(struct fs_struct *, struct path *); extern struct fs_struct *copy_fs_struct(struct fs_struct *); -extern void put_fs_struct(struct fs_struct *); +extern void free_fs_struct(struct fs_struct *); extern void daemonize_fs_struct(void); extern int unshare_fs_struct(void); diff --git a/kernel/fork.c b/kernel/fork.c index 05c02dc586b..51f138a131d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -683,11 +683,19 @@ fail_nomem: static int copy_fs(unsigned long clone_flags, struct task_struct *tsk) { + struct fs_struct *fs = current->fs; if (clone_flags & CLONE_FS) { - atomic_inc(¤t->fs->count); + /* tsk->fs is already what we want */ + write_lock(&fs->lock); + if (fs->in_exec) { + write_unlock(&fs->lock); + return -EAGAIN; + } + fs->users++; + write_unlock(&fs->lock); return 0; } - tsk->fs = copy_fs_struct(current->fs); + tsk->fs = copy_fs_struct(fs); if (!tsk->fs) return -ENOMEM; return 0; @@ -1518,12 +1526,16 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) { struct fs_struct *fs = current->fs; - if ((unshare_flags & CLONE_FS) && - (fs && atomic_read(&fs->count) > 1)) { - *new_fsp = copy_fs_struct(current->fs); - if (!*new_fsp) - return -ENOMEM; - } + if (!(unshare_flags & CLONE_FS) || !fs) + return 0; + + /* don't need lock here; in the worst case we'll do useless copy */ + if (fs->users == 1) + return 0; + + *new_fsp = copy_fs_struct(fs); + if (!*new_fsp) + return -ENOMEM; return 0; } @@ -1639,8 +1651,13 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) if (new_fs) { fs = current->fs; + write_lock(&fs->lock); current->fs = new_fs; - new_fs = fs; + if (--fs->users) + new_fs = NULL; + else + new_fs = fs; + write_unlock(&fs->lock); } if (new_mm) { @@ -1679,7 +1696,7 @@ bad_unshare_cleanup_sigh: bad_unshare_cleanup_fs: if (new_fs) - put_fs_struct(new_fs); + free_fs_struct(new_fs); bad_unshare_cleanup_thread: bad_unshare_out: -- cgit v1.2.3-70-g09d2 From 5ad4e53bd5406ee214ddc5a41f03f779b8b2d526 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 29 Mar 2009 19:50:06 -0400 Subject: Get rid of indirect include of fs_struct.h Don't pull it in sched.h; very few files actually need it and those can include directly. sched.h itself only needs forward declaration of struct fs_struct; Signed-off-by: Al Viro --- arch/cris/kernel/process.c | 1 - fs/dcache.c | 1 + fs/exec.c | 1 + fs/fs_struct.c | 1 + fs/namei.c | 1 + fs/namespace.c | 1 + fs/open.c | 1 + fs/proc/base.c | 1 + fs/proc/task_nommu.c | 1 + include/linux/mnt_namespace.h | 2 ++ include/linux/nsproxy.h | 1 + include/linux/sched.h | 3 ++- init/do_mounts.c | 1 + kernel/auditsc.c | 1 + kernel/exec_domain.c | 1 + kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/sys.c | 1 + security/tomoyo/realpath.c | 1 + 19 files changed, 20 insertions(+), 2 deletions(-) (limited to 'kernel/fork.c') diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index 60816e87645..4df0b320d52 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -19,7 +19,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/dcache.c b/fs/dcache.c index 90bbd7e1b11..0dc4de21f08 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "internal.h" int sysctl_vfs_cache_pressure __read_mostly = 100; diff --git a/fs/exec.c b/fs/exec.c index 614991bf0c8..052a961e41a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 6ac21933867..eee059052db 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -3,6 +3,7 @@ #include #include #include +#include /* * Replace the fs->{rootmnt,root} with {mnt,dentry}. Put the old values. diff --git a/fs/namei.c b/fs/namei.c index 964c0249444..b8433ebfae0 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) diff --git a/fs/namespace.c b/fs/namespace.c index 1e56303c718..c6f54e4c429 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include "pnode.h" diff --git a/fs/open.c b/fs/open.c index 75b61677daa..377eb25b6ab 100644 --- a/fs/open.c +++ b/fs/open.c @@ -29,6 +29,7 @@ #include #include #include +#include int vfs_statfs(struct dentry *dentry, struct kstatfs *buf) { diff --git a/fs/proc/base.c b/fs/proc/base.c index e0afd326b68..f71559784bf 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -80,6 +80,7 @@ #include #include #include +#include #include "internal.h" /* NOTE: diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 6ca01052c5b..253afc04484 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/mnt_namespace.h b/include/linux/mnt_namespace.h index 830bbcd449d..3a059298cc1 100644 --- a/include/linux/mnt_namespace.h +++ b/include/linux/mnt_namespace.h @@ -22,6 +22,8 @@ struct proc_mounts { int event; }; +struct fs_struct; + extern struct mnt_namespace *copy_mnt_ns(unsigned long, struct mnt_namespace *, struct fs_struct *); extern void __put_mnt_ns(struct mnt_namespace *ns); diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index afad7dec1b3..7b370c7cfef 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -8,6 +8,7 @@ struct mnt_namespace; struct uts_namespace; struct ipc_namespace; struct pid_namespace; +struct fs_struct; /* * A structure to contain pointers to all per-process diff --git a/include/linux/sched.h b/include/linux/sched.h index 29df6374d2d..b4e065ea0de 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -68,7 +68,7 @@ struct sched_param { #include #include #include -#include +#include #include #include #include @@ -97,6 +97,7 @@ struct futex_pi_state; struct robust_list_head; struct bio; struct bts_tracer; +struct fs_struct; /* * List of flags we want to share for kernel threads, diff --git a/init/do_mounts.c b/init/do_mounts.c index 8d4ff5afc1d..dd7ee5f203f 100644 --- a/init/do_mounts.c +++ b/init/do_mounts.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 8cbddff6c28..2bfc6478676 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -66,6 +66,7 @@ #include #include #include +#include #include "audit.h" diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c index cb8e9626c21..c35452cadde 100644 --- a/kernel/exec_domain.c +++ b/kernel/exec_domain.c @@ -18,6 +18,7 @@ #include #include #include +#include static void default_handler(int, struct pt_regs *); diff --git a/kernel/exit.c b/kernel/exit.c index ad8375758a7..b5d656845c9 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 51f138a131d..e82a14577a9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include diff --git a/kernel/sys.c b/kernel/sys.c index 37f458e6882..ce182aaed20 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c index d47f16b844b..3bbe01a7a4b 100644 --- a/security/tomoyo/realpath.c +++ b/security/tomoyo/realpath.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "common.h" #include "realpath.h" -- cgit v1.2.3-70-g09d2 From 33e5d76979cf01e3834814fe0aea569d1d602c1a Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Apr 2009 16:56:32 -0700 Subject: nommu: fix a number of issues with the per-MM VMA patch Fix a number of issues with the per-MM VMA patch: (1) Make mmap_pages_allocated an atomic_long_t, just in case this is used on a NOMMU system with more than 2G pages. Makes no difference on a 32-bit system. (2) Report vma->vm_pgoff * PAGE_SIZE as a 64-bit value, not a 32-bit value, lest it overflow. (3) Move the allocation of the vm_area_struct slab back for fork.c. (4) Use KMEM_CACHE() for both vm_area_struct and vm_region slabs. (5) Use BUG_ON() rather than if () BUG(). (6) Make the default validate_nommu_regions() a static inline rather than a #define. (7) Make free_page_series()'s objection to pages with a refcount != 1 more informative. (8) Adjust the __put_nommu_region() banner comment to indicate that the semaphore must be held for writing. (9) Limit the number of warnings about munmaps of non-mmapped regions. Reported-by: Andrew Morton Signed-off-by: David Howells Cc: Greg Ungerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/meminfo.c | 2 +- fs/proc/task_nommu.c | 4 ++-- include/linux/mm.h | 2 +- kernel/fork.c | 1 + mm/mmap.c | 3 --- mm/nommu.c | 52 +++++++++++++++++++++++++--------------------------- 6 files changed, 30 insertions(+), 34 deletions(-) (limited to 'kernel/fork.c') diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 43d23948384..74ea974f5ca 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -120,7 +120,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) K(i.freeram-i.freehigh), #endif #ifndef CONFIG_MMU - K((unsigned long) atomic_read(&mmap_pages_allocated)), + K((unsigned long) atomic_long_read(&mmap_pages_allocated)), #endif K(i.totalswap), K(i.freeswap), diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c index 343ea1216bc..370be0a2c90 100644 --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -136,14 +136,14 @@ static int nommu_vma_show(struct seq_file *m, struct vm_area_struct *vma) } seq_printf(m, - "%08lx-%08lx %c%c%c%c %08lx %02x:%02x %lu %n", + "%08lx-%08lx %c%c%c%c %08llx %02x:%02x %lu %n", vma->vm_start, vma->vm_end, flags & VM_READ ? 'r' : '-', flags & VM_WRITE ? 'w' : '-', flags & VM_EXEC ? 'x' : '-', flags & VM_MAYSHARE ? flags & VM_SHARED ? 'S' : 's' : 'p', - vma->vm_pgoff << PAGE_SHIFT, + (unsigned long long) vma->vm_pgoff << PAGE_SHIFT, MAJOR(dev), MINOR(dev), ino, &len); if (file) { diff --git a/include/linux/mm.h b/include/linux/mm.h index aeabe953ba4..bff1f0d475c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1079,7 +1079,7 @@ static inline void setup_per_cpu_pageset(void) {} #endif /* nommu.c */ -extern atomic_t mmap_pages_allocated; +extern atomic_long_t mmap_pages_allocated; /* prio_tree.c */ void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old); diff --git a/kernel/fork.c b/kernel/fork.c index 47c15840a38..51d1aa21483 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1488,6 +1488,7 @@ void __init proc_caches_init(void) mm_cachep = kmem_cache_create("mm_struct", sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC); mmap_init(); } diff --git a/mm/mmap.c b/mm/mmap.c index 1abb9185a68..4a3841186c1 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -2481,7 +2481,4 @@ void mm_drop_all_locks(struct mm_struct *mm) */ void __init mmap_init(void) { - vm_area_cachep = kmem_cache_create("vm_area_struct", - sizeof(struct vm_area_struct), 0, - SLAB_PANIC, NULL); } diff --git a/mm/nommu.c b/mm/nommu.c index 2fcf47d449b..72eda4aee2c 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -69,7 +69,7 @@ int sysctl_max_map_count = DEFAULT_MAX_MAP_COUNT; int sysctl_nr_trim_pages = 1; /* page trimming behaviour */ int heap_stack_gap = 0; -atomic_t mmap_pages_allocated; +atomic_long_t mmap_pages_allocated; EXPORT_SYMBOL(mem_map); EXPORT_SYMBOL(num_physpages); @@ -463,12 +463,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) */ void __init mmap_init(void) { - vm_region_jar = kmem_cache_create("vm_region_jar", - sizeof(struct vm_region), 0, - SLAB_PANIC, NULL); - vm_area_cachep = kmem_cache_create("vm_area_struct", - sizeof(struct vm_area_struct), 0, - SLAB_PANIC, NULL); + vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC); } /* @@ -486,27 +481,24 @@ static noinline void validate_nommu_regions(void) return; last = rb_entry(lastp, struct vm_region, vm_rb); - if (unlikely(last->vm_end <= last->vm_start)) - BUG(); - if (unlikely(last->vm_top < last->vm_end)) - BUG(); + BUG_ON(unlikely(last->vm_end <= last->vm_start)); + BUG_ON(unlikely(last->vm_top < last->vm_end)); while ((p = rb_next(lastp))) { region = rb_entry(p, struct vm_region, vm_rb); last = rb_entry(lastp, struct vm_region, vm_rb); - if (unlikely(region->vm_end <= region->vm_start)) - BUG(); - if (unlikely(region->vm_top < region->vm_end)) - BUG(); - if (unlikely(region->vm_start < last->vm_top)) - BUG(); + BUG_ON(unlikely(region->vm_end <= region->vm_start)); + BUG_ON(unlikely(region->vm_top < region->vm_end)); + BUG_ON(unlikely(region->vm_start < last->vm_top)); lastp = p; } } #else -#define validate_nommu_regions() do {} while(0) +static void validate_nommu_regions(void) +{ +} #endif /* @@ -563,16 +555,17 @@ static void free_page_series(unsigned long from, unsigned long to) struct page *page = virt_to_page(from); kdebug("- free %lx", from); - atomic_dec(&mmap_pages_allocated); + atomic_long_dec(&mmap_pages_allocated); if (page_count(page) != 1) - kdebug("free page %p [%d]", page, page_count(page)); + kdebug("free page %p: refcount not one: %d", + page, page_count(page)); put_page(page); } } /* * release a reference to a region - * - the caller must hold the region semaphore, which this releases + * - the caller must hold the region semaphore for writing, which this releases * - the region may not have been added to the tree yet, in which case vm_top * will equal vm_start */ @@ -1096,7 +1089,7 @@ static int do_mmap_private(struct vm_area_struct *vma, goto enomem; total = 1 << order; - atomic_add(total, &mmap_pages_allocated); + atomic_long_add(total, &mmap_pages_allocated); point = rlen >> PAGE_SHIFT; @@ -1107,7 +1100,7 @@ static int do_mmap_private(struct vm_area_struct *vma, order = ilog2(total - point); n = 1 << order; kdebug("shave %lu/%lu @%lu", n, total - point, total); - atomic_sub(n, &mmap_pages_allocated); + atomic_long_sub(n, &mmap_pages_allocated); total -= n; set_page_refcounted(pages + total); __free_pages(pages + total, order); @@ -1536,10 +1529,15 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) /* find the first potentially overlapping VMA */ vma = find_vma(mm, start); if (!vma) { - printk(KERN_WARNING - "munmap of memory not mmapped by process %d (%s):" - " 0x%lx-0x%lx\n", - current->pid, current->comm, start, start + len - 1); + static int limit = 0; + if (limit < 5) { + printk(KERN_WARNING + "munmap of memory not mmapped by process %d" + " (%s): 0x%lx-0x%lx\n", + current->pid, current->comm, + start, start + len - 1); + limit++; + } return -EINVAL; } -- cgit v1.2.3-70-g09d2 From 6f2c55b843836d26528c56a0968689accaedbc67 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 2 Apr 2009 16:56:59 -0700 Subject: Simplify copy_thread() First argument unused since 2.3.11. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Alexey Dobriyan Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/process.c | 2 +- arch/arm/kernel/process.c | 2 +- arch/avr32/kernel/process.c | 2 +- arch/blackfin/kernel/process.c | 2 +- arch/cris/arch-v10/kernel/process.c | 2 +- arch/cris/arch-v32/kernel/process.c | 2 +- arch/frv/kernel/process.c | 2 +- arch/h8300/kernel/process.c | 2 +- arch/ia64/kernel/process.c | 2 +- arch/m32r/kernel/process.c | 2 +- arch/m68k/kernel/process.c | 2 +- arch/m68knommu/kernel/process.c | 2 +- arch/mips/kernel/process.c | 2 +- arch/mn10300/kernel/process.c | 2 +- arch/parisc/kernel/process.c | 2 +- arch/powerpc/kernel/process.c | 2 +- arch/s390/kernel/process.c | 2 +- arch/sh/kernel/process_32.c | 2 +- arch/sh/kernel/process_64.c | 2 +- arch/sparc/kernel/process_32.c | 2 +- arch/sparc/kernel/process_64.c | 2 +- arch/um/kernel/process.c | 2 +- arch/x86/kernel/process_32.c | 2 +- arch/x86/kernel/process_64.c | 2 +- arch/xtensa/kernel/process.c | 2 +- include/linux/sched.h | 3 ++- kernel/fork.c | 2 +- 27 files changed, 28 insertions(+), 27 deletions(-) (limited to 'kernel/fork.c') diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 8d0097f1020..3a2fb7a02db 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -272,7 +272,7 @@ alpha_vfork(struct pt_regs *regs) */ int -copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct * p, struct pt_regs * regs) { diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 2de14e2afdc..c3265a2e7cd 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -301,7 +301,7 @@ void release_thread(struct task_struct *dead_task) asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); int -copy_thread(int nr, unsigned long clone_flags, unsigned long stack_start, +copy_thread(unsigned long clone_flags, unsigned long stack_start, unsigned long stk_sz, struct task_struct *p, struct pt_regs *regs) { struct thread_info *thread = task_thread_info(p); diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index 43ae555ecb3..1bbe1da5486 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -332,7 +332,7 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) asmlinkage void ret_from_fork(void); -int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index 33e2e8993f7..f49427293ca 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -193,7 +193,7 @@ asmlinkage int bfin_clone(struct pt_regs *regs) } int -copy_thread(int nr, unsigned long clone_flags, +copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long topstk, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c index bd9b3ff63f6..c4c69cf721e 100644 --- a/arch/cris/arch-v10/kernel/process.c +++ b/arch/cris/arch-v10/kernel/process.c @@ -115,7 +115,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) */ asmlinkage void ret_from_fork(void); -int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c index ced5b725d9b..120e7f796fe 100644 --- a/arch/cris/arch-v32/kernel/process.c +++ b/arch/cris/arch-v32/kernel/process.c @@ -131,7 +131,7 @@ kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) extern asmlinkage void ret_from_fork(void); int -copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index 9583a338e9d..0de50df7497 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -204,7 +204,7 @@ void prepare_to_copy(struct task_struct *tsk) /* * set up the kernel stack and exception frames for a new process */ -int copy_thread(int nr, unsigned long clone_flags, +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long topstk, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index a8ef654a5a0..e2f33d0f996 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -191,7 +191,7 @@ asmlinkage int h8300_clone(struct pt_regs *regs) } -int copy_thread(int nr, unsigned long clone_flags, +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long topstk, struct task_struct * p, struct pt_regs * regs) { diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index c5716270514..5d7c0e5b9e7 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -413,7 +413,7 @@ ia64_load_extra (struct task_struct *task) * so there is nothing to worry about. */ int -copy_thread (int nr, unsigned long clone_flags, +copy_thread(unsigned long clone_flags, unsigned long user_stack_base, unsigned long user_stack_size, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index 7103d91e1a2..3e876f0baeb 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -225,7 +225,7 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) return 0; /* Task didn't use the fpu at all. */ } -int copy_thread(int nr, unsigned long clone_flags, unsigned long spu, +int copy_thread(unsigned long clone_flags, unsigned long spu, unsigned long unused, struct task_struct *tsk, struct pt_regs *regs) { struct pt_regs *childregs = task_pt_regs(tsk); diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 632ce016014..ec37fb56c12 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -233,7 +233,7 @@ asmlinkage int m68k_clone(struct pt_regs *regs) parent_tidptr, child_tidptr); } -int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct * p, struct pt_regs * regs) { diff --git a/arch/m68knommu/kernel/process.c b/arch/m68knommu/kernel/process.c index 3f2d7745f31..1e96c6eb631 100644 --- a/arch/m68knommu/kernel/process.c +++ b/arch/m68knommu/kernel/process.c @@ -199,7 +199,7 @@ asmlinkage int m68k_clone(struct pt_regs *regs) return do_fork(clone_flags, newsp, regs, 0, NULL, NULL); } -int copy_thread(int nr, unsigned long clone_flags, +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long topstk, struct task_struct * p, struct pt_regs * regs) { diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index ca2e4026ad2..1eaaa450e20 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -99,7 +99,7 @@ void flush_thread(void) { } -int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { struct thread_info *ti = task_thread_info(p); diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index b28c9a60445..234cf344cdc 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -193,7 +193,7 @@ void prepare_to_copy(struct task_struct *tsk) * set up the kernel stack for a new thread and copy arch-specific thread * control information */ -int copy_thread(int nr, unsigned long clone_flags, +int copy_thread(unsigned long clone_flags, unsigned long c_usp, unsigned long ustk_size, struct task_struct *p, struct pt_regs *kregs) { diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index b80e02a4d81..8aa591ed912 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -263,7 +263,7 @@ sys_vfork(struct pt_regs *regs) } int -copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, /* in ia64 this is "user_stack_size" */ struct task_struct * p, struct pt_regs * pregs) { diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index eac06494878..7b44a33f03c 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -598,7 +598,7 @@ void prepare_to_copy(struct task_struct *tsk) /* * Copy a thread.. */ -int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index b48e961a38f..a3acd8e60af 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -160,7 +160,7 @@ void release_thread(struct task_struct *dead_task) { } -int copy_thread(int nr, unsigned long clone_flags, unsigned long new_stackp, +int copy_thread(unsigned long clone_flags, unsigned long new_stackp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index ddafbbbab2a..694bc15f84f 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -170,7 +170,7 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) asmlinkage void ret_from_fork(void); -int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index c90c7e5e5fe..96be839040f 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -425,7 +425,7 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t *fpu) asmlinkage void ret_from_fork(void); -int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index f4bee35a1b4..2830b415e21 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -455,7 +455,7 @@ asmlinkage int sparc_do_fork(unsigned long clone_flags, */ extern void ret_from_fork(void); -int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, +int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index a73954b87f0..4041f94e772 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -561,7 +561,7 @@ asmlinkage long sparc_do_fork(unsigned long clone_flags, * Parent --> %o0 == childs pid, %o1 == 0 * Child --> %o0 == parents pid, %o1 == 1 */ -int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, +int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index a1c6d07cac3..4a28a1568d8 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -179,7 +179,7 @@ void fork_handler(void) userspace(¤t->thread.regs.regs); } -int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, +int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long stack_top, struct task_struct * p, struct pt_regs *regs) { diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 14014d766ca..76f8f84043a 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -245,7 +245,7 @@ void prepare_to_copy(struct task_struct *tsk) unlazy_fpu(tsk); } -int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, +int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index abb7e6a7f0c..b751a41392b 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -278,7 +278,7 @@ void prepare_to_copy(struct task_struct *tsk) unlazy_fpu(tsk); } -int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, +int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long unused, struct task_struct *p, struct pt_regs *regs) { diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 9185597eb6a..031f3668571 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -172,7 +172,7 @@ void prepare_to_copy(struct task_struct *tsk) * childregs. */ -int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, +int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long unused, struct task_struct * p, struct pt_regs * regs) { diff --git a/include/linux/sched.h b/include/linux/sched.h index 481fad3a9b4..9186f8c5d5f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1975,7 +1975,8 @@ extern void mm_release(struct task_struct *, struct mm_struct *); /* Allocate a new mm structure and copy contents from tsk->mm */ extern struct mm_struct *dup_mm(struct task_struct *tsk); -extern int copy_thread(int, unsigned long, unsigned long, unsigned long, struct task_struct *, struct pt_regs *); +extern int copy_thread(unsigned long, unsigned long, unsigned long, + struct task_struct *, struct pt_regs *); extern void flush_thread(void); extern void exit_thread(void); diff --git a/kernel/fork.c b/kernel/fork.c index 51d1aa21483..d7eb727eb53 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1125,7 +1125,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, goto bad_fork_cleanup_mm; if ((retval = copy_io(clone_flags, p))) goto bad_fork_cleanup_namespaces; - retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs); + retval = copy_thread(clone_flags, stack_start, stack_size, p, regs); if (retval) goto bad_fork_cleanup_io; -- cgit v1.2.3-70-g09d2 From b3bfa0cba867f23365b81658b47efd906830879b Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 2 Apr 2009 16:58:08 -0700 Subject: signals: protect cinit from blocked fatal signals Normally SIG_DFL signals to global and container-init are dropped early. But if a signal is blocked when it is posted, we cannot drop the signal since the receiver may install a handler before unblocking the signal. Once this signal is queued however, the receiver container-init has no way of knowing if the signal was sent from an ancestor or descendant namespace. This patch ensures that contianer-init drops all SIG_DFL signals in get_signal_to_deliver() except SIGKILL/SIGSTOP. If SIGSTOP/SIGKILL originate from a descendant of container-init they are never queued (i.e dropped in sig_ignored() in an earler patch). If SIGSTOP/SIGKILL originate from parent namespace, the signal is queued and container-init processes the signal. IOW, if get_signal_to_deliver() sees a sig_kernel_only() signal for global or container-init, the signal must have been generated internally or must have come from an ancestor ns and we process the signal. Further, the signal_group_exit() check was needed to cover the case of a multi-threaded init sending SIGKILL to other threads when doing an exit() or exec(). But since the new sig_kernel_only() check covers the SIGKILL, the signal_group_exit() check is no longer needed and can be removed. Finally, now that we have all pieces in place, set SIGNAL_UNKILLABLE for container-inits. Signed-off-by: Sukadev Bhattiprolu Cc: Oleg Nesterov Cc: Roland McGrath Cc: "Eric W. Biederman" Cc: Daniel Lezcano Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 2 ++ kernel/signal.c | 9 ++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index d7eb727eb53..adbea16ec64 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -841,6 +841,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) atomic_set(&sig->live, 1); init_waitqueue_head(&sig->wait_chldexit); sig->flags = 0; + if (clone_flags & CLONE_NEWPID) + sig->flags |= SIGNAL_UNKILLABLE; sig->group_exit_code = 0; sig->group_exit_task = NULL; sig->group_stop_count = 0; diff --git a/kernel/signal.c b/kernel/signal.c index fb19aae2363..ba3da25f0ee 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1870,9 +1870,16 @@ relock: /* * Global init gets no signals it doesn't want. + * Container-init gets no signals it doesn't want from same + * container. + * + * Note that if global/container-init sees a sig_kernel_only() + * signal here, the signal must have been generated internally + * or must have come from an ancestor namespace. In either + * case, the signal cannot be dropped. */ if (unlikely(signal->flags & SIGNAL_UNKILLABLE) && - !signal_group_exit(signal)) + !sig_kernel_only(signr)) continue; if (sig_kernel_stop(signr)) { -- cgit v1.2.3-70-g09d2 From 1b0f7ffd0ea27cd3a0b9ca04e3df9522048c32a3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 2 Apr 2009 16:58:39 -0700 Subject: pids: kill signal_struct-> __pgrp/__session and friends We are wasting 2 words in signal_struct without any reason to implement task_pgrp_nr() and task_session_nr(). task_session_nr() has no callers since 2e2ba22ea4fd4bb85f0fa37c521066db6775cbef, we can remove it. task_pgrp_nr() is still (I believe wrongly) used in fs/autofsX and fs/coda. This patch reimplements task_pgrp_nr() via task_pgrp_nr_ns(), and kills __pgrp/__session and the related helpers. The change in drivers/char/tty_io.c is cosmetic, but hopefully makes sense anyway. Signed-off-by: Oleg Nesterov Acked-by: Alan Cox [tty parts] Cc: Cedric Le Goater Cc: Dave Hansen Cc: Eric Biederman Cc: Pavel Emelyanov Cc: Serge Hallyn Cc: Sukadev Bhattiprolu Cc: Roland McGrath Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/char/tty_io.c | 4 ++-- include/linux/sched.h | 43 ++++++------------------------------------- kernel/exit.c | 10 +++------- kernel/fork.c | 2 -- kernel/sys.c | 4 +--- 5 files changed, 12 insertions(+), 51 deletions(-) (limited to 'kernel/fork.c') diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index a44b701c5bb..66b99a2049e 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -2681,7 +2681,7 @@ void __do_SAK(struct tty_struct *tty) /* Kill the entire session */ do_each_pid_task(session, PIDTYPE_SID, p) { printk(KERN_NOTICE "SAK: killed process %d" - " (%s): task_session_nr(p)==tty->session\n", + " (%s): task_session(p)==tty->session\n", task_pid_nr(p), p->comm); send_sig(SIGKILL, p, 1); } while_each_pid_task(session, PIDTYPE_SID, p); @@ -2691,7 +2691,7 @@ void __do_SAK(struct tty_struct *tty) do_each_thread(g, p) { if (p->signal->tty == tty) { printk(KERN_NOTICE "SAK: killed process %d" - " (%s): task_session_nr(p)==tty->session\n", + " (%s): task_session(p)==tty->session\n", task_pid_nr(p), p->comm); send_sig(SIGKILL, p, 1); continue; diff --git a/include/linux/sched.h b/include/linux/sched.h index 49df878a0ca..206ac003e8c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -547,25 +547,8 @@ struct signal_struct { struct list_head cpu_timers[3]; - /* job control IDs */ - - /* - * pgrp and session fields are deprecated. - * use the task_session_Xnr and task_pgrp_Xnr routines below - */ - - union { - pid_t pgrp __deprecated; - pid_t __pgrp; - }; - struct pid *tty_old_pgrp; - union { - pid_t session __deprecated; - pid_t __session; - }; - /* boolean value for session group leader */ int leader; @@ -1469,16 +1452,6 @@ static inline int rt_task(struct task_struct *p) return rt_prio(p->prio); } -static inline void set_task_session(struct task_struct *tsk, pid_t session) -{ - tsk->signal->__session = session; -} - -static inline void set_task_pgrp(struct task_struct *tsk, pid_t pgrp) -{ - tsk->signal->__pgrp = pgrp; -} - static inline struct pid *task_pid(struct task_struct *task) { return task->pids[PIDTYPE_PID].pid; @@ -1552,11 +1525,6 @@ static inline pid_t task_tgid_vnr(struct task_struct *tsk) } -static inline pid_t task_pgrp_nr(struct task_struct *tsk) -{ - return tsk->signal->__pgrp; -} - static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { @@ -1569,11 +1537,6 @@ static inline pid_t task_pgrp_vnr(struct task_struct *tsk) } -static inline pid_t task_session_nr(struct task_struct *tsk) -{ - return tsk->signal->__session; -} - static inline pid_t task_session_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { @@ -1585,6 +1548,12 @@ static inline pid_t task_session_vnr(struct task_struct *tsk) return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL); } +/* obsolete, do not use */ +static inline pid_t task_pgrp_nr(struct task_struct *tsk) +{ + return task_pgrp_nr_ns(tsk, &init_pid_ns); +} + /** * pid_alive - check that a task structure is not stale * @p: Task structure to be checked. diff --git a/kernel/exit.c b/kernel/exit.c index 384f09caf2e..3bec141c82f 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -357,16 +357,12 @@ static void reparent_to_kthreadd(void) void __set_special_pids(struct pid *pid) { struct task_struct *curr = current->group_leader; - pid_t nr = pid_nr(pid); - if (task_session(curr) != pid) { + if (task_session(curr) != pid) change_pid(curr, PIDTYPE_SID, pid); - set_task_session(curr, nr); - } - if (task_pgrp(curr) != pid) { + + if (task_pgrp(curr) != pid) change_pid(curr, PIDTYPE_PGID, pid); - set_task_pgrp(curr, nr); - } } static void set_special_pids(struct pid *pid) diff --git a/kernel/fork.c b/kernel/fork.c index adbea16ec64..f7445823144 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1265,8 +1265,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->signal->leader_pid = pid; tty_kref_put(p->signal->tty); p->signal->tty = tty_kref_get(current->signal->tty); - set_task_pgrp(p, task_pgrp_nr(current)); - set_task_session(p, task_session_nr(current)); attach_pid(p, PIDTYPE_PGID, task_pgrp(current)); attach_pid(p, PIDTYPE_SID, task_session(current)); list_add_tail_rcu(&p->tasks, &init_task.tasks); diff --git a/kernel/sys.c b/kernel/sys.c index 37f458e6882..742cefa527e 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -1013,10 +1013,8 @@ SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid) if (err) goto out; - if (task_pgrp(p) != pgrp) { + if (task_pgrp(p) != pgrp) change_pid(p, PIDTYPE_PGID, pgrp); - set_task_pgrp(p, pid_nr(pgrp)); - } err = 0; out: -- cgit v1.2.3-70-g09d2 From 6279a751fe096a21dc7704e918d570d3ff06e769 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 27 Mar 2009 01:06:07 +0100 Subject: posix-timers: fix RLIMIT_CPU && fork() See http://bugzilla.kernel.org/show_bug.cgi?id=12911 copy_signal() copies signal->rlim, but RLIMIT_CPU is "lost". Because posix_cpu_timers_init_group() sets cputime_expires.prof_exp = 0 and thus fastpath_timer_check() returns false unless we have other expired cpu timers. Change copy_signal() to set cputime_expires.prof_exp if we have RLIMIT_CPU. Also, set cputimer.running = 1 in that case. This is not strictly necessary, but imho makes sense. Reported-by: Peter Lojkin Signed-off-by: Oleg Nesterov Acked-by: Peter Zijlstra Cc: Peter Lojkin Cc: Roland McGrath Cc: stable@kernel.org LKML-Reference: <20090327000607.GA10104@redhat.com> Signed-off-by: Ingo Molnar --- kernel/fork.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 4854c2c4a82..9b51a1b190d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -808,6 +808,12 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig) sig->cputime_expires.virt_exp = cputime_zero; sig->cputime_expires.sched_exp = 0; + if (sig->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) { + sig->cputime_expires.prof_exp = + secs_to_cputime(sig->rlim[RLIMIT_CPU].rlim_cur); + sig->cputimer.running = 1; + } + /* The timer lists. */ INIT_LIST_HEAD(&sig->cpu_timers[0]); INIT_LIST_HEAD(&sig->cpu_timers[1]); @@ -823,11 +829,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) atomic_inc(¤t->signal->live); return 0; } - sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); - - if (sig) - posix_cpu_timers_init_group(sig); + sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); tsk->signal = sig; if (!sig) return -ENOMEM; @@ -865,6 +868,8 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim); task_unlock(current->group_leader); + posix_cpu_timers_init_group(sig); + acct_init_pacct(&sig->pacct); tty_audit_fork(sig); -- cgit v1.2.3-70-g09d2 From 087eb437051b3de817720f9c80c440fc9e7dcce8 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 4 Jun 2009 16:29:07 -0700 Subject: ptrace: tracehook_report_clone: fix false positives The "trace || CLONE_PTRACE" check in tracehook_report_clone() is not right, - If the untraced task does clone(CLONE_PTRACE) the new child is not traced, we must not queue SIGSTOP. - If we forked the traced task, but the tracer exits and untraces both the forking task and the new child (after copy_process() drops tasklist_lock), we should not queue SIGSTOP too. Change the code to check task_ptrace() != 0 instead. This is still racy, but the race is harmless. We can race with another tracer attaching to this child, or the tracer can exit and detach in parallel. But giwen that we didn't do wake_up_new_task() yet, the child must have the pending SIGSTOP anyway. Signed-off-by: Oleg Nesterov Acked-by: Roland McGrath Cc: Christoph Hellwig Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tracehook.h | 11 +++++------ kernel/fork.c | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) (limited to 'kernel/fork.c') diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index c7aa154f4bf..eb96603d92d 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -259,14 +259,12 @@ static inline void tracehook_finish_clone(struct task_struct *child, /** * tracehook_report_clone - in parent, new child is about to start running - * @trace: return value from tracehook_prepare_clone() * @regs: parent's user register state * @clone_flags: flags from parent's system call * @pid: new child's PID in the parent's namespace * @child: new child task * - * Called after a child is set up, but before it has been started - * running. @trace is the value returned by tracehook_prepare_clone(). + * Called after a child is set up, but before it has been started running. * This is not a good place to block, because the child has not started * yet. Suspend the child here if desired, and then block in * tracehook_report_clone_complete(). This must prevent the child from @@ -276,13 +274,14 @@ static inline void tracehook_finish_clone(struct task_struct *child, * * Called with no locks held, but the child cannot run until this returns. */ -static inline void tracehook_report_clone(int trace, struct pt_regs *regs, +static inline void tracehook_report_clone(struct pt_regs *regs, unsigned long clone_flags, pid_t pid, struct task_struct *child) { - if (unlikely(trace) || unlikely(clone_flags & CLONE_PTRACE)) { + if (unlikely(task_ptrace(child))) { /* - * The child starts up with an immediate SIGSTOP. + * It doesn't matter who attached/attaching to this + * task, the pending SIGSTOP is right in any case. */ sigaddset(&child->pending.signal, SIGSTOP); set_tsk_thread_flag(child, TIF_SIGPENDING); diff --git a/kernel/fork.c b/kernel/fork.c index b9e2edd0072..875ffbdd96d 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1409,7 +1409,7 @@ long do_fork(unsigned long clone_flags, } audit_finish_fork(p); - tracehook_report_clone(trace, regs, clone_flags, nr, p); + tracehook_report_clone(regs, clone_flags, nr, p); /* * We set PF_STARTING at creation in case tracing wants to -- cgit v1.2.3-70-g09d2