Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile            |    1
-rw-r--r--  kernel/acct.c              |   26
-rw-r--r--  kernel/auditfilter.c       |    4
-rw-r--r--  kernel/auditsc.c           |    6
-rw-r--r--  kernel/cpu.c               |    8
-rw-r--r--  kernel/cpuset.c            |  106
-rw-r--r--  kernel/exit.c              |  104
-rw-r--r--  kernel/fork.c              |   85
-rw-r--r--  kernel/futex.c             |   10
-rw-r--r--  kernel/irq/chip.c          |   33
-rw-r--r--  kernel/irq/manage.c        |   89
-rw-r--r--  kernel/irq/proc.c          |    3
-rw-r--r--  kernel/irq/spurious.c      |    2
-rw-r--r--  kernel/kallsyms.c          |   16
-rw-r--r--  kernel/kmod.c              |    2
-rw-r--r--  kernel/kprobes.c           |   20
-rw-r--r--  kernel/lockdep.c           |  207
-rw-r--r--  kernel/module.c            |  127
-rw-r--r--  kernel/mutex.c             |    9
-rw-r--r--  kernel/nsproxy.c           |   38
-rw-r--r--  kernel/params.c            |   34
-rw-r--r--  kernel/pid.c               |   75
-rw-r--r--  kernel/power/Kconfig       |   35
-rw-r--r--  kernel/power/disk.c        |    8
-rw-r--r--  kernel/power/main.c        |    2
-rw-r--r--  kernel/power/process.c     |   21
-rw-r--r--  kernel/power/swap.c        |    9
-rw-r--r--  kernel/power/user.c        |    7
-rw-r--r--  kernel/printk.c            |    2
-rw-r--r--  kernel/profile.c           |   17
-rw-r--r--  kernel/rcutorture.c        |    3
-rw-r--r--  kernel/relay.c             |   25
-rw-r--r--  kernel/resource.c          |   62
-rw-r--r--  kernel/sched.c             |  538
-rw-r--r--  kernel/signal.c            |   17
-rw-r--r--  kernel/sys.c               |   38
-rw-r--r--  kernel/sysctl.c            |  390
-rw-r--r--  kernel/time/clocksource.c  |    8
-rw-r--r--  kernel/timer.c             |  169
-rw-r--r--  kernel/tsacct.c            |    9
-rw-r--r--  kernel/unwind.c            | 1305
-rw-r--r--  kernel/workqueue.c         |   45
42 files changed, 1568 insertions(+), 2147 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile
index 5e3f3b75563..14f4d45e0ae 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -31,7 +31,6 @@ obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
obj-$(CONFIG_UID16) += uid16.o
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_KALLSYMS) += kallsyms.o
-obj-$(CONFIG_STACK_UNWIND) += unwind.o
obj-$(CONFIG_PM) += power/
obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o
obj-$(CONFIG_KEXEC) += kexec.o
diff --git a/kernel/acct.c b/kernel/acct.c
index dc12db8600e..70d0d88e555 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -118,7 +118,7 @@ static int check_free_space(struct file *file)
spin_unlock(&acct_globals.lock);
/* May block */
- if (vfs_statfs(file->f_dentry, &sbuf))
+ if (vfs_statfs(file->f_path.dentry, &sbuf))
return res;
suspend = sbuf.f_blocks * SUSPEND;
resume = sbuf.f_blocks * RESUME;
@@ -194,7 +194,7 @@ static void acct_file_reopen(struct file *file)
add_timer(&acct_globals.timer);
}
if (old_acct) {
- mnt_unpin(old_acct->f_vfsmnt);
+ mnt_unpin(old_acct->f_path.mnt);
spin_unlock(&acct_globals.lock);
do_acct_process(old_acct);
filp_close(old_acct, NULL);
@@ -212,7 +212,7 @@ static int acct_on(char *name)
if (IS_ERR(file))
return PTR_ERR(file);
- if (!S_ISREG(file->f_dentry->d_inode->i_mode)) {
+ if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) {
filp_close(file, NULL);
return -EACCES;
}
@@ -229,11 +229,11 @@ static int acct_on(char *name)
}
spin_lock(&acct_globals.lock);
- mnt_pin(file->f_vfsmnt);
+ mnt_pin(file->f_path.mnt);
acct_file_reopen(file);
spin_unlock(&acct_globals.lock);
- mntput(file->f_vfsmnt); /* it's pinned, now give up active reference */
+ mntput(file->f_path.mnt); /* it's pinned, now give up active reference */
return 0;
}
@@ -283,7 +283,7 @@ asmlinkage long sys_acct(const char __user *name)
void acct_auto_close_mnt(struct vfsmount *m)
{
spin_lock(&acct_globals.lock);
- if (acct_globals.file && acct_globals.file->f_vfsmnt == m)
+ if (acct_globals.file && acct_globals.file->f_path.mnt == m)
acct_file_reopen(NULL);
spin_unlock(&acct_globals.lock);
}
@@ -299,7 +299,7 @@ void acct_auto_close(struct super_block *sb)
{
spin_lock(&acct_globals.lock);
if (acct_globals.file &&
- acct_globals.file->f_vfsmnt->mnt_sb == sb) {
+ acct_globals.file->f_path.mnt->mnt_sb == sb) {
acct_file_reopen(NULL);
}
spin_unlock(&acct_globals.lock);
@@ -428,6 +428,7 @@ static void do_acct_process(struct file *file)
u64 elapsed;
u64 run_time;
struct timespec uptime;
+ struct tty_struct *tty;
/*
* First check to see if there is enough free_space to continue
@@ -484,16 +485,9 @@ static void do_acct_process(struct file *file)
ac.ac_ppid = current->parent->tgid;
#endif
- mutex_lock(&tty_mutex);
- /* FIXME: Whoever is responsible for current->signal locking needs
- to use the same locking all over the kernel and document it */
- read_lock(&tasklist_lock);
- ac.ac_tty = current->signal->tty ?
- old_encode_dev(tty_devnum(current->signal->tty)) : 0;
- read_unlock(&tasklist_lock);
- mutex_unlock(&tty_mutex);
-
spin_lock_irq(&current->sighand->siglock);
+ tty = current->signal->tty;
+ ac.ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0;
ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime)));
ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime)));
ac.ac_flag = pacct->ac_flag;
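Note: the f_dentry/f_vfsmnt accesses converted above (and in most files in this series) now go through an embedded struct path. A minimal sketch of the layout change, using simplified stand-in definitions rather than the full kernel structures:

/* Simplified stand-ins; the real definitions live in the
 * <linux/fs.h>-era headers of this series. */
struct dentry;
struct vfsmount;

struct path {
	struct vfsmount *mnt;
	struct dentry *dentry;
};

struct file {
	struct path f_path;	/* replaces the separate f_dentry/f_vfsmnt pair */
	/* ... remaining members elided ... */
};

/* old: file->f_dentry        file->f_vfsmnt   */
/* new: file->f_path.dentry   file->f_path.mnt */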
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 2e896f8ae29..9c8c23227c7 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -800,8 +800,8 @@ static inline int audit_dupe_selinux_field(struct audit_field *df,
/* our own copy of se_str */
se_str = kstrdup(sf->se_str, GFP_KERNEL);
- if (unlikely(IS_ERR(se_str)))
- return -ENOMEM;
+ if (unlikely(!se_str))
+ return -ENOMEM;
df->se_str = se_str;
/* our own (refreshed) copy of se_rule */
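Note: the fix above corrects a kstrdup() error check. kstrdup() reports allocation failure by returning NULL, never an ERR_PTR value, so IS_ERR() on its result can never be true and the old failure path was dead code. A minimal sketch of the corrected pattern (the helper name is illustrative):

#include <linux/string.h>	/* kstrdup() */
#include <linux/slab.h>		/* GFP_KERNEL */
#include <linux/errno.h>

/* Illustrative helper showing the corrected failure test. */
static int dup_se_str(const char *src, char **dst)
{
	char *copy = kstrdup(src, GFP_KERNEL);

	if (!copy)		/* kstrdup() fails with NULL, never ERR_PTR */
		return -ENOMEM;
	*dst = copy;
	return 0;
}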
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 40722e26de9..298897559ca 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -781,8 +781,8 @@ static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk
if ((vma->vm_flags & VM_EXECUTABLE) &&
vma->vm_file) {
audit_log_d_path(ab, "exe=",
- vma->vm_file->f_dentry,
- vma->vm_file->f_vfsmnt);
+ vma->vm_file->f_path.dentry,
+ vma->vm_file->f_path.mnt);
break;
}
vma = vma->vm_next;
@@ -826,10 +826,12 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
context->return_code);
mutex_lock(&tty_mutex);
+ read_lock(&tasklist_lock);
if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name)
tty = tsk->signal->tty->name;
else
tty = "(none)";
+ read_unlock(&tasklist_lock);
audit_log_format(ab,
" a0=%lx a1=%lx a2=%lx a3=%lx items=%d"
" ppid=%d pid=%d auid=%u uid=%u gid=%u"
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 9124669f458..7406fe6966f 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -204,7 +204,7 @@ int cpu_down(unsigned int cpu)
#endif /*CONFIG_HOTPLUG_CPU*/
/* Requires cpu_add_remove_lock to be held */
-static int __devinit _cpu_up(unsigned int cpu)
+static int __cpuinit _cpu_up(unsigned int cpu)
{
int ret;
void *hcpu = (void *)(long)cpu;
@@ -239,7 +239,7 @@ out_notify:
return ret;
}
-int __devinit cpu_up(unsigned int cpu)
+int __cpuinit cpu_up(unsigned int cpu)
{
int err = 0;
@@ -258,7 +258,7 @@ static cpumask_t frozen_cpus;
int disable_nonboot_cpus(void)
{
- int cpu, first_cpu, error;
+ int cpu, first_cpu, error = 0;
mutex_lock(&cpu_add_remove_lock);
first_cpu = first_cpu(cpu_present_map);
@@ -294,7 +294,7 @@ int disable_nonboot_cpus(void)
/* Make sure the CPUs won't be enabled by someone else */
cpu_hotplug_disabled = 1;
} else {
- printk(KERN_ERR "Non-boot CPUs are not disabled");
+ printk(KERN_ERR "Non-boot CPUs are not disabled\n");
}
out:
mutex_unlock(&cpu_add_remove_lock);
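Note: initializing error to 0 in disable_nonboot_cpus() closes an uninitialized-return hole: if the loop over the non-boot CPUs never assigns error, the old code returned stack garbage. A simplified userspace illustration of that bug class; take_down() is a hypothetical stand-in for _cpu_down():

/* Simplified illustration of the uninitialized-return bug class. */
static int take_down(int cpu);	/* hypothetical stand-in */

static int disable_all_but_first(int ncpus)
{
	int cpu, error = 0;	/* previously left uninitialized */

	for (cpu = 1; cpu < ncpus; cpu++) {	/* skip the boot CPU */
		error = take_down(cpu);
		if (error)
			break;
	}
	return error;	/* well-defined even when the loop body never runs */
}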
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 0a6b4d89f9a..6b05dc69c95 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -413,8 +413,8 @@ static struct file_system_type cpuset_fs_type = {
*
*
* When reading/writing to a file:
- * - the cpuset to use in file->f_dentry->d_parent->d_fsdata
- * - the 'cftype' of the file is file->f_dentry->d_fsdata
+ * - the cpuset to use in file->f_path.dentry->d_parent->d_fsdata
+ * - the 'cftype' of the file is file->f_path.dentry->d_fsdata
*/
struct cftype {
@@ -1284,8 +1284,8 @@ static ssize_t cpuset_common_file_write(struct file *file,
const char __user *userbuf,
size_t nbytes, loff_t *unused_ppos)
{
- struct cpuset *cs = __d_cs(file->f_dentry->d_parent);
- struct cftype *cft = __d_cft(file->f_dentry);
+ struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
+ struct cftype *cft = __d_cft(file->f_path.dentry);
cpuset_filetype_t type = cft->private;
char *buffer;
char *pathbuf = NULL;
@@ -1367,7 +1367,7 @@ static ssize_t cpuset_file_write(struct file *file, const char __user *buf,
size_t nbytes, loff_t *ppos)
{
ssize_t retval = 0;
- struct cftype *cft = __d_cft(file->f_dentry);
+ struct cftype *cft = __d_cft(file->f_path.dentry);
if (!cft)
return -ENODEV;
@@ -1417,8 +1417,8 @@ static int cpuset_sprintf_memlist(char *page, struct cpuset *cs)
static ssize_t cpuset_common_file_read(struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos)
{
- struct cftype *cft = __d_cft(file->f_dentry);
- struct cpuset *cs = __d_cs(file->f_dentry->d_parent);
+ struct cftype *cft = __d_cft(file->f_path.dentry);
+ struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
cpuset_filetype_t type = cft->private;
char *page;
ssize_t retval = 0;
@@ -1476,7 +1476,7 @@ static ssize_t cpuset_file_read(struct file *file, char __user *buf, size_t nbyt
loff_t *ppos)
{
ssize_t retval = 0;
- struct cftype *cft = __d_cft(file->f_dentry);
+ struct cftype *cft = __d_cft(file->f_path.dentry);
if (!cft)
return -ENODEV;
@@ -1498,7 +1498,7 @@ static int cpuset_file_open(struct inode *inode, struct file *file)
if (err)
return err;
- cft = __d_cft(file->f_dentry);
+ cft = __d_cft(file->f_path.dentry);
if (!cft)
return -ENODEV;
if (cft->open)
@@ -1511,7 +1511,7 @@ static int cpuset_file_open(struct inode *inode, struct file *file)
static int cpuset_file_release(struct inode *inode, struct file *file)
{
- struct cftype *cft = __d_cft(file->f_dentry);
+ struct cftype *cft = __d_cft(file->f_path.dentry);
if (cft->release)
return cft->release(inode, file);
return 0;
@@ -1700,7 +1700,7 @@ static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids)
*/
static int cpuset_tasks_open(struct inode *unused, struct file *file)
{
- struct cpuset *cs = __d_cs(file->f_dentry->d_parent);
+ struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent);
struct ctr_struct *ctr;
pid_t *pidarray;
int npids;
@@ -2342,32 +2342,48 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
}
/**
- * cpuset_zone_allowed - Can we allocate memory on zone z's memory node?
+ * cpuset_zone_allowed_softwall - Can we allocate on zone z's memory node?
* @z: is this zone on an allowed node?
- * @gfp_mask: memory allocation flags (we use __GFP_HARDWALL)
+ * @gfp_mask: memory allocation flags
*
- * If we're in interrupt, yes, we can always allocate. If zone
+ * If we're in interrupt, yes, we can always allocate. If
+ * __GFP_THISNODE is set, yes, we can always allocate. If zone
* z's node is in our tasks mems_allowed, yes. If it's not a
* __GFP_HARDWALL request and this zone's nodes is in the nearest
* mem_exclusive cpuset ancestor to this tasks cpuset, yes.
* Otherwise, no.
*
+ * If __GFP_HARDWALL is set, cpuset_zone_allowed_softwall()
+ * reduces to cpuset_zone_allowed_hardwall(). Otherwise,
+ * cpuset_zone_allowed_softwall() might sleep, and might allow a zone
+ * from an enclosing cpuset.
+ *
+ * cpuset_zone_allowed_hardwall() only handles the simpler case of
+ * hardwall cpusets, and never sleeps.
+ *
+ * The __GFP_THISNODE placement logic is really handled elsewhere,
+ * by forcibly using a zonelist starting at a specified node, and by
+ * (in get_page_from_freelist()) refusing to consider the zones for
+ * any node on the zonelist except the first. By the time any such
+ * calls get to this routine, we should just shut up and say 'yes'.
+ *
* GFP_USER allocations are marked with the __GFP_HARDWALL bit,
* and do not allow allocations outside the current tasks cpuset.
* GFP_KERNEL allocations are not so marked, so can escape to the
- * nearest mem_exclusive ancestor cpuset.
+ * nearest enclosing mem_exclusive ancestor cpuset.
*
- * Scanning up parent cpusets requires callback_mutex. The __alloc_pages()
- * routine only calls here with __GFP_HARDWALL bit _not_ set if
- * it's a GFP_KERNEL allocation, and all nodes in the current tasks
- * mems_allowed came up empty on the first pass over the zonelist.
- * So only GFP_KERNEL allocations, if all nodes in the cpuset are
- * short of memory, might require taking the callback_mutex mutex.
+ * Scanning up parent cpusets requires callback_mutex. The
+ * __alloc_pages() routine only calls here with __GFP_HARDWALL bit
+ * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the
+ * current tasks mems_allowed came up empty on the first pass over
+ * the zonelist. So only GFP_KERNEL allocations, if all nodes in the
+ * cpuset are short of memory, might require taking the callback_mutex
+ * mutex.
*
* The first call here from mm/page_alloc:get_page_from_freelist()
- * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, so
- * no allocation on a node outside the cpuset is allowed (unless in
- * interrupt, of course).
+ * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets,
+ * so no allocation on a node outside the cpuset is allowed (unless
+ * in interrupt, of course).
*
* The second pass through get_page_from_freelist() doesn't even call
* here for GFP_ATOMIC calls. For those calls, the __alloc_pages()
@@ -2380,12 +2396,12 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs)
* GFP_USER - only nodes in current tasks mems allowed ok.
*
* Rule:
- * Don't call cpuset_zone_allowed() if you can't sleep, unless you
+ * Don't call cpuset_zone_allowed_softwall if you can't sleep, unless you
* pass in the __GFP_HARDWALL flag set in gfp_flag, which disables
* the code that might scan up ancestor cpusets and sleep.
- **/
+ */
-int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
+int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask)
{
int node; /* node that zone z is on */
const struct cpuset *cs; /* current cpuset ancestors */
@@ -2415,6 +2431,40 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask)
return allowed;
}
+/*
+ * cpuset_zone_allowed_hardwall - Can we allocate on zone z's memory node?
+ * @z: is this zone on an allowed node?
+ * @gfp_mask: memory allocation flags
+ *
+ * If we're in interrupt, yes, we can always allocate.
+ * If __GFP_THISNODE is set, yes, we can always allocate. If zone
+ * z's node is in our tasks mems_allowed, yes. Otherwise, no.
+ *
+ * The __GFP_THISNODE placement logic is really handled elsewhere,
+ * by forcibly using a zonelist starting at a specified node, and by
+ * (in get_page_from_freelist()) refusing to consider the zones for
+ * any node on the zonelist except the first. By the time any such
+ * calls get to this routine, we should just shut up and say 'yes'.
+ *
+ * Unlike the cpuset_zone_allowed_softwall() variant, above,
+ * this variant requires that the zone be in the current tasks
+ * mems_allowed or that we're in interrupt. It does not scan up the
+ * cpuset hierarchy for the nearest enclosing mem_exclusive cpuset.
+ * It never sleeps.
+ */
+
+int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask)
+{
+ int node; /* node that zone z is on */
+
+ if (in_interrupt() || (gfp_mask & __GFP_THISNODE))
+ return 1;
+ node = zone_to_nid(z);
+ if (node_isset(node, current->mems_allowed))
+ return 1;
+ return 0;
+}
+
/**
* cpuset_lock - lock out any changes to cpuset structures
*
@@ -2606,7 +2656,7 @@ static int cpuset_open(struct inode *inode, struct file *file)
return single_open(file, proc_cpuset_show, pid);
}
-const struct file_operations proc_cpuset_operations = {
+struct file_operations proc_cpuset_operations = {
.open = cpuset_open,
.read = seq_read,
.llseek = seq_lseek,
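Note: the comment blocks above spell out the contract behind the split: the softwall variant may sleep (it can take callback_mutex and walk up to the nearest mem_exclusive ancestor), while the hardwall variant never sleeps and only checks mems_allowed. A usage sketch of choosing between them, assuming the cpuset_zone_allowed_softwall()/cpuset_zone_allowed_hardwall() wrappers this series adds to <linux/cpuset.h>; the function is illustrative, not the actual page-allocator logic:

#include <linux/cpuset.h>
#include <linux/gfp.h>
#include <linux/mmzone.h>

/* Illustrative: pick the check that matches the calling context. */
static int zone_allowed(struct zone *z, gfp_t gfp_mask, int can_sleep)
{
	if (!can_sleep || (gfp_mask & __GFP_HARDWALL))
		return cpuset_zone_allowed_hardwall(z, gfp_mask);
	return cpuset_zone_allowed_softwall(z, gfp_mask);
}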
diff --git a/kernel/exit.c b/kernel/exit.c
index 4e3f919edc4..fec12eb1247 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -13,7 +13,7 @@
#include <linux/completion.h>
#include <linux/personality.h>
#include <linux/tty.h>
-#include <linux/namespace.h>
+#include <linux/mnt_namespace.h>
#include <linux/key.h>
#include <linux/security.h>
#include <linux/cpu.h>
@@ -22,6 +22,7 @@
#include <linux/file.h>
#include <linux/binfmts.h>
#include <linux/nsproxy.h>
+#include <linux/pid_namespace.h>
#include <linux/ptrace.h>
#include <linux/profile.h>
#include <linux/mount.h>
@@ -48,7 +49,6 @@
#include <asm/mmu_context.h>
extern void sem_exit (void);
-extern struct task_struct *child_reaper;
static void exit_mm(struct task_struct * tsk);
@@ -189,21 +189,18 @@ repeat:
int session_of_pgrp(int pgrp)
{
struct task_struct *p;
- int sid = -1;
+ int sid = 0;
read_lock(&tasklist_lock);
- do_each_task_pid(pgrp, PIDTYPE_PGID, p) {
- if (p->signal->session > 0) {
- sid = p->signal->session;
- goto out;
- }
- } while_each_task_pid(pgrp, PIDTYPE_PGID, p);
- p = find_task_by_pid(pgrp);
- if (p)
- sid = p->signal->session;
-out:
+
+ p = find_task_by_pid_type(PIDTYPE_PGID, pgrp);
+ if (p == NULL)
+ p = find_task_by_pid(pgrp);
+ if (p != NULL)
+ sid = process_session(p);
+
read_unlock(&tasklist_lock);
-
+
return sid;
}
@@ -225,8 +222,8 @@ static int will_become_orphaned_pgrp(int pgrp, struct task_struct *ignored_task)
|| p->exit_state
|| is_init(p->real_parent))
continue;
- if (process_group(p->real_parent) != pgrp
- && p->real_parent->signal->session == p->signal->session) {
+ if (process_group(p->real_parent) != pgrp &&
+ process_session(p->real_parent) == process_session(p)) {
ret = 0;
break;
}
@@ -260,7 +257,8 @@ static int has_stopped_jobs(int pgrp)
}
/**
- * reparent_to_init - Reparent the calling kernel thread to the init task.
+ * reparent_to_init - Reparent the calling kernel thread to the init task
+ * of the pid space that the thread belongs to.
*
* If a kernel thread is launched as a result of a system call, or if
* it ever exits, it should generally reparent itself to init so that
@@ -278,8 +276,8 @@ static void reparent_to_init(void)
ptrace_unlink(current);
/* Reparent to init */
remove_parent(current);
- current->parent = child_reaper;
- current->real_parent = child_reaper;
+ current->parent = child_reaper(current);
+ current->real_parent = child_reaper(current);
add_parent(current);
/* Set the exit signal to SIGCHLD so we signal init on exit */
@@ -302,9 +300,9 @@ void __set_special_pids(pid_t session, pid_t pgrp)
{
struct task_struct *curr = current->group_leader;
- if (curr->signal->session != session) {
+ if (process_session(curr) != session) {
detach_pid(curr, PIDTYPE_SID);
- curr->signal->session = session;
+ set_signal_session(curr->signal, session);
attach_pid(curr, PIDTYPE_SID, session);
}
if (process_group(curr) != pgrp) {
@@ -314,7 +312,7 @@ void __set_special_pids(pid_t session, pid_t pgrp)
}
}
-void set_special_pids(pid_t session, pid_t pgrp)
+static void set_special_pids(pid_t session, pid_t pgrp)
{
write_lock_irq(&tasklist_lock);
__set_special_pids(session, pgrp);
@@ -384,9 +382,7 @@ void daemonize(const char *name, ...)
exit_mm(current);
set_special_pids(1, 1);
- mutex_lock(&tty_mutex);
- current->signal->tty = NULL;
- mutex_unlock(&tty_mutex);
+ proc_clear_tty(current);
/* Block and flush all signals */
sigfillset(&blocked);
@@ -429,7 +425,7 @@ static void close_files(struct files_struct * files)
for (;;) {
unsigned long set;
i = j * __NFDBITS;
- if (i >= fdt->max_fdset || i >= fdt->max_fds)
+ if (i >= fdt->max_fds)
break;
set = fdt->open_fds->fds_bits[j++];
while (set) {
@@ -470,9 +466,7 @@ void fastcall put_files_struct(struct files_struct *files)
* you can free files immediately.
*/
fdt = files_fdtable(files);
- if (fdt == &files->fdtab)
- fdt->free_files = files;
- else
+ if (fdt != &files->fdtab)
kmem_cache_free(files_cachep, files);
free_fdtable(fdt);
}
@@ -603,10 +597,6 @@ choose_new_parent(struct task_struct *p, struct task_struct *reaper)
static void
reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
{
- /* We don't want people slaying init. */
- if (p->exit_signal != -1)
- p->exit_signal = SIGCHLD;
-
if (p->pdeath_signal)
/* We already hold the tasklist_lock here. */
group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p);
@@ -626,13 +616,7 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
p->parent = p->real_parent;
add_parent(p);
- /* If we'd notified the old parent about this child's death,
- * also notify the new parent.
- */
- if (p->exit_state == EXIT_ZOMBIE && p->exit_signal != -1 &&
- thread_group_empty(p))
- do_notify_parent(p, p->exit_signal);
- else if (p->state == TASK_TRACED) {
+ if (p->state == TASK_TRACED) {
/*
* If it was at a trace stop, turn it into
* a normal stop since it's no longer being
@@ -642,6 +626,23 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
}
}
+ /* If this is a threaded reparent there is no need to
+ * notify anyone anything has happened.
+ */
+ if (p->real_parent->group_leader == father->group_leader)
+ return;
+
+ /* We don't want people slaying init. */
+ if (p->exit_signal != -1)
+ p->exit_signal = SIGCHLD;
+
+ /* If we'd notified the old parent about this child's death,
+ * also notify the new parent.
+ */
+ if (!traced && p->exit_state == EXIT_ZOMBIE &&
+ p->exit_signal != -1 && thread_group_empty(p))
+ do_notify_parent(p, p->exit_signal);
+
/*
* process group orphan check
* Case ii: Our child is in a different pgrp
@@ -649,10 +650,11 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
* outside, so the child pgrp is now orphaned.
*/
if ((process_group(p) != process_group(father)) &&
- (p->signal->session == father->signal->session)) {
+ (process_session(p) == process_session(father))) {
int pgrp = process_group(p);
- if (will_become_orphaned_pgrp(pgrp, NULL) && has_stopped_jobs(pgrp)) {
+ if (will_become_orphaned_pgrp(pgrp, NULL) &&
+ has_stopped_jobs(pgrp)) {
__kill_pg_info(SIGHUP, SEND_SIG_PRIV, pgrp);
__kill_pg_info(SIGCONT, SEND_SIG_PRIV, pgrp);
}
@@ -663,7 +665,8 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced)
* When we die, we re-parent all our children.
* Try to give them to another thread in our thread
* group, and if no such member exists, give it to
- * the global child reaper process (ie "init")
+ * the child reaper process (ie "init") in our pid
+ * space.
*/
static void
forget_original_parent(struct task_struct *father, struct list_head *to_release)
@@ -674,7 +677,7 @@ forget_original_parent(struct task_struct *father, struct list_head *to_release)
do {
reaper = next_thread(reaper);
if (reaper == father) {
- reaper = child_reaper;
+ reaper = child_reaper(father);
break;
}
} while (reaper->exit_state);
@@ -786,7 +789,7 @@ static void exit_notify(struct task_struct *tsk)
t = tsk->real_parent;
if ((process_group(t) != process_group(tsk)) &&
- (t->signal->session == tsk->signal->session) &&
+ (process_session(t) == process_session(tsk)) &&
will_become_orphaned_pgrp(process_group(tsk), tsk) &&
has_stopped_jobs(process_group(tsk))) {
__kill_pg_info(SIGHUP, SEND_SIG_PRIV, process_group(tsk));
@@ -860,8 +863,13 @@ fastcall NORET_TYPE void do_exit(long code)
panic("Aiee, killing interrupt handler!");
if (unlikely(!tsk->pid))
panic("Attempted to kill the idle task!");
- if (unlikely(tsk == child_reaper))
- panic("Attempted to kill init!");
+ if (unlikely(tsk == child_reaper(tsk))) {
+ if (tsk->nsproxy->pid_ns != &init_pid_ns)
+ tsk->nsproxy->pid_ns->child_reaper = init_pid_ns.child_reaper;
+ else
+ panic("Attempted to kill init!");
+ }
+
if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
current->ptrace_message = code;
@@ -930,8 +938,8 @@ fastcall NORET_TYPE void do_exit(long code)
tsk->exit_code = code;
proc_exit_connector(tsk);
- exit_notify(tsk);
exit_task_namespaces(tsk);
+ exit_notify(tsk);
#ifdef CONFIG_NUMA
mpol_free(tsk->mempolicy);
tsk->mempolicy = NULL;
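Note: do_exit() and the reparenting paths above now resolve init through child_reaper(tsk) rather than a single global task. One plausible shape for that helper, inferred from the dereference chain the do_exit() hunk itself uses (tsk->nsproxy->pid_ns->child_reaper); a simplified sketch with a hypothetical name, not the exact header definition:

#include <linux/sched.h>
#include <linux/nsproxy.h>
#include <linux/pid_namespace.h>

/* Sketch: the reaper is per pid namespace, not a global. */
static inline struct task_struct *task_child_reaper(struct task_struct *tsk)
{
	return tsk->nsproxy->pid_ns->child_reaper;
}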
diff --git a/kernel/fork.c b/kernel/fork.c
index 7f2e31ba33a..d57118da73f 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -18,7 +18,7 @@
#include <linux/module.h>
#include <linux/vmalloc.h>
#include <linux/completion.h>
-#include <linux/namespace.h>
+#include <linux/mnt_namespace.h>
#include <linux/personality.h>
#include <linux/mempolicy.h>
#include <linux/sem.h>
@@ -36,6 +36,7 @@
#include <linux/syscalls.h>
#include <linux/jiffies.h>
#include <linux/futex.h>
+#include <linux/task_io_accounting_ops.h>
#include <linux/rcupdate.h>
#include <linux/ptrace.h>
#include <linux/mount.h>
@@ -202,7 +203,7 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
struct mempolicy *pol;
down_write(&oldmm->mmap_sem);
- flush_cache_mm(oldmm);
+ flush_cache_dup_mm(oldmm);
/*
* Not linked in yet - no deadlock potential:
*/
@@ -252,7 +253,7 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
anon_vma_link(tmp);
file = tmp->vm_file;
if (file) {
- struct inode *inode = file->f_dentry->d_inode;
+ struct inode *inode = file->f_path.dentry->d_inode;
get_file(file);
if (tmp->vm_flags & VM_DENYWRITE)
atomic_dec(&inode->i_writecount);
@@ -613,7 +614,7 @@ static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
static int count_open_files(struct fdtable *fdt)
{
- int size = fdt->max_fdset;
+ int size = fdt->max_fds;
int i;
/* Find the last open fd */
@@ -640,12 +641,10 @@ static struct files_struct *alloc_files(void)
newf->next_fd = 0;
fdt = &newf->fdtab;
fdt->max_fds = NR_OPEN_DEFAULT;
- fdt->max_fdset = EMBEDDED_FD_SET_SIZE;
fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init;
fdt->open_fds = (fd_set *)&newf->open_fds_init;
fdt->fd = &newf->fd_array[0];
INIT_RCU_HEAD(&fdt->rcu);
- fdt->free_files = NULL;
fdt->next = NULL;
rcu_assign_pointer(newf->fdt, fdt);
out:
@@ -661,7 +660,7 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
{
struct files_struct *newf;
struct file **old_fds, **new_fds;
- int open_files, size, i, expand;
+ int open_files, size, i;
struct fdtable *old_fdt, *new_fdt;
*errorp = -ENOMEM;
@@ -672,25 +671,14 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
spin_lock(&oldf->file_lock);
old_fdt = files_fdtable(oldf);