diff options
Diffstat (limited to 'kernel')
42 files changed, 1568 insertions, 2147 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 5e3f3b75563..14f4d45e0ae 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -31,7 +31,6 @@ obj-$(CONFIG_PROVE_LOCKING) += spinlock.o obj-$(CONFIG_UID16) += uid16.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_KALLSYMS) += kallsyms.o -obj-$(CONFIG_STACK_UNWIND) += unwind.o obj-$(CONFIG_PM) += power/ obj-$(CONFIG_BSD_PROCESS_ACCT) += acct.o obj-$(CONFIG_KEXEC) += kexec.o diff --git a/kernel/acct.c b/kernel/acct.c index dc12db8600e..70d0d88e555 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -118,7 +118,7 @@ static int check_free_space(struct file *file) spin_unlock(&acct_globals.lock); /* May block */ - if (vfs_statfs(file->f_dentry, &sbuf)) + if (vfs_statfs(file->f_path.dentry, &sbuf)) return res; suspend = sbuf.f_blocks * SUSPEND; resume = sbuf.f_blocks * RESUME; @@ -194,7 +194,7 @@ static void acct_file_reopen(struct file *file) add_timer(&acct_globals.timer); } if (old_acct) { - mnt_unpin(old_acct->f_vfsmnt); + mnt_unpin(old_acct->f_path.mnt); spin_unlock(&acct_globals.lock); do_acct_process(old_acct); filp_close(old_acct, NULL); @@ -212,7 +212,7 @@ static int acct_on(char *name) if (IS_ERR(file)) return PTR_ERR(file); - if (!S_ISREG(file->f_dentry->d_inode->i_mode)) { + if (!S_ISREG(file->f_path.dentry->d_inode->i_mode)) { filp_close(file, NULL); return -EACCES; } @@ -229,11 +229,11 @@ static int acct_on(char *name) } spin_lock(&acct_globals.lock); - mnt_pin(file->f_vfsmnt); + mnt_pin(file->f_path.mnt); acct_file_reopen(file); spin_unlock(&acct_globals.lock); - mntput(file->f_vfsmnt); /* it's pinned, now give up active reference */ + mntput(file->f_path.mnt); /* it's pinned, now give up active reference */ return 0; } @@ -283,7 +283,7 @@ asmlinkage long sys_acct(const char __user *name) void acct_auto_close_mnt(struct vfsmount *m) { spin_lock(&acct_globals.lock); - if (acct_globals.file && acct_globals.file->f_vfsmnt == m) + if (acct_globals.file && acct_globals.file->f_path.mnt == m) acct_file_reopen(NULL); spin_unlock(&acct_globals.lock); } @@ -299,7 +299,7 @@ void acct_auto_close(struct super_block *sb) { spin_lock(&acct_globals.lock); if (acct_globals.file && - acct_globals.file->f_vfsmnt->mnt_sb == sb) { + acct_globals.file->f_path.mnt->mnt_sb == sb) { acct_file_reopen(NULL); } spin_unlock(&acct_globals.lock); @@ -428,6 +428,7 @@ static void do_acct_process(struct file *file) u64 elapsed; u64 run_time; struct timespec uptime; + struct tty_struct *tty; /* * First check to see if there is enough free_space to continue @@ -484,16 +485,9 @@ static void do_acct_process(struct file *file) ac.ac_ppid = current->parent->tgid; #endif - mutex_lock(&tty_mutex); - /* FIXME: Whoever is responsible for current->signal locking needs - to use the same locking all over the kernel and document it */ - read_lock(&tasklist_lock); - ac.ac_tty = current->signal->tty ? - old_encode_dev(tty_devnum(current->signal->tty)) : 0; - read_unlock(&tasklist_lock); - mutex_unlock(&tty_mutex); - spin_lock_irq(¤t->sighand->siglock); + tty = current->signal->tty; + ac.ac_tty = tty ? old_encode_dev(tty_devnum(tty)) : 0; ac.ac_utime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_utime))); ac.ac_stime = encode_comp_t(jiffies_to_AHZ(cputime_to_jiffies(pacct->ac_stime))); ac.ac_flag = pacct->ac_flag; diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 2e896f8ae29..9c8c23227c7 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -800,8 +800,8 @@ static inline int audit_dupe_selinux_field(struct audit_field *df, /* our own copy of se_str */ se_str = kstrdup(sf->se_str, GFP_KERNEL); - if (unlikely(IS_ERR(se_str))) - return -ENOMEM; + if (unlikely(!se_str)) + return -ENOMEM; df->se_str = se_str; /* our own (refreshed) copy of se_rule */ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 40722e26de9..298897559ca 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -781,8 +781,8 @@ static void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) { audit_log_d_path(ab, "exe=", - vma->vm_file->f_dentry, - vma->vm_file->f_vfsmnt); + vma->vm_file->f_path.dentry, + vma->vm_file->f_path.mnt); break; } vma = vma->vm_next; @@ -826,10 +826,12 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts context->return_code); mutex_lock(&tty_mutex); + read_lock(&tasklist_lock); if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name) tty = tsk->signal->tty->name; else tty = "(none)"; + read_unlock(&tasklist_lock); audit_log_format(ab, " a0=%lx a1=%lx a2=%lx a3=%lx items=%d" " ppid=%d pid=%d auid=%u uid=%u gid=%u" diff --git a/kernel/cpu.c b/kernel/cpu.c index 9124669f458..7406fe6966f 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -204,7 +204,7 @@ int cpu_down(unsigned int cpu) #endif /*CONFIG_HOTPLUG_CPU*/ /* Requires cpu_add_remove_lock to be held */ -static int __devinit _cpu_up(unsigned int cpu) +static int __cpuinit _cpu_up(unsigned int cpu) { int ret; void *hcpu = (void *)(long)cpu; @@ -239,7 +239,7 @@ out_notify: return ret; } -int __devinit cpu_up(unsigned int cpu) +int __cpuinit cpu_up(unsigned int cpu) { int err = 0; @@ -258,7 +258,7 @@ static cpumask_t frozen_cpus; int disable_nonboot_cpus(void) { - int cpu, first_cpu, error; + int cpu, first_cpu, error = 0; mutex_lock(&cpu_add_remove_lock); first_cpu = first_cpu(cpu_present_map); @@ -294,7 +294,7 @@ int disable_nonboot_cpus(void) /* Make sure the CPUs won't be enabled by someone else */ cpu_hotplug_disabled = 1; } else { - printk(KERN_ERR "Non-boot CPUs are not disabled"); + printk(KERN_ERR "Non-boot CPUs are not disabled\n"); } out: mutex_unlock(&cpu_add_remove_lock); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 0a6b4d89f9a..6b05dc69c95 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -413,8 +413,8 @@ static struct file_system_type cpuset_fs_type = { * * * When reading/writing to a file: - * - the cpuset to use in file->f_dentry->d_parent->d_fsdata - * - the 'cftype' of the file is file->f_dentry->d_fsdata + * - the cpuset to use in file->f_path.dentry->d_parent->d_fsdata + * - the 'cftype' of the file is file->f_path.dentry->d_fsdata */ struct cftype { @@ -1284,8 +1284,8 @@ static ssize_t cpuset_common_file_write(struct file *file, const char __user *userbuf, size_t nbytes, loff_t *unused_ppos) { - struct cpuset *cs = __d_cs(file->f_dentry->d_parent); - struct cftype *cft = __d_cft(file->f_dentry); + struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent); + struct cftype *cft = __d_cft(file->f_path.dentry); cpuset_filetype_t type = cft->private; char *buffer; char *pathbuf = NULL; @@ -1367,7 +1367,7 @@ static ssize_t cpuset_file_write(struct file *file, const char __user *buf, size_t nbytes, loff_t *ppos) { ssize_t retval = 0; - struct cftype *cft = __d_cft(file->f_dentry); + struct cftype *cft = __d_cft(file->f_path.dentry); if (!cft) return -ENODEV; @@ -1417,8 +1417,8 @@ static int cpuset_sprintf_memlist(char *page, struct cpuset *cs) static ssize_t cpuset_common_file_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos) { - struct cftype *cft = __d_cft(file->f_dentry); - struct cpuset *cs = __d_cs(file->f_dentry->d_parent); + struct cftype *cft = __d_cft(file->f_path.dentry); + struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent); cpuset_filetype_t type = cft->private; char *page; ssize_t retval = 0; @@ -1476,7 +1476,7 @@ static ssize_t cpuset_file_read(struct file *file, char __user *buf, size_t nbyt loff_t *ppos) { ssize_t retval = 0; - struct cftype *cft = __d_cft(file->f_dentry); + struct cftype *cft = __d_cft(file->f_path.dentry); if (!cft) return -ENODEV; @@ -1498,7 +1498,7 @@ static int cpuset_file_open(struct inode *inode, struct file *file) if (err) return err; - cft = __d_cft(file->f_dentry); + cft = __d_cft(file->f_path.dentry); if (!cft) return -ENODEV; if (cft->open) @@ -1511,7 +1511,7 @@ static int cpuset_file_open(struct inode *inode, struct file *file) static int cpuset_file_release(struct inode *inode, struct file *file) { - struct cftype *cft = __d_cft(file->f_dentry); + struct cftype *cft = __d_cft(file->f_path.dentry); if (cft->release) return cft->release(inode, file); return 0; @@ -1700,7 +1700,7 @@ static int pid_array_to_buf(char *buf, int sz, pid_t *a, int npids) */ static int cpuset_tasks_open(struct inode *unused, struct file *file) { - struct cpuset *cs = __d_cs(file->f_dentry->d_parent); + struct cpuset *cs = __d_cs(file->f_path.dentry->d_parent); struct ctr_struct *ctr; pid_t *pidarray; int npids; @@ -2342,32 +2342,48 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) } /** - * cpuset_zone_allowed - Can we allocate memory on zone z's memory node? + * cpuset_zone_allowed_softwall - Can we allocate on zone z's memory node? * @z: is this zone on an allowed node? - * @gfp_mask: memory allocation flags (we use __GFP_HARDWALL) + * @gfp_mask: memory allocation flags * - * If we're in interrupt, yes, we can always allocate. If zone + * If we're in interrupt, yes, we can always allocate. If + * __GFP_THISNODE is set, yes, we can always allocate. If zone * z's node is in our tasks mems_allowed, yes. If it's not a * __GFP_HARDWALL request and this zone's nodes is in the nearest * mem_exclusive cpuset ancestor to this tasks cpuset, yes. * Otherwise, no. * + * If __GFP_HARDWALL is set, cpuset_zone_allowed_softwall() + * reduces to cpuset_zone_allowed_hardwall(). Otherwise, + * cpuset_zone_allowed_softwall() might sleep, and might allow a zone + * from an enclosing cpuset. + * + * cpuset_zone_allowed_hardwall() only handles the simpler case of + * hardwall cpusets, and never sleeps. + * + * The __GFP_THISNODE placement logic is really handled elsewhere, + * by forcibly using a zonelist starting at a specified node, and by + * (in get_page_from_freelist()) refusing to consider the zones for + * any node on the zonelist except the first. By the time any such + * calls get to this routine, we should just shut up and say 'yes'. + * * GFP_USER allocations are marked with the __GFP_HARDWALL bit, * and do not allow allocations outside the current tasks cpuset. * GFP_KERNEL allocations are not so marked, so can escape to the - * nearest mem_exclusive ancestor cpuset. + * nearest enclosing mem_exclusive ancestor cpuset. * - * Scanning up parent cpusets requires callback_mutex. The __alloc_pages() - * routine only calls here with __GFP_HARDWALL bit _not_ set if - * it's a GFP_KERNEL allocation, and all nodes in the current tasks - * mems_allowed came up empty on the first pass over the zonelist. - * So only GFP_KERNEL allocations, if all nodes in the cpuset are - * short of memory, might require taking the callback_mutex mutex. + * Scanning up parent cpusets requires callback_mutex. The + * __alloc_pages() routine only calls here with __GFP_HARDWALL bit + * _not_ set if it's a GFP_KERNEL allocation, and all nodes in the + * current tasks mems_allowed came up empty on the first pass over + * the zonelist. So only GFP_KERNEL allocations, if all nodes in the + * cpuset are short of memory, might require taking the callback_mutex + * mutex. * * The first call here from mm/page_alloc:get_page_from_freelist() - * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, so - * no allocation on a node outside the cpuset is allowed (unless in - * interrupt, of course). + * has __GFP_HARDWALL set in gfp_mask, enforcing hardwall cpusets, + * so no allocation on a node outside the cpuset is allowed (unless + * in interrupt, of course). * * The second pass through get_page_from_freelist() doesn't even call * here for GFP_ATOMIC calls. For those calls, the __alloc_pages() @@ -2380,12 +2396,12 @@ static const struct cpuset *nearest_exclusive_ancestor(const struct cpuset *cs) * GFP_USER - only nodes in current tasks mems allowed ok. * * Rule: - * Don't call cpuset_zone_allowed() if you can't sleep, unless you + * Don't call cpuset_zone_allowed_softwall if you can't sleep, unless you * pass in the __GFP_HARDWALL flag set in gfp_flag, which disables * the code that might scan up ancestor cpusets and sleep. - **/ + */ -int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) +int __cpuset_zone_allowed_softwall(struct zone *z, gfp_t gfp_mask) { int node; /* node that zone z is on */ const struct cpuset *cs; /* current cpuset ancestors */ @@ -2415,6 +2431,40 @@ int __cpuset_zone_allowed(struct zone *z, gfp_t gfp_mask) return allowed; } +/* + * cpuset_zone_allowed_hardwall - Can we allocate on zone z's memory node? + * @z: is this zone on an allowed node? + * @gfp_mask: memory allocation flags + * + * If we're in interrupt, yes, we can always allocate. + * If __GFP_THISNODE is set, yes, we can always allocate. If zone + * z's node is in our tasks mems_allowed, yes. Otherwise, no. + * + * The __GFP_THISNODE placement logic is really handled elsewhere, + * by forcibly using a zonelist starting at a specified node, and by + * (in get_page_from_freelist()) refusing to consider the zones for + * any node on the zonelist except the first. By the time any such + * calls get to this routine, we should just shut up and say 'yes'. + * + * Unlike the cpuset_zone_allowed_softwall() variant, above, + * this variant requires that the zone be in the current tasks + * mems_allowed or that we're in interrupt. It does not scan up the + * cpuset hierarchy for the nearest enclosing mem_exclusive cpuset. + * It never sleeps. + */ + +int __cpuset_zone_allowed_hardwall(struct zone *z, gfp_t gfp_mask) +{ + int node; /* node that zone z is on */ + + if (in_interrupt() || (gfp_mask & __GFP_THISNODE)) + return 1; + node = zone_to_nid(z); + if (node_isset(node, current->mems_allowed)) + return 1; + return 0; +} + /** * cpuset_lock - lock out any changes to cpuset structures * @@ -2606,7 +2656,7 @@ static int cpuset_open(struct inode *inode, struct file *file) return single_open(file, proc_cpuset_show, pid); } -const struct file_operations proc_cpuset_operations = { +struct file_operations proc_cpuset_operations = { .open = cpuset_open, .read = seq_read, .llseek = seq_lseek, diff --git a/kernel/exit.c b/kernel/exit.c index 4e3f919edc4..fec12eb1247 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -13,7 +13,7 @@ #include <linux/completion.h> #include <linux/personality.h> #include <linux/tty.h> -#include <linux/namespace.h> +#include <linux/mnt_namespace.h> #include <linux/key.h> #include <linux/security.h> #include <linux/cpu.h> @@ -22,6 +22,7 @@ #include <linux/file.h> #include <linux/binfmts.h> #include <linux/nsproxy.h> +#include <linux/pid_namespace.h> #include <linux/ptrace.h> #include <linux/profile.h> #include <linux/mount.h> @@ -48,7 +49,6 @@ #include <asm/mmu_context.h> extern void sem_exit (void); -extern struct task_struct *child_reaper; static void exit_mm(struct task_struct * tsk); @@ -189,21 +189,18 @@ repeat: int session_of_pgrp(int pgrp) { struct task_struct *p; - int sid = -1; + int sid = 0; read_lock(&tasklist_lock); - do_each_task_pid(pgrp, PIDTYPE_PGID, p) { - if (p->signal->session > 0) { - sid = p->signal->session; - goto out; - } - } while_each_task_pid(pgrp, PIDTYPE_PGID, p); - p = find_task_by_pid(pgrp); - if (p) - sid = p->signal->session; -out: + + p = find_task_by_pid_type(PIDTYPE_PGID, pgrp); + if (p == NULL) + p = find_task_by_pid(pgrp); + if (p != NULL) + sid = process_session(p); + read_unlock(&tasklist_lock); - + return sid; } @@ -225,8 +222,8 @@ static int will_become_orphaned_pgrp(int pgrp, struct task_struct *ignored_task) || p->exit_state || is_init(p->real_parent)) continue; - if (process_group(p->real_parent) != pgrp - && p->real_parent->signal->session == p->signal->session) { + if (process_group(p->real_parent) != pgrp && + process_session(p->real_parent) == process_session(p)) { ret = 0; break; } @@ -260,7 +257,8 @@ static int has_stopped_jobs(int pgrp) } /** - * reparent_to_init - Reparent the calling kernel thread to the init task. + * reparent_to_init - Reparent the calling kernel thread to the init task + * of the pid space that the thread belongs to. * * If a kernel thread is launched as a result of a system call, or if * it ever exits, it should generally reparent itself to init so that @@ -278,8 +276,8 @@ static void reparent_to_init(void) ptrace_unlink(current); /* Reparent to init */ remove_parent(current); - current->parent = child_reaper; - current->real_parent = child_reaper; + current->parent = child_reaper(current); + current->real_parent = child_reaper(current); add_parent(current); /* Set the exit signal to SIGCHLD so we signal init on exit */ @@ -302,9 +300,9 @@ void __set_special_pids(pid_t session, pid_t pgrp) { struct task_struct *curr = current->group_leader; - if (curr->signal->session != session) { + if (process_session(curr) != session) { detach_pid(curr, PIDTYPE_SID); - curr->signal->session = session; + set_signal_session(curr->signal, session); attach_pid(curr, PIDTYPE_SID, session); } if (process_group(curr) != pgrp) { @@ -314,7 +312,7 @@ void __set_special_pids(pid_t session, pid_t pgrp) } } -void set_special_pids(pid_t session, pid_t pgrp) +static void set_special_pids(pid_t session, pid_t pgrp) { write_lock_irq(&tasklist_lock); __set_special_pids(session, pgrp); @@ -384,9 +382,7 @@ void daemonize(const char *name, ...) exit_mm(current); set_special_pids(1, 1); - mutex_lock(&tty_mutex); - current->signal->tty = NULL; - mutex_unlock(&tty_mutex); + proc_clear_tty(current); /* Block and flush all signals */ sigfillset(&blocked); @@ -429,7 +425,7 @@ static void close_files(struct files_struct * files) for (;;) { unsigned long set; i = j * __NFDBITS; - if (i >= fdt->max_fdset || i >= fdt->max_fds) + if (i >= fdt->max_fds) break; set = fdt->open_fds->fds_bits[j++]; while (set) { @@ -470,9 +466,7 @@ void fastcall put_files_struct(struct files_struct *files) * you can free files immediately. */ fdt = files_fdtable(files); - if (fdt == &files->fdtab) - fdt->free_files = files; - else + if (fdt != &files->fdtab) kmem_cache_free(files_cachep, files); free_fdtable(fdt); } @@ -603,10 +597,6 @@ choose_new_parent(struct task_struct *p, struct task_struct *reaper) static void reparent_thread(struct task_struct *p, struct task_struct *father, int traced) { - /* We don't want people slaying init. */ - if (p->exit_signal != -1) - p->exit_signal = SIGCHLD; - if (p->pdeath_signal) /* We already hold the tasklist_lock here. */ group_send_sig_info(p->pdeath_signal, SEND_SIG_NOINFO, p); @@ -626,13 +616,7 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) p->parent = p->real_parent; add_parent(p); - /* If we'd notified the old parent about this child's death, - * also notify the new parent. - */ - if (p->exit_state == EXIT_ZOMBIE && p->exit_signal != -1 && - thread_group_empty(p)) - do_notify_parent(p, p->exit_signal); - else if (p->state == TASK_TRACED) { + if (p->state == TASK_TRACED) { /* * If it was at a trace stop, turn it into * a normal stop since it's no longer being @@ -642,6 +626,23 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) } } + /* If this is a threaded reparent there is no need to + * notify anyone anything has happened. + */ + if (p->real_parent->group_leader == father->group_leader) + return; + + /* We don't want people slaying init. */ + if (p->exit_signal != -1) + p->exit_signal = SIGCHLD; + + /* If we'd notified the old parent about this child's death, + * also notify the new parent. + */ + if (!traced && p->exit_state == EXIT_ZOMBIE && + p->exit_signal != -1 && thread_group_empty(p)) + do_notify_parent(p, p->exit_signal); + /* * process group orphan check * Case ii: Our child is in a different pgrp @@ -649,10 +650,11 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) * outside, so the child pgrp is now orphaned. */ if ((process_group(p) != process_group(father)) && - (p->signal->session == father->signal->session)) { + (process_session(p) == process_session(father))) { int pgrp = process_group(p); - if (will_become_orphaned_pgrp(pgrp, NULL) && has_stopped_jobs(pgrp)) { + if (will_become_orphaned_pgrp(pgrp, NULL) && + has_stopped_jobs(pgrp)) { __kill_pg_info(SIGHUP, SEND_SIG_PRIV, pgrp); __kill_pg_info(SIGCONT, SEND_SIG_PRIV, pgrp); } @@ -663,7 +665,8 @@ reparent_thread(struct task_struct *p, struct task_struct *father, int traced) * When we die, we re-parent all our children. * Try to give them to another thread in our thread * group, and if no such member exists, give it to - * the global child reaper process (ie "init") + * the child reaper process (ie "init") in our pid + * space. */ static void forget_original_parent(struct task_struct *father, struct list_head *to_release) @@ -674,7 +677,7 @@ forget_original_parent(struct task_struct *father, struct list_head *to_release) do { reaper = next_thread(reaper); if (reaper == father) { - reaper = child_reaper; + reaper = child_reaper(father); break; } } while (reaper->exit_state); @@ -786,7 +789,7 @@ static void exit_notify(struct task_struct *tsk) t = tsk->real_parent; if ((process_group(t) != process_group(tsk)) && - (t->signal->session == tsk->signal->session) && + (process_session(t) == process_session(tsk)) && will_become_orphaned_pgrp(process_group(tsk), tsk) && has_stopped_jobs(process_group(tsk))) { __kill_pg_info(SIGHUP, SEND_SIG_PRIV, process_group(tsk)); @@ -860,8 +863,13 @@ fastcall NORET_TYPE void do_exit(long code) panic("Aiee, killing interrupt handler!"); if (unlikely(!tsk->pid)) panic("Attempted to kill the idle task!"); - if (unlikely(tsk == child_reaper)) - panic("Attempted to kill init!"); + if (unlikely(tsk == child_reaper(tsk))) { + if (tsk->nsproxy->pid_ns != &init_pid_ns) + tsk->nsproxy->pid_ns->child_reaper = init_pid_ns.child_reaper; + else + panic("Attempted to kill init!"); + } + if (unlikely(current->ptrace & PT_TRACE_EXIT)) { current->ptrace_message = code; @@ -930,8 +938,8 @@ fastcall NORET_TYPE void do_exit(long code) tsk->exit_code = code; proc_exit_connector(tsk); - exit_notify(tsk); exit_task_namespaces(tsk); + exit_notify(tsk); #ifdef CONFIG_NUMA mpol_free(tsk->mempolicy); tsk->mempolicy = NULL; diff --git a/kernel/fork.c b/kernel/fork.c index 7f2e31ba33a..d57118da73f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -18,7 +18,7 @@ #include <linux/module.h> #include <linux/vmalloc.h> #include <linux/completion.h> -#include <linux/namespace.h> +#include <linux/mnt_namespace.h> #include <linux/personality.h> #include <linux/mempolicy.h> #include <linux/sem.h> @@ -36,6 +36,7 @@ #include <linux/syscalls.h> #include <linux/jiffies.h> #include <linux/futex.h> +#include <linux/task_io_accounting_ops.h> #include <linux/rcupdate.h> #include <linux/ptrace.h> #include <linux/mount.h> @@ -202,7 +203,7 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) struct mempolicy *pol; down_write(&oldmm->mmap_sem); - flush_cache_mm(oldmm); + flush_cache_dup_mm(oldmm); /* * Not linked in yet - no deadlock potential: */ @@ -252,7 +253,7 @@ static inline int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) anon_vma_link(tmp); file = tmp->vm_file; if (file) { - struct inode *inode = file->f_dentry->d_inode; + struct inode *inode = file->f_path.dentry->d_inode; get_file(file); if (tmp->vm_flags & VM_DENYWRITE) atomic_dec(&inode->i_writecount); @@ -613,7 +614,7 @@ static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk) static int count_open_files(struct fdtable *fdt) { - int size = fdt->max_fdset; + int size = fdt->max_fds; int i; /* Find the last open fd */ @@ -640,12 +641,10 @@ static struct files_struct *alloc_files(void) newf->next_fd = 0; fdt = &newf->fdtab; fdt->max_fds = NR_OPEN_DEFAULT; - fdt->max_fdset = EMBEDDED_FD_SET_SIZE; fdt->close_on_exec = (fd_set *)&newf->close_on_exec_init; fdt->open_fds = (fd_set *)&newf->open_fds_init; fdt->fd = &newf->fd_array[0]; INIT_RCU_HEAD(&fdt->rcu); - fdt->free_files = NULL; fdt->next = NULL; rcu_assign_pointer(newf->fdt, fdt); out: @@ -661,7 +660,7 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) { struct files_struct *newf; struct file **old_fds, **new_fds; - int open_files, size, i, expand; + int open_files, size, i; struct fdtable *old_fdt, *new_fdt; *errorp = -ENOMEM; @@ -672,25 +671,14 @@ static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) spin_lock(&oldf->file_lock); old_fdt = files_fdtable(oldf); |