diff options
Diffstat (limited to 'kernel/fork.c')
| -rw-r--r-- | kernel/fork.c | 150 |
1 files changed, 77 insertions, 73 deletions
diff --git a/kernel/fork.c b/kernel/fork.c index 403d2bb8a96..6a13c46cd87 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -28,6 +28,8 @@ #include <linux/mman.h> #include <linux/mmu_notifier.h> #include <linux/fs.h> +#include <linux/mm.h> +#include <linux/vmacache.h> #include <linux/nsproxy.h> #include <linux/capability.h> #include <linux/cpu.h> @@ -71,6 +73,7 @@ #include <linux/signalfd.h> #include <linux/uprobes.h> #include <linux/aio.h> +#include <linux/compiler.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -147,15 +150,15 @@ void __weak arch_release_thread_info(struct thread_info *ti) static struct thread_info *alloc_thread_info_node(struct task_struct *tsk, int node) { - struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED, - THREAD_SIZE_ORDER); + struct page *page = alloc_kmem_pages_node(node, THREADINFO_GFP, + THREAD_SIZE_ORDER); return page ? page_address(page) : NULL; } static inline void free_thread_info(struct thread_info *ti) { - free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER); + free_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER); } # else static struct kmem_cache *thread_info_cache; @@ -237,6 +240,7 @@ void __put_task_struct(struct task_struct *tsk) WARN_ON(atomic_read(&tsk->usage)); WARN_ON(tsk == current); + task_numa_free(tsk); security_task_free(tsk); exit_creds(tsk); delayacct_tsk_free(tsk); @@ -283,7 +287,7 @@ void __init fork_init(unsigned long mempages) init_task.signal->rlim[RLIMIT_NPROC]; } -int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst, +int __weak arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { *dst = *src; @@ -351,7 +355,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) struct rb_node **rb_link, *rb_parent; int retval; unsigned long charge; - struct mempolicy *pol; uprobe_start_dup_mmap(); down_write(&oldmm->mmap_sem); @@ -364,7 +367,7 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) mm->locked_vm = 0; mm->mmap = NULL; - mm->mmap_cache = NULL; + mm->vmacache_seqnum = 0; mm->map_count = 0; cpumask_clear(mm_cpumask(mm)); mm->mm_rb = RB_ROOT; @@ -400,11 +403,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm) goto fail_nomem; *tmp = *mpnt; INIT_LIST_HEAD(&tmp->anon_vma_chain); - pol = mpol_dup(vma_policy(mpnt)); - retval = PTR_ERR(pol); - if (IS_ERR(pol)) + retval = vma_dup_policy(mpnt, tmp); + if (retval) goto fail_nomem_policy; - vma_set_policy(tmp, pol); tmp->vm_mm = mm; if (anon_vma_fork(tmp, mpnt)) goto fail_nomem_anon_vma_fork; @@ -472,7 +473,7 @@ out: uprobe_end_dup_mmap(); return retval; fail_nomem_anon_vma_fork: - mpol_put(pol); + mpol_put(vma_policy(tmp)); fail_nomem_policy: kmem_cache_free(vm_area_cachep, tmp); fail_nomem: @@ -522,7 +523,7 @@ static void mm_init_aio(struct mm_struct *mm) { #ifdef CONFIG_AIO spin_lock_init(&mm->ioctx_lock); - INIT_HLIST_HEAD(&mm->ioctx_list); + mm->ioctx_table = NULL; #endif } @@ -532,17 +533,23 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) atomic_set(&mm->mm_count, 1); init_rwsem(&mm->mmap_sem); INIT_LIST_HEAD(&mm->mmlist); - mm->flags = (current->mm) ? - (current->mm->flags & MMF_INIT_MASK) : default_dump_filter; mm->core_state = NULL; - mm->nr_ptes = 0; + atomic_long_set(&mm->nr_ptes, 0); memset(&mm->rss_stat, 0, sizeof(mm->rss_stat)); spin_lock_init(&mm->page_table_lock); mm_init_aio(mm); mm_init_owner(mm, p); + clear_tlb_flush_pending(mm); - if (likely(!mm_alloc_pgd(mm))) { + if (current->mm) { + mm->flags = current->mm->flags & MMF_INIT_MASK; + mm->def_flags = current->mm->def_flags & VM_INIT_DEF_MASK; + } else { + mm->flags = default_dump_filter; mm->def_flags = 0; + } + + if (likely(!mm_alloc_pgd(mm))) { mmu_notifier_mm_init(mm); return mm; } @@ -563,7 +570,7 @@ static void check_mm(struct mm_struct *mm) "mm:%p idx:%d val:%ld\n", mm, i, x); } -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS VM_BUG_ON(mm->pmd_huge_pte); #endif } @@ -802,14 +809,11 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) * Allocate a new mm structure and copy contents from the * mm structure of the passed in task structure. */ -struct mm_struct *dup_mm(struct task_struct *tsk) +static struct mm_struct *dup_mm(struct task_struct *tsk) { struct mm_struct *mm, *oldmm = current->mm; int err; - if (!oldmm) - return NULL; - mm = allocate_mm(); if (!mm) goto fail_nomem; @@ -817,12 +821,9 @@ struct mm_struct *dup_mm(struct task_struct *tsk) memcpy(mm, oldmm, sizeof(*mm)); mm_init_cpumask(mm); -#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS mm->pmd_huge_pte = NULL; #endif -#ifdef CONFIG_NUMA_BALANCING - mm->first_nid = NUMA_PTE_SCAN_INIT; -#endif if (!mm_init(mm, tsk)) goto fail_nomem; @@ -884,6 +885,9 @@ static int copy_mm(unsigned long clone_flags, struct task_struct *tsk) if (!oldmm) return 0; + /* initialize the new vmacache entries */ + vmacache_flush(tsk); + if (clone_flags & CLONE_VM) { atomic_inc(&oldmm->mm_users); mm = oldmm; @@ -1040,6 +1044,11 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->nr_threads = 1; atomic_set(&sig->live, 1); atomic_set(&sig->sigcnt, 1); + + /* list_add(thread_node, thread_head) without INIT_LIST_HEAD() */ + sig->thread_head = (struct list_head)LIST_HEAD_INIT(tsk->thread_node); + tsk->thread_node = (struct list_head)LIST_HEAD_INIT(sig->thread_head); + init_waitqueue_head(&sig->wait_chldexit); sig->curr_target = tsk; init_sigpending(&sig->shared_pending); @@ -1072,15 +1081,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) return 0; } -static void copy_flags(unsigned long clone_flags, struct task_struct *p) -{ - unsigned long new_flags = p->flags; - - new_flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER); - new_flags |= PF_FORKNOEXEC; - p->flags = new_flags; -} - SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr) { current->clear_child_tid = tidptr; @@ -1092,17 +1092,19 @@ static void rt_mutex_init_task(struct task_struct *p) { raw_spin_lock_init(&p->pi_lock); #ifdef CONFIG_RT_MUTEXES - plist_head_init(&p->pi_waiters); + p->pi_waiters = RB_ROOT; + p->pi_waiters_leftmost = NULL; p->pi_blocked_on = NULL; + p->pi_top_task = NULL; #endif } -#ifdef CONFIG_MM_OWNER +#ifdef CONFIG_MEMCG void mm_init_owner(struct mm_struct *mm, struct task_struct *p) { mm->owner = p; } -#endif /* CONFIG_MM_OWNER */ +#endif /* CONFIG_MEMCG */ /* * Initialize POSIX timer handling for a single task. @@ -1173,12 +1175,16 @@ static struct task_struct *copy_process(unsigned long clone_flags, return ERR_PTR(-EINVAL); /* - * If the new process will be in a different pid namespace - * don't allow the creation of threads. + * If the new process will be in a different pid or user namespace + * do not allow it to share a thread group or signal handlers or + * parent with the forking task. */ - if ((clone_flags & (CLONE_VM|CLONE_NEWPID)) && - (task_active_pid_ns(current) != current->nsproxy->pid_ns)) - return ERR_PTR(-EINVAL); + if (clone_flags & CLONE_SIGHAND) { + if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) || + (task_active_pid_ns(current) != + current->nsproxy->pid_ns_for_children)) + return ERR_PTR(-EINVAL); + } retval = security_task_create(clone_flags); if (retval) @@ -1223,9 +1229,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (!try_module_get(task_thread_info(p)->exec_domain->module)) goto bad_fork_cleanup_count; - p->did_exec = 0; delayacct_tsk_init(p); /* Must remain after dup_task_struct() */ - copy_flags(clone_flags, p); + p->flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER); + p->flags |= PF_FORKNOEXEC; INIT_LIST_HEAD(&p->children); INIT_LIST_HEAD(&p->sibling); rcu_copy_process(p); @@ -1269,9 +1275,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (IS_ERR(p->mempolicy)) { retval = PTR_ERR(p->mempolicy); p->mempolicy = NULL; - goto bad_fork_cleanup_cgroup; + goto bad_fork_cleanup_threadgroup_lock; } - mpol_fix_fork_child_flag(p); #endif #ifdef CONFIG_CPUSETS p->cpuset_mem_spread_rotor = NUMA_NO_NODE; @@ -1312,7 +1317,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, #endif /* Perform scheduler related setup. Assign this task to a CPU. */ - sched_fork(p); + retval = sched_fork(clone_flags, p); + if (retval) + goto bad_fork_cleanup_policy; retval = perf_event_init_task(p); if (retval) @@ -1351,7 +1358,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (pid != &init_struct_pid) { retval = -ENOMEM; - pid = alloc_pid(p->nsproxy->pid_ns); + pid = alloc_pid(p->nsproxy->pid_ns_for_children); if (!pid) goto bad_fork_cleanup_io; } @@ -1372,7 +1379,6 @@ static struct task_struct *copy_process(unsigned long clone_flags, INIT_LIST_HEAD(&p->pi_state_list); p->pi_state_cache = NULL; #endif - uprobe_copy_process(p); /* * sigaltstack should be cleared when sharing the same VM */ @@ -1405,13 +1411,11 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->tgid = p->pid; } - p->pdeath_signal = 0; - p->exit_state = 0; - p->nr_dirtied = 0; p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10); p->dirty_paused_when = 0; + p->pdeath_signal = 0; INIT_LIST_HEAD(&p->thread_group); p->task_works = NULL; @@ -1474,6 +1478,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, atomic_inc(¤t->signal->sigcnt); list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group); + list_add_tail_rcu(&p->thread_node, + &p->signal->thread_head); } attach_pid(p, PIDTYPE_PID); nr_threads++; @@ -1481,7 +1487,9 @@ static struct task_struct *copy_process(unsigned long clone_flags, total_forks++; spin_unlock(¤t->sighand->siglock); + syscall_tracepoint_update(p); write_unlock_irq(&tasklist_lock); + proc_fork_connector(p); cgroup_post_fork(p); if (clone_flags & CLONE_THREAD) @@ -1489,6 +1497,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, perf_event_fork(p); trace_task_newtask(p, clone_flags); + uprobe_copy_process(p, clone_flags); return p; @@ -1520,11 +1529,10 @@ bad_fork_cleanup_policy: perf_event_free_task(p); #ifdef CONFIG_NUMA mpol_put(p->mempolicy); -bad_fork_cleanup_cgroup: +bad_fork_cleanup_threadgroup_lock: #endif if (clone_flags & CLONE_THREAD) threadgroup_change_end(current); - cgroup_exit(p, 0); delayacct_tsk_free(p); module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: @@ -1575,15 +1583,6 @@ long do_fork(unsigned long clone_flags, long nr; /* - * Do some preliminary argument and permissions checking before we - * actually start allocating stuff - */ - if (clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) { - if (clone_flags & (CLONE_THREAD|CLONE_PARENT)) - return -EINVAL; - } - - /* * Determine whether and which event to report to ptracer. When * called from kernel_thread or CLONE_UNTRACED is explicitly * requested, no event is reported; otherwise, report if the event @@ -1609,10 +1608,12 @@ long do_fork(unsigned long clone_flags, */ if (!IS_ERR(p)) { struct completion vfork; + struct pid *pid; trace_sched_process_fork(current, p); - nr = task_pid_vnr(p); + pid = get_task_pid(p, PIDTYPE_PID); + nr = pid_vnr(pid); if (clone_flags & CLONE_PARENT_SETTID) put_user(nr, parent_tidptr); @@ -1627,12 +1628,14 @@ long do_fork(unsigned long clone_flags, /* forking complete and child started to run, tell ptracer */ if (unlikely(trace)) - ptrace_event(trace, nr); + ptrace_event_pid(trace, pid); if (clone_flags & CLONE_VFORK) { if (!wait_for_vfork_done(p, &vfork)) - ptrace_event(PTRACE_EVENT_VFORK_DONE, nr); + ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid); } + + put_pid(pid); } else { nr = PTR_ERR(p); } @@ -1655,7 +1658,7 @@ SYSCALL_DEFINE0(fork) return do_fork(SIGCHLD, 0, 0, NULL, NULL); #else /* can not support in nommu mode */ - return(-EINVAL); + return -EINVAL; #endif } #endif @@ -1663,7 +1666,7 @@ SYSCALL_DEFINE0(fork) #ifdef __ARCH_WANT_SYS_VFORK SYSCALL_DEFINE0(vfork) { - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, + return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, 0, NULL, NULL); } #endif @@ -1679,6 +1682,12 @@ SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags, int __user *, parent_tidptr, int __user *, child_tidptr, int, tls_val) +#elif defined(CONFIG_CLONE_BACKWARDS3) +SYSCALL_DEFINE6(clone, unsigned long, clone_flags, unsigned long, newsp, + int, stack_size, + int __user *, parent_tidptr, + int __user *, child_tidptr, + int, tls_val) #else SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp, int __user *, parent_tidptr, @@ -1818,11 +1827,6 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags) if (unshare_flags & CLONE_NEWUSER) unshare_flags |= CLONE_THREAD | CLONE_FS; /* - * If unsharing a pid namespace must also unshare the thread. - */ - if (unshare_flags & CLONE_NEWPID) - unshare_flags |= CLONE_THREAD; - /* * If unsharing a thread from a thread group, must also unshare vm. */ if (unshare_flags & CLONE_THREAD) |
