aboutsummaryrefslogtreecommitdiff
path: root/kernel/fork.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/fork.c')
-rw-r--r--kernel/fork.c136
1 files changed, 78 insertions, 58 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 4d57d9e3a6e..25e429152dd 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -65,6 +65,8 @@
#include <linux/perf_event.h>
#include <linux/posix-timers.h>
#include <linux/user-return-notifier.h>
+#include <linux/oom.h>
+#include <linux/khugepaged.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -165,6 +167,19 @@ void free_task(struct task_struct *tsk)
}
EXPORT_SYMBOL(free_task);
+static inline void free_signal_struct(struct signal_struct *sig)
+{
+ taskstats_tgid_free(sig);
+ sched_autogroup_exit(sig);
+ kmem_cache_free(signal_cachep, sig);
+}
+
+static inline void put_signal_struct(struct signal_struct *sig)
+{
+ if (atomic_dec_and_test(&sig->sigcnt))
+ free_signal_struct(sig);
+}
+
void __put_task_struct(struct task_struct *tsk)
{
WARN_ON(!tsk->exit_state);
@@ -173,6 +188,7 @@ void __put_task_struct(struct task_struct *tsk)
exit_creds(tsk);
delayacct_tsk_free(tsk);
+ put_signal_struct(tsk->signal);
if (!profile_handoff_task(tsk))
free_task(tsk);
@@ -259,6 +275,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
setup_thread_stack(tsk, orig);
clear_user_return_notifier(tsk);
+ clear_tsk_need_resched(tsk);
stackend = end_of_stack(tsk);
*stackend = STACK_END_MAGIC; /* for overflow detection */
@@ -287,7 +304,7 @@ out:
#ifdef CONFIG_MMU
static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
{
- struct vm_area_struct *mpnt, *tmp, **pprev;
+ struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
struct rb_node **rb_link, *rb_parent;
int retval;
unsigned long charge;
@@ -314,7 +331,11 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
retval = ksm_fork(mm, oldmm);
if (retval)
goto out;
+ retval = khugepaged_fork(mm, oldmm);
+ if (retval)
+ goto out;
+ prev = NULL;
for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
struct file *file;
@@ -342,11 +363,11 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
if (IS_ERR(pol))
goto fail_nomem_policy;
vma_set_policy(tmp, pol);
+ tmp->vm_mm = mm;
if (anon_vma_fork(tmp, mpnt))
goto fail_nomem_anon_vma_fork;
tmp->vm_flags &= ~VM_LOCKED;
- tmp->vm_mm = mm;
- tmp->vm_next = NULL;
+ tmp->vm_next = tmp->vm_prev = NULL;
file = tmp->vm_file;
if (file) {
struct inode *inode = file->f_path.dentry->d_inode;
@@ -379,6 +400,8 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
*/
*pprev = tmp;
pprev = &tmp->vm_next;
+ tmp->vm_prev = prev;
+ prev = tmp;
__vma_link_rb(mm, tmp, rb_link, rb_parent);
rb_link = &tmp->vm_rb.rb_right;
@@ -472,6 +495,7 @@ static struct mm_struct * mm_init(struct mm_struct * mm, struct task_struct *p)
mm->cached_hole_size = ~0UL;
mm_init_aio(mm);
mm_init_owner(mm, p);
+ atomic_set(&mm->oom_disable_count, 0);
if (likely(!mm_alloc_pgd(mm))) {
mm->def_flags = 0;
@@ -509,6 +533,9 @@ void __mmdrop(struct mm_struct *mm)
mm_free_pgd(mm);
destroy_context(mm);
mmu_notifier_mm_destroy(mm);
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ VM_BUG_ON(mm->pmd_huge_pte);
+#endif
free_mm(mm);
}
EXPORT_SYMBOL_GPL(__mmdrop);
@@ -523,6 +550,7 @@ void mmput(struct mm_struct *mm)
if (atomic_dec_and_test(&mm->mm_users)) {
exit_aio(mm);
ksm_exit(mm);
+ khugepaged_exit(mm); /* must run before exit_mmap */
exit_mmap(mm);
set_mm_exe_file(mm, NULL);
if (!list_empty(&mm->mmlist)) {
@@ -649,6 +677,10 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
mm->token_priority = 0;
mm->last_interval = 0;
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+ mm->pmd_huge_pte = NULL;
+#endif
+
if (!mm_init(mm, tsk))
goto fail_nomem;
@@ -725,6 +757,8 @@ good_mm:
/* Initializing for Swap token stuff */
mm->token_priority = 0;
mm->last_interval = 0;
+ if (tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
+ atomic_inc(&mm->oom_disable_count);
tsk->mm = mm;
tsk->active_mm = mm;
@@ -739,13 +773,13 @@ static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
struct fs_struct *fs = current->fs;
if (clone_flags & CLONE_FS) {
/* tsk->fs is already what we want */
- write_lock(&fs->lock);
+ spin_lock(&fs->lock);
if (fs->in_exec) {
- write_unlock(&fs->lock);
+ spin_unlock(&fs->lock);
return -EAGAIN;
}
fs->users++;
- write_unlock(&fs->lock);
+ spin_unlock(&fs->lock);
return 0;
}
tsk->fs = copy_fs_struct(fs);
@@ -864,8 +898,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
if (!sig)
return -ENOMEM;
- atomic_set(&sig->count, 1);
+ sig->nr_threads = 1;
atomic_set(&sig->live, 1);
+ atomic_set(&sig->sigcnt, 1);
init_waitqueue_head(&sig->wait_chldexit);
if (clone_flags & CLONE_NEWPID)
sig->flags |= SIGNAL_UNKILLABLE;
@@ -883,24 +918,22 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
posix_cpu_timers_init_group(sig);
tty_audit_fork(sig);
+ sched_autogroup_fork(sig);
sig->oom_adj = current->signal->oom_adj;
+ sig->oom_score_adj = current->signal->oom_score_adj;
+ sig->oom_score_adj_min = current->signal->oom_score_adj_min;
- return 0;
-}
+ mutex_init(&sig->cred_guard_mutex);
-void __cleanup_signal(struct signal_struct *sig)
-{
- thread_group_cputime_free(sig);
- tty_kref_put(sig->tty);
- kmem_cache_free(signal_cachep, sig);
+ return 0;
}
static void copy_flags(unsigned long clone_flags, struct task_struct *p)
{
unsigned long new_flags = p->flags;
- new_flags &= ~PF_SUPERPRIV;
+ new_flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
new_flags |= PF_FORKNOEXEC;
new_flags |= PF_STARTING;
p->flags = new_flags;
@@ -1245,8 +1278,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
}
if (clone_flags & CLONE_THREAD) {
- atomic_inc(&current->signal->count);
+ current->signal->nr_threads++;
atomic_inc(&current->signal->live);
+ atomic_inc(&current->signal->sigcnt);
p->group_leader = current->group_leader;
list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
}
@@ -1259,13 +1293,12 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->nsproxy->pid_ns->child_reaper = p;
p->signal->leader_pid = pid;
- tty_kref_put(p->signal->tty);
p->signal->tty = tty_kref_get(current->signal->tty);
attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
attach_pid(p, PIDTYPE_SID, task_session(current));
list_add_tail(&p->sibling, &p->real_parent->children);
list_add_tail_rcu(&p->tasks, &init_task.tasks);
- __get_cpu_var(process_counts)++;
+ __this_cpu_inc(process_counts);
}
attach_pid(p, PIDTYPE_PID, pid);
nr_threads++;
@@ -1288,11 +1321,16 @@ bad_fork_cleanup_io:
bad_fork_cleanup_namespaces:
exit_task_namespaces(p);
bad_fork_cleanup_mm:
- if (p->mm)
+ if (p->mm) {
+ task_lock(p);
+ if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
+ atomic_dec(&p->mm->oom_disable_count);
+ task_unlock(p);
mmput(p->mm);
+ }
bad_fork_cleanup_signal:
if (!(clone_flags & CLONE_THREAD))
- __cleanup_signal(p->signal);
+ free_signal_struct(p->signal);
bad_fork_cleanup_sighand:
__cleanup_sighand(p->sighand);
bad_fork_cleanup_fs:
@@ -1327,6 +1365,16 @@ noinline struct pt_regs * __cpuinit __attribute__((weak)) idle_regs(struct pt_re
return regs;
}
+static inline void init_idle_pids(struct pid_link *links)
+{
+ enum pid_type type;
+
+ for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
+ INIT_HLIST_NODE(&links[type].node); /* not really needed */
+ links[type].pid = &init_struct_pid;
+ }
+}
+
struct task_struct * __cpuinit fork_idle(int cpu)
{
struct task_struct *task;
@@ -1334,8 +1382,10 @@ struct task_struct * __cpuinit fork_idle(int cpu)
task = copy_process(CLONE_VM, 0, idle_regs(&regs), 0, NULL,
&init_struct_pid, 0);
- if (!IS_ERR(task))
+ if (!IS_ERR(task)) {
+ init_idle_pids(task->pids);
init_idle(task, cpu);
+ }
return task;
}
@@ -1373,23 +1423,6 @@ long do_fork(unsigned long clone_flags,
}
/*
- * We hope to recycle these flags after 2.6.26
- */
- if (unlikely(clone_flags & CLONE_STOPPED)) {
- static int __read_mostly count = 100;
-
- if (count > 0 && printk_ratelimit()) {
- char comm[TASK_COMM_LEN];
-
- count--;
- printk(KERN_INFO "fork(): process `%s' used deprecated "
- "clone flags 0x%lx\n",
- get_task_comm(comm, current),
- clone_flags & CLONE_STOPPED);
- }
- }
-
- /*
* When called from kernel_thread, don't do user tracing stuff.
*/
if (likely(user_mode(regs)))
@@ -1427,16 +1460,7 @@ long do_fork(unsigned long clone_flags,
*/
p->flags &= ~PF_STARTING;
- if (unlikely(clone_flags & CLONE_STOPPED)) {
- /*
- * We'll start up with an immediate SIGSTOP.
- */
- sigaddset(&p->pending.signal, SIGSTOP);
- set_tsk_thread_flag(p, TIF_SIGPENDING);
- __set_task_state(p, TASK_STOPPED);
- } else {
- wake_up_new_task(p, clone_flags);
- }
+ wake_up_new_task(p, clone_flags);
tracehook_report_clone_complete(trace, regs,
clone_flags, nr, p);
@@ -1507,14 +1531,6 @@ static void check_unshare_flags(unsigned long *flags_ptr)
*flags_ptr |= CLONE_SIGHAND;
/*
- * If unsharing signal handlers and the task was created
- * using CLONE_THREAD, then must unshare the thread
- */
- if ((*flags_ptr & CLONE_SIGHAND) &&
- (atomic_read(&current->signal->count) > 1))
- *flags_ptr |= CLONE_THREAD;
-
- /*
* If unsharing namespace, must also unshare filesystem information.
*/
if (*flags_ptr & CLONE_NEWNS)
@@ -1664,13 +1680,13 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
if (new_fs) {
fs = current->fs;
- write_lock(&fs->lock);
+ spin_lock(&fs->lock);
current->fs = new_fs;
if (--fs->users)
new_fs = NULL;
else
new_fs = fs;
- write_unlock(&fs->lock);
+ spin_unlock(&fs->lock);
}
if (new_mm) {
@@ -1678,6 +1694,10 @@ SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
active_mm = current->active_mm;
current->mm = new_mm;
current->active_mm = new_mm;
+ if (current->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
+ atomic_dec(&mm->oom_disable_count);
+ atomic_inc(&new_mm->oom_disable_count);
+ }
activate_mm(active_mm, new_mm);
new_mm = mm;
}