Diffstat (limited to 'kernel')
59 files changed, 1015 insertions, 548 deletions
diff --git a/kernel/Makefile b/kernel/Makefile
index 2921d90ce32..170a9213c1b 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -41,6 +41,9 @@ obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
 obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o
 obj-$(CONFIG_GENERIC_ISA_DMA) += dma.o
 obj-$(CONFIG_USE_GENERIC_SMP_HELPERS) += smp.o
+ifneq ($(CONFIG_SMP),y)
+obj-y += up.o
+endif
 obj-$(CONFIG_SMP) += spinlock.o
 obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
 obj-$(CONFIG_PROVE_LOCKING) += spinlock.o
diff --git a/kernel/acct.c b/kernel/acct.c
index d57b7cbb98b..7afa3156416 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -277,7 +277,7 @@ static int acct_on(char *name)
  * should be written. If the filename is NULL, accounting will be
  * shutdown.
  */
-asmlinkage long sys_acct(const char __user *name)
+SYSCALL_DEFINE1(acct, const char __user *, name)
 {
 	int error;
 
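These conversions replace open-coded `asmlinkage long sys_*()` definitions with the SYSCALL_DEFINEx() family. Besides centralizing syscall prototypes, the macros let architectures that select HAVE_SYSCALL_WRAPPERS take every argument as a register-width long and cast it down in one place, avoiding sign-extension surprises when 32-bit values arrive in 64-bit registers. A rough sketch of the wrapper shape the macro can emit for sys_acct() (simplified and illustrative; the real macros live in include/linux/syscalls.h and vary by architecture):

	#include <linux/linkage.h>
	#include <linux/compiler.h>

	static inline long SYSC_acct(const char __user *name);

	/* Register-width entry point: accepts a bare long, casts exactly once. */
	asmlinkage long SyS_acct(long name)
	{
		return SYSC_acct((const char __user *)name);
	}

	/* Typed body -- what is written between the braces of SYSCALL_DEFINE1(). */
	static inline long SYSC_acct(const char __user *name)
	{
		/* ... original sys_acct() body ... */
		return 0;
	}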
diff --git a/kernel/async.c b/kernel/async.c
index f286e9f2b73..f565891f2c9 100644
--- a/kernel/async.c
+++ b/kernel/async.c
@@ -54,6 +54,7 @@ asynchronous and synchronous parts of the kernel.
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/kthread.h>
+#include <linux/delay.h>
 #include <asm/atomic.h>
 
 static async_cookie_t next_cookie = 1;
@@ -90,12 +91,12 @@ extern int initcall_debug;
 static async_cookie_t __lowest_in_progress(struct list_head *running)
 {
 	struct async_entry *entry;
-	if (!list_empty(&async_pending)) {
-		entry = list_first_entry(&async_pending,
+	if (!list_empty(running)) {
+		entry = list_first_entry(running,
 			struct async_entry, list);
 		return entry->cookie;
-	} else if (!list_empty(running)) {
-		entry = list_first_entry(running,
+	} else if (!list_empty(&async_pending)) {
+		entry = list_first_entry(&async_pending,
 			struct async_entry, list);
 		return entry->cookie;
 	} else {
@@ -104,6 +105,17 @@ static async_cookie_t __lowest_in_progress(struct list_head *running)
 	}
 }
 
+
+static async_cookie_t lowest_in_progress(struct list_head *running)
+{
+	unsigned long flags;
+	async_cookie_t ret;
+
+	spin_lock_irqsave(&async_lock, flags);
+	ret = __lowest_in_progress(running);
+	spin_unlock_irqrestore(&async_lock, flags);
+	return ret;
+}
 /*
  * pick the first pending entry and run it
  */
@@ -121,21 +133,23 @@ static void run_one_entry(void)
 	entry = list_first_entry(&async_pending, struct async_entry, list);
 
 	/* 2) move it to the running queue */
-	list_del(&entry->list);
-	list_add_tail(&entry->list, &async_running);
+	list_move_tail(&entry->list, entry->running);
 	spin_unlock_irqrestore(&async_lock, flags);
 
 	/* 3) run it (and print duration)*/
 	if (initcall_debug && system_state == SYSTEM_BOOTING) {
-		printk("calling %lli_%pF @ %i\n", entry->cookie, entry->func, task_pid_nr(current));
+		printk("calling %lli_%pF @ %i\n", (long long)entry->cookie,
+			entry->func, task_pid_nr(current));
 		calltime = ktime_get();
 	}
 	entry->func(entry->data, entry->cookie);
 	if (initcall_debug && system_state == SYSTEM_BOOTING) {
 		rettime = ktime_get();
 		delta = ktime_sub(rettime, calltime);
-		printk("initcall %lli_%pF returned 0 after %lld usecs\n", entry->cookie,
-			entry->func, ktime_to_ns(delta) >> 10);
+		printk("initcall %lli_%pF returned 0 after %lld usecs\n",
+			(long long)entry->cookie,
+			entry->func,
+			(long long)ktime_to_ns(delta) >> 10);
 	}
 
 	/* 4) remove it from the running queue */
@@ -194,18 +208,44 @@ static async_cookie_t __async_schedule(async_func_ptr *ptr, void *data, struct l
 	return newcookie;
 }
 
+/**
+ * async_schedule - schedule a function for asynchronous execution
+ * @ptr: function to execute asynchronously
+ * @data: data pointer to pass to the function
+ *
+ * Returns an async_cookie_t that may be used for checkpointing later.
+ * Note: This function may be called from atomic or non-atomic contexts.
+ */
 async_cookie_t async_schedule(async_func_ptr *ptr, void *data)
 {
-	return __async_schedule(ptr, data, &async_pending);
+	return __async_schedule(ptr, data, &async_running);
 }
 EXPORT_SYMBOL_GPL(async_schedule);
 
-async_cookie_t async_schedule_special(async_func_ptr *ptr, void *data, struct list_head *running)
+/**
+ * async_schedule_domain - schedule a function for asynchronous execution within a certain domain
+ * @ptr: function to execute asynchronously
+ * @data: data pointer to pass to the function
+ * @running: running list for the domain
+ *
+ * Returns an async_cookie_t that may be used for checkpointing later.
+ * @running may be used in the async_synchronize_*_domain() functions
+ * to wait within a certain synchronization domain rather than globally.
+ * A synchronization domain is specified via the running queue @running to use.
+ * Note: This function may be called from atomic or non-atomic contexts.
+ */
+async_cookie_t async_schedule_domain(async_func_ptr *ptr, void *data,
+				     struct list_head *running)
 {
 	return __async_schedule(ptr, data, running);
 }
-EXPORT_SYMBOL_GPL(async_schedule_special);
+EXPORT_SYMBOL_GPL(async_schedule_domain);
 
+/**
+ * async_synchronize_full - synchronize all asynchronous function calls
+ *
+ * This function waits until all asynchronous function calls have been done.
+ */
 void async_synchronize_full(void)
 {
 	do {
@@ -214,13 +254,30 @@ void async_synchronize_full(void)
 }
 EXPORT_SYMBOL_GPL(async_synchronize_full);
 
-void async_synchronize_full_special(struct list_head *list)
+/**
+ * async_synchronize_full_domain - synchronize all asynchronous function within a certain domain
+ * @list: running list to synchronize on
+ *
+ * This function waits until all asynchronous function calls for the
+ * synchronization domain specified by the running list @list have been done.
+ */
+void async_synchronize_full_domain(struct list_head *list)
 {
-	async_synchronize_cookie_special(next_cookie, list);
+	async_synchronize_cookie_domain(next_cookie, list);
 }
-EXPORT_SYMBOL_GPL(async_synchronize_full_special);
+EXPORT_SYMBOL_GPL(async_synchronize_full_domain);
 
-void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *running)
+/**
+ * async_synchronize_cookie_domain - synchronize asynchronous function calls within a certain domain with cookie checkpointing
+ * @cookie: async_cookie_t to use as checkpoint
+ * @running: running list to synchronize on
+ *
+ * This function waits until all asynchronous function calls for the
+ * synchronization domain specified by the running list @list submitted
+ * prior to @cookie have been done.
+ */
+void async_synchronize_cookie_domain(async_cookie_t cookie,
+				     struct list_head *running)
 {
 	ktime_t starttime, delta, endtime;
 
@@ -229,21 +286,29 @@ void async_synchronize_cookie_special(async_cookie_t cookie, struct list_head *r
 		starttime = ktime_get();
 	}
 
-	wait_event(async_done, __lowest_in_progress(running) >= cookie);
+	wait_event(async_done, lowest_in_progress(running) >= cookie);
 
 	if (initcall_debug && system_state == SYSTEM_BOOTING) {
 		endtime = ktime_get();
 		delta = ktime_sub(endtime, starttime);
 
 		printk("async_continuing @ %i after %lli usec\n",
-			task_pid_nr(current), ktime_to_ns(delta) >> 10);
+			task_pid_nr(current),
+			(long long)ktime_to_ns(delta) >> 10);
 	}
 }
-EXPORT_SYMBOL_GPL(async_synchronize_cookie_special);
+EXPORT_SYMBOL_GPL(async_synchronize_cookie_domain);
 
+/**
+ * async_synchronize_cookie - synchronize asynchronous function calls with cookie checkpointing
+ * @cookie: async_cookie_t to use as checkpoint
+ *
+ * This function waits until all asynchronous function calls prior to @cookie
+ * have been done.
+ */
 void async_synchronize_cookie(async_cookie_t cookie)
 {
-	async_synchronize_cookie_special(cookie, &async_running);
+	async_synchronize_cookie_domain(cookie, &async_running);
 }
 EXPORT_SYMBOL_GPL(async_synchronize_cookie);
 
@@ -304,7 +369,11 @@ static int async_manager_thread(void *unused)
 		ec = atomic_read(&entry_count);
 
 		while (tc < ec && tc < MAX_THREADS) {
-			kthread_run(async_thread, NULL, "async/%i", tc);
+			if (IS_ERR(kthread_run(async_thread, NULL, "async/%i",
+					       tc))) {
+				msleep(100);
+				continue;
+			}
 			atomic_inc(&thread_count);
 			tc++;
 		}
@@ -319,7 +388,9 @@ static int async_manager_thread(void *unused)
 static int __init async_init(void)
 {
 	if (async_enabled)
-		kthread_run(async_manager_thread, NULL, "async/mgr");
+		if (IS_ERR(kthread_run(async_manager_thread, NULL,
+				       "async/mgr")))
+			async_enabled = 0;
 
 	return 0;
 }
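The *_special to *_domain rename comes with kerneldoc spelling out the contract: a domain is just a caller-owned running list, and the *_domain synchronization calls wait only on that list. A minimal usage sketch for a hypothetical driver (illustrative code, not part of this patch):

	#include <linux/async.h>
	#include <linux/list.h>

	/* Private synchronization domain: a running list owned by this driver. */
	static LIST_HEAD(my_async_domain);

	static void my_probe_one(void *data, async_cookie_t cookie)
	{
		/* ... slow, independent initialization work ... */
	}

	static void my_probe_all(void)
	{
		int i;

		for (i = 0; i < 4; i++)
			async_schedule_domain(my_probe_one, NULL, &my_async_domain);

		/* Waits only for this domain, not for unrelated async work. */
		async_synchronize_full_domain(&my_async_domain);
	}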
diff --git a/kernel/capability.c b/kernel/capability.c
index 688926e496b..4e17041963f 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -161,7 +161,7 @@ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp,
  *
  * Returns 0 on success and < 0 on error.
  */
-asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
+SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr)
 {
 	int ret = 0;
 	pid_t pid;
@@ -235,7 +235,7 @@ asmlinkage long sys_capget(cap_user_header_t header, cap_user_data_t dataptr)
  *
  * Returns 0 on success and < 0 on error.
  */
-asmlinkage long sys_capset(cap_user_header_t header, const cap_user_data_t data)
+SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data)
 {
 	struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S];
 	unsigned i, tocopy;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index c29831076e7..e14db9c089b 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1115,8 +1115,10 @@ static void cgroup_kill_sb(struct super_block *sb) {
 	}
 	write_unlock(&css_set_lock);
 
-	list_del(&root->root_list);
-	root_count--;
+	if (!list_empty(&root->root_list)) {
+		list_del(&root->root_list);
+		root_count--;
+	}
 
 	mutex_unlock(&cgroup_mutex);
 
@@ -2349,7 +2351,7 @@ static void cgroup_lock_hierarchy(struct cgroupfs_root *root)
 	for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
 		struct cgroup_subsys *ss = subsys[i];
 		if (ss->root == root)
-			mutex_lock_nested(&ss->hierarchy_mutex, i);
+			mutex_lock(&ss->hierarchy_mutex);
 	}
 }
 
@@ -2434,7 +2436,9 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
 
  err_remove:
 
+	cgroup_lock_hierarchy(root);
 	list_del(&cgrp->sibling);
+	cgroup_unlock_hierarchy(root);
 	root->number_of_cgroups--;
 
  err_destroy:
@@ -2507,7 +2511,7 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp)
 	for_each_subsys(cgrp->root, ss) {
 		struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id];
 		int refcnt;
-		do {
+		while (1) {
 			/* We can only remove a CSS with a refcnt==1 */
 			refcnt = atomic_read(&css->refcnt);
 			if (refcnt > 1) {
@@ -2521,7 +2525,10 @@ static int cgroup_clear_css_refs(struct cgroup *cgrp)
 			 * css_tryget() to spin until we set the
 			 * CSS_REMOVED bits or abort
 			 */
-		} while (atomic_cmpxchg(&css->refcnt, refcnt, 0) != refcnt);
+			if (atomic_cmpxchg(&css->refcnt, refcnt, 0) == refcnt)
+				break;
+			cpu_relax();
+		}
 	}
  done:
 	for_each_subsys(cgrp->root, ss) {
@@ -2630,6 +2637,7 @@ static void __init cgroup_init_subsys(struct cgroup_subsys *ss)
 	BUG_ON(!list_empty(&init_task.tasks));
 
 	mutex_init(&ss->hierarchy_mutex);
+	lockdep_set_class(&ss->hierarchy_mutex, &ss->subsys_key);
 	ss->active = 1;
 }
 
@@ -2991,20 +2999,21 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
 		mutex_unlock(&cgroup_mutex);
 		return 0;
 	}
-	task_lock(tsk);
-	cg = tsk->cgroups;
-	parent = task_cgroup(tsk, subsys->subsys_id);
 
 	/* Pin the hierarchy */
-	if (!atomic_inc_not_zero(&parent->root->sb->s_active)) {
+	if (!atomic_inc_not_zero(&root->sb->s_active)) {
 		/* We race with the final deactivate_super() */
 		mutex_unlock(&cgroup_mutex);
 		return 0;
 	}
 
 	/* Keep the cgroup alive */
+	task_lock(tsk);
+	parent = task_cgroup(tsk, subsys->subsys_id);
+	cg = tsk->cgroups;
 	get_css_set(cg);
 	task_unlock(tsk);
+
 	mutex_unlock(&cgroup_mutex);
 
 	/* Now do the VFS work to create a cgroup */
@@ -3043,7 +3052,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
 		mutex_unlock(&inode->i_mutex);
 
 		put_css_set(cg);
-		deactivate_super(parent->root->sb);
+		deactivate_super(root->sb);
 		/* The cgroup is still accessible in the VFS, but
 		 * we're not going to try to rmdir() it at this
 		 * point. */
@@ -3069,7 +3078,7 @@ int cgroup_clone(struct task_struct *tsk, struct cgroup_subsys *subsys,
 	mutex_lock(&cgroup_mutex);
 	put_css_set(cg);
 	mutex_unlock(&cgroup_mutex);
-	deactivate_super(parent->root->sb);
+	deactivate_super(root->sb);
 	return ret;
 }
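The cgroup_clear_css_refs() change above unrolls a do/while into an explicit loop so a cpu_relax() can sit on the path taken after a lost cmpxchg. The same claim-or-retry pattern in isolation (illustrative sketch using a plain atomic_t rather than the actual css refcount):

	#include <asm/atomic.h>
	#include <asm/processor.h>

	/* Atomically swing a refcount from "unshared" (1) to "claimed" (0). */
	static int claim_ref(atomic_t *refcnt)
	{
		while (1) {
			int old = atomic_read(refcnt);

			if (old > 1)
				return 0;	/* someone else holds a reference */
			if (atomic_cmpxchg(refcnt, old, 0) == old)
				return 1;	/* we won the race */
			cpu_relax();		/* lost to a concurrent tryget; retry */
		}
	}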
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 647c77a88fc..f76db9dcaa0 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -61,6 +61,14 @@
 #include <linux/cgroup.h>
 
 /*
+ * Workqueue for cpuset related tasks.
+ *
+ * Using kevent workqueue may cause deadlock when memory_migrate
+ * is set. So we create a separate workqueue thread for cpuset.
+ */
+static struct workqueue_struct *cpuset_wq;
+
+/*
  * Tracks how many cpusets are currently defined in system.
  * When there is only one cpuset (the root cpuset) we can
  * short circuit some hooks.
@@ -568,7 +576,7 @@ update_domain_attr_tree(struct sched_domain_attr *dattr, struct cpuset *c)
  * load balancing domains (sched domains) as specified by that partial
  * partition.
  *
- * See "What is sched_load_balance" in Documentation/cpusets.txt
+ * See "What is sched_load_balance" in Documentation/cgroups/cpusets.txt
  * for a background explanation of this.
 *
 * Does not return errors, on the theory that the callers of this
@@ -831,7 +839,7 @@ static DECLARE_WORK(rebuild_sched_domains_work, do_rebuild_sched_domains);
 */
static void async_rebuild_sched_domains(void)
{
-	schedule_work(&rebuild_sched_domains_work);
+	queue_work(cpuset_wq, &rebuild_sched_domains_work);
}

/*
@@ -2111,6 +2119,9 @@ void __init cpuset_init_smp(void)
 
 	hotcpu_notifier(cpuset_track_online_cpus, 0);
 	hotplug_memory_notifier(cpuset_track_online_nodes, 10);
+
+	cpuset_wq = create_singlethread_workqueue("cpuset");
+	BUG_ON(!cpuset_wq);
 }
 
 /**
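Routing the rebuild through a dedicated single-threaded workqueue instead of the shared keventd one is a standard deadlock-avoidance pattern. A minimal sketch with the pre-CMWQ workqueue API in use here (all names hypothetical):

	#include <linux/workqueue.h>

	static struct workqueue_struct *my_wq;

	static void my_work_fn(struct work_struct *work)
	{
		/* may block on resources that work on keventd also needs */
	}

	static DECLARE_WORK(my_work, my_work_fn);

	static int __init my_init(void)
	{
		/* one private thread, so this work cannot deadlock keventd */
		my_wq = create_singlethread_workqueue("mywq");
		if (!my_wq)
			return -ENOMEM;
		queue_work(my_wq, &my_work);
		return 0;
	}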
diff --git a/kernel/dma-coherent.c b/kernel/dma-coherent.c
index 038707404b7..962a3b574f2 100644
--- a/kernel/dma-coherent.c
+++ b/kernel/dma-coherent.c
@@ -98,7 +98,7 @@ EXPORT_SYMBOL(dma_mark_declared_memory_occupied);
 * @size: size of requested memory area
 * @dma_handle: This will be filled with the correct dma handle
 * @ret: This pointer will be filled with the virtual address
- *	 to allocated area.
+ *	 to allocated area.
 *
 * This function should be only called from per-arch dma_alloc_coherent()
 * to support allocation from per-device coherent memory pools.
@@ -118,31 +118,32 @@ int dma_alloc_from_coherent(struct device *dev, ssize_t size,
 	mem = dev->dma_mem;
 	if (!mem)
 		return 0;
-	if (unlikely(size > mem->size))
-		return 0;
+
+	*ret = NULL;
+
+	if (unlikely(size > (mem->size << PAGE_SHIFT)))
+		goto err;
 
 	pageno = bitmap_find_free_region(mem->bitmap, mem->size, order);
-	if (pageno >= 0) {
-		/*
-		 * Memory was found in the per-device arena.
-		 */
-		*dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
-		*ret = mem->virt_base + (pageno << PAGE_SHIFT);
-		memset(*ret, 0, size);
-	} else if (mem->flags & DMA_MEMORY_EXCLUSIVE) {
-		/*
-		 * The per-device arena is exhausted and we are not
-		 * permitted to fall back to generic memory.
-		 */
-		*ret = NULL;
-	} else {
-		/*
-		 * The per-device arena is exhausted and we are
-		 * permitted to fall back to generic memory.
-		 */
-		return 0;
-	}
+	if (unlikely(pageno < 0))
+		goto err;
+
+	/*
+	 * Memory was found in the per-device area.
+	 */
+	*dma_handle = mem->device_base + (pageno << PAGE_SHIFT);
+	*ret = mem->virt_base + (pageno << PAGE_SHIFT);
+	memset(*ret, 0, size);
+	return 1;
+
+err:
+	/*
+	 * In the case where the allocation can not be satisfied from the
+	 * per-device area, try to fall back to generic memory if the
+	 * constraints allow it.
+	 */
+	return mem->flags & DMA_MEMORY_EXCLUSIVE;
 }
 EXPORT_SYMBOL(dma_alloc_from_coherent);
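The rewritten helper now encodes three outcomes: nonzero with *ret set (satisfied from the per-device pool), nonzero with *ret == NULL (exclusive pool exhausted, fail hard), and zero (pool not applicable, generic memory is fine). A sketch of how a per-arch dma_alloc_coherent() might consume that contract (hypothetical caller; my_arch_generic_alloc() is invented for illustration):

	#include <linux/dma-mapping.h>

	void *my_arch_dma_alloc_coherent(struct device *dev, size_t size,
					 dma_addr_t *handle, gfp_t gfp)
	{
		void *ret;

		/* Nonzero means "handled": success, or a hard failure. */
		if (dma_alloc_from_coherent(dev, size, handle, &ret))
			return ret;	/* may be NULL for an exclusive, full pool */

		/* Zero means the per-device pool passed; use generic memory. */
		return my_arch_generic_alloc(dev, size, handle, gfp);
	}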
diff --git a/kernel/exec_domain.c b/kernel/exec_domain.c
index 0511716e942..667c841c295 100644
--- a/kernel/exec_domain.c
+++ b/kernel/exec_domain.c
@@ -209,8 +209,7 @@ static int __init proc_execdomains_init(void)
 module_init(proc_execdomains_init);
 #endif
 
-asmlinkage long
-sys_personality(u_long personality)
+SYSCALL_DEFINE1(personality, u_long, personality)
 {
 	u_long old = current->personality;
 
diff --git a/kernel/exit.c b/kernel/exit.c
index c7740fa3252..efd30ccf385 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -118,6 +118,8 @@ static void __exit_signal(struct task_struct *tsk)
 		 * We won't ever get here for the group leader, since it
 		 * will have been the last reference on the signal_struct.
 		 */
+		sig->utime = cputime_add(sig->utime, task_utime(tsk));
+		sig->stime = cputime_add(sig->stime, task_stime(tsk));
 		sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
 		sig->min_flt += tsk->min_flt;
 		sig->maj_flt += tsk->maj_flt;
@@ -126,6 +128,7 @@ static void __exit_signal(struct task_struct *tsk)
 		sig->inblock += task_io_get_inblock(tsk);
 		sig->oublock += task_io_get_oublock(tsk);
 		task_io_accounting_add(&sig->ioac, &tsk->ioac);
+		sig->sum_sched_runtime += tsk->se.sum_exec_runtime;
 		sig = NULL; /* Marker for below. */
 	}
 
@@ -1141,7 +1144,7 @@ NORET_TYPE void complete_and_exit(struct completion *comp, long code)
 
 EXPORT_SYMBOL(complete_and_exit);
 
-asmlinkage long sys_exit(int error_code)
+SYSCALL_DEFINE1(exit, int, error_code)
 {
 	do_exit((error_code&0xff)<<8);
 }
@@ -1182,9 +1185,11 @@ do_group_exit(int exit_code)
 * wait4()-ing process will get the correct exit code - even if this
 * thread is not the thread group leader.
 */
-asmlinkage void sys_exit_group(int error_code)
+SYSCALL_DEFINE1(exit_group, int, error_code)
 {
 	do_group_exit((error_code & 0xff) << 8);
+	/* NOTREACHED */
+	return 0;
 }
 
 static struct pid *task_pid_type(struct task_struct *task, enum pid_type type)
@@ -1752,9 +1757,8 @@ end:
 	return retval;
 }
 
-asmlinkage long sys_waitid(int which, pid_t upid,
-			   struct siginfo __user *infop, int options,
-			   struct rusage __user *ru)
+SYSCALL_DEFINE5(waitid, int, which, pid_t, upid, struct siginfo __user *,
+		infop, int, options, struct rusage __user *, ru)
 {
 	struct pid *pid = NULL;
 	enum pid_type type;
@@ -1793,8 +1797,8 @@ asmlinkage long sys_waitid(int which, pid_t upid,
 	return ret;
 }
 
-asmlinkage long sys_wait4(pid_t upid, int __user *stat_addr,
-			  int options, struct rusage __user *ru)
+SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
+		int, options, struct rusage __user *, ru)
 {
 	struct pid *pid = NULL;
 	enum pid_type type;
@@ -1831,7 +1835,7 @@ asmlinkage long sys_wait4(pid_t upid, int __user *stat_addr,
 * sys_waitpid() remains for compatibility. waitpid() should be
 * implemented by calling sys_wait4() from libc.a.
 */
-asmlinkage long sys_waitpid(pid_t pid, int __user *stat_addr, int options)
+SYSCALL_DEFINE3(waitpid, pid_t, pid, int __user *, stat_addr, int, options)
 {
 	return sys_wait4(pid, stat_addr, options, NULL);
 }
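As the comment above sys_waitpid() says, waitpid() survives only for compatibility and is meant to be a libc-level wrapper around wait4(). In userspace terms the equivalence is simply (illustrative, using the POSIX/BSD wait4(2)):

	#include <sys/wait.h>
	#include <stddef.h>

	/* waitpid(pid, status, options) == wait4(pid, status, options, NULL) */
	static pid_t my_waitpid(pid_t pid, int *status, int options)
	{
		return wait4(pid, status, options, NULL);
	}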
diff --git a/kernel/fork.c b/kernel/fork.c
index 1d68f1255dd..a66fbde2071 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -817,17 +817,17 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig)
 static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 {
 	struct signal_struct *sig;
-	int ret;
 
 	if (clone_flags & CLONE_THREAD) {
-		ret = thread_group_cputime_clone_thread(current);
-		if (likely(!ret)) {
-			atomic_inc(&current->signal->count);
-			atomic_inc(&current->signal->live);
-		}
-		return ret;
+		atomic