diff options
author | Ingo Molnar <mingo@elte.hu> | 2010-04-02 20:02:55 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2010-04-02 20:03:08 +0200 |
commit | c9494727cf293ae2ec66af57547a3e79c724fec2 (patch) | |
tree | 44ae197b64fa7530ee695a90ad31326dda06f1e1 /kernel | |
parent | 6427462bfa50f50dc6c088c07037264fcc73eca1 (diff) | |
parent | 42be79e37e264557f12860fa4cc84b4de3685954 (diff) |
Merge branch 'linus' into sched/core
Merge reason: update to latest upstream
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
77 files changed, 2950 insertions, 789 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 6aebdeb2aa3..a987aa1676b 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -10,7 +10,8 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ - async.o + async.o range.o +obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o obj-y += groups.o ifdef CONFIG_FUNCTION_TRACER @@ -90,6 +91,9 @@ obj-$(CONFIG_TASK_DELAY_ACCT) += delayacct.o obj-$(CONFIG_TASKSTATS) += taskstats.o tsacct.o obj-$(CONFIG_TRACEPOINTS) += tracepoint.o obj-$(CONFIG_LATENCYTOP) += latencytop.o +obj-$(CONFIG_BINFMT_ELF) += elfcore.o +obj-$(CONFIG_COMPAT_BINFMT_ELF) += elfcore.o +obj-$(CONFIG_BINFMT_ELF_FDPIC) += elfcore.o obj-$(CONFIG_FUNCTION_TRACER) += trace/ obj-$(CONFIG_TRACING) += trace/ obj-$(CONFIG_X86_DS) += trace/ diff --git a/kernel/acct.c b/kernel/acct.c index a6605ca921b..24f8c81fc48 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -588,16 +588,6 @@ out: } /** - * acct_init_pacct - initialize a new pacct_struct - * @pacct: per-process accounting info struct to initialize - */ -void acct_init_pacct(struct pacct_struct *pacct) -{ - memset(pacct, 0, sizeof(struct pacct_struct)); - pacct->ac_utime = pacct->ac_stime = cputime_zero; -} - -/** * acct_collect - collect accounting information into pacct_struct * @exitcode: task exit code * @group_dead: not 0, if this thread is the last one in the process. diff --git a/kernel/audit.c b/kernel/audit.c index 5feed232be9..78f7f86aa23 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -398,7 +398,7 @@ static void kauditd_send_skb(struct sk_buff *skb) skb_get(skb); err = netlink_unicast(audit_sock, skb, audit_nlk_pid, 0); if (err < 0) { - BUG_ON(err != -ECONNREFUSED); /* Shoudn't happen */ + BUG_ON(err != -ECONNREFUSED); /* Shouldn't happen */ printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid); audit_log_lost("auditd dissapeared\n"); audit_pid = 0; diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 4b05bd9479d..028e85663f2 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -548,6 +548,11 @@ int audit_remove_tree_rule(struct audit_krule *rule) return 0; } +static int compare_root(struct vfsmount *mnt, void *arg) +{ + return mnt->mnt_root->d_inode == arg; +} + void audit_trim_trees(void) { struct list_head cursor; @@ -559,7 +564,6 @@ void audit_trim_trees(void) struct path path; struct vfsmount *root_mnt; struct node *node; - struct list_head list; int err; tree = container_of(cursor.next, struct audit_tree, list); @@ -577,24 +581,16 @@ void audit_trim_trees(void) if (!root_mnt) goto skip_it; - list_add_tail(&list, &root_mnt->mnt_list); spin_lock(&hash_lock); list_for_each_entry(node, &tree->chunks, list) { - struct audit_chunk *chunk = find_chunk(node); - struct inode *inode = chunk->watch.inode; - struct vfsmount *mnt; + struct inode *inode = find_chunk(node)->watch.inode; node->index |= 1U<<31; - list_for_each_entry(mnt, &list, mnt_list) { - if (mnt->mnt_root->d_inode == inode) { - node->index &= ~(1U<<31); - break; - } - } + if (iterate_mounts(compare_root, inode, root_mnt)) + node->index &= ~(1U<<31); } spin_unlock(&hash_lock); trim_marked(tree); put_tree(tree); - list_del_init(&list); drop_collected_mounts(root_mnt); skip_it: mutex_lock(&audit_filter_mutex); @@ -603,22 +599,6 @@ skip_it: mutex_unlock(&audit_filter_mutex); } -static int is_under(struct vfsmount *mnt, struct dentry *dentry, - struct path *path) -{ - if (mnt != path->mnt) { - for (;;) { - if (mnt->mnt_parent == mnt) - return 0; - if (mnt->mnt_parent == path->mnt) - break; - mnt = mnt->mnt_parent; - } - dentry = mnt->mnt_mountpoint; - } - return is_subdir(dentry, path->dentry); -} - int audit_make_tree(struct audit_krule *rule, char *pathname, u32 op) { @@ -638,13 +618,17 @@ void audit_put_tree(struct audit_tree *tree) put_tree(tree); } +static int tag_mount(struct vfsmount *mnt, void *arg) +{ + return tag_chunk(mnt->mnt_root->d_inode, arg); +} + /* called with audit_filter_mutex */ int audit_add_tree_rule(struct audit_krule *rule) { struct audit_tree *seed = rule->tree, *tree; struct path path; - struct vfsmount *mnt, *p; - struct list_head list; + struct vfsmount *mnt; int err; list_for_each_entry(tree, &tree_list, list) { @@ -670,16 +654,9 @@ int audit_add_tree_rule(struct audit_krule *rule) err = -ENOMEM; goto Err; } - list_add_tail(&list, &mnt->mnt_list); get_tree(tree); - list_for_each_entry(p, &list, mnt_list) { - err = tag_chunk(p->mnt_root->d_inode, tree); - if (err) - break; - } - - list_del(&list); + err = iterate_mounts(tag_mount, tree, mnt); drop_collected_mounts(mnt); if (!err) { @@ -714,31 +691,23 @@ int audit_tag_tree(char *old, char *new) { struct list_head cursor, barrier; int failed = 0; - struct path path; + struct path path1, path2; struct vfsmount *tagged; - struct list_head list; - struct vfsmount *mnt; - struct dentry *dentry; int err; - err = kern_path(new, 0, &path); + err = kern_path(new, 0, &path2); if (err) return err; - tagged = collect_mounts(&path); - path_put(&path); + tagged = collect_mounts(&path2); + path_put(&path2); if (!tagged) return -ENOMEM; - err = kern_path(old, 0, &path); + err = kern_path(old, 0, &path1); if (err) { drop_collected_mounts(tagged); return err; } - mnt = mntget(path.mnt); - dentry = dget(path.dentry); - path_put(&path); - - list_add_tail(&list, &tagged->mnt_list); mutex_lock(&audit_filter_mutex); list_add(&barrier, &tree_list); @@ -746,7 +715,7 @@ int audit_tag_tree(char *old, char *new) while (cursor.next != &tree_list) { struct audit_tree *tree; - struct vfsmount *p; + int good_one = 0; tree = container_of(cursor.next, struct audit_tree, list); get_tree(tree); @@ -754,30 +723,19 @@ int audit_tag_tree(char *old, char *new) list_add(&cursor, &tree->list); mutex_unlock(&audit_filter_mutex); - err = kern_path(tree->pathname, 0, &path); - if (err) { - put_tree(tree); - mutex_lock(&audit_filter_mutex); - continue; + err = kern_path(tree->pathname, 0, &path2); + if (!err) { + good_one = path_is_under(&path1, &path2); + path_put(&path2); } - spin_lock(&vfsmount_lock); - if (!is_under(mnt, dentry, &path)) { - spin_unlock(&vfsmount_lock); - path_put(&path); + if (!good_one) { put_tree(tree); mutex_lock(&audit_filter_mutex); continue; } - spin_unlock(&vfsmount_lock); - path_put(&path); - - list_for_each_entry(p, &list, mnt_list) { - failed = tag_chunk(p->mnt_root->d_inode, tree); - if (failed) - break; - } + failed = iterate_mounts(tag_mount, tree, tagged); if (failed) { put_tree(tree); mutex_lock(&audit_filter_mutex); @@ -818,10 +776,8 @@ int audit_tag_tree(char *old, char *new) } list_del(&barrier); list_del(&cursor); - list_del(&list); mutex_unlock(&audit_filter_mutex); - dput(dentry); - mntput(mnt); + path_put(&path1); drop_collected_mounts(tagged); return failed; } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index fc0f928167e..f3a461c0970 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1988,7 +1988,6 @@ void __audit_inode(const char *name, const struct dentry *dentry) /** * audit_inode_child - collect inode info for created/removed objects - * @dname: inode's dentry name * @dentry: dentry being audited * @parent: inode of dentry parent * @@ -2000,13 +1999,14 @@ void __audit_inode(const char *name, const struct dentry *dentry) * must be hooked prior, in order to capture the target inode during * unsuccessful attempts. */ -void __audit_inode_child(const char *dname, const struct dentry *dentry, +void __audit_inode_child(const struct dentry *dentry, const struct inode *parent) { int idx; struct audit_context *context = current->audit_context; const char *found_parent = NULL, *found_child = NULL; const struct inode *inode = dentry->d_inode; + const char *dname = dentry->d_name.name; int dirlen = 0; if (!context->in_syscall) @@ -2014,9 +2014,6 @@ void __audit_inode_child(const char *dname, const struct dentry *dentry, if (inode) handle_one(inode); - /* determine matching parent */ - if (!dname) - goto add_names; /* parent is more likely, look for it first */ for (idx = 0; idx < context->name_count; idx++) { diff --git a/kernel/capability.c b/kernel/capability.c index 7f876e60521..9e4697e9b27 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -135,7 +135,7 @@ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp, if (pid && (pid != task_pid_vnr(current))) { struct task_struct *target; - read_lock(&tasklist_lock); + rcu_read_lock(); target = find_task_by_vpid(pid); if (!target) @@ -143,7 +143,7 @@ static inline int cap_get_target_pid(pid_t pid, kernel_cap_t *pEp, else ret = security_capget(target, pEp, pIp, pPp); - read_unlock(&tasklist_lock); + rcu_read_unlock(); } else ret = security_capget(current, pEp, pIp, pPp); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 4fd90e12977..e2769e13980 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -4,6 +4,10 @@ * Based originally on the cpuset system, extracted by Paul Menage * Copyright (C) 2006 Google, Inc * + * Notifications support + * Copyright (C) 2009 Nokia Corporation + * Author: Kirill A. Shutemov + * * Copyright notices from the original cpuset code: * -------------------------------------------------- * Copyright (C) 2003 BULL SA. @@ -23,7 +27,6 @@ */ #include <linux/cgroup.h> -#include <linux/module.h> #include <linux/ctype.h> #include <linux/errno.h> #include <linux/fs.h> @@ -44,6 +47,7 @@ #include <linux/string.h> #include <linux/sort.h> #include <linux/kmod.h> +#include <linux/module.h> #include <linux/delayacct.h> #include <linux/cgroupstats.h> #include <linux/hash.h> @@ -52,15 +56,21 @@ #include <linux/pid_namespace.h> #include <linux/idr.h> #include <linux/vmalloc.h> /* TODO: replace with more sophisticated array */ +#include <linux/eventfd.h> +#include <linux/poll.h> #include <asm/atomic.h> static DEFINE_MUTEX(cgroup_mutex); -/* Generate an array of cgroup subsystem pointers */ +/* + * Generate an array of cgroup subsystem pointers. At boot time, this is + * populated up to CGROUP_BUILTIN_SUBSYS_COUNT, and modular subsystems are + * registered after that. The mutable section of this array is protected by + * cgroup_mutex. + */ #define SUBSYS(_x) &_x ## _subsys, - -static struct cgroup_subsys *subsys[] = { +static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = { #include <linux/cgroup_subsys.h> }; @@ -147,6 +157,35 @@ struct css_id { unsigned short stack[0]; /* Array of Length (depth+1) */ }; +/* + * cgroup_event represents events which userspace want to recieve. + */ +struct cgroup_event { + /* + * Cgroup which the event belongs to. + */ + struct cgroup *cgrp; + /* + * Control file which the event associated. + */ + struct cftype *cft; + /* + * eventfd to signal userspace about the event. + */ + struct eventfd_ctx *eventfd; + /* + * Each of these stored in a list by the cgroup. + */ + struct list_head list; + /* + * All fields below needed to unregister event when + * userspace closes eventfd. + */ + poll_table pt; + wait_queue_head_t *wqh; + wait_queue_t wait; + struct work_struct remove; +}; /* The list of hierarchy roots */ @@ -250,7 +289,8 @@ struct cg_cgroup_link { static struct css_set init_css_set; static struct cg_cgroup_link init_css_set_link; -static int cgroup_subsys_init_idr(struct cgroup_subsys *ss); +static int cgroup_init_idr(struct cgroup_subsys *ss, + struct cgroup_subsys_state *css); /* css_set_lock protects the list of css_set objects, and the * chain of tasks off each css_set. Nests outside task->alloc_lock @@ -448,8 +488,11 @@ static struct css_set *find_existing_css_set( struct hlist_node *node; struct css_set *cg; - /* Built the set of subsystem state objects that we want to - * see in the new css_set */ + /* + * Build the set of subsystem state objects that we want to see in the + * new css_set. while subsystems can change globally, the entries here + * won't change, so no need for locking. + */ for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) { if (root->subsys_bits & (1UL << i)) { /* Subsystem is in this hierarchy. So we want @@ -696,6 +739,7 @@ void cgroup_lock(void) { mutex_lock(&cgroup_mutex); } +EXPORT_SYMBOL_GPL(cgroup_lock); /** * cgroup_unlock - release lock on cgroup changes @@ -706,6 +750,7 @@ void cgroup_unlock(void) { mutex_unlock(&cgroup_mutex); } +EXPORT_SYMBOL_GPL(cgroup_unlock); /* * A couple of forward declarations required, due to cyclic reference loop: @@ -757,6 +802,7 @@ static int cgroup_call_pre_destroy(struct cgroup *cgrp) if (ret) break; } + return ret; } @@ -884,7 +930,11 @@ void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css) css_put(css); } - +/* + * Call with cgroup_mutex held. Drops reference counts on modules, including + * any duplicate ones that parse_cgroupfs_options took. If this function + * returns an error, no reference counts are touched. + */ static int rebind_subsystems(struct cgroupfs_root *root, unsigned long final_bits) { @@ -892,6 +942,8 @@ static int rebind_subsystems(struct cgroupfs_root *root, struct cgroup *cgrp = &root->top_cgroup; int i; + BUG_ON(!mutex_is_locked(&cgroup_mutex)); + removed_bits = root->actual_subsys_bits & ~final_bits; added_bits = final_bits & ~root->actual_subsys_bits; /* Check that any added subsystems are currently free */ @@ -900,6 +952,12 @@ static int rebind_subsystems(struct cgroupfs_root *root, struct cgroup_subsys *ss = subsys[i]; if (!(bit & added_bits)) continue; + /* + * Nobody should tell us to do a subsys that doesn't exist: + * parse_cgroupfs_options should catch that case and refcounts + * ensure that subsystems won't disappear once selected. + */ + BUG_ON(ss == NULL); if (ss->root != &rootnode) { /* Subsystem isn't free */ return -EBUSY; @@ -919,6 +977,7 @@ static int rebind_subsystems(struct cgroupfs_root *root, unsigned long bit = 1UL << i; if (bit & added_bits) { /* We're binding this subsystem to this hierarchy */ + BUG_ON(ss == NULL); BUG_ON(cgrp->subsys[i]); BUG_ON(!dummytop->subsys[i]); BUG_ON(dummytop->subsys[i]->cgroup != dummytop); @@ -930,8 +989,10 @@ static int rebind_subsystems(struct cgroupfs_root *root, if (ss->bind) ss->bind(ss, cgrp); mutex_unlock(&ss->hierarchy_mutex); + /* refcount was already take |