diff options
Diffstat (limited to 'kernel')
81 files changed, 3568 insertions, 2231 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 86e3285ae7e..ac0d533eb7d 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -110,6 +110,7 @@ obj-$(CONFIG_USER_RETURN_NOTIFIER) += user-return-notifier.o obj-$(CONFIG_PADATA) += padata.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o +obj-$(CONFIG_CONTEXT_TRACKING) += context_tracking.o $(obj)/configs.o: $(obj)/config_data.h diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 2f186ed80c4..e37e6a12c5e 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -200,7 +200,6 @@ struct audit_context { struct list_head names_list; /* anchor for struct audit_names->list */ char * filterkey; /* key for rule that triggered record */ struct path pwd; - struct audit_context *previous; /* For nested syscalls */ struct audit_aux_data *aux; struct audit_aux_data *aux_pids; struct sockaddr_storage *sockaddr; @@ -1091,29 +1090,13 @@ int audit_alloc(struct task_struct *tsk) static inline void audit_free_context(struct audit_context *context) { - struct audit_context *previous; - int count = 0; - - do { - previous = context->previous; - if (previous || (count && count < 10)) { - ++count; - printk(KERN_ERR "audit(:%d): major=%d name_count=%d:" - " freeing multiple contexts (%d)\n", - context->serial, context->major, - context->name_count, count); - } - audit_free_names(context); - unroll_tree_refs(context, NULL, 0); - free_tree_refs(context); - audit_free_aux(context); - kfree(context->filterkey); - kfree(context->sockaddr); - kfree(context); - context = previous; - } while (context); - if (count >= 10) - printk(KERN_ERR "audit: freed %d contexts\n", count); + audit_free_names(context); + unroll_tree_refs(context, NULL, 0); + free_tree_refs(context); + audit_free_aux(context); + kfree(context->filterkey); + kfree(context->sockaddr); + kfree(context); } void audit_log_task_context(struct audit_buffer *ab) @@ -1159,7 +1142,7 @@ void audit_log_task_info(struct audit_buffer *ab, struct task_struct *tsk) cred = current_cred(); spin_lock_irq(&tsk->sighand->siglock); - if (tsk->signal && tsk->signal->tty && tsk->signal->tty->name) + if (tsk->signal && tsk->signal->tty) tty = tsk->signal->tty->name; else tty = "(none)"; @@ -1783,42 +1766,6 @@ void __audit_syscall_entry(int arch, int major, if (!context) return; - /* - * This happens only on certain architectures that make system - * calls in kernel_thread via the entry.S interface, instead of - * with direct calls. (If you are porting to a new - * architecture, hitting this condition can indicate that you - * got the _exit/_leave calls backward in entry.S.) - * - * i386 no - * x86_64 no - * ppc64 yes (see arch/powerpc/platforms/iseries/misc.S) - * - * This also happens with vm86 emulation in a non-nested manner - * (entries without exits), so this case must be caught. - */ - if (context->in_syscall) { - struct audit_context *newctx; - -#if AUDIT_DEBUG - printk(KERN_ERR - "audit(:%d) pid=%d in syscall=%d;" - " entering syscall=%d\n", - context->serial, tsk->pid, context->major, major); -#endif - newctx = audit_alloc_context(context->state); - if (newctx) { - newctx->previous = context; - context = newctx; - tsk->audit_context = newctx; - } else { - /* If we can't alloc a new context, the best we - * can do is to leak memory (any pending putname - * will be lost). The only other alternative is - * to abandon auditing. */ - audit_zero_context(context, context->state); - } - } BUG_ON(context->in_syscall || context->name_count); if (!audit_enabled) @@ -1881,28 +1828,21 @@ void __audit_syscall_exit(int success, long return_code) if (!list_empty(&context->killed_trees)) audit_kill_trees(&context->killed_trees); - if (context->previous) { - struct audit_context *new_context = context->previous; - context->previous = NULL; - audit_free_context(context); - tsk->audit_context = new_context; - } else { - audit_free_names(context); - unroll_tree_refs(context, NULL, 0); - audit_free_aux(context); - context->aux = NULL; - context->aux_pids = NULL; - context->target_pid = 0; - context->target_sid = 0; - context->sockaddr_len = 0; - context->type = 0; - context->fds[0] = -1; - if (context->state != AUDIT_RECORD_CONTEXT) { - kfree(context->filterkey); - context->filterkey = NULL; - } - tsk->audit_context = context; + audit_free_names(context); + unroll_tree_refs(context, NULL, 0); + audit_free_aux(context); + context->aux = NULL; + context->aux_pids = NULL; + context->target_pid = 0; + context->target_sid = 0; + context->sockaddr_len = 0; + context->type = 0; + context->fds[0] = -1; + if (context->state != AUDIT_RECORD_CONTEXT) { + kfree(context->filterkey); + context->filterkey = NULL; } + tsk->audit_context = context; } static inline void handle_one(const struct inode *inode) diff --git a/kernel/cgroup.c b/kernel/cgroup.c index f24f724620d..f34c41bfaa3 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -138,6 +138,9 @@ struct cgroupfs_root { /* Hierarchy-specific flags */ unsigned long flags; + /* IDs for cgroups in this hierarchy */ + struct ida cgroup_ida; + /* The path to use for release notifications. */ char release_agent_path[PATH_MAX]; @@ -171,8 +174,8 @@ struct css_id { * The css to which this ID points. This pointer is set to valid value * after cgroup is populated. If cgroup is removed, this will be NULL. * This pointer is expected to be RCU-safe because destroy() - * is called after synchronize_rcu(). But for safe use, css_is_removed() - * css_tryget() should be used for avoiding race. + * is called after synchronize_rcu(). But for safe use, css_tryget() + * should be used for avoiding race. */ struct cgroup_subsys_state __rcu *css; /* @@ -242,6 +245,10 @@ static DEFINE_SPINLOCK(hierarchy_id_lock); */ static int need_forkexit_callback __read_mostly; +static int cgroup_destroy_locked(struct cgroup *cgrp); +static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, + struct cftype cfts[], bool is_add); + #ifdef CONFIG_PROVE_LOCKING int cgroup_lock_is_held(void) { @@ -294,11 +301,6 @@ static int notify_on_release(const struct cgroup *cgrp) return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); } -static int clone_children(const struct cgroup *cgrp) -{ - return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags); -} - /* * for_each_subsys() allows you to iterate on each subsystem attached to * an active hierarchy @@ -782,12 +784,12 @@ static struct cgroup *task_cgroup_from_root(struct task_struct *task, * The task_lock() exception * * The need for this exception arises from the action of - * cgroup_attach_task(), which overwrites one tasks cgroup pointer with + * cgroup_attach_task(), which overwrites one task's cgroup pointer with * another. It does so using cgroup_mutex, however there are * several performance critical places that need to reference * task->cgroup without the expense of grabbing a system global * mutex. Therefore except as noted below, when dereferencing or, as - * in cgroup_attach_task(), modifying a task'ss cgroup pointer we use + * in cgroup_attach_task(), modifying a task's cgroup pointer we use * task_lock(), which acts on a spinlock (task->alloc_lock) already in * the task_struct routinely used for such matters. * @@ -854,30 +856,6 @@ static struct inode *cgroup_new_inode(umode_t mode, struct super_block *sb) return inode; } -/* - * Call subsys's pre_destroy handler. - * This is called before css refcnt check. - */ -static int cgroup_call_pre_destroy(struct cgroup *cgrp) -{ - struct cgroup_subsys *ss; - int ret = 0; - - for_each_subsys(cgrp->root, ss) { - if (!ss->pre_destroy) - continue; - - ret = ss->pre_destroy(cgrp); - if (ret) { - /* ->pre_destroy() failure is being deprecated */ - WARN_ON_ONCE(!ss->__DEPRECATED_clear_css_refs); - break; - } - } - - return ret; -} - static void cgroup_diput(struct dentry *dentry, struct inode *inode) { /* is dentry a directory ? if so, kfree() associated cgroup */ @@ -898,7 +876,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) * Release the subsystem state objects. */ for_each_subsys(cgrp->root, ss) - ss->destroy(cgrp); + ss->css_free(cgrp); cgrp->root->number_of_cgroups--; mutex_unlock(&cgroup_mutex); @@ -917,6 +895,7 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode) simple_xattrs_free(&cgrp->xattrs); + ida_simple_remove(&cgrp->root->cgroup_ida, cgrp->id); kfree_rcu(cgrp, rcu_head); } else { struct cfent *cfe = __d_cfe(dentry); @@ -987,7 +966,7 @@ static void cgroup_clear_directory(struct dentry *dir, bool base_files, if (!test_bit(ss->subsys_id, &subsys_mask)) continue; list_for_each_entry(set, &ss->cftsets, node) - cgroup_rm_file(cgrp, set->cfts); + cgroup_addrm_files(cgrp, NULL, set->cfts, false); } if (base_files) { while (!list_empty(&cgrp->files)) @@ -1015,33 +994,6 @@ static void cgroup_d_remove_dir(struct dentry *dentry) } /* - * A queue for waiters to do rmdir() cgroup. A tasks will sleep when - * cgroup->count == 0 && list_empty(&cgroup->children) && subsys has some - * reference to css->refcnt. In general, this refcnt is expected to goes down - * to zero, soon. - * - * CGRP_WAIT_ON_RMDIR flag is set under cgroup's inode->i_mutex; - */ -static DECLARE_WAIT_QUEUE_HEAD(cgroup_rmdir_waitq); - -static void cgroup_wakeup_rmdir_waiter(struct cgroup *cgrp) -{ - if (unlikely(test_and_clear_bit(CGRP_WAIT_ON_RMDIR, &cgrp->flags))) - wake_up_all(&cgroup_rmdir_waitq); -} - -void cgroup_exclude_rmdir(struct cgroup_subsys_state *css) -{ - css_get(css); -} - -void cgroup_release_and_wakeup_rmdir(struct cgroup_subsys_state *css) -{ - cgroup_wakeup_rmdir_waiter(css->cgroup); - css_put(css); -} - -/* * Call with cgroup_mutex held. Drops reference counts on modules, including * any duplicate ones that parse_cgroupfs_options took. If this function * returns an error, no reference counts are touched. @@ -1150,7 +1102,7 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry) seq_puts(seq, ",xattr"); if (strlen(root->release_agent_path)) seq_printf(seq, ",release_agent=%s", root->release_agent_path); - if (clone_children(&root->top_cgroup)) + if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags)) seq_puts(seq, ",clone_children"); if (strlen(root->name)) seq_printf(seq, ",name=%s", root->name); @@ -1162,7 +1114,7 @@ struct cgroup_sb_opts { unsigned long subsys_mask; unsigned long flags; char *release_agent; - bool clone_children; + bool cpuset_clone_children; char *name; /* User explicitly requested empty subsystem */ bool none; @@ -1213,7 +1165,7 @@ static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts) continue; } if (!strcmp(token, "clone_children")) { - opts->clone_children = true; + opts->cpuset_clone_children = true; continue; } if (!strcmp(token, "xattr")) { @@ -1397,14 +1349,21 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) goto out_unlock; } + /* + * Clear out the files of subsystems that should be removed, do + * this before rebind_subsystems, since rebind_subsystems may + * change this hierarchy's subsys_list. + */ + cgroup_clear_directory(cgrp->dentry, false, removed_mask); + ret = rebind_subsystems(root, opts.subsys_mask); if (ret) { + /* rebind_subsystems failed, re-populate the removed files */ + cgroup_populate_dir(cgrp, false, removed_mask); drop_parsed_module_refcounts(opts.subsys_mask); goto out_unlock; } - /* clear out any existing files and repopulate subsystem files */ - cgroup_clear_directory(cgrp->dentry, false, removed_mask); /* re-populate subsystem files */ cgroup_populate_dir(cgrp, false, added_mask); @@ -1432,6 +1391,7 @@ static void init_cgroup_housekeeping(struct cgroup *cgrp) INIT_LIST_HEAD(&cgrp->children); INIT_LIST_HEAD(&cgrp->files); INIT_LIST_HEAD(&cgrp->css_sets); + INIT_LIST_HEAD(&cgrp->allcg_node); INIT_LIST_HEAD(&cgrp->release_list); INIT_LIST_HEAD(&cgrp->pidlists); mutex_init(&cgrp->pidlist_mutex); @@ -1450,8 +1410,8 @@ static void init_cgroup_root(struct cgroupfs_root *root) root->number_of_cgroups = 1; cgrp->root = root; cgrp->top_cgroup = cgrp; - list_add_tail(&cgrp->allcg_node, &root->allcg_list); init_cgroup_housekeeping(cgrp); + list_add_tail(&cgrp->allcg_node, &root->allcg_list); } static bool init_root_id(struct cgroupfs_root *root) @@ -1518,12 +1478,13 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) root->subsys_mask = opts->subsys_mask; root->flags = opts->flags; + ida_init(&root->cgroup_ida); if (opts->release_agent) strcpy(root->release_agent_path, opts->release_agent); if (opts->name) strcpy(root->name, opts->name); - if (opts->clone_children) - set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags); + if (opts->cpuset_clone_children) + set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags); return root; } @@ -1536,6 +1497,7 @@ static void cgroup_drop_root(struct cgroupfs_root *root) |