diff options
Diffstat (limited to 'kernel')
89 files changed, 5517 insertions, 4617 deletions
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks index 44511d100ea..d2b32ac27a3 100644 --- a/kernel/Kconfig.locks +++ b/kernel/Kconfig.locks @@ -138,7 +138,7 @@ config INLINE_SPIN_UNLOCK_BH config INLINE_SPIN_UNLOCK_IRQ def_bool y - depends on !PREEMPT || ARCH_INLINE_SPIN_UNLOCK_BH + depends on !PREEMPT || ARCH_INLINE_SPIN_UNLOCK_IRQ config INLINE_SPIN_UNLOCK_IRQRESTORE def_bool y @@ -175,7 +175,7 @@ config INLINE_READ_UNLOCK_BH config INLINE_READ_UNLOCK_IRQ def_bool y - depends on !PREEMPT || ARCH_INLINE_READ_UNLOCK_BH + depends on !PREEMPT || ARCH_INLINE_READ_UNLOCK_IRQ config INLINE_READ_UNLOCK_IRQRESTORE def_bool y @@ -212,7 +212,7 @@ config INLINE_WRITE_UNLOCK_BH config INLINE_WRITE_UNLOCK_IRQ def_bool y - depends on !PREEMPT || ARCH_INLINE_WRITE_UNLOCK_BH + depends on !PREEMPT || ARCH_INLINE_WRITE_UNLOCK_IRQ config INLINE_WRITE_UNLOCK_IRQRESTORE def_bool y diff --git a/kernel/audit.c b/kernel/audit.c index 21c7fa615bd..91e53d04b6a 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -1056,7 +1056,7 @@ static inline void audit_get_stamp(struct audit_context *ctx, static void wait_for_auditd(unsigned long sleep_time) { DECLARE_WAITQUEUE(wait, current); - set_current_state(TASK_INTERRUPTIBLE); + set_current_state(TASK_UNINTERRUPTIBLE); add_wait_queue(&audit_backlog_wait, &wait); if (audit_backlog_limit && diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index a291aa23fb3..43c307dc945 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -658,6 +658,7 @@ int audit_add_tree_rule(struct audit_krule *rule) struct vfsmount *mnt; int err; + rule->tree = NULL; list_for_each_entry(tree, &tree_list, list) { if (!strcmp(seed->pathname, tree->pathname)) { put_tree(seed); diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 83a2970295d..6bd4a90d199 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -1021,9 +1021,6 @@ static void audit_log_rule_change(char *action, struct audit_krule *rule, int re * @seq: netlink audit message 
sequence (serial) number * @data: payload data * @datasz: size of payload data - * @loginuid: loginuid of sender - * @sessionid: sessionid for netlink audit message - * @sid: SE Linux Security ID of sender */ int audit_receive_filter(int type, int pid, int seq, void *data, size_t datasz) { diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 2a9926275f8..e5583d10a32 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -63,9 +63,6 @@ #include <linux/atomic.h> -/* css deactivation bias, makes css->refcnt negative to deny new trygets */ -#define CSS_DEACT_BIAS INT_MIN - /* * cgroup_mutex is the master lock. Any modification to cgroup or its * hierarchy must be performed while holding it. @@ -99,16 +96,19 @@ static DEFINE_MUTEX(cgroup_root_mutex); */ #define SUBSYS(_x) [_x ## _subsys_id] = &_x ## _subsys, #define IS_SUBSYS_ENABLED(option) IS_BUILTIN(option) -static struct cgroup_subsys *subsys[CGROUP_SUBSYS_COUNT] = { +static struct cgroup_subsys *cgroup_subsys[CGROUP_SUBSYS_COUNT] = { #include <linux/cgroup_subsys.h> }; /* - * The "rootnode" hierarchy is the "dummy hierarchy", reserved for the - * subsystems that are otherwise unattached - it never has more than a - * single cgroup, and all tasks are part of that cgroup. + * The dummy hierarchy, reserved for the subsystems that are otherwise + * unattached - it never has more than a single cgroup, and all tasks are + * part of that cgroup. */ -static struct cgroupfs_root rootnode; +static struct cgroupfs_root cgroup_dummy_root; + +/* dummy_top is a shorthand for the dummy hierarchy's top cgroup */ +static struct cgroup * const cgroup_dummy_top = &cgroup_dummy_root.top_cgroup; /* * cgroupfs file entry, pointed to from leaf dentry->d_fsdata. 
@@ -186,18 +186,28 @@ struct cgroup_event { /* The list of hierarchy roots */ -static LIST_HEAD(roots); -static int root_count; +static LIST_HEAD(cgroup_roots); +static int cgroup_root_count; -static DEFINE_IDA(hierarchy_ida); -static int next_hierarchy_id; -static DEFINE_SPINLOCK(hierarchy_id_lock); - -/* dummytop is a shorthand for the dummy hierarchy's top cgroup */ -#define dummytop (&rootnode.top_cgroup) +/* + * Hierarchy ID allocation and mapping. It follows the same exclusion + * rules as other root ops - both cgroup_mutex and cgroup_root_mutex for + * writes, either for reads. + */ +static DEFINE_IDR(cgroup_hierarchy_idr); static struct cgroup_name root_cgroup_name = { .name = "/" }; +/* + * Assign a monotonically increasing serial number to cgroups. It + * guarantees cgroups with bigger numbers are newer than those with smaller + * numbers. Also, as cgroups are always appended to the parent's + * ->children list, it guarantees that sibling cgroups are always sorted in + * the ascending serial number order on the list. Protected by + * cgroup_mutex. + */ +static u64 cgroup_serial_nr_next = 1; + /* This flag indicates whether tasks in the fork and exit paths should * check for fork/exit handlers to call. This avoids us having to do * extra work in the fork/exit path if none of the subsystems need to @@ -205,27 +215,15 @@ static struct cgroup_name root_cgroup_name = { .name = "/" }; */ static int need_forkexit_callback __read_mostly; +static void cgroup_offline_fn(struct work_struct *work); static int cgroup_destroy_locked(struct cgroup *cgrp); static int cgroup_addrm_files(struct cgroup *cgrp, struct cgroup_subsys *subsys, struct cftype cfts[], bool is_add); -static int css_unbias_refcnt(int refcnt) -{ - return refcnt >= 0 ? 
refcnt : refcnt - CSS_DEACT_BIAS; -} - -/* the current nr of refs, always >= 0 whether @css is deactivated or not */ -static int css_refcnt(struct cgroup_subsys_state *css) -{ - int v = atomic_read(&css->refcnt); - - return css_unbias_refcnt(v); -} - /* convenient tests for these bits */ -inline int cgroup_is_removed(const struct cgroup *cgrp) +static inline bool cgroup_is_dead(const struct cgroup *cgrp) { - return test_bit(CGRP_REMOVED, &cgrp->flags); + return test_bit(CGRP_DEAD, &cgrp->flags); } /** @@ -261,16 +259,38 @@ static int notify_on_release(const struct cgroup *cgrp) return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags); } -/* - * for_each_subsys() allows you to iterate on each subsystem attached to - * an active hierarchy +/** + * for_each_subsys - iterate all loaded cgroup subsystems + * @ss: the iteration cursor + * @i: the index of @ss, CGROUP_SUBSYS_COUNT after reaching the end + * + * Should be called under cgroup_mutex. */ -#define for_each_subsys(_root, _ss) \ -list_for_each_entry(_ss, &_root->subsys_list, sibling) +#define for_each_subsys(ss, i) \ + for ((i) = 0; (i) < CGROUP_SUBSYS_COUNT; (i)++) \ + if (({ lockdep_assert_held(&cgroup_mutex); \ + !((ss) = cgroup_subsys[i]); })) { } \ + else -/* for_each_active_root() allows you to iterate across the active hierarchies */ -#define for_each_active_root(_root) \ -list_for_each_entry(_root, &roots, root_list) +/** + * for_each_builtin_subsys - iterate all built-in cgroup subsystems + * @ss: the iteration cursor + * @i: the index of @ss, CGROUP_BUILTIN_SUBSYS_COUNT after reaching the end + * + * Built-in subsystems are always present and iteration itself doesn't + * require any synchronization. 
+ */ +#define for_each_builtin_subsys(ss, i) \ + for ((i) = 0; (i) < CGROUP_BUILTIN_SUBSYS_COUNT && \ + (((ss) = cgroup_subsys[i]) || true); (i)++) + +/* iterate each subsystem attached to a hierarchy */ +#define for_each_root_subsys(root, ss) \ + list_for_each_entry((ss), &(root)->subsys_list, sibling) + +/* iterate across the active hierarchies */ +#define for_each_active_root(root) \ + list_for_each_entry((root), &cgroup_roots, root_list) static inline struct cgroup *__d_cgrp(struct dentry *dentry) { @@ -297,7 +317,7 @@ static inline struct cftype *__d_cft(struct dentry *dentry) static bool cgroup_lock_live_group(struct cgroup *cgrp) { mutex_lock(&cgroup_mutex); - if (cgroup_is_removed(cgrp)) { + if (cgroup_is_dead(cgrp)) { mutex_unlock(&cgroup_mutex); return false; } @@ -312,20 +332,24 @@ static void cgroup_release_agent(struct work_struct *work); static DECLARE_WORK(release_agent_work, cgroup_release_agent); static void check_for_release(struct cgroup *cgrp); -/* Link structure for associating css_set objects with cgroups */ -struct cg_cgroup_link { - /* - * List running through cg_cgroup_links associated with a - * cgroup, anchored on cgroup->css_sets - */ - struct list_head cgrp_link_list; - struct cgroup *cgrp; - /* - * List running through cg_cgroup_links pointing at a - * single css_set object, anchored on css_set->cg_links - */ - struct list_head cg_link_list; - struct css_set *cg; +/* + * A cgroup can be associated with multiple css_sets as different tasks may + * belong to different cgroups on different hierarchies. In the other + * direction, a css_set is naturally associated with multiple cgroups. + * This M:N relationship is represented by the following link structure + * which exists for each association and allows traversing the associations + * from both sides. 
+ */ +struct cgrp_cset_link { + /* the cgroup and css_set this link associates */ + struct cgroup *cgrp; + struct css_set *cset; + + /* list of cgrp_cset_links anchored at cgrp->cset_links */ + struct list_head cset_link; + + /* list of cgrp_cset_links anchored at css_set->cgrp_links */ + struct list_head cgrp_link; }; /* The default css_set - used by init and its children prior to any @@ -336,7 +360,7 @@ struct cg_cgroup_link { */ static struct css_set init_css_set; -static struct cg_cgroup_link init_css_set_link; +static struct cgrp_cset_link init_cgrp_cset_link; static int cgroup_init_idr(struct cgroup_subsys *ss, struct cgroup_subsys_state *css); @@ -357,10 +381,11 @@ static DEFINE_HASHTABLE(css_set_table, CSS_SET_HASH_BITS); static unsigned long css_set_hash(struct cgroup_subsys_state *css[]) { - int i; unsigned long key = 0UL; + struct cgroup_subsys *ss; + int i; - for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) + for_each_subsys(ss, i) key += (unsigned long)css[i]; key = (key >> 16) ^ key; @@ -373,90 +398,83 @@ static unsigned long css_set_hash(struct cgroup_subsys_state *css[]) * compiled into their kernel but not actually in use */ static int use_task_css_set_links __read_mostly; -static void __put_css_set(struct css_set *cg, int taskexit) +static void __put_css_set(struct css_set *cset, int taskexit) { - struct cg_cgroup_link *link; - struct cg_cgroup_link *saved_link; + struct cgrp_cset_link *link, *tmp_link; + /* * Ensure that the refcount doesn't hit zero while any readers * can see it. Similar to atomic_dec_and_lock(), but for an * rwlock */ - if (atomic_add_unless(&cg->refcount, -1, 1)) + if (atomic_add_unless(&cset->refcount, -1, 1)) return; write_lock(&css_set_lock); - if (!atomic_dec_and_test(&cg->refcount)) { + if (!atomic_dec_and_test(&cset->refcount)) { write_unlock(&css_set_lock); return; } /* This css_set is dead. 
unlink it and release cgroup refcounts */ - hash_del(&cg->hlist); + hash_del(&cset->hlist); css_set_count--; - list_for_each_entry_safe(link, saved_link, &cg->cg_links, - cg_link_list) { + list_for_each_entry_safe(link, tmp_link, &cset->cgrp_links, cgrp_link) { struct cgroup *cgrp = link->cgrp; - list_del(&link->cg_link_list); - list_del(&link->cgrp_link_list); - /* - * We may not be holding cgroup_mutex, and if cgrp->count is - * dropped to 0 the cgroup can be destroyed at any time, hence - * rcu_read_lock is used to keep it alive. - */ - rcu_read_lock(); - if (atomic_dec_and_test(&cgrp->count) && - notify_on_release(cgrp)) { + list_del(&link->cset_link); + list_del(&link->cgrp_link); + + /* @cgrp can't go away while we're holding css_set_lock */ + if (list_empty(&cgrp->cset_links) && notify_on_release(cgrp)) { if (taskexit) set_bit(CGRP_RELEASABLE, &cgrp->flags); check_for_release(cgrp); } - rcu_read_unlock(); kfree(link); } write_unlock(&css_set_lock); - kfree_rcu(cg, rcu_head); |