diff options
Diffstat (limited to 'kernel/cgroup_freezer.c')
| -rw-r--r-- | kernel/cgroup_freezer.c | 622 |
1 files changed, 355 insertions, 267 deletions
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index e7bebb7c6c3..a79e40f9d70 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -14,383 +14,471 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. */ -#include <linux/module.h> +#include <linux/export.h> #include <linux/slab.h> #include <linux/cgroup.h> #include <linux/fs.h> #include <linux/uaccess.h> #include <linux/freezer.h> #include <linux/seq_file.h> +#include <linux/mutex.h> -enum freezer_state { - CGROUP_THAWED = 0, - CGROUP_FREEZING, - CGROUP_FROZEN, +/* + * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is + * set if "FROZEN" is written to freezer.state cgroupfs file, and cleared + * for "THAWED". FREEZING_PARENT is set if the parent freezer is FREEZING + * for whatever reason. IOW, a cgroup has FREEZING_PARENT set if one of + * its ancestors has FREEZING_SELF set. + */ +enum freezer_state_flags { + CGROUP_FREEZER_ONLINE = (1 << 0), /* freezer is fully online */ + CGROUP_FREEZING_SELF = (1 << 1), /* this freezer is freezing */ + CGROUP_FREEZING_PARENT = (1 << 2), /* the parent freezer is freezing */ + CGROUP_FROZEN = (1 << 3), /* this and its descendants frozen */ + + /* mask for all FREEZING flags */ + CGROUP_FREEZING = CGROUP_FREEZING_SELF | CGROUP_FREEZING_PARENT, }; struct freezer { - struct cgroup_subsys_state css; - enum freezer_state state; - spinlock_t lock; /* protects _writes_ to state */ + struct cgroup_subsys_state css; + unsigned int state; }; -static inline struct freezer *cgroup_freezer( - struct cgroup *cgroup) +static DEFINE_MUTEX(freezer_mutex); + +static inline struct freezer *css_freezer(struct cgroup_subsys_state *css) { - return container_of( - cgroup_subsys_state(cgroup, freezer_subsys_id), - struct freezer, css); + return css ? container_of(css, struct freezer, css) : NULL; } static inline struct freezer *task_freezer(struct task_struct *task) { - return container_of(task_subsys_state(task, freezer_subsys_id), - struct freezer, css); + return css_freezer(task_css(task, freezer_cgrp_id)); } -static inline int __cgroup_freezing_or_frozen(struct task_struct *task) +static struct freezer *parent_freezer(struct freezer *freezer) { - enum freezer_state state = task_freezer(task)->state; - return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN); + return css_freezer(freezer->css.parent); } -int cgroup_freezing_or_frozen(struct task_struct *task) +bool cgroup_freezing(struct task_struct *task) { - int result; - task_lock(task); - result = __cgroup_freezing_or_frozen(task); - task_unlock(task); - return result; + bool ret; + + rcu_read_lock(); + ret = task_freezer(task)->state & CGROUP_FREEZING; + rcu_read_unlock(); + + return ret; } -/* - * cgroups_write_string() limits the size of freezer state strings to - * CGROUP_LOCAL_BUFFER_SIZE - */ -static const char *freezer_state_strs[] = { - "THAWED", - "FREEZING", - "FROZEN", +static const char *freezer_state_strs(unsigned int state) +{ + if (state & CGROUP_FROZEN) + return "FROZEN"; + if (state & CGROUP_FREEZING) + return "FREEZING"; + return "THAWED"; }; -/* - * State diagram - * Transitions are caused by userspace writes to the freezer.state file. - * The values in parenthesis are state labels. The rest are edge labels. +static struct cgroup_subsys_state * +freezer_css_alloc(struct cgroup_subsys_state *parent_css) +{ + struct freezer *freezer; + + freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL); + if (!freezer) + return ERR_PTR(-ENOMEM); + + return &freezer->css; +} + +/** + * freezer_css_online - commit creation of a freezer css + * @css: css being created * - * (THAWED) --FROZEN--> (FREEZING) --FROZEN--> (FROZEN) - * ^ ^ | | - * | \_______THAWED_______/ | - * \__________________________THAWED____________/ + * We're committing to creation of @css. Mark it online and inherit + * parent's freezing state while holding both parent's and our + * freezer->lock. */ +static int freezer_css_online(struct cgroup_subsys_state *css) +{ + struct freezer *freezer = css_freezer(css); + struct freezer *parent = parent_freezer(freezer); -struct cgroup_subsys freezer_subsys; + mutex_lock(&freezer_mutex); -/* Locks taken and their ordering - * ------------------------------ - * cgroup_mutex (AKA cgroup_lock) - * freezer->lock - * css_set_lock - * task->alloc_lock (AKA task_lock) - * task->sighand->siglock - * - * cgroup code forces css_set_lock to be taken before task->alloc_lock - * - * freezer_create(), freezer_destroy(): - * cgroup_mutex [ by cgroup core ] - * - * freezer_can_attach(): - * cgroup_mutex (held by caller of can_attach) - * - * cgroup_freezing_or_frozen(): - * task->alloc_lock (to get task's cgroup) - * - * freezer_fork() (preserving fork() performance means can't take cgroup_mutex): - * freezer->lock - * sighand->siglock (if the cgroup is freezing) - * - * freezer_read(): - * cgroup_mutex - * freezer->lock - * write_lock css_set_lock (cgroup iterator start) - * task->alloc_lock - * read_lock css_set_lock (cgroup iterator start) - * - * freezer_write() (freeze): - * cgroup_mutex - * freezer->lock - * write_lock css_set_lock (cgroup iterator start) - * task->alloc_lock - * read_lock css_set_lock (cgroup iterator start) - * sighand->siglock (fake signal delivery inside freeze_task()) + freezer->state |= CGROUP_FREEZER_ONLINE; + + if (parent && (parent->state & CGROUP_FREEZING)) { + freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN; + atomic_inc(&system_freezing_cnt); + } + + mutex_unlock(&freezer_mutex); + return 0; +} + +/** + * freezer_css_offline - initiate destruction of a freezer css + * @css: css being destroyed * - * freezer_write() (unfreeze): - * cgroup_mutex - * freezer->lock - * write_lock css_set_lock (cgroup iterator start) - * task->alloc_lock - * read_lock css_set_lock (cgroup iterator start) - * task->alloc_lock (inside thaw_process(), prevents race with refrigerator()) - * sighand->siglock + * @css is going away. Mark it dead and decrement system_freezing_count if + * it was holding one. */ -static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss, - struct cgroup *cgroup) +static void freezer_css_offline(struct cgroup_subsys_state *css) { - struct freezer *freezer; + struct freezer *freezer = css_freezer(css); - freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL); - if (!freezer) - return ERR_PTR(-ENOMEM); + mutex_lock(&freezer_mutex); - spin_lock_init(&freezer->lock); - freezer->state = CGROUP_THAWED; - return &freezer->css; + if (freezer->state & CGROUP_FREEZING) + atomic_dec(&system_freezing_cnt); + + freezer->state = 0; + + mutex_unlock(&freezer_mutex); } -static void freezer_destroy(struct cgroup_subsys *ss, - struct cgroup *cgroup) +static void freezer_css_free(struct cgroup_subsys_state *css) { - kfree(cgroup_freezer(cgroup)); + kfree(css_freezer(css)); } /* - * The call to cgroup_lock() in the freezer.state write method prevents - * a write to that file racing against an attach, and hence the - * can_attach() result will remain valid until the attach completes. + * Tasks can be migrated into a different freezer anytime regardless of its + * current state. freezer_attach() is responsible for making new tasks + * conform to the current state. + * + * Freezer state changes and task migration are synchronized via + * @freezer->lock. freezer_attach() makes the new tasks conform to the + * current state and all following state changes can see the new tasks. */ -static int freezer_can_attach(struct cgroup_subsys *ss, - struct cgroup *new_cgroup, - struct task_struct *task, bool threadgroup) +static void freezer_attach(struct cgroup_subsys_state *new_css, + struct cgroup_taskset *tset) { - struct freezer *freezer; + struct freezer *freezer = css_freezer(new_css); + struct task_struct *task; + bool clear_frozen = false; + + mutex_lock(&freezer_mutex); /* - * Anything frozen can't move or be moved to/from. + * Make the new tasks conform to the current state of @new_css. + * For simplicity, when migrating any task to a FROZEN cgroup, we + * revert it to FREEZING and let update_if_frozen() determine the + * correct state later. + * + * Tasks in @tset are on @new_css but may not conform to its + * current state before executing the following - !frozen tasks may + * be visible in a FROZEN cgroup and frozen tasks in a THAWED one. */ - - freezer = cgroup_freezer(new_cgroup); - if (freezer->state != CGROUP_THAWED) - return -EBUSY; - - rcu_read_lock(); - if (__cgroup_freezing_or_frozen(task)) { - rcu_read_unlock(); - return -EBUSY; + cgroup_taskset_for_each(task, tset) { + if (!(freezer->state & CGROUP_FREEZING)) { + __thaw_task(task); + } else { + freeze_task(task); + freezer->state &= ~CGROUP_FROZEN; + clear_frozen = true; + } } - rcu_read_unlock(); - - if (threadgroup) { - struct task_struct *c; - rcu_read_lock(); - list_for_each_entry_rcu(c, &task->thread_group, thread_group) { - if (__cgroup_freezing_or_frozen(c)) { - rcu_read_unlock(); - return -EBUSY; - } - } - rcu_read_unlock(); + /* propagate FROZEN clearing upwards */ + while (clear_frozen && (freezer = parent_freezer(freezer))) { + freezer->state &= ~CGROUP_FROZEN; + clear_frozen = freezer->state & CGROUP_FREEZING; } - return 0; + mutex_unlock(&freezer_mutex); } -static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) +/** + * freezer_fork - cgroup post fork callback + * @task: a task which has just been forked + * + * @task has just been created and should conform to the current state of + * the cgroup_freezer it belongs to. This function may race against + * freezer_attach(). Losing to freezer_attach() means that we don't have + * to do anything as freezer_attach() will put @task into the appropriate + * state. + */ +static void freezer_fork(struct task_struct *task) { struct freezer *freezer; /* - * No lock is needed, since the task isn't on tasklist yet, - * so it can't be moved to another cgroup, which means the - * freezer won't be removed and will be valid during this - * function call. Nevertheless, apply RCU read-side critical - * section to suppress RCU lockdep false positives. + * The root cgroup is non-freezable, so we can skip locking the + * freezer. This is safe regardless of race with task migration. + * If we didn't race or won, skipping is obviously the right thing + * to do. If we lost and root is the new cgroup, noop is still the + * right thing to do. */ - rcu_read_lock(); - freezer = task_freezer(task); - rcu_read_unlock(); - - /* - * The root cgroup is non-freezable, so we can skip the - * following check. - */ - if (!freezer->css.cgroup->parent) + if (task_css_is_root(task, freezer_cgrp_id)) return; - spin_lock_irq(&freezer->lock); - BUG_ON(freezer->state == CGROUP_FROZEN); + mutex_lock(&freezer_mutex); + rcu_read_lock(); + + freezer = task_freezer(task); + if (freezer->state & CGROUP_FREEZING) + freeze_task(task); - /* Locking avoids race with FREEZING -> THAWED transitions. */ - if (freezer->state == CGROUP_FREEZING) - freeze_task(task, true); - spin_unlock_irq(&freezer->lock); + rcu_read_unlock(); + mutex_unlock(&freezer_mutex); } -/* - * caller must hold freezer->lock +/** + * update_if_frozen - update whether a cgroup finished freezing + * @css: css of interest + * + * Once FREEZING is initiated, transition to FROZEN is lazily updated by + * calling this function. If the current state is FREEZING but not FROZEN, + * this function checks whether all tasks of this cgroup and the descendant + * cgroups finished freezing and, if so, sets FROZEN. + * + * The caller is responsible for grabbing RCU read lock and calling + * update_if_frozen() on all descendants prior to invoking this function. + * + * Task states and freezer state might disagree while tasks are being + * migrated into or out of @css, so we can't verify task states against + * @freezer state here. See freezer_attach() for details. */ -static void update_if_frozen(struct cgroup *cgroup, - struct freezer *freezer) +static void update_if_frozen(struct cgroup_subsys_state *css) { - struct cgroup_iter it; + struct freezer *freezer = css_freezer(css); + struct cgroup_subsys_state *pos; + struct css_task_iter it; struct task_struct *task; - unsigned int nfrozen = 0, ntotal = 0; - enum freezer_state old_state = freezer->state; - - cgroup_iter_start(cgroup, &it); - while ((task = cgroup_iter_next(cgroup, &it))) { - ntotal++; - if (frozen(task)) - nfrozen++; + + lockdep_assert_held(&freezer_mutex); + + if (!(freezer->state & CGROUP_FREEZING) || + (freezer->state & CGROUP_FROZEN)) + return; + + /* are all (live) children frozen? */ + rcu_read_lock(); + css_for_each_child(pos, css) { + struct freezer *child = css_freezer(pos); + + if ((child->state & CGROUP_FREEZER_ONLINE) && + !(child->state & CGROUP_FROZEN)) { + rcu_read_unlock(); + return; + } } + rcu_read_unlock(); - if (old_state == CGROUP_THAWED) { - BUG_ON(nfrozen > 0); - } else if (old_state == CGROUP_FREEZING) { - if (nfrozen == ntotal) - freezer->state = CGROUP_FROZEN; - } else { /* old_state == CGROUP_FROZEN */ - BUG_ON(nfrozen != ntotal); + /* are all tasks frozen? */ + css_task_iter_start(css, &it); + + while ((task = css_task_iter_next(&it))) { + if (freezing(task)) { + /* + * freezer_should_skip() indicates that the task + * should be skipped when determining freezing + * completion. Consider it frozen in addition to + * the usual frozen condition. + */ + if (!frozen(task) && !freezer_should_skip(task)) + goto out_iter_end; + } } - cgroup_iter_end(cgroup, &it); + freezer->state |= CGROUP_FROZEN; +out_iter_end: + css_task_iter_end(&it); } -static int freezer_read(struct cgroup *cgroup, struct cftype *cft, - struct seq_file *m) +static int freezer_read(struct seq_file *m, void *v) { - struct freezer *freezer; - enum freezer_state state; - - if (!cgroup_lock_live_group(cgroup)) - return -ENODEV; - - freezer = cgroup_freezer(cgroup); - spin_lock_irq(&freezer->lock); - state = freezer->state; - if (state == CGROUP_FREEZING) { - /* We change from FREEZING to FROZEN lazily if the cgroup was - * only partially frozen when we exitted write. */ - update_if_frozen(cgroup, freezer); - state = freezer->state; + struct cgroup_subsys_state *css = seq_css(m), *pos; + + mutex_lock(&freezer_mutex); + rcu_read_lock(); + + /* update states bottom-up */ + css_for_each_descendant_post(pos, css) { + if (!css_tryget_online(pos)) + continue; + rcu_read_unlock(); + + update_if_frozen(pos); + + rcu_read_lock(); + css_put(pos); } - spin_unlock_irq(&freezer->lock); - cgroup_unlock(); - seq_puts(m, freezer_state_strs[state]); + rcu_read_unlock(); + mutex_unlock(&freezer_mutex); + + seq_puts(m, freezer_state_strs(css_freezer(css)->state)); seq_putc(m, '\n'); return 0; } -static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) +static void freeze_cgroup(struct freezer *freezer) { - struct cgroup_iter it; + struct css_task_iter it; struct task_struct *task; - unsigned int num_cant_freeze_now = 0; - freezer->state = CGROUP_FREEZING; - cgroup_iter_start(cgroup, &it); - while ((task = cgroup_iter_next(cgroup, &it))) { - if (!freeze_task(task, true)) - continue; - if (frozen(task)) - continue; - if (!freezing(task) && !freezer_should_skip(task)) - num_cant_freeze_now++; - } - cgroup_iter_end(cgroup, &it); - - return num_cant_freeze_now ? -EBUSY : 0; + css_task_iter_start(&freezer->css, &it); + while ((task = css_task_iter_next(&it))) + freeze_task(task); + css_task_iter_end(&it); } -static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) +static void unfreeze_cgroup(struct freezer *freezer) { - struct cgroup_iter it; + struct css_task_iter it; struct task_struct *task; - cgroup_iter_start(cgroup, &it); - while ((task = cgroup_iter_next(cgroup, &it))) { - thaw_process(task); - } - cgroup_iter_end(cgroup, &it); + css_task_iter_start(&freezer->css, &it); + while ((task = css_task_iter_next(&it))) + __thaw_task(task); + css_task_iter_end(&it); +} - freezer->state = CGROUP_THAWED; +/** + * freezer_apply_state - apply state change to a single cgroup_freezer + * @freezer: freezer to apply state change to + * @freeze: whether to freeze or unfreeze + * @state: CGROUP_FREEZING_* flag to set or clear + * + * Set or clear @state on @cgroup according to @freeze, and perform + * freezing or thawing as necessary. + */ +static void freezer_apply_state(struct freezer *freezer, bool freeze, + unsigned int state) +{ + /* also synchronizes against task migration, see freezer_attach() */ + lockdep_assert_held(&freezer_mutex); + + if (!(freezer->state & CGROUP_FREEZER_ONLINE)) + return; + + if (freeze) { + if (!(freezer->state & CGROUP_FREEZING)) + atomic_inc(&system_freezing_cnt); + freezer->state |= state; + freeze_cgroup(freezer); + } else { + bool was_freezing = freezer->state & CGROUP_FREEZING; + + freezer->state &= ~state; + + if (!(freezer->state & CGROUP_FREEZING)) { + if (was_freezing) + atomic_dec(&system_freezing_cnt); + freezer->state &= ~CGROUP_FROZEN; + unfreeze_cgroup(freezer); + } + } } -static int freezer_change_state(struct cgroup *cgroup, - enum freezer_state goal_state) +/** + * freezer_change_state - change the freezing state of a cgroup_freezer + * @freezer: freezer of interest + * @freeze: whether to freeze or thaw + * + * Freeze or thaw @freezer according to @freeze. The operations are + * recursive - all descendants of @freezer will be affected. + */ +static void freezer_change_state(struct freezer *freezer, bool freeze) { - struct freezer *freezer; - int retval = 0; + struct cgroup_subsys_state *pos; - freezer = cgroup_freezer(cgroup); + /* + * Update all its descendants in pre-order traversal. Each + * descendant will try to inherit its parent's FREEZING state as + * CGROUP_FREEZING_PARENT. + */ + mutex_lock(&freezer_mutex); + rcu_read_lock(); + css_for_each_descendant_pre(pos, &freezer->css) { + struct freezer *pos_f = css_freezer(pos); + struct freezer *parent = parent_freezer(pos_f); - spin_lock_irq(&freezer->lock); + if (!css_tryget_online(pos)) + continue; + rcu_read_unlock(); - update_if_frozen(cgroup, freezer); - if (goal_state == freezer->state) - goto out; + if (pos_f == freezer) + freezer_apply_state(pos_f, freeze, + CGROUP_FREEZING_SELF); + else + freezer_apply_state(pos_f, + parent->state & CGROUP_FREEZING, + CGROUP_FREEZING_PARENT); - switch (goal_state) { - case CGROUP_THAWED: - unfreeze_cgroup(cgroup, freezer); - break; - case CGROUP_FROZEN: - retval = try_to_freeze_cgroup(cgroup, freezer); - break; - default: - BUG(); + rcu_read_lock(); + css_put(pos); } -out: - spin_unlock_irq(&freezer->lock); - - return retval; + rcu_read_unlock(); + mutex_unlock(&freezer_mutex); } -static int freezer_write(struct cgroup *cgroup, - struct cftype *cft, - const char *buffer) +static ssize_t freezer_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { - int retval; - enum freezer_state goal_state; + bool freeze; - if (strcmp(buffer, freezer_state_strs[CGROUP_THAWED]) == 0) - goal_state = CGROUP_THAWED; - else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0) - goal_state = CGROUP_FROZEN; + buf = strstrip(buf); + + if (strcmp(buf, freezer_state_strs(0)) == 0) + freeze = false; + else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0) + freeze = true; else return -EINVAL; - if (!cgroup_lock_live_group(cgroup)) - return -ENODEV; - retval = freezer_change_state(cgroup, goal_state); - cgroup_unlock(); - return retval; + freezer_change_state(css_freezer(of_css(of)), freeze); + return nbytes; +} + +static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct freezer *freezer = css_freezer(css); + + return (bool)(freezer->state & CGROUP_FREEZING_SELF); +} + +static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct freezer *freezer = css_freezer(css); + + return (bool)(freezer->state & CGROUP_FREEZING_PARENT); } static struct cftype files[] = { { .name = "state", - .read_seq_string = freezer_read, - .write_string = freezer_write, + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = freezer_read, + .write = freezer_write, + }, + { + .name = "self_freezing", + .flags = CFTYPE_NOT_ON_ROOT, + .read_u64 = freezer_self_freezing_read, + }, + { + .name = "parent_freezing", + .flags = CFTYPE_NOT_ON_ROOT, + .read_u64 = freezer_parent_freezing_read, }, + { } /* terminate */ }; -static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup) -{ - if (!cgroup->parent) - return 0; - return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files)); -} - -struct cgroup_subsys freezer_subsys = { - .name = "freezer", - .create = freezer_create, - .destroy = freezer_destroy, - .populate = freezer_populate, - .subsys_id = freezer_subsys_id, - .can_attach = freezer_can_attach, - .attach = NULL, +struct cgroup_subsys freezer_cgrp_subsys = { + .css_alloc = freezer_css_alloc, + .css_online = freezer_css_online, + .css_offline = freezer_css_offline, + .css_free = freezer_css_free, + .attach = freezer_attach, .fork = freezer_fork, - .exit = NULL, + .base_cftypes = files, }; |
