diff options
Diffstat (limited to 'kernel/cgroup_freezer.c')
| -rw-r--r-- | kernel/cgroup_freezer.c | 590 |
1 files changed, 345 insertions, 245 deletions
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index fc0646b78a6..a79e40f9d70 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -21,115 +21,69 @@ #include <linux/uaccess.h> #include <linux/freezer.h> #include <linux/seq_file.h> +#include <linux/mutex.h> -enum freezer_state { - CGROUP_THAWED = 0, - CGROUP_FREEZING, - CGROUP_FROZEN, +/* + * A cgroup is freezing if any FREEZING flags are set. FREEZING_SELF is + * set if "FROZEN" is written to freezer.state cgroupfs file, and cleared + * for "THAWED". FREEZING_PARENT is set if the parent freezer is FREEZING + * for whatever reason. IOW, a cgroup has FREEZING_PARENT set if one of + * its ancestors has FREEZING_SELF set. + */ +enum freezer_state_flags { + CGROUP_FREEZER_ONLINE = (1 << 0), /* freezer is fully online */ + CGROUP_FREEZING_SELF = (1 << 1), /* this freezer is freezing */ + CGROUP_FREEZING_PARENT = (1 << 2), /* the parent freezer is freezing */ + CGROUP_FROZEN = (1 << 3), /* this and its descendants frozen */ + + /* mask for all FREEZING flags */ + CGROUP_FREEZING = CGROUP_FREEZING_SELF | CGROUP_FREEZING_PARENT, }; struct freezer { - struct cgroup_subsys_state css; - enum freezer_state state; - spinlock_t lock; /* protects _writes_ to state */ + struct cgroup_subsys_state css; + unsigned int state; }; -static inline struct freezer *cgroup_freezer( - struct cgroup *cgroup) +static DEFINE_MUTEX(freezer_mutex); + +static inline struct freezer *css_freezer(struct cgroup_subsys_state *css) { - return container_of( - cgroup_subsys_state(cgroup, freezer_subsys_id), - struct freezer, css); + return css ? container_of(css, struct freezer, css) : NULL; } static inline struct freezer *task_freezer(struct task_struct *task) { - return container_of(task_subsys_state(task, freezer_subsys_id), - struct freezer, css); + return css_freezer(task_css(task, freezer_cgrp_id)); +} + +static struct freezer *parent_freezer(struct freezer *freezer) +{ + return css_freezer(freezer->css.parent); } bool cgroup_freezing(struct task_struct *task) { - enum freezer_state state; bool ret; rcu_read_lock(); - state = task_freezer(task)->state; - ret = state == CGROUP_FREEZING || state == CGROUP_FROZEN; + ret = task_freezer(task)->state & CGROUP_FREEZING; rcu_read_unlock(); return ret; } -/* - * cgroups_write_string() limits the size of freezer state strings to - * CGROUP_LOCAL_BUFFER_SIZE - */ -static const char *freezer_state_strs[] = { - "THAWED", - "FREEZING", - "FROZEN", +static const char *freezer_state_strs(unsigned int state) +{ + if (state & CGROUP_FROZEN) + return "FROZEN"; + if (state & CGROUP_FREEZING) + return "FREEZING"; + return "THAWED"; }; -/* - * State diagram - * Transitions are caused by userspace writes to the freezer.state file. - * The values in parenthesis are state labels. The rest are edge labels. - * - * (THAWED) --FROZEN--> (FREEZING) --FROZEN--> (FROZEN) - * ^ ^ | | - * | \_______THAWED_______/ | - * \__________________________THAWED____________/ - */ - -struct cgroup_subsys freezer_subsys; - -/* Locks taken and their ordering - * ------------------------------ - * cgroup_mutex (AKA cgroup_lock) - * freezer->lock - * css_set_lock - * task->alloc_lock (AKA task_lock) - * task->sighand->siglock - * - * cgroup code forces css_set_lock to be taken before task->alloc_lock - * - * freezer_create(), freezer_destroy(): - * cgroup_mutex [ by cgroup core ] - * - * freezer_can_attach(): - * cgroup_mutex (held by caller of can_attach) - * - * freezer_fork() (preserving fork() performance means can't take cgroup_mutex): - * freezer->lock - * sighand->siglock (if the cgroup is freezing) - * - * freezer_read(): - * cgroup_mutex - * freezer->lock - * write_lock css_set_lock (cgroup iterator start) - * task->alloc_lock - * read_lock css_set_lock (cgroup iterator start) - * - * freezer_write() (freeze): - * cgroup_mutex - * freezer->lock - * write_lock css_set_lock (cgroup iterator start) - * task->alloc_lock - * read_lock css_set_lock (cgroup iterator start) - * sighand->siglock (fake signal delivery inside freeze_task()) - * - * freezer_write() (unfreeze): - * cgroup_mutex - * freezer->lock - * write_lock css_set_lock (cgroup iterator start) - * task->alloc_lock - * read_lock css_set_lock (cgroup iterator start) - * task->alloc_lock (inside __thaw_task(), prevents race with refrigerator()) - * sighand->siglock - */ -static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss, - struct cgroup *cgroup) +static struct cgroup_subsys_state * +freezer_css_alloc(struct cgroup_subsys_state *parent_css) { struct freezer *freezer; @@ -137,248 +91,394 @@ static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss, if (!freezer) return ERR_PTR(-ENOMEM); - spin_lock_init(&freezer->lock); - freezer->state = CGROUP_THAWED; return &freezer->css; } -static void freezer_destroy(struct cgroup_subsys *ss, - struct cgroup *cgroup) +/** + * freezer_css_online - commit creation of a freezer css + * @css: css being created + * + * We're committing to creation of @css. Mark it online and inherit + * parent's freezing state while holding both parent's and our + * freezer->lock. + */ +static int freezer_css_online(struct cgroup_subsys_state *css) { - struct freezer *freezer = cgroup_freezer(cgroup); + struct freezer *freezer = css_freezer(css); + struct freezer *parent = parent_freezer(freezer); + + mutex_lock(&freezer_mutex); + + freezer->state |= CGROUP_FREEZER_ONLINE; - if (freezer->state != CGROUP_THAWED) + if (parent && (parent->state & CGROUP_FREEZING)) { + freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN; + atomic_inc(&system_freezing_cnt); + } + + mutex_unlock(&freezer_mutex); + return 0; +} + +/** + * freezer_css_offline - initiate destruction of a freezer css + * @css: css being destroyed + * + * @css is going away. Mark it dead and decrement system_freezing_count if + * it was holding one. + */ +static void freezer_css_offline(struct cgroup_subsys_state *css) +{ + struct freezer *freezer = css_freezer(css); + + mutex_lock(&freezer_mutex); + + if (freezer->state & CGROUP_FREEZING) atomic_dec(&system_freezing_cnt); - kfree(freezer); + + freezer->state = 0; + + mutex_unlock(&freezer_mutex); } -/* task is frozen or will freeze immediately when next it gets woken */ -static bool is_task_frozen_enough(struct task_struct *task) +static void freezer_css_free(struct cgroup_subsys_state *css) { - return frozen(task) || - (task_is_stopped_or_traced(task) && freezing(task)); + kfree(css_freezer(css)); } /* - * The call to cgroup_lock() in the freezer.state write method prevents - * a write to that file racing against an attach, and hence the - * can_attach() result will remain valid until the attach completes. + * Tasks can be migrated into a different freezer anytime regardless of its + * current state. freezer_attach() is responsible for making new tasks + * conform to the current state. + * + * Freezer state changes and task migration are synchronized via + * @freezer->lock. freezer_attach() makes the new tasks conform to the + * current state and all following state changes can see the new tasks. */ -static int freezer_can_attach(struct cgroup_subsys *ss, - struct cgroup *new_cgroup, - struct cgroup_taskset *tset) +static void freezer_attach(struct cgroup_subsys_state *new_css, + struct cgroup_taskset *tset) { - struct freezer *freezer; + struct freezer *freezer = css_freezer(new_css); struct task_struct *task; + bool clear_frozen = false; + + mutex_lock(&freezer_mutex); /* - * Anything frozen can't move or be moved to/from. + * Make the new tasks conform to the current state of @new_css. + * For simplicity, when migrating any task to a FROZEN cgroup, we + * revert it to FREEZING and let update_if_frozen() determine the + * correct state later. + * + * Tasks in @tset are on @new_css but may not conform to its + * current state before executing the following - !frozen tasks may + * be visible in a FROZEN cgroup and frozen tasks in a THAWED one. */ - cgroup_taskset_for_each(task, new_cgroup, tset) - if (cgroup_freezing(task)) - return -EBUSY; + cgroup_taskset_for_each(task, tset) { + if (!(freezer->state & CGROUP_FREEZING)) { + __thaw_task(task); + } else { + freeze_task(task); + freezer->state &= ~CGROUP_FROZEN; + clear_frozen = true; + } + } - freezer = cgroup_freezer(new_cgroup); - if (freezer->state != CGROUP_THAWED) - return -EBUSY; + /* propagate FROZEN clearing upwards */ + while (clear_frozen && (freezer = parent_freezer(freezer))) { + freezer->state &= ~CGROUP_FROZEN; + clear_frozen = freezer->state & CGROUP_FREEZING; + } - return 0; + mutex_unlock(&freezer_mutex); } -static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) +/** + * freezer_fork - cgroup post fork callback + * @task: a task which has just been forked + * + * @task has just been created and should conform to the current state of + * the cgroup_freezer it belongs to. This function may race against + * freezer_attach(). Losing to freezer_attach() means that we don't have + * to do anything as freezer_attach() will put @task into the appropriate + * state. + */ +static void freezer_fork(struct task_struct *task) { struct freezer *freezer; /* - * No lock is needed, since the task isn't on tasklist yet, - * so it can't be moved to another cgroup, which means the - * freezer won't be removed and will be valid during this - * function call. Nevertheless, apply RCU read-side critical - * section to suppress RCU lockdep false positives. - */ - rcu_read_lock(); - freezer = task_freezer(task); - rcu_read_unlock(); - - /* - * The root cgroup is non-freezable, so we can skip the - * following check. + * The root cgroup is non-freezable, so we can skip locking the + * freezer. This is safe regardless of race with task migration. + * If we didn't race or won, skipping is obviously the right thing + * to do. If we lost and root is the new cgroup, noop is still the + * right thing to do. */ - if (!freezer->css.cgroup->parent) + if (task_css_is_root(task, freezer_cgrp_id)) return; - spin_lock_irq(&freezer->lock); - BUG_ON(freezer->state == CGROUP_FROZEN); + mutex_lock(&freezer_mutex); + rcu_read_lock(); - /* Locking avoids race with FREEZING -> THAWED transitions. */ - if (freezer->state == CGROUP_FREEZING) + freezer = task_freezer(task); + if (freezer->state & CGROUP_FREEZING) freeze_task(task); - spin_unlock_irq(&freezer->lock); + + rcu_read_unlock(); + mutex_unlock(&freezer_mutex); } -/* - * caller must hold freezer->lock +/** + * update_if_frozen - update whether a cgroup finished freezing + * @css: css of interest + * + * Once FREEZING is initiated, transition to FROZEN is lazily updated by + * calling this function. If the current state is FREEZING but not FROZEN, + * this function checks whether all tasks of this cgroup and the descendant + * cgroups finished freezing and, if so, sets FROZEN. + * + * The caller is responsible for grabbing RCU read lock and calling + * update_if_frozen() on all descendants prior to invoking this function. + * + * Task states and freezer state might disagree while tasks are being + * migrated into or out of @css, so we can't verify task states against + * @freezer state here. See freezer_attach() for details. */ -static void update_if_frozen(struct cgroup *cgroup, - struct freezer *freezer) +static void update_if_frozen(struct cgroup_subsys_state *css) { - struct cgroup_iter it; + struct freezer *freezer = css_freezer(css); + struct cgroup_subsys_state *pos; + struct css_task_iter it; struct task_struct *task; - unsigned int nfrozen = 0, ntotal = 0; - enum freezer_state old_state = freezer->state; - - cgroup_iter_start(cgroup, &it); - while ((task = cgroup_iter_next(cgroup, &it))) { - ntotal++; - if (freezing(task) && is_task_frozen_enough(task)) - nfrozen++; + + lockdep_assert_held(&freezer_mutex); + + if (!(freezer->state & CGROUP_FREEZING) || + (freezer->state & CGROUP_FROZEN)) + return; + + /* are all (live) children frozen? */ + rcu_read_lock(); + css_for_each_child(pos, css) { + struct freezer *child = css_freezer(pos); + + if ((child->state & CGROUP_FREEZER_ONLINE) && + !(child->state & CGROUP_FROZEN)) { + rcu_read_unlock(); + return; + } } + rcu_read_unlock(); - if (old_state == CGROUP_THAWED) { - BUG_ON(nfrozen > 0); - } else if (old_state == CGROUP_FREEZING) { - if (nfrozen == ntotal) - freezer->state = CGROUP_FROZEN; - } else { /* old_state == CGROUP_FROZEN */ - BUG_ON(nfrozen != ntotal); + /* are all tasks frozen? */ + css_task_iter_start(css, &it); + + while ((task = css_task_iter_next(&it))) { + if (freezing(task)) { + /* + * freezer_should_skip() indicates that the task + * should be skipped when determining freezing + * completion. Consider it frozen in addition to + * the usual frozen condition. + */ + if (!frozen(task) && !freezer_should_skip(task)) + goto out_iter_end; + } } - cgroup_iter_end(cgroup, &it); + freezer->state |= CGROUP_FROZEN; +out_iter_end: + css_task_iter_end(&it); } -static int freezer_read(struct cgroup *cgroup, struct cftype *cft, - struct seq_file *m) +static int freezer_read(struct seq_file *m, void *v) { - struct freezer *freezer; - enum freezer_state state; - - if (!cgroup_lock_live_group(cgroup)) - return -ENODEV; - - freezer = cgroup_freezer(cgroup); - spin_lock_irq(&freezer->lock); - state = freezer->state; - if (state == CGROUP_FREEZING) { - /* We change from FREEZING to FROZEN lazily if the cgroup was - * only partially frozen when we exitted write. */ - update_if_frozen(cgroup, freezer); - state = freezer->state; + struct cgroup_subsys_state *css = seq_css(m), *pos; + + mutex_lock(&freezer_mutex); + rcu_read_lock(); + + /* update states bottom-up */ + css_for_each_descendant_post(pos, css) { + if (!css_tryget_online(pos)) + continue; + rcu_read_unlock(); + + update_if_frozen(pos); + + rcu_read_lock(); + css_put(pos); } - spin_unlock_irq(&freezer->lock); - cgroup_unlock(); - seq_puts(m, freezer_state_strs[state]); + rcu_read_unlock(); + mutex_unlock(&freezer_mutex); + + seq_puts(m, freezer_state_strs(css_freezer(css)->state)); seq_putc(m, '\n'); return 0; } -static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) +static void freeze_cgroup(struct freezer *freezer) { - struct cgroup_iter it; + struct css_task_iter it; struct task_struct *task; - unsigned int num_cant_freeze_now = 0; - - cgroup_iter_start(cgroup, &it); - while ((task = cgroup_iter_next(cgroup, &it))) { - if (!freeze_task(task)) - continue; - if (is_task_frozen_enough(task)) - continue; - if (!freezing(task) && !freezer_should_skip(task)) - num_cant_freeze_now++; - } - cgroup_iter_end(cgroup, &it); - return num_cant_freeze_now ? -EBUSY : 0; + css_task_iter_start(&freezer->css, &it); + while ((task = css_task_iter_next(&it))) + freeze_task(task); + css_task_iter_end(&it); } -static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) +static void unfreeze_cgroup(struct freezer *freezer) { - struct cgroup_iter it; + struct css_task_iter it; struct task_struct *task; - cgroup_iter_start(cgroup, &it); - while ((task = cgroup_iter_next(cgroup, &it))) + css_task_iter_start(&freezer->css, &it); + while ((task = css_task_iter_next(&it))) __thaw_task(task); - cgroup_iter_end(cgroup, &it); + css_task_iter_end(&it); } -static int freezer_change_state(struct cgroup *cgroup, - enum freezer_state goal_state) +/** + * freezer_apply_state - apply state change to a single cgroup_freezer + * @freezer: freezer to apply state change to + * @freeze: whether to freeze or unfreeze + * @state: CGROUP_FREEZING_* flag to set or clear + * + * Set or clear @state on @cgroup according to @freeze, and perform + * freezing or thawing as necessary. + */ +static void freezer_apply_state(struct freezer *freezer, bool freeze, + unsigned int state) { - struct freezer *freezer; - int retval = 0; - - freezer = cgroup_freezer(cgroup); - - spin_lock_irq(&freezer->lock); + /* also synchronizes against task migration, see freezer_attach() */ + lockdep_assert_held(&freezer_mutex); - update_if_frozen(cgroup, freezer); + if (!(freezer->state & CGROUP_FREEZER_ONLINE)) + return; - switch (goal_state) { - case CGROUP_THAWED: - if (freezer->state != CGROUP_THAWED) - atomic_dec(&system_freezing_cnt); - freezer->state = CGROUP_THAWED; - unfreeze_cgroup(cgroup, freezer); - break; - case CGROUP_FROZEN: - if (freezer->state == CGROUP_THAWED) + if (freeze) { + if (!(freezer->state & CGROUP_FREEZING)) atomic_inc(&system_freezing_cnt); - freezer->state = CGROUP_FREEZING; - retval = try_to_freeze_cgroup(cgroup, freezer); - break; - default: - BUG(); + freezer->state |= state; + freeze_cgroup(freezer); + } else { + bool was_freezing = freezer->state & CGROUP_FREEZING; + + freezer->state &= ~state; + + if (!(freezer->state & CGROUP_FREEZING)) { + if (was_freezing) + atomic_dec(&system_freezing_cnt); + freezer->state &= ~CGROUP_FROZEN; + unfreeze_cgroup(freezer); + } } +} - spin_unlock_irq(&freezer->lock); +/** + * freezer_change_state - change the freezing state of a cgroup_freezer + * @freezer: freezer of interest + * @freeze: whether to freeze or thaw + * + * Freeze or thaw @freezer according to @freeze. The operations are + * recursive - all descendants of @freezer will be affected. + */ +static void freezer_change_state(struct freezer *freezer, bool freeze) +{ + struct cgroup_subsys_state *pos; + + /* + * Update all its descendants in pre-order traversal. Each + * descendant will try to inherit its parent's FREEZING state as + * CGROUP_FREEZING_PARENT. + */ + mutex_lock(&freezer_mutex); + rcu_read_lock(); + css_for_each_descendant_pre(pos, &freezer->css) { + struct freezer *pos_f = css_freezer(pos); + struct freezer *parent = parent_freezer(pos_f); - return retval; + if (!css_tryget_online(pos)) + continue; + rcu_read_unlock(); + + if (pos_f == freezer) + freezer_apply_state(pos_f, freeze, + CGROUP_FREEZING_SELF); + else + freezer_apply_state(pos_f, + parent->state & CGROUP_FREEZING, + CGROUP_FREEZING_PARENT); + + rcu_read_lock(); + css_put(pos); + } + rcu_read_unlock(); + mutex_unlock(&freezer_mutex); } -static int freezer_write(struct cgroup *cgroup, - struct cftype *cft, - const char *buffer) +static ssize_t freezer_write(struct kernfs_open_file *of, + char *buf, size_t nbytes, loff_t off) { - int retval; - enum freezer_state goal_state; + bool freeze; + + buf = strstrip(buf); - if (strcmp(buffer, freezer_state_strs[CGROUP_THAWED]) == 0) - goal_state = CGROUP_THAWED; - else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0) - goal_state = CGROUP_FROZEN; + if (strcmp(buf, freezer_state_strs(0)) == 0) + freeze = false; + else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0) + freeze = true; else return -EINVAL; - if (!cgroup_lock_live_group(cgroup)) - return -ENODEV; - retval = freezer_change_state(cgroup, goal_state); - cgroup_unlock(); - return retval; + freezer_change_state(css_freezer(of_css(of)), freeze); + return nbytes; +} + +static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct freezer *freezer = css_freezer(css); + + return (bool)(freezer->state & CGROUP_FREEZING_SELF); +} + +static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css, + struct cftype *cft) +{ + struct freezer *freezer = css_freezer(css); + + return (bool)(freezer->state & CGROUP_FREEZING_PARENT); } static struct cftype files[] = { { .name = "state", - .read_seq_string = freezer_read, - .write_string = freezer_write, + .flags = CFTYPE_NOT_ON_ROOT, + .seq_show = freezer_read, + .write = freezer_write, }, + { + .name = "self_freezing", + .flags = CFTYPE_NOT_ON_ROOT, + .read_u64 = freezer_self_freezing_read, + }, + { + .name = "parent_freezing", + .flags = CFTYPE_NOT_ON_ROOT, + .read_u64 = freezer_parent_freezing_read, + }, + { } /* terminate */ }; -static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup) -{ - if (!cgroup->parent) - return 0; - return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files)); -} - -struct cgroup_subsys freezer_subsys = { - .name = "freezer", - .create = freezer_create, - .destroy = freezer_destroy, - .populate = freezer_populate, - .subsys_id = freezer_subsys_id, - .can_attach = freezer_can_attach, +struct cgroup_subsys freezer_cgrp_subsys = { + .css_alloc = freezer_css_alloc, + .css_online = freezer_css_online, + .css_offline = freezer_css_offline, + .css_free = freezer_css_free, + .attach = freezer_attach, .fork = freezer_fork, + .base_cftypes = files, }; |
