diff options
Diffstat (limited to 'kernel/cgroup_freezer.c')
| -rw-r--r-- | kernel/cgroup_freezer.c | 622 | 
1 files changed, 355 insertions, 267 deletions
diff --git a/kernel/cgroup_freezer.c b/kernel/cgroup_freezer.c index e7bebb7c6c3..a79e40f9d70 100644 --- a/kernel/cgroup_freezer.c +++ b/kernel/cgroup_freezer.c @@ -14,383 +14,471 @@   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.   */ -#include <linux/module.h> +#include <linux/export.h>  #include <linux/slab.h>  #include <linux/cgroup.h>  #include <linux/fs.h>  #include <linux/uaccess.h>  #include <linux/freezer.h>  #include <linux/seq_file.h> +#include <linux/mutex.h> -enum freezer_state { -	CGROUP_THAWED = 0, -	CGROUP_FREEZING, -	CGROUP_FROZEN, +/* + * A cgroup is freezing if any FREEZING flags are set.  FREEZING_SELF is + * set if "FROZEN" is written to freezer.state cgroupfs file, and cleared + * for "THAWED".  FREEZING_PARENT is set if the parent freezer is FREEZING + * for whatever reason.  IOW, a cgroup has FREEZING_PARENT set if one of + * its ancestors has FREEZING_SELF set. + */ +enum freezer_state_flags { +	CGROUP_FREEZER_ONLINE	= (1 << 0), /* freezer is fully online */ +	CGROUP_FREEZING_SELF	= (1 << 1), /* this freezer is freezing */ +	CGROUP_FREEZING_PARENT	= (1 << 2), /* the parent freezer is freezing */ +	CGROUP_FROZEN		= (1 << 3), /* this and its descendants frozen */ + +	/* mask for all FREEZING flags */ +	CGROUP_FREEZING		= CGROUP_FREEZING_SELF | CGROUP_FREEZING_PARENT,  };  struct freezer { -	struct cgroup_subsys_state css; -	enum freezer_state state; -	spinlock_t lock; /* protects _writes_ to state */ +	struct cgroup_subsys_state	css; +	unsigned int			state;  }; -static inline struct freezer *cgroup_freezer( -		struct cgroup *cgroup) +static DEFINE_MUTEX(freezer_mutex); + +static inline struct freezer *css_freezer(struct cgroup_subsys_state *css)  { -	return container_of( -		cgroup_subsys_state(cgroup, freezer_subsys_id), -		struct freezer, css); +	return css ? 
container_of(css, struct freezer, css) : NULL;  }  static inline struct freezer *task_freezer(struct task_struct *task)  { -	return container_of(task_subsys_state(task, freezer_subsys_id), -			    struct freezer, css); +	return css_freezer(task_css(task, freezer_cgrp_id));  } -static inline int __cgroup_freezing_or_frozen(struct task_struct *task) +static struct freezer *parent_freezer(struct freezer *freezer)  { -	enum freezer_state state = task_freezer(task)->state; -	return (state == CGROUP_FREEZING) || (state == CGROUP_FROZEN); +	return css_freezer(freezer->css.parent);  } -int cgroup_freezing_or_frozen(struct task_struct *task) +bool cgroup_freezing(struct task_struct *task)  { -	int result; -	task_lock(task); -	result = __cgroup_freezing_or_frozen(task); -	task_unlock(task); -	return result; +	bool ret; + +	rcu_read_lock(); +	ret = task_freezer(task)->state & CGROUP_FREEZING; +	rcu_read_unlock(); + +	return ret;  } -/* - * cgroups_write_string() limits the size of freezer state strings to - * CGROUP_LOCAL_BUFFER_SIZE - */ -static const char *freezer_state_strs[] = { -	"THAWED", -	"FREEZING", -	"FROZEN", +static const char *freezer_state_strs(unsigned int state) +{ +	if (state & CGROUP_FROZEN) +		return "FROZEN"; +	if (state & CGROUP_FREEZING) +		return "FREEZING"; +	return "THAWED";  }; -/* - * State diagram - * Transitions are caused by userspace writes to the freezer.state file. - * The values in parenthesis are state labels. The rest are edge labels. 
+static struct cgroup_subsys_state * +freezer_css_alloc(struct cgroup_subsys_state *parent_css) +{ +	struct freezer *freezer; + +	freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL); +	if (!freezer) +		return ERR_PTR(-ENOMEM); + +	return &freezer->css; +} + +/** + * freezer_css_online - commit creation of a freezer css + * @css: css being created   * - * (THAWED) --FROZEN--> (FREEZING) --FROZEN--> (FROZEN) - *    ^ ^                    |                     | - *    | \_______THAWED_______/                     | - *    \__________________________THAWED____________/ + * We're committing to creation of @css.  Mark it online and inherit + * parent's freezing state.  Both updates are made while holding + * freezer_mutex.   */ +static int freezer_css_online(struct cgroup_subsys_state *css) +{ +	struct freezer *freezer = css_freezer(css); +	struct freezer *parent = parent_freezer(freezer); -struct cgroup_subsys freezer_subsys; +	mutex_lock(&freezer_mutex); -/* Locks taken and their ordering - * ------------------------------ - * cgroup_mutex (AKA cgroup_lock) - * freezer->lock - * css_set_lock - * task->alloc_lock (AKA task_lock) - * task->sighand->siglock - * - * cgroup code forces css_set_lock to be taken before task->alloc_lock - * - * freezer_create(), freezer_destroy(): - * cgroup_mutex [ by cgroup core ] - * - * freezer_can_attach(): - * cgroup_mutex (held by caller of can_attach) - * - * cgroup_freezing_or_frozen(): - * task->alloc_lock (to get task's cgroup) - * - * freezer_fork() (preserving fork() performance means can't take cgroup_mutex): - * freezer->lock - *  sighand->siglock (if the cgroup is freezing) - * - * freezer_read(): - * cgroup_mutex - *  freezer->lock - *   write_lock css_set_lock (cgroup iterator start) - *    task->alloc_lock - *   read_lock css_set_lock (cgroup iterator start) - * - * freezer_write() (freeze): - * cgroup_mutex - *  freezer->lock - *   write_lock css_set_lock (cgroup iterator start) - *    task->alloc_lock - *   read_lock 
css_set_lock (cgroup iterator start) - *    sighand->siglock (fake signal delivery inside freeze_task()) +	freezer->state |= CGROUP_FREEZER_ONLINE; + +	if (parent && (parent->state & CGROUP_FREEZING)) { +		freezer->state |= CGROUP_FREEZING_PARENT | CGROUP_FROZEN; +		atomic_inc(&system_freezing_cnt); +	} + +	mutex_unlock(&freezer_mutex); +	return 0; +} + +/** + * freezer_css_offline - initiate destruction of a freezer css + * @css: css being destroyed   * - * freezer_write() (unfreeze): - * cgroup_mutex - *  freezer->lock - *   write_lock css_set_lock (cgroup iterator start) - *    task->alloc_lock - *   read_lock css_set_lock (cgroup iterator start) - *    task->alloc_lock (inside thaw_process(), prevents race with refrigerator()) - *     sighand->siglock + * @css is going away.  Mark it dead and decrement system_freezing_cnt if + * it was holding one.   */ -static struct cgroup_subsys_state *freezer_create(struct cgroup_subsys *ss, -						  struct cgroup *cgroup) +static void freezer_css_offline(struct cgroup_subsys_state *css)  { -	struct freezer *freezer; +	struct freezer *freezer = css_freezer(css); -	freezer = kzalloc(sizeof(struct freezer), GFP_KERNEL); -	if (!freezer) -		return ERR_PTR(-ENOMEM); +	mutex_lock(&freezer_mutex); -	spin_lock_init(&freezer->lock); -	freezer->state = CGROUP_THAWED; -	return &freezer->css; +	if (freezer->state & CGROUP_FREEZING) +		atomic_dec(&system_freezing_cnt); + +	freezer->state = 0; + +	mutex_unlock(&freezer_mutex);  } -static void freezer_destroy(struct cgroup_subsys *ss, -			    struct cgroup *cgroup) +static void freezer_css_free(struct cgroup_subsys_state *css)  { -	kfree(cgroup_freezer(cgroup)); +	kfree(css_freezer(css));  }  /* - * The call to cgroup_lock() in the freezer.state write method prevents - * a write to that file racing against an attach, and hence the - * can_attach() result will remain valid until the attach completes. 
+ * Tasks can be migrated into a different freezer anytime regardless of its + * current state.  freezer_attach() is responsible for making new tasks + * conform to the current state. + * + * Freezer state changes and task migration are synchronized via + * freezer_mutex.  freezer_attach() makes the new tasks conform to the + * current state and all following state changes can see the new tasks.   */ -static int freezer_can_attach(struct cgroup_subsys *ss, -			      struct cgroup *new_cgroup, -			      struct task_struct *task, bool threadgroup) +static void freezer_attach(struct cgroup_subsys_state *new_css, +			   struct cgroup_taskset *tset)  { -	struct freezer *freezer; +	struct freezer *freezer = css_freezer(new_css); +	struct task_struct *task; +	bool clear_frozen = false; + +	mutex_lock(&freezer_mutex);  	/* -	 * Anything frozen can't move or be moved to/from. +	 * Make the new tasks conform to the current state of @new_css. +	 * For simplicity, when migrating any task to a FROZEN cgroup, we +	 * revert it to FREEZING and let update_if_frozen() determine the +	 * correct state later. +	 * +	 * Tasks in @tset are on @new_css but may not conform to its +	 * current state before executing the following - !frozen tasks may +	 * be visible in a FROZEN cgroup and frozen tasks in a THAWED one.  	 
*/ - -	freezer = cgroup_freezer(new_cgroup); -	if (freezer->state != CGROUP_THAWED) -		return -EBUSY; - -	rcu_read_lock(); -	if (__cgroup_freezing_or_frozen(task)) { -		rcu_read_unlock(); -		return -EBUSY; +	cgroup_taskset_for_each(task, tset) { +		if (!(freezer->state & CGROUP_FREEZING)) { +			__thaw_task(task); +		} else { +			freeze_task(task); +			freezer->state &= ~CGROUP_FROZEN; +			clear_frozen = true; +		}  	} -	rcu_read_unlock(); - -	if (threadgroup) { -		struct task_struct *c; -		rcu_read_lock(); -		list_for_each_entry_rcu(c, &task->thread_group, thread_group) { -			if (__cgroup_freezing_or_frozen(c)) { -				rcu_read_unlock(); -				return -EBUSY; -			} -		} -		rcu_read_unlock(); +	/* propagate FROZEN clearing upwards */ +	while (clear_frozen && (freezer = parent_freezer(freezer))) { +		freezer->state &= ~CGROUP_FROZEN; +		clear_frozen = freezer->state & CGROUP_FREEZING;  	} -	return 0; +	mutex_unlock(&freezer_mutex);  } -static void freezer_fork(struct cgroup_subsys *ss, struct task_struct *task) +/** + * freezer_fork - cgroup post fork callback + * @task: a task which has just been forked + * + * @task has just been created and should conform to the current state of + * the cgroup_freezer it belongs to.  This function may race against + * freezer_attach().  Losing to freezer_attach() means that we don't have + * to do anything as freezer_attach() will put @task into the appropriate + * state. + */ +static void freezer_fork(struct task_struct *task)  {  	struct freezer *freezer;  	/* -	 * No lock is needed, since the task isn't on tasklist yet, -	 * so it can't be moved to another cgroup, which means the -	 * freezer won't be removed and will be valid during this -	 * function call.  Nevertheless, apply RCU read-side critical -	 * section to suppress RCU lockdep false positives. +	 * The root cgroup is non-freezable, so we can skip locking the +	 * freezer.  This is safe regardless of race with task migration. 
+	 * If we didn't race or won, skipping is obviously the right thing +	 * to do.  If we lost and root is the new cgroup, noop is still the +	 * right thing to do.  	 */ -	rcu_read_lock(); -	freezer = task_freezer(task); -	rcu_read_unlock(); - -	/* -	 * The root cgroup is non-freezable, so we can skip the -	 * following check. -	 */ -	if (!freezer->css.cgroup->parent) +	if (task_css_is_root(task, freezer_cgrp_id))  		return; -	spin_lock_irq(&freezer->lock); -	BUG_ON(freezer->state == CGROUP_FROZEN); +	mutex_lock(&freezer_mutex); +	rcu_read_lock(); + +	freezer = task_freezer(task); +	if (freezer->state & CGROUP_FREEZING) +		freeze_task(task); -	/* Locking avoids race with FREEZING -> THAWED transitions. */ -	if (freezer->state == CGROUP_FREEZING) -		freeze_task(task, true); -	spin_unlock_irq(&freezer->lock); +	rcu_read_unlock(); +	mutex_unlock(&freezer_mutex);  } -/* - * caller must hold freezer->lock +/** + * update_if_frozen - update whether a cgroup finished freezing + * @css: css of interest + * + * Once FREEZING is initiated, transition to FROZEN is lazily updated by + * calling this function.  If the current state is FREEZING but not FROZEN, + * this function checks whether all tasks of this cgroup and the descendant + * cgroups finished freezing and, if so, sets FROZEN. + * + * The caller is responsible for holding freezer_mutex and calling + * update_if_frozen() on all descendants prior to invoking this function. + * + * Task states and freezer state might disagree while tasks are being + * migrated into or out of @css, so we can't verify task states against + * @freezer state here.  See freezer_attach() for details.   
*/ -static void update_if_frozen(struct cgroup *cgroup, -				 struct freezer *freezer) +static void update_if_frozen(struct cgroup_subsys_state *css)  { -	struct cgroup_iter it; +	struct freezer *freezer = css_freezer(css); +	struct cgroup_subsys_state *pos; +	struct css_task_iter it;  	struct task_struct *task; -	unsigned int nfrozen = 0, ntotal = 0; -	enum freezer_state old_state = freezer->state; - -	cgroup_iter_start(cgroup, &it); -	while ((task = cgroup_iter_next(cgroup, &it))) { -		ntotal++; -		if (frozen(task)) -			nfrozen++; + +	lockdep_assert_held(&freezer_mutex); + +	if (!(freezer->state & CGROUP_FREEZING) || +	    (freezer->state & CGROUP_FROZEN)) +		return; + +	/* are all (live) children frozen? */ +	rcu_read_lock(); +	css_for_each_child(pos, css) { +		struct freezer *child = css_freezer(pos); + +		if ((child->state & CGROUP_FREEZER_ONLINE) && +		    !(child->state & CGROUP_FROZEN)) { +			rcu_read_unlock(); +			return; +		}  	} +	rcu_read_unlock(); -	if (old_state == CGROUP_THAWED) { -		BUG_ON(nfrozen > 0); -	} else if (old_state == CGROUP_FREEZING) { -		if (nfrozen == ntotal) -			freezer->state = CGROUP_FROZEN; -	} else { /* old_state == CGROUP_FROZEN */ -		BUG_ON(nfrozen != ntotal); +	/* are all tasks frozen? */ +	css_task_iter_start(css, &it); + +	while ((task = css_task_iter_next(&it))) { +		if (freezing(task)) { +			/* +			 * freezer_should_skip() indicates that the task +			 * should be skipped when determining freezing +			 * completion.  Consider it frozen in addition to +			 * the usual frozen condition. 
+			 */ +			if (!frozen(task) && !freezer_should_skip(task)) +				goto out_iter_end; +		}  	} -	cgroup_iter_end(cgroup, &it); +	freezer->state |= CGROUP_FROZEN; +out_iter_end: +	css_task_iter_end(&it);  } -static int freezer_read(struct cgroup *cgroup, struct cftype *cft, -			struct seq_file *m) +static int freezer_read(struct seq_file *m, void *v)  { -	struct freezer *freezer; -	enum freezer_state state; - -	if (!cgroup_lock_live_group(cgroup)) -		return -ENODEV; - -	freezer = cgroup_freezer(cgroup); -	spin_lock_irq(&freezer->lock); -	state = freezer->state; -	if (state == CGROUP_FREEZING) { -		/* We change from FREEZING to FROZEN lazily if the cgroup was -		 * only partially frozen when we exitted write. */ -		update_if_frozen(cgroup, freezer); -		state = freezer->state; +	struct cgroup_subsys_state *css = seq_css(m), *pos; + +	mutex_lock(&freezer_mutex); +	rcu_read_lock(); + +	/* update states bottom-up */ +	css_for_each_descendant_post(pos, css) { +		if (!css_tryget_online(pos)) +			continue; +		rcu_read_unlock(); + +		update_if_frozen(pos); + +		rcu_read_lock(); +		css_put(pos);  	} -	spin_unlock_irq(&freezer->lock); -	cgroup_unlock(); -	seq_puts(m, freezer_state_strs[state]); +	rcu_read_unlock(); +	mutex_unlock(&freezer_mutex); + +	seq_puts(m, freezer_state_strs(css_freezer(css)->state));  	seq_putc(m, '\n');  	return 0;  } -static int try_to_freeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) +static void freeze_cgroup(struct freezer *freezer)  { -	struct cgroup_iter it; +	struct css_task_iter it;  	struct task_struct *task; -	unsigned int num_cant_freeze_now = 0; -	freezer->state = CGROUP_FREEZING; -	cgroup_iter_start(cgroup, &it); -	while ((task = cgroup_iter_next(cgroup, &it))) { -		if (!freeze_task(task, true)) -			continue; -		if (frozen(task)) -			continue; -		if (!freezing(task) && !freezer_should_skip(task)) -			num_cant_freeze_now++; -	} -	cgroup_iter_end(cgroup, &it); - -	return num_cant_freeze_now ? 
-EBUSY : 0; +	css_task_iter_start(&freezer->css, &it); +	while ((task = css_task_iter_next(&it))) +		freeze_task(task); +	css_task_iter_end(&it);  } -static void unfreeze_cgroup(struct cgroup *cgroup, struct freezer *freezer) +static void unfreeze_cgroup(struct freezer *freezer)  { -	struct cgroup_iter it; +	struct css_task_iter it;  	struct task_struct *task; -	cgroup_iter_start(cgroup, &it); -	while ((task = cgroup_iter_next(cgroup, &it))) { -		thaw_process(task); -	} -	cgroup_iter_end(cgroup, &it); +	css_task_iter_start(&freezer->css, &it); +	while ((task = css_task_iter_next(&it))) +		__thaw_task(task); +	css_task_iter_end(&it); +} -	freezer->state = CGROUP_THAWED; +/** + * freezer_apply_state - apply state change to a single cgroup_freezer + * @freezer: freezer to apply state change to + * @freeze: whether to freeze or unfreeze + * @state: CGROUP_FREEZING_* flag to set or clear + * + * Set or clear @state on @freezer according to @freeze, and perform + * freezing or thawing as necessary. 
+ */ +static void freezer_apply_state(struct freezer *freezer, bool freeze, +				unsigned int state) +{ +	/* also synchronizes against task migration, see freezer_attach() */ +	lockdep_assert_held(&freezer_mutex); + +	if (!(freezer->state & CGROUP_FREEZER_ONLINE)) +		return; + +	if (freeze) { +		if (!(freezer->state & CGROUP_FREEZING)) +			atomic_inc(&system_freezing_cnt); +		freezer->state |= state; +		freeze_cgroup(freezer); +	} else { +		bool was_freezing = freezer->state & CGROUP_FREEZING; + +		freezer->state &= ~state; + +		if (!(freezer->state & CGROUP_FREEZING)) { +			if (was_freezing) +				atomic_dec(&system_freezing_cnt); +			freezer->state &= ~CGROUP_FROZEN; +			unfreeze_cgroup(freezer); +		} +	}  } -static int freezer_change_state(struct cgroup *cgroup, -				enum freezer_state goal_state) +/** + * freezer_change_state - change the freezing state of a cgroup_freezer + * @freezer: freezer of interest + * @freeze: whether to freeze or thaw + * + * Freeze or thaw @freezer according to @freeze.  The operations are + * recursive - all descendants of @freezer will be affected. + */ +static void freezer_change_state(struct freezer *freezer, bool freeze)  { -	struct freezer *freezer; -	int retval = 0; +	struct cgroup_subsys_state *pos; -	freezer = cgroup_freezer(cgroup); +	/* +	 * Update all its descendants in pre-order traversal.  Each +	 * descendant will try to inherit its parent's FREEZING state as +	 * CGROUP_FREEZING_PARENT. 
+	 */ +	mutex_lock(&freezer_mutex); +	rcu_read_lock(); +	css_for_each_descendant_pre(pos, &freezer->css) { +		struct freezer *pos_f = css_freezer(pos); +		struct freezer *parent = parent_freezer(pos_f); -	spin_lock_irq(&freezer->lock); +		if (!css_tryget_online(pos)) +			continue; +		rcu_read_unlock(); -	update_if_frozen(cgroup, freezer); -	if (goal_state == freezer->state) -		goto out; +		if (pos_f == freezer) +			freezer_apply_state(pos_f, freeze, +					    CGROUP_FREEZING_SELF); +		else +			freezer_apply_state(pos_f, +					    parent->state & CGROUP_FREEZING, +					    CGROUP_FREEZING_PARENT); -	switch (goal_state) { -	case CGROUP_THAWED: -		unfreeze_cgroup(cgroup, freezer); -		break; -	case CGROUP_FROZEN: -		retval = try_to_freeze_cgroup(cgroup, freezer); -		break; -	default: -		BUG(); +		rcu_read_lock(); +		css_put(pos);  	} -out: -	spin_unlock_irq(&freezer->lock); - -	return retval; +	rcu_read_unlock(); +	mutex_unlock(&freezer_mutex);  } -static int freezer_write(struct cgroup *cgroup, -			 struct cftype *cft, -			 const char *buffer) +static ssize_t freezer_write(struct kernfs_open_file *of, +			     char *buf, size_t nbytes, loff_t off)  { -	int retval; -	enum freezer_state goal_state; +	bool freeze; -	if (strcmp(buffer, freezer_state_strs[CGROUP_THAWED]) == 0) -		goal_state = CGROUP_THAWED; -	else if (strcmp(buffer, freezer_state_strs[CGROUP_FROZEN]) == 0) -		goal_state = CGROUP_FROZEN; +	buf = strstrip(buf); + +	if (strcmp(buf, freezer_state_strs(0)) == 0) +		freeze = false; +	else if (strcmp(buf, freezer_state_strs(CGROUP_FROZEN)) == 0) +		freeze = true;  	else  		return -EINVAL; -	if (!cgroup_lock_live_group(cgroup)) -		return -ENODEV; -	retval = freezer_change_state(cgroup, goal_state); -	cgroup_unlock(); -	return retval; +	freezer_change_state(css_freezer(of_css(of)), freeze); +	return nbytes; +} + +static u64 freezer_self_freezing_read(struct cgroup_subsys_state *css, +				      struct cftype *cft) +{ +	struct freezer *freezer = css_freezer(css); + +	
return (bool)(freezer->state & CGROUP_FREEZING_SELF); +} + +static u64 freezer_parent_freezing_read(struct cgroup_subsys_state *css, +					struct cftype *cft) +{ +	struct freezer *freezer = css_freezer(css); + +	return (bool)(freezer->state & CGROUP_FREEZING_PARENT);  }  static struct cftype files[] = {  	{  		.name = "state", -		.read_seq_string = freezer_read, -		.write_string = freezer_write, +		.flags = CFTYPE_NOT_ON_ROOT, +		.seq_show = freezer_read, +		.write = freezer_write, +	}, +	{ +		.name = "self_freezing", +		.flags = CFTYPE_NOT_ON_ROOT, +		.read_u64 = freezer_self_freezing_read, +	}, +	{ +		.name = "parent_freezing", +		.flags = CFTYPE_NOT_ON_ROOT, +		.read_u64 = freezer_parent_freezing_read,  	}, +	{ }	/* terminate */  }; -static int freezer_populate(struct cgroup_subsys *ss, struct cgroup *cgroup) -{ -	if (!cgroup->parent) -		return 0; -	return cgroup_add_files(cgroup, ss, files, ARRAY_SIZE(files)); -} - -struct cgroup_subsys freezer_subsys = { -	.name		= "freezer", -	.create		= freezer_create, -	.destroy	= freezer_destroy, -	.populate	= freezer_populate, -	.subsys_id	= freezer_subsys_id, -	.can_attach	= freezer_can_attach, -	.attach		= NULL, +struct cgroup_subsys freezer_cgrp_subsys = { +	.css_alloc	= freezer_css_alloc, +	.css_online	= freezer_css_online, +	.css_offline	= freezer_css_offline, +	.css_free	= freezer_css_free, +	.attach		= freezer_attach,  	.fork		= freezer_fork, -	.exit		= NULL, +	.base_cftypes	= files,  };  | 
