aboutsummaryrefslogtreecommitdiff
path: root/block/blk-cgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'block/blk-cgroup.c')
-rw-r--r--block/blk-cgroup.c116
1 files changed, 66 insertions, 50 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 290792a13e3..28d227c5ca7 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -80,7 +80,7 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
blkg->q = q;
INIT_LIST_HEAD(&blkg->q_node);
blkg->blkcg = blkcg;
- blkg->refcnt = 1;
+ atomic_set(&blkg->refcnt, 1);
/* root blkg uses @q->root_rl, init rl only for !root blkgs */
if (blkcg != &blkcg_root) {
@@ -185,7 +185,7 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
lockdep_assert_held(q->queue_lock);
/* blkg holds a reference to blkcg */
- if (!css_tryget(&blkcg->css)) {
+ if (!css_tryget_online(&blkcg->css)) {
ret = -EINVAL;
goto err_free_blkg;
}
@@ -235,8 +235,13 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
blkg->online = true;
spin_unlock(&blkcg->lock);
- if (!ret)
+ if (!ret) {
+ if (blkcg == &blkcg_root) {
+ q->root_blkg = blkg;
+ q->root_rl.blkg = blkg;
+ }
return blkg;
+ }
/* @blkg failed fully initialized, use the usual release path */
blkg_put(blkg);
@@ -331,10 +336,19 @@ static void blkg_destroy(struct blkcg_gq *blkg)
* under queue_lock. If it's not pointing to @blkg now, it never
* will. Hint assignment itself can race safely.
*/
- if (rcu_dereference_raw(blkcg->blkg_hint) == blkg)
+ if (rcu_access_pointer(blkcg->blkg_hint) == blkg)
rcu_assign_pointer(blkcg->blkg_hint, NULL);
/*
+ * If root blkg is destroyed. Just clear the pointer since root_rl
+ * does not take reference on root blkg.
+ */
+ if (blkcg == &blkcg_root) {
+ blkg->q->root_blkg = NULL;
+ blkg->q->root_rl.blkg = NULL;
+ }
+
+ /*
* Put the reference taken at the time of creation so that when all
* queues are gone, group can be destroyed.
*/
@@ -360,13 +374,6 @@ static void blkg_destroy_all(struct request_queue *q)
blkg_destroy(blkg);
spin_unlock(&blkcg->lock);
}
-
- /*
- * root blkg is destroyed. Just clear the pointer since
- * root_rl does not take reference on root blkg.
- */
- q->root_blkg = NULL;
- q->root_rl.blkg = NULL;
}
/*
@@ -392,11 +399,8 @@ void __blkg_release_rcu(struct rcu_head *rcu_head)
/* release the blkcg and parent blkg refs this blkg has been holding */
css_put(&blkg->blkcg->css);
- if (blkg->parent) {
- spin_lock_irq(blkg->q->queue_lock);
+ if (blkg->parent)
blkg_put(blkg->parent);
- spin_unlock_irq(blkg->q->queue_lock);
- }
blkg_free(blkg);
}
@@ -437,14 +441,27 @@ struct request_list *__blk_queue_next_rl(struct request_list *rl,
return &blkg->rl;
}
-static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype,
- u64 val)
+static int blkcg_reset_stats(struct cgroup_subsys_state *css,
+ struct cftype *cftype, u64 val)
{
- struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
+ struct blkcg *blkcg = css_to_blkcg(css);
struct blkcg_gq *blkg;
int i;
- mutex_lock(&blkcg_pol_mutex);
+ /*
+ * XXX: We invoke cgroup_add/rm_cftypes() under blkcg_pol_mutex
+ * which ends up putting cgroup's internal cgroup_tree_mutex under
+ * it; however, cgroup_tree_mutex is nested above cgroup file
+ * active protection and grabbing blkcg_pol_mutex from a cgroup
+ * file operation creates a possible circular dependency. cgroup
+ * internal locking is planned to go through further simplification
+ * and this issue should go away soon. For now, let's trylock
+ * blkcg_pol_mutex and restart the write on failure.
+ *
+ * http://lkml.kernel.org/g/5363C04B.4010400@oracle.com
+ */
+ if (!mutex_trylock(&blkcg_pol_mutex))
+ return restart_syscall();
spin_lock_irq(&blkcg->lock);
/*
@@ -614,15 +631,13 @@ u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off)
{
struct blkcg_policy *pol = blkcg_policy[pd->plid];
struct blkcg_gq *pos_blkg;
- struct cgroup *pos_cgrp;
- u64 sum;
+ struct cgroup_subsys_state *pos_css;
+ u64 sum = 0;
lockdep_assert_held(pd->blkg->q->queue_lock);
- sum = blkg_stat_read((void *)pd + off);
-
rcu_read_lock();
- blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) {
+ blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) {
struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
struct blkg_stat *stat = (void *)pos_pd + off;
@@ -649,16 +664,14 @@ struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
{
struct blkcg_policy *pol = blkcg_policy[pd->plid];
struct blkcg_gq *pos_blkg;
- struct cgroup *pos_cgrp;
- struct blkg_rwstat sum;
+ struct cgroup_subsys_state *pos_css;
+ struct blkg_rwstat sum = { };
int i;
lockdep_assert_held(pd->blkg->q->queue_lock);
- sum = blkg_rwstat_read((void *)pd + off);
-
rcu_read_lock();
- blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) {
+ blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) {
struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
struct blkg_rwstat *rwstat = (void *)pos_pd + off;
struct blkg_rwstat tmp;
@@ -765,18 +778,18 @@ struct cftype blkcg_files[] = {
/**
* blkcg_css_offline - cgroup css_offline callback
- * @cgroup: cgroup of interest
+ * @css: css of interest
*
- * This function is called when @cgroup is about to go away and responsible
- * for shooting down all blkgs associated with @cgroup. blkgs should be
+ * This function is called when @css is about to go away and responsible
+ * for shooting down all blkgs associated with @css. blkgs should be
* removed while holding both q and blkcg locks. As blkcg lock is nested
* inside q lock, this function performs reverse double lock dancing.
*
* This is the blkcg counterpart of ioc_release_fn().
*/
-static void blkcg_css_offline(struct cgroup *cgroup)
+static void blkcg_css_offline(struct cgroup_subsys_state *css)
{
- struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
+ struct blkcg *blkcg = css_to_blkcg(css);
spin_lock_irq(&blkcg->lock);
@@ -798,21 +811,21 @@ static void blkcg_css_offline(struct cgroup *cgroup)
spin_unlock_irq(&blkcg->lock);
}
-static void blkcg_css_free(struct cgroup *cgroup)
+static void blkcg_css_free(struct cgroup_subsys_state *css)
{
- struct blkcg *blkcg = cgroup_to_blkcg(cgroup);
+ struct blkcg *blkcg = css_to_blkcg(css);
if (blkcg != &blkcg_root)
kfree(blkcg);
}
-static struct cgroup_subsys_state *blkcg_css_alloc(struct cgroup *cgroup)
+static struct cgroup_subsys_state *
+blkcg_css_alloc(struct cgroup_subsys_state *parent_css)
{
static atomic64_t id_seq = ATOMIC64_INIT(0);
struct blkcg *blkcg;
- struct cgroup *parent = cgroup->parent;
- if (!parent) {
+ if (!parent_css) {
blkcg = &blkcg_root;
goto done;
}
@@ -859,6 +872,13 @@ void blkcg_drain_queue(struct request_queue *q)
{
lockdep_assert_held(q->queue_lock);
+ /*
+ * @q could be exiting and already have destroyed all blkgs as
+ * indicated by NULL root_blkg. If so, don't confuse policies.
+ */
+ if (!q->root_blkg)
+ return;
+
blk_throtl_drain(q);
}
@@ -883,14 +903,15 @@ void blkcg_exit_queue(struct request_queue *q)
* of the main cic data structures. For now we allow a task to change
* its cgroup only if it's the only owner of its ioc.
*/
-static int blkcg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
+static int blkcg_can_attach(struct cgroup_subsys_state *css,
+ struct cgroup_taskset *tset)
{
struct task_struct *task;
struct io_context *ioc;
int ret = 0;
/* task_lock() is needed to avoid races with exit_io_context() */
- cgroup_taskset_for_each(task, cgrp, tset) {
+ cgroup_taskset_for_each(task, tset) {
task_lock(task);
ioc = task->io_context;
if (ioc && atomic_read(&ioc->nr_tasks) > 1)
@@ -902,17 +923,14 @@ static int blkcg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
return ret;
}
-struct cgroup_subsys blkio_subsys = {
- .name = "blkio",
+struct cgroup_subsys blkio_cgrp_subsys = {
.css_alloc = blkcg_css_alloc,
.css_offline = blkcg_css_offline,
.css_free = blkcg_css_free,
.can_attach = blkcg_can_attach,
- .subsys_id = blkio_subsys_id,
.base_cftypes = blkcg_files,
- .module = THIS_MODULE,
};
-EXPORT_SYMBOL_GPL(blkio_subsys);
+EXPORT_SYMBOL_GPL(blkio_cgrp_subsys);
/**
* blkcg_activate_policy - activate a blkcg policy on a request_queue
@@ -973,8 +991,6 @@ int blkcg_activate_policy(struct request_queue *q,
ret = PTR_ERR(blkg);
goto out_unlock;
}
- q->root_blkg = blkg;
- q->root_rl.blkg = blkg;
list_for_each_entry(blkg, &q->blkg_list, q_node)
cnt++;
@@ -1104,7 +1120,7 @@ int blkcg_policy_register(struct blkcg_policy *pol)
/* everything is in place, add intf files for the new policy */
if (pol->cftypes)
- WARN_ON(cgroup_add_cftypes(&blkio_subsys, pol->cftypes));
+ WARN_ON(cgroup_add_cftypes(&blkio_cgrp_subsys, pol->cftypes));
ret = 0;
out_unlock:
mutex_unlock(&blkcg_pol_mutex);
@@ -1127,7 +1143,7 @@ void blkcg_policy_unregister(struct blkcg_policy *pol)
/* kill the intf files first */
if (pol->cftypes)
- cgroup_rm_cftypes(&blkio_subsys, pol->cftypes);
+ cgroup_rm_cftypes(pol->cftypes);
/* unregister and update blkgs */
blkcg_policy[pol->plid] = NULL;