diff options
Diffstat (limited to 'block/blk-cgroup.c')
| -rw-r--r-- | block/blk-cgroup.c | 518 |
1 files changed, 380 insertions, 138 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index e7dee617358..28d227c5ca7 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -26,32 +26,12 @@ static DEFINE_MUTEX(blkcg_pol_mutex); -struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT }; +struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT, + .cfq_leaf_weight = 2 * CFQ_WEIGHT_DEFAULT, }; EXPORT_SYMBOL_GPL(blkcg_root); static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS]; -struct blkcg *cgroup_to_blkcg(struct cgroup *cgroup) -{ - return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id), - struct blkcg, css); -} -EXPORT_SYMBOL_GPL(cgroup_to_blkcg); - -static struct blkcg *task_blkcg(struct task_struct *tsk) -{ - return container_of(task_subsys_state(tsk, blkio_subsys_id), - struct blkcg, css); -} - -struct blkcg *bio_blkcg(struct bio *bio) -{ - if (bio && bio->bi_css) - return container_of(bio->bi_css, struct blkcg, css); - return task_blkcg(current); -} -EXPORT_SYMBOL_GPL(bio_blkcg); - static bool blkcg_policy_enabled(struct request_queue *q, const struct blkcg_policy *pol) { @@ -71,19 +51,10 @@ static void blkg_free(struct blkcg_gq *blkg) if (!blkg) return; - for (i = 0; i < BLKCG_MAX_POLS; i++) { - struct blkcg_policy *pol = blkcg_policy[i]; - struct blkg_policy_data *pd = blkg->pd[i]; - - if (!pd) - continue; - - if (pol && pol->pd_exit_fn) - pol->pd_exit_fn(blkg); - - kfree(pd); - } + for (i = 0; i < BLKCG_MAX_POLS; i++) + kfree(blkg->pd[i]); + blk_exit_rl(&blkg->rl); kfree(blkg); } @@ -91,23 +62,32 @@ static void blkg_free(struct blkcg_gq *blkg) * blkg_alloc - allocate a blkg * @blkcg: block cgroup the new blkg is associated with * @q: request_queue the new blkg is associated with + * @gfp_mask: allocation mask to use * * Allocate a new blkg assocating @blkcg and @q. */ -static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q) +static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q, + gfp_t gfp_mask) { struct blkcg_gq *blkg; int i; /* alloc and init base part */ - blkg = kzalloc_node(sizeof(*blkg), GFP_ATOMIC, q->node); + blkg = kzalloc_node(sizeof(*blkg), gfp_mask, q->node); if (!blkg) return NULL; blkg->q = q; INIT_LIST_HEAD(&blkg->q_node); blkg->blkcg = blkcg; - blkg->refcnt = 1; + atomic_set(&blkg->refcnt, 1); + + /* root blkg uses @q->root_rl, init rl only for !root blkgs */ + if (blkcg != &blkcg_root) { + if (blk_init_rl(&blkg->rl, q, gfp_mask)) + goto err_free; + blkg->rl.blkg = blkg; + } for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; @@ -117,25 +97,35 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q) continue; /* alloc per-policy data and attach it to blkg */ - pd = kzalloc_node(pol->pd_size, GFP_ATOMIC, q->node); - if (!pd) { - blkg_free(blkg); - return NULL; - } + pd = kzalloc_node(pol->pd_size, gfp_mask, q->node); + if (!pd) + goto err_free; blkg->pd[i] = pd; pd->blkg = blkg; - - /* invoke per-policy init */ - if (blkcg_policy_enabled(blkg->q, pol)) - pol->pd_init_fn(blkg); + pd->plid = i; } return blkg; + +err_free: + blkg_free(blkg); + return NULL; } -static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, - struct request_queue *q) +/** + * __blkg_lookup - internal version of blkg_lookup() + * @blkcg: blkcg of interest + * @q: request_queue of interest + * @update_hint: whether to update lookup hint with the result or not + * + * This is internal version and shouldn't be used by policy + * implementations. Looks up blkgs for the @blkcg - @q pair regardless of + * @q's bypass state. If @update_hint is %true, the caller should be + * holding @q->queue_lock and lookup hint is updated on success. + */ +struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q, + bool update_hint) { struct blkcg_gq *blkg; @@ -144,14 +134,19 @@ static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, return blkg; /* - * Hint didn't match. Look up from the radix tree. Note that we - * may not be holding queue_lock and thus are not sure whether - * @blkg from blkg_tree has already been removed or not, so we - * can't update hint to the lookup result. Leave it to the caller. + * Hint didn't match. Look up from the radix tree. Note that the + * hint can only be updated under queue_lock as otherwise @blkg + * could have already been removed from blkg_tree. The caller is + * responsible for grabbing queue_lock if @update_hint. */ blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id); - if (blkg && blkg->q == q) + if (blkg && blkg->q == q) { + if (update_hint) { + lockdep_assert_held(q->queue_lock); + rcu_assign_pointer(blkcg->blkg_hint, blkg); + } return blkg; + } return NULL; } @@ -171,77 +166,151 @@ struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q) if (unlikely(blk_queue_bypass(q))) return NULL; - return __blkg_lookup(blkcg, q); + return __blkg_lookup(blkcg, q, false); } EXPORT_SYMBOL_GPL(blkg_lookup); -static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, - struct request_queue *q) - __releases(q->queue_lock) __acquires(q->queue_lock) +/* + * If @new_blkg is %NULL, this function tries to allocate a new one as + * necessary using %GFP_ATOMIC. @new_blkg is always consumed on return. + */ +static struct blkcg_gq *blkg_create(struct blkcg *blkcg, + struct request_queue *q, + struct blkcg_gq *new_blkg) { struct blkcg_gq *blkg; - int ret; + int i, ret; WARN_ON_ONCE(!rcu_read_lock_held()); lockdep_assert_held(q->queue_lock); - /* lookup and update hint on success, see __blkg_lookup() for details */ - blkg = __blkg_lookup(blkcg, q); - if (blkg) { - rcu_assign_pointer(blkcg->blkg_hint, blkg); - return blkg; - } - /* blkg holds a reference to blkcg */ - if (!css_tryget(&blkcg->css)) - return ERR_PTR(-EINVAL); + if (!css_tryget_online(&blkcg->css)) { + ret = -EINVAL; + goto err_free_blkg; + } /* allocate */ - ret = -ENOMEM; - blkg = blkg_alloc(blkcg, q); - if (unlikely(!blkg)) - goto err_put; + if (!new_blkg) { + new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC); + if (unlikely(!new_blkg)) { + ret = -ENOMEM; + goto err_put_css; + } + } + blkg = new_blkg; - /* insert */ - ret = radix_tree_preload(GFP_ATOMIC); - if (ret) - goto err_free; + /* link parent */ + if (blkcg_parent(blkcg)) { + blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false); + if (WARN_ON_ONCE(!blkg->parent)) { + ret = -EINVAL; + goto err_put_css; + } + blkg_get(blkg->parent); + } + + /* invoke per-policy init */ + for (i = 0; i < BLKCG_MAX_POLS; i++) { + struct blkcg_policy *pol = blkcg_policy[i]; + if (blkg->pd[i] && pol->pd_init_fn) + pol->pd_init_fn(blkg); + } + + /* insert */ spin_lock(&blkcg->lock); ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg); if (likely(!ret)) { hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); list_add(&blkg->q_node, &q->blkg_list); + + for (i = 0; i < BLKCG_MAX_POLS; i++) { + struct blkcg_policy *pol = blkcg_policy[i]; + + if (blkg->pd[i] && pol->pd_online_fn) + pol->pd_online_fn(blkg); + } } + blkg->online = true; spin_unlock(&blkcg->lock); - radix_tree_preload_end(); - - if (!ret) + if (!ret) { + if (blkcg == &blkcg_root) { + q->root_blkg = blkg; + q->root_rl.blkg = blkg; + } return blkg; -err_free: - blkg_free(blkg); -err_put: + } + + /* @blkg failed fully initialized, use the usual release path */ + blkg_put(blkg); + return ERR_PTR(ret); + +err_put_css: css_put(&blkcg->css); +err_free_blkg: + blkg_free(new_blkg); return ERR_PTR(ret); } +/** + * blkg_lookup_create - lookup blkg, try to create one if not there + * @blkcg: blkcg of interest + * @q: request_queue of interest + * + * Lookup blkg for the @blkcg - @q pair. If it doesn't exist, try to + * create one. blkg creation is performed recursively from blkcg_root such + * that all non-root blkg's have access to the parent blkg. This function + * should be called under RCU read lock and @q->queue_lock. + * + * Returns pointer to the looked up or created blkg on success, ERR_PTR() + * value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not + * dead and bypassing, returns ERR_PTR(-EBUSY). + */ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, struct request_queue *q) { + struct blkcg_gq *blkg; + + WARN_ON_ONCE(!rcu_read_lock_held()); + lockdep_assert_held(q->queue_lock); + /* * This could be the first entry point of blkcg implementation and * we shouldn't allow anything to go through for a bypassing queue. */ if (unlikely(blk_queue_bypass(q))) - return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY); - return __blkg_lookup_create(blkcg, q); + return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY); + + blkg = __blkg_lookup(blkcg, q, true); + if (blkg) + return blkg; + + /* + * Create blkgs walking down from blkcg_root to @blkcg, so that all + * non-root blkgs have access to their parents. + */ + while (true) { + struct blkcg *pos = blkcg; + struct blkcg *parent = blkcg_parent(blkcg); + + while (parent && !__blkg_lookup(parent, q, false)) { + pos = parent; + parent = blkcg_parent(parent); + } + + blkg = blkg_create(pos, q, NULL); + if (pos == blkcg || IS_ERR(blkg)) + return blkg; + } } EXPORT_SYMBOL_GPL(blkg_lookup_create); static void blkg_destroy(struct blkcg_gq *blkg) { struct blkcg *blkcg = blkg->blkcg; + int i; lockdep_assert_held(blkg->q->queue_lock); lockdep_assert_held(&blkcg->lock); @@ -250,6 +319,14 @@ static void blkg_destroy(struct blkcg_gq *blkg) WARN_ON_ONCE(list_empty(&blkg->q_node)); WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node)); + for (i = 0; i < BLKCG_MAX_POLS; i++) { + struct blkcg_policy *pol = blkcg_policy[i]; + + if (blkg->pd[i] && pol->pd_offline_fn) + pol->pd_offline_fn(blkg); + } + blkg->online = false; + radix_tree_delete(&blkcg->blkg_tree, blkg->q->id); list_del_init(&blkg->q_node); hlist_del_init_rcu(&blkg->blkcg_node); @@ -259,10 +336,19 @@ static void blkg_destroy(struct blkcg_gq *blkg) * under queue_lock. If it's not pointing to @blkg now, it never * will. Hint assignment itself can race safely. */ - if (rcu_dereference_raw(blkcg->blkg_hint) == blkg) + if (rcu_access_pointer(blkcg->blkg_hint) == blkg) rcu_assign_pointer(blkcg->blkg_hint, NULL); /* + * If root blkg is destroyed. Just clear the pointer since root_rl + * does not take reference on root blkg. + */ + if (blkcg == &blkcg_root) { + blkg->q->root_blkg = NULL; + blkg->q->root_rl.blkg = NULL; + } + + /* * Put the reference taken at the time of creation so that when all * queues are gone, group can be destroyed. */ @@ -290,38 +376,92 @@ static void blkg_destroy_all(struct request_queue *q) } } -static void blkg_rcu_free(struct rcu_head *rcu_head) +/* + * A group is RCU protected, but having an rcu lock does not mean that one + * can access all the fields of blkg and assume these are valid. For + * example, don't try to follow throtl_data and request queue links. + * + * Having a reference to blkg under an rcu allows accesses to only values + * local to groups like group stats and group rate limits. + */ +void __blkg_release_rcu(struct rcu_head *rcu_head) { - blkg_free(container_of(rcu_head, struct blkcg_gq, rcu_head)); + struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head); + int i; + + /* tell policies that this one is being freed */ + for (i = 0; i < BLKCG_MAX_POLS; i++) { + struct blkcg_policy *pol = blkcg_policy[i]; + + if (blkg->pd[i] && pol->pd_exit_fn) + pol->pd_exit_fn(blkg); + } + + /* release the blkcg and parent blkg refs this blkg has been holding */ + css_put(&blkg->blkcg->css); + if (blkg->parent) + blkg_put(blkg->parent); + + blkg_free(blkg); } +EXPORT_SYMBOL_GPL(__blkg_release_rcu); -void __blkg_release(struct blkcg_gq *blkg) +/* + * The next function used by blk_queue_for_each_rl(). It's a bit tricky + * because the root blkg uses @q->root_rl instead of its own rl. + */ +struct request_list *__blk_queue_next_rl(struct request_list *rl, + struct request_queue *q) { - /* release the extra blkcg reference this blkg has been holding */ - css_put(&blkg->blkcg->css); + struct list_head *ent; + struct blkcg_gq *blkg; /* - * A group is freed in rcu manner. But having an rcu lock does not - * mean that one can access all the fields of blkg and assume these - * are valid. For example, don't try to follow throtl_data and - * request queue links. - * - * Having a reference to blkg under an rcu allows acess to only - * values local to groups like group stats and group rate limits + * Determine the current blkg list_head. The first entry is + * root_rl which is off @q->blkg_list and mapped to the head. */ - call_rcu(&blkg->rcu_head, blkg_rcu_free); + if (rl == &q->root_rl) { + ent = &q->blkg_list; + /* There are no more block groups, hence no request lists */ + if (list_empty(ent)) + return NULL; + } else { + blkg = container_of(rl, struct blkcg_gq, rl); + ent = &blkg->q_node; + } + + /* walk to the next list_head, skip root blkcg */ + ent = ent->next; + if (ent == &q->root_blkg->q_node) + ent = ent->next; + if (ent == &q->blkg_list) + return NULL; + + blkg = container_of(ent, struct blkcg_gq, q_node); + return &blkg->rl; } -EXPORT_SYMBOL_GPL(__blkg_release); -static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, - u64 val) +static int blkcg_reset_stats(struct cgroup_subsys_state *css, + struct cftype *cftype, u64 val) { - struct blkcg *blkcg = cgroup_to_blkcg(cgroup); + struct blkcg *blkcg = css_to_blkcg(css); struct blkcg_gq *blkg; - struct hlist_node *n; int i; - mutex_lock(&blkcg_pol_mutex); + /* + * XXX: We invoke cgroup_add/rm_cftypes() under blkcg_pol_mutex + * which ends up putting cgroup's internal cgroup_tree_mutex under + * it; however, cgroup_tree_mutex is nested above cgroup file + * active protection and grabbing blkcg_pol_mutex from a cgroup + * file operation creates a possible circular dependency. cgroup + * internal locking is planned to go through further simplification + * and this issue should go away soon. For now, let's trylock + * blkcg_pol_mutex and restart the write on failure. + * + * http://lkml.kernel.org/g/5363C04B.4010400@oracle.com + */ + if (!mutex_trylock(&blkcg_pol_mutex)) + return restart_syscall(); spin_lock_irq(&blkcg->lock); /* @@ -329,7 +469,7 @@ static int blkcg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, * stat updates. This is a debug feature which shouldn't exist * anyway. If you get hit by a race, retry. */ - hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) { + hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node) { for (i = 0; i < BLKCG_MAX_POLS; i++) { struct blkcg_policy *pol = blkcg_policy[i]; @@ -363,8 +503,9 @@ static const char *blkg_dev_name(struct blkcg_gq *blkg) * * This function invokes @prfill on each blkg of @blkcg if pd for the * policy specified by @pol exists. @prfill is invoked with @sf, the - * policy data and @data. If @show_total is %true, the sum of the return - * values from @prfill is printed with "Total" label at the end. + * policy data and @data and the matching queue lock held. If @show_total + * is %true, the sum of the return values from @prfill is printed with + * "Total" label at the end. * * This is to be used to construct print functions for * cftype->read_seq_string method. @@ -376,14 +517,16 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, bool show_total) { struct blkcg_gq *blkg; - struct hlist_node *n; u64 total = 0; - spin_lock_irq(&blkcg->lock); - hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) + rcu_read_lock(); + hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) { + spin_lock_irq(blkg->q->queue_lock); if (blkcg_policy_enabled(blkg->q, pol)) total += prfill(sf, blkg->pd[pol->plid], data); - spin_unlock_irq(&blkcg->lock); + spin_unlock_irq(blkg->q->queue_lock); + } + rcu_read_unlock(); if (show_total) seq_printf(sf, "Total %llu\n", (unsigned long long)total); @@ -442,6 +585,7 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v); return v; } +EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat); /** * blkg_prfill_stat - prfill callback for blkg_stat @@ -475,6 +619,78 @@ u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, EXPORT_SYMBOL_GPL(blkg_prfill_rwstat); /** + * blkg_stat_recursive_sum - collect hierarchical blkg_stat + * @pd: policy private data of interest + * @off: offset to the blkg_stat in @pd + * + * Collect the blkg_stat specified by @off from @pd and all its online + * descendants and return the sum. The caller must be holding the queue + * lock for online tests. + */ +u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off) +{ + struct blkcg_policy *pol = blkcg_policy[pd->plid]; + struct blkcg_gq *pos_blkg; + struct cgroup_subsys_state *pos_css; + u64 sum = 0; + + lockdep_assert_held(pd->blkg->q->queue_lock); + + rcu_read_lock(); + blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) { + struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol); + struct blkg_stat *stat = (void *)pos_pd + off; + + if (pos_blkg->online) + sum += blkg_stat_read(stat); + } + rcu_read_unlock(); + + return sum; +} +EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum); + +/** + * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat + * @pd: policy private data of interest + * @off: offset to the blkg_stat in @pd + * + * Collect the blkg_rwstat specified by @off from @pd and all its online + * descendants and return the sum. The caller must be holding the queue + * lock for online tests. + */ +struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd, + int off) +{ + struct blkcg_policy *pol = blkcg_policy[pd->plid]; + struct blkcg_gq *pos_blkg; + struct cgroup_subsys_state *pos_css; + struct blkg_rwstat sum = { }; + int i; + + lockdep_assert_held(pd->blkg->q->queue_lock); + + rcu_read_lock(); + blkg_for_each_descendant_pre(pos_blkg, pos_css, pd_to_blkg(pd)) { + struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol); + struct blkg_rwstat *rwstat = (void *)pos_pd + off; + struct blkg_rwstat tmp; + + if (!pos_blkg->online) + continue; + + tmp = blkg_rwstat_read(rwstat); + + for (i = 0; i < BLKG_RWSTAT_NR; i++) + sum.cnt[i] += tmp.cnt[i]; + } + rcu_read_unlock(); + + return sum; +} +EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum); + +/** * blkg_conf_prep - parse and prepare for per-blkg config update * @blkcg: target block cgroup * @pol: target policy @@ -561,19 +777,19 @@ struct cftype blkcg_files[] = { }; /** - * blkcg_pre_destroy - cgroup pre_destroy callback - * @cgroup: cgroup of interest + * blkcg_css_offline - cgroup css_offline callback + * @css: css of interest * - * This function is called when @cgroup is about to go away and responsible - * for shooting down all blkgs associated with @cgroup. blkgs should be + * This function is called when @css is about to go away and responsible + * for shooting down all blkgs associated with @css. blkgs should be * removed while holding both q and blkcg locks. As blkcg lock is nested * inside q lock, this function performs reverse double lock dancing. * * This is the blkcg counterpart of ioc_release_fn(). */ -static int blkcg_pre_destroy(struct cgroup *cgroup) +static void blkcg_css_offline(struct cgroup_subsys_state *css) { - struct blkcg *blkcg = cgroup_to_blkcg(cgroup); + struct blkcg *blkcg = css_to_blkcg(css); spin_lock_irq(&blkcg->lock); @@ -593,24 +809,23 @@ static int blkcg_pre_destroy(struct cgroup *cgroup) } spin_unlock_irq(&blkcg->lock); - return 0; } -static void blkcg_destroy(struct cgroup *cgroup) +static void blkcg_css_free(struct cgroup_subsys_state *css) { - struct blkcg *blkcg = cgroup_to_blkcg(cgroup); + struct blkcg *blkcg = css_to_blkcg(css); if (blkcg != &blkcg_root) kfree(blkcg); } -static struct cgroup_subsys_state *blkcg_create(struct cgroup *cgroup) +static struct cgroup_subsys_state * +blkcg_css_alloc(struct cgroup_subsys_state *parent_css) { static atomic64_t id_seq = ATOMIC64_INIT(0); struct blkcg *blkcg; - struct cgroup *parent = cgroup->parent; - if (!parent) { + if (!parent_css) { blkcg = &blkcg_root; goto done; } @@ -620,6 +835,7 @@ static struct cgroup_subsys_state *blkcg_create(struct cgroup *cgroup) return ERR_PTR(-ENOMEM); blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT; + blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT; blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */ done: spin_lock_init(&blkcg->lock); @@ -656,6 +872,13 @@ void blkcg_drain_queue(struct request_queue *q) { lockdep_assert_held(q->queue_lock); + /* + * @q could be exiting and already have destroyed all blkgs as + * indicated by NULL root_blkg. If so, don't confuse policies. + */ + if (!q->root_blkg) + return; + blk_throtl_drain(q); } @@ -680,14 +903,15 @@ void blkcg_exit_queue(struct request_queue *q) * of the main cic data structures. For now we allow a task to change * its cgroup only if it's the only owner of its ioc. */ -static int blkcg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) +static int blkcg_can_attach(struct cgroup_subsys_state *css, + struct cgroup_taskset *tset) { struct task_struct *task; struct io_context *ioc; int ret = 0; /* task_lock() is needed to avoid races with exit_io_context() */ - cgroup_taskset_for_each(task, cgrp, tset) { + cgroup_taskset_for_each(task, tset) { task_lock(task); ioc = task->io_context; if (ioc && atomic_read(&ioc->nr_tasks) > 1) @@ -699,17 +923,14 @@ static int blkcg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) return ret; } -struct cgroup_subsys blkio_subsys = { - .name = "blkio", - .create = blkcg_create, +struct cgroup_subsys blkio_cgrp_subsys = { + .css_alloc = blkcg_css_alloc, + .css_offline = blkcg_css_offline, + .css_free = blkcg_css_free, .can_attach = blkcg_can_attach, - .pre_destroy = blkcg_pre_destroy, - .destroy = blkcg_destroy, - .subsys_id = blkio_subsys_id, .base_cftypes = blkcg_files, - .module = THIS_MODULE, }; -EXPORT_SYMBOL_GPL(blkio_subsys); +EXPORT_SYMBOL_GPL(blkio_cgrp_subsys); /** * blkcg_activate_policy - activate a blkcg policy on a request_queue @@ -731,27 +952,45 @@ int blkcg_activate_policy(struct request_queue *q, const struct blkcg_policy *pol) { LIST_HEAD(pds); - struct blkcg_gq *blkg; + struct blkcg_gq *blkg, *new_blkg; struct blkg_policy_data *pd, *n; int cnt = 0, ret; + bool preloaded; if (blkcg_policy_enabled(q, pol)) return 0; + /* preallocations for root blkg */ + new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL); + if (!new_blkg) + return -ENOMEM; + blk_queue_bypass_start(q); - /* make sure the root blkg exists and count the existing blkgs */ + preloaded = !radix_tree_preload(GFP_KERNEL); + + /* + * Make sure the root blkg exists and count the existing blkgs. As + * @q is bypassing at this point, blkg_lookup_create() can't be + * used. Open code it. + */ spin_lock_irq(q->queue_lock); rcu_read_lock(); - blkg = __blkg_lookup_create(&blkcg_root, q); + blkg = __blkg_lookup(&blkcg_root, q, false); + if (blkg) + blkg_free(new_blkg); + else + blkg = blkg_create(&blkcg_root, q, new_blkg); rcu_read_unlock(); + if (preloaded) + radix_tree_preload_end(); + if (IS_ERR(blkg)) { ret = PTR_ERR(blkg); goto out_unlock; } - q->root_blkg = blkg; list_for_each_entry(blkg, &q->blkg_list, q_node) cnt++; @@ -788,6 +1027,7 @@ int blkcg_activate_policy(struct request_queue *q, blkg->pd[pol->plid] = pd; pd->blkg = blkg; + pd->plid = pol->plid; pol->pd_init_fn(blkg); spin_unlock(&blkg->blkcg->lock); @@ -834,6 +1074,8 @@ void blkcg_deactivate_policy(struct request_queue *q, /* grab blkcg lock too while removing @pd from @blkg */ spin_lock(&blkg->blkcg->lock); + if (pol->pd_offline_fn) + pol->pd_offline_fn(blkg); if (pol->pd_exit_fn) pol->pd_exit_fn(blkg); @@ -878,7 +1120,7 @@ int blkcg_policy_register(struct blkcg_policy *pol) /* everything is in place, add intf files for the new policy */ if (pol->cftypes) - WARN_ON(cgroup_add_cftypes(&blkio_subsys, pol->cftypes)); + WARN_ON(cgroup_add_cftypes(&blkio_cgrp_subsys, pol->cftypes)); ret = 0; out_unlock: mutex_unlock(&blkcg_pol_mutex); @@ -901,7 +1143,7 @@ void blkcg_policy_unregister(struct blkcg_policy *pol) /* kill the intf files first */ if (pol->cftypes) - cgroup_rm_cftypes(&blkio_subsys, pol->cftypes); + cgroup_rm_cftypes(pol->cftypes); /* unregister and update blkgs */ blkcg_policy[pol->plid] = NULL; |
