author    Linus Torvalds <torvalds@linux-foundation.org>  2013-07-11 13:03:24 -0700
committer Linus Torvalds <torvalds@linux-foundation.org>  2013-07-11 13:03:24 -0700
commit    36805aaea5ae3cf1bb32f1643e0a800bb69f0d5b (patch)
tree      5565132549a0733772b3a2ac6b5cda516ea8cdce /block/blk-cgroup.c
parent    6d2fa9e141ea56a571ec842fd4f3a86bea44a203 (diff)
parent    d50235b7bc3ee0a0427984d763ea7534149531b4 (diff)
Merge branch 'for-3.11/core' of git://git.kernel.dk/linux-block
Pull core block IO updates from Jens Axboe:
"Here are the core IO block bits for 3.11. It contains:
- A tweak to the reserved tag logic from Jan, for weirdo devices with
just 3 free tags. But for those it improves things substantially
for random writes.
- Periodic writeback fix from Jan. Marked for stable as well.
- Fix for a race condition in IO scheduler switching from Jianpeng.
- The hierarchical blk-cgroup support from Tejun. This is the grunt
of the series (a conceptual sketch follows the commit list below).
- blk-throttle fix from Vivek.
Just a note that I'm in the middle of a relocation, the whole family is
flying out tomorrow. Hence I will be AWOL the remainder of this week,
but back at work again on Monday the 15th. CC'ing Tejun, since any
potential "surprises" will most likely be from the blk-cgroup work.
But it's been brewing for a while and sitting in my tree and
linux-next for a long time, so should be solid."
* 'for-3.11/core' of git://git.kernel.dk/linux-block: (36 commits)
elevator: Fix a race in elevator switching
block: Reserve only one queue tag for sync IO if only 3 tags are available
writeback: Fix periodic writeback after fs mount
blk-throttle: implement proper hierarchy support
blk-throttle: implement throtl_grp->has_rules[]
blk-throttle: Account for child group's start time in parent while bio climbs up
blk-throttle: add throtl_qnode for dispatch fairness
blk-throttle: make throtl_pending_timer_fn() ready for hierarchy
blk-throttle: make tg_dispatch_one_bio() ready for hierarchy
blk-throttle: make blk_throtl_bio() ready for hierarchy
blk-throttle: make blk_throtl_drain() ready for hierarchy
blk-throttle: dispatch from throtl_pending_timer_fn()
blk-throttle: implement dispatch looping
blk-throttle: separate out throtl_service_queue->pending_timer from throtl_data->dispatch_work
blk-throttle: set REQ_THROTTLED from throtl_charge_bio() and gate stats update with it
blk-throttle: implement sq_to_tg(), sq_to_td() and throtl_log()
blk-throttle: add throtl_service_queue->parent_sq
blk-throttle: generalize update_disptime optimization in blk_throtl_bio()
blk-throttle: dispatch to throtl_data->service_queue.bio_lists[]
blk-throttle: move bio_lists[] and friends to throtl_service_queue
...
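Most of the blk-throttle commits above are the hierarchy work taken in small steps: once a bio is charged to a group, it must also satisfy the limits of every ancestor, climbing the tree level by level instead of dispatching straight from its own group. Below is a minimal, self-contained sketch of that idea. The types, names, and the single-shot limit check are all hypothetical simplifications; the real series climbs bios through per-level service queues with proper time accounting (see the throtl_qnode and parent_sq commits above).

#include <stdbool.h>
#include <stdio.h>

/* stand-in for struct throtl_grp; fields are illustrative only */
struct tg {
	const char *name;
	unsigned long bps_limit;	/* bytes/sec this level allows */
	struct tg *parent;		/* NULL at the root */
};

/* grossly simplified per-level check (no time slices, no carried credit) */
static bool tg_within_limit(const struct tg *tg, unsigned long bytes)
{
	return bytes <= tg->bps_limit;
}

/* a bio may dispatch only if its own group and every ancestor agree */
static bool hierarchy_allows(const struct tg *tg, unsigned long bytes)
{
	for (; tg; tg = tg->parent)
		if (!tg_within_limit(tg, bytes))
			return false;
	return true;
}

int main(void)
{
	struct tg root  = { "root",  1024 * 1024, NULL  };
	struct tg child = { "child",  512 * 1024, &root };

	/* 600 KiB/s fits the root's budget but exceeds the child's */
	printf("%s\n", hierarchy_allows(&child, 600 * 1024)
	       ? "dispatch" : "throttle");
	return 0;
}

Before this series, the behavior matched the .broken_hierarchy comment removed at the bottom of the diff below: every group was treated as if it sat directly under the root, so the inner loop above would have run exactly once.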
Diffstat (limited to 'block/blk-cgroup.c')
-rw-r--r--  block/blk-cgroup.c | 105
1 file changed, 40 insertions(+), 65 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index e8918ffaf96..290792a13e3 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -32,26 +32,6 @@ EXPORT_SYMBOL_GPL(blkcg_root);
 
 static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
 
-static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
-				      struct request_queue *q, bool update_hint);
-
-/**
- * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
- * @d_blkg: loop cursor pointing to the current descendant
- * @pos_cgrp: used for iteration
- * @p_blkg: target blkg to walk descendants of
- *
- * Walk @c_blkg through the descendants of @p_blkg.  Must be used with RCU
- * read locked.  If called under either blkcg or queue lock, the iteration
- * is guaranteed to include all and only online blkgs.  The caller may
- * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip
- * subtree.
- */
-#define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg)		\
-	cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
-		if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp),	\
-					      (p_blkg)->q, false)))
-
 static bool blkcg_policy_enabled(struct request_queue *q,
 				 const struct blkcg_policy *pol)
 {
@@ -71,18 +51,8 @@ static void blkg_free(struct blkcg_gq *blkg)
 	if (!blkg)
 		return;
 
-	for (i = 0; i < BLKCG_MAX_POLS; i++) {
-		struct blkcg_policy *pol = blkcg_policy[i];
-		struct blkg_policy_data *pd = blkg->pd[i];
-
-		if (!pd)
-			continue;
-
-		if (pol && pol->pd_exit_fn)
-			pol->pd_exit_fn(blkg);
-
-		kfree(pd);
-	}
+	for (i = 0; i < BLKCG_MAX_POLS; i++)
+		kfree(blkg->pd[i]);
 
 	blk_exit_rl(&blkg->rl);
 	kfree(blkg);
@@ -134,10 +104,6 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
 		blkg->pd[i] = pd;
 		pd->blkg = blkg;
 		pd->plid = i;
-
-		/* invoke per-policy init */
-		if (pol->pd_init_fn)
-			pol->pd_init_fn(blkg);
 	}
 
 	return blkg;
@@ -158,8 +124,8 @@ err_free:
  * @q's bypass state.  If @update_hint is %true, the caller should be
  * holding @q->queue_lock and lookup hint is updated on success.
  */
-static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
-				      struct request_queue *q, bool update_hint)
+struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
+			       bool update_hint)
 {
 	struct blkcg_gq *blkg;
 
@@ -234,16 +200,25 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
 	}
 	blkg = new_blkg;
 
-	/* link parent and insert */
+	/* link parent */
 	if (blkcg_parent(blkcg)) {
 		blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
 		if (WARN_ON_ONCE(!blkg->parent)) {
-			blkg = ERR_PTR(-EINVAL);
+			ret = -EINVAL;
 			goto err_put_css;
 		}
 		blkg_get(blkg->parent);
 	}
 
+	/* invoke per-policy init */
+	for (i = 0; i < BLKCG_MAX_POLS; i++) {
+		struct blkcg_policy *pol = blkcg_policy[i];
+
+		if (blkg->pd[i] && pol->pd_init_fn)
+			pol->pd_init_fn(blkg);
+	}
+
+	/* insert */
 	spin_lock(&blkcg->lock);
 	ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
 	if (likely(!ret)) {
@@ -394,30 +369,38 @@ static void blkg_destroy_all(struct request_queue *q)
 	q->root_rl.blkg = NULL;
 }
 
-static void blkg_rcu_free(struct rcu_head *rcu_head)
+/*
+ * A group is RCU protected, but having an rcu lock does not mean that one
+ * can access all the fields of blkg and assume these are valid.  For
+ * example, don't try to follow throtl_data and request queue links.
+ *
+ * Having a reference to blkg under an rcu allows accesses to only values
+ * local to groups like group stats and group rate limits.
+ */
+void __blkg_release_rcu(struct rcu_head *rcu_head)
 {
-	blkg_free(container_of(rcu_head, struct blkcg_gq, rcu_head));
-}
+	struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
+	int i;
+
+	/* tell policies that this one is being freed */
+	for (i = 0; i < BLKCG_MAX_POLS; i++) {
+		struct blkcg_policy *pol = blkcg_policy[i];
+
+		if (blkg->pd[i] && pol->pd_exit_fn)
+			pol->pd_exit_fn(blkg);
+	}
 
-void __blkg_release(struct blkcg_gq *blkg)
-{
 	/* release the blkcg and parent blkg refs this blkg has been holding */
 	css_put(&blkg->blkcg->css);
-	if (blkg->parent)
+	if (blkg->parent) {
+		spin_lock_irq(blkg->q->queue_lock);
 		blkg_put(blkg->parent);
+		spin_unlock_irq(blkg->q->queue_lock);
+	}
 
-	/*
-	 * A group is freed in rcu manner. But having an rcu lock does not
-	 * mean that one can access all the fields of blkg and assume these
-	 * are valid. For example, don't try to follow throtl_data and
-	 * request queue links.
-	 *
-	 * Having a reference to blkg under an rcu allows acess to only
-	 * values local to groups like group stats and group rate limits
-	 */
-	call_rcu(&blkg->rcu_head, blkg_rcu_free);
+	blkg_free(blkg);
 }
-EXPORT_SYMBOL_GPL(__blkg_release);
+EXPORT_SYMBOL_GPL(__blkg_release_rcu);
 
 /*
  * The next function used by blk_queue_for_each_rl().  It's a bit tricky
@@ -928,14 +911,6 @@ struct cgroup_subsys blkio_subsys = {
 	.subsys_id = blkio_subsys_id,
 	.base_cftypes = blkcg_files,
 	.module = THIS_MODULE,
-
-	/*
-	 * blkio subsystem is utterly broken in terms of hierarchy support.
-	 * It treats all cgroups equally regardless of where they're
-	 * located in the hierarchy - all cgroups are treated as if they're
-	 * right below the root.  Fix it and remove the following.
-	 */
-	.broken_hierarchy = true,
 };
 EXPORT_SYMBOL_GPL(blkio_subsys);
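Two ordering changes in the hunks above are worth calling out for policy authors: pd_init_fn() now runs from blkg_create() only after blkg->parent has been linked, and pd_exit_fn() now runs from __blkg_release_rcu(), i.e. after an RCU grace period has elapsed (with the parent reference dropped under queue_lock). The kernel-style sketch below shows what that permits; it is illustrative, not buildable on its own. struct my_pd, MY_PLID, and MY_WEIGHT_DFL are hypothetical, while blkg->pd[], blkg->parent, and the callback shape come straight from the diff.

/* Hypothetical per-policy data, embedding the generic member the way
 * real policies do.  Illustrative only, not buildable stand-alone. */
struct my_pd {
	struct blkg_policy_data pd;	/* generic part, must come first */
	unsigned int weight;		/* hypothetical per-group setting */
};

/* pd_init_fn: after this merge it is called from blkg_create() once
 * blkg->parent is linked, so init may consult the parent's data. */
static void my_pd_init(struct blkcg_gq *blkg)
{
	struct my_pd *mpd = (struct my_pd *)blkg->pd[MY_PLID];
	struct blkcg_gq *parent = blkg->parent;	/* valid: linked first */

	/* hypothetical policy: inherit the parent's weight by default */
	mpd->weight = parent ? ((struct my_pd *)parent->pd[MY_PLID])->weight
			     : MY_WEIGHT_DFL;
}

/* pd_exit_fn: now called from __blkg_release_rcu(), i.e. after an RCU
 * grace period, so no rcu-protected lookup can still return this blkg
 * and teardown needs no extra synchronization against readers. */
static void my_pd_exit(struct blkcg_gq *blkg)
{
	/* flush hypothetical per-group stats, free auxiliary data, ... */
}

Note the trade-off in __blkg_release_rcu(): per-policy exit moves out of blkg_free() into the RCU callback, which is what lets blkg_free() shrink to plain kfree() calls in the first hunk, at the cost of taking queue_lock from the callback when a parent reference must be dropped.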