From f469a7b4d5b1d1d053200a9015fd25d59c057f49 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Thu, 19 May 2011 15:38:23 -0400 Subject: blk-cgroup: Allow sleeping while dynamically allocating a group Currently, all the cfq_group or throtl_group allocations happen while we are holding ->queue_lock and sleeping is not allowed. Soon, we will move to per cpu stats and also need to allocate the per group stats. As one can not call alloc_percpu() from atomic context as it can sleep, we need to drop ->queue_lock, allocate the group, retake the lock and continue processing. In throttling code, I check the queue DEAD flag again to make sure that driver did not call blk_cleanup_queue() in the mean time. Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 128 ++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 95 insertions(+), 33 deletions(-) (limited to 'block/cfq-iosched.c') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index e2e6719832e..606020fe93f 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1016,28 +1016,47 @@ void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg, cfqg->needs_update = true; } -static struct cfq_group * cfq_find_alloc_cfqg(struct cfq_data *cfqd, - struct blkio_cgroup *blkcg) +static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd, + struct cfq_group *cfqg, struct blkio_cgroup *blkcg) { - struct cfq_group *cfqg = NULL; - void *key = cfqd; - int i, j; - struct cfq_rb_root *st; struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info; unsigned int major, minor; - cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key)); - if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) { + /* + * Add group onto cgroup list. It might happen that bdi->dev is + * not initialized yet. Initialize this new group without major + * and minor info and this info will be filled in once a new thread + * comes for IO. + */ + if (bdi->dev) { sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); - cfqg->blkg.dev = MKDEV(major, minor); - goto done; - } - if (cfqg) - goto done; + cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, + (void *)cfqd, MKDEV(major, minor)); + } else + cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, + (void *)cfqd, 0); + + cfqd->nr_blkcg_linked_grps++; + cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev); + + /* Add group on cfqd list */ + hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list); +} + +/* + * Should be called from sleepable context. No request queue lock as per + * cpu stats are allocated dynamically and alloc_percpu needs to be called + * from sleepable context. + */ +static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd) +{ + struct cfq_group *cfqg = NULL; + int i, j; + struct cfq_rb_root *st; cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node); if (!cfqg) - goto done; + return NULL; for_each_cfqg_st(cfqg, i, j, st) *st = CFQ_RB_ROOT; @@ -1050,28 +1069,31 @@ static struct cfq_group * cfq_find_alloc_cfqg(struct cfq_data *cfqd, * or cgroup deletion path depending on who is exiting first. */ cfqg->ref = 1; + return cfqg; +} + +static struct cfq_group * +cfq_find_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg) +{ + struct cfq_group *cfqg = NULL; + void *key = cfqd; + struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info; + unsigned int major, minor; /* - * Add group onto cgroup list. It might happen that bdi->dev is - * not initialized yet. Initialize this new group without major - * and minor info and this info will be filled in once a new thread - * comes for IO. See code above. + * This is the common case when there are no blkio cgroups. + * Avoid lookup in this case */ - if (bdi->dev) { - sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); - cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd, - MKDEV(major, minor)); - } else - cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd, - 0); - - cfqd->nr_blkcg_linked_grps++; - cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev); + if (blkcg == &blkio_root_cgroup) + cfqg = &cfqd->root_group; + else + cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key)); - /* Add group on cfqd list */ - hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list); + if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) { + sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); + cfqg->blkg.dev = MKDEV(major, minor); + } -done: return cfqg; } @@ -1082,13 +1104,53 @@ done: static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd) { struct blkio_cgroup *blkcg; - struct cfq_group *cfqg = NULL; + struct cfq_group *cfqg = NULL, *__cfqg = NULL; + struct request_queue *q = cfqd->queue; + + rcu_read_lock(); + blkcg = task_blkio_cgroup(current); + cfqg = cfq_find_cfqg(cfqd, blkcg); + if (cfqg) { + rcu_read_unlock(); + return cfqg; + } + + /* + * Need to allocate a group. Allocation of group also needs allocation + * of per cpu stats which in-turn takes a mutex() and can block. Hence + * we need to drop rcu lock and queue_lock before we call alloc. + * + * Not taking any queue reference here and assuming that queue is + * around by the time we return. CFQ queue allocation code does + * the same. It might be racy though. + */ + + rcu_read_unlock(); + spin_unlock_irq(q->queue_lock); + + cfqg = cfq_alloc_cfqg(cfqd); + + spin_lock_irq(q->queue_lock); rcu_read_lock(); blkcg = task_blkio_cgroup(current); - cfqg = cfq_find_alloc_cfqg(cfqd, blkcg); + + /* + * If some other thread already allocated the group while we were + * not holding queue lock, free up the group + */ + __cfqg = cfq_find_cfqg(cfqd, blkcg); + + if (__cfqg) { + kfree(cfqg); + rcu_read_unlock(); + return __cfqg; + } + if (!cfqg) cfqg = &cfqd->root_group; + + cfq_init_add_cfqg_lists(cfqd, cfqg, blkcg); rcu_read_unlock(); return cfqg; } -- cgit v1.2.3-18-g5258