diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/blk-core.c | 18 | ||||
-rw-r--r-- | block/blk-flush.c | 8 | ||||
-rw-r--r-- | block/blk-lib.c | 21 | ||||
-rw-r--r-- | block/blk-throttle.c | 39 | ||||
-rw-r--r-- | block/cfq-iosched.c | 16 | ||||
-rw-r--r-- | block/elevator.c | 4 | ||||
-rw-r--r-- | block/genhd.c | 2 | ||||
-rw-r--r-- | block/ioctl.c | 8 |
8 files changed, 64 insertions, 52 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index b3cf121f1de..a63336d49f3 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -352,7 +352,7 @@ void blk_start_queue(struct request_queue *q) WARN_ON(!irqs_disabled()); queue_flag_clear(QUEUE_FLAG_STOPPED, q); - __blk_run_queue(q); + __blk_run_queue(q, false); } EXPORT_SYMBOL(blk_start_queue); @@ -403,13 +403,14 @@ EXPORT_SYMBOL(blk_sync_queue); /** * __blk_run_queue - run a single device queue * @q: The queue to run + * @force_kblockd: Don't run @q->request_fn directly. Use kblockd. * * Description: * See @blk_run_queue. This variant must be called with the queue lock * held and interrupts disabled. * */ -void __blk_run_queue(struct request_queue *q) +void __blk_run_queue(struct request_queue *q, bool force_kblockd) { blk_remove_plug(q); @@ -423,7 +424,7 @@ void __blk_run_queue(struct request_queue *q) * Only recurse once to avoid overrunning the stack, let the unplug * handling reinvoke the handler shortly if we already got there. */ - if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { + if (!force_kblockd && !queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { q->request_fn(q); queue_flag_clear(QUEUE_FLAG_REENTER, q); } else { @@ -446,7 +447,7 @@ void blk_run_queue(struct request_queue *q) unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); - __blk_run_queue(q); + __blk_run_queue(q, false); spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL(blk_run_queue); @@ -1053,7 +1054,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq, drive_stat_acct(rq, 1); __elv_add_request(q, rq, where, 0); - __blk_run_queue(q); + __blk_run_queue(q, false); spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL(blk_insert_request); @@ -2627,13 +2628,6 @@ int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) } EXPORT_SYMBOL(kblockd_schedule_work); -int kblockd_schedule_delayed_work(struct request_queue *q, - struct delayed_work *dwork, unsigned long delay) -{ - return queue_delayed_work(kblockd_workqueue, dwork, delay); -} -EXPORT_SYMBOL(kblockd_schedule_delayed_work); - int __init blk_dev_init(void) { BUILD_BUG_ON(__REQ_NR_BITS > 8 * diff --git a/block/blk-flush.c b/block/blk-flush.c index 54b123d6563..b27d0208611 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -66,10 +66,12 @@ static void blk_flush_complete_seq_end_io(struct request_queue *q, /* * Moving a request silently to empty queue_head may stall the - * queue. Kick the queue in those cases. + * queue. Kick the queue in those cases. This function is called + * from request completion path and calling directly into + * request_fn may confuse the driver. Always use kblockd. */ if (was_empty && next_rq) - __blk_run_queue(q); + __blk_run_queue(q, true); } static void pre_flush_end_io(struct request *rq, int error) @@ -130,7 +132,7 @@ static struct request *queue_next_fseq(struct request_queue *q) BUG(); } - elv_insert(q, rq, ELEVATOR_INSERT_FRONT); + elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); return rq; } diff --git a/block/blk-lib.c b/block/blk-lib.c index 1a320d2406b..bd3e8df4d5e 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -109,7 +109,6 @@ struct bio_batch atomic_t done; unsigned long flags; struct completion *wait; - bio_end_io_t *end_io; }; static void bio_batch_end_io(struct bio *bio, int err) @@ -122,17 +121,14 @@ static void bio_batch_end_io(struct bio *bio, int err) else clear_bit(BIO_UPTODATE, &bb->flags); } - if (bb) { - if (bb->end_io) - bb->end_io(bio, err); - atomic_inc(&bb->done); - complete(bb->wait); - } + if (bb) + if (atomic_dec_and_test(&bb->done)) + complete(bb->wait); bio_put(bio); } /** - * blkdev_issue_zeroout generate number of zero filed write bios + * blkdev_issue_zeroout - generate number of zero filed write bios * @bdev: blockdev to issue * @sector: start sector * @nr_sects: number of sectors to write @@ -150,13 +146,12 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, int ret; struct bio *bio; struct bio_batch bb; - unsigned int sz, issued = 0; + unsigned int sz; DECLARE_COMPLETION_ONSTACK(wait); - atomic_set(&bb.done, 0); + atomic_set(&bb.done, 1); bb.flags = 1 << BIO_UPTODATE; bb.wait = &wait; - bb.end_io = NULL; submit: ret = 0; @@ -185,12 +180,12 @@ submit: break; } ret = 0; - issued++; + atomic_inc(&bb.done); submit_bio(WRITE, bio); } /* Wait for bios in-flight */ - while (issued != atomic_read(&bb.done)) + if (!atomic_dec_and_test(&bb.done)) wait_for_completion(&wait); if (!test_bit(BIO_UPTODATE, &bb.flags)) diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 381b09bb562..e36cc10a346 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -20,6 +20,11 @@ static int throtl_quantum = 32; /* Throttling is performed over 100ms slice and after that slice is renewed */ static unsigned long throtl_slice = HZ/10; /* 100 ms */ +/* A workqueue to queue throttle related work */ +static struct workqueue_struct *kthrotld_workqueue; +static void throtl_schedule_delayed_work(struct throtl_data *td, + unsigned long delay); + struct throtl_rb_root { struct rb_root rb; struct rb_node *left; @@ -168,7 +173,15 @@ static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td, * tree of blkg (instead of traversing through hash list all * the time. */ - tg = tg_of_blkg(blkiocg_lookup_group(blkcg, key)); + + /* + * This is the common case when there are no blkio cgroups. + * Avoid lookup in this case + */ + if (blkcg == &blkio_root_cgroup) + tg = &td->root_tg; + else + tg = tg_of_blkg(blkiocg_lookup_group(blkcg, key)); /* Fill in device details for root group */ if (tg && !tg->blkg.dev && bdi->dev && dev_name(bdi->dev)) { @@ -337,10 +350,9 @@ static void throtl_schedule_next_dispatch(struct throtl_data *td) update_min_dispatch_time(st); if (time_before_eq(st->min_disptime, jiffies)) - throtl_schedule_delayed_work(td->queue, 0); + throtl_schedule_delayed_work(td, 0); else - throtl_schedule_delayed_work(td->queue, - (st->min_disptime - jiffies)); + throtl_schedule_delayed_work(td, (st->min_disptime - jiffies)); } static inline void @@ -807,10 +819,10 @@ void blk_throtl_work(struct work_struct *work) } /* Call with queue lock held */ -void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay) +static void +throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay) { - struct throtl_data *td = q->td; struct delayed_work *dwork = &td->throtl_work; if (total_nr_queued(td) > 0) { @@ -819,12 +831,11 @@ void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay) * Cancel that and schedule a new one. */ __cancel_delayed_work(dwork); - kblockd_schedule_delayed_work(q, dwork, delay); + queue_delayed_work(kthrotld_workqueue, dwork, delay); throtl_log(td, "schedule work. delay=%lu jiffies=%lu", delay, jiffies); } } -EXPORT_SYMBOL(throtl_schedule_delayed_work); static void throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg) @@ -912,7 +923,7 @@ static void throtl_update_blkio_group_read_bps(void *key, smp_mb__after_atomic_inc(); /* Schedule a work now to process the limit change */ - throtl_schedule_delayed_work(td->queue, 0); + throtl_schedule_delayed_work(td, 0); } static void throtl_update_blkio_group_write_bps(void *key, @@ -926,7 +937,7 @@ static void throtl_update_blkio_group_write_bps(void *key, smp_mb__before_atomic_inc(); atomic_inc(&td->limits_changed); smp_mb__after_atomic_inc(); - throtl_schedule_delayed_work(td->queue, 0); + throtl_schedule_delayed_work(td, 0); } static void throtl_update_blkio_group_read_iops(void *key, @@ -940,7 +951,7 @@ static void throtl_update_blkio_group_read_iops(void *key, smp_mb__before_atomic_inc(); atomic_inc(&td->limits_changed); smp_mb__after_atomic_inc(); - throtl_schedule_delayed_work(td->queue, 0); + throtl_schedule_delayed_work(td, 0); } static void throtl_update_blkio_group_write_iops(void *key, @@ -954,7 +965,7 @@ static void throtl_update_blkio_group_write_iops(void *key, smp_mb__before_atomic_inc(); atomic_inc(&td->limits_changed); smp_mb__after_atomic_inc(); - throtl_schedule_delayed_work(td->queue, 0); + throtl_schedule_delayed_work(td, 0); } void throtl_shutdown_timer_wq(struct request_queue *q) @@ -1127,6 +1138,10 @@ void blk_throtl_exit(struct request_queue *q) static int __init throtl_init(void) { + kthrotld_workqueue = alloc_workqueue("kthrotld", WQ_MEM_RECLAIM, 0); + if (!kthrotld_workqueue) + panic("Failed to create kthrotld\n"); + blkio_policy_register(&blkio_policy_throtl); return 0; } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 501ffdf0399..ea83a4f0c27 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -599,7 +599,7 @@ cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg) } static inline unsigned -cfq_scaled_group_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) +cfq_scaled_cfqq_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) { unsigned slice = cfq_prio_to_slice(cfqd, cfqq); if (cfqd->cfq_latency) { @@ -631,7 +631,7 @@ cfq_scaled_group_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) static inline void cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) { - unsigned slice = cfq_scaled_group_slice(cfqd, cfqq); + unsigned slice = cfq_scaled_cfqq_slice(cfqd, cfqq); cfqq->slice_start = jiffies; cfqq->slice_end = jiffies + slice; @@ -1671,7 +1671,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, */ if (timed_out) { if (cfq_cfqq_slice_new(cfqq)) - cfqq->slice_resid = cfq_scaled_group_slice(cfqd, cfqq); + cfqq->slice_resid = cfq_scaled_cfqq_slice(cfqd, cfqq); else cfqq->slice_resid = cfqq->slice_end - jiffies; cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid); @@ -3355,7 +3355,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, cfqd->busy_queues > 1) { cfq_del_timer(cfqd, cfqq); cfq_clear_cfqq_wait_request(cfqq); - __blk_run_queue(cfqd->queue); + __blk_run_queue(cfqd->queue, false); } else { cfq_blkiocg_update_idle_time_stats( &cfqq->cfqg->blkg); @@ -3370,7 +3370,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, * this new queue is RT and the current one is BE */ cfq_preempt_queue(cfqd, cfqq); - __blk_run_queue(cfqd->queue); + __blk_run_queue(cfqd->queue, false); } } @@ -3432,6 +3432,10 @@ static bool cfq_should_wait_busy(struct cfq_data *cfqd, struct cfq_queue *cfqq) { struct cfq_io_context *cic = cfqd->active_cic; + /* If the queue already has requests, don't wait */ + if (!RB_EMPTY_ROOT(&cfqq->sort_list)) + return false; + /* If there are other queues in the group, don't wait */ if (cfqq->cfqg->nr_cfqq > 1) return false; @@ -3727,7 +3731,7 @@ static void cfq_kick_queue(struct work_struct *work) struct request_queue *q = cfqd->queue; spin_lock_irq(q->queue_lock); - __blk_run_queue(cfqd->queue); + __blk_run_queue(cfqd->queue, false); spin_unlock_irq(q->queue_lock); } diff --git a/block/elevator.c b/block/elevator.c index 2569512830d..236e93c1f46 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -602,7 +602,7 @@ void elv_quiesce_start(struct request_queue *q) */ elv_drain_elevator(q); while (q->rq.elvpriv) { - __blk_run_queue(q); + __blk_run_queue(q, false); spin_unlock_irq(q->queue_lock); msleep(10); spin_lock_irq(q->queue_lock); @@ -651,7 +651,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) * with anything. There's no point in delaying queue * processing. */ - __blk_run_queue(q); + __blk_run_queue(q, false); break; case ELEVATOR_INSERT_SORT: diff --git a/block/genhd.c b/block/genhd.c index 6a5b772aa20..cbf1112a885 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1355,7 +1355,7 @@ int invalidate_partition(struct gendisk *disk, int partno) struct block_device *bdev = bdget_disk(disk, partno); if (bdev) { fsync_bdev(bdev); - res = __invalidate_device(bdev); + res = __invalidate_device(bdev, true); bdput(bdev); } return res; diff --git a/block/ioctl.c b/block/ioctl.c index 9049d460fa8..1124cd29726 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -294,9 +294,11 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, return -EINVAL; if (get_user(n, (int __user *) arg)) return -EFAULT; - if (!(mode & FMODE_EXCL) && - blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0) - return -EBUSY; + if (!(mode & FMODE_EXCL)) { + bdgrab(bdev); + if (blkdev_get(bdev, mode | FMODE_EXCL, &bdev) < 0) + return -EBUSY; + } ret = set_blocksize(bdev, n); if (!(mode & FMODE_EXCL)) blkdev_put(bdev, mode | FMODE_EXCL); |