diff options
Diffstat (limited to 'block/blk-core.c')
| -rw-r--r-- | block/blk-core.c | 213 |
1 files changed, 147 insertions, 66 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index 8bdd0121212..6f8dba161bf 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -38,10 +38,12 @@ #include "blk.h" #include "blk-cgroup.h" +#include "blk-mq.h" EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap); EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); +EXPORT_TRACEPOINT_SYMBOL_GPL(block_split); EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug); DEFINE_IDA(blk_queue_ida); @@ -130,7 +132,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio, bio_advance(bio, nbytes); /* don't actually finish bio if it's part of flush sequence */ - if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) + if (bio->bi_iter.bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ)) bio_endio(bio, error); } @@ -145,8 +147,8 @@ void blk_dump_rq_flags(struct request *rq, char *msg) printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n", (unsigned long long)blk_rq_pos(rq), blk_rq_sectors(rq), blk_rq_cur_sectors(rq)); - printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n", - rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq)); + printk(KERN_INFO " bio %p, biotail %p, len %u\n", + rq->bio, rq->biotail, blk_rq_bytes(rq)); if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { printk(KERN_INFO " cdb: "); @@ -245,7 +247,18 @@ EXPORT_SYMBOL(blk_stop_queue); void blk_sync_queue(struct request_queue *q) { del_timer_sync(&q->timeout); - cancel_delayed_work_sync(&q->delay_work); + + if (q->mq_ops) { + struct blk_mq_hw_ctx *hctx; + int i; + + queue_for_each_hw_ctx(q, hctx, i) { + cancel_delayed_work_sync(&hctx->run_work); + cancel_delayed_work_sync(&hctx->delay_work); + } + } else { + cancel_delayed_work_sync(&q->delay_work); + } } EXPORT_SYMBOL(blk_sync_queue); @@ -497,8 +510,13 @@ void blk_cleanup_queue(struct request_queue *q) * Drain all requests queued before DYING marking. Set DEAD flag to * prevent that q->request_fn() gets invoked after draining finished. */ - spin_lock_irq(lock); - __blk_drain_queue(q, true); + if (q->mq_ops) { + blk_mq_drain_queue(q); + spin_lock_irq(lock); + } else { + spin_lock_irq(lock); + __blk_drain_queue(q, true); + } queue_flag_set(QUEUE_FLAG_DEAD, q); spin_unlock_irq(lock); @@ -559,12 +577,9 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) if (!q) return NULL; - if (percpu_counter_init(&q->mq_usage_counter, 0)) - goto fail_q; - q->id = ida_simple_get(&blk_queue_ida, 0, 0, gfp_mask); if (q->id < 0) - goto fail_c; + goto fail_q; q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE; @@ -622,8 +637,6 @@ fail_bdi: bdi_destroy(&q->backing_dev_info); fail_id: ida_simple_remove(&blk_queue_ida, q->id); -fail_c: - percpu_counter_destroy(&q->mq_usage_counter); fail_q: kmem_cache_free(blk_requestq_cachep, q); return NULL; @@ -693,9 +706,13 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, if (!q) return NULL; - if (blk_init_rl(&q->root_rl, q, GFP_KERNEL)) + q->flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL); + if (!q->flush_rq) return NULL; + if (blk_init_rl(&q->root_rl, q, GFP_KERNEL)) + goto fail; + q->request_fn = rfn; q->prep_rq_fn = NULL; q->unprep_rq_fn = NULL; @@ -718,12 +735,16 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, /* init elevator */ if (elevator_init(q, NULL)) { mutex_unlock(&q->sysfs_lock); - return NULL; + goto fail; } mutex_unlock(&q->sysfs_lock); return q; + +fail: + kfree(q->flush_rq); + return NULL; } EXPORT_SYMBOL(blk_init_allocated_queue); @@ -823,6 +844,47 @@ static void freed_request(struct request_list *rl, unsigned int flags) __freed_request(rl, sync ^ 1); } +int blk_update_nr_requests(struct request_queue *q, unsigned int nr) +{ + struct request_list *rl; + + spin_lock_irq(q->queue_lock); + q->nr_requests = nr; + blk_queue_congestion_threshold(q); + + /* congestion isn't cgroup aware and follows root blkcg for now */ + rl = &q->root_rl; + + if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q)) + blk_set_queue_congested(q, BLK_RW_SYNC); + else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q)) + blk_clear_queue_congested(q, BLK_RW_SYNC); + + if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q)) + blk_set_queue_congested(q, BLK_RW_ASYNC); + else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q)) + blk_clear_queue_congested(q, BLK_RW_ASYNC); + + blk_queue_for_each_rl(rl, q) { + if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { + blk_set_rl_full(rl, BLK_RW_SYNC); + } else { + blk_clear_rl_full(rl, BLK_RW_SYNC); + wake_up(&rl->wait[BLK_RW_SYNC]); + } + + if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) { + blk_set_rl_full(rl, BLK_RW_ASYNC); + } else { + blk_clear_rl_full(rl, BLK_RW_ASYNC); + wake_up(&rl->wait[BLK_RW_ASYNC]); + } + } + + spin_unlock_irq(q->queue_lock); + return 0; +} + /* * Determine if elevator data should be initialized when allocating the * request associated with @bio. @@ -1157,6 +1219,8 @@ struct request *blk_make_request(struct request_queue *q, struct bio *bio, if (unlikely(!rq)) return ERR_PTR(-ENOMEM); + blk_rq_set_block_pc(rq); + for_each_bio(bio) { struct bio *bounce_bio = bio; int ret; @@ -1174,6 +1238,22 @@ struct request *blk_make_request(struct request_queue *q, struct bio *bio, EXPORT_SYMBOL(blk_make_request); /** + * blk_rq_set_block_pc - initialize a requeest to type BLOCK_PC + * @rq: request to be initialized + * + */ +void blk_rq_set_block_pc(struct request *rq) +{ + rq->cmd_type = REQ_TYPE_BLOCK_PC; + rq->__data_len = 0; + rq->__sector = (sector_t) -1; + rq->bio = rq->biotail = NULL; + memset(rq->__cmd, 0, sizeof(rq->__cmd)); + rq->cmd = rq->__cmd; +} +EXPORT_SYMBOL(blk_rq_set_block_pc); + +/** * blk_requeue_request - put a request back on queue * @q: request queue where request should be inserted * @rq: request to be inserted @@ -1208,12 +1288,15 @@ static void add_acct_request(struct request_queue *q, struct request *rq, static void part_round_stats_single(int cpu, struct hd_struct *part, unsigned long now) { + int inflight; + if (now == part->stamp) return; - if (part_in_flight(part)) { + inflight = part_in_flight(part); + if (inflight) { __part_stat_add(cpu, part, time_in_queue, - part_in_flight(part) * (now - part->stamp)); + inflight * (now - part->stamp)); __part_stat_add(cpu, part, io_ticks, (now - part->stamp)); } part->stamp = now; @@ -1263,6 +1346,11 @@ void __blk_put_request(struct request_queue *q, struct request *req) if (unlikely(!q)) return; + if (q->mq_ops) { + blk_mq_free_request(req); + return; + } + blk_pm_put_request(req); elv_completed_request(q, req); @@ -1279,7 +1367,7 @@ void __blk_put_request(struct request_queue *q, struct request *req) struct request_list *rl = blk_rq_rl(req); BUG_ON(!list_empty(&req->queuelist)); - BUG_ON(!hlist_unhashed(&req->hash)); + BUG_ON(ELV_ON_HASH(req)); blk_free_request(rl, req); freed_request(rl, flags); @@ -1326,13 +1414,12 @@ void blk_add_request_payload(struct request *rq, struct page *page, bio->bi_io_vec->bv_offset = 0; bio->bi_io_vec->bv_len = len; - bio->bi_size = len; + bio->bi_iter.bi_size = len; bio->bi_vcnt = 1; bio->bi_phys_segments = 1; rq->__data_len = rq->resid_len = len; rq->nr_phys_segments = 1; - rq->buffer = bio_data(bio); } EXPORT_SYMBOL_GPL(blk_add_request_payload); @@ -1351,7 +1438,7 @@ bool bio_attempt_back_merge(struct request_queue *q, struct request *req, req->biotail->bi_next = bio; req->biotail = bio; - req->__data_len += bio->bi_size; + req->__data_len += bio->bi_iter.bi_size; req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); blk_account_io_start(req, false); @@ -1374,14 +1461,8 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req, bio->bi_next = req->bio; req->bio = bio; - /* - * may not be valid. if the low level driver said - * it didn't need a bounce buffer then it better - * not touch req->buffer either... - */ - req->buffer = bio_data(bio); - req->__sector = bio->bi_sector; - req->__data_len += bio->bi_size; + req->__sector = bio->bi_iter.bi_sector; + req->__data_len += bio->bi_iter.bi_size; req->ioprio = ioprio_best(req->ioprio, bio_prio(bio)); blk_account_io_start(req, false); @@ -1404,6 +1485,8 @@ bool bio_attempt_front_merge(struct request_queue *q, struct request *req, * added on the elevator at this point. In addition, we don't have * reliable access to the elevator outside queue lock. Only check basic * merging parameters without querying the elevator. + * + * Caller must ensure !blk_queue_nomerges(q) beforehand. */ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, unsigned int *request_count) @@ -1413,9 +1496,6 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio, bool ret = false; struct list_head *plug_list; - if (blk_queue_nomerges(q)) - goto out; - plug = current->plug; if (!plug) goto out; @@ -1459,7 +1539,7 @@ void init_request_from_bio(struct request *req, struct bio *bio) req->cmd_flags |= REQ_FAILFAST_MASK; req->errors = 0; - req->__sector = bio->bi_sector; + req->__sector = bio->bi_iter.bi_sector; req->ioprio = bio_prio(bio); blk_rq_bio_prep(req->q, req, bio); } @@ -1494,7 +1574,8 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio) * Check if we can merge with the plugged list before grabbing * any locks. */ - if (blk_attempt_plug_merge(q, bio, &request_count)) + if (!blk_queue_nomerges(q) && + blk_attempt_plug_merge(q, bio, &request_count)) return; spin_lock_irq(q->queue_lock); @@ -1583,12 +1664,12 @@ static inline void blk_partition_remap(struct bio *bio) if (bio_sectors(bio) && bdev != bdev->bd_contains) { struct hd_struct *p = bdev->bd_part; - bio->bi_sector += p->start_sect; + bio->bi_iter.bi_sector += p->start_sect; bio->bi_bdev = bdev->bd_contains; trace_block_bio_remap(bdev_get_queue(bio->bi_bdev), bio, bdev->bd_dev, - bio->bi_sector - p->start_sect); + bio->bi_iter.bi_sector - p->start_sect); } } @@ -1626,7 +1707,7 @@ static int __init fail_make_request_debugfs(void) struct dentry *dir = fault_create_debugfs_attr("fail_make_request", NULL, &fail_make_request); - return IS_ERR(dir) ? PTR_ERR(dir) : 0; + return PTR_ERR_OR_ZERO(dir); } late_initcall(fail_make_request_debugfs); @@ -1654,7 +1735,7 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors) /* Test device or partition size, when known. */ maxsector = i_size_read(bio->bi_bdev->bd_inode) >> 9; if (maxsector) { - sector_t sector = bio->bi_sector; + sector_t sector = bio->bi_iter.bi_sector; if (maxsector < nr_sectors || maxsector - nr_sectors < sector) { /* @@ -1690,7 +1771,7 @@ generic_make_request_checks(struct bio *bio) "generic_make_request: Trying to access " "nonexistent block-device %s (%Lu)\n", bdevname(bio->bi_bdev, b), - (long long) bio->bi_sector); + (long long) bio->bi_iter.bi_sector); goto end_io; } @@ -1704,9 +1785,9 @@ generic_make_request_checks(struct bio *bio) } part = bio->bi_bdev->bd_part; - if (should_fail_request(part, bio->bi_size) || + if (should_fail_request(part, bio->bi_iter.bi_size) || should_fail_request(&part_to_disk(part)->part0, - bio->bi_size)) + bio->bi_iter.bi_size)) goto end_io; /* @@ -1865,7 +1946,7 @@ void submit_bio(int rw, struct bio *bio) if (rw & WRITE) { count_vm_events(PGPGOUT, count); } else { - task_io_account_read(bio->bi_size); + task_io_account_read(bio->bi_iter.bi_size); count_vm_events(PGPGIN, count); } @@ -1874,7 +1955,7 @@ void submit_bio(int rw, struct bio *bio) printk(KERN_DEBUG "%s(%d): %s block %Lu on %s (%u sectors)\n", current->comm, task_pid_nr(current), (rw & WRITE) ? "WRITE" : "READ", - (unsigned long long)bio->bi_sector, + (unsigned long long)bio->bi_iter.bi_sector, bdevname(bio->bi_bdev, b), count); } @@ -1900,7 +1981,7 @@ EXPORT_SYMBOL(submit_bio); * in some cases below, so export this function. * Request stacking drivers like request-based dm may change the queue * limits while requests are in the queue (e.g. dm's table swapping). - * Such request stacking drivers should check those requests agaist + * Such request stacking drivers should check those requests against * the new queue limits again when they dispatch those requests, * although such checkings are also done against the old queue limits * when submitting requests. @@ -2007,7 +2088,7 @@ unsigned int blk_rq_err_bytes(const struct request *rq) for (bio = rq->bio; bio; bio = bio->bi_next) { if ((bio->bi_rw & ff) != ff) break; - bytes += bio->bi_size; + bytes += bio->bi_iter.bi_size; } /* this could lead to infinite loop */ @@ -2325,7 +2406,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) if (!req->bio) return false; - trace_block_rq_complete(req->q, req); + trace_block_rq_complete(req->q, req, nr_bytes); /* * For fs requests, rq is just carrier of independent bio's @@ -2378,9 +2459,9 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) total_bytes = 0; while (req->bio) { struct bio *bio = req->bio; - unsigned bio_bytes = min(bio->bi_size, nr_bytes); + unsigned bio_bytes = min(bio->bi_iter.bi_size, nr_bytes); - if (bio_bytes == bio->bi_size) + if (bio_bytes == bio->bi_iter.bi_size) req->bio = bio->bi_next; req_bio_endio(req, bio, bio_bytes, error); @@ -2406,7 +2487,6 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) } req->__data_len -= total_bytes; - req->buffer = bio_data(req->bio); /* update sector only for requests with clear definition of sector */ if (req->cmd_type == REQ_TYPE_FS) @@ -2475,7 +2555,7 @@ EXPORT_SYMBOL_GPL(blk_unprep_request); /* * queue lock must be held */ -static void blk_finish_request(struct request *req, int error) +void blk_finish_request(struct request *req, int error) { if (blk_rq_tagged(req)) blk_queue_end_tag(req->q, req); @@ -2501,6 +2581,7 @@ static void blk_finish_request(struct request *req, int error) __blk_put_request(req->q, req); } } +EXPORT_SYMBOL(blk_finish_request); /** * blk_end_bidi_request - Complete a bidi request @@ -2724,11 +2805,10 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */ rq->cmd_flags |= bio->bi_rw & REQ_WRITE; - if (bio_has_data(bio)) { + if (bio_has_data(bio)) rq->nr_phys_segments = bio_phys_segments(q, bio); - rq->buffer = bio_data(bio); - } - rq->__data_len = bio->bi_size; + + rq->__data_len = bio->bi_iter.bi_size; rq->bio = rq->biotail = bio; if (bio->bi_bdev) @@ -2746,10 +2826,10 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, void rq_flush_dcache_pages(struct request *rq) { struct req_iterator iter; - struct bio_vec *bvec; + struct bio_vec bvec; rq_for_each_segment(bvec, rq, iter) - flush_dcache_page(bvec->bv_page); + flush_dcache_page(bvec.bv_page); } EXPORT_SYMBOL_GPL(rq_flush_dcache_pages); #endif @@ -2803,7 +2883,7 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); /* * Copy attributes of the original request to the clone request. - * The actual data parts (e.g. ->cmd, ->buffer, ->sense) are not copied. + * The actual data parts (e.g. ->cmd, ->sense) are not copied. */ static void __blk_rq_prep_clone(struct request *dst, struct request *src) { @@ -2829,7 +2909,7 @@ static void __blk_rq_prep_clone(struct request *dst, struct request *src) * * Description: * Clones bios in @rq_src to @rq, and copies attributes of @rq_src to @rq. - * The actual data parts of @rq_src (e.g. ->cmd, ->buffer, ->sense) + * The actual data parts of @rq_src (e.g. ->cmd, ->sense) * are not copied, and copying such parts is the caller's responsibility. * Also, pages which the original bios are pointing to are not copied * and the cloned bios just point same pages. @@ -2876,20 +2956,25 @@ free_and_out: } EXPORT_SYMBOL_GPL(blk_rq_prep_clone); -int kblockd_schedule_work(struct request_queue *q, struct work_struct *work) +int kblockd_schedule_work(struct work_struct *work) { return queue_work(kblockd_workqueue, work); } EXPORT_SYMBOL(kblockd_schedule_work); -int kblockd_schedule_delayed_work(struct request_queue *q, - struct delayed_work *dwork, unsigned long delay) +int kblockd_schedule_delayed_work(struct delayed_work *dwork, + unsigned long delay) { return queue_delayed_work(kblockd_workqueue, dwork, delay); } EXPORT_SYMBOL(kblockd_schedule_delayed_work); -#define PLUG_MAGIC 0x91827364 +int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork, + unsigned long delay) +{ + return queue_delayed_work_on(cpu, kblockd_workqueue, dwork, delay); +} +EXPORT_SYMBOL(kblockd_schedule_delayed_work_on); /** * blk_start_plug - initialize blk_plug and track it inside the task_struct @@ -2909,7 +2994,6 @@ void blk_start_plug(struct blk_plug *plug) { struct task_struct *tsk = current; - plug->magic = PLUG_MAGIC; INIT_LIST_HEAD(&plug->list); INIT_LIST_HEAD(&plug->mq_list); INIT_LIST_HEAD(&plug->cb_list); @@ -3006,8 +3090,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) LIST_HEAD(list); unsigned int depth; - BUG_ON(plug->magic != PLUG_MAGIC); - flush_plug_callbacks(plug, from_schedule); if (!list_empty(&plug->mq_list)) @@ -3230,8 +3312,7 @@ int __init blk_dev_init(void) /* used for unplugging and affects IO latency/throughput - HIGHPRI */ kblockd_workqueue = alloc_workqueue("kblockd", - WQ_MEM_RECLAIM | WQ_HIGHPRI | - WQ_POWER_EFFICIENT, 0); + WQ_MEM_RECLAIM | WQ_HIGHPRI, 0); if (!kblockd_workqueue) panic("Failed to create kblockd\n"); |
