diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-11 10:52:27 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-11 11:10:35 -0700 |
commit | c9059598ea8981d02356eead3188bf7fa4d717b8 (patch) | |
tree | 03e73b20a30e988da7c6a3e0ad93b2dc5843274d /block | |
parent | 0a33f80a8373eca7f4bea3961d1346c3815fa5ed (diff) | |
parent | b0fd271d5fba0b2d00888363f3869e3f9b26caa9 (diff) |
Merge branch 'for-2.6.31' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.31' of git://git.kernel.dk/linux-2.6-block: (153 commits)
block: add request clone interface (v2)
floppy: fix hibernation
ramdisk: remove long-deprecated "ramdisk=" boot-time parameter
fs/bio.c: add missing __user annotation
block: prevent possible io_context->refcount overflow
Add serial number support for virtio_blk, V4a
block: Add missing bounce_pfn stacking and fix comments
Revert "block: Fix bounce limit setting in DM"
cciss: decode unit attention in SCSI error handling code
cciss: Remove no longer needed sendcmd reject processing code
cciss: change SCSI error handling routines to work with interrupts enabled.
cciss: separate error processing and command retrying code in sendcmd_withirq_core()
cciss: factor out fix target status processing code from sendcmd functions
cciss: simplify interface of sendcmd() and sendcmd_withirq()
cciss: factor out core of sendcmd_withirq() for use by SCSI error handling code
cciss: Use schedule_timeout_uninterruptible in SCSI error handling code
block: needs to set the residual length of a bidi request
Revert "block: implement blkdev_readpages"
block: Fix bounce limit setting in DM
Removed reference to non-existing file Documentation/PCI/PCI-DMA-mapping.txt
...
Manually fix conflicts with tracing updates in:
block/blk-sysfs.c
drivers/ide/ide-atapi.c
drivers/ide/ide-cd.c
drivers/ide/ide-floppy.c
drivers/ide/ide-tape.c
include/trace/events/block.h
kernel/trace/blktrace.c
Diffstat (limited to 'block')
-rw-r--r-- | block/Kconfig | 11 | ||||
-rw-r--r-- | block/as-iosched.c | 24 | ||||
-rw-r--r-- | block/blk-barrier.c | 27 | ||||
-rw-r--r-- | block/blk-core.c | 848 | ||||
-rw-r--r-- | block/blk-exec.c | 1 | ||||
-rw-r--r-- | block/blk-integrity.c | 2 | ||||
-rw-r--r-- | block/blk-ioc.c | 12 | ||||
-rw-r--r-- | block/blk-map.c | 25 | ||||
-rw-r--r-- | block/blk-merge.c | 71 | ||||
-rw-r--r-- | block/blk-settings.c | 269 | ||||
-rw-r--r-- | block/blk-sysfs.c | 62 | ||||
-rw-r--r-- | block/blk-tag.c | 17 | ||||
-rw-r--r-- | block/blk-timeout.c | 22 | ||||
-rw-r--r-- | block/blk.h | 51 | ||||
-rw-r--r-- | block/bsg.c | 8 | ||||
-rw-r--r-- | block/cfq-iosched.c | 38 | ||||
-rw-r--r-- | block/compat_ioctl.c | 4 | ||||
-rw-r--r-- | block/deadline-iosched.c | 2 | ||||
-rw-r--r-- | block/elevator.c | 185 | ||||
-rw-r--r-- | block/genhd.c | 11 | ||||
-rw-r--r-- | block/ioctl.c | 12 | ||||
-rw-r--r-- | block/scsi_ioctl.c | 13 |
22 files changed, 1061 insertions, 654 deletions
diff --git a/block/Kconfig b/block/Kconfig index e7d12782bcf..2c39527aa7d 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -26,6 +26,7 @@ if BLOCK config LBD bool "Support for large block devices and files" depends on !64BIT + default y help Enable block devices or files of size 2TB and larger. @@ -38,11 +39,13 @@ config LBD The ext4 filesystem requires that this feature be enabled in order to support filesystems that have the huge_file feature - enabled. Otherwise, it will refuse to mount any filesystems - that use the huge_file feature, which is enabled by default - by mke2fs.ext4. The GFS2 filesystem also requires this feature. + enabled. Otherwise, it will refuse to mount in the read-write + mode any filesystems that use the huge_file feature, which is + enabled by default by mke2fs.ext4. - If unsure, say N. + The GFS2 filesystem also requires this feature. + + If unsure, say Y. config BLK_DEV_BSG bool "Block layer SG support v4 (EXPERIMENTAL)" diff --git a/block/as-iosched.c b/block/as-iosched.c index c48fa670d22..7a12cf6ee1d 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -306,8 +306,8 @@ as_choose_req(struct as_data *ad, struct request *rq1, struct request *rq2) data_dir = rq_is_sync(rq1); last = ad->last_sector[data_dir]; - s1 = rq1->sector; - s2 = rq2->sector; + s1 = blk_rq_pos(rq1); + s2 = blk_rq_pos(rq2); BUG_ON(data_dir != rq_is_sync(rq2)); @@ -566,13 +566,15 @@ static void as_update_iohist(struct as_data *ad, struct as_io_context *aic, as_update_thinktime(ad, aic, thinktime); /* Calculate read -> read seek distance */ - if (aic->last_request_pos < rq->sector) - seek_dist = rq->sector - aic->last_request_pos; + if (aic->last_request_pos < blk_rq_pos(rq)) + seek_dist = blk_rq_pos(rq) - + aic->last_request_pos; else - seek_dist = aic->last_request_pos - rq->sector; + seek_dist = aic->last_request_pos - + blk_rq_pos(rq); as_update_seekdist(ad, aic, seek_dist); } - aic->last_request_pos = rq->sector + rq->nr_sectors; + aic->last_request_pos = blk_rq_pos(rq) + blk_rq_sectors(rq); set_bit(AS_TASK_IOSTARTED, &aic->state); spin_unlock(&aic->lock); } @@ -587,7 +589,7 @@ static int as_close_req(struct as_data *ad, struct as_io_context *aic, { unsigned long delay; /* jiffies */ sector_t last = ad->last_sector[ad->batch_data_dir]; - sector_t next = rq->sector; + sector_t next = blk_rq_pos(rq); sector_t delta; /* acceptable close offset (in sectors) */ sector_t s; @@ -981,7 +983,7 @@ static void as_move_to_dispatch(struct as_data *ad, struct request *rq) * This has to be set in order to be correctly updated by * as_find_next_rq */ - ad->last_sector[data_dir] = rq->sector + rq->nr_sectors; + ad->last_sector[data_dir] = blk_rq_pos(rq) + blk_rq_sectors(rq); if (data_dir == BLK_RW_SYNC) { struct io_context *ioc = RQ_IOC(rq); @@ -1312,12 +1314,8 @@ static void as_merged_requests(struct request_queue *q, struct request *req, static void as_work_handler(struct work_struct *work) { struct as_data *ad = container_of(work, struct as_data, antic_work); - struct request_queue *q = ad->q; - unsigned long flags; - spin_lock_irqsave(q->queue_lock, flags); - blk_start_queueing(q); - spin_unlock_irqrestore(q->queue_lock, flags); + blk_run_queue(ad->q); } static int as_may_queue(struct request_queue *q, int rw) diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 20b4111fa05..30022b4e2f6 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -106,10 +106,7 @@ bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) */ q->ordseq = 0; rq = q->orig_bar_rq; - - if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq))) - BUG(); - + __blk_end_request_all(rq, q->orderr); return true; } @@ -166,7 +163,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) * For an empty barrier, there's no actual BAR request, which * in turn makes POSTFLUSH unnecessary. Mask them off. */ - if (!rq->hard_nr_sectors) { + if (!blk_rq_sectors(rq)) { q->ordered &= ~(QUEUE_ORDERED_DO_BAR | QUEUE_ORDERED_DO_POSTFLUSH); /* @@ -183,7 +180,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) } /* stash away the original request */ - elv_dequeue_request(q, rq); + blk_dequeue_request(rq); q->orig_bar_rq = rq; rq = NULL; @@ -221,7 +218,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) } else skip |= QUEUE_ORDSEQ_PREFLUSH; - if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && q->in_flight) + if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q)) rq = NULL; else skip |= QUEUE_ORDSEQ_DRAIN; @@ -251,10 +248,8 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp) * Queue ordering not supported. Terminate * with prejudice. */ - elv_dequeue_request(q, rq); - if (__blk_end_request(rq, -EOPNOTSUPP, - blk_rq_bytes(rq))) - BUG(); + blk_dequeue_request(rq); + __blk_end_request_all(rq, -EOPNOTSUPP); *rqp = NULL; return false; } @@ -329,7 +324,7 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector) /* * The driver must store the error location in ->bi_sector, if * it supports it. For non-stacked drivers, this should be copied - * from rq->sector. + * from blk_rq_pos(rq). */ if (error_sector) *error_sector = bio->bi_sector; @@ -393,10 +388,10 @@ int blkdev_issue_discard(struct block_device *bdev, bio->bi_sector = sector; - if (nr_sects > q->max_hw_sectors) { - bio->bi_size = q->max_hw_sectors << 9; - nr_sects -= q->max_hw_sectors; - sector += q->max_hw_sectors; + if (nr_sects > queue_max_hw_sectors(q)) { + bio->bi_size = queue_max_hw_sectors(q) << 9; + nr_sects -= queue_max_hw_sectors(q); + sector += queue_max_hw_sectors(q); } else { bio->bi_size = nr_sects << 9; nr_sects = 0; diff --git a/block/blk-core.c b/block/blk-core.c index 648f15cb41f..d17d71c71d4 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -60,11 +60,11 @@ static void drive_stat_acct(struct request *rq, int new_io) int rw = rq_data_dir(rq); int cpu; - if (!blk_fs_request(rq) || !blk_do_io_stat(rq)) + if (!blk_do_io_stat(rq)) return; cpu = part_stat_lock(); - part = disk_map_sector_rcu(rq->rq_disk, rq->sector); + part = disk_map_sector_rcu(rq->rq_disk, blk_rq_pos(rq)); if (!new_io) part_stat_inc(cpu, part, merges[rw]); @@ -119,13 +119,14 @@ void blk_rq_init(struct request_queue *q, struct request *rq) INIT_LIST_HEAD(&rq->timeout_list); rq->cpu = -1; rq->q = q; - rq->sector = rq->hard_sector = (sector_t) -1; + rq->__sector = (sector_t) -1; INIT_HLIST_NODE(&rq->hash); RB_CLEAR_NODE(&rq->rb_node); rq->cmd = rq->__cmd; rq->cmd_len = BLK_MAX_CDB; rq->tag = -1; rq->ref_count = 1; + rq->start_time = jiffies; } EXPORT_SYMBOL(blk_rq_init); @@ -176,14 +177,11 @@ void blk_dump_rq_flags(struct request *rq, char *msg) rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->cmd_type, rq->cmd_flags); - printk(KERN_INFO " sector %llu, nr/cnr %lu/%u\n", - (unsigned long long)rq->sector, - rq->nr_sectors, - rq->current_nr_sectors); - printk(KERN_INFO " bio %p, biotail %p, buffer %p, data %p, len %u\n", - rq->bio, rq->biotail, - rq->buffer, rq->data, - rq->data_len); + printk(KERN_INFO " sector %llu, nr/cnr %u/%u\n", + (unsigned long long)blk_rq_pos(rq), + blk_rq_sectors(rq), blk_rq_cur_sectors(rq)); + printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n", + rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq)); if (blk_pc_request(rq)) { printk(KERN_INFO " cdb: "); @@ -325,24 +323,6 @@ void blk_unplug(struct request_queue *q) } EXPORT_SYMBOL(blk_unplug); -static void blk_invoke_request_fn(struct request_queue *q) -{ - if (unlikely(blk_queue_stopped(q))) - return; - - /* - * one level of recursion is ok and is much faster than kicking - * the unplug handling - */ - if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { - q->request_fn(q); - queue_flag_clear(QUEUE_FLAG_REENTER, q); - } else { - queue_flag_set(QUEUE_FLAG_PLUGGED, q); - kblockd_schedule_work(q, &q->unplug_work); - } -} - /** * blk_start_queue - restart a previously stopped queue * @q: The &struct request_queue in question @@ -357,7 +337,7 @@ void blk_start_queue(struct request_queue *q) WARN_ON(!irqs_disabled()); queue_flag_clear(QUEUE_FLAG_STOPPED, q); - blk_invoke_request_fn(q); + __blk_run_queue(q); } EXPORT_SYMBOL(blk_start_queue); @@ -417,12 +397,23 @@ void __blk_run_queue(struct request_queue *q) { blk_remove_plug(q); + if (unlikely(blk_queue_stopped(q))) + return; + + if (elv_queue_empty(q)) + return; + /* * Only recurse once to avoid overrunning the stack, let the unplug * handling reinvoke the handler shortly if we already got there. */ - if (!elv_queue_empty(q)) - blk_invoke_request_fn(q); + if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) { + q->request_fn(q); + queue_flag_clear(QUEUE_FLAG_REENTER, q); + } else { + queue_flag_set(QUEUE_FLAG_PLUGGED, q); + kblockd_schedule_work(q, &q->unplug_work); + } } EXPORT_SYMBOL(__blk_run_queue); @@ -432,9 +423,7 @@ EXPORT_SYMBOL(__blk_run_queue); * * Description: * Invoke request handling on this queue, if it has pending work to do. - * May be used to restart queueing when a request has completed. Also - * See @blk_start_queueing. - * + * May be used to restart queueing when a request has completed. */ void blk_run_queue(struct request_queue *q) { @@ -894,26 +883,58 @@ struct request *blk_get_request(struct request_queue *q, int rw, gfp_t gfp_mask) EXPORT_SYMBOL(blk_get_request); /** - * blk_start_queueing - initiate dispatch of requests to device - * @q: request queue to kick into gear + * blk_make_request - given a bio, allocate a corresponding struct request. + * + * @bio: The bio describing the memory mappings that will be submitted for IO. + * It may be a chained-bio properly constructed by block/bio layer. * - * This is basically a helper to remove the need to know whether a queue - * is plugged or not if someone just wants to initiate dispatch of requests - * for this queue. Should be used to start queueing on a device outside - * of ->request_fn() context. Also see @blk_run_queue. + * blk_make_request is the parallel of generic_make_request for BLOCK_PC + * type commands. Where the struct request needs to be farther initialized by + * the caller. It is passed a &struct bio, which describes the memory info of + * the I/O transfer. * - * The queue lock must be held with interrupts disabled. + * The caller of blk_make_request must make sure that bi_io_vec + * are set to describe the memory buffers. That bio_data_dir() will return + * the needed direction of the request. (And all bio's in the passed bio-chain + * are properly set accordingly) + * + * If called under none-sleepable conditions, mapped bio buffers must not + * need bouncing, by calling the appropriate masked or flagged allocator, + * suitable for the target device. Otherwise the call to blk_queue_bounce will + * BUG. + * + * WARNING: When allocating/cloning a bio-chain, careful consideration should be + * given to how you allocate bios. In particular, you cannot use __GFP_WAIT for + * anything but the first bio in the chain. Otherwise you risk waiting for IO + * completion of a bio that hasn't been submitted yet, thus resulting in a + * deadlock. Alternatively bios should be allocated using bio_kmalloc() instead + * of bio_alloc(), as that avoids the mempool deadlock. + * If possible a big IO should be split into smaller parts when allocation + * fails. Partial allocation should not be an error, or you risk a live-lock. */ -void blk_start_queueing(struct request_queue *q) +struct request *blk_make_request(struct request_queue *q, struct bio *bio, + gfp_t gfp_mask) { - if (!blk_queue_plugged(q)) { - if (unlikely(blk_queue_stopped(q))) - return; - q->request_fn(q); - } else - __generic_unplug_device(q); + struct request *rq = blk_get_request(q, bio_data_dir(bio), gfp_mask); + + if (unlikely(!rq)) + return ERR_PTR(-ENOMEM); + + for_each_bio(bio) { + struct bio *bounce_bio = bio; + int ret; + + blk_queue_bounce(q, &bounce_bio); + ret = blk_rq_append_bio(q, rq, bounce_bio); + if (unlikely(ret)) { + blk_put_request(rq); + return ERR_PTR(ret); + } + } + + return rq; } -EXPORT_SYMBOL(blk_start_queueing); +EXPORT_SYMBOL(blk_make_request); /** * blk_requeue_request - put a request back on queue @@ -934,6 +955,8 @@ void blk_requeue_request(struct request_queue *q, struct request *rq) if (blk_rq_tagged(rq)) blk_queue_end_tag(q, rq); + BUG_ON(blk_queued_rq(rq)); + elv_requeue_request(q, rq); } EXPORT_SYMBOL(blk_requeue_request); @@ -969,7 +992,6 @@ void blk_insert_request(struct request_queue *q, struct request *rq, * barrier */ rq->cmd_type = REQ_TYPE_SPECIAL; - rq->cmd_flags |= REQ_SOFTBARRIER; rq->special = data; @@ -983,7 +1005,7 @@ void blk_insert_request(struct request_queue *q, struct request *rq, drive_stat_acct(rq, 1); __elv_add_request(q, rq, where, 0); - blk_start_queueing(q); + __blk_run_queue(q); spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL(blk_insert_request); @@ -1105,16 +1127,13 @@ void init_request_from_bio(struct request *req, struct bio *bio) if (bio_failfast_driver(bio)) req->cmd_flags |= REQ_FAILFAST_DRIVER; - /* - * REQ_BARRIER implies no merging, but lets make it explicit - */ if (unlikely(bio_discard(bio))) { req->cmd_flags |= REQ_DISCARD; if (bio_barrier(bio)) req->cmd_flags |= REQ_SOFTBARRIER; req->q->prepare_discard_fn(req->q, req); } else if (unlikely(bio_barrier(bio))) - req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE); + req->cmd_flags |= REQ_HARDBARRIER; if (bio_sync(bio)) req->cmd_flags |= REQ_RW_SYNC; @@ -1124,9 +1143,8 @@ void init_request_from_bio(struct request *req, struct bio *bio) req->cmd_flags |= REQ_NOIDLE; req->errors = 0; - req->hard_sector = req->sector = bio->bi_sector; + req->__sector = bio->bi_sector; req->ioprio = bio_prio(bio); - req->start_time = jiffies; blk_rq_bio_prep(req->q, req, bio); } @@ -1142,14 +1160,13 @@ static inline bool queue_should_plug(struct request_queue *q) static int __make_request(struct request_queue *q, struct bio *bio) { struct request *req; - int el_ret, nr_sectors; + int el_ret; + unsigned int bytes = bio->bi_size; const unsigned short prio = bio_prio(bio); const int sync = bio_sync(bio); const int unplug = bio_unplug(bio); int rw_flags; - nr_sectors = bio_sectors(bio); - /* * low level driver can indicate that it wants pages above a * certain limit bounced to low memory (ie for highmem, or even @@ -1174,7 +1191,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) req->biotail->bi_next = bio; req->biotail = bio; - req->nr_sectors = req->hard_nr_sectors += nr_sectors; + req->__data_len += bytes; req->ioprio = ioprio_best(req->ioprio, prio); if (!blk_rq_cpu_valid(req)) req->cpu = bio->bi_comp_cpu; @@ -1200,10 +1217,8 @@ static int __make_request(struct request_queue *q, struct bio *bio) * not touch req->buffer either... */ req->buffer = bio_data(bio); - req->current_nr_sectors = bio_cur_sectors(bio); - req->hard_cur_sectors = req->current_nr_sectors; - req->sector = req->hard_sector = bio->bi_sector; - req->nr_sectors = req->hard_nr_sectors += nr_sectors; + req->__sector = bio->bi_sector; + req->__data_len += bytes; req->ioprio = ioprio_best(req->ioprio, prio); if (!blk_rq_cpu_valid(req)) req->cpu = bio->bi_comp_cpu; @@ -1414,11 +1429,11 @@ static inline void __generic_make_request(struct bio *bio) goto end_io; } - if (unlikely(nr_sectors > q->max_hw_sectors)) { + if (unlikely(nr_sectors > queue_max_hw_sectors(q))) { printk(KERN_ERR "bio too big device %s (%u > %u)\n", - bdevname(bio->bi_bdev, b), - bio_sectors(bio), - q->max_hw_sectors); + bdevname(bio->bi_bdev, b), + bio_sectors(bio), + queue_max_hw_sectors(q)); goto end_io; } @@ -1584,8 +1599,8 @@ EXPORT_SYMBOL(submit_bio); */ int blk_rq_check_limits(struct request_queue *q, struct request *rq) { - if (rq->nr_sectors > q->max_sectors || - rq->data_len > q->max_hw_sectors << 9) { + if (blk_rq_sectors(rq) > queue_max_sectors(q) || + blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) { printk(KERN_ERR "%s: over max size limit.\n", __func__); return -EIO; } @@ -1597,8 +1612,8 @@ int blk_rq_check_limits(struct request_queue *q, struct request *rq) * limitation. */ blk_recalc_rq_segments(rq); - if (rq->nr_phys_segments > q->max_phys_segments || - rq->nr_phys_segments > q->max_hw_segments) { + if (rq->nr_phys_segments > queue_max_phys_segments(q) || + rq->nr_phys_segments > queue_max_hw_segments(q)) { printk(KERN_ERR "%s: over max segments limit.\n", __func__); return -EIO; } @@ -1642,40 +1657,15 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) } EXPORT_SYMBOL_GPL(blk_insert_cloned_request); -/** - * blkdev_dequeue_request - dequeue request and start timeout timer - * @req: request to dequeue - * - * Dequeue @req and start timeout timer on it. This hands off the - * request to the driver. - * - * Block internal functions which don't want to start timer should - * call elv_dequeue_request(). - */ -void blkdev_dequeue_request(struct request *req) -{ - elv_dequeue_request(req->q, req); - - /* - * We are now handing the request to the hardware, add the - * timeout handler. - */ - blk_add_timer(req); -} -EXPORT_SYMBOL(blkdev_dequeue_request); - static void blk_account_io_completion(struct request *req, unsigned int bytes) { - if (!blk_do_io_stat(req)) - return; - - if (blk_fs_request(req)) { + if (blk_do_io_stat(req)) { const int rw = rq_data_dir(req); struct hd_struct *part; int cpu; cpu = part_stat_lock(); - part = disk_map_sector_rcu(req->rq_disk, req->sector); + part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); part_stat_add(cpu, part, sectors[rw], bytes >> 9); part_stat_unlock(); } @@ -1683,22 +1673,19 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes) static void blk_account_io_done(struct request *req) { - if (!blk_do_io_stat(req)) - return; - /* * Account IO completion. bar_rq isn't accounted as a normal * IO on queueing nor completion. Accounting the containing * request is enough. */ - if (blk_fs_request(req) && req != &req->q->bar_rq) { + if (blk_do_io_stat(req) && req != &req->q->bar_rq) { unsigned long duration = jiffies - req->start_time; const int rw = rq_data_dir(req); struct hd_struct *part; int cpu; cpu = part_stat_lock(); - part = disk_map_sector_rcu(req->rq_disk, req->sector); + part = disk_map_sector_rcu(req->rq_disk, blk_rq_pos(req)); part_stat_inc(cpu, part, ios[rw]); part_stat_add(cpu, part, ticks[rw], duration); @@ -1710,25 +1697,209 @@ static void blk_account_io_done(struct request *req) } /** - * __end_that_request_first - end I/O on a request - * @req: the request being processed + * blk_peek_request - peek at the top of a request queue + * @q: request queue to peek at + * + * Description: + * Return the request at the top of @q. The returned request + * should be started using blk_start_request() before LLD starts + * processing it. + * + * Return: + * Pointer to the request at the top of @q if available. Null + * otherwise. + * + * Context: + * queue_lock must be held. + */ +struct request *blk_peek_request(struct request_queue *q) +{ + struct request *rq; + int ret; + + while ((rq = __elv_next_request(q)) != NULL) { + if (!(rq->cmd_flags & REQ_STARTED)) { + /* + * This is the first time the device driver + * sees this request (possibly after + * requeueing). Notify IO scheduler. + */ + if (blk_sorted_rq(rq)) + elv_activate_rq(q, rq); + + /* + * just mark as started even if we don't start + * it, a request that has been delayed should + * not be passed by new incoming requests + */ + rq->cmd_flags |= REQ_STARTED; + trace_block_rq_issue(q, rq); + } + + if (!q->boundary_rq || q->boundary_rq == rq) { + q->end_sector = rq_end_sector(rq); + q->boundary_rq = NULL; + } + + if (rq->cmd_flags & REQ_DONTPREP) + break; + + if (q->dma_drain_size && blk_rq_bytes(rq)) { + /* + * make sure space for the drain appears we + * know we can do this because max_hw_segments + * has been adjusted to be one fewer than the + * device can handle + */ + rq->nr_phys_segments++; + } + + if (!q->prep_rq_fn) + break; + + ret = q->prep_rq_fn(q, rq); + if (ret == BLKPREP_OK) { + break; + } else if (ret == BLKPREP_DEFER) { + /* + * the request may have been (partially) prepped. + * we need to keep this request in the front to + * avoid resource deadlock. REQ_STARTED will + * prevent other fs requests from passing this one. + */ + if (q->dma_drain_size && blk_rq_bytes(rq) && + !(rq->cmd_flags & REQ_DONTPREP)) { + /* + * remove the space for the drain we added + * so that we don't add it again + */ + --rq->nr_phys_segments; + } + + rq = NULL; + break; + } else if (ret == BLKPREP_KILL) { + rq->cmd_flags |= REQ_QUIET; + /* + * Mark this request as started so we don't trigger + * any debug logic in the end I/O path. + */ + blk_start_request(rq); + __blk_end_request_all(rq, -EIO); + } else { + printk(KERN_ERR "%s: bad return=%d\n", __func__, ret); + break; + } + } + + return rq; +} +EXPORT_SYMBOL(blk_peek_request); + +void blk_dequeue_request(struct request *rq) +{ + struct request_queue *q = rq->q; + + BUG_ON(list_empty(&rq->queuelist)); + BUG_ON(ELV_ON_HASH(rq)); + + list_del_init(&rq->queuelist); + + /* + * the time frame between a request being removed from the lists + * and to it is freed is accounted as io that is in progress at + * the driver side. + */ + if (blk_account_rq(rq)) + q->in_flight[rq_is_sync(rq)]++; +} + +/** + * blk_start_request - start request processing on the driver + * @req: request to dequeue + * + * Description: + * Dequeue @req and start timeout timer on it. This hands off the + * request to the driver. + * + * Block internal functions which don't want to start timer should + * call blk_dequeue_request(). + * + * Context: + * queue_lock must be held. + */ +void blk_start_request(struct request *req) +{ + blk_dequeue_request(req); + + /* + * We are now handing the request to the hardware, initialize + * resid_len to full count and add the timeout handler. + */ + req->resid_len = blk_rq_bytes(req); + if (unlikely(blk_bidi_rq(req))) + req->next_rq->resid_len = blk_rq_bytes(req->next_rq); + + blk_add_timer(req); +} +EXPORT_SYMBOL(blk_start_request); + +/** + * blk_fetch_request - fetch a request from a request queue + * @q: request queue to fetch a request from + * + * Description: + * Return the request at the top of @q. The request is started on + * return and LLD can start processing it immediately. + * + * Return: + * Pointer to the request at the top of @q if available. Null + * otherwise. + * + * Context: + * queue_lock must be held. + */ +struct request *blk_fetch_request(struct request_queue *q) +{ + struct request *rq; + + rq = blk_peek_request(q); + if (rq) + blk_start_request(rq); + return rq; +} +EXPORT_SYMBOL(blk_fetch_request); + +/** + * blk_update_request - Special helper function for request stacking drivers + * @rq: the request being processed * @error: %0 for success, < %0 for error - * @nr_bytes: number of bytes to complete + * @nr_bytes: number of bytes to complete @rq * * Description: - * Ends I/O on a number of bytes attached to @req, and sets it up - * for the next range of segments (if any) in the cluster. + * Ends I/O on a number of bytes attached to @rq, but doesn't complete + * the request structure even if @rq doesn't have leftover. + * If @rq has leftover, sets it up for the next range of segments. + * + * This special helper function is only for request stacking drivers + * (e.g. request-based dm) so that they can handle partial completion. + * Actual device drivers should use blk_end_request instead. + * + * Passing the result of blk_rq_bytes() as @nr_bytes guarantees + * %false return from this function. * * Return: - * %0 - we are done with this request, call end_that_request_last() - * %1 - still buffers pending for this request + * %false - this request doesn't have any more data + * %true - this request has more data **/ -static int __end_that_request_first(struct request *req, int error, - int nr_bytes) +bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) { int total_bytes, bio_nbytes, next_idx = 0; struct bio *bio; + if (!req->bio) + return false; + trace_block_rq_complete(req->q, req); /* @@ -1745,7 +1916,7 @@ static int __end_that_request_first(struct request *req, int error, if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) { printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n", req->rq_disk ? req->rq_disk->disk_name : "?", - (unsigned long long)req->sector); + (unsigned long long)blk_rq_pos(req)); } blk_account_io_completion(req, nr_bytes); @@ -1805,8 +1976,15 @@ static int __end_that_request_first(struct request *req, int error, /* * completely done */ - if (!req->bio) - return 0; + if (!req->bio) { + /* + * Reset counters so that the request stacking driver + * can find how many bytes remain in the request + * later. + */ + req->__data_len = 0; + return false; + } /* * if the request wasn't completed, update state @@ -1818,21 +1996,55 @@ static int __end_that_request_first(struct request *req, int error, bio_iovec(bio)->bv_len -= nr_bytes; } - blk_recalc_rq_sectors(req, total_bytes >> 9); + req->__data_len -= total_bytes; + req->buffer = bio_data(req->bio); + + /* update sector only for requests with clear definition of sector */ + if (blk_fs_request(req) || blk_discard_rq(req)) + req->__sector += total_bytes >> 9; + + /* + * If total number of sectors is less than the first segment + * size, something has gone terribly wrong. + */ + if (blk_rq_bytes(req) < blk_rq_cur_bytes(req)) { + printk(KERN_ERR "blk: request botched\n"); + req->__data_len = blk_rq_cur_bytes(req); + } + + /* recalculate the number of segments */ blk_recalc_rq_segments(req); - return 1; + + return true; +} +EXPORT_SYMBOL_GPL(blk_update_request); + +static bool blk_update_bidi_request(struct request *rq, int error, + unsigned int nr_bytes, + unsigned int bidi_bytes) +{ + if (blk_update_request(rq, error, nr_bytes)) + return true; + + /* Bidi request must be completed as a whole */ + if (unlikely(blk_bidi_rq(rq)) && + blk_update_request(rq->next_rq, error, bidi_bytes)) + return true; + + add_disk_randomness(rq->rq_disk); + + return false; } /* * queue lock must be held */ -static void end_that_request_last(struct request *req, int error) +static void blk_finish_request(struct request *req, int error) { if (blk_rq_tagged(req)) blk_queue_end_tag(req->q, req); - if (blk_queued_rq(req)) - elv_dequeue_request(req->q, req); + BUG_ON(blk_queued_rq(req)); if (unlikely(laptop_mode) && blk_fs_request(req)) laptop_io_completion(); @@ -1852,117 +2064,62 @@ static void end_that_request_last(struct request *req, int error) } /** - * blk_rq_bytes - Returns bytes |