aboutsummaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorDavid Woodhouse <David.Woodhouse@intel.com>2008-10-13 17:13:56 +0100
committerDavid Woodhouse <David.Woodhouse@intel.com>2008-10-13 17:13:56 +0100
commite758936e02700ff88a0b08b722a3847b95283ef2 (patch)
tree50c919bef1b459a778b85159d5929de95b6c4a01 /block
parent239cfbde1f5843c4a24199f117d5f67f637d72d5 (diff)
parent4480f15b3306f43bbb0310d461142b4e897ca45b (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
Conflicts: include/asm-x86/statfs.h
Diffstat (limited to 'block')
-rw-r--r--block/Makefile4
-rw-r--r--block/as-iosched.c14
-rw-r--r--block/blk-barrier.c72
-rw-r--r--block/blk-core.c605
-rw-r--r--block/blk-exec.c6
-rw-r--r--block/blk-integrity.c33
-rw-r--r--block/blk-map.c68
-rw-r--r--block/blk-merge.c129
-rw-r--r--block/blk-settings.c43
-rw-r--r--block/blk-softirq.c175
-rw-r--r--block/blk-sysfs.c35
-rw-r--r--block/blk-tag.c22
-rw-r--r--block/blk-timeout.c238
-rw-r--r--block/blk.h48
-rw-r--r--block/blktrace.c32
-rw-r--r--block/bsg.c6
-rw-r--r--block/cfq-iosched.c57
-rw-r--r--block/cmd-filter.c11
-rw-r--r--block/compat_ioctl.c1
-rw-r--r--block/deadline-iosched.c40
-rw-r--r--block/elevator.c40
-rw-r--r--block/genhd.c965
-rw-r--r--block/ioctl.c124
-rw-r--r--block/scsi_ioctl.c8
24 files changed, 1896 insertions, 880 deletions
diff --git a/block/Makefile b/block/Makefile
index 208000b0750..bfe73049f93 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -4,8 +4,8 @@
obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \
blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \
- blk-exec.o blk-merge.o ioctl.o genhd.o scsi_ioctl.o \
- cmd-filter.o
+ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \
+ ioctl.o genhd.o scsi_ioctl.o cmd-filter.o
obj-$(CONFIG_BLK_DEV_BSG) += bsg.o
obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o
diff --git a/block/as-iosched.c b/block/as-iosched.c
index cf4eb0eefbb..71f0abb219e 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -462,7 +462,7 @@ static void as_antic_stop(struct as_data *ad)
del_timer(&ad->antic_timer);
ad->antic_status = ANTIC_FINISHED;
/* see as_work_handler */
- kblockd_schedule_work(&ad->antic_work);
+ kblockd_schedule_work(ad->q, &ad->antic_work);
}
}
@@ -483,7 +483,7 @@ static void as_antic_timeout(unsigned long data)
aic = ad->io_context->aic;
ad->antic_status = ANTIC_FINISHED;
- kblockd_schedule_work(&ad->antic_work);
+ kblockd_schedule_work(q, &ad->antic_work);
if (aic->ttime_samples == 0) {
/* process anticipated on has exited or timed out*/
@@ -745,6 +745,14 @@ static int as_can_break_anticipation(struct as_data *ad, struct request *rq)
*/
static int as_can_anticipate(struct as_data *ad, struct request *rq)
{
+#if 0 /* disable for now, we need to check tag level as well */
+ /*
+ * SSD device without seek penalty, disable idling
+ */
+ if (blk_queue_nonrot(ad->q)) axman
+ return 0;
+#endif
+
if (!ad->io_context)
/*
* Last request submitted was a write
@@ -844,7 +852,7 @@ static void as_completed_request(struct request_queue *q, struct request *rq)
if (ad->changed_batch && ad->nr_dispatched == 1) {
ad->current_batch_expires = jiffies +
ad->batch_expire[ad->batch_data_dir];
- kblockd_schedule_work(&ad->antic_work);
+ kblockd_schedule_work(q, &ad->antic_work);
ad->changed_batch = 0;
if (ad->batch_data_dir == REQ_SYNC)
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index a09ead19f9c..5c99ff8d2db 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -293,7 +293,7 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
bio->bi_end_io = bio_end_empty_barrier;
bio->bi_private = &wait;
bio->bi_bdev = bdev;
- submit_bio(1 << BIO_RW_BARRIER, bio);
+ submit_bio(WRITE_BARRIER, bio);
wait_for_completion(&wait);
@@ -315,3 +315,73 @@ int blkdev_issue_flush(struct block_device *bdev, sector_t *error_sector)
return ret;
}
EXPORT_SYMBOL(blkdev_issue_flush);
+
+static void blkdev_discard_end_io(struct bio *bio, int err)
+{
+ if (err) {
+ if (err == -EOPNOTSUPP)
+ set_bit(BIO_EOPNOTSUPP, &bio->bi_flags);
+ clear_bit(BIO_UPTODATE, &bio->bi_flags);
+ }
+
+ bio_put(bio);
+}
+
+/**
+ * blkdev_issue_discard - queue a discard
+ * @bdev: blockdev to issue discard for
+ * @sector: start sector
+ * @nr_sects: number of sectors to discard
+ * @gfp_mask: memory allocation flags (for bio_alloc)
+ *
+ * Description:
+ * Issue a discard request for the sectors in question. Does not wait.
+ */
+int blkdev_issue_discard(struct block_device *bdev,
+ sector_t sector, sector_t nr_sects, gfp_t gfp_mask)
+{
+ struct request_queue *q;
+ struct bio *bio;
+ int ret = 0;
+
+ if (bdev->bd_disk == NULL)
+ return -ENXIO;
+
+ q = bdev_get_queue(bdev);
+ if (!q)
+ return -ENXIO;
+
+ if (!q->prepare_discard_fn)
+ return -EOPNOTSUPP;
+
+ while (nr_sects && !ret) {
+ bio = bio_alloc(gfp_mask, 0);
+ if (!bio)
+ return -ENOMEM;
+
+ bio->bi_end_io = blkdev_discard_end_io;
+ bio->bi_bdev = bdev;
+
+ bio->bi_sector = sector;
+
+ if (nr_sects > q->max_hw_sectors) {
+ bio->bi_size = q->max_hw_sectors << 9;
+ nr_sects -= q->max_hw_sectors;
+ sector += q->max_hw_sectors;
+ } else {
+ bio->bi_size = nr_sects << 9;
+ nr_sects = 0;
+ }
+ bio_get(bio);
+ submit_bio(DISCARD_BARRIER, bio);
+
+ /* Check if it failed immediately */
+ if (bio_flagged(bio, BIO_EOPNOTSUPP))
+ ret = -EOPNOTSUPP;
+ else if (!bio_flagged(bio, BIO_UPTODATE))
+ ret = -EIO;
+ bio_put(bio);
+ }
+ return ret;
+}
+EXPORT_SYMBOL(blkdev_issue_discard);
diff --git a/block/blk-core.c b/block/blk-core.c
index 2cba5ef97b2..2d053b58441 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -26,8 +26,6 @@
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
-#include <linux/interrupt.h>
-#include <linux/cpu.h>
#include <linux/blktrace_api.h>
#include <linux/fault-inject.h>
@@ -50,27 +48,26 @@ struct kmem_cache *blk_requestq_cachep;
*/
static struct workqueue_struct *kblockd_workqueue;
-static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
-
static void drive_stat_acct(struct request *rq, int new_io)
{
struct hd_struct *part;
int rw = rq_data_dir(rq);
+ int cpu;
if (!blk_fs_request(rq) || !rq->rq_disk)
return;
- part = get_part(rq->rq_disk, rq->sector);
+ cpu = part_stat_lock();
+ part = disk_map_sector_rcu(rq->rq_disk, rq->sector);
+
if (!new_io)
- __all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector);
+ part_stat_inc(cpu, part, merges[rw]);
else {
- disk_round_stats(rq->rq_disk);
- rq->rq_disk->in_flight++;
- if (part) {
- part_round_stats(part);
- part->in_flight++;
- }
+ part_round_stats(cpu, part);
+ part_inc_in_flight(part);
}
+
+ part_stat_unlock();
}
void blk_queue_congestion_threshold(struct request_queue *q)
@@ -113,7 +110,8 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
memset(rq, 0, sizeof(*rq));
INIT_LIST_HEAD(&rq->queuelist);
- INIT_LIST_HEAD(&rq->donelist);
+ INIT_LIST_HEAD(&rq->timeout_list);
+ rq->cpu = -1;
rq->q = q;
rq->sector = rq->hard_sector = (sector_t) -1;
INIT_HLIST_NODE(&rq->hash);
@@ -308,7 +306,7 @@ void blk_unplug_timeout(unsigned long data)
blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL,
q->rq.count[READ] + q->rq.count[WRITE]);
- kblockd_schedule_work(&q->unplug_work);
+ kblockd_schedule_work(q, &q->unplug_work);
}
void blk_unplug(struct request_queue *q)
@@ -325,6 +323,21 @@ void blk_unplug(struct request_queue *q)
}
EXPORT_SYMBOL(blk_unplug);
+static void blk_invoke_request_fn(struct request_queue *q)
+{
+ /*
+ * one level of recursion is ok and is much faster than kicking
+ * the unplug handling
+ */
+ if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
+ q->request_fn(q);
+ queue_flag_clear(QUEUE_FLAG_REENTER, q);
+ } else {
+ queue_flag_set(QUEUE_FLAG_PLUGGED, q);
+ kblockd_schedule_work(q, &q->unplug_work);
+ }
+}
+
/**
* blk_start_queue - restart a previously stopped queue
* @q: The &struct request_queue in question
@@ -339,18 +352,7 @@ void blk_start_queue(struct request_queue *q)
WARN_ON(!irqs_disabled());
queue_flag_clear(QUEUE_FLAG_STOPPED, q);
-
- /*
- * one level of recursion is ok and is much faster than kicking
- * the unplug handling
- */
- if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
- q->request_fn(q);
- queue_flag_clear(QUEUE_FLAG_REENTER, q);
- } else {
- blk_plug_device(q);
- kblockd_schedule_work(&q->unplug_work);
- }
+ blk_invoke_request_fn(q);
}
EXPORT_SYMBOL(blk_start_queue);
@@ -408,15 +410,8 @@ void __blk_run_queue(struct request_queue *q)
* Only recurse once to avoid overrunning the stack, let the unplug
* handling reinvoke the handler shortly if we already got there.
*/
- if (!elv_queue_empty(q)) {
- if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
- q->request_fn(q);
- queue_flag_clear(QUEUE_FLAG_REENTER, q);
- } else {
- blk_plug_device(q);
- kblockd_schedule_work(&q->unplug_work);
- }
- }
+ if (!elv_queue_empty(q))
+ blk_invoke_request_fn(q);
}
EXPORT_SYMBOL(__blk_run_queue);
@@ -441,6 +436,14 @@ void blk_put_queue(struct request_queue *q)
void blk_cleanup_queue(struct request_queue *q)
{
+ /*
+ * We know we have process context here, so we can be a little
+ * cautious and ensure that pending block actions on this device
+ * are done before moving on. Going into this function, we should
+ * not have processes doing IO to this device.
+ */
+ blk_sync_queue(q);
+
mutex_lock(&q->sysfs_lock);
queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
mutex_unlock(&q->sysfs_lock);
@@ -496,6 +499,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
}
init_timer(&q->unplug_timer);
+ setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
+ INIT_LIST_HEAD(&q->timeout_list);
kobject_init(&q->kobj, &blk_queue_ktype);
@@ -531,7 +536,7 @@ EXPORT_SYMBOL(blk_alloc_queue_node);
* request queue; this lock will be taken also from interrupt context, so irq
* disabling is needed for it.
*
- * Function returns a pointer to the initialized request queue, or NULL if
+ * Function returns a pointer to the initialized request queue, or %NULL if
* it didn't succeed.
*
* Note:
@@ -569,7 +574,8 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id)
q->request_fn = rfn;
q->prep_rq_fn = NULL;
q->unplug_fn = generic_unplug_device;
- q->queue_flags = (1 << QUEUE_FLAG_CLUSTER);
+ q->queue_flags = (1 << QUEUE_FLAG_CLUSTER |
+ 1 << QUEUE_FLAG_STACKABLE);
q->queue_lock = lock;
blk_queue_segment_boundary(q, 0xffffffff);
@@ -624,10 +630,6 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask)
blk_rq_init(q, rq);
- /*
- * first three bits are identical in rq->cmd_flags and bio->bi_rw,
- * see bio.h and blkdev.h
- */
rq->cmd_flags = rw | REQ_ALLOCED;
if (priv) {
@@ -888,9 +890,11 @@ EXPORT_SYMBOL(blk_get_request);
*/
void blk_start_queueing(struct request_queue *q)
{
- if (!blk_queue_plugged(q))
+ if (!blk_queue_plugged(q)) {
+ if (unlikely(blk_queue_stopped(q)))
+ return;
q->request_fn(q);
- else
+ } else
__generic_unplug_device(q);
}
EXPORT_SYMBOL(blk_start_queueing);
@@ -907,6 +911,8 @@ EXPORT_SYMBOL(blk_start_queueing);
*/
void blk_requeue_request(struct request_queue *q, struct request *rq)
{
+ blk_delete_timer(rq);
+ blk_clear_rq_complete(rq);
blk_add_trace_rq(q, rq, BLK_TA_REQUEUE);
if (blk_rq_tagged(rq))
@@ -917,7 +923,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq)
EXPORT_SYMBOL(blk_requeue_request);
/**
- * blk_insert_request - insert a special request in to a request queue
+ * blk_insert_request - insert a special request into a request queue
* @q: request queue where request should be inserted
* @rq: request to be inserted
* @at_head: insert request at head or tail of queue
@@ -927,8 +933,8 @@ EXPORT_SYMBOL(blk_requeue_request);
* Many block devices need to execute commands asynchronously, so they don't
* block the whole kernel from preemption during request execution. This is
* accomplished normally by inserting aritficial requests tagged as
- * REQ_SPECIAL in to the corresponding request queue, and letting them be
- * scheduled for actual execution by the request queue.
+ * REQ_TYPE_SPECIAL in to the corresponding request queue, and letting them
+ * be scheduled for actual execution by the request queue.
*
* We have the option of inserting the head or the tail of the queue.
* Typically we use the tail for new ioctls and so forth. We use the head
@@ -982,8 +988,22 @@ static inline void add_request(struct request_queue *q, struct request *req)
__elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0);
}
-/*
- * disk_round_stats() - Round off the performance stats on a struct
+static void part_round_stats_single(int cpu, struct hd_struct *part,
+ unsigned long now)
+{
+ if (now == part->stamp)
+ return;
+
+ if (part->in_flight) {
+ __part_stat_add(cpu, part, time_in_queue,
+ part->in_flight * (now - part->stamp));
+ __part_stat_add(cpu, part, io_ticks, (now - part->stamp));
+ }
+ part->stamp = now;
+}
+
+/**
+ * part_round_stats() - Round off the performance stats on a struct
* disk_stats.
*
* The average IO queue length and utilisation statistics are maintained
@@ -997,36 +1017,15 @@ static inline void add_request(struct request_queue *q, struct request *req)
* /proc/diskstats. This accounts immediately for all queue usage up to
* the current jiffies and restarts the counters again.
*/
-void disk_round_stats(struct gendisk *disk)
+void part_round_stats(int cpu, struct hd_struct *part)
{
unsigned long now = jiffies;
- if (now == disk->stamp)
- return;
-
- if (disk->in_flight) {
- __disk_stat_add(disk, time_in_queue,
- disk->in_flight * (now - disk->stamp));
- __disk_stat_add(disk, io_ticks, (now - disk->stamp));
- }
- disk->stamp = now;
-}
-EXPORT_SYMBOL_GPL(disk_round_stats);
-
-void part_round_stats(struct hd_struct *part)
-{
- unsigned long now = jiffies;
-
- if (now == part->stamp)
- return;
-
- if (part->in_flight) {
- __part_stat_add(part, time_in_queue,
- part->in_flight * (now - part->stamp));
- __part_stat_add(part, io_ticks, (now - part->stamp));
- }
- part->stamp = now;
+ if (part->partno)
+ part_round_stats_single(cpu, &part_to_disk(part)->part0, now);
+ part_round_stats_single(cpu, part, now);
}
+EXPORT_SYMBOL_GPL(part_round_stats);
/*
* queue lock must be held
@@ -1070,6 +1069,7 @@ EXPORT_SYMBOL(blk_put_request);
void init_request_from_bio(struct request *req, struct bio *bio)
{
+ req->cpu = bio->bi_comp_cpu;
req->cmd_type = REQ_TYPE_FS;
/*
@@ -1081,7 +1081,12 @@ void init_request_from_bio(struct request *req, struct bio *bio)
/*
* REQ_BARRIER implies no merging, but lets make it explicit
*/
- if (unlikely(bio_barrier(bio)))
+ if (unlikely(bio_discard(bio))) {
+ req->cmd_flags |= REQ_DISCARD;
+ if (bio_barrier(bio))
+ req->cmd_flags |= REQ_SOFTBARRIER;
+ req->q->prepare_discard_fn(req->q, req);
+ } else if (unlikely(bio_barrier(bio)))
req->cmd_flags |= (REQ_HARDBARRIER | REQ_NOMERGE);
if (bio_sync(bio))
@@ -1099,7 +1104,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
static int __make_request(struct request_queue *q, struct bio *bio)
{
struct request *req;
- int el_ret, nr_sectors, barrier, err;
+ int el_ret, nr_sectors, barrier, discard, err;
const unsigned short prio = bio_prio(bio);
const int sync = bio_sync(bio);
int rw_flags;
@@ -1114,7 +1119,14 @@ static int __make_request(struct request_queue *q, struct bio *bio)
blk_queue_bounce(q, &bio);
barrier = bio_barrier(bio);
- if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
+ if (unlikely(barrier) && bio_has_data(bio) &&
+ (q->next_ordered == QUEUE_ORDERED_NONE)) {
+ err = -EOPNOTSUPP;
+ goto end_io;
+ }
+
+ discard = bio_discard(bio);
+ if (unlikely(discard) && !q->prepare_discard_fn) {
err = -EOPNOTSUPP;
goto end_io;
}
@@ -1138,6 +1150,8 @@ static int __make_request(struct request_queue *q, struct bio *bio)
req->biotail = bio;
req->nr_sectors = req->hard_nr_sectors += nr_sectors;
req->ioprio = ioprio_best(req->ioprio, prio);
+ if (!blk_rq_cpu_valid(req))
+ req->cpu = bio->bi_comp_cpu;
drive_stat_acct(req, 0);
if (!attempt_back_merge(q, req))
elv_merged_request(q, req, el_ret);
@@ -1165,6 +1179,8 @@ static int __make_request(struct request_queue *q, struct bio *bio)
req->sector = req->hard_sector = bio->bi_sector;
req->nr_sectors = req->hard_nr_sectors += nr_sectors;
req->ioprio = ioprio_best(req->ioprio, prio);
+ if (!blk_rq_cpu_valid(req))
+ req->cpu = bio->bi_comp_cpu;
drive_stat_acct(req, 0);
if (!attempt_front_merge(q, req))
elv_merged_request(q, req, el_ret);
@@ -1200,13 +1216,15 @@ get_rq:
init_request_from_bio(req, bio);
spin_lock_irq(q->queue_lock);
+ if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
+ bio_flagged(bio, BIO_CPU_AFFINE))
+ req->cpu = blk_cpu_to_group(smp_processor_id());
if (elv_queue_empty(q))
blk_plug_device(q);
add_request(q, req);
out:
if (sync)
__generic_unplug_device(q);
-
spin_unlock_irq(q->queue_lock);
return 0;
@@ -1260,8 +1278,9 @@ __setup("fail_make_request=", setup_fail_make_request);
static int should_fail_request(struct bio *bio)
{
- if ((bio->bi_bdev->bd_disk->flags & GENHD_FL_FAIL) ||
- (bio->bi_bdev->bd_part && bio->bi_bdev->bd_part->make_it_fail))
+ struct hd_struct *part = bio->bi_bdev->bd_part;
+
+ if (part_to_disk(part)->part0.make_it_fail || part->make_it_fail)
return should_fail(&fail_make_request, bio->bi_size);
return 0;
@@ -1314,7 +1333,7 @@ static inline int bio_check_eod(struct bio *bio, unsigned int nr_sectors)
}
/**
- * generic_make_request: hand a buffer to its device driver for I/O
+ * generic_make_request - hand a buffer to its device driver for I/O
* @bio: The bio describing the location in memory and on the device.
*
* generic_make_request() is used to make I/O requests of block
@@ -1409,7 +1428,8 @@ end_io:
if (bio_check_eod(bio, nr_sectors))
goto end_io;
- if (bio_empty_barrier(bio) && !q->prepare_flush_fn) {
+ if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) ||
+ (bio_discard(bio) && !q->prepare_discard_fn)) {
err = -EOPNOTSUPP;
goto end_io;
}
@@ -1471,13 +1491,13 @@ void generic_make_request(struct bio *bio)
EXPORT_SYMBOL(generic_make_request);
/**
- * submit_bio: submit a bio to the block device layer for I/O
+ * submit_bio - submit a bio to the block device layer for I/O
* @rw: whether to %READ or %WRITE, or maybe to %READA (read ahead)
* @bio: The &struct bio which describes the I/O
*
* submit_bio() is very similar in purpose to generic_make_request(), and
* uses that function to do most of the work. Both are fairly rough
- * interfaces, @bio must be presetup and ready for I/O.
+ * interfaces; @bio must be presetup and ready for I/O.
*
*/
void submit_bio(int rw, struct bio *bio)
@@ -1490,11 +1510,7 @@ void submit_bio(int rw, struct bio *bio)
* If it's a regular read/write or a barrier with data attached,
* go through the normal accounting stuff before submission.
*/
- if (!bio_empty_barrier(bio)) {
-
- BIO_BUG_ON(!bio->bi_size);
- BIO_BUG_ON(!bio->bi_io_vec);
-
+ if (bio_has_data(bio)) {
if (rw & WRITE) {
count_vm_events(PGPGOUT, count);
} else {
@@ -1517,9 +1533,90 @@ void submit_bio(int rw, struct bio *bio)
EXPORT_SYMBOL(submit_bio);
/**
+ * blk_rq_check_limits - Helper function to check a request for the queue limit
+ * @q: the queue
+ * @rq: the request being checked
+ *
+ * Description:
+ * @rq may have been made based on weaker limitations of upper-level queues
+ * in request stacking drivers, and it may violate the limitation of @q.
+ * Since the block layer and the underlying device driver trust @rq
+ * after it is inserted to @q, it should be checked against @q before
+ * the insertion using this generic function.
+ *
+ * This function should also be useful for request stacking drivers
+ * in some cases below, so export this fuction.
+ * Request stacking drivers like request-based dm may change the queue
+ * limits while requests are in the queue (e.g. dm's table swapping).
+ * Such request stacking drivers should check those requests agaist
+ * the new queue limits again when they dispatch those requests,
+ * although such checkings are also done against the old queue limits
+ * when submitting requests.
+ */
+int blk_rq_check_limits(struct request_queue *q, struct request *rq)
+{
+ if (rq->nr_sectors > q->max_sectors ||
+ rq->data_len > q->max_hw_sectors << 9) {
+ printk(KERN_ERR "%s: over max size limit.\n", __func__);
+ return -EIO;
+ }
+
+ /*
+ * queue's settings related to segment counting like q->bounce_pfn
+ * may differ from that of other stacking queues.
+ * Recalculate it to check the request correctly on this queue's
+ * limitation.
+ */
+ blk_recalc_rq_segments(rq);
+ if (rq->nr_phys_segments > q->max_phys_segments ||
+ rq->nr_phys_segments > q->max_hw_segments) {
+ printk(KERN_ERR "%s: over max segments limit.\n", __func__);
+ return -EIO;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(blk_rq_check_limits);
+
+/**
+ * blk_insert_cloned_request - Helper for stacking drivers to submit a request
+ * @q: the queue to submit the request
+ * @rq: the request being queued
+ */
+int blk_insert_cloned_request(struct request_queue *q, struct request *rq)
+{
+ unsigned long flags;
+
+ if (blk_rq_check_limits(q, rq))
+ return -EIO;
+
+#ifdef CONFIG_FAIL_MAKE_REQUEST
+ if (rq->rq_disk && rq->rq_disk->part0.make_it_fail &&
+ should_fail(&fail_make_request, blk_rq_bytes(rq)))
+ return -EIO;
+#endif
+
+ spin_lock_irqsave(q->queue_lock, flags);
+
+ /*
+ * Submitting request must be dequeued before calling this function
+ * because it will be linked to another request_queue
+ */
+ BUG_ON(blk_queued_rq(rq));
+
+ drive_stat_acct(rq, 1);
+ __elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
+
+ spin_unlock_irqrestore(q->queue_lock, flags);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(blk_insert_cloned_request);
+
+/**
* __end_that_request_first - end I/O on a request
* @req: the request being processed
- * @error: 0 for success, < 0 for error
+ * @error: %0 for success, < %0 for error
* @nr_bytes: number of bytes to complete
*
* Description:
@@ -1527,8 +1624,8 @@ EXPORT_SYMBOL(submit_bio);
* for the next range of segments (if any) in the cluster.
*
* Return:
- * 0 - we are done with this request, call end_that_request_last()
- * 1 - still buffers pending for this request
+ * %0 - we are done with this request, call end_that_request_last()
+ * %1 - still buffers pending for this request
**/
static int __end_that_request_first(struct request *req, int error,
int nr_bytes)
@@ -1539,7 +1636,7 @@ static int __end_that_request_first(struct request *req, int error,
blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE);
/*
- * for a REQ_BLOCK_PC request, we want to carry any eventual
+ * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual
* sense key with us all the way through
*/
if (!blk_pc_request(req))
@@ -1552,11 +1649,14 @@ static int __end_that_request_first(struct request *req, int error,
}
if (blk_fs_request(req) && req->rq_disk) {
- struct hd_struct *part = get_part(req->rq_disk, req->sector);
const int rw = rq_data_dir(req);
+ struct hd_struct *part;
+ int cpu;
- all_stat_add(req->rq_disk, part, sectors[rw],
- nr_bytes >> 9, req->sector);
+ cpu = part_stat_lock();
+ part = disk_map_sector_rcu(req->rq_disk, req->sector);
+ part_stat_add(cpu, part, sectors[rw], nr_bytes >> 9);
+ part_stat_unlock();
}
total_bytes = bio_nbytes = 0;
@@ -1641,88 +1741,14 @@ static int __end_that_request_first(struct request *req, int error,
}
/*
- * splice the completion data to a local structure and hand off to
- * process_completion_queue() to complete the requests
- */
-static void blk_done_softirq(struct softirq_action *h)
-{
- struct list_head *cpu_list, local_list;
-
- local_irq_disable();
- cpu_list = &__get_cpu_var(blk_cpu_done);
- list_replace_init(cpu_list, &local_list);
- local_irq_enable();
-
- while (!list_empty(&local_list)) {
- struct request *rq;
-
- rq = list_entry(local_list.next, struct request, donelist);
- list_del_init(&rq->donelist);
- rq->q->softirq_done_fn(rq);
- }
-}
-
-static int __cpuinit blk_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
-{
- /*
- * If a CPU goes away, splice its entries to the current CPU
- * and trigger a run of the softirq
- */
- if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
- int cpu = (unsigned long) hcpu;
-
- local_irq_disable();
- list_splice_init(&per_cpu(blk_cpu_done, cpu),
- &__get_cpu_var(blk_cpu_done));
- raise_softirq_irqoff(BLOCK_SOFTIRQ);
- local_irq_enable();
- }
-
- return NOTIFY_OK;
-}
-
-
-static struct notifier_block blk_cpu_notifier __cpuinitdata = {
- .notifier_call = blk_cpu_notify,
-};
-
-/**
- * blk_complete_request - end I/O on a request
- * @req: the request being processed
- *
- * Description:
- * Ends all I/O on a request. It does not handle partial completions,
- * unless the driver actually implements this in its completion callback
- * through requeueing. The actual completion happens out-of-order,
- * through a softirq handler. The user must have registered a completion
- * callback through blk_queue_softirq_done().
- **/
-
-void blk_complete_request(struct request *req)
-{
- struct list_head *cpu_list;
- unsigned long flags;
-
- BUG_ON(!req->q->softirq_done_fn);
-
- local_irq_save(flags);
-
- cpu_list = &__get_cpu_var(blk_cpu_done);
- list_add_tail(&req->donelist, cpu_list);
- raise_softirq_irqoff(BLOCK_SOFTIRQ);
-
- local_irq_restore(flags);
-}
-EXPORT_SYMBOL(blk_complete_request);
-
-/*
* queue lock must be held
*/
static void end_that_request_last(struct request *req, int error)
{
struct gendisk *disk = req->rq_disk;
+ blk_delete_timer(req);
+
if (blk_rq_tagged(req))
blk_queue_end_tag(req->q, req);
@@ -1740,16 +1766,18 @@ static void end_that_request_last(struct request *req, int error)
if (disk && blk_fs_request(req) && req != &req->q->bar_rq) {
unsigned long duration = jiffies - req->start_time;
const int rw = rq_data_dir(req);
- struct hd_struct *part = get_part(disk, req->sector);
-
- __all_stat_inc(disk, part, ios[rw], req->sector);
- __all_stat_add(disk, part, ticks[rw], duration, req->sector);
- disk_round_stats(disk);
- disk->in_flight--;
- if (part) {
- part_round_stats(part);
- part->in_flight--;
- }
+ struct hd_struct *part;
+ int cpu;
+
+ cpu = part_stat_lock();
+ part = disk_map_sector_rcu(disk, req->sector);
+
+ part_stat_inc(cpu, part, ios[rw]);
+ part_stat_add(cpu, part, ticks[rw], duration);
+ part_round_stats(cpu, part);
+ part_dec_in_flight(part);
+
+ part_stat_unlock();
}
if (req->end_io)
@@ -1762,17 +1790,6 @@ static void end_that_request_last(struct request *req, int error)
}
}
-static inline void __end_request(struct request *rq, int uptodate,
- unsigned int nr_bytes)
-{
- int error = 0;
-
- if (uptodate <= 0)
- error = uptodate ? uptodate : -EIO;
-
- __blk_end_request(rq, error, nr_bytes);
-}
-
/**
* blk_rq_bytes - Returns bytes left to complete in the entire request
* @rq: the request being processed
@@ -1803,74 +1820,57 @@ unsigned int blk_rq_cur_bytes(struct request *rq)
EXPORT_SYMBOL_GPL(blk_rq_cur_bytes);
/**
- * end_queued_request - end all I/O on a queued request
- * @rq: the request being processed
- * @uptodate: error value or 0/1 uptodate flag
- *
- * Description:
- * Ends all I/O on a request, and removes it from the block layer queues.
- * Not suitable for normal IO completion, unless the driver still has
- * the request attached to the block layer.
- *
- **/
-void end_queued_request(struct request *rq, int uptodate)
-{
- __end_request(rq, uptodate, blk_rq_bytes(rq));
-}
-EXPORT_SYMBOL(end_queued_request);
-
-/**
- * end_dequeued_request - end all I/O on a dequeued request
- * @rq: the request being processed
- * @uptodate: error value or 0/1 uptodate flag
- *
- * Description:
- * Ends all I/O on a request. The request must already have been
- * dequeued using blkdev_dequeue_request(), as is normally the case
- * for most drivers.
- *
- **/
-void end_dequeued_request(struct request *rq, int uptodate)
-{
- __end_request(rq, uptodate, blk_rq_bytes(rq));
-}
-EXPORT_SYMBOL(end_dequeued_request);
-
-
-/**
* end_request - end I/O on the current segment of the request
* @req: the request being processed
- * @uptodate: error value or 0/1 uptodate flag
+ * @uptodate: error value or %0/%1 uptodate flag
*
* Description:
* Ends I/O on the current segment of a request. If that is the only
* remaining segment, the request is also completed and freed.
*
- * This is a remnant of how older block drivers handled IO completions.
- * Modern drivers typically end IO on the full request in one go, unless
+ *