author     Linus Torvalds <torvalds@linux-foundation.org>  2008-12-30 17:20:05 -0800
committer  Linus Torvalds <torvalds@linux-foundation.org>  2008-12-30 17:20:05 -0800
commit     1dff81f20cd55ffa5a8ee984da70ce0b99d29606
tree       06eb07bda250abfa8a78c3141db56862c8c7cf98
parent     179475a3b46f86e2d06f83e2312218ac3f0cf3a7
parent     d3f761104b097738932afcc310fbbbbfb007ef92
Merge branch 'for-2.6.29' of git://git.kernel.dk/linux-2.6-block
* 'for-2.6.29' of git://git.kernel.dk/linux-2.6-block: (43 commits)
bio: get rid of bio_vec clearing
bounce: don't rely on a zeroed bio_vec list
cciss: simplify parameters to deregister_disk function
cfq-iosched: fix race between exiting queue and exiting task
loop: Do not call loop_unplug for not configured loop device.
loop: Flush possible running bios when loop device is released.
alpha: remove dead BIO_VMERGE_BOUNDARY
Get rid of CONFIG_LSF
block: make blk_softirq_init() static
block: use min_not_zero in blk_queue_stack_limits
block: add one-hit cache for disk partition lookup
cfq-iosched: remove limit of dispatch depth of max 4 times quantum
nbd: tell the block layer that it is not a rotational device
block: get rid of elevator_t typedef
aio: make the lookup_ioctx() lockless
bio: add support for inlining a number of bio_vecs inside the bio
bio: allow individual slabs in the bio_set
bio: move the slab pointer inside the bio_set
bio: only mempool back the largest bio_vec slab cache
block: don't use plugging on SSD devices
...
47 files changed, 1089 insertions, 751 deletions
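
One small change in this series that is easy to miss in the diff below, "block: use min_not_zero in blk_queue_stack_limits" (the block/blk-settings.c hunk), keeps a stacked queue whose segment limits are still unset (zero) from clobbering the real limits of the queue beneath it. The following is only a stand-alone sketch of the min_not_zero() semantics that hunk relies on; the helper name min_not_zero_ul() is hypothetical, while the kernel's real macro is type-generic and lives in include/linux/kernel.h.

/*
 * Illustrative only: plain-C sketch of min_not_zero() semantics,
 * where 0 means "no limit set" and must not win over a real limit.
 */
#include <stdio.h>

static unsigned long min_not_zero_ul(unsigned long a, unsigned long b)
{
        if (a == 0)
                return b;
        if (b == 0)
                return a;
        return a < b ? a : b;
}

int main(void)
{
        /*
         * Stacking a limit of 128 segments on top of an unset (0) limit:
         * plain min() would pick 0 and wipe out the real limit,
         * min_not_zero() keeps 128.
         */
        printf("min_not_zero(0, 128)  = %lu\n", min_not_zero_ul(0, 128));
        printf("min_not_zero(64, 128) = %lu\n", min_not_zero_ul(64, 128));
        return 0;
}
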
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 4dbb8be1c99..3c5434c83da 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -914,7 +914,7 @@ I/O scheduler, a.k.a. elevator, is implemented in two layers.  Generic dispatch
 queue and specific I/O schedulers. Unless stated otherwise, elevator is used
 to refer to both parts and I/O scheduler to specific I/O schedulers.
 
-Block layer implements generic dispatch queue in ll_rw_blk.c and elevator.c.
+Block layer implements generic dispatch queue in block/*.c.
 The generic dispatch queue is responsible for properly ordering barrier
 requests, requeueing, handling non-fs requests and all other subtleties.
@@ -926,8 +926,8 @@ be built inside the kernel. Each queue can choose different one and can also
 change to another one dynamically.
 
 A block layer call to the i/o scheduler follows the convention elv_xxx(). This
-calls elevator_xxx_fn in the elevator switch (drivers/block/elevator.c). Oh,
-xxx and xxx might not match exactly, but use your imagination. If an elevator
+calls elevator_xxx_fn in the elevator switch (block/elevator.c). Oh, xxx
+and xxx might not match exactly, but use your imagination. If an elevator
 doesn't implement a function, the switch does nothing or some minimal house
 keeping work.
diff --git a/arch/alpha/include/asm/io.h b/arch/alpha/include/asm/io.h
index e971ab000f9..eda9b909aa0 100644
--- a/arch/alpha/include/asm/io.h
+++ b/arch/alpha/include/asm/io.h
@@ -96,9 +96,6 @@ static inline dma_addr_t __deprecated isa_page_to_bus(struct page *page)
         return page_to_phys(page);
 }
 
-/* This depends on working iommu. */
-#define BIO_VMERGE_BOUNDARY     (alpha_mv.mv_pci_tbi ? PAGE_SIZE : 0)
-
 /* Maximum PIO space address supported? */
 #define IO_SPACE_LIMIT 0xffff
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index ef3635b52fc..0767827540b 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -263,7 +263,7 @@ int s390_enable_sie(void)
         /* lets check if we are allowed to replace the mm */
         task_lock(tsk);
         if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
-            tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {
+            tsk->mm != tsk->active_mm || !hlist_empty(&tsk->mm->ioctx_list)) {
                 task_unlock(tsk);
                 return -EINVAL;
         }
@@ -279,7 +279,7 @@ int s390_enable_sie(void)
         /* Now lets check again if something happened */
         task_lock(tsk);
         if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
-            tsk->mm != tsk->active_mm || tsk->mm->ioctx_list) {
+            tsk->mm != tsk->active_mm || !hlist_empty(&tsk->mm->ioctx_list)) {
                 mmput(mm);
                 task_unlock(tsk);
                 return -EINVAL;
diff --git a/block/Kconfig b/block/Kconfig
index 290b219fad9..ac0956f7778 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -24,21 +24,17 @@ menuconfig BLOCK
 if BLOCK
 
 config LBD
-        bool "Support for Large Block Devices"
+        bool "Support for large block devices and files"
         depends on !64BIT
         help
-          Enable block devices of size 2TB and larger.
+          Enable block devices or files of size 2TB and larger.
 
           This option is required to support the full capacity of large
           (2TB+) block devices, including RAID, disk, Network Block Device,
           Logical Volume Manager (LVM) and loopback.
-
-          For example, RAID devices are frequently bigger than the capacity
-          of the largest individual hard drive.
-
-          This option is not required if you have individual disk drives
-          which total 2TB+ and you are not aggregating the capacity into
-          a large block device (e.g. using RAID or LVM).
+
+          This option also enables support for single files larger than
+          2TB.
 
           If unsure, say N.
@@ -58,15 +54,6 @@ config BLK_DEV_IO_TRACE
 
           If unsure, say N.
 
-config LSF
-        bool "Support for Large Single Files"
-        depends on !64BIT
-        help
-          Say Y here if you want to be able to handle very large files (2TB
-          and larger), otherwise say N.
-
-          If unsure, say Y.
-
 config BLK_DEV_BSG
         bool "Block layer SG support v4 (EXPERIMENTAL)"
         depends on EXPERIMENTAL
diff --git a/block/as-iosched.c b/block/as-iosched.c
index 71f0abb219e..631f6f44460 100644
--- a/block/as-iosched.c
+++ b/block/as-iosched.c
@@ -1339,12 +1339,12 @@ static int as_may_queue(struct request_queue *q, int rw)
         return ret;
 }
 
-static void as_exit_queue(elevator_t *e)
+static void as_exit_queue(struct elevator_queue *e)
 {
         struct as_data *ad = e->elevator_data;
 
         del_timer_sync(&ad->antic_timer);
-        kblockd_flush_work(&ad->antic_work);
+        cancel_work_sync(&ad->antic_work);
 
         BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC]));
         BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC]));
@@ -1409,7 +1409,7 @@ as_var_store(unsigned long *var, const char *page, size_t count)
         return count;
 }
 
-static ssize_t est_time_show(elevator_t *e, char *page)
+static ssize_t est_time_show(struct elevator_queue *e, char *page)
 {
         struct as_data *ad = e->elevator_data;
         int pos = 0;
@@ -1427,7 +1427,7 @@ static ssize_t est_time_show(elevator_t *e, char *page)
 }
 
 #define SHOW_FUNCTION(__FUNC, __VAR)                                    \
-static ssize_t __FUNC(elevator_t *e, char *page)                        \
+static ssize_t __FUNC(struct elevator_queue *e, char *page)             \
 {                                                                       \
         struct as_data *ad = e->elevator_data;                          \
         return as_var_show(jiffies_to_msecs((__VAR)), (page));          \
@@ -1440,7 +1440,7 @@ SHOW_FUNCTION(as_write_batch_expire_show, ad->batch_expire[REQ_ASYNC]);
 #undef SHOW_FUNCTION
 
 #define STORE_FUNCTION(__FUNC, __PTR, MIN, MAX)                         \
-static ssize_t __FUNC(elevator_t *e, const char *page, size_t count)   \
+static ssize_t __FUNC(struct elevator_queue *e, const char *page, size_t count) \
 {                                                                       \
         struct as_data *ad = e->elevator_data;                          \
         int ret = as_var_store(__PTR, (page), count);                   \
diff --git a/block/blk-barrier.c b/block/blk-barrier.c
index 6e72d661ae4..8eba4e43bb0 100644
--- a/block/blk-barrier.c
+++ b/block/blk-barrier.c
@@ -24,8 +24,8 @@ int blk_queue_ordered(struct request_queue *q, unsigned ordered,
                       prepare_flush_fn *prepare_flush_fn)
 {
-        if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) &&
-            prepare_flush_fn == NULL) {
+        if (!prepare_flush_fn && (ordered & (QUEUE_ORDERED_DO_PREFLUSH |
+                                             QUEUE_ORDERED_DO_POSTFLUSH))) {
                 printk(KERN_ERR "%s: prepare_flush_fn required\n", __func__);
                 return -EINVAL;
         }
@@ -88,7 +88,7 @@ unsigned blk_ordered_req_seq(struct request *rq)
         return QUEUE_ORDSEQ_DONE;
 }
 
-void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
+bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
 {
         struct request *rq;
 
@@ -99,7 +99,7 @@ void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
         q->ordseq |= seq;
 
         if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
-                return;
+                return false;
 
         /*
          * Okay, sequence complete.
@@ -109,6 +109,8 @@ void blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error)
 
         if (__blk_end_request(rq, q->orderr, blk_rq_bytes(rq)))
                 BUG();
+
+        return true;
 }
 
 static void pre_flush_end_io(struct request *rq, int error)
@@ -134,7 +136,7 @@ static void queue_flush(struct request_queue *q, unsigned which)
         struct request *rq;
         rq_end_io_fn *end_io;
 
-        if (which == QUEUE_ORDERED_PREFLUSH) {
+        if (which == QUEUE_ORDERED_DO_PREFLUSH) {
                 rq = &q->pre_flush_rq;
                 end_io = pre_flush_end_io;
         } else {
@@ -151,80 +153,110 @@ static void queue_flush(struct request_queue *q, unsigned which)
         elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
 }
 
-static inline struct request *start_ordered(struct request_queue *q,
-                                            struct request *rq)
+static inline bool start_ordered(struct request_queue *q, struct request **rqp)
 {
+        struct request *rq = *rqp;
+        unsigned skip = 0;
+
         q->orderr = 0;
         q->ordered = q->next_ordered;
         q->ordseq |= QUEUE_ORDSEQ_STARTED;
 
         /*
-         * Prep proxy barrier request.
+         * For an empty barrier, there's no actual BAR request, which
+         * in turn makes POSTFLUSH unnecessary.  Mask them off.
          */
+        if (!rq->hard_nr_sectors) {
+                q->ordered &= ~(QUEUE_ORDERED_DO_BAR |
+                                QUEUE_ORDERED_DO_POSTFLUSH);
+                /*
+                 * Empty barrier on a write-through device w/ ordered
+                 * tag has no command to issue and without any command
+                 * to issue, ordering by tag can't be used.  Drain
+                 * instead.
+                 */
+                if ((q->ordered & QUEUE_ORDERED_BY_TAG) &&
+                    !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) {
+                        q->ordered &= ~QUEUE_ORDERED_BY_TAG;
+                        q->ordered |= QUEUE_ORDERED_BY_DRAIN;
+                }
+        }
+
+        /* stash away the original request */
         elv_dequeue_request(q, rq);
         q->orig_bar_rq = rq;
-        rq = &q->bar_rq;
-        blk_rq_init(q, rq);
-        if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
-                rq->cmd_flags |= REQ_RW;
-        if (q->ordered & QUEUE_ORDERED_FUA)
-                rq->cmd_flags |= REQ_FUA;
-        init_request_from_bio(rq, q->orig_bar_rq->bio);
-        rq->end_io = bar_end_io;
+        rq = NULL;
 
         /*
          * Queue ordered sequence.  As we stack them at the head, we
          * need to queue in reverse order.  Note that we rely on that
          * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
-         * request gets inbetween ordered sequence. If this request is
-         * an empty barrier, we don't need to do a postflush ever since
-         * there will be no data written between the pre and post flush.
-         * Hence a single flush will suffice.
+         * request gets inbetween ordered sequence.
          */
-        if ((q->ordered & QUEUE_ORDERED_POSTFLUSH) && !blk_empty_barrier(rq))
-                queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
-        else
-                q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
+        if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) {
+                queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH);
+                rq = &q->post_flush_rq;
+        } else
+                skip |= QUEUE_ORDSEQ_POSTFLUSH;
 
-        elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
+        if (q->ordered & QUEUE_ORDERED_DO_BAR) {
+                rq = &q->bar_rq;
+
+                /* initialize proxy request and queue it */
+                blk_rq_init(q, rq);
+                if (bio_data_dir(q->orig_bar_rq->bio) == WRITE)
+                        rq->cmd_flags |= REQ_RW;
+                if (q->ordered & QUEUE_ORDERED_DO_FUA)
+                        rq->cmd_flags |= REQ_FUA;
+                init_request_from_bio(rq, q->orig_bar_rq->bio);
+                rq->end_io = bar_end_io;
 
-        if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
-                queue_flush(q, QUEUE_ORDERED_PREFLUSH);
+                elv_insert(q, rq, ELEVATOR_INSERT_FRONT);
+        } else
+                skip |= QUEUE_ORDSEQ_BAR;
+
+        if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) {
+                queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH);
                 rq = &q->pre_flush_rq;
         } else
-                q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;
+                skip |= QUEUE_ORDSEQ_PREFLUSH;
 
-        if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
-                q->ordseq |= QUEUE_ORDSEQ_DRAIN;
-        else
+        if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && q->in_flight)
                 rq = NULL;
+        else
+                skip |= QUEUE_ORDSEQ_DRAIN;
+
+        *rqp = rq;
 
-        return rq;
+        /*
+         * Complete skipped sequences.  If whole sequence is complete,
+         * return false to tell elevator that this request is gone.
+         */
+        return !blk_ordered_complete_seq(q, skip, 0);
 }
 
-int blk_do_ordered(struct request_queue *q, struct request **rqp)
+bool blk_do_ordered(struct request_queue *q, struct request **rqp)
 {
         struct request *rq = *rqp;
         const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
 
         if (!q->ordseq) {
                 if (!is_barrier)
-                        return 1;
+                        return true;
 
-                if (q->next_ordered != QUEUE_ORDERED_NONE) {
-                        *rqp = start_ordered(q, rq);
-                        return 1;
-                } else {
+                if (q->next_ordered != QUEUE_ORDERED_NONE)
+                        return start_ordered(q, rqp);
+                else {
                         /*
-                         * This can happen when the queue switches to
-                         * ORDERED_NONE while this request is on it.
+                         * Queue ordering not supported.  Terminate
+                         * with prejudice.
                          */
                         elv_dequeue_request(q, rq);
                         if (__blk_end_request(rq, -EOPNOTSUPP, blk_rq_bytes(rq)))
                                 BUG();
                         *rqp = NULL;
-                        return 0;
+                        return false;
                 }
         }
 
@@ -235,9 +267,9 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp)
         /* Special requests are not subject to ordering rules. */
         if (!blk_fs_request(rq) &&
             rq != &q->pre_flush_rq && rq != &q->post_flush_rq)
-                return 1;
+                return true;
 
-        if (q->ordered & QUEUE_ORDERED_TAG) {
+        if (q->ordered & QUEUE_ORDERED_BY_TAG) {
                 /* Ordered by tag.  Blocking the next barrier is enough. */
                 if (is_barrier && rq != &q->bar_rq)
                         *rqp = NULL;
@@ -248,7 +280,7 @@ int blk_do_ordered(struct request_queue *q, struct request **rqp)
                         *rqp = NULL;
         }
 
-        return 1;
+        return true;
 }
 
 static void bio_end_empty_barrier(struct bio *bio, int err)
diff --git a/block/blk-core.c b/block/blk-core.c
index 561e8a1b43a..a824e49c0d0 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -153,6 +153,9 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
                         nbytes = bio->bi_size;
                 }
 
+                if (unlikely(rq->cmd_flags & REQ_QUIET))
+                        set_bit(BIO_QUIET, &bio->bi_flags);
+
                 bio->bi_size -= nbytes;
                 bio->bi_sector += (nbytes >> 9);
@@ -265,8 +268,7 @@ void __generic_unplug_device(struct request_queue *q)
 {
         if (unlikely(blk_queue_stopped(q)))
                 return;
-
-        if (!blk_remove_plug(q))
+        if (!blk_remove_plug(q) && !blk_queue_nonrot(q))
                 return;
 
         q->request_fn(q);
@@ -404,7 +406,8 @@ EXPORT_SYMBOL(blk_stop_queue);
 void blk_sync_queue(struct request_queue *q)
 {
         del_timer_sync(&q->unplug_timer);
-        kblockd_flush_work(&q->unplug_work);
+        del_timer_sync(&q->timeout);
+        cancel_work_sync(&q->unplug_work);
 }
 EXPORT_SYMBOL(blk_sync_queue);
@@ -1135,7 +1138,7 @@ void init_request_from_bio(struct request *req, struct bio *bio)
 static int __make_request(struct request_queue *q, struct bio *bio)
 {
         struct request *req;
-        int el_ret, nr_sectors, barrier, discard, err;
+        int el_ret, nr_sectors;
         const unsigned short prio = bio_prio(bio);
         const int sync = bio_sync(bio);
         int rw_flags;
@@ -1149,22 +1152,9 @@ static int __make_request(struct request_queue *q, struct bio *bio)
          */
         blk_queue_bounce(q, &bio);
 
-        barrier = bio_barrier(bio);
-        if (unlikely(barrier) && bio_has_data(bio) &&
-            (q->next_ordered == QUEUE_ORDERED_NONE)) {
-                err = -EOPNOTSUPP;
-                goto end_io;
-        }
-
-        discard = bio_discard(bio);
-        if (unlikely(discard) && !q->prepare_discard_fn) {
-                err = -EOPNOTSUPP;
-                goto end_io;
-        }
-
         spin_lock_irq(q->queue_lock);
-        if (unlikely(barrier) || elv_queue_empty(q))
+        if (unlikely(bio_barrier(bio)) || elv_queue_empty(q))
                 goto get_rq;
 
         el_ret = elv_merge(q, &req, bio);
@@ -1250,18 +1240,14 @@ get_rq:
         if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
             bio_flagged(bio, BIO_CPU_AFFINE))
                 req->cpu = blk_cpu_to_group(smp_processor_id());
-        if (elv_queue_empty(q))
+        if (!blk_queue_nonrot(q) && elv_queue_empty(q))
                 blk_plug_device(q);
         add_request(q, req);
 out:
-        if (sync)
+        if (sync || blk_queue_nonrot(q))
                 __generic_unplug_device(q);
         spin_unlock_irq(q->queue_lock);
         return 0;
-
-end_io:
-        bio_endio(bio, err);
-        return 0;
 }
 
 /*
@@ -1414,15 +1400,13 @@ static inline void __generic_make_request(struct bio *bio)
                 char b[BDEVNAME_SIZE];
 
                 q = bdev_get_queue(bio->bi_bdev);
-                if (!q) {
+                if (unlikely(!q)) {
                         printk(KERN_ERR
                                "generic_make_request: Trying to access "
                                 "nonexistent block-device %s (%Lu)\n",
                                 bdevname(bio->bi_bdev, b),
                                 (long long) bio->bi_sector);
-end_io:
-                        bio_endio(bio, err);
-                        break;
+                        goto end_io;
                 }
 
                 if (unlikely(nr_sectors > q->max_hw_sectors)) {
@@ -1459,14 +1443,19 @@ end_io:
                 if (bio_check_eod(bio, nr_sectors))
                         goto end_io;
-                if ((bio_empty_barrier(bio) && !q->prepare_flush_fn) ||
-                    (bio_discard(bio) && !q->prepare_discard_fn)) {
+
+                if (bio_discard(bio) && !q->prepare_discard_fn) {
                         err = -EOPNOTSUPP;
                         goto end_io;
                 }
 
                 ret = q->make_request_fn(q, bio);
         } while (ret);
+
+        return;
+
+end_io:
+        bio_endio(bio, err);
 }
@@ -1716,14 +1705,6 @@ static int __end_that_request_first(struct request *req, int error,
         while ((bio = req->bio) != NULL) {
                 int nbytes;
 
-                /*
-                 * For an empty barrier request, the low level driver must
-                 * store a potential error location in ->sector. We pass
-                 * that back up in ->bi_sector.
-                 */
-                if (blk_empty_barrier(req))
-                        bio->bi_sector = req->sector;
-
                 if (nr_bytes >= bio->bi_size) {
                         req->bio = bio->bi_next;
                         nbytes = bio->bi_size;
@@ -2143,12 +2124,6 @@ int kblockd_schedule_work(struct request_queue *q, struct work_struct *work)
 }
 EXPORT_SYMBOL(kblockd_schedule_work);
 
-void kblockd_flush_work(struct work_struct *work)
-{
-        cancel_work_sync(work);
-}
-EXPORT_SYMBOL(kblockd_flush_work);
-
 int __init blk_dev_init(void)
 {
         kblockd_workqueue = create_workqueue("kblockd");
diff --git a/block/blk-settings.c b/block/blk-settings.c
index afa55e14e27..59fd05d9f1d 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -319,9 +319,9 @@ void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b)
         t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
         t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, b->seg_boundary_mask);
-        t->max_phys_segments = min(t->max_phys_segments, b->max_phys_segments);
-        t->max_hw_segments = min(t->max_hw_segments, b->max_hw_segments);
-        t->max_segment_size = min(t->max_segment_size, b->max_segment_size);
+        t->max_phys_segments = min_not_zero(t->max_phys_segments, b->max_phys_segments);
+        t->max_hw_segments = min_not_zero(t->max_hw_segments, b->max_hw_segments);
+        t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size);
         t->hardsect_size = max(t->hardsect_size, b->hardsect_size);
         if (!t->queue_lock)
                 WARN_ON_ONCE(1);
diff --git a/block/blk-softirq.c b/block/blk-softirq.c
index e660d26ca65..ce0efc6b26d 100644
--- a/block/blk-softirq.c
+++ b/block/blk-softirq.c
@@ -161,7 +161,7 @@ void blk_complete_request(struct request *req)
 }
 EXPORT_SYMBOL(blk_complete_request);
 
-__init int blk_softirq_init(void)
+static __init int blk_softirq_init(void)
 {
         int i;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 21e275d7eed..a29cb788e40 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -88,9 +88,7 @@ queue_ra_store(struct request_queue *q, const char *page, size_t count)
         unsigned long ra_kb;
         ssize_t ret = queue_var_store(&ra_kb, page, count);
 
-        spin_lock_irq(q->queue_lock);
         q->backing_dev_info.ra_pages = ra_kb >> (PAGE_CACHE_SHIFT - 10);
-        spin_unlock_irq(q->queue_lock);
 
         return ret;
 }
@@ -117,10 +115,7 @@ queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
         if (max_sectors_kb > max_hw_sectors_kb || max_sectors_kb < page_kb)
                 return -EINVAL;
-        /*
-         * Take the queue lock to update the readahead and max_sectors
-         * values synchronously:
-         */
+
         spin_lock_irq(q->queue_lock);
         q->max_sectors = max_sectors_kb << 1;
         spin_unlock_irq(q->queue_lock);
diff --git a/block/blk-tag.c b/block/blk-tag.c
index c0d419e84ce..3c518e3303a 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -158,7 +158,6 @@ fail:
 /**
  * blk_init_tags - initialize the tag info for an external tag map
  * @depth:      the maximum queue depth supported
- * @tags: the tag to use
  **/
 struct blk_queue_tag *blk_init_tags(int depth)
 {
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 69185ea9fae..a09535377a9 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -73,11 +73,7 @@ ssize_t part_timeout_store(struct device *dev, struct device_attribute *attr,
  */
 void blk_delete_timer(struct request *req)
 {
-        struct request_queue *q = req->q;
-
         list_del_init(&req->timeout_list);
-        if (list_empty(&q->timeout_list))
-                del_timer(&q->timeout);
 }
 
 static void blk_rq_timed_out(struct request *req)
@@ -111,7 +107,7 @@ static void blk_rq_timed_out(struct request *req)
 void blk_rq_timed_out_timer(unsigned long data)
 {
         struct request_queue *q = (struct request_queue *) data;
-        unsigned long flags, uninitialized_var(next), next_set = 0;
+        unsigned long flags, next = 0;
         struct request *rq, *tmp;
 
         spin_lock_irqsave(q->queue_lock, flags);
@@ -126,15 +122,18 @@ void blk_rq_timed_out_timer(unsigned long data)
                         if (blk_mark_rq_complete(rq))
                                 continue;
                         blk_rq_timed_out(rq);
+                } else {
+                        if (!next || time_after(next, rq->deadline))
+                                next = rq->deadline;
                 }
-
-                if (!next_set) {
-                        next = rq->deadline;
-                        next_set = 1;
-                } else if (time_after(next, rq->deadline))
-                        next = rq->deadline;
         }
 
-        if (next_set && !list_empty(&q->timeout_list))
+        /*
+         * next can never be 0 here with the list non-empty, since we always
+         * bump ->deadline to 1 so we can detect if the timer was ever added
+         * or not. See comment in blk_add_timer()
+         */
+        if (next)
                 mod_timer(&q->timeout, round_jiffies_up(next));
 
         spin_unlock_irqrestore(q->queue_lock, flags);
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 6a062eebbd1..e8525fa7282 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1136,12 +1136,8 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
                 if (cfq_class_idle(cfqq))
                         max_dispatch = 1;
 
-                if (cfqq->dispatched >= max_dispatch) {
-                        if (cfqd->busy_queues > 1)
-                                break;
-                        if (cfqq->dispatched >= 4 * max_dispatch)
-                                break;
-                }
+                if (cfqq->dispatched >= max_dispatch && cfqd->busy_queues > 1)
+                        break;
 
                 if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq))
                         break;
@@ -1318,7 +1314,15 @@ static void cfq_exit_single_io_context(struct io_context *ioc,
                 unsigned long flags;
 
                 spin_lock_irqsave(q->queue_lock, flags);
-                __cfq_exit_single_io_context(cfqd, cic);
+
+                /*
+                 * Ensure we get a fresh copy of the ->key to prevent
+                 * race between exiting task and queue
+                 */
+                smp_read_barrier_depends();
+                i |
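
The block/blk-timeout.c hunk above replaces the uninitialized_var()/next_set pair with a single next value that uses 0 as its "nothing to re-arm" sentinel, which is safe only because, per the hunk's own comment, blk_add_timer() never stores a 0 deadline. Below is only a stand-alone sketch of that scan pattern under the same non-zero-deadline assumption; the names are illustrative, not kernel API.

/*
 * Illustrative only: "earliest non-zero deadline" scan, where 0 doubles as
 * the "no timer needs re-arming" sentinel because live entries are never 0.
 */
#include <stdio.h>

static unsigned long earliest_deadline(const unsigned long *deadlines,
                                       int n, unsigned long now)
{
        unsigned long next = 0;
        int i;

        for (i = 0; i < n; i++) {
                if (deadlines[i] <= now)
                        continue;       /* already expired: handled by the timeout path */
                if (!next || deadlines[i] < next)
                        next = deadlines[i];
        }
        return next;    /* 0 means there is nothing left to time */
}

int main(void)
{
        unsigned long deadlines[] = { 150, 120, 90 };

        /* with "now" at 100, only 150 and 120 are still pending -> 120 */
        printf("re-arm at %lu\n", earliest_deadline(deadlines, 3, 100));
        return 0;
}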