aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@g5.osdl.org>2005-10-28 08:53:49 -0700
committerLinus Torvalds <torvalds@g5.osdl.org>2005-10-28 08:53:49 -0700
commit28d721e24c88496ff8e9c4a0959bdc1415c0658e (patch)
tree0652161bbbcbfddf47c7ddb25d2db8ecd4cbec89
parent0ee40c6628434f0535da31deeacc28b61e80d810 (diff)
parentcb19833dccb32f97cacbfff834b53523915f13f6 (diff)
Merge branch 'generic-dispatch' of git://brick.kernel.dk/data/git/linux-2.6-block
-rw-r--r--Documentation/block/biodoc.txt113
-rw-r--r--drivers/block/as-iosched.c325
-rw-r--r--drivers/block/cfq-iosched.c364
-rw-r--r--drivers/block/deadline-iosched.c123
-rw-r--r--drivers/block/elevator.c266
-rw-r--r--drivers/block/ll_rw_blk.c23
-rw-r--r--drivers/block/noop-iosched.c48
-rw-r--r--include/linux/blkdev.h26
-rw-r--r--include/linux/elevator.h18
9 files changed, 459 insertions, 847 deletions
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 6dd274d7e1c..2d65c218216 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -906,9 +906,20 @@ Aside:
4. The I/O scheduler
-I/O schedulers are now per queue. They should be runtime switchable and modular
-but aren't yet. Jens has most bits to do this, but the sysfs implementation is
-missing.
+I/O scheduler, a.k.a. elevator, is implemented in two layers. Generic dispatch
+queue and specific I/O schedulers. Unless stated otherwise, elevator is used
+to refer to both parts and I/O scheduler to specific I/O schedulers.
+
+Block layer implements generic dispatch queue in ll_rw_blk.c and elevator.c.
+The generic dispatch queue is responsible for properly ordering barrier
+requests, requeueing, handling non-fs requests and all other subtleties.
+
+Specific I/O schedulers are responsible for ordering normal filesystem
+requests. They can also choose to delay certain requests to improve
+throughput or whatever purpose. As the plural form indicates, there are
+multiple I/O schedulers. They can be built as modules but at least one should
+be built inside the kernel. Each queue can choose different one and can also
+change to another one dynamically.
A block layer call to the i/o scheduler follows the convention elv_xxx(). This
calls elevator_xxx_fn in the elevator switch (drivers/block/elevator.c). Oh,
@@ -921,44 +932,36 @@ keeping work.
The functions an elevator may implement are: (* are mandatory)
elevator_merge_fn called to query requests for merge with a bio
-elevator_merge_req_fn " " " with another request
+elevator_merge_req_fn called when two requests get merged. the one
+ which gets merged into the other one will be
+ never seen by I/O scheduler again. IOW, after
+ being merged, the request is gone.
elevator_merged_fn called when a request in the scheduler has been
involved in a merge. It is used in the deadline
scheduler for example, to reposition the request
if its sorting order has changed.
-*elevator_next_req_fn returns the next scheduled request, or NULL
- if there are none (or none are ready).
+elevator_dispatch_fn fills the dispatch queue with ready requests.
+ I/O schedulers are free to postpone requests by
+ not filling the dispatch queue unless @force
+ is non-zero. Once dispatched, I/O schedulers
+ are not allowed to manipulate the requests -
+ they belong to generic dispatch queue.
-*elevator_add_req_fn called to add a new request into the scheduler
+elevator_add_req_fn called to add a new request into the scheduler
elevator_queue_empty_fn returns true if the merge queue is empty.
Drivers shouldn't use this, but rather check
if elv_next_request is NULL (without losing the
request if one exists!)
-elevator_remove_req_fn This is called when a driver claims ownership of
- the target request - it now belongs to the
- driver. It must not be modified or merged.
- Drivers must not lose the request! A subsequent
- call of elevator_next_req_fn must return the
- _next_ request.
-
-elevator_requeue_req_fn called to add a request to the scheduler. This
- is used when the request has alrnadebeen
- returned by elv_next_request, but hasn't
- completed. If this is not implemented then
- elevator_add_req_fn is called instead.
-
elevator_former_req_fn
elevator_latter_req_fn These return the request before or after the
one specified in disk sort order. Used by the
block layer to find merge possibilities.
-elevator_completed_req_fn called when a request is completed. This might
- come about due to being merged with another or
- when the device completes the request.
+elevator_completed_req_fn called when a request is completed.
elevator_may_queue_fn returns true if the scheduler wants to allow the
current context to queue a new request even if
@@ -967,13 +970,33 @@ elevator_may_queue_fn returns true if the scheduler wants to allow the
elevator_set_req_fn
elevator_put_req_fn Must be used to allocate and free any elevator
- specific storate for a request.
+ specific storage for a request.
+
+elevator_activate_req_fn Called when device driver first sees a request.
+ I/O schedulers can use this callback to
+ determine when actual execution of a request
+ starts.
+elevator_deactivate_req_fn Called when device driver decides to delay
+ a request by requeueing it.
elevator_init_fn
elevator_exit_fn Allocate and free any elevator specific storage
for a queue.
-4.2 I/O scheduler implementation
+4.2 Request flows seen by I/O schedulers
+All requests seens by I/O schedulers strictly follow one of the following three
+flows.
+
+ set_req_fn ->
+
+ i. add_req_fn -> (merged_fn ->)* -> dispatch_fn -> activate_req_fn ->
+ (deactivate_req_fn -> activate_req_fn ->)* -> completed_req_fn
+ ii. add_req_fn -> (merged_fn ->)* -> merge_req_fn
+ iii. [none]
+
+ -> put_req_fn
+
+4.3 I/O scheduler implementation
The generic i/o scheduler algorithm attempts to sort/merge/batch requests for
optimal disk scan and request servicing performance (based on generic
principles and device capabilities), optimized for:
@@ -993,18 +1016,7 @@ request in sort order to prevent binary tree lookups.
This arrangement is not a generic block layer characteristic however, so
elevators may implement queues as they please.
-ii. Last merge hint
-The last merge hint is part of the generic queue layer. I/O schedulers must do
-some management on it. For the most part, the most important thing is to make
-sure q->last_merge is cleared (set to NULL) when the request on it is no longer
-a candidate for merging (for example if it has been sent to the driver).
-
-The last merge performed is cached as a hint for the subsequent request. If
-sequential data is being submitted, the hint is used to perform merges without
-any scanning. This is not sufficient when there are multiple processes doing
-I/O though, so a "merge hash" is used by some schedulers.
-
-iii. Merge hash
+ii. Merge hash
AS and deadline use a hash table indexed by the last sector of a request. This
enables merging code to quickly look up "back merge" candidates, even when
multiple I/O streams are being performed at once on one disk.
@@ -1013,29 +1025,8 @@ multiple I/O streams are being performed at once on one disk.
are far less common than "back merges" due to the nature of most I/O patterns.
Front merges are handled by the binary trees in AS and deadline schedulers.
-iv. Handling barrier cases
-A request with flags REQ_HARDBARRIER or REQ_SOFTBARRIER must not be ordered
-around. That is, they must be processed after all older requests, and before
-any newer ones. This includes merges!
-
-In AS and deadline schedulers, barriers have the effect of flushing the reorder
-queue. The performance cost of this will vary from nothing to a lot depending
-on i/o patterns and device characteristics. Obviously they won't improve
-performance, so their use should be kept to a minimum.
-
-v. Handling insertion position directives
-A request may be inserted with a position directive. The directives are one of
-ELEVATOR_INSERT_BACK, ELEVATOR_INSERT_FRONT, ELEVATOR_INSERT_SORT.
-
-ELEVATOR_INSERT_SORT is a general directive for non-barrier requests.
-ELEVATOR_INSERT_BACK is used to insert a barrier to the back of the queue.
-ELEVATOR_INSERT_FRONT is used to insert a barrier to the front of the queue, and
-overrides the ordering requested by any previous barriers. In practice this is
-harmless and required, because it is used for SCSI requeueing. This does not
-require flushing the reorder queue, so does not impose a performance penalty.
-
-vi. Plugging the queue to batch requests in anticipation of opportunities for
- merge/sort optimizations
+iii. Plugging the queue to batch requests in anticipation of opportunities for
+ merge/sort optimizations
This is just the same as in 2.4 so far, though per-device unplugging
support is anticipated for 2.5. Also with a priority-based i/o scheduler,
@@ -1069,7 +1060,7 @@ Aside:
blk_kick_queue() to unplug a specific queue (right away ?)
or optionally, all queues, is in the plan.
-4.3 I/O contexts
+4.4 I/O contexts
I/O contexts provide a dynamically allocated per process data area. They may
be used in I/O schedulers, and in the block layer (could be used for IO statis,
priorities for example). See *io_context in drivers/block/ll_rw_blk.c, and
diff --git a/drivers/block/as-iosched.c b/drivers/block/as-iosched.c
index 1f08e14697e..4081c36c8c1 100644
--- a/drivers/block/as-iosched.c
+++ b/drivers/block/as-iosched.c
@@ -98,7 +98,6 @@ struct as_data {
struct as_rq *next_arq[2]; /* next in sort order */
sector_t last_sector[2]; /* last REQ_SYNC & REQ_ASYNC sectors */
- struct list_head *dispatch; /* driver dispatch queue */
struct list_head *hash; /* request hash */
unsigned long exit_prob; /* probability a task will exit while
@@ -239,6 +238,25 @@ static struct io_context *as_get_io_context(void)
return ioc;
}
+static void as_put_io_context(struct as_rq *arq)
+{
+ struct as_io_context *aic;
+
+ if (unlikely(!arq->io_context))
+ return;
+
+ aic = arq->io_context->aic;
+
+ if (arq->is_sync == REQ_SYNC && aic) {
+ spin_lock(&aic->lock);
+ set_bit(AS_TASK_IORUNNING, &aic->state);
+ aic->last_end_request = jiffies;
+ spin_unlock(&aic->lock);
+ }
+
+ put_io_context(arq->io_context);
+}
+
/*
* the back merge hash support functions
*/
@@ -261,14 +279,6 @@ static inline void as_del_arq_hash(struct as_rq *arq)
__as_del_arq_hash(arq);
}
-static void as_remove_merge_hints(request_queue_t *q, struct as_rq *arq)
-{
- as_del_arq_hash(arq);
-
- if (q->last_merge == arq->request)
- q->last_merge = NULL;
-}
-
static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq)
{
struct request *rq = arq->request;
@@ -312,7 +322,7 @@ static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset)
BUG_ON(!arq->on_hash);
if (!rq_mergeable(__rq)) {
- as_remove_merge_hints(ad->q, arq);
+ as_del_arq_hash(arq);
continue;
}
@@ -950,23 +960,12 @@ static void as_completed_request(request_queue_t *q, struct request *rq)
WARN_ON(!list_empty(&rq->queuelist));
- if (arq->state == AS_RQ_PRESCHED) {
- WARN_ON(arq->io_context);
- goto out;
- }
-
- if (arq->state == AS_RQ_MERGED)
- goto out_ioc;
-
if (arq->state != AS_RQ_REMOVED) {
printk("arq->state %d\n", arq->state);
WARN_ON(1);
goto out;
}
- if (!blk_fs_request(rq))
- goto out;
-
if (ad->changed_batch && ad->nr_dispatched == 1) {
kblockd_schedule_work(&ad->antic_work);
ad->changed_batch = 0;
@@ -1001,21 +1000,7 @@ static void as_completed_request(request_queue_t *q, struct request *rq)
}
}
-out_ioc:
- if (!arq->io_context)
- goto out;
-
- if (arq->is_sync == REQ_SYNC) {
- struct as_io_context *aic = arq->io_context->aic;
- if (aic) {
- spin_lock(&aic->lock);
- set_bit(AS_TASK_IORUNNING, &aic->state);
- aic->last_end_request = jiffies;
- spin_unlock(&aic->lock);
- }
- }
-
- put_io_context(arq->io_context);
+ as_put_io_context(arq);
out:
arq->state = AS_RQ_POSTSCHED;
}
@@ -1047,73 +1032,11 @@ static void as_remove_queued_request(request_queue_t *q, struct request *rq)
ad->next_arq[data_dir] = as_find_next_arq(ad, arq);
list_del_init(&arq->fifo);
- as_remove_merge_hints(q, arq);
+ as_del_arq_hash(arq);
as_del_arq_rb(ad, arq);
}
/*
- * as_remove_dispatched_request is called to remove a request which has gone
- * to the dispatch list.
- */
-static void as_remove_dispatched_request(request_queue_t *q, struct request *rq)
-{
- struct as_rq *arq = RQ_DATA(rq);
- struct as_io_context *aic;
-
- if (!arq) {
- WARN_ON(1);
- return;
- }
-
- WARN_ON(arq->state != AS_RQ_DISPATCHED);
- WARN_ON(ON_RB(&arq->rb_node));
- if (arq->io_context && arq->io_context->aic) {
- aic = arq->io_context->aic;
- if (aic) {
- WARN_ON(!atomic_read(&aic->nr_dispatched));
- atomic_dec(&aic->nr_dispatched);
- }
- }
-}
-
-/*
- * as_remove_request is called when a driver has finished with a request.
- * This should be only called for dispatched requests, but for some reason
- * a POWER4 box running hwscan it does not.
- */
-static void as_remove_request(request_queue_t *q, struct request *rq)
-{
- struct as_rq *arq = RQ_DATA(rq);
-
- if (unlikely(arq->state == AS_RQ_NEW))
- goto out;
-
- if (ON_RB(&arq->rb_node)) {
- if (arq->state != AS_RQ_QUEUED) {
- printk("arq->state %d\n", arq->state);
- WARN_ON(1);
- goto out;
- }
- /*
- * We'll lose the aliased request(s) here. I don't think this
- * will ever happen, but if it does, hopefully someone will
- * report it.
- */
- WARN_ON(!list_empty(&rq->queuelist));
- as_remove_queued_request(q, rq);
- } else {
- if (arq->state != AS_RQ_DISPATCHED) {
- printk("arq->state %d\n", arq->state);
- WARN_ON(1);
- goto out;
- }
- as_remove_dispatched_request(q, rq);
- }
-out:
- arq->state = AS_RQ_REMOVED;
-}
-
-/*
* as_fifo_expired returns 0 if there are no expired reads on the fifo,
* 1 otherwise. It is ratelimited so that we only perform the check once per
* `fifo_expire' interval. Otherwise a large number of expired requests
@@ -1165,7 +1088,6 @@ static inline int as_batch_expired(struct as_data *ad)
static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
{
struct request *rq = arq->request;
- struct list_head *insert;
const int data_dir = arq->is_sync;
BUG_ON(!ON_RB(&arq->rb_node));
@@ -1198,13 +1120,13 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
/*
* take it off the sort and fifo list, add to dispatch queue
*/
- insert = ad->dispatch->prev;
-
while (!list_empty(&rq->queuelist)) {
struct request *__rq = list_entry_rq(rq->queuelist.next);
struct as_rq *__arq = RQ_DATA(__rq);
- list_move_tail(&__rq->queuelist, ad->dispatch);
+ list_del(&__rq->queuelist);
+
+ elv_dispatch_add_tail(ad->q, __rq);
if (__arq->io_context && __arq->io_context->aic)
atomic_inc(&__arq->io_context->aic->nr_dispatched);
@@ -1218,7 +1140,8 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
as_remove_queued_request(ad->q, rq);
WARN_ON(arq->state != AS_RQ_QUEUED);
- list_add(&rq->queuelist, insert);
+ elv_dispatch_sort(ad->q, rq);
+
arq->state = AS_RQ_DISPATCHED;
if (arq->io_context && arq->io_context->aic)
atomic_inc(&arq->io_context->aic->nr_dispatched);
@@ -1230,12 +1153,42 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq)
* read/write expire, batch expire, etc, and moves it to the dispatch
* queue. Returns 1 if a request was found, 0 otherwise.
*/
-static int as_dispatch_request(struct as_data *ad)
+static int as_dispatch_request(request_queue_t *q, int force)
{
+ struct as_data *ad = q->elevator->elevator_data;
struct as_rq *arq;
const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]);
const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]);
+ if (unlikely(force)) {
+ /*
+ * Forced dispatch, accounting is useless. Reset
+ * accounting states and dump fifo_lists. Note that
+ * batch_data_dir is reset to REQ_SYNC to avoid
+ * screwing write batch accounting as write batch
+ * accounting occurs on W->R transition.
+ */
+ int dispatched = 0;
+
+ ad->batch_data_dir = REQ_SYNC;
+ ad->changed_batch = 0;
+ ad->new_batch = 0;
+
+ while (ad->next_arq[REQ_SYNC]) {
+ as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]);
+ dispatched++;
+ }
+ ad->last_check_fifo[REQ_SYNC] = jiffies;
+
+ while (ad->next_arq[REQ_ASYNC]) {
+ as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]);
+ dispatched++;
+ }
+ ad->last_check_fifo[REQ_ASYNC] = jiffies;
+
+ return dispatched;
+ }
+
/* Signal that the write batch was uncontended, so we can't time it */
if (ad->batch_data_dir == REQ_ASYNC && !reads) {
if (ad->current_write_count == 0 || !writes)
@@ -1359,20 +1312,6 @@ fifo_expired:
return 1;
}
-static struct request *as_next_request(request_queue_t *q)
-{
- struct as_data *ad = q->elevator->elevator_data;
- struct request *rq = NULL;
-
- /*
- * if there are still requests on the dispatch queue, grab the first
- */
- if (!list_empty(ad->dispatch) || as_dispatch_request(ad))
- rq = list_entry_rq(ad->dispatch->next);
-
- return rq;
-}
-
/*
* Add arq to a list behind alias
*/
@@ -1404,17 +1343,25 @@ as_add_aliased_request(struct as_data *ad, struct as_rq *arq, struct as_rq *alia
/*
* Don't want to have to handle merges.
*/
- as_remove_merge_hints(ad->q, arq);
+ as_del_arq_hash(arq);
}
/*
* add arq to rbtree and fifo
*/
-static void as_add_request(struct as_data *ad, struct as_rq *arq)
+static void as_add_request(request_queue_t *q, struct request *rq)
{
+ struct as_data *ad = q->elevator->elevator_data;
+ struct as_rq *arq = RQ_DATA(rq);
struct as_rq *alias;
int data_dir;
+ if (arq->state != AS_RQ_PRESCHED) {
+ printk("arq->state: %d\n", arq->state);
+ WARN_ON(1);
+ }
+ arq->state = AS_RQ_NEW;
+
if (rq_data_dir(arq->request) == READ
|| current->flags&PF_SYNCWRITE)
arq->is_sync = 1;
@@ -1437,12 +1384,8 @@ static void as_add_request(struct as_data *ad, struct as_rq *arq)
arq->expires = jiffies + ad->fifo_expire[data_dir];
list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]);
- if (rq_mergeable(arq->request)) {
+ if (rq_mergeable(arq->request))
as_add_arq_hash(ad, arq);
-
- if (!ad->q->last_merge)
- ad->q->last_merge = arq->request;
- }
as_update_arq(ad, arq); /* keep state machine up to date */
} else {
@@ -1463,96 +1406,24 @@ static void as_add_request(struct as_data *ad, struct as_rq *arq)
arq->state = AS_RQ_QUEUED;
}
-static void as_deactivate_request(request_queue_t *q, struct request *rq)
+static void as_activate_request(request_queue_t *q, struct request *rq)
{
- struct as_data *ad = q->elevator->elevator_data;
struct as_rq *arq = RQ_DATA(rq);
- if (arq) {
- if (arq->state == AS_RQ_REMOVED) {
- arq->state = AS_RQ_DISPATCHED;
- if (arq->io_context && arq->io_context->aic)
- atomic_inc(&arq->io_context->aic->nr_dispatched);
- }
- } else
- WARN_ON(blk_fs_request(rq)
- && (!(rq->flags & (REQ_HARDBARRIER|REQ_SOFTBARRIER))) );
-
- /* Stop anticipating - let this request get through */
- as_antic_stop(ad);
-}
-
-/*
- * requeue the request. The request has not been completed, nor is it a
- * new request, so don't touch accounting.
- */
-static void as_requeue_request(request_queue_t *q, struct request *rq)
-{
- as_deactivate_request(q, rq);
- list_add(&rq->queuelist, &q->queue_head);
-}
-
-/*
- * Account a request that is inserted directly onto the dispatch queue.
- * arq->io_context->aic->nr_dispatched should not need to be incremented
- * because only new requests should come through here: requeues go through
- * our explicit requeue handler.
- */
-static void as_account_queued_request(struct as_data *ad, struct request *rq)
-{
- if (blk_fs_request(rq)) {
- struct as_rq *arq = RQ_DATA(rq);
- arq->state = AS_RQ_DISPATCHED;
- ad->nr_dispatched++;
- }
+ WARN_ON(arq->state != AS_RQ_DISPATCHED);
+ arq->state = AS_RQ_REMOVED;
+ if (arq->io_context && arq->io_context->aic)
+ atomic_dec(&arq->io_context->aic->nr_dispatched);
}
-static void
-as_insert_request(request_queue_t *q, struct request *rq, int where)
+static void as_deactivate_request(request_queue_t *q, struct request *rq)
{
- struct as_data *ad = q->elevator->elevator_data;
struct as_rq *arq = RQ_DATA(rq);
- if (arq) {
- if (arq->state != AS_RQ_PRESCHED) {
- printk("arq->state: %d\n", arq->state);
- WARN_ON(1);
- }
- arq->state = AS_RQ_NEW;
- }
-
- /* barriers must flush the reorder queue */
- if (unlikely(rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)
- && where == ELEVATOR_INSERT_SORT)) {
- WARN_ON(1);
- where = ELEVATOR_INSERT_BACK;
- }
-
- switch (where) {
- case ELEVATOR_INSERT_BACK:
- while (ad->next_arq[REQ_SYNC])
- as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]);
-
- while (ad->next_arq[REQ_ASYNC])
- as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]);
-
- list_add_tail(&rq->queuelist, ad->dispatch);
- as_account_queued_request(ad, rq);
- as_antic_stop(ad);
- break;
- case ELEVATOR_INSERT_FRONT:
- list_add(&rq->queuelist, ad->dispatch);
- as_account_queued_request(ad, rq);
- as_antic_stop(ad);
- break;
- case ELEVATOR_INSERT_SORT:
- BUG_ON(!blk_fs_request(rq));
- as_add_request(ad, arq);
- break;
- default:
- BUG();
- return;
- }
+ WARN_ON(arq->state != AS_RQ_REMOVED);
+ arq->state = AS_RQ_DISPATCHED;
+ if (arq->io_context && arq->io_context->aic)
+ atomic_inc(&arq->io_context->aic->nr_dispatched);
}
/*
@@ -1565,12 +1436,8 @@ static int as_queue_empty(request_queue_t *q)
{
struct as_data *ad = q->elevator->elevator_data;
- if (!list_empty(&ad->fifo_list[REQ_ASYNC])
- || !list_empty(&ad->fifo_list[REQ_SYNC])
- || !list_empty(ad->dispatch))
- return 0;
-
- return 1;
+ return list_empty(&ad->fifo_list[REQ_ASYNC])
+ && list_empty(&ad->fifo_list[REQ_SYNC]);
}
static struct request *
@@ -1608,15 +1475,6 @@ as_merge(request_queue_t *q, struct request **req, struct bio *bio)
int ret;
/*
- * try last_merge to avoid going to hash
- */
- ret = elv_try_last_merge(q, bio);
- if (ret != ELEVATOR_NO_MERGE) {
- __rq = q->last_merge;
- goto out_insert;
- }
-
- /*
* see if the merge hash can satisfy a back merge
*/
__rq = as_find_arq_hash(ad, bio->bi_sector);
@@ -1644,9 +1502,6 @@ as_merge(request_queue_t *q, struct request **req, struct bio *bio)
return ELEVATOR_NO_MERGE;
out:
- if (rq_mergeable(__rq))
- q->last_merge = __rq;
-out_insert:
if (ret) {
if (rq_mergeable(__rq))
as_hot_arq_hash(ad, RQ_DATA(__rq));
@@ -1693,9 +1548,6 @@ static void as_merged_request(request_queue_t *q, struct request *req)
* behind the disk head. We currently don't bother adjusting.
*/
}
-
- if (arq->on_hash)
- q->last_merge = req;
}
static void
@@ -1763,6 +1615,7 @@ as_merged_requests(request_queue_t *q, struct request *req,
* kill knowledge of next, this one is a goner
*/
as_remove_queued_request(q, next);
+ as_put_io_context(anext);
anext->state = AS_RQ_MERGED;
}
@@ -1782,7 +1635,7 @@ static void as_work_handler(void *data)
unsigned long flags;
spin_lock_irqsave(q->queue_lock, flags);
- if (as_next_request(q))
+ if (!as_queue_empty(q))
q->request_fn(q);
spin_unlock_irqrestore(q->queue_lock, flags);
}
@@ -1797,7 +1650,9 @@ static void as_put_request(request_queue_t *q, struct request *rq)
return;
}
- if (arq->state != AS_RQ_POSTSCHED && arq->state != AS_RQ_PRESCHED) {
+ if (unlikely(arq->state != AS_RQ_POSTSCHED &&
+ arq->state != AS_RQ_PRESCHED &&
+ arq->state != AS_RQ_MERGED)) {
printk("arq->state %d\n", arq->state);
WARN_ON(1);
}
@@ -1907,7 +1762,6 @@ static int as_init_queue(request_queue_t *q, elevator_t *e)
INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]);
ad->sort_list[REQ_SYNC] = RB_ROOT;
ad->sort_list[REQ_ASYNC] = RB_ROOT;
- ad->dispatch = &q->queue_head;
ad->fifo_expire[REQ_SYNC] = default_read_expire;
ad->fifo_expire[REQ_ASYNC] = default_write_expire;
ad->antic_expire = default_antic_expire;
@@ -2072,10 +1926,9 @@ static struct elevator_type iosched_as = {
.elevator_merge_fn = as_merge,
.elevator_merged_fn = as_merged_request,
.elevator_merge_req_fn = as_merged_requests,
- .elevator_next_req_fn = as_next_request,
- .elevator_add_req_fn = as_insert_request,
- .elevator_remove_req_fn = as_remove_request,
- .elevator_requeue_req_fn = as_requeue_request,
+ .elevator_dispatch_fn = as_dispatch_request,
+ .elevator_add_req_fn = as_add_request,
+ .elevator_activate_req_fn = as_activate_request,
.elevator_deactivate_req_fn = as_deactivate_request,
.elevator_queue_empty_fn = as_queue_empty,
.elevator_completed_req_fn = as_completed_request,
diff --git a/drivers/block/cfq-iosched.c b/drivers/block/cfq-iosched.c
index d3bfe8cfb03..94690e4d41e 100644
--- a/drivers/block/cfq-iosched.c
+++ b/drivers/block/cfq-iosched.c
@@ -84,7 +84,6 @@ static int cfq_max_depth = 2;
(node)->rb_left = NULL; \
} while (0)
#define RB_CLEAR_ROOT(root) ((root)->rb_node = NULL)
-#define ON_RB(node) ((node)->rb_color != RB_NONE)
#define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node)
#define rq_rb_key(rq) (rq)->sector
@@ -271,10 +270,7 @@ CFQ_CFQQ_FNS(expired);
#undef CFQ_CFQQ_FNS
enum cfq_rq_state_flags {
- CFQ_CRQ_FLAG_in_flight = 0,
- CFQ_CRQ_FLAG_in_driver,
- CFQ_CRQ_FLAG_is_sync,
- CFQ_CRQ_FLAG_requeued,
+ CFQ_CRQ_FLAG_is_sync = 0,
};
#define CFQ_CRQ_FNS(name) \
@@ -291,14 +287,11 @@ static inline int cfq_crq_##name(const struct cfq_rq *crq) \
return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0; \
}
-CFQ_CRQ_FNS(in_flight);
-CFQ_CRQ_FNS(in_driver);
CFQ_CRQ_FNS(is_sync);
-CFQ_CRQ_FNS(requeued);
#undef CFQ_CRQ_FNS
static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short);
-static void cfq_dispatch_sort(request_queue_t *, struct cfq_rq *);
+static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *);
static void cfq_put_cfqd(struct cfq_data *cfqd);
#define process_sync(tsk) ((tsk)->flags & PF_SYNCWRITE)
@@ -311,14 +304,6 @@ static inline void cfq_del_crq_hash(struct cfq_rq *crq)
hlist_del_init(&crq->hash);
}
-static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq)
-{
- cfq_del_crq_hash(crq);
-
- if (q->last_merge == crq->request)
- q->last_merge = NULL;
-}
-
static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq)
{
const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request));
@@ -347,18 +332,13 @@ static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset)
return NULL;
}
-static inline int cfq_pending_requests(struct cfq_data *cfqd)
-{
- return !list_empty(&cfqd->queue->queue_head) || cfqd->busy_queues;
-}
-
/*
* scheduler run of queue, if there are requests pending and no one in the
* driver that will restart queueing
*/
static inline void cfq_schedule_dispatch(struct cfq_data *cfqd)
{
- if (!cfqd->rq_in_driver && cfq_pending_requests(cfqd))
+ if (!cfqd->rq_in_driver && cfqd->busy_queues)
kblockd_schedule_work(&cfqd->unplug_work);
}
@@ -366,7 +346,7 @@ static int cfq_queue_empty(request_queue_t *q)
{
struct cfq_data *cfqd = q->elevator->elevator_data;
- return !cfq_pending_requests(cfqd);
+ return !cfqd->busy_queues;
}
/*
@@ -386,11 +366,6 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2)
if (crq2 == NULL)
return crq1;
- if (cfq_crq_requeued(crq1) && !cfq_crq_requeued(crq2))
- return crq1;
- else if (cfq_crq_requeued(crq2) && !cfq_crq_requeued(crq1))
- return crq2;
-
if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2))
return crq1;
else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1))
@@ -461,10 +436,7 @@ cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
struct cfq_rq *crq_next = NULL, *crq_prev = NULL;
struct rb_node *rbnext, *rbprev;
- rbnext = NULL;
- if (ON_RB(&last->rb_node))
- rbnext = rb_next(&last->rb_node);
- if (!rbnext) {
+ if (!(rbnext = rb_next(&last->rb_node))) {
rbnext = rb_first(&cfqq->sort_list);
if (rbnext == &last->rb_node)
rbnext = NULL;
@@ -545,13 +517,13 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted)
* the pending list according to last request service
*/
static inline void
-cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq, int requeue)
+cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
{
BUG_ON(cfq_cfqq_on_rr(cfqq));
cfq_mark_cfqq_on_rr(cfqq);
cfqd->busy_queues++;
- cfq_resort_rr_list(cfqq, requeue);
+ cfq_resort_rr_list(cfqq, 0);
}
static inline void
@@ -571,22 +543,19 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq)
static inline void cfq_del_crq_rb(struct cfq_rq *crq)
{
struct cfq_queue *cfqq = crq->cfq_queue;
+ struct cfq_data *cfqd = cfqq->cfqd;
+ const int sync = cfq_crq_is_sync(crq);
- if (ON_RB(&crq->rb_node)) {
- struct cfq_data *cfqd = cfqq->cfqd;
- const int sync = cfq_crq_is_sync(crq);
+ BUG_ON(!cfqq->queued[sync]);
+ cfqq->queued[sync]--;
- BUG_ON(!cfqq->queued[sync]);
- cfqq->queued[sync]--;
+ cfq_update_next_crq(crq);
- cfq_update_next_crq(crq);
+ rb_erase(&crq->rb_node, &cfqq->sort_list);
+ RB_CLEAR_COLOR(&crq->rb_node);
- rb_erase(&crq->rb_node, &cfqq->sort_list);
- RB_CLEAR_COLOR(&crq->rb_node);
-
- if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list))
- cfq_del_cfqq_rr(cfqd, cfqq);
- }
+ if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list))
+ cfq_del_cfqq_rr(cfqd, cfqq);
}
static struct cfq_rq *
@@ -627,12 +596,12 @@ static void cfq_add_crq_rb(struct cfq_rq *crq)
* if that happens, put the alias on the dispatch list
*/
while ((__alias = __cfq_add_crq_rb(crq)) != NULL)
- cfq_dispatch_sort(cfqd->queue, __alias);
+ cfq_dispatch_insert(cfqd->queue, __alias);
rb_insert_color(&crq->rb_node, &cfqq->sort_list);
if (!cfq_cfqq_on_rr(cfqq))
- cfq_add_cfqq_rr(cfqd, cfqq, cfq_crq_requeued(crq));
+ cfq_add_cfqq_rr(cfqd, cfqq);
/*
* check if this request is a better next-serve candidate
@@ -643,10 +612,8 @@ static void cfq_add_crq_rb(struct cfq_rq *crq)
static inline void
cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq)
{
- if (ON_RB(&crq->rb_node)) {
- rb_erase(&crq->rb_node, &cfqq->sort_list);
- cfqq->queued[cfq_crq_is_sync(crq)]--;
- }
+ rb_erase(&crq->rb_node, &cfqq->sort_list);
+ cfqq->queued[cfq_crq_is_sync(crq)]--;
cfq_add_crq_rb(crq);
}
@@ -676,49 +643,28 @@ out:
return NULL;
}
-static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
+static void cfq_activate_request(request_queue_t *q, struct request *rq)
{
struct cfq_data *cfqd = q->elevator->elevator_data;
- struct cfq_rq *crq = RQ_DATA(rq);
-
- if (crq) {
- struct cfq_queue *cfqq = crq->cfq_queue;
-
- if (cfq_crq_in_driver(crq)) {
- cfq_clear_crq_in_driver(crq);
- WARN_ON(!cfqd->rq_in_driver);
- cfqd->rq_in_driver--;
- }
- if (cfq_crq_in_flight(crq)) {
- const int sync = cfq_crq_is_sync(crq);
- cfq_clear_crq_in_flight(crq);
- WARN_ON(!cfqq->on_dispatch[sync]);
- cfqq->on_dispatch[sync]--;
- }
- cfq_mark_crq_requeued(crq);
- }
+ cfqd->rq_in_driver++;
}
-/*
- * make sure the service time gets corrected on reissue of this request
- */
-static void cfq_requeue_request(request_queue_t *q, struct request *rq)
+static void cfq_deactivate_request(request_queue_t *q, struct request *rq)
{
- cfq_deactivate_request(q, rq);
- list_add(&rq->queuelist, &q->queue_head);
+ struct cfq_data *cfqd = q->elevator->elevator_data;
+
+ WARN_ON(!cfqd->rq_in_driver);
+ cfqd->rq_in_driver--;
}
-static void cfq_remove_request(request_queue_t *q, struct request *rq)
+static void cfq_remove_request(struct request *rq)
{
struct cfq_rq *crq = RQ_DATA(rq);
- if (crq) {
- list_del_init(&rq->queuelist);
- cfq_del_crq_rb(crq);
- cfq_remove_merge_hints(q, crq);
-
- }
+ list_del_init(&rq->queuelist);
+ cfq_del_crq_rb(crq);
+ cfq_del_crq_hash(crq);
}
static int
@@ -728,12 +674,6 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
struct request *__rq;
int ret;
- ret = elv_try_last_merge(q, bio);
- if (ret != ELEVATOR_NO_MERGE) {
- __rq = q->last_merge;
- goto out_insert;
- }
-
__rq = cfq_find_rq_hash(cfqd, bio->bi_sector);
if (__rq && elv_rq_merge_ok(__rq, bio)) {
ret = ELEVATOR_BACK_MERGE;
@@ -748,8 +688,6 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio)
return ELEVATOR_NO_MERGE;
out:
- q->last_merge = __rq;
-out_insert:
*req = __rq;
return ret;
}
@@ -762,14 +700,12 @@ static void cfq_merged_request(request_queue_t *q, struct request *req)
cfq_del_crq_hash(crq);
cfq_add_crq_hash(cfqd, crq);
- if (ON_RB(&crq->rb_node) && (rq_rb_key(req) != crq->rb_key)) {
+ if (rq_rb_key(req) != crq->rb_key) {
struct cfq_queue *cfqq = crq->cfq_queue;
cfq_update_nex