diff options
author | Linus Torvalds <torvalds@g5.osdl.org> | 2005-10-28 08:53:49 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@g5.osdl.org> | 2005-10-28 08:53:49 -0700 |
commit | 28d721e24c88496ff8e9c4a0959bdc1415c0658e (patch) | |
tree | 0652161bbbcbfddf47c7ddb25d2db8ecd4cbec89 | |
parent | 0ee40c6628434f0535da31deeacc28b61e80d810 (diff) | |
parent | cb19833dccb32f97cacbfff834b53523915f13f6 (diff) |
Merge branch 'generic-dispatch' of git://brick.kernel.dk/data/git/linux-2.6-block
-rw-r--r-- | Documentation/block/biodoc.txt | 113 | ||||
-rw-r--r-- | drivers/block/as-iosched.c | 325 | ||||
-rw-r--r-- | drivers/block/cfq-iosched.c | 364 | ||||
-rw-r--r-- | drivers/block/deadline-iosched.c | 123 | ||||
-rw-r--r-- | drivers/block/elevator.c | 266 | ||||
-rw-r--r-- | drivers/block/ll_rw_blk.c | 23 | ||||
-rw-r--r-- | drivers/block/noop-iosched.c | 48 | ||||
-rw-r--r-- | include/linux/blkdev.h | 26 | ||||
-rw-r--r-- | include/linux/elevator.h | 18 |
9 files changed, 459 insertions, 847 deletions
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt index 6dd274d7e1c..2d65c218216 100644 --- a/Documentation/block/biodoc.txt +++ b/Documentation/block/biodoc.txt @@ -906,9 +906,20 @@ Aside: 4. The I/O scheduler -I/O schedulers are now per queue. They should be runtime switchable and modular -but aren't yet. Jens has most bits to do this, but the sysfs implementation is -missing. +I/O scheduler, a.k.a. elevator, is implemented in two layers. Generic dispatch +queue and specific I/O schedulers. Unless stated otherwise, elevator is used +to refer to both parts and I/O scheduler to specific I/O schedulers. + +Block layer implements generic dispatch queue in ll_rw_blk.c and elevator.c. +The generic dispatch queue is responsible for properly ordering barrier +requests, requeueing, handling non-fs requests and all other subtleties. + +Specific I/O schedulers are responsible for ordering normal filesystem +requests. They can also choose to delay certain requests to improve +throughput or whatever purpose. As the plural form indicates, there are +multiple I/O schedulers. They can be built as modules but at least one should +be built inside the kernel. Each queue can choose different one and can also +change to another one dynamically. A block layer call to the i/o scheduler follows the convention elv_xxx(). This calls elevator_xxx_fn in the elevator switch (drivers/block/elevator.c). Oh, @@ -921,44 +932,36 @@ keeping work. The functions an elevator may implement are: (* are mandatory) elevator_merge_fn called to query requests for merge with a bio -elevator_merge_req_fn " " " with another request +elevator_merge_req_fn called when two requests get merged. the one + which gets merged into the other one will be + never seen by I/O scheduler again. IOW, after + being merged, the request is gone. elevator_merged_fn called when a request in the scheduler has been involved in a merge. It is used in the deadline scheduler for example, to reposition the request if its sorting order has changed. -*elevator_next_req_fn returns the next scheduled request, or NULL - if there are none (or none are ready). +elevator_dispatch_fn fills the dispatch queue with ready requests. + I/O schedulers are free to postpone requests by + not filling the dispatch queue unless @force + is non-zero. Once dispatched, I/O schedulers + are not allowed to manipulate the requests - + they belong to generic dispatch queue. -*elevator_add_req_fn called to add a new request into the scheduler +elevator_add_req_fn called to add a new request into the scheduler elevator_queue_empty_fn returns true if the merge queue is empty. Drivers shouldn't use this, but rather check if elv_next_request is NULL (without losing the request if one exists!) -elevator_remove_req_fn This is called when a driver claims ownership of - the target request - it now belongs to the - driver. It must not be modified or merged. - Drivers must not lose the request! A subsequent - call of elevator_next_req_fn must return the - _next_ request. - -elevator_requeue_req_fn called to add a request to the scheduler. This - is used when the request has alrnadebeen - returned by elv_next_request, but hasn't - completed. If this is not implemented then - elevator_add_req_fn is called instead. - elevator_former_req_fn elevator_latter_req_fn These return the request before or after the one specified in disk sort order. Used by the block layer to find merge possibilities. -elevator_completed_req_fn called when a request is completed. This might - come about due to being merged with another or - when the device completes the request. +elevator_completed_req_fn called when a request is completed. elevator_may_queue_fn returns true if the scheduler wants to allow the current context to queue a new request even if @@ -967,13 +970,33 @@ elevator_may_queue_fn returns true if the scheduler wants to allow the elevator_set_req_fn elevator_put_req_fn Must be used to allocate and free any elevator - specific storate for a request. + specific storage for a request. + +elevator_activate_req_fn Called when device driver first sees a request. + I/O schedulers can use this callback to + determine when actual execution of a request + starts. +elevator_deactivate_req_fn Called when device driver decides to delay + a request by requeueing it. elevator_init_fn elevator_exit_fn Allocate and free any elevator specific storage for a queue. -4.2 I/O scheduler implementation +4.2 Request flows seen by I/O schedulers +All requests seens by I/O schedulers strictly follow one of the following three +flows. + + set_req_fn -> + + i. add_req_fn -> (merged_fn ->)* -> dispatch_fn -> activate_req_fn -> + (deactivate_req_fn -> activate_req_fn ->)* -> completed_req_fn + ii. add_req_fn -> (merged_fn ->)* -> merge_req_fn + iii. [none] + + -> put_req_fn + +4.3 I/O scheduler implementation The generic i/o scheduler algorithm attempts to sort/merge/batch requests for optimal disk scan and request servicing performance (based on generic principles and device capabilities), optimized for: @@ -993,18 +1016,7 @@ request in sort order to prevent binary tree lookups. This arrangement is not a generic block layer characteristic however, so elevators may implement queues as they please. -ii. Last merge hint -The last merge hint is part of the generic queue layer. I/O schedulers must do -some management on it. For the most part, the most important thing is to make -sure q->last_merge is cleared (set to NULL) when the request on it is no longer -a candidate for merging (for example if it has been sent to the driver). - -The last merge performed is cached as a hint for the subsequent request. If -sequential data is being submitted, the hint is used to perform merges without -any scanning. This is not sufficient when there are multiple processes doing -I/O though, so a "merge hash" is used by some schedulers. - -iii. Merge hash +ii. Merge hash AS and deadline use a hash table indexed by the last sector of a request. This enables merging code to quickly look up "back merge" candidates, even when multiple I/O streams are being performed at once on one disk. @@ -1013,29 +1025,8 @@ multiple I/O streams are being performed at once on one disk. are far less common than "back merges" due to the nature of most I/O patterns. Front merges are handled by the binary trees in AS and deadline schedulers. -iv. Handling barrier cases -A request with flags REQ_HARDBARRIER or REQ_SOFTBARRIER must not be ordered -around. That is, they must be processed after all older requests, and before -any newer ones. This includes merges! - -In AS and deadline schedulers, barriers have the effect of flushing the reorder -queue. The performance cost of this will vary from nothing to a lot depending -on i/o patterns and device characteristics. Obviously they won't improve -performance, so their use should be kept to a minimum. - -v. Handling insertion position directives -A request may be inserted with a position directive. The directives are one of -ELEVATOR_INSERT_BACK, ELEVATOR_INSERT_FRONT, ELEVATOR_INSERT_SORT. - -ELEVATOR_INSERT_SORT is a general directive for non-barrier requests. -ELEVATOR_INSERT_BACK is used to insert a barrier to the back of the queue. -ELEVATOR_INSERT_FRONT is used to insert a barrier to the front of the queue, and -overrides the ordering requested by any previous barriers. In practice this is -harmless and required, because it is used for SCSI requeueing. This does not -require flushing the reorder queue, so does not impose a performance penalty. - -vi. Plugging the queue to batch requests in anticipation of opportunities for - merge/sort optimizations +iii. Plugging the queue to batch requests in anticipation of opportunities for + merge/sort optimizations This is just the same as in 2.4 so far, though per-device unplugging support is anticipated for 2.5. Also with a priority-based i/o scheduler, @@ -1069,7 +1060,7 @@ Aside: blk_kick_queue() to unplug a specific queue (right away ?) or optionally, all queues, is in the plan. -4.3 I/O contexts +4.4 I/O contexts I/O contexts provide a dynamically allocated per process data area. They may be used in I/O schedulers, and in the block layer (could be used for IO statis, priorities for example). See *io_context in drivers/block/ll_rw_blk.c, and diff --git a/drivers/block/as-iosched.c b/drivers/block/as-iosched.c index 1f08e14697e..4081c36c8c1 100644 --- a/drivers/block/as-iosched.c +++ b/drivers/block/as-iosched.c @@ -98,7 +98,6 @@ struct as_data { struct as_rq *next_arq[2]; /* next in sort order */ sector_t last_sector[2]; /* last REQ_SYNC & REQ_ASYNC sectors */ - struct list_head *dispatch; /* driver dispatch queue */ struct list_head *hash; /* request hash */ unsigned long exit_prob; /* probability a task will exit while @@ -239,6 +238,25 @@ static struct io_context *as_get_io_context(void) return ioc; } +static void as_put_io_context(struct as_rq *arq) +{ + struct as_io_context *aic; + + if (unlikely(!arq->io_context)) + return; + + aic = arq->io_context->aic; + + if (arq->is_sync == REQ_SYNC && aic) { + spin_lock(&aic->lock); + set_bit(AS_TASK_IORUNNING, &aic->state); + aic->last_end_request = jiffies; + spin_unlock(&aic->lock); + } + + put_io_context(arq->io_context); +} + /* * the back merge hash support functions */ @@ -261,14 +279,6 @@ static inline void as_del_arq_hash(struct as_rq *arq) __as_del_arq_hash(arq); } -static void as_remove_merge_hints(request_queue_t *q, struct as_rq *arq) -{ - as_del_arq_hash(arq); - - if (q->last_merge == arq->request) - q->last_merge = NULL; -} - static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq) { struct request *rq = arq->request; @@ -312,7 +322,7 @@ static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset) BUG_ON(!arq->on_hash); if (!rq_mergeable(__rq)) { - as_remove_merge_hints(ad->q, arq); + as_del_arq_hash(arq); continue; } @@ -950,23 +960,12 @@ static void as_completed_request(request_queue_t *q, struct request *rq) WARN_ON(!list_empty(&rq->queuelist)); - if (arq->state == AS_RQ_PRESCHED) { - WARN_ON(arq->io_context); - goto out; - } - - if (arq->state == AS_RQ_MERGED) - goto out_ioc; - if (arq->state != AS_RQ_REMOVED) { printk("arq->state %d\n", arq->state); WARN_ON(1); goto out; } - if (!blk_fs_request(rq)) - goto out; - if (ad->changed_batch && ad->nr_dispatched == 1) { kblockd_schedule_work(&ad->antic_work); ad->changed_batch = 0; @@ -1001,21 +1000,7 @@ static void as_completed_request(request_queue_t *q, struct request *rq) } } -out_ioc: - if (!arq->io_context) - goto out; - - if (arq->is_sync == REQ_SYNC) { - struct as_io_context *aic = arq->io_context->aic; - if (aic) { - spin_lock(&aic->lock); - set_bit(AS_TASK_IORUNNING, &aic->state); - aic->last_end_request = jiffies; - spin_unlock(&aic->lock); - } - } - - put_io_context(arq->io_context); + as_put_io_context(arq); out: arq->state = AS_RQ_POSTSCHED; } @@ -1047,73 +1032,11 @@ static void as_remove_queued_request(request_queue_t *q, struct request *rq) ad->next_arq[data_dir] = as_find_next_arq(ad, arq); list_del_init(&arq->fifo); - as_remove_merge_hints(q, arq); + as_del_arq_hash(arq); as_del_arq_rb(ad, arq); } /* - * as_remove_dispatched_request is called to remove a request which has gone - * to the dispatch list. - */ -static void as_remove_dispatched_request(request_queue_t *q, struct request *rq) -{ - struct as_rq *arq = RQ_DATA(rq); - struct as_io_context *aic; - - if (!arq) { - WARN_ON(1); - return; - } - - WARN_ON(arq->state != AS_RQ_DISPATCHED); - WARN_ON(ON_RB(&arq->rb_node)); - if (arq->io_context && arq->io_context->aic) { - aic = arq->io_context->aic; - if (aic) { - WARN_ON(!atomic_read(&aic->nr_dispatched)); - atomic_dec(&aic->nr_dispatched); - } - } -} - -/* - * as_remove_request is called when a driver has finished with a request. - * This should be only called for dispatched requests, but for some reason - * a POWER4 box running hwscan it does not. - */ -static void as_remove_request(request_queue_t *q, struct request *rq) -{ - struct as_rq *arq = RQ_DATA(rq); - - if (unlikely(arq->state == AS_RQ_NEW)) - goto out; - - if (ON_RB(&arq->rb_node)) { - if (arq->state != AS_RQ_QUEUED) { - printk("arq->state %d\n", arq->state); - WARN_ON(1); - goto out; - } - /* - * We'll lose the aliased request(s) here. I don't think this - * will ever happen, but if it does, hopefully someone will - * report it. - */ - WARN_ON(!list_empty(&rq->queuelist)); - as_remove_queued_request(q, rq); - } else { - if (arq->state != AS_RQ_DISPATCHED) { - printk("arq->state %d\n", arq->state); - WARN_ON(1); - goto out; - } - as_remove_dispatched_request(q, rq); - } -out: - arq->state = AS_RQ_REMOVED; -} - -/* * as_fifo_expired returns 0 if there are no expired reads on the fifo, * 1 otherwise. It is ratelimited so that we only perform the check once per * `fifo_expire' interval. Otherwise a large number of expired requests @@ -1165,7 +1088,6 @@ static inline int as_batch_expired(struct as_data *ad) static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) { struct request *rq = arq->request; - struct list_head *insert; const int data_dir = arq->is_sync; BUG_ON(!ON_RB(&arq->rb_node)); @@ -1198,13 +1120,13 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) /* * take it off the sort and fifo list, add to dispatch queue */ - insert = ad->dispatch->prev; - while (!list_empty(&rq->queuelist)) { struct request *__rq = list_entry_rq(rq->queuelist.next); struct as_rq *__arq = RQ_DATA(__rq); - list_move_tail(&__rq->queuelist, ad->dispatch); + list_del(&__rq->queuelist); + + elv_dispatch_add_tail(ad->q, __rq); if (__arq->io_context && __arq->io_context->aic) atomic_inc(&__arq->io_context->aic->nr_dispatched); @@ -1218,7 +1140,8 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) as_remove_queued_request(ad->q, rq); WARN_ON(arq->state != AS_RQ_QUEUED); - list_add(&rq->queuelist, insert); + elv_dispatch_sort(ad->q, rq); + arq->state = AS_RQ_DISPATCHED; if (arq->io_context && arq->io_context->aic) atomic_inc(&arq->io_context->aic->nr_dispatched); @@ -1230,12 +1153,42 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) * read/write expire, batch expire, etc, and moves it to the dispatch * queue. Returns 1 if a request was found, 0 otherwise. */ -static int as_dispatch_request(struct as_data *ad) +static int as_dispatch_request(request_queue_t *q, int force) { + struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq; const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]); const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]); + if (unlikely(force)) { + /* + * Forced dispatch, accounting is useless. Reset + * accounting states and dump fifo_lists. Note that + * batch_data_dir is reset to REQ_SYNC to avoid + * screwing write batch accounting as write batch + * accounting occurs on W->R transition. + */ + int dispatched = 0; + + ad->batch_data_dir = REQ_SYNC; + ad->changed_batch = 0; + ad->new_batch = 0; + + while (ad->next_arq[REQ_SYNC]) { + as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]); + dispatched++; + } + ad->last_check_fifo[REQ_SYNC] = jiffies; + + while (ad->next_arq[REQ_ASYNC]) { + as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]); + dispatched++; + } + ad->last_check_fifo[REQ_ASYNC] = jiffies; + + return dispatched; + } + /* Signal that the write batch was uncontended, so we can't time it */ if (ad->batch_data_dir == REQ_ASYNC && !reads) { if (ad->current_write_count == 0 || !writes) @@ -1359,20 +1312,6 @@ fifo_expired: return 1; } -static struct request *as_next_request(request_queue_t *q) -{ - struct as_data *ad = q->elevator->elevator_data; - struct request *rq = NULL; - - /* - * if there are still requests on the dispatch queue, grab the first - */ - if (!list_empty(ad->dispatch) || as_dispatch_request(ad)) - rq = list_entry_rq(ad->dispatch->next); - - return rq; -} - /* * Add arq to a list behind alias */ @@ -1404,17 +1343,25 @@ as_add_aliased_request(struct as_data *ad, struct as_rq *arq, struct as_rq *alia /* * Don't want to have to handle merges. */ - as_remove_merge_hints(ad->q, arq); + as_del_arq_hash(arq); } /* * add arq to rbtree and fifo */ -static void as_add_request(struct as_data *ad, struct as_rq *arq) +static void as_add_request(request_queue_t *q, struct request *rq) { + struct as_data *ad = q->elevator->elevator_data; + struct as_rq *arq = RQ_DATA(rq); struct as_rq *alias; int data_dir; + if (arq->state != AS_RQ_PRESCHED) { + printk("arq->state: %d\n", arq->state); + WARN_ON(1); + } + arq->state = AS_RQ_NEW; + if (rq_data_dir(arq->request) == READ || current->flags&PF_SYNCWRITE) arq->is_sync = 1; @@ -1437,12 +1384,8 @@ static void as_add_request(struct as_data *ad, struct as_rq *arq) arq->expires = jiffies + ad->fifo_expire[data_dir]; list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]); - if (rq_mergeable(arq->request)) { + if (rq_mergeable(arq->request)) as_add_arq_hash(ad, arq); - - if (!ad->q->last_merge) - ad->q->last_merge = arq->request; - } as_update_arq(ad, arq); /* keep state machine up to date */ } else { @@ -1463,96 +1406,24 @@ static void as_add_request(struct as_data *ad, struct as_rq *arq) arq->state = AS_RQ_QUEUED; } -static void as_deactivate_request(request_queue_t *q, struct request *rq) +static void as_activate_request(request_queue_t *q, struct request *rq) { - struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = RQ_DATA(rq); - if (arq) { - if (arq->state == AS_RQ_REMOVED) { - arq->state = AS_RQ_DISPATCHED; - if (arq->io_context && arq->io_context->aic) - atomic_inc(&arq->io_context->aic->nr_dispatched); - } - } else - WARN_ON(blk_fs_request(rq) - && (!(rq->flags & (REQ_HARDBARRIER|REQ_SOFTBARRIER))) ); - - /* Stop anticipating - let this request get through */ - as_antic_stop(ad); -} - -/* - * requeue the request. The request has not been completed, nor is it a - * new request, so don't touch accounting. - */ -static void as_requeue_request(request_queue_t *q, struct request *rq) -{ - as_deactivate_request(q, rq); - list_add(&rq->queuelist, &q->queue_head); -} - -/* - * Account a request that is inserted directly onto the dispatch queue. - * arq->io_context->aic->nr_dispatched should not need to be incremented - * because only new requests should come through here: requeues go through - * our explicit requeue handler. - */ -static void as_account_queued_request(struct as_data *ad, struct request *rq) -{ - if (blk_fs_request(rq)) { - struct as_rq *arq = RQ_DATA(rq); - arq->state = AS_RQ_DISPATCHED; - ad->nr_dispatched++; - } + WARN_ON(arq->state != AS_RQ_DISPATCHED); + arq->state = AS_RQ_REMOVED; + if (arq->io_context && arq->io_context->aic) + atomic_dec(&arq->io_context->aic->nr_dispatched); } -static void -as_insert_request(request_queue_t *q, struct request *rq, int where) +static void as_deactivate_request(request_queue_t *q, struct request *rq) { - struct as_data *ad = q->elevator->elevator_data; struct as_rq *arq = RQ_DATA(rq); - if (arq) { - if (arq->state != AS_RQ_PRESCHED) { - printk("arq->state: %d\n", arq->state); - WARN_ON(1); - } - arq->state = AS_RQ_NEW; - } - - /* barriers must flush the reorder queue */ - if (unlikely(rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER) - && where == ELEVATOR_INSERT_SORT)) { - WARN_ON(1); - where = ELEVATOR_INSERT_BACK; - } - - switch (where) { - case ELEVATOR_INSERT_BACK: - while (ad->next_arq[REQ_SYNC]) - as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]); - - while (ad->next_arq[REQ_ASYNC]) - as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]); - - list_add_tail(&rq->queuelist, ad->dispatch); - as_account_queued_request(ad, rq); - as_antic_stop(ad); - break; - case ELEVATOR_INSERT_FRONT: - list_add(&rq->queuelist, ad->dispatch); - as_account_queued_request(ad, rq); - as_antic_stop(ad); - break; - case ELEVATOR_INSERT_SORT: - BUG_ON(!blk_fs_request(rq)); - as_add_request(ad, arq); - break; - default: - BUG(); - return; - } + WARN_ON(arq->state != AS_RQ_REMOVED); + arq->state = AS_RQ_DISPATCHED; + if (arq->io_context && arq->io_context->aic) + atomic_inc(&arq->io_context->aic->nr_dispatched); } /* @@ -1565,12 +1436,8 @@ static int as_queue_empty(request_queue_t *q) { struct as_data *ad = q->elevator->elevator_data; - if (!list_empty(&ad->fifo_list[REQ_ASYNC]) - || !list_empty(&ad->fifo_list[REQ_SYNC]) - || !list_empty(ad->dispatch)) - return 0; - - return 1; + return list_empty(&ad->fifo_list[REQ_ASYNC]) + && list_empty(&ad->fifo_list[REQ_SYNC]); } static struct request * @@ -1608,15 +1475,6 @@ as_merge(request_queue_t *q, struct request **req, struct bio *bio) int ret; /* - * try last_merge to avoid going to hash - */ - ret = elv_try_last_merge(q, bio); - if (ret != ELEVATOR_NO_MERGE) { - __rq = q->last_merge; - goto out_insert; - } - - /* * see if the merge hash can satisfy a back merge */ __rq = as_find_arq_hash(ad, bio->bi_sector); @@ -1644,9 +1502,6 @@ as_merge(request_queue_t *q, struct request **req, struct bio *bio) return ELEVATOR_NO_MERGE; out: - if (rq_mergeable(__rq)) - q->last_merge = __rq; -out_insert: if (ret) { if (rq_mergeable(__rq)) as_hot_arq_hash(ad, RQ_DATA(__rq)); @@ -1693,9 +1548,6 @@ static void as_merged_request(request_queue_t *q, struct request *req) * behind the disk head. We currently don't bother adjusting. */ } - - if (arq->on_hash) - q->last_merge = req; } static void @@ -1763,6 +1615,7 @@ as_merged_requests(request_queue_t *q, struct request *req, * kill knowledge of next, this one is a goner */ as_remove_queued_request(q, next); + as_put_io_context(anext); anext->state = AS_RQ_MERGED; } @@ -1782,7 +1635,7 @@ static void as_work_handler(void *data) unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); - if (as_next_request(q)) + if (!as_queue_empty(q)) q->request_fn(q); spin_unlock_irqrestore(q->queue_lock, flags); } @@ -1797,7 +1650,9 @@ static void as_put_request(request_queue_t *q, struct request *rq) return; } - if (arq->state != AS_RQ_POSTSCHED && arq->state != AS_RQ_PRESCHED) { + if (unlikely(arq->state != AS_RQ_POSTSCHED && + arq->state != AS_RQ_PRESCHED && + arq->state != AS_RQ_MERGED)) { printk("arq->state %d\n", arq->state); WARN_ON(1); } @@ -1907,7 +1762,6 @@ static int as_init_queue(request_queue_t *q, elevator_t *e) INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]); ad->sort_list[REQ_SYNC] = RB_ROOT; ad->sort_list[REQ_ASYNC] = RB_ROOT; - ad->dispatch = &q->queue_head; ad->fifo_expire[REQ_SYNC] = default_read_expire; ad->fifo_expire[REQ_ASYNC] = default_write_expire; ad->antic_expire = default_antic_expire; @@ -2072,10 +1926,9 @@ static struct elevator_type iosched_as = { .elevator_merge_fn = as_merge, .elevator_merged_fn = as_merged_request, .elevator_merge_req_fn = as_merged_requests, - .elevator_next_req_fn = as_next_request, - .elevator_add_req_fn = as_insert_request, - .elevator_remove_req_fn = as_remove_request, - .elevator_requeue_req_fn = as_requeue_request, + .elevator_dispatch_fn = as_dispatch_request, + .elevator_add_req_fn = as_add_request, + .elevator_activate_req_fn = as_activate_request, .elevator_deactivate_req_fn = as_deactivate_request, .elevator_queue_empty_fn = as_queue_empty, .elevator_completed_req_fn = as_completed_request, diff --git a/drivers/block/cfq-iosched.c b/drivers/block/cfq-iosched.c index d3bfe8cfb03..94690e4d41e 100644 --- a/drivers/block/cfq-iosched.c +++ b/drivers/block/cfq-iosched.c @@ -84,7 +84,6 @@ static int cfq_max_depth = 2; (node)->rb_left = NULL; \ } while (0) #define RB_CLEAR_ROOT(root) ((root)->rb_node = NULL) -#define ON_RB(node) ((node)->rb_color != RB_NONE) #define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node) #define rq_rb_key(rq) (rq)->sector @@ -271,10 +270,7 @@ CFQ_CFQQ_FNS(expired); #undef CFQ_CFQQ_FNS enum cfq_rq_state_flags { - CFQ_CRQ_FLAG_in_flight = 0, - CFQ_CRQ_FLAG_in_driver, - CFQ_CRQ_FLAG_is_sync, - CFQ_CRQ_FLAG_requeued, + CFQ_CRQ_FLAG_is_sync = 0, }; #define CFQ_CRQ_FNS(name) \ @@ -291,14 +287,11 @@ static inline int cfq_crq_##name(const struct cfq_rq *crq) \ return (crq->crq_flags & (1 << CFQ_CRQ_FLAG_##name)) != 0; \ } -CFQ_CRQ_FNS(in_flight); -CFQ_CRQ_FNS(in_driver); CFQ_CRQ_FNS(is_sync); -CFQ_CRQ_FNS(requeued); #undef CFQ_CRQ_FNS static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int, unsigned short); -static void cfq_dispatch_sort(request_queue_t *, struct cfq_rq *); +static void cfq_dispatch_insert(request_queue_t *, struct cfq_rq *); static void cfq_put_cfqd(struct cfq_data *cfqd); #define process_sync(tsk) ((tsk)->flags & PF_SYNCWRITE) @@ -311,14 +304,6 @@ static inline void cfq_del_crq_hash(struct cfq_rq *crq) hlist_del_init(&crq->hash); } -static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq) -{ - cfq_del_crq_hash(crq); - - if (q->last_merge == crq->request) - q->last_merge = NULL; -} - static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq) { const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request)); @@ -347,18 +332,13 @@ static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset) return NULL; } -static inline int cfq_pending_requests(struct cfq_data *cfqd) -{ - return !list_empty(&cfqd->queue->queue_head) || cfqd->busy_queues; -} - /* * scheduler run of queue, if there are requests pending and no one in the * driver that will restart queueing */ static inline void cfq_schedule_dispatch(struct cfq_data *cfqd) { - if (!cfqd->rq_in_driver && cfq_pending_requests(cfqd)) + if (!cfqd->rq_in_driver && cfqd->busy_queues) kblockd_schedule_work(&cfqd->unplug_work); } @@ -366,7 +346,7 @@ static int cfq_queue_empty(request_queue_t *q) { struct cfq_data *cfqd = q->elevator->elevator_data; - return !cfq_pending_requests(cfqd); + return !cfqd->busy_queues; } /* @@ -386,11 +366,6 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) if (crq2 == NULL) return crq1; - if (cfq_crq_requeued(crq1) && !cfq_crq_requeued(crq2)) - return crq1; - else if (cfq_crq_requeued(crq2) && !cfq_crq_requeued(crq1)) - return crq2; - if (cfq_crq_is_sync(crq1) && !cfq_crq_is_sync(crq2)) return crq1; else if (cfq_crq_is_sync(crq2) && !cfq_crq_is_sync(crq1)) @@ -461,10 +436,7 @@ cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct cfq_rq *crq_next = NULL, *crq_prev = NULL; struct rb_node *rbnext, *rbprev; - rbnext = NULL; - if (ON_RB(&last->rb_node)) - rbnext = rb_next(&last->rb_node); - if (!rbnext) { + if (!(rbnext = rb_next(&last->rb_node))) { rbnext = rb_first(&cfqq->sort_list); if (rbnext == &last->rb_node) rbnext = NULL; @@ -545,13 +517,13 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted) * the pending list according to last request service */ static inline void -cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq, int requeue) +cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) { BUG_ON(cfq_cfqq_on_rr(cfqq)); cfq_mark_cfqq_on_rr(cfqq); cfqd->busy_queues++; - cfq_resort_rr_list(cfqq, requeue); + cfq_resort_rr_list(cfqq, 0); } static inline void @@ -571,22 +543,19 @@ cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) static inline void cfq_del_crq_rb(struct cfq_rq *crq) { struct cfq_queue *cfqq = crq->cfq_queue; + struct cfq_data *cfqd = cfqq->cfqd; + const int sync = cfq_crq_is_sync(crq); - if (ON_RB(&crq->rb_node)) { - struct cfq_data *cfqd = cfqq->cfqd; - const int sync = cfq_crq_is_sync(crq); + BUG_ON(!cfqq->queued[sync]); + cfqq->queued[sync]--; - BUG_ON(!cfqq->queued[sync]); - cfqq->queued[sync]--; + cfq_update_next_crq(crq); - cfq_update_next_crq(crq); + rb_erase(&crq->rb_node, &cfqq->sort_list); + RB_CLEAR_COLOR(&crq->rb_node); - rb_erase(&crq->rb_node, &cfqq->sort_list); - RB_CLEAR_COLOR(&crq->rb_node); - - if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list)) - cfq_del_cfqq_rr(cfqd, cfqq); - } + if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY(&cfqq->sort_list)) + cfq_del_cfqq_rr(cfqd, cfqq); } static struct cfq_rq * @@ -627,12 +596,12 @@ static void cfq_add_crq_rb(struct cfq_rq *crq) * if that happens, put the alias on the dispatch list */ while ((__alias = __cfq_add_crq_rb(crq)) != NULL) - cfq_dispatch_sort(cfqd->queue, __alias); + cfq_dispatch_insert(cfqd->queue, __alias); rb_insert_color(&crq->rb_node, &cfqq->sort_list); if (!cfq_cfqq_on_rr(cfqq)) - cfq_add_cfqq_rr(cfqd, cfqq, cfq_crq_requeued(crq)); + cfq_add_cfqq_rr(cfqd, cfqq); /* * check if this request is a better next-serve candidate @@ -643,10 +612,8 @@ static void cfq_add_crq_rb(struct cfq_rq *crq) static inline void cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq) { - if (ON_RB(&crq->rb_node)) { - rb_erase(&crq->rb_node, &cfqq->sort_list); - cfqq->queued[cfq_crq_is_sync(crq)]--; - } + rb_erase(&crq->rb_node, &cfqq->sort_list); + cfqq->queued[cfq_crq_is_sync(crq)]--; cfq_add_crq_rb(crq); } @@ -676,49 +643,28 @@ out: return NULL; } -static void cfq_deactivate_request(request_queue_t *q, struct request *rq) +static void cfq_activate_request(request_queue_t *q, struct request *rq) { struct cfq_data *cfqd = q->elevator->elevator_data; - struct cfq_rq *crq = RQ_DATA(rq); - - if (crq) { - struct cfq_queue *cfqq = crq->cfq_queue; - - if (cfq_crq_in_driver(crq)) { - cfq_clear_crq_in_driver(crq); - WARN_ON(!cfqd->rq_in_driver); - cfqd->rq_in_driver--; - } - if (cfq_crq_in_flight(crq)) { - const int sync = cfq_crq_is_sync(crq); - cfq_clear_crq_in_flight(crq); - WARN_ON(!cfqq->on_dispatch[sync]); - cfqq->on_dispatch[sync]--; - } - cfq_mark_crq_requeued(crq); - } + cfqd->rq_in_driver++; } -/* - * make sure the service time gets corrected on reissue of this request - */ -static void cfq_requeue_request(request_queue_t *q, struct request *rq) +static void cfq_deactivate_request(request_queue_t *q, struct request *rq) { - cfq_deactivate_request(q, rq); - list_add(&rq->queuelist, &q->queue_head); + struct cfq_data *cfqd = q->elevator->elevator_data; + + WARN_ON(!cfqd->rq_in_driver); + cfqd->rq_in_driver--; } -static void cfq_remove_request(request_queue_t *q, struct request *rq) +static void cfq_remove_request(struct request *rq) { struct cfq_rq *crq = RQ_DATA(rq); - if (crq) { - list_del_init(&rq->queuelist); - cfq_del_crq_rb(crq); - cfq_remove_merge_hints(q, crq); - - } + list_del_init(&rq->queuelist); + cfq_del_crq_rb(crq); + cfq_del_crq_hash(crq); } static int @@ -728,12 +674,6 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio) struct request *__rq; int ret; - ret = elv_try_last_merge(q, bio); - if (ret != ELEVATOR_NO_MERGE) { - __rq = q->last_merge; - goto out_insert; - } - __rq = cfq_find_rq_hash(cfqd, bio->bi_sector); if (__rq && elv_rq_merge_ok(__rq, bio)) { ret = ELEVATOR_BACK_MERGE; @@ -748,8 +688,6 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio) return ELEVATOR_NO_MERGE; out: - q->last_merge = __rq; -out_insert: *req = __rq; return ret; } @@ -762,14 +700,12 @@ static void cfq_merged_request(request_queue_t *q, struct request *req) cfq_del_crq_hash(crq); cfq_add_crq_hash(cfqd, crq); - if (ON_RB(&crq->rb_node) && (rq_rb_key(req) != crq->rb_key)) { + if (rq_rb_key(req) != crq->rb_key) { struct cfq_queue *cfqq = crq->cfq_queue; cfq_update_nex |