diff options
Diffstat (limited to 'drivers/block/cfq-iosched.c')
-rw-r--r-- | drivers/block/cfq-iosched.c | 1906 |
1 files changed, 1274 insertions, 632 deletions
diff --git a/drivers/block/cfq-iosched.c b/drivers/block/cfq-iosched.c index 3ac47dde64d..35f6e569d5e 100644 --- a/drivers/block/cfq-iosched.c +++ b/drivers/block/cfq-iosched.c @@ -21,22 +21,33 @@ #include <linux/hash.h> #include <linux/rbtree.h> #include <linux/mempool.h> - -static unsigned long max_elapsed_crq; -static unsigned long max_elapsed_dispatch; +#include <linux/ioprio.h> +#include <linux/writeback.h> /* * tunables */ static int cfq_quantum = 4; /* max queue in one round of service */ static int cfq_queued = 8; /* minimum rq allocate limit per-queue*/ -static int cfq_service = HZ; /* period over which service is avg */ -static int cfq_fifo_expire_r = HZ / 2; /* fifo timeout for sync requests */ -static int cfq_fifo_expire_w = 5 * HZ; /* fifo timeout for async requests */ -static int cfq_fifo_rate = HZ / 8; /* fifo expiry rate */ +static int cfq_fifo_expire[2] = { HZ / 4, HZ / 8 }; static int cfq_back_max = 16 * 1024; /* maximum backwards seek, in KiB */ static int cfq_back_penalty = 2; /* penalty of a backwards seek */ +static int cfq_slice_sync = HZ / 10; +static int cfq_slice_async = HZ / 50; +static int cfq_slice_async_rq = 2; +static int cfq_slice_idle = HZ / 50; + +#define CFQ_IDLE_GRACE (HZ / 10) +#define CFQ_SLICE_SCALE (5) + +#define CFQ_KEY_ASYNC (0) + +/* + * disable queueing at the driver/hardware level + */ +static int cfq_max_depth = 1; + /* * for the hash of cfqq inside the cfqd */ @@ -55,6 +66,7 @@ static int cfq_back_penalty = 2; /* penalty of a backwards seek */ #define list_entry_hash(ptr) hlist_entry((ptr), struct cfq_rq, hash) #define list_entry_cfqq(ptr) list_entry((ptr), struct cfq_queue, cfq_list) +#define list_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) #define RQ_DATA(rq) (rq)->elevator_private @@ -75,78 +87,101 @@ static int cfq_back_penalty = 2; /* penalty of a backwards seek */ #define rb_entry_crq(node) rb_entry((node), struct cfq_rq, rb_node) #define rq_rb_key(rq) (rq)->sector -/* - * threshold for switching off non-tag accounting - */ -#define CFQ_MAX_TAG (4) - -/* - * sort key types and names - */ -enum { - CFQ_KEY_PGID, - CFQ_KEY_TGID, - CFQ_KEY_UID, - CFQ_KEY_GID, - CFQ_KEY_LAST, -}; - -static char *cfq_key_types[] = { "pgid", "tgid", "uid", "gid", NULL }; - static kmem_cache_t *crq_pool; static kmem_cache_t *cfq_pool; static kmem_cache_t *cfq_ioc_pool; +#define CFQ_PRIO_LISTS IOPRIO_BE_NR +#define cfq_class_idle(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_IDLE) +#define cfq_class_be(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_BE) +#define cfq_class_rt(cfqq) ((cfqq)->ioprio_class == IOPRIO_CLASS_RT) + +#define cfq_cfqq_sync(cfqq) ((cfqq)->key != CFQ_KEY_ASYNC) + +/* + * Per block device queue structure + */ struct cfq_data { - struct list_head rr_list; + atomic_t ref; + request_queue_t *queue; + + /* + * rr list of queues with requests and the count of them + */ + struct list_head rr_list[CFQ_PRIO_LISTS]; + struct list_head busy_rr; + struct list_head cur_rr; + struct list_head idle_rr; + unsigned int busy_queues; + + /* + * non-ordered list of empty cfqq's + */ struct list_head empty_list; + /* + * cfqq lookup hash + */ struct hlist_head *cfq_hash; - struct hlist_head *crq_hash; - /* queues on rr_list (ie they have pending requests */ - unsigned int busy_queues; + /* + * global crq hash for all queues + */ + struct hlist_head *crq_hash; unsigned int max_queued; - atomic_t ref; + mempool_t *crq_pool; - int key_type; + int rq_in_driver; - mempool_t *crq_pool; + /* + * schedule slice state info + */ + /* + * idle window management + */ + struct timer_list idle_slice_timer; + struct work_struct unplug_work; - request_queue_t *queue; + struct cfq_queue *active_queue; + struct cfq_io_context *active_cic; + int cur_prio, cur_end_prio; + unsigned int dispatch_slice; + + struct timer_list idle_class_timer; sector_t last_sector; + unsigned long last_end_request; - int rq_in_driver; + unsigned int rq_starved; /* * tunables, see top of file */ unsigned int cfq_quantum; unsigned int cfq_queued; - unsigned int cfq_fifo_expire_r; - unsigned int cfq_fifo_expire_w; - unsigned int cfq_fifo_batch_expire; + unsigned int cfq_fifo_expire[2]; unsigned int cfq_back_penalty; unsigned int cfq_back_max; - unsigned int find_best_crq; - - unsigned int cfq_tagged; + unsigned int cfq_slice[2]; + unsigned int cfq_slice_async_rq; + unsigned int cfq_slice_idle; + unsigned int cfq_max_depth; }; +/* + * Per process-grouping structure + */ struct cfq_queue { /* reference count */ atomic_t ref; /* parent cfq_data */ struct cfq_data *cfqd; - /* hash of mergeable requests */ + /* cfqq lookup hash */ struct hlist_node cfq_hash; /* hash key */ - unsigned long key; - /* whether queue is on rr (or empty) list */ - int on_rr; + unsigned int key; /* on either rr or empty list of cfqd */ struct list_head cfq_list; /* sorted list of pending requests */ @@ -158,21 +193,35 @@ struct cfq_queue { /* currently allocated requests */ int allocated[2]; /* fifo list of requests in sort_list */ - struct list_head fifo[2]; - /* last time fifo expired */ - unsigned long last_fifo_expire; - - int key_type; - - unsigned long service_start; - unsigned long service_used; + struct list_head fifo; - unsigned int max_rate; + unsigned long slice_start; + unsigned long slice_end; + unsigned long slice_left; + unsigned long service_last; /* number of requests that have been handed to the driver */ int in_flight; - /* number of currently allocated requests */ - int alloc_limit[2]; + + /* io prio of this group */ + unsigned short ioprio, org_ioprio; + unsigned short ioprio_class, org_ioprio_class; + + /* whether queue is on rr (or empty) list */ + unsigned on_rr : 1; + /* idle slice, waiting for new request submission */ + unsigned wait_request : 1; + /* set when wait_request gets set, reset on first rq alloc */ + unsigned must_alloc : 1; + /* only gets one must_alloc per slice */ + unsigned must_alloc_slice : 1; + /* idle slice, request added, now waiting to dispatch it */ + unsigned must_dispatch : 1; + /* fifo expire per-slice */ + unsigned fifo_expire : 1; + + unsigned idle_window : 1; + unsigned prio_changed : 1; }; struct cfq_rq { @@ -184,42 +233,17 @@ struct cfq_rq { struct cfq_queue *cfq_queue; struct cfq_io_context *io_context; - unsigned long service_start; - unsigned long queue_start; - - unsigned int in_flight : 1; - unsigned int accounted : 1; - unsigned int is_sync : 1; - unsigned int is_write : 1; + unsigned in_flight : 1; + unsigned accounted : 1; + unsigned is_sync : 1; + unsigned requeued : 1; }; -static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned long); +static struct cfq_queue *cfq_find_cfq_hash(struct cfq_data *, unsigned int); static void cfq_dispatch_sort(request_queue_t *, struct cfq_rq *); -static void cfq_update_next_crq(struct cfq_rq *); static void cfq_put_cfqd(struct cfq_data *cfqd); -/* - * what the fairness is based on (ie how processes are grouped and - * differentiated) - */ -static inline unsigned long -cfq_hash_key(struct cfq_data *cfqd, struct task_struct *tsk) -{ - /* - * optimize this so that ->key_type is the offset into the struct - */ - switch (cfqd->key_type) { - case CFQ_KEY_PGID: - return process_group(tsk); - default: - case CFQ_KEY_TGID: - return tsk->tgid; - case CFQ_KEY_UID: - return tsk->uid; - case CFQ_KEY_GID: - return tsk->gid; - } -} +#define process_sync(tsk) ((tsk)->flags & PF_SYNCWRITE) /* * lots of deadline iosched dupes, can be abstracted later... @@ -235,16 +259,12 @@ static void cfq_remove_merge_hints(request_queue_t *q, struct cfq_rq *crq) if (q->last_merge == crq->request) q->last_merge = NULL; - - cfq_update_next_crq(crq); } static inline void cfq_add_crq_hash(struct cfq_data *cfqd, struct cfq_rq *crq) { const int hash_idx = CFQ_MHASH_FN(rq_hash_key(crq->request)); - BUG_ON(!hlist_unhashed(&crq->hash)); - hlist_add_head(&crq->hash, &cfqd->crq_hash[hash_idx]); } @@ -257,8 +277,6 @@ static struct request *cfq_find_rq_hash(struct cfq_data *cfqd, sector_t offset) struct cfq_rq *crq = list_entry_hash(entry); struct request *__rq = crq->request; - BUG_ON(hlist_unhashed(&crq->hash)); - if (!rq_mergeable(__rq)) { cfq_del_crq_hash(crq); continue; @@ -287,36 +305,16 @@ cfq_choose_req(struct cfq_data *cfqd, struct cfq_rq *crq1, struct cfq_rq *crq2) return crq2; if (crq2 == NULL) return crq1; + if (crq1->requeued) + return crq1; + if (crq2->requeued) + return crq2; s1 = crq1->request->sector; s2 = crq2->request->sector; last = cfqd->last_sector; -#if 0 - if (!list_empty(&cfqd->queue->queue_head)) { - struct list_head *entry = &cfqd->queue->queue_head; - unsigned long distance = ~0UL; - struct request *rq; - - while ((entry = entry->prev) != &cfqd->queue->queue_head) { - rq = list_entry_rq(entry); - - if (blk_barrier_rq(rq)) - break; - - if (distance < abs(s1 - rq->sector + rq->nr_sectors)) { - distance = abs(s1 - rq->sector +rq->nr_sectors); - last = rq->sector + rq->nr_sectors; - } - if (distance < abs(s2 - rq->sector + rq->nr_sectors)) { - distance = abs(s2 - rq->sector +rq->nr_sectors); - last = rq->sector + rq->nr_sectors; - } - } - } -#endif - /* * by definition, 1KiB is 2 sectors */ @@ -377,11 +375,13 @@ cfq_find_next_crq(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct cfq_rq *crq_next = NULL, *crq_prev = NULL; struct rb_node *rbnext, *rbprev; - if (!ON_RB(&last->rb_node)) - return NULL; - - if ((rbnext = rb_next(&last->rb_node)) == NULL) + if (ON_RB(&last->rb_node)) + rbnext = rb_next(&last->rb_node); + else { rbnext = rb_first(&cfqq->sort_list); + if (rbnext == &last->rb_node) + rbnext = NULL; + } rbprev = rb_prev(&last->rb_node); @@ -401,67 +401,53 @@ static void cfq_update_next_crq(struct cfq_rq *crq) cfqq->next_crq = cfq_find_next_crq(cfqq->cfqd, cfqq, crq); } -static int cfq_check_sort_rr_list(struct cfq_queue *cfqq) +static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted) { - struct list_head *head = &cfqq->cfqd->rr_list; - struct list_head *next, *prev; - - /* - * list might still be ordered - */ - next = cfqq->cfq_list.next; - if (next != head) { - struct cfq_queue *cnext = list_entry_cfqq(next); + struct cfq_data *cfqd = cfqq->cfqd; + struct list_head *list, *entry; - if (cfqq->service_used > cnext->service_used) - return 1; - } + BUG_ON(!cfqq->on_rr); - prev = cfqq->cfq_list.prev; - if (prev != head) { - struct cfq_queue *cprev = list_entry_cfqq(prev); + list_del(&cfqq->cfq_list); - if (cfqq->service_used < cprev->service_used) - return 1; + if (cfq_class_rt(cfqq)) + list = &cfqd->cur_rr; + else if (cfq_class_idle(cfqq)) + list = &cfqd->idle_rr; + else { + /* + * if cfqq has requests in flight, don't allow it to be + * found in cfq_set_active_queue before it has finished them. + * this is done to increase fairness between a process that + * has lots of io pending vs one that only generates one + * sporadically or synchronously + */ + if (cfqq->in_flight) + list = &cfqd->busy_rr; + else + list = &cfqd->rr_list[cfqq->ioprio]; } - return 0; -} - -static void cfq_sort_rr_list(struct cfq_queue *cfqq, int new_queue) -{ - struct list_head *entry = &cfqq->cfqd->rr_list; - - if (!cfqq->on_rr) - return; - if (!new_queue && !cfq_check_sort_rr_list(cfqq)) + /* + * if queue was preempted, just add to front to be fair. busy_rr + * isn't sorted. + */ + if (preempted || list == &cfqd->busy_rr) { + list_add(&cfqq->cfq_list, list); return; - - list_del(&cfqq->cfq_list); + } /* - * sort by our mean service_used, sub-sort by in-flight requests + * sort by when queue was last serviced */ - while ((entry = entry->prev) != &cfqq->cfqd->rr_list) { + entry = list; + while ((entry = entry->prev) != list) { struct cfq_queue *__cfqq = list_entry_cfqq(entry); - if (cfqq->service_used > __cfqq->service_used) + if (!__cfqq->service_last) + break; + if (time_before(__cfqq->service_last, cfqq->service_last)) break; - else if (cfqq->service_used == __cfqq->service_used) { - struct list_head *prv; - - while ((prv = entry->prev) != &cfqq->cfqd->rr_list) { - __cfqq = list_entry_cfqq(prv); - - WARN_ON(__cfqq->service_used > cfqq->service_used); - if (cfqq->service_used != __cfqq->service_used) - break; - if (cfqq->in_flight > __cfqq->in_flight) - break; - - entry = prv; - } - } } list_add(&cfqq->cfq_list, entry); @@ -469,28 +455,24 @@ static void cfq_sort_rr_list(struct cfq_queue *cfqq, int new_queue) /* * add to busy list of queues for service, trying to be fair in ordering - * the pending list according to requests serviced + * the pending list according to last request service */ static inline void -cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) +cfq_add_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq, int requeue) { - /* - * it's currently on the empty list - */ + BUG_ON(cfqq->on_rr); cfqq->on_rr = 1; cfqd->busy_queues++; - if (time_after(jiffies, cfqq->service_start + cfq_service)) - cfqq->service_used >>= 3; - - cfq_sort_rr_list(cfqq, 1); + cfq_resort_rr_list(cfqq, requeue); } static inline void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) { - list_move(&cfqq->cfq_list, &cfqd->empty_list); + BUG_ON(!cfqq->on_rr); cfqq->on_rr = 0; + list_move(&cfqq->cfq_list, &cfqd->empty_list); BUG_ON(!cfqd->busy_queues); cfqd->busy_queues--; @@ -505,16 +487,17 @@ static inline void cfq_del_crq_rb(struct cfq_rq *crq) if (ON_RB(&crq->rb_node)) { struct cfq_data *cfqd = cfqq->cfqd; + const int sync = crq->is_sync; - BUG_ON(!cfqq->queued[crq->is_sync]); + BUG_ON(!cfqq->queued[sync]); + cfqq->queued[sync]--; cfq_update_next_crq(crq); - cfqq->queued[crq->is_sync]--; rb_erase(&crq->rb_node, &cfqq->sort_list); RB_CLEAR_COLOR(&crq->rb_node); - if (RB_EMPTY(&cfqq->sort_list) && cfqq->on_rr) + if (cfqq->on_rr && RB_EMPTY(&cfqq->sort_list)) cfq_del_cfqq_rr(cfqd, cfqq); } } @@ -562,7 +545,7 @@ static void cfq_add_crq_rb(struct cfq_rq *crq) rb_insert_color(&crq->rb_node, &cfqq->sort_list); if (!cfqq->on_rr) - cfq_add_cfqq_rr(cfqd, cfqq); + cfq_add_cfqq_rr(cfqd, cfqq, crq->requeued); /* * check if this request is a better next-serve candidate @@ -581,11 +564,10 @@ cfq_reposition_crq_rb(struct cfq_queue *cfqq, struct cfq_rq *crq) cfq_add_crq_rb(crq); } -static struct request * -cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector) +static struct request *cfq_find_rq_rb(struct cfq_data *cfqd, sector_t sector) + { - const unsigned long key = cfq_hash_key(cfqd, current); - struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, key); + struct cfq_queue *cfqq = cfq_find_cfq_hash(cfqd, current->pid); struct rb_node *n; if (!cfqq) @@ -609,20 +591,23 @@ out: static void cfq_deactivate_request(request_queue_t *q, struct request *rq) { + struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_rq *crq = RQ_DATA(rq); if (crq) { struct cfq_queue *cfqq = crq->cfq_queue; - if (cfqq->cfqd->cfq_tagged) { - cfqq->service_used--; - cfq_sort_rr_list(cfqq, 0); - } - if (crq->accounted) { crq->accounted = 0; - cfqq->cfqd->rq_in_driver--; + WARN_ON(!cfqd->rq_in_driver); + cfqd->rq_in_driver--; + } + if (crq->in_flight) { + crq->in_flight = 0; + WARN_ON(!cfqq->in_flight); + cfqq->in_flight--; } + crq->requeued = 1; } } @@ -640,11 +625,10 @@ static void cfq_remove_request(request_queue_t *q, struct request *rq) struct cfq_rq *crq = RQ_DATA(rq); if (crq) { - cfq_remove_merge_hints(q, crq); list_del_init(&rq->queuelist); + cfq_del_crq_rb(crq); + cfq_remove_merge_hints(q, crq); - if (crq->cfq_queue) - cfq_del_crq_rb(crq); } } @@ -662,21 +646,15 @@ cfq_merge(request_queue_t *q, struct request **req, struct bio *bio) } __rq = cfq_find_rq_hash(cfqd, bio->bi_sector); - if (__rq) { - BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector); - - if (elv_rq_merge_ok(__rq, bio)) { - ret = ELEVATOR_BACK_MERGE; - goto out; - } + if (__rq && elv_rq_merge_ok(__rq, bio)) { + ret = ELEVATOR_BACK_MERGE; + goto out; } __rq = cfq_find_rq_rb(cfqd, bio->bi_sector + bio_sectors(bio)); - if (__rq) { - if (elv_rq_merge_ok(__rq, bio)) { - ret = ELEVATOR_FRONT_MERGE; - goto out; - } + if (__rq && elv_rq_merge_ok(__rq, bio)) { + ret = ELEVATOR_FRONT_MERGE; + goto out; } return ELEVATOR_NO_MERGE; @@ -709,20 +687,194 @@ static void cfq_merged_requests(request_queue_t *q, struct request *rq, struct request *next) { - struct cfq_rq *crq = RQ_DATA(rq); - struct cfq_rq *cnext = RQ_DATA(next); - cfq_merged_request(q, rq); - if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist)) { - if (time_before(cnext->queue_start, crq->queue_start)) { - list_move(&rq->queuelist, &next->queuelist); - crq->queue_start = cnext->queue_start; + /* + * reposition in fifo if next is older than rq + */ + if (!list_empty(&rq->queuelist) && !list_empty(&next->queuelist) && + time_before(next->start_time, rq->start_time)) + list_move(&rq->queuelist, &next->queuelist); + + cfq_remove_request(q, next); +} + +static inline void +__cfq_set_active_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq) +{ + if (cfqq) { + /* + * stop potential idle class queues waiting service + */ + del_timer(&cfqd->idle_class_timer); + + cfqq->slice_start = jiffies; + cfqq->slice_end = 0; + cfqq->slice_left = 0; + cfqq->must_alloc_slice = 0; + cfqq->fifo_expire = 0; + } + + cfqd->active_queue = cfqq; +} + +/* + * 0 + * 0,1 + * 0,1,2 + * 0,1,2,3 + * 0,1,2,3,4 + * 0,1,2,3,4,5 + * 0,1,2,3,4,5,6 + * 0,1,2,3,4,5,6,7 + */ +static int cfq_get_next_prio_level(struct cfq_data *cfqd) +{ + int prio, wrap; + + prio = -1; + wrap = 0; + do { + int p; + + for (p = cfqd->cur_prio; p <= cfqd->cur_end_prio; p++) { + if (!list_empty(&cfqd->rr_list[p])) { + prio = p; + break; + } } + + if (prio != -1) + break; + cfqd->cur_prio = 0; + if (++cfqd->cur_end_prio == CFQ_PRIO_LISTS) { + cfqd->cur_end_prio = 0; + if (wrap) + break; + wrap = 1; + } + } while (1); + + if (unlikely(prio == -1)) + return -1; + + BUG_ON(prio >= CFQ_PRIO_LISTS); + + list_splice_init(&cfqd->rr_list[prio], &cfqd->cur_rr); + + cfqd->cur_prio = prio + 1; + if (cfqd->cur_prio > cfqd->cur_end_prio) { + cfqd->cur_end_prio = cfqd->cur_prio; + cfqd->cur_prio = 0; + } + if (cfqd->cur_end_prio == CFQ_PRIO_LISTS) { + cfqd->cur_prio = 0; + cfqd->cur_end_prio = 0; } - cfq_update_next_crq(cnext); - cfq_remove_request(q, next); + return prio; +} + +static void cfq_set_active_queue(struct cfq_data *cfqd) +{ + struct cfq_queue *cfqq = NULL; + + /* + * if current list is non-empty, grab first entry. if it is empty, + * get next prio level and grab first entry then if any are spliced + */ + if (!list_empty(&cfqd->cur_rr) || cfq_get_next_prio_level(cfqd) != -1) + cfqq = list_entry_cfqq(cfqd->cur_rr.next); + + /* + * if we have idle queues and no rt or be queues had pending + * requests, either allow immediate service if the grace period + * has passed or arm the idle grace timer + */ + if (!cfqq && !list_empty(&cfqd->idle_rr)) { + unsigned long end = cfqd->last_end_request + CFQ_IDLE_GRACE; + + if (time_after_eq(jiffies, end)) + cfqq = list_entry_cfqq(cfqd->idle_rr.next); + else + mod_timer(&cfqd->idle_class_timer, end); + } + + __cfq_set_active_queue(cfqd, cfqq); +} + +/* + * current cfqq expired its slice (or was too idle), select new one + */ +static inline void cfq_slice_expired(struct cfq_data *cfqd, int preempted) +{ + struct cfq_queue *cfqq = cfqd->active_queue; + + if (cfqq) { + unsigned long now = jiffies; + + if (cfqq->wait_request) + del_timer(&cfqd->idle_slice_timer); + + if (!preempted && !cfqq->in_flight) + cfqq->service_last = now; + + cfqq->must_dispatch = 0; + cfqq->wait_request = 0; + + /* + * store what was left of this slice, if the queue idled out + * or was preempted + */ + if (time_after(now, cfqq->slice_end)) + cfqq->slice_left = now - cfqq->slice_end; + else + cfqq->slice_left = 0; + + if (cfqq->on_rr) + cfq_resort_rr_list(cfqq, preempted); + + cfqd->active_queue = NULL; + + if (cfqd->active_cic) { + put_io_context(cfqd->active_cic->ioc); + cfqd->active_cic = NULL; + } + } + + cfqd->dispatch_slice = 0; +} + +static int cfq_arm_slice_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq) + +{ + WARN_ON(!RB_EMPTY(&cfqq->sort_list)); + WARN_ON(cfqq != cfqd->active_queue); + + /* + * idle is disabled, either manually or by past process history + */ + if (!cfqd->cfq_slice_idle) + return 0; + if (!cfqq->idle_window) + return 0; + /* + * task has exited, don't wait + */ + if (cfqd->active_cic && !cfqd->active_cic->ioc->task) + return 0; + + cfqq->wait_request = 1; + cfqq->must_alloc = 1; + + if (!timer_pending(&cfqd->idle_slice_timer)) { + unsigned long slice_left = cfqq->slice_end - 1; + + cfqd->idle_slice_timer.expires = min(jiffies + cfqd->cfq_slice_idle, slice_left); + add_timer(&cfqd->idle_slice_timer); + } + + return 1; } /* @@ -738,31 +890,39 @@ static void cfq_dispatch_sort(request_queue_t *q, struct cfq_rq *crq) struct request *__rq; sector_t last; - cfq_del_crq_rb(crq); - cfq_remove_merge_hints(q, crq); list_del(&crq->request->queuelist); last = cfqd->last_sector; - while ((entry = entry->prev) != head) { - __rq = list_entry_rq(entry); + list_for_each_entry_reverse(__rq, head, queuelist) { + struct cfq_rq *__crq = RQ_DATA(__rq); - if (blk_barrier_rq(crq->request)) + if (blk_barrier_rq(__rq)) break; - if (!blk_fs_request(crq->request)) + if (!blk_fs_request(__rq)) + break; + if (__crq->requeued) break; - if (crq->request->sector > __rq->sector) + if (__rq->sector <= crq->request->sector) break; if (__rq->sector > last && crq->request->sector < last) { - last = crq->request->sector; + last = crq->request->sector + crq->request->nr_sectors; break; } + entry = &__rq->queuelist; } cfqd->last_sector = last; + + cfqq->next_crq = cfq_find_next_crq(cfqd, cfqq, crq); + + cfq_del_crq_rb(crq); + cfq_remove_merge_hints(q, crq); + crq->in_flight = 1; + crq->requeued = 0; cfqq->in_flight++; - list_add(&crq->request->queuelist, entry); + list_add_tail(&crq->request->queuelist, entry); } /* @@ -771,105 +931,176 @@ static void cfq_dispatch_sort(request_queue_t *q, struct cfq_rq *crq) static inline struct cfq_rq *cfq_check_fifo(struct cfq_queue *cfqq) { struct cfq_data *cfqd = cfqq->cfqd; - const int reads = !list_empty(&cfqq->fifo[0]); - const int writes = !list_empty(&cfqq->fifo[1]); - unsigned long now = jiffies; + struct request *rq; struct cfq_rq *crq; - if (time_before(now, cfqq->last_fifo_expire + cfqd->cfq_fifo_batch_expire)) + if (cfqq->fifo_expire) return NULL; - crq = RQ_DATA(list_entry(cfqq->fifo[0].next, struct request, queuelist)); - if (reads && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_r)) { - cfqq->last_fifo_expire = now; - return crq; - } + if (!list_empty(&cfqq->fifo)) { + int fifo = cfq_cfqq_sync(cfqq); - crq = RQ_DATA(list_entry(cfqq->fifo[1].next, struct request, queuelist)); - if (writes && time_after(now, crq->queue_start + cfqd->cfq_fifo_expire_w)) { - cfqq->last_fifo_expire = now; - return crq; + crq = RQ_DATA(list_entry_fifo(cfqq->fifo.next)); + rq = crq->request; + if (time_after(jiffies, rq->start_time + cfqd->cfq_fifo_expire[fifo])) { + cfqq->fifo_expire = 1; + return crq; + } } return NULL; } /* - * dispatch a single request from given queue + * Scale schedule slice based on io priority */ +static inline int +cfq_prio_to_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) +{ + const int base_slice = cfqd->cfq_slice[cfq_cfqq_sync(cfqq)]; + + WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); + + return base_slice + (base_slice/CFQ_SLICE_SCALE * (4 - cfqq->ioprio)); +} + static inline void -cfq_dispatch_request(request_queue_t *q, struct cfq_data *cfqd, - struct cfq_queue *cfqq) +cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq) { - struct cfq_rq *crq; + cfqq->slice_end = cfq_prio_to_slice(cfqd, cfqq) + jiffies; +} + +static inline int +cfq_prio_to_maxrq(struct cfq_data *cfqd, struct cfq_queue *cfqq) +{ + const int base_rq = cfqd->cfq_slice_async_rq; + + WARN_ON(cfqq->ioprio >= IOPRIO_BE_NR); + + return 2 * (base_rq + base_rq * (CFQ_PRIO_LISTS - 1 - cfqq->ioprio)); +} + +/* + * get next queue for service + */ +static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd, int force) +{ + unsigned long now = jiffies; + struct cfq_queue *cfqq; + + cfqq = cfqd->active_queue; + if (!cfqq) + goto new_queue; /* - * follow expired path, else get first next available + * slice has expired */ - if ((crq = cfq_check_fifo(cfqq)) == NULL) { - if (cfqd->find_best_crq) - crq = cfqq->next_crq; - else - crq = rb_entry_crq(rb_first(&cfqq->sort_list)); - } - - cfqd->last_sector = crq->request->sector + crq->request->nr_sectors; + if (!cfqq->must_dispatch && time_after(jiffies, cfqq->slice_end)) + goto new_queue; /* - * finally, insert request into driver list + * if queue has requests, dispatch one. if not, check if + * enough slice is left to wait for one */ - cfq_dispatch_sort(q, crq); + if (!RB_EMPTY(&cfqq->sort_list)) + goto keep_queue; + else if (!force && cfq_cfqq_sync(cfqq) && + time_before(now, cfqq->slice_end)) { + if (cfq_arm_slice_timer(cfqd, cfqq)) + return NULL; + } + +new_queue: + cfq_slice_expired(cfqd, 0); + cfq_set_active_queue(cfqd); +keep_queue: + return cfqd->active_queue; } -static int cfq_dispatch_requests(request_queue_t *q, int max_dispatch) +static int +__cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq, + int max_dispatch) { - struct cfq_data *cfqd = q->elevator->elevator_data; - struct cfq_queue *cfqq; - struct list_head *entry, *tmp; - int queued, busy_queues, first_round; + int dispatched = 0; - if (list_empty(&cfqd->rr_list)) - return 0; + BUG_ON(RB_EMPTY(&cfqq->sort_list)); - queued = 0; - first_round = 1; -restart: - busy_queues = 0; - list_for_each_safe(entry, tmp, &cfqd->rr_list) { - cfqq = list_entry_cfqq(entry); + do { + struct cfq_rq *crq; - BUG_ON(RB_EMPTY(&cfqq->sort_list)); + /* + * follow expired path, else get first next available + */ + if ((crq = cfq_check_fifo(cfqq)) == NULL) + crq = cfqq->next_crq; /* - * first round of queueing, only select from queues that - * don't already have io in-flight + * finally, insert request into driver dispatch list */ - if (first_round && cfqq->in_flight) - continue; + cfq_dispatch_sort(cfqd->queue, crq); - cfq_dispatch_request(q, cfqd, cfqq); + cfqd->dispatch_slice++; + dispatched++; - if (!RB_EMPTY(&cfqq->sort_list)) - busy_queues++; + if (!cfqd->active_cic) { + atomic_inc(&crq->io_context->ioc->refcount); + cfqd->active_cic = crq->io_context; + } - queued++; - } + if (RB_EMPTY(&cfqq->sort_list)) + break; + + } while (dispatched < max_dispatch); + + /* + * if slice end isn't set yet, set it. if at least one request was + * sync, use the sync time slice value + */ + if (!cfqq->slice_end) + cfq_set_prio_slice(cfqd, cfqq); + + /* + * expire an async queue immediately if it has used up its slice. idle + * queue always expire after 1 dispatch round. + */ + if ((!cfq_cfqq_sync(cfqq) && + cfqd->dispatch_slice >= cfq_prio_to_maxrq(cfqd, cfqq)) || + cfq_class_idle(cfqq)) + cfq_slice_expired(cfqd, 0); + + return dispatched; +} + +static int +cfq_dispatch_requests(request_queue_t *q, int max_dispatch, int force) +{ + struct cfq_data *cfqd = q->elevator->elevator_data; + struct cfq_queue *cfqq; - if ((queued < max_dispatch) && (busy_queues || first_round)) { - first_round = 0; - goto restart; + if (!cfqd->busy_queues) + return 0; + + cfqq = cfq_select_queue(cfqd, force); + if (cfqq) { + cfqq->wait_request = 0; + cfqq->must_dispatch = 0; + del_timer(&cfqd->idle_slice_timer); + + if (cfq_class_idle(cfqq)) + max_dispatch = 1; + + return __cfq_dispatch_requests(cfqd, cfqq, max_dispatch); } - return queued; + return 0; } static inline void cfq_account_dispatch(struct cfq_rq *crq) { struct cfq_queue *cfqq = crq->cfq_queue; struct cfq_data *cfqd = cfqq->cfqd; - unsigned long now, elapsed; - if (!blk_fs_request(crq->request)) + if (unlikely(!blk_fs_request(crq->request))) return; /* @@ -879,65 +1110,34 @@ static inline void cfq_account_dispatch(struct cfq_rq *crq) if (crq->accounted) return; - now = jiffies; - if (cfqq->service_start == ~0UL) - cfqq->service_start = now; - - /* - * on drives with tagged command queueing, command turn-around time - * doesn't necessarily reflect the time spent processing this very - * command inside the drive. so do the accounting differently there, - * by just sorting on the number of requests - */ - if (cfqd->cfq_tagged) { - if (time_after(now, cfqq->service_start + cfq_service)) { - cfqq->service_start = now; - cfqq->service_used /= 10; - } - - cfqq->service_used++; - cfq_sort_rr_list(cfqq, 0); - } - - elapsed = now - crq->queue_start; - if (elapsed > max_elapsed_dispatch) - max_elapsed_dispatch = elapsed; - crq->accounted = 1; - crq->service_start = now; - - if (++cfqd->rq_in_driver >= CFQ_MAX_TAG && !cfqd->cfq_tagged) { - cfqq->cfqd->cfq_tagged = 1; - printk("cfq: depth %d reached, tagging now on\n", CFQ_MAX_TAG); - } + cfqd->rq_in_driver++; } static inline void cfq_account_completion(struct cfq_queue *cfqq, struct cfq_rq *crq) { struct cfq_data *cfqd = cfqq->cfqd; + unsigned long now; if (!crq->accounted) return; + now = jiffies; + WARN_ON(!cfqd->rq_in_driver); cfqd->rq_in_driver--; - if (!cfqd->cfq_tagged) { - unsigned long now = jiffies; - unsigned long duration = now - crq->service_start; - - if (time_after(now, cfqq->service_start + cfq_service)) { - cfqq->service_start = now; - cfqq->service_used >>= 3; - } - - cfqq->service_used += duration; - cfq_sort_rr_list(cfqq, 0); + if (!cfq_class_idle(cfqq)) + cfqd->last_end_request = now; - if (duration > max_elapsed_crq) - max_elapsed_crq = duration; + if (!cfqq->in_flight && cfqq->on_rr) { + cfqq->service_last = now; + cfq_resort_rr_list(cfqq, 0); } + + if (crq->is_sync) + crq->io_context->last_end_request = now; } static struct request *cfq_next_request(request_queue_t *q) @@ -950,7 +1150,15 @@ static struct request *cfq_next_request(request_queue_t *q) dispatch: rq = list_entry_rq(q->queue_head.next); - if ((crq = RQ_DATA(rq)) != NULL) { + crq = RQ_DATA(rq); + if (crq) { + /* + * if idle window is disabled, allow queue buildup + */ + if (!crq->in_flight && !crq->cfq_queue->idle_window && + cfqd->rq_in_driver >= cfqd->cfq_max_depth) + return NULL; + cfq_remove_merge_hints(q, crq); cfq_account_dispatch(crq); } @@ -958,7 +1166,7 @@ dispatch: return rq; } - if (cfq_dispatch_requests(q, cfqd->cfq_quantum)) + if (cfq_dispatch_requests(q, cfqd->cfq_quantum, 0)) goto dispatch; return NULL; @@ -972,14 +1180,22 @@ dispatch: */ static void cfq_put_queue(struct cfq_queue *cf |