diff options
author | James Bottomley <jejb@mulgrave.il.steeleye.com> | 2006-06-10 13:47:26 -0500 |
---|---|---|
committer | James Bottomley <jejb@mulgrave.il.steeleye.com> | 2006-06-10 13:47:26 -0500 |
commit | f0cd91a68acdc9b49d7f6738b514a426da627649 (patch) | |
tree | 8ad73564015794197583b094217ae0a71e71e753 /block | |
parent | 60eef25701d25e99c991dd0f4a9f3832a0c3ad3e (diff) | |
parent | 128e6ced247cda88f96fa9f2e4ba8b2c4a681560 (diff) |
Merge ../linux-2.6
Diffstat (limited to 'block')
-rw-r--r-- | block/as-iosched.c | 18 | ||||
-rw-r--r-- | block/cfq-iosched.c | 134 | ||||
-rw-r--r-- | block/deadline-iosched.c | 13 | ||||
-rw-r--r-- | block/elevator.c | 65 | ||||
-rw-r--r-- | block/ll_rw_blk.c | 30 | ||||
-rw-r--r-- | block/noop-iosched.c | 7 |
6 files changed, 178 insertions, 89 deletions
diff --git a/block/as-iosched.c b/block/as-iosched.c index 296708ceceb..a7caf35ca0c 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -1648,17 +1648,17 @@ static void as_exit_queue(elevator_t *e) * initialize elevator private data (as_data), and alloc a arq for * each request on the free lists */ -static int as_init_queue(request_queue_t *q, elevator_t *e) +static void *as_init_queue(request_queue_t *q, elevator_t *e) { struct as_data *ad; int i; if (!arq_pool) - return -ENOMEM; + return NULL; ad = kmalloc_node(sizeof(*ad), GFP_KERNEL, q->node); if (!ad) - return -ENOMEM; + return NULL; memset(ad, 0, sizeof(*ad)); ad->q = q; /* Identify what queue the data belongs to */ @@ -1667,7 +1667,7 @@ static int as_init_queue(request_queue_t *q, elevator_t *e) GFP_KERNEL, q->node); if (!ad->hash) { kfree(ad); - return -ENOMEM; + return NULL; } ad->arq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, @@ -1675,7 +1675,7 @@ static int as_init_queue(request_queue_t *q, elevator_t *e) if (!ad->arq_pool) { kfree(ad->hash); kfree(ad); - return -ENOMEM; + return NULL; } /* anticipatory scheduling helpers */ @@ -1696,14 +1696,13 @@ static int as_init_queue(request_queue_t *q, elevator_t *e) ad->antic_expire = default_antic_expire; ad->batch_expire[REQ_SYNC] = default_read_batch_expire; ad->batch_expire[REQ_ASYNC] = default_write_batch_expire; - e->elevator_data = ad; ad->current_batch_expires = jiffies + ad->batch_expire[REQ_SYNC]; ad->write_batch_count = ad->batch_expire[REQ_ASYNC] / 10; if (ad->write_batch_count < 2) ad->write_batch_count = 2; - return 0; + return ad; } /* @@ -1844,9 +1843,10 @@ static void __exit as_exit(void) DECLARE_COMPLETION(all_gone); elv_unregister(&iosched_as); ioc_gone = &all_gone; - barrier(); + /* ioc_gone's update must be visible before reading ioc_count */ + smp_wmb(); if (atomic_read(&ioc_count)) - complete(ioc_gone); + wait_for_completion(ioc_gone); synchronize_rcu(); kmem_cache_destroy(arq_pool); } diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 67d446de022..a46d030e092 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -33,7 +33,7 @@ static int cfq_slice_idle = HZ / 70; #define CFQ_KEY_ASYNC (0) -static DEFINE_RWLOCK(cfq_exit_lock); +static DEFINE_SPINLOCK(cfq_exit_lock); /* * for the hash of cfqq inside the cfqd @@ -133,6 +133,7 @@ struct cfq_data { mempool_t *crq_pool; int rq_in_driver; + int hw_tag; /* * schedule slice state info @@ -500,10 +501,13 @@ static void cfq_resort_rr_list(struct cfq_queue *cfqq, int preempted) /* * if queue was preempted, just add to front to be fair. busy_rr - * isn't sorted. + * isn't sorted, but insert at the back for fairness. */ if (preempted || list == &cfqd->busy_rr) { - list_add(&cfqq->cfq_list, list); + if (preempted) + list = list->prev; + + list_add_tail(&cfqq->cfq_list, list); return; } @@ -664,6 +668,15 @@ static void cfq_activate_request(request_queue_t *q, struct request *rq) struct cfq_data *cfqd = q->elevator->elevator_data; cfqd->rq_in_driver++; + + /* + * If the depth is larger 1, it really could be queueing. But lets + * make the mark a little higher - idling could still be good for + * low queueing, and a low queueing number could also just indicate + * a SCSI mid layer like behaviour where limit+1 is often seen. + */ + if (!cfqd->hw_tag && cfqd->rq_in_driver > 4) + cfqd->hw_tag = 1; } static void cfq_deactivate_request(request_queue_t *q, struct request *rq) @@ -879,6 +892,13 @@ static struct cfq_queue *cfq_set_active_queue(struct cfq_data *cfqd) cfqq = list_entry_cfqq(cfqd->cur_rr.next); /* + * If no new queues are available, check if the busy list has some + * before falling back to idle io. + */ + if (!cfqq && !list_empty(&cfqd->busy_rr)) + cfqq = list_entry_cfqq(cfqd->busy_rr.next); + + /* * if we have idle queues and no rt or be queues had pending * requests, either allow immediate service if the grace period * has passed or arm the idle grace timer @@ -1284,7 +1304,7 @@ static void cfq_exit_io_context(struct io_context *ioc) /* * put the reference this task is holding to the various queues */ - read_lock_irqsave(&cfq_exit_lock, flags); + spin_lock_irqsave(&cfq_exit_lock, flags); n = rb_first(&ioc->cic_root); while (n != NULL) { @@ -1294,7 +1314,7 @@ static void cfq_exit_io_context(struct io_context *ioc) n = rb_next(n); } - read_unlock_irqrestore(&cfq_exit_lock, flags); + spin_unlock_irqrestore(&cfq_exit_lock, flags); } static struct cfq_io_context * @@ -1400,17 +1420,17 @@ static int cfq_ioc_set_ioprio(struct io_context *ioc, unsigned int ioprio) struct cfq_io_context *cic; struct rb_node *n; - write_lock(&cfq_exit_lock); + spin_lock(&cfq_exit_lock); n = rb_first(&ioc->cic_root); while (n != NULL) { cic = rb_entry(n, struct cfq_io_context, rb_node); - + changed_ioprio(cic); n = rb_next(n); } - write_unlock(&cfq_exit_lock); + spin_unlock(&cfq_exit_lock); return 0; } @@ -1458,7 +1478,8 @@ retry: * set ->slice_left to allow preemption for a new process */ cfqq->slice_left = 2 * cfqd->cfq_slice_idle; - cfq_mark_cfqq_idle_window(cfqq); + if (!cfqd->hw_tag) + cfq_mark_cfqq_idle_window(cfqq); cfq_mark_cfqq_prio_changed(cfqq); cfq_init_prio_data(cfqq); } @@ -1472,19 +1493,38 @@ out: return cfqq; } +static void +cfq_drop_dead_cic(struct io_context *ioc, struct cfq_io_context *cic) +{ + spin_lock(&cfq_exit_lock); + rb_erase(&cic->rb_node, &ioc->cic_root); + list_del_init(&cic->queue_list); + spin_unlock(&cfq_exit_lock); + kmem_cache_free(cfq_ioc_pool, cic); + atomic_dec(&ioc_count); +} + static struct cfq_io_context * cfq_cic_rb_lookup(struct cfq_data *cfqd, struct io_context *ioc) { - struct rb_node *n = ioc->cic_root.rb_node; + struct rb_node *n; struct cfq_io_context *cic; - void *key = cfqd; + void *k, *key = cfqd; +restart: + n = ioc->cic_root.rb_node; while (n) { cic = rb_entry(n, struct cfq_io_context, rb_node); + /* ->key must be copied to avoid race with cfq_exit_queue() */ + k = cic->key; + if (unlikely(!k)) { + cfq_drop_dead_cic(ioc, cic); + goto restart; + } - if (key < cic->key) + if (key < k) n = n->rb_left; - else if (key > cic->key) + else if (key > k) n = n->rb_right; else return cic; @@ -1497,33 +1537,41 @@ static inline void cfq_cic_link(struct cfq_data *cfqd, struct io_context *ioc, struct cfq_io_context *cic) { - struct rb_node **p = &ioc->cic_root.rb_node; - struct rb_node *parent = NULL; + struct rb_node **p; + struct rb_node *parent; struct cfq_io_context *__cic; - - read_lock(&cfq_exit_lock); + void *k; cic->ioc = ioc; cic->key = cfqd; ioc->set_ioprio = cfq_ioc_set_ioprio; - +restart: + parent = NULL; + p = &ioc->cic_root.rb_node; while (*p) { parent = *p; __cic = rb_entry(parent, struct cfq_io_context, rb_node); + /* ->key must be copied to avoid race with cfq_exit_queue() */ + k = __cic->key; + if (unlikely(!k)) { + cfq_drop_dead_cic(ioc, cic); + goto restart; + } - if (cic->key < __cic->key) + if (cic->key < k) p = &(*p)->rb_left; - else if (cic->key > __cic->key) + else if (cic->key > k) p = &(*p)->rb_right; else BUG(); } + spin_lock(&cfq_exit_lock); rb_link_node(&cic->rb_node, parent, p); rb_insert_color(&cic->rb_node, &ioc->cic_root); list_add(&cic->queue_list, &cfqd->cic_list); - read_unlock(&cfq_exit_lock); + spin_unlock(&cfq_exit_lock); } /* @@ -1622,7 +1670,7 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, { int enable_idle = cfq_cfqq_idle_window(cfqq); - if (!cic->ioc->task || !cfqd->cfq_slice_idle) + if (!cic->ioc->task || !cfqd->cfq_slice_idle || cfqd->hw_tag) enable_idle = 0; else if (sample_valid(cic->ttime_samples)) { if (cic->ttime_mean > cfqd->cfq_slice_idle) @@ -1713,14 +1761,24 @@ cfq_crq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, cfqq->next_crq = cfq_choose_req(cfqd, cfqq->next_crq, crq); + cic = crq->io_context; + /* * we never wait for an async request and we don't allow preemption * of an async request. so just return early */ - if (!cfq_crq_is_sync(crq)) + if (!cfq_crq_is_sync(crq)) { + /* + * sync process issued an async request, if it's waiting + * then expire it and kick rq handling. + */ + if (cic == cfqd->active_cic && + del_timer(&cfqd->idle_slice_timer)) { + cfq_slice_expired(cfqd, 0); + cfq_start_queueing(cfqd, cfqq); + } return; - - cic = crq->io_context; + } cfq_update_io_thinktime(cfqd, cic); cfq_update_io_seektime(cfqd, cic, crq); @@ -2138,10 +2196,9 @@ static void cfq_idle_class_timer(unsigned long data) * race with a non-idle queue, reset timer */ end = cfqd->last_end_request + CFQ_IDLE_GRACE; - if (!time_after_eq(jiffies, end)) { - cfqd->idle_class_timer.expires = end; - add_timer(&cfqd->idle_class_timer); - } else + if (!time_after_eq(jiffies, end)) + mod_timer(&cfqd->idle_class_timer, end); + else cfq_schedule_dispatch(cfqd); spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); @@ -2161,7 +2218,7 @@ static void cfq_exit_queue(elevator_t *e) cfq_shutdown_timer_wq(cfqd); - write_lock(&cfq_exit_lock); + spin_lock(&cfq_exit_lock); spin_lock_irq(q->queue_lock); if (cfqd->active_queue) @@ -2184,7 +2241,7 @@ static void cfq_exit_queue(elevator_t *e) } spin_unlock_irq(q->queue_lock); - write_unlock(&cfq_exit_lock); + spin_unlock(&cfq_exit_lock); cfq_shutdown_timer_wq(cfqd); @@ -2194,14 +2251,14 @@ static void cfq_exit_queue(elevator_t *e) kfree(cfqd); } -static int cfq_init_queue(request_queue_t *q, elevator_t *e) +static void *cfq_init_queue(request_queue_t *q, elevator_t *e) { struct cfq_data *cfqd; int i; cfqd = kmalloc(sizeof(*cfqd), GFP_KERNEL); if (!cfqd) - return -ENOMEM; + return NULL; memset(cfqd, 0, sizeof(*cfqd)); @@ -2231,8 +2288,6 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e) for (i = 0; i < CFQ_QHASH_ENTRIES; i++) INIT_HLIST_HEAD(&cfqd->cfq_hash[i]); - e->elevator_data = cfqd; - cfqd->queue = q; cfqd->max_queued = q->nr_requests / 4; @@ -2259,14 +2314,14 @@ static int cfq_init_queue(request_queue_t *q, elevator_t *e) cfqd->cfq_slice_async_rq = cfq_slice_async_rq; cfqd->cfq_slice_idle = cfq_slice_idle; - return 0; + return cfqd; out_crqpool: kfree(cfqd->cfq_hash); out_cfqhash: kfree(cfqd->crq_hash); out_crqhash: kfree(cfqd); - return -ENOMEM; + return NULL; } static void cfq_slab_kill(void) @@ -2439,9 +2494,10 @@ static void __exit cfq_exit(void) DECLARE_COMPLETION(all_gone); elv_unregister(&iosched_cfq); ioc_gone = &all_gone; - barrier(); + /* ioc_gone's update must be visible before reading ioc_count */ + smp_wmb(); if (atomic_read(&ioc_count)) - complete(ioc_gone); + wait_for_completion(ioc_gone); synchronize_rcu(); cfq_slab_kill(); } diff --git a/block/deadline-iosched.c b/block/deadline-iosched.c index 399fa1e60e1..3bd0415a982 100644 --- a/block/deadline-iosched.c +++ b/block/deadline-iosched.c @@ -613,24 +613,24 @@ static void deadline_exit_queue(elevator_t *e) * initialize elevator private data (deadline_data), and alloc a drq for * each request on the free lists */ -static int deadline_init_queue(request_queue_t *q, elevator_t *e) +static void *deadline_init_queue(request_queue_t *q, elevator_t *e) { struct deadline_data *dd; int i; if (!drq_pool) - return -ENOMEM; + return NULL; dd = kmalloc_node(sizeof(*dd), GFP_KERNEL, q->node); if (!dd) - return -ENOMEM; + return NULL; memset(dd, 0, sizeof(*dd)); dd->hash = kmalloc_node(sizeof(struct list_head)*DL_HASH_ENTRIES, GFP_KERNEL, q->node); if (!dd->hash) { kfree(dd); - return -ENOMEM; + return NULL; } dd->drq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, @@ -638,7 +638,7 @@ static int deadline_init_queue(request_queue_t *q, elevator_t *e) if (!dd->drq_pool) { kfree(dd->hash); kfree(dd); - return -ENOMEM; + return NULL; } for (i = 0; i < DL_HASH_ENTRIES; i++) @@ -653,8 +653,7 @@ static int deadline_init_queue(request_queue_t *q, elevator_t *e) dd->writes_starved = writes_starved; dd->front_merges = 1; dd->fifo_batch = fifo_batch; - e->elevator_data = dd; - return 0; + return dd; } static void deadline_put_request(request_queue_t *q, struct request *rq) diff --git a/block/elevator.c b/block/elevator.c index 0d6be03d929..a0afdd317ce 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -121,16 +121,16 @@ static struct elevator_type *elevator_get(const char *name) return e; } -static int elevator_attach(request_queue_t *q, struct elevator_queue *eq) +static void *elevator_init_queue(request_queue_t *q, struct elevator_queue *eq) { - int ret = 0; + return eq->ops->elevator_init_fn(q, eq); +} +static void elevator_attach(request_queue_t *q, struct elevator_queue *eq, + void *data) +{ q->elevator = eq; - - if (eq->ops->elevator_init_fn) - ret = eq->ops->elevator_init_fn(q, eq); - - return ret; + eq->elevator_data = data; } static char chosen_elevator[16]; @@ -181,6 +181,7 @@ int elevator_init(request_queue_t *q, char *name) struct elevator_type *e = NULL; struct elevator_queue *eq; int ret = 0; + void *data; INIT_LIST_HEAD(&q->queue_head); q->last_merge = NULL; @@ -202,10 +203,13 @@ int elevator_init(request_queue_t *q, char *name) if (!eq) return -ENOMEM; - ret = elevator_attach(q, eq); - if (ret) + data = elevator_init_queue(q, eq); + if (!data) { kobject_put(&eq->kobj); + return -ENOMEM; + } + elevator_attach(q, eq, data); return ret; } @@ -333,6 +337,7 @@ void elv_insert(request_queue_t *q, struct request *rq, int where) { struct list_head *pos; unsigned ordseq; + int unplug_it = 1; blk_add_trace_rq(q, rq, BLK_TA_INSERT); @@ -399,6 +404,11 @@ void elv_insert(request_queue_t *q, struct request *rq, int where) } list_add_tail(&rq->queuelist, pos); + /* + * most requeues happen because of a busy condition, don't + * force unplug of the queue for that case. + */ + unplug_it = 0; break; default: @@ -407,7 +417,7 @@ void elv_insert(request_queue_t *q, struct request *rq, int where) BUG(); } - if (blk_queue_plugged(q)) { + if (unplug_it && blk_queue_plugged(q)) { int nrq = q->rq.count[READ] + q->rq.count[WRITE] - q->in_flight; @@ -716,13 +726,16 @@ int elv_register_queue(struct request_queue *q) return error; } +static void __elv_unregister_queue(elevator_t *e) +{ + kobject_uevent(&e->kobj, KOBJ_REMOVE); + kobject_del(&e->kobj); +} + void elv_unregister_queue(struct request_queue *q) { - if (q) { - elevator_t *e = q->elevator; - kobject_uevent(&e->kobj, KOBJ_REMOVE); - kobject_del(&e->kobj); - } + if (q) + __elv_unregister_queue(q->elevator); } int elv_register(struct elevator_type *e) @@ -774,6 +787,7 @@ EXPORT_SYMBOL_GPL(elv_unregister); static int elevator_switch(request_queue_t *q, struct elevator_type *new_e) { elevator_t *old_elevator, *e; + void *data; /* * Allocate new elevator @@ -782,6 +796,12 @@ static int elevator_switch(request_queue_t *q, struct elevator_type *new_e) if (!e) return 0; + data = elevator_init_queue(q, e); + if (!data) { + kobject_put(&e->kobj); + return 0; + } + /* * Turn on BYPASS and drain all requests w/ elevator private data */ @@ -800,19 +820,19 @@ static int elevator_switch(request_queue_t *q, struct elevator_type *new_e) elv_drain_elevator(q); } - spin_unlock_irq(q->queue_lock); - /* - * unregister old elevator data + * Remember old elevator. */ - elv_unregister_queue(q); old_elevator = q->elevator; /* * attach and start new elevator */ - if (elevator_attach(q, e)) - goto fail; + elevator_attach(q, e, data); + + spin_unlock_irq(q->queue_lock); + + __elv_unregister_queue(old_elevator); if (elv_register_queue(q)) goto fail_register; @@ -831,7 +851,6 @@ fail_register: */ elevator_exit(e); e = NULL; -fail: q->elevator = old_elevator; elv_register_queue(q); clear_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); @@ -895,10 +914,8 @@ ssize_t elv_iosched_show(request_queue_t *q, char *name) EXPORT_SYMBOL(elv_dispatch_sort); EXPORT_SYMBOL(elv_add_request); EXPORT_SYMBOL(__elv_add_request); -EXPORT_SYMBOL(elv_requeue_request); EXPORT_SYMBOL(elv_next_request); EXPORT_SYMBOL(elv_dequeue_request); EXPORT_SYMBOL(elv_queue_empty); -EXPORT_SYMBOL(elv_completed_request); EXPORT_SYMBOL(elevator_exit); EXPORT_SYMBOL(elevator_init); diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index e112d1a5dab..7eb36c53f4b 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -1554,7 +1554,7 @@ void blk_plug_device(request_queue_t *q) * don't plug a stopped queue, it must be paired with blk_start_queue() * which will restart the queueing */ - if (test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags)) + if (blk_queue_stopped(q)) return; if (!test_and_set_bit(QUEUE_FLAG_PLUGGED, &q->queue_flags)) { @@ -1587,7 +1587,7 @@ EXPORT_SYMBOL(blk_remove_plug); */ void __generic_unplug_device(request_queue_t *q) { - if (unlikely(test_bit(QUEUE_FLAG_STOPPED, &q->queue_flags))) + if (unlikely(blk_queue_stopped(q))) return; if (!blk_remove_plug(q)) @@ -1732,8 +1732,21 @@ void blk_run_queue(struct request_queue *q) spin_lock_irqsave(q->queue_lock, flags); blk_remove_plug(q); - if (!elv_queue_empty(q)) - q->request_fn(q); + + /* + * Only recurse once to avoid overrunning the stack, let the unplug + * handling reinvoke the handler shortly if we already got there. + */ + if (!elv_queue_empty(q)) { + if (!test_and_set_bit(QUEUE_FLAG_REENTER, &q->queue_flags)) { + q->request_fn(q); + clear_bit(QUEUE_FLAG_REENTER, &q->queue_flags); + } else { + blk_plug_device(q); + kblockd_schedule_work(&q->unplug_work); + } + } + spin_unlock_irqrestore(q->queue_lock, flags); } EXPORT_SYMBOL(blk_run_queue); @@ -3385,7 +3398,7 @@ static int blk_cpu_notify(struct notifier_block *self, unsigned long action, } -static struct notifier_block __devinitdata blk_cpu_notifier = { +static struct notifier_block blk_cpu_notifier = { .notifier_call = blk_cpu_notify, }; @@ -3439,7 +3452,12 @@ void end_that_request_last(struct request *req, int uptodate) if (unlikely(laptop_mode) && blk_fs_request(req)) laptop_io_completion(); - if (disk && blk_fs_request(req)) { + /* + * Account IO completion. bar_rq isn't accounted as a normal + * IO on queueing nor completion. Accounting the containing + * request is enough. + */ + if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { unsigned long duration = jiffies - req->start_time; const int rw = rq_data_dir(req); diff --git a/block/noop-iosched.c b/block/noop-iosched.c index f370e4a7fe6..56a7c620574 100644 --- a/block/noop-iosched.c +++ b/block/noop-iosched.c @@ -65,16 +65,15 @@ noop_latter_request(request_queue_t *q, struct request *rq) return list_entry(rq->queuelist.next, struct request, queuelist); } -static int noop_init_queue(request_queue_t *q, elevator_t *e) +static void *noop_init_queue(request_queue_t *q, elevator_t *e) { struct noop_data *nd; nd = kmalloc(sizeof(*nd), GFP_KERNEL); if (!nd) - return -ENOMEM; + return NULL; INIT_LIST_HEAD(&nd->queue); - e->elevator_data = nd; - return 0; + return nd; } static void noop_exit_queue(elevator_t *e) |