diff options
77 files changed, 704 insertions, 903 deletions
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block index d2f90334bb9..4873c759d53 100644 --- a/Documentation/ABI/testing/sysfs-block +++ b/Documentation/ABI/testing/sysfs-block @@ -128,3 +128,17 @@ Description: preferred request size for workloads where sustained throughput is desired. If no optimal I/O size is reported this file contains 0. + +What: /sys/block/<disk>/queue/nomerges +Date: January 2010 +Contact: +Description: + Standard I/O elevator operations include attempts to + merge contiguous I/Os. For known random I/O loads these + attempts will always fail and result in extra cycles + being spent in the kernel. This allows one to turn off + this behavior on one of two ways: When set to 1, complex + merge checks are disabled, but the simple one-shot merges + with the previous I/O request are enabled. When set to 2, + all merge tries are disabled. The default value is 0 - + which enables all types of merge tries. diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt index e164403f60e..f65274081c8 100644 --- a/Documentation/block/queue-sysfs.txt +++ b/Documentation/block/queue-sysfs.txt @@ -25,11 +25,11 @@ size allowed by the hardware. nomerges (RW) ------------- -This enables the user to disable the lookup logic involved with IO merging -requests in the block layer. Merging may still occur through a direct -1-hit cache, since that comes for (almost) free. The IO scheduler will not -waste cycles doing tree/hash lookups for merges if nomerges is 1. Defaults -to 0, enabling all merges. +This enables the user to disable the lookup logic involved with IO +merging requests in the block layer. By default (0) all merges are +enabled. When set to 1 only simple one-hit merges will be tried. When +set to 2 no merge algorithms will be tried (including one-hit or more +complex tree/hash lookups). nr_requests (RW) ---------------- diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index 5ff554677f4..c1ff6903b62 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -747,7 +747,7 @@ static int ubd_open_dev(struct ubd *ubd_dev) ubd_dev->fd = fd; if(ubd_dev->cow.file != NULL){ - blk_queue_max_sectors(ubd_dev->queue, 8 * sizeof(long)); + blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long)); err = -ENOMEM; ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len); @@ -849,7 +849,7 @@ static int ubd_add(int n, char **error_out) } ubd_dev->queue->queuedata = ubd_dev; - blk_queue_max_hw_segments(ubd_dev->queue, MAX_SG); + blk_queue_max_segments(ubd_dev->queue, MAX_SG); err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]); if(err){ *error_out = "Failed to register device"; diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index e7dbbaf5fb3..c85d74cae20 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -23,20 +23,6 @@ static LIST_HEAD(blkio_list); struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT }; EXPORT_SYMBOL_GPL(blkio_root_cgroup); -bool blkiocg_css_tryget(struct blkio_cgroup *blkcg) -{ - if (!css_tryget(&blkcg->css)) - return false; - return true; -} -EXPORT_SYMBOL_GPL(blkiocg_css_tryget); - -void blkiocg_css_put(struct blkio_cgroup *blkcg) -{ - css_put(&blkcg->css); -} -EXPORT_SYMBOL_GPL(blkiocg_css_put); - struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id), diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 4d316df863b..84bf745fa77 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -43,9 +43,6 @@ struct blkio_group { unsigned long sectors; }; -extern bool blkiocg_css_tryget(struct blkio_cgroup *blkcg); -extern void blkiocg_css_put(struct blkio_cgroup *blkcg); - typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg); typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg, unsigned int weight); diff --git a/block/blk-core.c b/block/blk-core.c index d1a9a0a64f9..9fe174dc74d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1490,9 +1490,9 @@ end_io: /* * We only want one ->make_request_fn to be active at a time, * else stack usage with stacked devices could be a problem. - * So use current->bio_{list,tail} to keep a list of requests + * So use current->bio_list to keep a list of requests * submited by a make_request_fn function. - * current->bio_tail is also used as a flag to say if + * current->bio_list is also used as a flag to say if * generic_make_request is currently active in this task or not. * If it is NULL, then no make_request is active. If it is non-NULL, * then a make_request is active, and new requests should be added @@ -1500,11 +1500,11 @@ end_io: */ void generic_make_request(struct bio *bio) { - if (current->bio_tail) { + struct bio_list bio_list_on_stack; + + if (current->bio_list) { /* make_request is active */ - *(current->bio_tail) = bio; - bio->bi_next = NULL; - current->bio_tail = &bio->bi_next; + bio_list_add(current->bio_list, bio); return; } /* following loop may be a bit non-obvious, and so deserves some @@ -1512,30 +1512,27 @@ void generic_make_request(struct bio *bio) * Before entering the loop, bio->bi_next is NULL (as all callers * ensure that) so we have a list with a single bio. * We pretend that we have just taken it off a longer list, so - * we assign bio_list to the next (which is NULL) and bio_tail - * to &bio_list, thus initialising the bio_list of new bios to be + * we assign bio_list to a pointer to the bio_list_on_stack, + * thus initialising the bio_list of new bios to be * added. __generic_make_request may indeed add some more bios * through a recursive call to generic_make_request. If it * did, we find a non-NULL value in bio_list and re-enter the loop * from the top. In this case we really did just take the bio - * of the top of the list (no pretending) and so fixup bio_list and - * bio_tail or bi_next, and call into __generic_make_request again. + * of the top of the list (no pretending) and so remove it from + * bio_list, and call into __generic_make_request again. * * The loop was structured like this to make only one call to * __generic_make_request (which is important as it is large and * inlined) and to keep the structure simple. */ BUG_ON(bio->bi_next); + bio_list_init(&bio_list_on_stack); + current->bio_list = &bio_list_on_stack; do { - current->bio_list = bio->bi_next; - if (bio->bi_next == NULL) - current->bio_tail = ¤t->bio_list; - else - bio->bi_next = NULL; __generic_make_request(bio); - bio = current->bio_list; + bio = bio_list_pop(current->bio_list); } while (bio); - current->bio_tail = NULL; /* deactivate */ + current->bio_list = NULL; /* deactivate */ } EXPORT_SYMBOL(generic_make_request); @@ -1617,8 +1614,7 @@ int blk_rq_check_limits(struct request_queue *q, struct request *rq) * limitation. */ blk_recalc_rq_segments(rq); - if (rq->nr_phys_segments > queue_max_phys_segments(q) || - rq->nr_phys_segments > queue_max_hw_segments(q)) { + if (rq->nr_phys_segments > queue_max_segments(q)) { printk(KERN_ERR "%s: over max segments limit.\n", __func__); return -EIO; } diff --git a/block/blk-ioc.c b/block/blk-ioc.c index 98e6bf61b0a..3f65c8aadb2 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -91,7 +91,7 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node) spin_lock_init(&ret->lock); ret->ioprio_changed = 0; ret->ioprio = 0; - ret->last_waited = jiffies; /* doesn't matter... */ + ret->last_waited = 0; /* doesn't matter... */ ret->nr_batch_requests = 0; /* because this is 0 */ INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH); INIT_HLIST_HEAD(&ret->cic_list); diff --git a/block/blk-merge.c b/block/blk-merge.c index 99cb5cf1f44..5e7dc997345 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -206,8 +206,7 @@ static inline int ll_new_hw_segment(struct request_queue *q, { int nr_phys_segs = bio_phys_segments(q, bio); - if (req->nr_phys_segments + nr_phys_segs > queue_max_hw_segments(q) || - req->nr_phys_segments + nr_phys_segs > queue_max_phys_segments(q)) { + if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q)) { req->cmd_flags |= REQ_NOMERGE; if (req == q->last_merge) q->last_merge = NULL; @@ -300,10 +299,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, total_phys_segments--; } - if (total_phys_segments > queue_max_phys_segments(q)) - return 0; - - if (total_phys_segments > queue_max_hw_segments(q)) + if (total_phys_segments > queue_max_segments(q)) return 0; /* Merge is OK... */ diff --git a/block/blk-settings.c b/block/blk-settings.c index 5eeb9e0d256..31e7a9375c1 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -91,10 +91,9 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy); */ void blk_set_default_limits(struct queue_limits *lim) { - lim->max_phys_segments = MAX_PHYS_SEGMENTS; - lim->max_hw_segments = MAX_HW_SEGMENTS; + lim->max_segments = BLK_MAX_SEGMENTS; lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; - lim->max_segment_size = MAX_SEGMENT_SIZE; + lim->max_segment_size = BLK_MAX_SEGMENT_SIZE; lim->max_sectors = BLK_DEF_MAX_SECTORS; lim->max_hw_sectors = INT_MAX; lim->max_discard_sectors = 0; @@ -154,7 +153,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn) q->unplug_timer.data = (unsigned long)q; blk_set_default_limits(&q->limits); - blk_queue_max_sectors(q, SAFE_MAX_SECTORS); + blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS); /* * If the caller didn't supply a lock, fall back to our embedded @@ -210,37 +209,32 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask) EXPORT_SYMBOL(blk_queue_bounce_limit); /** - * blk_queue_max_sectors - set max sectors for a request for this queue + * blk_queue_max_hw_sectors - set max sectors for a request for this queue * @q: the request queue for the device - * @max_sectors: max sectors in the usual 512b unit + * @max_hw_sectors: max hardware sectors in the usual 512b unit * * Description: - * Enables a low level driver to set an upper limit on the size of - * received requests. + * Enables a low level driver to set a hard upper limit, + * max_hw_sectors, on the size of requests. max_hw_sectors is set by + * the device driver based upon the combined capabilities of I/O + * controller and storage device. + * + * max_sectors is a soft limit imposed by the block layer for + * filesystem type requests. This value can be overridden on a + * per-device basis in /sys/block/<device>/queue/max_sectors_kb. + * The soft limit can not exceed max_hw_sectors. **/ -void blk_queue_max_sectors(struct request_queue *q, unsigned int max_sectors) +void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors) { - if ((max_sectors << 9) < PAGE_CACHE_SIZE) { - max_sectors = 1 << (PAGE_CACHE_SHIFT - 9); + if ((max_hw_sectors << 9) < PAGE_CACHE_SIZE) { + max_hw_sectors = 1 << (PAGE_CACHE_SHIFT - 9); printk(KERN_INFO "%s: set to minimum %d\n", - __func__, max_sectors); + __func__, max_hw_sectors); } - if (BLK_DEF_MAX_SECTORS > max_sectors) - q->limits.max_hw_sectors = q->limits.max_sectors = max_sectors; - else { - q->limits.max_sectors = BLK_DEF_MAX_SECTORS; - q->limits.max_hw_sectors = max_sectors; - } -} -EXPORT_SYMBOL(blk_queue_max_sectors); - -void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors) -{ - if (BLK_DEF_MAX_SECTORS > max_sectors) - q->limits.max_hw_sectors = BLK_DEF_MAX_SECTORS; - else - q->limits.max_hw_sectors = max_sectors; + q->limits.max_hw_sectors = max_hw_sectors; + q->limits.max_sectors = min_t(unsigned int, max_hw_sectors, + BLK_DEF_MAX_SECTORS); } EXPORT_SYMBOL(blk_queue_max_hw_sectors); @@ -257,17 +251,15 @@ void blk_queue_max_discard_sectors(struct request_queue *q, EXPORT_SYMBOL(blk_queue_max_discard_sectors); /** - * blk_queue_max_phys_segments - set max phys segments for a request for this queue + * blk_queue_max_segments - set max hw segments for a request for this queue * @q: the request queue for the device * @max_segments: max number of segments * * Description: * Enables a low level driver to set an upper limit on the number of - * physical data segments in a request. This would be the largest sized - * scatter list the driver could handle. + * hw data segments in a request. **/ -void blk_queue_max_phys_segments(struct request_queue *q, - unsigned short max_segments) +void blk_queue_max_segments(struct request_queue *q, unsigned short max_segments) { if (!max_segments) { max_segments = 1; @@ -275,33 +267,9 @@ void blk_queue_max_phys_segments(struct request_queue *q, __func__, max_segments); } - q->limits.max_phys_segments = max_segments; + q->limits.max_segments = max_segments; } -EXPORT_SYMBOL(blk_queue_max_phys_segments); - -/** - * blk_queue_max_hw_segments - set max hw segments for a request for this queue - * @q: the request queue for the device - * @max_segments: max number of segments - * - * Description: - * Enables a low level driver to set an upper limit on the number of - * hw data segments in a request. This would be the largest number of - * address/length pairs the host adapter can actually give at once - * to the device. - **/ -void blk_queue_max_hw_segments(struct request_queue *q, - unsigned short max_segments) -{ - if (!max_segments) { - max_segments = 1; - printk(KERN_INFO "%s: set to minimum %d\n", - __func__, max_segments); - } - - q->limits.max_hw_segments = max_segments; -} -EXPORT_SYMBOL(blk_queue_max_hw_segments); +EXPORT_SYMBOL(blk_queue_max_segments); /** * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg @@ -507,7 +475,7 @@ static unsigned int lcm(unsigned int a, unsigned int b) * blk_stack_limits - adjust queue_limits for stacked devices * @t: the stacking driver limits (top device) * @b: the underlying queue limits (bottom, component device) - * @offset: offset to beginning of data within component device + * @start: first data sector within component device * * Description: * This function is used by stacking drivers like MD and DM to ensure @@ -525,10 +493,9 @@ static unsigned int lcm(unsigned int a, unsigned int b) * the alignment_offset is undefined. */ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, - sector_t offset) + sector_t start) { - sector_t alignment; - unsigned int top, bottom, ret = 0; + unsigned int top, bottom, alignment, ret = 0; t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); @@ -537,18 +504,14 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask, b->seg_boundary_mask); - t->max_phys_segments = min_not_zero(t->max_phys_segments, - b->max_phys_segments); - - t->max_hw_segments = min_not_zero(t->max_hw_segments, - b->max_hw_segments); + t->max_segments = min_not_zero(t->max_segments, b->max_segments); t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size); t->misaligned |= b->misaligned; - alignment = queue_limit_alignment_offset(b, offset); + alignment = queue_limit_alignment_offset(b, start); /* Bottom device has different alignment. Check that it is * compatible with the current top alignment. @@ -611,11 +574,7 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, /* Discard alignment and granularity */ if (b->discard_granularity) { - unsigned int granularity = b->discard_granularity; - offset &= granularity - 1; - - alignment = (granularity + b->discard_alignment - offset) - & (granularity - 1); + alignment = queue_limit_discard_alignment(b, start); if (t->discard_granularity != 0 && t->discard_alignment != alignment) { @@ -657,7 +616,7 @@ int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, start += get_start_sect(bdev); - return blk_stack_limits(t, &bq->limits, start << 9); + return blk_stack_limits(t, &bq->limits, start); } EXPORT_SYMBOL(bdev_stack_limits); @@ -668,9 +627,8 @@ EXPORT_SYMBOL(bdev_stack_limits); * @offset: offset to beginning of data within component device * * Description: - * Merges the limits for two queues. Returns 0 if alignment - * didn't change. Returns -1 if adding the bottom device caused - * misalignment. + * Merges the limits for a top level gendisk and a bottom level + * block_device. */ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, sector_t offset) @@ -678,9 +636,7 @@ void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, struct request_queue *t = disk->queue; struct request_queue *b = bdev_get_queue(bdev); - offset += get_start_sect(bdev) << 9; - - if (blk_stack_limits(&t->limits, &b->limits, offset) < 0) { |