Merge branch 'for-2.6.34' of git://git.kernel.dk/linux-2.6-block

* 'for-2.6.34' of git://git.kernel.dk/linux-2.6-block: (38 commits) block: don't access jiffies when initialising io_context cfq: remove 8 bytes of padding from cfq_rb_root on 64 bit builds block: fix for "Consolidate phys_segment and hw_segment limits" cfq-iosched: quantum check tweak blktrace: perform cleanup after setup error blkdev: fix merge_bvec_fn return value checks cfq-iosched: requests "in flight" vs "in driver" clarification cciss: Fix problem with scatter gather elements in the scsi half of the driver cciss: eliminate unnecessary pointer use in cciss scsi code cciss: do not use void pointer for scsi hba data cciss: factor out scatter gather chain block mapping code cciss: fix scatter gather chain block dma direction kludge cciss: simplify scatter gather code cciss: factor out scatter gather chain block allocation and freeing cciss: detect bad alignment of scsi commands at build time cciss: clarify command list padding calculation cfq-iosched: rethink seeky detection for SSDs cfq-iosched: rework seeky detection block: remove padding from io_context on 64bit builds block: Consolidate phys_segment and hw_segment limits ...
author: Linus Torvalds <torvalds@linux-foundation.org> 2010-03-01 09:00:29 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2010-03-01 09:00:29 -0800
commit: b1bf9368407ae7e89d8a005bb40beb70a41df539 (patch)
tree: 3815c8aab19c6c186736673c624fef5f3faab716
parent: 524df55725217b13d5a232fb5badb5846418ea0e (diff)
parent: 4671a1322052425afa38fcb7980d2fd2bb0fc99b (diff)
77 files changed, 704 insertions, 903 deletions
diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block
index d2f90334bb9..4873c759d53 100644
--- a/Documentation/ABI/testing/sysfs-block
+++ b/Documentation/ABI/testing/sysfs-block
@@ -128,3 +128,17 @@ Description:
 		preferred request size for workloads where sustained
 		throughput is desired.  If no optimal I/O size is
 		reported this file contains 0.
+
+What:		/sys/block/<disk>/queue/nomerges
+Date:		January 2010
+Contact:
+Description:
+		Standard I/O elevator operations include attempts to
+		merge contiguous I/Os. For known random I/O loads these
+		attempts will always fail and result in extra cycles
+		being spent in the kernel. This allows one to turn off
+		this behavior on one of two ways: When set to 1, complex
+		merge checks are disabled, but the simple one-shot merges
+		with the previous I/O request are enabled. When set to 2,
+		all merge tries are disabled. The default value is 0 -
+		which enables all types of merge tries.
diff --git a/Documentation/block/queue-sysfs.txt b/Documentation/block/queue-sysfs.txt
index e164403f60e..f65274081c8 100644
--- a/Documentation/block/queue-sysfs.txt
+++ b/Documentation/block/queue-sysfs.txt
@@ -25,11 +25,11 @@ size allowed by the hardware.
 
 nomerges (RW)
 -------------
-This enables the user to disable the lookup logic involved with IO merging
-requests in the block layer. Merging may still occur through a direct
-1-hit cache, since that comes for (almost) free. The IO scheduler will not
-waste cycles doing tree/hash lookups for merges if nomerges is 1. Defaults
-to 0, enabling all merges.
+This enables the user to disable the lookup logic involved with IO
+merging requests in the block layer. By default (0) all merges are
+enabled. When set to 1 only simple one-hit merges will be tried. When
+set to 2 no merge algorithms will be tried (including one-hit or more
+complex tree/hash lookups).
 
 nr_requests (RW)
 ----------------
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 5ff554677f4..c1ff6903b62 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -747,7 +747,7 @@ static int ubd_open_dev(struct ubd *ubd_dev)
 	ubd_dev->fd = fd;
 
 	if(ubd_dev->cow.file != NULL){
-		blk_queue_max_sectors(ubd_dev->queue, 8 * sizeof(long));
+		blk_queue_max_hw_sectors(ubd_dev->queue, 8 * sizeof(long));
 
 		err = -ENOMEM;
 		ubd_dev->cow.bitmap = vmalloc(ubd_dev->cow.bitmap_len);
@@ -849,7 +849,7 @@ static int ubd_add(int n, char **error_out)
 	}
 	ubd_dev->queue->queuedata = ubd_dev;
 
-	blk_queue_max_hw_segments(ubd_dev->queue, MAX_SG);
+	blk_queue_max_segments(ubd_dev->queue, MAX_SG);
 	err = ubd_disk_register(UBD_MAJOR, ubd_dev->size, n, &ubd_gendisk[n]);
 	if(err){
 		*error_out = "Failed to register device";
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index e7dbbaf5fb3..c85d74cae20 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -23,20 +23,6 @@ static LIST_HEAD(blkio_list);
 struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
 EXPORT_SYMBOL_GPL(blkio_root_cgroup);
 
-bool blkiocg_css_tryget(struct blkio_cgroup *blkcg)
-{
-	if (!css_tryget(&blkcg->css))
-		return false;
-	return true;
-}
-EXPORT_SYMBOL_GPL(blkiocg_css_tryget);
-
-void blkiocg_css_put(struct blkio_cgroup *blkcg)
-{
-	css_put(&blkcg->css);
-}
-EXPORT_SYMBOL_GPL(blkiocg_css_put);
-
 struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
 {
 	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 4d316df863b..84bf745fa77 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -43,9 +43,6 @@ struct blkio_group {
 	unsigned long sectors;
 };
 
-extern bool blkiocg_css_tryget(struct blkio_cgroup *blkcg);
-extern void blkiocg_css_put(struct blkio_cgroup *blkcg);
-
 typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
 typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg,
 						unsigned int weight);
diff --git a/block/blk-core.c b/block/blk-core.c
index d1a9a0a64f9..9fe174dc74d 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1490,9 +1490,9 @@ end_io:
 /*
  * We only want one ->make_request_fn to be active at a time,
  * else stack usage with stacked devices could be a problem.
- * So use current->bio_{list,tail} to keep a list of requests
+ * So use current->bio_list to keep a list of requests
  * submited by a make_request_fn function.
- * current->bio_tail is also used as a flag to say if
+ * current->bio_list is also used as a flag to say if
  * generic_make_request is currently active in this task or not.
  * If it is NULL, then no make_request is active.  If it is non-NULL,
  * then a make_request is active, and new requests should be added
@@ -1500,11 +1500,11 @@ end_io:
  */
 void generic_make_request(struct bio *bio)
 {
-	if (current->bio_tail) {
+	struct bio_list bio_list_on_stack;
+
+	if (current->bio_list) {
 		/* make_request is active */
-		*(current->bio_tail) = bio;
-		bio->bi_next = NULL;
-		current->bio_tail = &bio->bi_next;
+		bio_list_add(current->bio_list, bio);
 		return;
 	}
 	/* following loop may be a bit non-obvious, and so deserves some
@@ -1512,30 +1512,27 @@ void generic_make_request(struct bio *bio)
 	 * Before entering the loop, bio->bi_next is NULL (as all callers
 	 * ensure that) so we have a list with a single bio.
 	 * We pretend that we have just taken it off a longer list, so
-	 * we assign bio_list to the next (which is NULL) and bio_tail
-	 * to &bio_list, thus initialising the bio_list of new bios to be
+	 * we assign bio_list to a pointer to the bio_list_on_stack,
+	 * thus initialising the bio_list of new bios to be
 	 * added.  __generic_make_request may indeed add some more bios
 	 * through a recursive call to generic_make_request.  If it
 	 * did, we find a non-NULL value in bio_list and re-enter the loop
 	 * from the top.  In this case we really did just take the bio
-	 * of the top of the list (no pretending) and so fixup bio_list and
-	 * bio_tail or bi_next, and call into __generic_make_request again.
+	 * of the top of the list (no pretending) and so remove it from
+	 * bio_list, and call into __generic_make_request again.
 	 *
 	 * The loop was structured like this to make only one call to
 	 * __generic_make_request (which is important as it is large and
 	 * inlined) and to keep the structure simple.
 	 */
 	BUG_ON(bio->bi_next);
+	bio_list_init(&bio_list_on_stack);
+	current->bio_list = &bio_list_on_stack;
 	do {
-		current->bio_list = bio->bi_next;
-		if (bio->bi_next == NULL)
-			current->bio_tail = &current->bio_list;
-		else
-			bio->bi_next = NULL;
 		__generic_make_request(bio);
-		bio = current->bio_list;
+		bio = bio_list_pop(current->bio_list);
 	} while (bio);
-	current->bio_tail = NULL; /* deactivate */
+	current->bio_list = NULL; /* deactivate */
 }
 EXPORT_SYMBOL(generic_make_request);
 
@@ -1617,8 +1614,7 @@ int blk_rq_check_limits(struct request_queue *q, struct request *rq)
 	 * limitation.
 	 */
 	blk_recalc_rq_segments(rq);
-	if (rq->nr_phys_segments > queue_max_phys_segments(q) ||
-	    rq->nr_phys_segments > queue_max_hw_segments(q)) {
+	if (rq->nr_phys_segments > queue_max_segments(q)) {
 		printk(KERN_ERR "%s: over max segments limit.\n", __func__);
 		return -EIO;
 	}
diff --git a/block/blk-ioc.c b/block/blk-ioc.c
index 98e6bf61b0a..3f65c8aadb2 100644
--- a/block/blk-ioc.c
+++ b/block/blk-ioc.c
@@ -91,7 +91,7 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
 		spin_lock_init(&ret->lock);
 		ret->ioprio_changed = 0;
 		ret->ioprio = 0;
-		ret->last_waited = jiffies; /* doesn't matter... */
+		ret->last_waited = 0; /* doesn't matter... */
 		ret->nr_batch_requests = 0; /* because this is 0 */
 		INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH);
 		INIT_HLIST_HEAD(&ret->cic_list);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 99cb5cf1f44..5e7dc997345 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -206,8 +206,7 @@ static inline int ll_new_hw_segment(struct request_queue *q,
 {
 	int nr_phys_segs = bio_phys_segments(q, bio);
 
-	if (req->nr_phys_segments + nr_phys_segs > queue_max_hw_segments(q) ||
-	    req->nr_phys_segments + nr_phys_segs > queue_max_phys_segments(q)) {
+	if (req->nr_phys_segments + nr_phys_segs > queue_max_segments(q)) {
 		req->cmd_flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
@@ -300,10 +299,7 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req,
 		total_phys_segments--;
 	}
 
-	if (total_phys_segments > queue_max_phys_segments(q))
-		return 0;
-
-	if (total_phys_segments > queue_max_hw_segments(q))
+	if (total_phys_segments > queue_max_segments(q))
 		return 0;
 
 	/* Merge is OK... */
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 5eeb9e0d256..31e7a9375c1 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -91,10 +91,9 @@ EXPORT_SYMBOL_GPL(blk_queue_lld_busy);
  */
 void blk_set_default_limits(struct queue_limits *lim)
 {
-	lim->max_phys_segments = MAX_PHYS_SEGMENTS;
-	lim->max_hw_segments = MAX_HW_SEGMENTS;
+	lim->max_segments = BLK_MAX_SEGMENTS;
 	lim->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK;
-	lim->max_segment_size = MAX_SEGMENT_SIZE;
+	lim->max_segment_size = BLK_MAX_SEGMENT_SIZE;
 	lim->max_sectors = BLK_DEF_MAX_SECTORS;
 	lim->max_hw_sectors = INT_MAX;
 	lim->max_discard_sectors = 0;
@@ -154,7 +153,7 @@ void blk_queue_make_request(struct request_queue *q, make_request_fn *mfn)
 	q->unplug_timer.data = (unsigned long)q;
 
 	blk_set_default_limits(&q->limits);
-	blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
+	blk_queue_max_hw_sectors(q, BLK_SAFE_MAX_SECTORS);
 
 	/*
 	 * If the caller didn't supply a lock, fall back to our embedded
@@ -210,37 +209,32 @@ void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask)
 EXPORT_SYMBOL(blk_queue_bounce_limit);
 
 /**
- * blk_queue_max_sectors - set max sectors for a request for this queue
+ * blk_queue_max_hw_sectors - set max sectors for a request for this queue
  * @q:  the request queue for the device
- * @max_sectors:  max sectors in the usual 512b unit
+ * @max_hw_sectors:  max hardware sectors in the usual 512b unit
  *
  * Description:
- *    Enables a low level driver to set an upper limit on the size of
- *    received requests.
+ *    Enables a low level driver to set a hard upper limit,
+ *    max_hw_sectors, on the size of requests.  max_hw_sectors is set by
+ *    the device driver based upon the combined capabilities of I/O
+ *    controller and storage device.
+ *
+ *    max_sectors is a soft limit imposed by the block layer for
+ *    filesystem type requests.  This value can be overridden on a
+ *    per-device basis in /sys/block/<device>/queue/max_sectors_kb.
+ *    The soft limit can not exceed max_hw_sectors.
  **/
-void blk_queue_max_sectors(struct request_queue *q, unsigned int max_sectors)
+void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_hw_sectors)
 {
-	if ((max_sectors << 9) < PAGE_CACHE_SIZE) {
-		max_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
+	if ((max_hw_sectors << 9) < PAGE_CACHE_SIZE) {
+		max_hw_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
 		printk(KERN_INFO "%s: set to minimum %d\n",
-		       __func__, max_sectors);
+		       __func__, max_hw_sectors);
 	}
 
-	if (BLK_DEF_MAX_SECTORS > max_sectors)
-		q->limits.max_hw_sectors = q->limits.max_sectors = max_sectors;
-	else {
-		q->limits.max_sectors = BLK_DEF_MAX_SECTORS;
-		q->limits.max_hw_sectors = max_sectors;
-	}
-}
-EXPORT_SYMBOL(blk_queue_max_sectors);
-
-void blk_queue_max_hw_sectors(struct request_queue *q, unsigned int max_sectors)
-{
-	if (BLK_DEF_MAX_SECTORS > max_sectors)
-		q->limits.max_hw_sectors = BLK_DEF_MAX_SECTORS;
-	else
-		q->limits.max_hw_sectors = max_sectors;
+	q->limits.max_hw_sectors = max_hw_sectors;
+	q->limits.max_sectors = min_t(unsigned int, max_hw_sectors,
+				      BLK_DEF_MAX_SECTORS);
 }
 EXPORT_SYMBOL(blk_queue_max_hw_sectors);
 
@@ -257,17 +251,15 @@ void blk_queue_max_discard_sectors(struct request_queue *q,
 EXPORT_SYMBOL(blk_queue_max_discard_sectors);
 
 /**
- * blk_queue_max_phys_segments - set max phys segments for a request for this queue
+ * blk_queue_max_segments - set max hw segments for a request for this queue
  * @q:  the request queue for the device
  * @max_segments:  max number of segments
  *
  * Description:
  *    Enables a low level driver to set an upper limit on the number of
- *    physical data segments in a request.  This would be the largest sized
- *    scatter list the driver could handle.
+ *    hw data segments in a request.
  **/
-void blk_queue_max_phys_segments(struct request_queue *q,
-				 unsigned short max_segments)
+void blk_queue_max_segments(struct request_queue *q, unsigned short max_segments)
 {
 	if (!max_segments) {
 		max_segments = 1;
@@ -275,33 +267,9 @@ void blk_queue_max_phys_segments(struct request_queue *q,
 		       __func__, max_segments);
 	}
 
-	q->limits.max_phys_segments = max_segments;
+	q->limits.max_segments = max_segments;
 }
-EXPORT_SYMBOL(blk_queue_max_phys_segments);
-
-/**
- * blk_queue_max_hw_segments - set max hw segments for a request for this queue
- * @q:  the request queue for the device
- * @max_segments:  max number of segments
- *
- * Description:
- *    Enables a low level driver to set an upper limit on the number of
- *    hw data segments in a request.  This would be the largest number of
- *    address/length pairs the host adapter can actually give at once
- *    to the device.
- **/
-void blk_queue_max_hw_segments(struct request_queue *q,
-			       unsigned short max_segments)
-{
-	if (!max_segments) {
-		max_segments = 1;
-		printk(KERN_INFO "%s: set to minimum %d\n",
-		       __func__, max_segments);
-	}
-
-	q->limits.max_hw_segments = max_segments;
-}
-EXPORT_SYMBOL(blk_queue_max_hw_segments);
+EXPORT_SYMBOL(blk_queue_max_segments);
 
 /**
  * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg
@@ -507,7 +475,7 @@ static unsigned int lcm(unsigned int a, unsigned int b)
  * blk_stack_limits - adjust queue_limits for stacked devices
  * @t:	the stacking driver limits (top device)
  * @b:  the underlying queue limits (bottom, component device)
- * @offset:  offset to beginning of data within component device
+ * @start:  first data sector within component device
  *
  * Description:
  *    This function is used by stacking drivers like MD and DM to ensure
@@ -525,10 +493,9 @@ static unsigned int lcm(unsigned int a, unsigned int b)
  *    the alignment_offset is undefined.
  */
 int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
-		     sector_t offset)
+		     sector_t start)
 {
-	sector_t alignment;
-	unsigned int top, bottom, ret = 0;
+	unsigned int top, bottom, alignment, ret = 0;
 
 	t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors);
 	t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors);
@@ -537,18 +504,14 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 	t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
 					    b->seg_boundary_mask);
 
-	t->max_phys_segments = min_not_zero(t->max_phys_segments,
-					    b->max_phys_segments);
-
-	t->max_hw_segments = min_not_zero(t->max_hw_segments,
-					  b->max_hw_segments);
+	t->max_segments = min_not_zero(t->max_segments, b->max_segments);
 
 	t->max_segment_size = min_not_zero(t->max_segment_size,
 					   b->max_segment_size);
 
 	t->misaligned |= b->misaligned;
 
-	alignment = queue_limit_alignment_offset(b, offset);
+	alignment = queue_limit_alignment_offset(b, start);
 
 	/* Bottom devic
author	Linus Torvalds <torvalds@linux-foundation.org>	2010-03-01 09:00:29 -0800
committer	Linus Torvalds <torvalds@linux-foundation.org>	2010-03-01 09:00:29 -0800
commit	b1bf9368407ae7e89d8a005bb40beb70a41df539 (patch)
tree	3815c8aab19c6c186736673c624fef5f3faab716
parent	524df55725217b13d5a232fb5badb5846418ea0e (diff)
parent	4671a1322052425afa38fcb7980d2fd2bb0fc99b (diff)