[BLOCK] Move all core block layer code to new block/ directory

drivers/block/ is right now a mix of core and driver parts. Lets move the core parts to a new top level directory. Al will move the fs/ related block parts to block/ next. Signed-off-by: Jens Axboe <axboe@suse.de>
author: Jens Axboe <axboe@suse.de> 2005-11-04 08:43:35 +0100
committer: Jens Axboe <axboe@suse.de> 2005-11-04 08:43:35 +0100
commit: 3a65dfe8c088143c7155cfd36a72f4b0ad2fc4b2 (patch)
tree: db930c9f71f94d3ee674f65e38c38e95ca97227e /block/ll_rw_blk.c
parent: 0f3278d14f0255e4cd9e07ccefc33ff12d8bb59c (diff)
1 files changed, 3613 insertions, 0 deletions
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
new file mode 100644
index 00000000000..2747741677f
--- /dev/null
+++ b/block/ll_rw_blk.c
@@ -0,0 +1,3613 @@
+/*
+ *  linux/drivers/block/ll_rw_blk.c
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 1994,      Karl Keyte: Added support for disk statistics
+ * Elevator latency, (C) 2000  Andrea Arcangeli <andrea@suse.de> SuSE
+ * Queue request tables / lock, selectable elevator, Jens Axboe <axboe@suse.de>
+ * kernel-doc documentation started by NeilBrown <neilb@cse.unsw.edu.au> -  July2000
+ * bio rewrite, highmem i/o, etc, Jens Axboe <axboe@suse.de> - may 2001
+ */
+
+/*
+ * This handles all read/write requests to block devices
+ */
+#include <linux/config.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/backing-dev.h>
+#include <linux/bio.h>
+#include <linux/blkdev.h>
+#include <linux/highmem.h>
+#include <linux/mm.h>
+#include <linux/kernel_stat.h>
+#include <linux/string.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>	/* for max_pfn/max_low_pfn */
+#include <linux/completion.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
+#include <linux/writeback.h>
+#include <linux/blkdev.h>
+
+/*
+ * for max sense size
+ */
+#include <scsi/scsi_cmnd.h>
+
+static void blk_unplug_work(void *data);
+static void blk_unplug_timeout(unsigned long data);
+static void drive_stat_acct(struct request *rq, int nr_sectors, int new_io);
+
+/*
+ * For the allocated request tables
+ */
+static kmem_cache_t *request_cachep;
+
+/*
+ * For queue allocation
+ */
+static kmem_cache_t *requestq_cachep;
+
+/*
+ * For io context allocations
+ */
+static kmem_cache_t *iocontext_cachep;
+
+static wait_queue_head_t congestion_wqh[2] = {
+		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
+		__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
+	};
+
+/*
+ * Controlling structure to kblockd
+ */
+static struct workqueue_struct *kblockd_workqueue; 
+
+unsigned long blk_max_low_pfn, blk_max_pfn;
+
+EXPORT_SYMBOL(blk_max_low_pfn);
+EXPORT_SYMBOL(blk_max_pfn);
+
+/* Amount of time in which a process may batch requests */
+#define BLK_BATCH_TIME	(HZ/50UL)
+
+/* Number of requests a "batching" process may submit */
+#define BLK_BATCH_REQ	32
+
+/*
+ * Return the threshold (number of used requests) at which the queue is
+ * considered to be congested.  It include a little hysteresis to keep the
+ * context switch rate down.
+ */
+static inline int queue_congestion_on_threshold(struct request_queue *q)
+{
+	return q->nr_congestion_on;
+}
+
+/*
+ * The threshold at which a queue is considered to be uncongested
+ */
+static inline int queue_congestion_off_threshold(struct request_queue *q)
+{
+	return q->nr_congestion_off;
+}
+
+static void blk_queue_congestion_threshold(struct request_queue *q)
+{
+	int nr;
+
+	nr = q->nr_requests - (q->nr_requests / 8) + 1;
+	if (nr > q->nr_requests)
+		nr = q->nr_requests;
+	q->nr_congestion_on = nr;
+
+	nr = q->nr_requests - (q->nr_requests / 8) - (q->nr_requests / 16) - 1;
+	if (nr < 1)
+		nr = 1;
+	q->nr_congestion_off = nr;
+}
+
+/*
+ * A queue has just exitted congestion.  Note this in the global counter of
+ * congested queues, and wake up anyone who was waiting for requests to be
+ * put back.
+ */
+static void clear_queue_congested(request_queue_t *q, int rw)
+{
+	enum bdi_state bit;
+	wait_queue_head_t *wqh = &congestion_wqh[rw];
+
+	bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
+	clear_bit(bit, &q->backing_dev_info.state);
+	smp_mb__after_clear_bit();
+	if (waitqueue_active(wqh))
+		wake_up(wqh);
+}
+
+/*
+ * A queue has just entered congestion.  Flag that in the queue's VM-visible
+ * state flags and increment the global gounter of congested queues.
+ */
+static void set_queue_congested(request_queue_t *q, int rw)
+{
+	enum bdi_state bit;
+
+	bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested;
+	set_bit(bit, &q->backing_dev_info.state);
+}
+
+/**
+ * blk_get_backing_dev_info - get the address of a queue's backing_dev_info
+ * @bdev:	device
+ *
+ * Locates the passed device's request queue and returns the address of its
+ * backing_dev_info
+ *
+ * Will return NULL if the request queue cannot be located.
+ */
+struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev)
+{
+	struct backing_dev_info *ret = NULL;
+	request_queue_t *q = bdev_get_queue(bdev);
+
+	if (q)
+		ret = &q->backing_dev_info;
+	return ret;
+}
+
+EXPORT_SYMBOL(blk_get_backing_dev_info);
+
+void blk_queue_activity_fn(request_queue_t *q, activity_fn *fn, void *data)
+{
+	q->activity_fn = fn;
+	q->activity_data = data;
+}
+
+EXPORT_SYMBOL(blk_queue_activity_fn);
+
+/**
+ * blk_queue_prep_rq - set a prepare_request function for queue
+ * @q:		queue
+ * @pfn:	prepare_request function
+ *
+ * It's possible for a queue to register a prepare_request callback which
+ * is invoked before the request is handed to the request_fn. The goal of
+ * the function is to prepare a request for I/O, it can be used to build a
+ * cdb from the request data for instance.
+ *
+ */
+void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn)
+{
+	q->prep_rq_fn = pfn;
+}
+
+EXPORT_SYMBOL(blk_queue_prep_rq);
+
+/**
+ * blk_queue_merge_bvec - set a merge_bvec function for queue
+ * @q:		queue
+ * @mbfn:	merge_bvec_fn
+ *
+ * Usually queues have static limitations on the max sectors or segments that
+ * we can put in a request. Stacking drivers may have some settings that
+ * are dynamic, and thus we have to query the queue whether it is ok to
+ * add a new bio_vec to a bio at a given offset or not. If the block device
+ * has such limitations, it needs to register a merge_bvec_fn to control
+ * the size of bio's sent to it. Note that a block device *must* allow a
+ * single page to be added to an empty bio. The block device driver may want
+ * to use the bio_split() function to deal with these bio's. By default
+ * no merge_bvec_fn is defined for a queue, and only the fixed limits are
+ * honored.
+ */
+void blk_queue_merge_bvec(request_queue_t *q, merge_bvec_fn *mbfn)
+{
+	q->merge_bvec_fn = mbfn;
+}
+
+EXPORT_SYMBOL(blk_queue_merge_bvec);
+
+/**
+ * blk_queue_make_request - define an alternate make_request function for a device
+ * @q:  the request queue for the device to be affected
+ * @mfn: the alternate make_request function
+ *
+ * Description:
+ *    The normal way for &struct bios to be passed to a device
+ *    driver is for them to be collected into requests on a request
+ *    queue, and then to allow the device driver to select requests
+ *    off that queue when it is ready.  This works well for many block
+ *    devices. However some block devices (typically virtual devices
+ *    such as md or lvm) do not benefit from the processing on the
+ *    request queue, and are served best by having the requests passed
+ *    directly to them.  This can be achieved by providing a function
+ *    to blk_queue_make_request().
+ *
+ * Caveat:
+ *    The driver that does this *must* be able to deal appropriately
+ *    with buffers in "highmemory". This can be accomplished by either calling
+ *    __bio_kmap_atomic() to get a temporary kernel mapping, or by calling
+ *    blk_queue_bounce() to create a buffer in normal memory.
+ **/
+void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
+{
+	/*
+	 * set defaults
+	 */
+	q->nr_requests = BLKDEV_MAX_RQ;
+	blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS);
+	blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS);
+	q->make_request_fn = mfn;
+	q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+	q->backing_dev_info.state = 0;
+	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
+	blk_queue_max_sectors(q, MAX_SECTORS);
+	blk_queue_hardsect_size(q, 512);
+	blk_queue_dma_alignment(q, 511);
+	blk_queue_congestion_threshold(q);
+	q->nr_batching = BLK_BATCH_REQ;
+
+	q->unplug_thresh = 4;		/* hmm */
+	q->unplug_delay = (3 * HZ) / 1000;	/* 3 milliseconds */
+	if (q->unplug_delay == 0)
+		q->unplug_delay = 1;
+
+	INIT_WORK(&q->unplug_work, blk_unplug_work, q);
+
+	q->unplug_timer.function = blk_unplug_timeout;
+	q->unplug_timer.data = (unsigned long)q;
+
+	/*
+	 * by default assume old behaviour and bounce for any highmem page
+	 */
+	blk_queue_bounce_limit(q, BLK_BOUNCE_HIGH);
+
+	blk_queue_activity_fn(q, NULL, NULL);
+}
+
+EXPORT_SYMBOL(blk_queue_make_request);
+
+static inline void rq_init(request_queue_t *q, struct request *rq)
+{
+	INIT_LIST_HEAD(&rq->queuelist);
+
+	rq->errors = 0;
+	rq->rq_status = RQ_ACTIVE;
+	rq->bio = rq->biotail = NULL;
+	rq->ioprio = 0;
+	rq->buffer = NULL;
+	rq->ref_count = 1;
+	rq->q = q;
+	rq->waiting = NULL;
+	rq->special = NULL;
+	rq->data_len = 0;
+	rq->data = NULL;
+	rq->nr_phys_segments = 0;
+	rq->sense = NULL;
+	rq->end_io = NULL;
+	rq->end_io_data = NULL;
+}
+
+/**
+ * blk_queue_ordered - does this queue support ordered writes
+ * @q:     the request queue
+ * @flag:  see below
+ *
+ * Description:
+ *   For journalled file systems, doing ordered writes on a commit
+ *   block instead of explicitly doing wait_on_buffer (which is bad
+ *   for performance) can be a big win. Block drivers supporting this
+ *   feature should call this function and indicate so.
+ *
+ **/
+void blk_queue_ordered(request_queue_t *q, int flag)
+{
+	switch (flag) {
+		case QUEUE_ORDERED_NONE:
+			if (q->flush_rq)
+				kmem_cache_free(request_cachep, q->flush_rq);
+			q->flush_rq = NULL;
+			q->ordered = flag;
+			break;
+		case QUEUE_ORDERED_TAG:
+			q->ordered = flag;
+			break;
+		case QUEUE_ORDERED_FLUSH:
+			q->ordered = flag;
+			if (!q->flush_rq)
+				q->flush_rq = kmem_cache_alloc(request_cachep,
+								GFP_KERNEL);
+			break;
+		default:
+			printk("blk_queue_ordered: bad value %d\n", flag);
+			break;
+	}
+}
+
+EXPORT_SYMBOL(blk_queue_ordered);
+
+/**
+ * blk_queue_issue_flush_fn - set function for issuing a flush
+ * @q:     the request queue
+ * @iff:   the function to be called issuing the flush
+ *
+ * Description:
+ *   If a driver supports issuing a flush command, the support is notified
+ *   to the block layer by defining it through this call.
+ *
+ **/
+void blk_queue_issue_flush_fn(request_queue_t *q, issue_flush_fn *iff)
+{
+	q->issue_flush_fn = iff;
+}
+
+EXPORT_SYMBOL(blk_queue_issue_flush_fn);
+
+/*
+ * Cache flushing for ordered writes handling
+ */
+static void blk_pre_flush_end_io(struct request *flush_rq)
+{
+	struct request *rq = flush_rq->end_io_data;
+	request_queue_t *q = rq->q;
+
+	elv_completed_request(q, flush_rq);
+
+	rq->flags |= REQ_BAR_PREFLUSH;
+
+	if (!flush_rq->errors)
+		elv_requeue_request(q, rq);
+	else {
+		q->end_flush_fn(q, flush_rq);
+		clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
+		q->request_fn(q);
+	}
+}
+
+static void blk_post_flush_end_io(struct request *flush_rq)
+{
+	struct request *rq = flush_rq->end_io_data;
+	request_queue_t *q = rq->q;
+
+	elv_completed_request(q, flush_rq);
+
+	rq->flags |= REQ_BAR_POSTFLUSH;
+
+	q->end_flush_fn(q, flush_rq);
+	clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
+	q->request_fn(q);
+}
+
+struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq)
+{
+	struct request *flush_rq = q->flush_rq;
+
+	BUG_ON(!blk_barrier_rq(rq));
+
+	if (test_and_set_bit(QUEUE_FLAG_FLUSH, &q->queue_flags))
+		return NULL;
+
+	rq_init(q, flush_rq);
+	flush_rq->elevator_private = NULL;
+	flush_rq->flags = REQ_BAR_FLUSH;
+	flush_rq->rq_disk = rq->rq_disk;
+	flush_rq->rl = NULL;
+
+	/*
+	 * prepare_flush returns 0 if no flush is needed, just mark both
+	 * pre and post flush as done in that case
+	 */
+	if (!q->prepare_flush_fn(q, flush_rq)) {
+		rq->flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH;
+		clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
+		return rq;
+	}
+
+	/*
+	 * some drivers dequeue requests right away, some only after io
+	 * completion. make sure the request is dequeued.
+	 */
+	if (!list_empty(&rq->queuelist))
+		blkdev_dequeue_request(rq);
+
+	flush_rq->end_io_data = rq;
+	flush_rq->end_io = blk_pre_flush_end_io;
+
+	__elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0);
+	return flush_rq;
+}
+
+static void blk_start_post_flush(request_queue_t *q, struct request *rq)
+{
+	struct request *flush_rq = q->flush_rq;
+
+	BUG_ON(!blk_barrier_rq(rq));
+
+	rq_init(q, flush_rq);
+	flush_rq->elevator_private = NULL;
+	flush_rq->flags = REQ_BAR_FLUSH;
+	flush_rq->rq_disk = rq->rq_disk;
+	flush_rq->rl = NULL;
+
+	if (q->prepare_flush_fn(q, flush_rq)) {
+		flush_rq->end_io_data = rq;
+		flush_rq->end_io = blk_post_flush_end_io;
+
+		__elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0);
+		q->request_fn(q);
+	}
+}
+
+static inline int blk_check_end_barrier(request_queue_t *q, struct request *rq,
+					int sectors)
+{
+	if (sectors > rq->nr_sectors)
+		sectors = rq->nr_sectors;
+
+	rq->nr_sectors -= sectors;
+	return rq->nr_sectors;
+}
+
+static int __blk_complete_barrier_rq(request_queue_t *q, struct request *rq,
+				     int sectors, int queue_locked)
+{
+	if (q->ordered != QUEUE_ORDERED_FLUSH)
+		return 0;
+	if (!blk_fs_request(rq) || !blk_barrier_rq(rq))
+		return 0;
+	if (blk_barrier_postflush(rq))
+		return 0;
+
+	if (!blk_check_end_barrier(q, rq, sectors)) {
+		unsigned long flags = 0;
+
+		if (!queue_locked)
+			spin_lock_irqsave(q->queue_lock, flags);
+
+		blk_start_post_flush(q, rq);
+
+		if (!queue_locked)
+			spin_unlock_irqrestore(q->queue_lock, flags);
+	}
+
+	return 1;
+}
+
+/**
+ * blk_complete_barrier_rq - complete possible barrier request
+ * @q:  the request queue for the device
+ * @rq:  the request
+ * @sectors:  number of sectors to complete
+ *
+ * Description:
+ *   Used in driver end_io handling to determine whether to postpone
+ *   completion of a barrier request until a post flush has been done. This
+ *   is the unlocked variant, used if the caller doesn't already hold the
+ *   queue lock.
+ **/
+int blk_complete_barrier_rq(request_queue_t *q, struct request *rq, int sectors)
+{
+	return __blk_complete_barrier_rq(q, rq, sectors, 0);
+}
+EXPORT_SYMBOL(blk_complete_barrier_rq);
+
+/**
+ * blk_complete_barrier_rq_locked - complete possible barrier request
+ * @q:  the request queue for the device
+ * @rq:  the request
+ * @sectors:  number of sectors to complete
+ *
+ * Description:
+ *   See blk_complete_barrier_rq(). This variant must be used if the caller
+ *   holds the queue lock.
+ **/
+int blk_complete_barrier_rq_locked(request_queue_t *q, struct request *rq,
+				   int sectors)
+{
+	return __blk_complete_barrier_rq(q, rq, sectors, 1);
+}
+EXPORT_SYMBOL(blk_complete_barrier_rq_locked);
+
+/**
+ * blk_queue_bounce_limit - set bounce buffer limit for queue
+ * @q:  the request queue for the device
+ * @dma_addr:   bus address limit
+ *
+ * Description:
+ *    Different hardware can have different requirements as to what pages
+ *    it can do I/O directly to. A low level driver can call
+ *    blk_queue_bounce_limit to have lower memory pages allocated as bounce
+ *    buffers for doing I/O to pages residing above @page. By default
+ *    the block layer sets this to the highest numbered "low" memory page.
+ **/
+void blk_queue_bounce_limit(request_queue_t *q, u64 dma_addr)
+{
+	unsigned long bounce_pfn = dma_addr >> PAGE_SHIFT;
+
+	/*
+	 * set appropriate bounce gfp mask -- unfortunately we don't have a
+	 * full 4GB zone, so we have to resort to low memory for any bounces.
+	 * ISA has its own < 16MB zone.
+	 */
+	if (bounce_pfn < blk_max_low_pfn) {
+		BUG_ON(dma_addr < BLK_BOUNCE_ISA);
+		init_emergency_isa_pool();
+		q->bounce_gfp = GFP_NOIO | GFP_DMA;
+	} else
+		q->bounce_gfp = GFP_NOIO;
+
+	q->bounce_pfn = bounce_pfn;
+}
+
+EXPORT_SYMBOL(blk_queue_bounce_limit);
+
+/**
+ * blk_queue_max_sectors - set max sectors for a request for this queue
+ * @q:  the request queue for the device
+ * @max_sectors:  max sectors in the usual 512b unit
+ *
+ * Description:
+ *    Enables a low level driver to set an upper limit on the size of
+ *    received requests.
+ **/
+void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors)
+{
+	if ((max_sectors << 9) < PAGE_CACHE_SIZE) {
+		max_sectors = 1 << (PAGE_CACHE_SHIFT - 9);
+		printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors);
+	}
+
+	q->max_sectors = q->max_hw_sectors = max_sectors;
+}
+
+EXPORT_SYMBOL(blk_queue_max_sectors);
+
+/**
+ * blk_queue_max_phys_segments - set max phys segments for a request for this queue
+ * @q:  the request queue for the device
+ * @max_segments:  max number of segments
+ *
+ * Description:
+ *    Enables a low level driver to set an upper limit on the number of
+ *    physical data segments in a request.  This would be the largest sized
+ *    scatter list the driver could handle.
+ **/
+void blk_queue_max_phys_segments(request_queue_t *q, unsigned short max_segments)
+{
+	if (!max_segments) {
+		max_segments = 1;
+		printk("%s: set to minimum %d\n", __FUNCTION__, max_segments);
+	}
+
+	q->max_phys_segments = max_segments;
+}
+
+EXPORT_SYMBOL(blk_queue_max_phys_segments);
+
+/**
+ * blk_queue_max_hw_segments - set max hw segments for a request for this queue
+ * @q:  the request queue for the device
+ * @max_segments:  max number of segments
+ *
+ * Description:
+ *    Enables a low level driver to set an upper limit on the number of
+ *    hw data segments in a request.  This would be the largest number of
+ *    address/length pairs the host adapter can actually give as once
+ *    to the device.
+ **/
+void blk_queue_max_hw_segments(request_queue_t *q, unsigned short max_segments)
+{
+	if (!max_segments) {
+		max_segments = 1;
+		printk("%s: set to minimum %d\n", __FUNCTION__, max_segments);
+	}
+
+	q->max_hw_segments = max_segments;
+}
+
+EXPORT_SYMBOL(blk_queue_max_hw_segments);
+
+/**
+ * blk_queue_max_segment_size - set max segment size for blk_rq_map_sg
+ * @q:  the request queue for the device
+ * @max_size:  max size of segment in bytes
+ *
+ * Description:
+ *    Enables a low level driver to set an upper limit on the size of a
+ *    coalesced segment
+ **/
+void blk_queue_max_segment_size(request_queue_t *q, unsigned int max_size)
+{
+	if (max_size < PAGE_CACHE_SIZE) {
+		max_size = PAGE_CACHE_SIZE;
+		printk("%s: set to minimum %d\n", __FUNCTION__, max_size);
+	}
+
+	q->max_segment_size = max_size;
+}
+
+EXPORT_SYMBOL(blk_queue_max_segment_size);
+
+/**
+ * blk_queue_hardsect_size - set hardware sector size for the queue
+ * @q:  the request queue for the device
+ * @size:  the hardware sector size, in bytes
+ *
+ * Description:
+ *   This should typically be set to the lowest possible sector size
+ *   that the hardware can operate on (possible without reverting to
+ *   even internal read-modify-write operations). Usually the default
+ *   of 512 covers most hardware.
+ **/
+void blk_queue_hardsect_size(request_queue_t *q, unsigned short size)
+{
+	q->hardsect_size = size;
+}
+
+EXPORT_SYMBOL(blk_queue_hardsect_size);
+
+/*
+ * Returns the minimum that is _not_ zero, unless both are zero.
+ */
+#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
+
+/**
+ * blk_queue_stack_limits - inherit underlying queue limits for stacked drivers
+ * @t:	the stacking driver (top)
+ * @b:  the underlying device (bottom)
+ **/
+void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b)
+{
+	/* zero is "infinity" */
+	t->max_sectors = t->max_hw_sectors =
+		min_not_zero(t->max_sectors,b->max_sectors);
+
+	t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments);
+	t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments);
+	t->max_segment_size = min(t->max_segment_size,b->max_segment_size);
+	t->hardsect_size = max(t->hardsect_size,b->hardsect_size);
+}
+
+EXPORT_SYMBOL(blk_queue_stack_limits);
+
+/**
+ * blk_queue_segment_boundary - set boundary rules for segment merging
+ * @q:  the request queue for the device
+ * @mask:  the memory boundary mask
+ **/
+void blk_queue_segment_boundary(request_queue_t *q, unsigned long mask)
+{
+	if (mask < PAGE_CACHE_SIZE - 1) {
+		mask = PAGE_CACHE_SIZE - 1;
+		printk("%s: set to minimum %lx\n", __FUNCTION__, mask);
+	}
+
+	q->seg_boundary_mask = mask;
+}
+
+EXPORT_SYMBOL(blk_queue_segment_boundary);
+
+/**
+ * blk_queue_dma_alignment - set dma length and memory alignment
+ * @q:     the request queue for the device
+ * @mask:  alignment mask
+ *
+ * description:
+ *    set required memory and length aligment for direct dma transactions.
+ *    this is used when buiding direct io requests for the queue.
+ *
+ **/
+void blk_queue_dma_alignment(request_queue_t *q, int mask)
+{
+	q->dma_alignment = mask;
+}
+
+EXPORT_SYMBOL(blk_queue_dma_alignment);
+
+/**
+ * blk_queue_find_tag - find a request by its tag and queue
+ *
+ * @q:	 The request queue for the device
+ * @tag: The tag of the request
+ *
+ * Notes:
+ *    Should be used when a device returns a tag and you want to match
+ *    it with a request.
+ *
+ *    no locks need be held.
+ **/
+struct request *blk_queue_find_tag(request_queue_t *q, int tag)
+{
+	struct blk_queue_tag *bqt = q->queue_tags;
+
+	if (unlikely(bqt == NULL || tag >= bqt->real_max_depth))
+		return NULL;
+
+	return bqt->tag_index[tag];
+}
+
+EXPORT_SYMBOL(blk_queue_find_tag);
+
+/**
+ * __blk_queue_free_tags - release tag maintenance info
+ * @q:  the request queue for the device
+ *
+ *  Notes:
+ *    blk_cleanup_queue() will take care of calling this function, if tagging
+ *    has been used. So there's no need to call this directly.
+ **/
+static void __blk_queue_free_tags(request_queue_t *q)
+{
+	struct blk_queue_tag *bqt = q->queue_tags;
+
+	if (!bqt)
+		return;
+
+	if (atomic_dec_and_test(&bqt->refcnt)) {
+		BUG_ON(bqt->busy);
+		BUG_ON(!list_empty(&bqt->busy_list));
+
+		kfree(bqt->tag_index);
+		bqt->tag_index = NULL;
+
+		kfree(bqt->tag_map);
+		bqt->tag_map = NULL;
+
+		kfree(bqt);
+	}
+
+	q->queue_tags = NULL;
+	q->queue_flags &= ~(1 << QUEUE_FLAG_QUEUED);
+}
+
+/**
+ * blk_queue_free_tags - release tag maintenance info
+ * @q:  the request queue for the device
+ *
+ *  Notes:
+ *	This is used to disabled tagged queuing to a device, yet leave
+ *	queue in function.
+ **/
+void blk_queue_free_tags(request_queue_t *q)
+{
+	clear_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
+}
+
+EXPORT_SYMBOL(blk_queue_free_tags);
+
+static int
+init_tag_map(request_queue_t *q, struct blk_queue_tag *tags, int depth)
+{
+	struct request **tag_index;
+	unsigned long *tag_map;
+	int nr_ulongs;
+
+	if (depth > q->nr_requests * 2) {
+		depth = q->nr_requests * 2;
+		printk(KERN_ERR "%s: adjusted depth to %d\n",
+				__FUNCTION__, depth);
+	}
+
+	tag_index = kmalloc(depth * sizeof(struct request *), GFP_ATOMIC);
+	if (!tag_index)
+		goto fail;
+
+	nr_ulongs = ALIGN(depth, BITS_PER_LONG) / BITS_PER_LONG;
+	tag_map = kmalloc(nr_ulongs * sizeof(unsigned long), GFP_ATOMIC);
+	if (!tag_map)
+		goto fail;
+
+	memset(tag_index, 0, depth * sizeof(struct request *));
+	memset(tag_map, 0, nr_ulongs * sizeof(unsigned long));
+	tags->real_max_depth = depth;
+	tags->max_depth = depth;
+	tags->tag_index = tag_index;
+	tags->tag_map = tag_map;
+
+	return 0;
+fail:
+	kfree(tag_index);
+	return -ENOMEM;
+}
+
+/**
+ * blk_queue_init_tags - initialize the queue tag info
+ * @q:  the request queue for the device
+ * @depth:  the maximum queue depth supported
+ * @tags: the tag to use
+ **/
+int blk_queue_init_tags(request_queue_t *q, int depth,
+			struct blk_queue_tag *tags)
+{
+	int rc;
+
+	BUG_ON(tags && q->queue_tags && tags != q->queue_tags);
+
+	if (!tags && !q->queue_tags) {
+		tags = kmalloc(sizeof(struct blk_queue_tag), GFP_ATOMIC);
+		if (!tags)
+			goto fail;
+
+		if (init_tag_map(q, tags, depth))
+			goto fail;
+
+		INIT_LIST_HEAD(&tags->busy_list);
+		tags->busy = 0;
+		atomic_set(&tags->refcnt, 1);
+	} else if (q->queue_tags) {
+		if ((rc = blk_queue_resize_tags(q, depth)))
+			return rc;
+		set_bit(QUEUE_FLAG_QUEUED, &q->queue_flags);
+		return 0;
+	} else
+		atomic_inc(&tags->refcnt);
+
+	/*
+	 * assign it, all done
+	 */
+	q->queue_tags = tags;
+	q->queue_flags |= (1 << QUEUE_FLAG_QUEUED);
+	return 0;
+fail:
+	kfree(tags);
+	return -ENOMEM;
+}
+
+EXPORT_SYMBOL(blk_queue_init_tags);
+
+/**
+ * blk_queue_resize_tags - change the queueing depth
+ * @q:  the request queue for the device
+ * @new_depth: the new max command queueing depth
+ *
+ *  Notes:
+ *    Must be called with the queue lock held.
+ **/
+int blk_queue_resize_tags(request_queue_t *q, int new_depth)
+{
+	struct blk_queue_tag *bqt = q->queue_tags;
+	struct request **tag_index;
+	unsigned long *tag_map;
+	int max_depth, nr_ulongs;
+
+	if (!bqt)
+		return -ENXIO;
+
+	/*
+	 * if we already have large enough real_max_depth.  just
+	 * adjust max_depth.  *NOTE* as requests with tag value
+	 * between new_depth and real_max_depth can be in-flight, tag
+	 * map can not be shrunk blindly here.
+	 */
+	if (new_depth <= bqt->real_max_depth) {
+		bqt->max_depth = new_depth;
+		return 0;
+	}
+
+	/*
+	 * save the old state info, so we can copy it back
+	 */
+	tag_index = bqt->tag_index;
+	tag_map = bqt->tag_map;
+	max_depth = bqt->real_max_depth;
+
+	if (init_tag_map(q, bqt, new_depth))
+		return -ENOMEM;
+
+	memcpy(bqt->tag_index, tag_index, max_depth * sizeof(struct request *));
+	nr_ulongs = ALIGN(max_depth, BITS_PER_LONG) / BITS_PER_LONG;
+	memcpy(bqt->tag_map, tag_map, nr_ulongs * sizeof(unsigned long));
+
+	kfree(tag_index);
+	kfree(tag_map);
+	return 0;
+}
+
+EXPORT_SYMBOL(blk_queue_resize_tags);
+
+/**
+ * blk_queue_end_tag - end tag operations for a request
+ * @q:  the request queue for the device
+ * @rq: the request that has completed
+ *
+ *  Description:
+ *    Typically called when end_that_request_first() returns 0, meaning
+ *    all transfers have been done for a request. It's important to call
+ *    this function before end_that_request_last(), as that will put the
+ *    request back on the free list thus corrupting the internal tag list.
+ *
+ *  Notes:
+ *   queue lock must be held.
+ **/
+void blk_queue_end_tag(request_queue_t *q, struct request *rq)
+{
+	struct blk_queue_tag *bqt = q->queue_tags;
+	int tag = rq->tag;
+
+	BUG_ON(tag == -1);
+
+	if (unlikely(tag >= bqt->real_max_depth))
+		/*
+		 * This can happen after tag depth has been reduced.
+		 * FIXME: how about a warning or info message here?
+		 */
+		return;
+
+	if (unlikely(!__test_and_clear_bit(tag, bqt->tag_map))) {
+		printk(KERN_ERR "%s: attempt to clear non-busy tag (%d)\n",
+		       __FUNCTION__, tag);
+		return;
+	}
+
+	list_del_init(&rq->queuelist);
+	rq->flags &= ~REQ_QUEUED;
+	rq->tag = -1;
+
+	if (unlikely(bqt->tag_index[tag] == NULL))
+		printk(KERN_ERR "%s: tag %d is missing\n",
+		       __FUNCTION__, tag);
+
+	bqt->tag_index[tag] = NULL;
+	bqt->busy--;
+}
+
+EXPORT_SYMBOL(blk_queue_end_tag);
+
+/**
+ * blk_queue_start_tag - find a free tag and assign it
+ * @q:  the request queue for the device
+ * @rq:  the block request that needs tagging
+ *
+ *  Description:
+ *    This can either be used as a stand-alone helper, or possibly be
+ *    assigned as the queue &prep_rq_fn (in which case &struct request
+ *    automagically gets a tag assigned). Note that this function
+ *    assumes that any type of request can be queued! if this is not
+ *    true for your device, you must check the request type before
+ *    calling this function.  The request will also be removed from
+ *    the request queue, so it's the drivers responsibility to readd
+ *    it if it should need to be restarted for some reason.
+ *
+ *  Notes:
+ *   queue lock must be held.
+ **/
+int blk_queue_start_tag(request_queue_t *q, struct request *rq)
+{
+	struct blk_queue_tag *bqt = q->queue_tags;
+	int tag;
+
+	if (unlikely((rq->flags & REQ_QUEUED))) {
+		printk(KERN_ERR 
+		       "%s: request %p for device [%s] already tagged %d",
+		       __FUNCTION__, rq,
+		       rq->rq_disk ? rq->rq_disk->disk_name : "?", rq->tag);
+		BUG();
+	}
+
+	tag = find_first_zero_bit(bqt->tag_map, bqt->max_depth);
+	if (tag >= bqt->max_depth)
+		return 1;
+
+	__set_bit(tag, bqt->tag_map);
+
+	rq->flags |= REQ_QUEUED;
+	rq->tag = tag;
+	bqt->tag_index[tag] = rq;
+	blkdev_dequeue_request(rq);
+	list_add(&rq->queuelist, &bqt->busy_list);
+	bqt->busy++;
+	return 0;
+}
+
+EXPORT_SYMBOL(blk_queue_start_tag);
+
+/**
+ * blk_queue_invalidate_tags - invalidate all pending tags
+ * @q:  the request queue for the device
+ *
+ *  Description:
+ *   Hardware conditions may dictate a need to stop all pending requests.
+ *   In this case, we will safely clear the block side of the tag queue and
+ *   readd all requests to the request queue in the right order.
+ *
+ *  Notes:
+ *   queue lock must be held.
+ **/
+void blk_queue_invalidate_tags(request_queue_t *q)
+{
+	struct blk_queue_tag *bqt = q->queue_tags;
+	struct list_head *tmp, *n;
+	struct request *rq;
+
+	list_for_each_safe(tmp, n, &bqt->busy_list) {
+		rq = list_entry_rq(tmp);
+
+		if (rq->tag == -1) {
+			printk(KERN_ERR
+			       "%s: bad tag found on list\n", __FUNCTION__);
+			list_del_init(&rq->queuelist);
+			rq->flags &= ~REQ_QUEUED;
+		} else
+			blk_queue_end_tag(q, rq);
+
+		rq->flags &= ~REQ_STARTED;
+		__elv_add_request(q, rq, ELEVATOR_INSERT_BACK, 0);
+	}
+}
+
+EXPORT_SYMBOL(blk_queue_invalidate_tags);
+
+static char *rq_flags[] = {
+	"REQ_RW",
+	"REQ_FAILFAST",
+	"REQ_SORTED",
+	"REQ_SOFTBARRIER",
+	"REQ_HARDBARRIER",
+	"REQ_CMD",
+	"REQ_NOMERGE",
+	"REQ_STARTED",
+	"REQ_DONTPREP",
+	"REQ_QUEUED",
+	"REQ_ELVPRIV",
+	"REQ_PC",
+	"REQ_BLOCK_PC",
+	"REQ_SENSE",
+	"REQ_FAILED",
+	"REQ_QUIET",
+	"REQ_SPECIAL",
+	"REQ_DRIVE_CMD",
+	"REQ_DRIVE_TASK",
+	"REQ_DRIVE_TASKFILE",
+	"REQ_PREEMPT",
+	"REQ_PM_SUSPEND",
+	"REQ_PM_RESUME",
+	"REQ_PM_SHUTDOWN",
+};
+
+void blk_dump_rq_flags(struct request *rq, char *msg)
+{
+	int bit;
+
+	printk("%s: dev %s: flags = ", msg,
+		rq->rq_disk ? rq->rq_disk->disk_name : "?");
+	bit = 0;
+	do {
+		if (rq->flags & (1 << bit))
+			printk("%s ", rq_flags[bit]);
+		bit++;
+	} while (bit < __REQ_NR_BITS);
+
+	printk("\nsector %llu, nr/cnr %lu/%u\n", (unsigned long long)rq->sector,
+						       rq->nr_sectors,
+						       rq->current_nr_sectors);
+	printk("bio %p, biotail %p, buffer %p, data %p, len %u\n", rq->bio, rq->biotail, rq->buffer, rq->data, rq->data_len);
+
+	if (rq->flags & (REQ_BLOCK_PC | REQ_PC)) {
+		printk("cdb: ");
+		for (bit = 0; bit < sizeof(rq->cmd); bit++)
+			printk("%02x ", rq->cmd[bit]);
+		printk("\n");
+	}
+}
+
+EXPORT_SYMBOL(blk_dump_rq_flags);
+
+void blk_recount_segments(request_queue_t *q, struct bio *bio)
+{
+	struct bio_vec *bv, *bvprv = NULL;
+	int i, nr_phys_segs, nr_hw_segs, seg_size, hw_seg_size, cluster;
+	int high, highprv = 1;
+
+	if (unlikely(!bio->bi_io_vec))
+		return;
+
+	cluster = q->queue_flags & (1 << QUEUE_FLAG_CLUSTER);
+	hw_seg_size = seg_size = nr_phys_segs = nr_hw_segs = 0;
+	bio_for_each_segment(bv, bio, i) {
+		/*
+		 * the trick here is making sure that a high page is never
+		 * considered part of another segment, since that might
+		 * change with the bounce page.
+		 */
+		high = page_to_pfn(bv->bv_page) >= q->bounce_pfn;
+		if (high || highprv)
+			goto new_hw_segment;
+		if (cluster) {
+			if (seg_size + bv->bv_len > q->max_segment_size)
+				goto new_segment;
+			if (!BIOVEC_PHYS_MERGEABLE(bvprv, bv))
+				goto new_segment;
+			if (!BIOVEC_SEG_BOUNDARY(q, bvprv, bv))
+				goto new_segment;
+			if (BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len))
+				goto new_hw_segment;
+
+			seg_size += bv->bv_len;
+			hw_seg_size += bv->bv_len;
+			bvprv = bv;
+			continue;
+		}
+new_segment:
+		if (BIOVEC_VIRT_MERGEABLE(bvprv, bv) &&
+		    !BIOVEC_VIRT_OVERSIZE(hw_seg_size + bv->bv_len)) {
+			hw_seg_size += bv->bv_len;
+		} else {
+new_hw_segment:
+			if (hw_seg_size > bio->bi_hw_front_size)
+				bio->bi_hw_front_size = hw_seg_size;
+			hw_seg_size = BIOVEC_VIRT_START_SIZE(bv) + bv->bv_len;
+			nr_hw_segs++;
+		}
+
+		nr_phys_segs++;
+		bvprv = bv;
+		seg_size = bv->bv_len;
+		highprv = high;
+	}
+	if (hw_seg_size > bio->bi_hw_back_size)
+		bio->bi_hw_back_size = hw_seg_size;
+	if (nr_hw_segs == 1 && hw_seg_size > bio->bi_hw_front_size)
+		bio->bi_hw_front_size = hw_seg_size;
+	bio->bi_phys_segments = nr_phys_segs;
+	bio->bi_hw_segments = nr_hw_segs;
+	bio->bi_flags |= (1 << BIO_SEG_VALID);
+}
+
+</
author	Jens Axboe <axboe@suse.de>	2005-11-04 08:43:35 +0100
committer	Jens Axboe <axboe@suse.de>	2005-11-04 08:43:35 +0100
commit	3a65dfe8c088143c7155cfd36a72f4b0ad2fc4b2 (patch)
tree	db930c9f71f94d3ee674f65e38c38e95ca97227e /block/ll_rw_blk.c
parent	0f3278d14f0255e4cd9e07ccefc33ff12d8bb59c (diff)