Diffstat (limited to 'include/linux/blkdev.h')
-rw-r--r--  include/linux/blkdev.h  223
1 file changed, 191 insertions(+), 32 deletions(-)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 1756001210d..8699bcf5f09 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -8,6 +8,7 @@
#include <linux/major.h>
#include <linux/genhd.h>
#include <linux/list.h>
+#include <linux/llist.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
#include <linux/pagemap.h>
@@ -19,6 +20,7 @@
#include <linux/gfp.h>
#include <linux/bsg.h>
#include <linux/smp.h>
+#include <linux/rcupdate.h>
#include <asm/scatterlist.h>
@@ -88,17 +90,22 @@ enum rq_cmd_type_bits {
#define BLK_MAX_CDB 16
/*
- * try to put the fields that are referenced together in the same cacheline.
- * if you modify this structure, be sure to check block/blk-core.c:blk_rq_init()
- * as well!
+ * Try to put the fields that are referenced together in the same cacheline.
+ *
+ * If you modify this structure, make sure to update blk_rq_init() and
+ * especially blk_mq_rq_ctx_init() to take care of the added fields.
*/
struct request {
struct list_head queuelist;
- struct call_single_data csd;
+ union {
+ struct call_single_data csd;
+ unsigned long fifo_time;
+ };
struct request_queue *q;
+ struct blk_mq_ctx *mq_ctx;
- unsigned int cmd_flags;
+ u64 cmd_flags;
enum rq_cmd_type_bits cmd_type;
unsigned long atomic_flags;
@@ -111,7 +118,18 @@ struct request {
struct bio *bio;
struct bio *biotail;
- struct hlist_node hash; /* merge hash */
+ /*
+ * The hash is used inside the scheduler, and killed once the
+ * request reaches the dispatch list. The ipi_list is only used
+ * to queue the request for softirq completion, which is long
+ * after the request has been unhashed (and even removed from
+ * the dispatch list).
+ */
+ union {
+ struct hlist_node hash; /* merge hash */
+ struct list_head ipi_list;
+ };
+
/*
* The rb_node is only used inside the io scheduler, requests
* are pruned when moved to the dispatch queue. So let the
@@ -159,10 +177,7 @@ struct request {
unsigned short ioprio;
- int ref_count;
-
void *special; /* opaque pointer available for LLD use */
- char *buffer; /* kaddr of the current segment if available */
int tag;
int errors;
@@ -214,6 +229,8 @@ struct request_pm_state
#include <linux/elevator.h>
+struct blk_queue_ctx;
+
typedef void (request_fn_proc) (struct request_queue *q);
typedef void (make_request_fn) (struct request_queue *q, struct bio *bio);
typedef int (prep_rq_fn) (struct request_queue *, struct request *);
@@ -263,6 +280,7 @@ struct queue_limits {
unsigned long seg_boundary_mask;
unsigned int max_hw_sectors;
+ unsigned int chunk_sectors;
unsigned int max_sectors;
unsigned int max_segment_size;
unsigned int physical_block_size;
@@ -282,6 +300,7 @@ struct queue_limits {
unsigned char discard_misaligned;
unsigned char cluster;
unsigned char discard_zeroes_data;
+ unsigned char raid_partial_stripes_expensive;
};
struct request_queue {
@@ -312,6 +331,18 @@ struct request_queue {
dma_drain_needed_fn *dma_drain_needed;
lld_busy_fn *lld_busy_fn;
+ struct blk_mq_ops *mq_ops;
+
+ unsigned int *mq_map;
+
+ /* sw queues */
+ struct blk_mq_ctx __percpu *queue_ctx;
+ unsigned int nr_queues;
+
+ /* hw dispatch queues */
+ struct blk_mq_hw_ctx **queue_hw_ctx;
+ unsigned int nr_hw_queues;
+
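
The per-cpu software queues and the hardware dispatch queues are tied together by mq_map, which maps a CPU number to an index into queue_hw_ctx. A minimal sketch of the lookup these fields imply, mirroring the usual blk-mq mapping (illustrative only, not part of this diff; the helper name is hypothetical):

	static inline struct blk_mq_hw_ctx *
	mq_map_sketch(struct request_queue *q, unsigned int cpu)
	{
		/* software queue for 'cpu' is served by this hardware queue */
		return q->queue_hw_ctx[q->mq_map[cpu]];
	}
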
/*
* Dispatch queue sorting
*/
@@ -361,6 +392,17 @@ struct request_queue {
struct kobject kobj;
/*
+ * mq queue kobject
+ */
+ struct kobject mq_kobj;
+
+#ifdef CONFIG_PM_RUNTIME
+ struct device *dev;
+ int rpm_status;
+ unsigned int nr_pending;
+#endif
+
+ /*
* queue settings
*/
unsigned long nr_requests; /* Max # of requests */
@@ -378,6 +420,12 @@ struct request_queue {
unsigned int nr_sorted;
unsigned int in_flight[2];
+ /*
+ * Number of active block driver functions for which blk_drain_queue()
+ * must wait. Must be incremented around functions that unlock the
+ * queue_lock internally, e.g. scsi_request_fn().
+ */
+ unsigned int request_fn_active;
unsigned int rq_timeout;
struct timer_list timeout;
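
request_fn_active is bumped around every ->request_fn() invocation so that blk_drain_queue() can wait out drivers (such as scsi_request_fn()) that drop and re-take queue_lock internally. A minimal sketch of the intended pattern, assuming the caller holds q->queue_lock (illustrative, not the exact block/blk-core.c code; the function name is hypothetical):

	static void run_queue_sketch(struct request_queue *q)
	{
		lockdep_assert_held(q->queue_lock);

		if (unlikely(blk_queue_dead(q)))
			return;

		q->request_fn_active++;
		q->request_fn(q);		/* may drop and re-take queue_lock */
		q->request_fn_active--;
	}
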
@@ -412,7 +460,12 @@ struct request_queue {
unsigned long flush_pending_since;
struct list_head flush_queue[2];
struct list_head flush_data_in_flight;
- struct request flush_rq;
+ struct request *flush_rq;
+ spinlock_t mq_flush_lock;
+
+ struct list_head requeue_list;
+ spinlock_t requeue_lock;
+ struct work_struct requeue_work;
struct mutex sysfs_lock;
@@ -424,20 +477,24 @@ struct request_queue {
struct bsg_class_device bsg_dev;
#endif
-#ifdef CONFIG_BLK_CGROUP
- struct list_head all_q_node;
-#endif
#ifdef CONFIG_BLK_DEV_THROTTLING
/* Throttle data */
struct throtl_data *td;
#endif
+ struct rcu_head rcu_head;
+ wait_queue_head_t mq_freeze_wq;
+ struct percpu_counter mq_usage_counter;
+ struct list_head all_q_node;
+
+ struct blk_mq_tag_set *tag_set;
+ struct list_head tag_set_list;
};
#define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */
#define QUEUE_FLAG_STOPPED 2 /* queue is stopped */
#define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */
#define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */
-#define QUEUE_FLAG_DEAD 5 /* queue being torn down */
+#define QUEUE_FLAG_DYING 5 /* queue being torn down */
#define QUEUE_FLAG_BYPASS 6 /* act as dumb FIFO queue */
#define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */
#define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */
@@ -452,12 +509,19 @@ struct request_queue {
#define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */
#define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */
#define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */
+#define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */
+#define QUEUE_FLAG_INIT_DONE 20 /* queue is initialized */
+#define QUEUE_FLAG_NO_SG_MERGE 21 /* don't attempt to merge SG segments*/
+#define QUEUE_FLAG_SG_GAPS 22 /* queue doesn't support SG gaps */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_STACKABLE) | \
(1 << QUEUE_FLAG_SAME_COMP) | \
(1 << QUEUE_FLAG_ADD_RANDOM))
+#define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
+ (1 << QUEUE_FLAG_SAME_COMP))
+
static inline void queue_lockdep_assert_held(struct request_queue *q)
{
if (q->queue_lock)
@@ -521,8 +585,10 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
+#define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
#define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags)
#define blk_queue_bypass(q) test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags)
+#define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags)
#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
#define blk_queue_noxmerges(q) \
test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags)
@@ -554,7 +620,16 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
#define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist)
-#define rq_data_dir(rq) ((rq)->cmd_flags & 1)
+#define rq_data_dir(rq) (((rq)->cmd_flags & 1) != 0)
+
+/*
+ * Driver can handle struct request, if it either has an old style
+ * request_fn defined, or is blk-mq based.
+ */
+static inline bool queue_is_rq_based(struct request_queue *q)
+{
+ return q->request_fn || q->mq_ops;
+}
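
A typical caller is a stacking driver deciding whether an underlying device can accept struct request submissions. A minimal sketch, assuming bdev is an already-opened block device (illustrative only; the function name is hypothetical):

	static int stackable_sketch(struct block_device *bdev)
	{
		struct request_queue *q = bdev_get_queue(bdev);

		/* bio-based queues cannot have requests stacked onto them */
		return queue_is_rq_based(q) ? 0 : -EINVAL;
	}
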
static inline unsigned int blk_queue_cluster(struct request_queue *q)
{
@@ -683,7 +758,7 @@ struct rq_map_data {
};
struct req_iterator {
- int i;
+ struct bvec_iter iter;
struct bio *bio;
};
@@ -696,10 +771,11 @@ struct req_iterator {
#define rq_for_each_segment(bvl, _rq, _iter) \
__rq_for_each_bio(_iter.bio, _rq) \
- bio_for_each_segment(bvl, _iter.bio, _iter.i)
+ bio_for_each_segment(bvl, _iter.bio, _iter.iter)
-#define rq_iter_last(rq, _iter) \
- (_iter.bio->bi_next == NULL && _iter.i == _iter.bio->bi_vcnt-1)
+#define rq_iter_last(bvec, _iter) \
+ (_iter.bio->bi_next == NULL && \
+ bio_iter_last(bvec, _iter.iter))
#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE
# error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform"
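
With the switch from an integer segment index to a struct bvec_iter, rq_for_each_segment() now hands back a struct bio_vec by value rather than a pointer. A minimal driver-side sketch under that assumption (illustrative only; the function name is hypothetical):

	static void walk_segments_sketch(struct request *rq)
	{
		struct req_iterator iter;
		struct bio_vec bvec;

		rq_for_each_segment(bvec, rq, iter) {
			/* bvec is a bio_vec by value with the new iterator;
			 * assumes lowmem pages, highmem needs kmap_atomic() */
			void *buf = page_address(bvec.bv_page) + bvec.bv_offset;

			pr_debug("segment: %u bytes at %p\n", bvec.bv_len, buf);
		}
	}
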
@@ -721,6 +797,7 @@ extern void __blk_put_request(struct request_queue *, struct request *);
extern struct request *blk_get_request(struct request_queue *, int, gfp_t);
extern struct request *blk_make_request(struct request_queue *, struct bio *,
gfp_t);
+extern void blk_rq_set_block_pc(struct request *);
extern void blk_requeue_request(struct request_queue *, struct request *);
extern void blk_add_request_payload(struct request *rq, struct page *page,
unsigned int len);
@@ -777,8 +854,8 @@ extern int blk_rq_map_user(struct request_queue *, struct request *,
extern int blk_rq_unmap_user(struct bio *);
extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t);
extern int blk_rq_map_user_iov(struct request_queue *, struct request *,
- struct rq_map_data *, struct sg_iovec *, int,
- unsigned int, gfp_t);
+ struct rq_map_data *, const struct sg_iovec *,
+ int, unsigned int, gfp_t);
extern int blk_execute_rq(struct request_queue *, struct gendisk *,
struct request *, int);
extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *,
@@ -828,7 +905,7 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
unsigned int cmd_flags)
{
if (unlikely(cmd_flags & REQ_DISCARD))
- return q->limits.max_discard_sectors;
+ return min(q->limits.max_discard_sectors, UINT_MAX >> 9);
if (unlikely(cmd_flags & REQ_WRITE_SAME))
return q->limits.max_write_same_sectors;
@@ -836,6 +913,20 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
return q->limits.max_sectors;
}
+/*
+ * Return maximum size of a request at given offset. Only valid for
+ * file system requests.
+ */
+static inline unsigned int blk_max_size_offset(struct request_queue *q,
+ sector_t offset)
+{
+ if (!q->limits.chunk_sectors)
+ return q->limits.max_sectors;
+
+ return q->limits.chunk_sectors -
+ (offset & (q->limits.chunk_sectors - 1));
+}
+
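
The mask arithmetic assumes chunk_sectors is a power of two. Worked example with assumed numbers: with chunk_sectors = 256 (128 KiB chunks of 512-byte sectors) and offset = 300, the remaining room in the current chunk is 256 - (300 & 255) = 256 - 44 = 212 sectors, so a file system request at that offset is capped at 212 sectors regardless of max_sectors.
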
static inline unsigned int blk_rq_get_max_sectors(struct request *rq)
{
struct request_queue *q = rq->q;
@@ -843,7 +934,22 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq)
if (unlikely(rq->cmd_type == REQ_TYPE_BLOCK_PC))
return q->limits.max_hw_sectors;
- return blk_queue_get_max_sectors(q, rq->cmd_flags);
+ if (!q->limits.chunk_sectors)
+ return blk_queue_get_max_sectors(q, rq->cmd_flags);
+
+ return min(blk_max_size_offset(q, blk_rq_pos(rq)),
+ blk_queue_get_max_sectors(q, rq->cmd_flags));
+}
+
+static inline unsigned int blk_rq_count_bios(struct request *rq)
+{
+ unsigned int nr_bios = 0;
+ struct bio *bio;
+
+ __rq_for_each_bio(bio, rq)
+ nr_bios++;
+
+ return nr_bios;
}
/*
@@ -868,6 +974,7 @@ extern struct request *blk_fetch_request(struct request_queue *q);
*/
extern bool blk_update_request(struct request *rq, int error,
unsigned int nr_bytes);
+extern void blk_finish_request(struct request *rq, int error);
extern bool blk_end_request(struct request *rq, int error,
unsigned int nr_bytes);
extern void blk_end_request_all(struct request *rq, int error);
@@ -897,6 +1004,7 @@ extern void blk_queue_make_request(struct request_queue *, make_request_fn *);
extern void blk_queue_bounce_limit(struct request_queue *, u64);
extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int);
extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int);
+extern void blk_queue_chunk_sectors(struct request_queue *, unsigned int);
extern void blk_queue_max_segments(struct request_queue *, unsigned short);
extern void blk_queue_max_segment_size(struct request_queue *, unsigned int);
extern void blk_queue_max_discard_sectors(struct request_queue *q,
@@ -951,6 +1059,27 @@ struct request_queue *blk_alloc_queue_node(gfp_t, int);
extern void blk_put_queue(struct request_queue *);
/*
+ * block layer runtime pm functions
+ */
+#ifdef CONFIG_PM_RUNTIME
+extern void blk_pm_runtime_init(struct request_queue *q, struct device *dev);
+extern int blk_pre_runtime_suspend(struct request_queue *q);
+extern void blk_post_runtime_suspend(struct request_queue *q, int err);
+extern void blk_pre_runtime_resume(struct request_queue *q);
+extern void blk_post_runtime_resume(struct request_queue *q, int err);
+#else
+static inline void blk_pm_runtime_init(struct request_queue *q,
+ struct device *dev) {}
+static inline int blk_pre_runtime_suspend(struct request_queue *q)
+{
+ return -ENOSYS;
+}
+static inline void blk_post_runtime_suspend(struct request_queue *q, int err) {}
+static inline void blk_pre_runtime_resume(struct request_queue *q) {}
+static inline void blk_post_runtime_resume(struct request_queue *q, int err) {}
+#endif
+
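
The intended call sequence wraps a driver's runtime-PM callbacks: blk_pm_runtime_init() at probe time, then blk_pre_runtime_suspend()/blk_post_runtime_suspend() around the actual suspend and blk_pre_runtime_resume()/blk_post_runtime_resume() around the resume. A minimal sketch of the suspend side, assuming a hypothetical driver whose drvdata points at its request queue (illustrative only):

	static int mydrv_runtime_suspend(struct device *dev)
	{
		struct request_queue *q = dev_get_drvdata(dev);	/* hypothetical mapping */
		int err;

		err = blk_pre_runtime_suspend(q);
		if (err)
			return err;	/* queue still busy, stay powered up */

		/* ... put the hardware into its low-power state ... */

		blk_post_runtime_suspend(q, 0);
		return 0;
	}
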
+/*
* blk_plug permits building a queue of related requests by holding the I/O
* fragments for a short period. This allows merging of sequential requests
* into single larger request. As the requests are moved from a per-task list to
@@ -963,10 +1092,9 @@ extern void blk_put_queue(struct request_queue *);
* schedule() where blk_schedule_flush_plug() is called.
*/
struct blk_plug {
- unsigned long magic; /* detect uninitialized use-cases */
struct list_head list; /* requests */
+ struct list_head mq_list; /* blk-mq requests */
struct list_head cb_list; /* md requires an unplug callback */
- unsigned int should_sort; /* list to be sorted before flushing? */
};
#define BLK_MAX_REQUEST_COUNT 16
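
Plugging is used the same way from the submitter's point of view; the new mq_list simply lets blk-mq requests ride the same plug. A minimal usage sketch (illustrative only; the plug is also flushed automatically once BLK_MAX_REQUEST_COUNT requests accumulate or the task schedules):

	struct blk_plug plug;

	blk_start_plug(&plug);

	/* submit a batch of bios here; adjacent ones can merge while
	 * they sit on plug->list (legacy) or plug->mq_list (blk-mq) */

	blk_finish_plug(&plug);		/* flushes everything plugged above */
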
@@ -1003,13 +1131,17 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
{
struct blk_plug *plug = tsk->plug;
- return plug && (!list_empty(&plug->list) || !list_empty(&plug->cb_list));
+ return plug &&
+ (!list_empty(&plug->list) ||
+ !list_empty(&plug->mq_list) ||
+ !list_empty(&plug->cb_list));
}
/*
* tag stuff
*/
-#define blk_rq_tagged(rq) ((rq)->cmd_flags & REQ_QUEUED)
+#define blk_rq_tagged(rq) \
+ ((rq)->mq_ctx || ((rq)->cmd_flags & REQ_QUEUED))
extern int blk_queue_start_tag(struct request_queue *, struct request *);
extern struct request *blk_queue_find_tag(struct request_queue *, int);
extern void blk_queue_end_tag(struct request_queue *, struct request *);
@@ -1180,13 +1312,25 @@ static inline int queue_discard_alignment(struct request_queue *q)
static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector)
{
- unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1);
+ unsigned int alignment, granularity, offset;
if (!lim->max_discard_sectors)
return 0;
- return (lim->discard_granularity + lim->discard_alignment - alignment)
- & (lim->discard_granularity - 1);
+ /* Why are these in bytes, not sectors? */
+ alignment = lim->discard_alignment >> 9;
+ granularity = lim->discard_granularity >> 9;
+ if (!granularity)
+ return 0;
+
+ /* Offset of the partition start in 'granularity' sectors */
+ offset = sector_div(sector, granularity);
+
+ /* And why do we do this modulus *again* in blkdev_issue_discard()? */
+ offset = (granularity + alignment - offset) % granularity;
+
+ /* Turn it back into bytes, gaah */
+ return offset << 9;
}
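
Worked example with assumed numbers: discard_granularity = 64 KiB (granularity = 128 sectors), discard_alignment = 0, and a partition starting at sector 100. Then offset = 100 % 128 = 100 and the function returns ((128 + 0 - 100) % 128) << 9 = 28 << 9 = 14336 bytes, i.e. the first granularity-aligned discard boundary falls 28 sectors past the partition start.
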
static inline int bdev_discard_alignment(struct block_device *bdev)
@@ -1265,7 +1409,9 @@ static inline void put_dev_sector(Sector p)
}
struct work_struct;
-int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
+int kblockd_schedule_work(struct work_struct *work);
+int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay);
+int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay);
#ifdef CONFIG_BLK_CGROUP
/*
@@ -1463,7 +1609,8 @@ static inline bool blk_integrity_is_initialized(struct gendisk *g)
struct block_device_operations {
int (*open) (struct block_device *, fmode_t);
- int (*release) (struct gendisk *, fmode_t);
+ void (*release) (struct gendisk *, fmode_t);
+ int (*rw_page)(struct block_device *, sector_t, struct page *, int rw);
int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
int (*direct_access) (struct block_device *, sector_t,
@@ -1482,7 +1629,13 @@ struct block_device_operations {
extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int,
unsigned long);
+extern int bdev_read_page(struct block_device *, sector_t, struct page *);
+extern int bdev_write_page(struct block_device *, sector_t, struct page *,
+ struct writeback_control *);
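
bdev_read_page()/bdev_write_page() give callers a synchronous, bio-less path into drivers that implement ->rw_page, with the expectation that callers fall back to a normal bio submission when the driver does not. A minimal read-side sketch, assuming bdev, sector and page are already set up (illustrative only):

	int ret = bdev_read_page(bdev, sector, page);

	if (!ret)
		return 0;		/* driver satisfied it via ->rw_page */

	/* otherwise fall back to building and submitting a struct bio */
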
#else /* CONFIG_BLOCK */
+
+struct block_device;
+
/*
* stubs for when the block layer is configured out
*/
@@ -1518,6 +1671,12 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk)
return false;
}
+static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask,
+ sector_t *error_sector)
+{
+ return 0;
+}
+
#endif /* CONFIG_BLOCK */
#endif