aboutsummaryrefslogtreecommitdiff
path: root/drivers/block/xen-blkback/common.h
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/block/xen-blkback/common.h')
-rw-r--r--drivers/block/xen-blkback/common.h343
1 files changed, 298 insertions, 45 deletions
diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h
index 9e40b283a46..f65b807e323 100644
--- a/drivers/block/xen-blkback/common.h
+++ b/drivers/block/xen-blkback/common.h
@@ -27,7 +27,6 @@
#ifndef __XEN_BLKIF__BACKEND__COMMON_H__
#define __XEN_BLKIF__BACKEND__COMMON_H__
-#include <linux/version.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
@@ -35,6 +34,7 @@
#include <linux/vmalloc.h>
#include <linux/wait.h>
#include <linux/io.h>
+#include <linux/rbtree.h>
#include <asm/setup.h>
#include <asm/pgalloc.h>
#include <asm/hypervisor.h>
@@ -46,10 +46,23 @@
#define DRV_PFX "xen-blkback:"
#define DPRINTK(fmt, args...) \
- pr_debug(DRV_PFX "(%s:%d) " fmt ".\n", \
+ pr_debug(DRV_PFX "(%s:%d) " fmt ".\n", \
__func__, __LINE__, ##args)
+/*
+ * This is the maximum number of segments that would be allowed in indirect
+ * requests. This value will also be passed to the frontend.
+ */
+#define MAX_INDIRECT_SEGMENTS 256
+
+#define SEGS_PER_INDIRECT_FRAME \
+ (PAGE_SIZE/sizeof(struct blkif_request_segment))
+#define MAX_INDIRECT_PAGES \
+ ((MAX_INDIRECT_SEGMENTS + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+#define INDIRECT_PAGES(_segs) \
+ ((_segs + SEGS_PER_INDIRECT_FRAME - 1)/SEGS_PER_INDIRECT_FRAME)
+
/* Not a real protocol. Used to generate ring structs which contain
* the elements common to all protocols only. This way we get a
* compiler-checkable way to use common struct elements, so we can
@@ -61,32 +74,120 @@ struct blkif_common_response {
char dummy;
};
-/* i386 protocol version */
-#pragma pack(push, 4)
-struct blkif_x86_32_request {
- uint8_t operation; /* BLKIF_OP_??? */
+struct blkif_x86_32_request_rw {
uint8_t nr_segments; /* number of segments */
blkif_vdev_t handle; /* only for read/write requests */
uint64_t id; /* private guest value, echoed in resp */
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-};
+} __attribute__((__packed__));
+
+struct blkif_x86_32_request_discard {
+ uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */
+ blkif_vdev_t _pad1; /* was "handle" for read/write requests */
+ uint64_t id; /* private guest value, echoed in resp */
+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+ uint64_t nr_sectors;
+} __attribute__((__packed__));
+
+struct blkif_x86_32_request_other {
+ uint8_t _pad1;
+ blkif_vdev_t _pad2;
+ uint64_t id; /* private guest value, echoed in resp */
+} __attribute__((__packed__));
+
+struct blkif_x86_32_request_indirect {
+ uint8_t indirect_op;
+ uint16_t nr_segments;
+ uint64_t id;
+ blkif_sector_t sector_number;
+ blkif_vdev_t handle;
+ uint16_t _pad1;
+ grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+ /*
+ * The maximum number of indirect segments (and pages) that will
+ * be used is determined by MAX_INDIRECT_SEGMENTS, this value
+ * is also exported to the guest (via xenstore
+ * feature-max-indirect-segments entry), so the frontend knows how
+ * many indirect segments the backend supports.
+ */
+ uint64_t _pad2; /* make it 64 byte aligned */
+} __attribute__((__packed__));
+
+struct blkif_x86_32_request {
+ uint8_t operation; /* BLKIF_OP_??? */
+ union {
+ struct blkif_x86_32_request_rw rw;
+ struct blkif_x86_32_request_discard discard;
+ struct blkif_x86_32_request_other other;
+ struct blkif_x86_32_request_indirect indirect;
+ } u;
+} __attribute__((__packed__));
+
+/* i386 protocol version */
+#pragma pack(push, 4)
struct blkif_x86_32_response {
uint64_t id; /* copied from request */
uint8_t operation; /* copied from request */
int16_t status; /* BLKIF_RSP_??? */
};
#pragma pack(pop)
-
/* x86_64 protocol version */
-struct blkif_x86_64_request {
- uint8_t operation; /* BLKIF_OP_??? */
+
+struct blkif_x86_64_request_rw {
uint8_t nr_segments; /* number of segments */
blkif_vdev_t handle; /* only for read/write requests */
- uint64_t __attribute__((__aligned__(8))) id;
+ uint32_t _pad1; /* offsetof(blkif_reqest..,u.rw.id)==8 */
+ uint64_t id;
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
-};
+} __attribute__((__packed__));
+
+struct blkif_x86_64_request_discard {
+ uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */
+ blkif_vdev_t _pad1; /* was "handle" for read/write requests */
+ uint32_t _pad2; /* offsetof(blkif_..,u.discard.id)==8 */
+ uint64_t id;
+ blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+ uint64_t nr_sectors;
+} __attribute__((__packed__));
+
+struct blkif_x86_64_request_other {
+ uint8_t _pad1;
+ blkif_vdev_t _pad2;
+ uint32_t _pad3; /* offsetof(blkif_..,u.discard.id)==8 */
+ uint64_t id; /* private guest value, echoed in resp */
+} __attribute__((__packed__));
+
+struct blkif_x86_64_request_indirect {
+ uint8_t indirect_op;
+ uint16_t nr_segments;
+ uint32_t _pad1; /* offsetof(blkif_..,u.indirect.id)==8 */
+ uint64_t id;
+ blkif_sector_t sector_number;
+ blkif_vdev_t handle;
+ uint16_t _pad2;
+ grant_ref_t indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+ /*
+ * The maximum number of indirect segments (and pages) that will
+ * be used is determined by MAX_INDIRECT_SEGMENTS, this value
+ * is also exported to the guest (via xenstore
+ * feature-max-indirect-segments entry), so the frontend knows how
+ * many indirect segments the backend supports.
+ */
+ uint32_t _pad3; /* make it 64 byte aligned */
+} __attribute__((__packed__));
+
+struct blkif_x86_64_request {
+ uint8_t operation; /* BLKIF_OP_??? */
+ union {
+ struct blkif_x86_64_request_rw rw;
+ struct blkif_x86_64_request_discard discard;
+ struct blkif_x86_64_request_other other;
+ struct blkif_x86_64_request_indirect indirect;
+ } u;
+} __attribute__((__packed__));
+
struct blkif_x86_64_response {
uint64_t __attribute__((__aligned__(8))) id;
uint8_t operation; /* copied from request */
@@ -125,11 +226,36 @@ struct xen_vbd {
struct block_device *bdev;
/* Cached size parameter. */
sector_t size;
- bool flush_support;
+ unsigned int flush_support:1;
+ unsigned int discard_secure:1;
+ unsigned int feature_gnt_persistent:1;
+ unsigned int overflow_max_grants:1;
};
struct backend_info;
+/* Number of available flags */
+#define PERSISTENT_GNT_FLAGS_SIZE 2
+/* This persistent grant is currently in use */
+#define PERSISTENT_GNT_ACTIVE 0
+/*
+ * This persistent grant has been used, this flag is set when we remove the
+ * PERSISTENT_GNT_ACTIVE, to know that this grant has been used recently.
+ */
+#define PERSISTENT_GNT_WAS_ACTIVE 1
+
+/* Number of requests that we can fit in a ring */
+#define XEN_BLKIF_REQS 32
+
+struct persistent_gnt {
+ struct page *page;
+ grant_ref_t gnt;
+ grant_handle_t handle;
+ DECLARE_BITMAP(flags, PERSISTENT_GNT_FLAGS_SIZE);
+ struct rb_node node;
+ struct list_head remove_node;
+};
+
struct xen_blkif {
/* Unique identifier for this interface. */
domid_t domid;
@@ -139,7 +265,7 @@ struct xen_blkif {
/* Comms information. */
enum blkif_protocol blk_protocol;
union blkif_back_rings blk_rings;
- struct vm_struct *blk_ring_area;
+ void *blk_ring;
/* The VBD attached to this interface. */
struct xen_vbd vbd;
/* Back pointer to the backend_info. */
@@ -149,23 +275,81 @@ struct xen_blkif {
atomic_t refcnt;
wait_queue_head_t wq;
+ /* for barrier (drain) requests */
+ struct completion drain_complete;
+ atomic_t drain;
+ atomic_t inflight;
/* One thread per one blkif. */
struct task_struct *xenblkd;
unsigned int waiting_reqs;
+ /* tree to store persistent grants */
+ struct rb_root persistent_gnts;
+ unsigned int persistent_gnt_c;
+ atomic_t persistent_gnt_in_use;
+ unsigned long next_lru;
+
+ /* used by the kworker that offload work from the persistent purge */
+ struct list_head persistent_purge_list;
+ struct work_struct persistent_purge_work;
+
+ /* buffer of free pages to map grant refs */
+ spinlock_t free_pages_lock;
+ int free_pages_num;
+ struct list_head free_pages;
+
+ /* List of all 'pending_req' available */
+ struct list_head pending_free;
+ /* And its spinlock. */
+ spinlock_t pending_free_lock;
+ wait_queue_head_t pending_free_wq;
+
/* statistics */
unsigned long st_print;
- int st_rd_req;
- int st_wr_req;
- int st_oo_req;
- int st_f_req;
- int st_rd_sect;
- int st_wr_sect;
+ unsigned long long st_rd_req;
+ unsigned long long st_wr_req;
+ unsigned long long st_oo_req;
+ unsigned long long st_f_req;
+ unsigned long long st_ds_req;
+ unsigned long long st_rd_sect;
+ unsigned long long st_wr_sect;
- wait_queue_head_t waiting_to_free;
+ struct work_struct free_work;
+ /* Thread shutdown wait queue. */
+ wait_queue_head_t shutdown_wq;
+};
- grant_handle_t shmem_handle;
- grant_ref_t shmem_ref;
+struct seg_buf {
+ unsigned long offset;
+ unsigned int nsec;
+};
+
+struct grant_page {
+ struct page *page;
+ struct persistent_gnt *persistent_gnt;
+ grant_handle_t handle;
+ grant_ref_t gref;
+};
+
+/*
+ * Each outstanding request that we've passed to the lower device layers has a
+ * 'pending_req' allocated to it. Each buffer_head that completes decrements
+ * the pendcnt towards zero. When it hits zero, the specified domain has a
+ * response queued for it, with the saved 'id' passed back.
+ */
+struct pending_req {
+ struct xen_blkif *blkif;
+ u64 id;
+ int nr_pages;
+ atomic_t pendcnt;
+ unsigned short operation;
+ int status;
+ struct list_head free_list;
+ struct grant_page *segments[MAX_INDIRECT_SEGMENTS];
+ /* Indirect descriptors */
+ struct grant_page *indirect_pages[MAX_INDIRECT_PAGES];
+ struct seg_buf seg[MAX_INDIRECT_SEGMENTS];
+ struct bio *biolist[MAX_INDIRECT_SEGMENTS];
};
@@ -177,12 +361,12 @@ struct xen_blkif {
#define xen_blkif_put(_b) \
do { \
if (atomic_dec_and_test(&(_b)->refcnt)) \
- wake_up(&(_b)->waiting_to_free);\
+ schedule_work(&(_b)->free_work);\
} while (0)
struct phys_req {
unsigned short dev;
- unsigned short nr_sects;
+ blkif_sector_t nr_sects;
struct block_device *bdev;
blkif_sector_t sector_number;
};
@@ -192,42 +376,111 @@ int xen_blkif_xenbus_init(void);
irqreturn_t xen_blkif_be_int(int irq, void *dev_id);
int xen_blkif_schedule(void *arg);
+int xen_blkif_purge_persistent(void *arg);
+void xen_blkbk_free_caches(struct xen_blkif *blkif);
int xen_blkbk_flush_diskcache(struct xenbus_transaction xbt,
struct backend_info *be, int state);
+int xen_blkbk_barrier(struct xenbus_transaction xbt,
+ struct backend_info *be, int state);
struct xenbus_device *xen_blkbk_xenbus(struct backend_info *be);
+void xen_blkbk_unmap_purged_grants(struct work_struct *work);
static inline void blkif_get_x86_32_req(struct blkif_request *dst,
struct blkif_x86_32_request *src)
{
- int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
dst->operation = src->operation;
- dst->nr_segments = src->nr_segments;
- dst->handle = src->handle;
- dst->id = src->id;
- dst->u.rw.sector_number = src->sector_number;
- barrier();
- if (n > dst->nr_segments)
- n = dst->nr_segments;
- for (i = 0; i < n; i++)
- dst->u.rw.seg[i] = src->seg[i];
+ switch (src->operation) {
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
+ case BLKIF_OP_WRITE_BARRIER:
+ case BLKIF_OP_FLUSH_DISKCACHE:
+ dst->u.rw.nr_segments = src->u.rw.nr_segments;
+ dst->u.rw.handle = src->u.rw.handle;
+ dst->u.rw.id = src->u.rw.id;
+ dst->u.rw.sector_number = src->u.rw.sector_number;
+ barrier();
+ if (n > dst->u.rw.nr_segments)
+ n = dst->u.rw.nr_segments;
+ for (i = 0; i < n; i++)
+ dst->u.rw.seg[i] = src->u.rw.seg[i];
+ break;
+ case BLKIF_OP_DISCARD:
+ dst->u.discard.flag = src->u.discard.flag;
+ dst->u.discard.id = src->u.discard.id;
+ dst->u.discard.sector_number = src->u.discard.sector_number;
+ dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
+ break;
+ case BLKIF_OP_INDIRECT:
+ dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
+ dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
+ dst->u.indirect.handle = src->u.indirect.handle;
+ dst->u.indirect.id = src->u.indirect.id;
+ dst->u.indirect.sector_number = src->u.indirect.sector_number;
+ barrier();
+ j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
+ for (i = 0; i < j; i++)
+ dst->u.indirect.indirect_grefs[i] =
+ src->u.indirect.indirect_grefs[i];
+ break;
+ default:
+ /*
+ * Don't know how to translate this op. Only get the
+ * ID so failure can be reported to the frontend.
+ */
+ dst->u.other.id = src->u.other.id;
+ break;
+ }
}
static inline void blkif_get_x86_64_req(struct blkif_request *dst,
struct blkif_x86_64_request *src)
{
- int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+ int i, n = BLKIF_MAX_SEGMENTS_PER_REQUEST, j;
dst->operation = src->operation;
- dst->nr_segments = src->nr_segments;
- dst->handle = src->handle;
- dst->id = src->id;
- dst->u.rw.sector_number = src->sector_number;
- barrier();
- if (n > dst->nr_segments)
- n = dst->nr_segments;
- for (i = 0; i < n; i++)
- dst->u.rw.seg[i] = src->seg[i];
+ switch (src->operation) {
+ case BLKIF_OP_READ:
+ case BLKIF_OP_WRITE:
+ case BLKIF_OP_WRITE_BARRIER:
+ case BLKIF_OP_FLUSH_DISKCACHE:
+ dst->u.rw.nr_segments = src->u.rw.nr_segments;
+ dst->u.rw.handle = src->u.rw.handle;
+ dst->u.rw.id = src->u.rw.id;
+ dst->u.rw.sector_number = src->u.rw.sector_number;
+ barrier();
+ if (n > dst->u.rw.nr_segments)
+ n = dst->u.rw.nr_segments;
+ for (i = 0; i < n; i++)
+ dst->u.rw.seg[i] = src->u.rw.seg[i];
+ break;
+ case BLKIF_OP_DISCARD:
+ dst->u.discard.flag = src->u.discard.flag;
+ dst->u.discard.id = src->u.discard.id;
+ dst->u.discard.sector_number = src->u.discard.sector_number;
+ dst->u.discard.nr_sectors = src->u.discard.nr_sectors;
+ break;
+ case BLKIF_OP_INDIRECT:
+ dst->u.indirect.indirect_op = src->u.indirect.indirect_op;
+ dst->u.indirect.nr_segments = src->u.indirect.nr_segments;
+ dst->u.indirect.handle = src->u.indirect.handle;
+ dst->u.indirect.id = src->u.indirect.id;
+ dst->u.indirect.sector_number = src->u.indirect.sector_number;
+ barrier();
+ j = min(MAX_INDIRECT_PAGES, INDIRECT_PAGES(dst->u.indirect.nr_segments));
+ for (i = 0; i < j; i++)
+ dst->u.indirect.indirect_grefs[i] =
+ src->u.indirect.indirect_grefs[i];
+ break;
+ default:
+ /*
+ * Don't know how to translate this op. Only get the
+ * ID so failure can be reported to the frontend.
+ */
+ dst->u.other.id = src->u.other.id;
+ break;
+ }
}
#endif /* __XEN_BLKIF__BACKEND__COMMON_H__ */