aboutsummaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/Kconfig55
-rw-r--r--drivers/md/Makefile6
-rw-r--r--drivers/md/dm-bio-prison.c155
-rw-r--r--drivers/md/dm-bio-prison.h58
-rw-r--r--drivers/md/dm-bufio.c2
-rw-r--r--drivers/md/dm-cache-block-types.h54
-rw-r--r--drivers/md/dm-cache-metadata.c1146
-rw-r--r--drivers/md/dm-cache-metadata.h142
-rw-r--r--drivers/md/dm-cache-policy-cleaner.c464
-rw-r--r--drivers/md/dm-cache-policy-internal.h124
-rw-r--r--drivers/md/dm-cache-policy-mq.c1195
-rw-r--r--drivers/md/dm-cache-policy.c161
-rw-r--r--drivers/md/dm-cache-policy.h228
-rw-r--r--drivers/md/dm-cache-target.c2584
-rw-r--r--drivers/md/dm-crypt.c45
-rw-r--r--drivers/md/dm-delay.c12
-rw-r--r--drivers/md/dm-flakey.c11
-rw-r--r--drivers/md/dm-ioctl.c166
-rw-r--r--drivers/md/dm-kcopyd.c121
-rw-r--r--drivers/md/dm-linear.c13
-rw-r--r--drivers/md/dm-mpath.c12
-rw-r--r--drivers/md/dm-raid.c10
-rw-r--r--drivers/md/dm-raid1.c17
-rw-r--r--drivers/md/dm-snap.c33
-rw-r--r--drivers/md/dm-stripe.c27
-rw-r--r--drivers/md/dm-table.c11
-rw-r--r--drivers/md/dm-target.c2
-rw-r--r--drivers/md/dm-thin-metadata.c12
-rw-r--r--drivers/md/dm-thin.c277
-rw-r--r--drivers/md/dm-verity.c8
-rw-r--r--drivers/md/dm-zero.c2
-rw-r--r--drivers/md/dm.c452
-rw-r--r--drivers/md/persistent-data/Kconfig2
-rw-r--r--drivers/md/persistent-data/Makefile2
-rw-r--r--drivers/md/persistent-data/dm-array.c808
-rw-r--r--drivers/md/persistent-data/dm-array.h166
-rw-r--r--drivers/md/persistent-data/dm-bitset.c163
-rw-r--r--drivers/md/persistent-data/dm-bitset.h165
-rw-r--r--drivers/md/persistent-data/dm-block-manager.c1
-rw-r--r--drivers/md/persistent-data/dm-btree-internal.h1
-rw-r--r--drivers/md/persistent-data/dm-btree-spine.c7
-rw-r--r--drivers/md/persistent-data/dm-btree.c52
-rw-r--r--drivers/md/persistent-data/dm-btree.h15
43 files changed, 8407 insertions, 580 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 91a02eeeb31..e30b490055a 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -210,7 +210,7 @@ config DM_DEBUG
config DM_BUFIO
tristate
- depends on BLK_DEV_DM && EXPERIMENTAL
+ depends on BLK_DEV_DM
---help---
This interface allows you to do buffered I/O on a device and acts
as a cache, holding recently-read blocks in memory and performing
@@ -218,7 +218,7 @@ config DM_BUFIO
config DM_BIO_PRISON
tristate
- depends on BLK_DEV_DM && EXPERIMENTAL
+ depends on BLK_DEV_DM
---help---
Some bio locking schemes used by other device-mapper targets
including thin provisioning.
@@ -251,8 +251,8 @@ config DM_SNAPSHOT
Allow volume managers to take writable snapshots of a device.
config DM_THIN_PROVISIONING
- tristate "Thin provisioning target (EXPERIMENTAL)"
- depends on BLK_DEV_DM && EXPERIMENTAL
+ tristate "Thin provisioning target"
+ depends on BLK_DEV_DM
select DM_PERSISTENT_DATA
select DM_BIO_PRISON
---help---
@@ -268,6 +268,37 @@ config DM_DEBUG_BLOCK_STACK_TRACING
If unsure, say N.
+config DM_CACHE
+ tristate "Cache target (EXPERIMENTAL)"
+ depends on BLK_DEV_DM
+ default n
+ select DM_PERSISTENT_DATA
+ select DM_BIO_PRISON
+ ---help---
+ dm-cache attempts to improve performance of a block device by
+ moving frequently used data to a smaller, higher performance
+ device. Different 'policy' plugins can be used to change the
+ algorithms used to select which blocks are promoted, demoted,
+ cleaned etc. It supports writeback and writethrough modes.
+
+config DM_CACHE_MQ
+ tristate "MQ Cache Policy (EXPERIMENTAL)"
+ depends on DM_CACHE
+ default y
+ ---help---
+ A cache policy that uses a multiqueue ordered by recent hit
+ count to select which blocks should be promoted and demoted.
+ This is meant to be a general purpose policy. It prioritises
+ reads over writes.
+
+config DM_CACHE_CLEANER
+ tristate "Cleaner Cache Policy (EXPERIMENTAL)"
+ depends on DM_CACHE
+ default y
+ ---help---
+ A simple cache policy that writes back all data to the
+ origin. Used when decommissioning a dm-cache.
+
config DM_MIRROR
tristate "Mirror target"
depends on BLK_DEV_DM
@@ -302,8 +333,8 @@ config DM_RAID
in one of the available parity distribution methods.
config DM_LOG_USERSPACE
- tristate "Mirror userspace logging (EXPERIMENTAL)"
- depends on DM_MIRROR && EXPERIMENTAL && NET
+ tristate "Mirror userspace logging"
+ depends on DM_MIRROR && NET
select CONNECTOR
---help---
The userspace logging module provides a mechanism for
@@ -350,8 +381,8 @@ config DM_MULTIPATH_ST
If unsure, say N.
config DM_DELAY
- tristate "I/O delaying target (EXPERIMENTAL)"
- depends on BLK_DEV_DM && EXPERIMENTAL
+ tristate "I/O delaying target"
+ depends on BLK_DEV_DM
---help---
A target that delays reads and/or writes and can send
them to different devices. Useful for testing.
@@ -365,14 +396,14 @@ config DM_UEVENT
Generate udev events for DM events.
config DM_FLAKEY
- tristate "Flakey target (EXPERIMENTAL)"
- depends on BLK_DEV_DM && EXPERIMENTAL
+ tristate "Flakey target"
+ depends on BLK_DEV_DM
---help---
A target that intermittently fails I/O for debugging purposes.
config DM_VERITY
- tristate "Verity target support (EXPERIMENTAL)"
- depends on BLK_DEV_DM && EXPERIMENTAL
+ tristate "Verity target support"
+ depends on BLK_DEV_DM
select CRYPTO
select CRYPTO_HASH
select DM_BUFIO
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 94dce8b4932..7ceeaefc0e9 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -11,6 +11,9 @@ dm-mirror-y += dm-raid1.o
dm-log-userspace-y \
+= dm-log-userspace-base.o dm-log-userspace-transfer.o
dm-thin-pool-y += dm-thin.o dm-thin-metadata.o
+dm-cache-y += dm-cache-target.o dm-cache-metadata.o dm-cache-policy.o
+dm-cache-mq-y += dm-cache-policy-mq.o
+dm-cache-cleaner-y += dm-cache-policy-cleaner.o
md-mod-y += md.o bitmap.o
raid456-y += raid5.o
@@ -44,6 +47,9 @@ obj-$(CONFIG_DM_ZERO) += dm-zero.o
obj-$(CONFIG_DM_RAID) += dm-raid.o
obj-$(CONFIG_DM_THIN_PROVISIONING) += dm-thin-pool.o
obj-$(CONFIG_DM_VERITY) += dm-verity.o
+obj-$(CONFIG_DM_CACHE) += dm-cache.o
+obj-$(CONFIG_DM_CACHE_MQ) += dm-cache-mq.o
+obj-$(CONFIG_DM_CACHE_CLEANER) += dm-cache-cleaner.o
ifeq ($(CONFIG_DM_UEVENT),y)
dm-mod-objs += dm-uevent.o
diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c
index d9d3f1c7b66..85f0b707425 100644
--- a/drivers/md/dm-bio-prison.c
+++ b/drivers/md/dm-bio-prison.c
@@ -14,14 +14,6 @@
/*----------------------------------------------------------------*/
-struct dm_bio_prison_cell {
- struct hlist_node list;
- struct dm_bio_prison *prison;
- struct dm_cell_key key;
- struct bio *holder;
- struct bio_list bios;
-};
-
struct dm_bio_prison {
spinlock_t lock;
mempool_t *cell_pool;
@@ -87,6 +79,19 @@ void dm_bio_prison_destroy(struct dm_bio_prison *prison)
}
EXPORT_SYMBOL_GPL(dm_bio_prison_destroy);
+struct dm_bio_prison_cell *dm_bio_prison_alloc_cell(struct dm_bio_prison *prison, gfp_t gfp)
+{
+ return mempool_alloc(prison->cell_pool, gfp);
+}
+EXPORT_SYMBOL_GPL(dm_bio_prison_alloc_cell);
+
+void dm_bio_prison_free_cell(struct dm_bio_prison *prison,
+ struct dm_bio_prison_cell *cell)
+{
+ mempool_free(cell, prison->cell_pool);
+}
+EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell);
+
static uint32_t hash_key(struct dm_bio_prison *prison, struct dm_cell_key *key)
{
const unsigned long BIG_PRIME = 4294967291UL;
@@ -114,91 +119,95 @@ static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket,
return NULL;
}
-/*
- * This may block if a new cell needs allocating. You must ensure that
- * cells will be unlocked even if the calling thread is blocked.
- *
- * Returns 1 if the cell was already held, 0 if @inmate is the new holder.
- */
-int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key,
- struct bio *inmate, struct dm_bio_prison_cell **ref)
+static void __setup_new_cell(struct dm_bio_prison *prison,
+ struct dm_cell_key *key,
+ struct bio *holder,
+ uint32_t hash,
+ struct dm_bio_prison_cell *cell)
{
- int r = 1;
- unsigned long flags;
- uint32_t hash = hash_key(prison, key);
- struct dm_bio_prison_cell *cell, *cell2;
-
- BUG_ON(hash > prison->nr_buckets);
-
- spin_lock_irqsave(&prison->lock, flags);
-
- cell = __search_bucket(prison->cells + hash, key);
- if (cell) {
- bio_list_add(&cell->bios, inmate);
- goto out;
- }
+ memcpy(&cell->key, key, sizeof(cell->key));
+ cell->holder = holder;
+ bio_list_init(&cell->bios);
+ hlist_add_head(&cell->list, prison->cells + hash);
+}
- /*
- * Allocate a new cell
- */
- spin_unlock_irqrestore(&prison->lock, flags);
- cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO);
- spin_lock_irqsave(&prison->lock, flags);
+static int __bio_detain(struct dm_bio_prison *prison,
+ struct dm_cell_key *key,
+ struct bio *inmate,
+ struct dm_bio_prison_cell *cell_prealloc,
+ struct dm_bio_prison_cell **cell_result)
+{
+ uint32_t hash = hash_key(prison, key);
+ struct dm_bio_prison_cell *cell;
- /*
- * We've been unlocked, so we have to double check that
- * nobody else has inserted this cell in the meantime.
- */
cell = __search_bucket(prison->cells + hash, key);
if (cell) {
- mempool_free(cell2, prison->cell_pool);
- bio_list_add(&cell->bios, inmate);
- goto out;
+ if (inmate)
+ bio_list_add(&cell->bios, inmate);
+ *cell_result = cell;
+ return 1;
}
- /*
- * Use new cell.
- */
- cell = cell2;
-
- cell->prison = prison;
- memcpy(&cell->key, key, sizeof(cell->key));
- cell->holder = inmate;
- bio_list_init(&cell->bios);
- hlist_add_head(&cell->list, prison->cells + hash);
+ __setup_new_cell(prison, key, inmate, hash, cell_prealloc);
+ *cell_result = cell_prealloc;
+ return 0;
+}
- r = 0;
+static int bio_detain(struct dm_bio_prison *prison,
+ struct dm_cell_key *key,
+ struct bio *inmate,
+ struct dm_bio_prison_cell *cell_prealloc,
+ struct dm_bio_prison_cell **cell_result)
+{
+ int r;
+ unsigned long flags;
-out:
+ spin_lock_irqsave(&prison->lock, flags);
+ r = __bio_detain(prison, key, inmate, cell_prealloc, cell_result);
spin_unlock_irqrestore(&prison->lock, flags);
- *ref = cell;
-
return r;
}
+
+int dm_bio_detain(struct dm_bio_prison *prison,
+ struct dm_cell_key *key,
+ struct bio *inmate,
+ struct dm_bio_prison_cell *cell_prealloc,
+ struct dm_bio_prison_cell **cell_result)
+{
+ return bio_detain(prison, key, inmate, cell_prealloc, cell_result);
+}
EXPORT_SYMBOL_GPL(dm_bio_detain);
+int dm_get_cell(struct dm_bio_prison *prison,
+ struct dm_cell_key *key,
+ struct dm_bio_prison_cell *cell_prealloc,
+ struct dm_bio_prison_cell **cell_result)
+{
+ return bio_detain(prison, key, NULL, cell_prealloc, cell_result);
+}
+EXPORT_SYMBOL_GPL(dm_get_cell);
+
/*
* @inmates must have been initialised prior to this call
*/
-static void __cell_release(struct dm_bio_prison_cell *cell, struct bio_list *inmates)
+static void __cell_release(struct dm_bio_prison_cell *cell,
+ struct bio_list *inmates)
{
- struct dm_bio_prison *prison = cell->prison;
-
hlist_del(&cell->list);
if (inmates) {
- bio_list_add(inmates, cell->holder);
+ if (cell->holder)
+ bio_list_add(inmates, cell->holder);
bio_list_merge(inmates, &cell->bios);
}
-
- mempool_free(cell, prison->cell_pool);
}
-void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios)
+void dm_cell_release(struct dm_bio_prison *prison,
+ struct dm_bio_prison_cell *cell,
+ struct bio_list *bios)
{
unsigned long flags;
- struct dm_bio_prison *prison = cell->prison;
spin_lock_irqsave(&prison->lock, flags);
__cell_release(cell, bios);
@@ -209,20 +218,18 @@ EXPORT_SYMBOL_GPL(dm_cell_release);
/*
* Sometimes we don't want the holder, just the additional bios.
*/
-static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates)
+static void __cell_release_no_holder(struct dm_bio_prison_cell *cell,
+ struct bio_list *inmates)
{
- struct dm_bio_prison *prison = cell->prison;
-
hlist_del(&cell->list);
bio_list_merge(inmates, &cell->bios);
-
- mempool_free(cell, prison->cell_pool);
}
-void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates)
+void dm_cell_release_no_holder(struct dm_bio_prison *prison,
+ struct dm_bio_prison_cell *cell,
+ struct bio_list *inmates)
{
unsigned long flags;
- struct dm_bio_prison *prison = cell->prison;
spin_lock_irqsave(&prison->lock, flags);
__cell_release_no_holder(cell, inmates);
@@ -230,9 +237,9 @@ void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list
}
EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);
-void dm_cell_error(struct dm_bio_prison_cell *cell)
+void dm_cell_error(struct dm_bio_prison *prison,
+ struct dm_bio_prison_cell *cell)
{
- struct dm_bio_prison *prison = cell->prison;
struct bio_list bios;
struct bio *bio;
unsigned long flags;
diff --git a/drivers/md/dm-bio-prison.h b/drivers/md/dm-bio-prison.h
index 53d1a7a84e2..3f833190ead 100644
--- a/drivers/md/dm-bio-prison.h
+++ b/drivers/md/dm-bio-prison.h
@@ -22,7 +22,6 @@
* subsequently unlocked the bios become available.
*/
struct dm_bio_prison;
-struct dm_bio_prison_cell;
/* FIXME: this needs to be more abstract */
struct dm_cell_key {
@@ -31,21 +30,62 @@ struct dm_cell_key {
dm_block_t block;
};
+/*
+ * Treat this as opaque, only in header so callers can manage allocation
+ * themselves.
+ */
+struct dm_bio_prison_cell {
+ struct hlist_node list;
+ struct dm_cell_key key;
+ struct bio *holder;
+ struct bio_list bios;
+};
+
struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells);
void dm_bio_prison_destroy(struct dm_bio_prison *prison);
/*
- * This may block if a new cell needs allocating. You must ensure that
- * cells will be unlocked even if the calling thread is blocked.
+ * These two functions just wrap a mempool. This is a transitory step:
+ * Eventually all bio prison clients should manage their own cell memory.
*
- * Returns 1 if the cell was already held, 0 if @inmate is the new holder.
+ * Like mempool_alloc(), dm_bio_prison_alloc_cell() can only fail if called
+ * in interrupt context or passed GFP_NOWAIT.
*/
-int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key,
- struct bio *inmate, struct dm_bio_prison_cell **ref);
+struct dm_bio_prison_cell *dm_bio_prison_alloc_cell(struct dm_bio_prison *prison,
+ gfp_t gfp);
+void dm_bio_prison_free_cell(struct dm_bio_prison *prison,
+ struct dm_bio_prison_cell *cell);
-void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios);
-void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates);
-void dm_cell_error(struct dm_bio_prison_cell *cell);
+/*
+ * Creates, or retrieves a cell for the given key.
+ *
+ * Returns 1 if pre-existing cell returned, zero if new cell created using
+ * @cell_prealloc.
+ */
+int dm_get_cell(struct dm_bio_prison *prison,
+ struct dm_cell_key *key,
+ struct dm_bio_prison_cell *cell_prealloc,
+ struct dm_bio_prison_cell **cell_result);
+
+/*
+ * An atomic op that combines retrieving a cell, and adding a bio to it.
+ *
+ * Returns 1 if the cell was already held, 0 if @inmate is the new holder.
+ */
+int dm_bio_detain(struct dm_bio_prison *prison,
+ struct dm_cell_key *key,
+ struct bio *inmate,
+ struct dm_bio_prison_cell *cell_prealloc,
+ struct dm_bio_prison_cell **cell_result);
+
+void dm_cell_release(struct dm_bio_prison *prison,
+ struct dm_bio_prison_cell *cell,
+ struct bio_list *bios);
+void dm_cell_release_no_holder(struct dm_bio_prison *prison,
+ struct dm_bio_prison_cell *cell,
+ struct bio_list *inmates);
+void dm_cell_error(struct dm_bio_prison *prison,
+ struct dm_bio_prison_cell *cell);
/*----------------------------------------------------------------*/
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 93205e32a00..3c955e10a61 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1192,7 +1192,7 @@ EXPORT_SYMBOL_GPL(dm_bufio_write_dirty_buffers);
int dm_bufio_issue_flush(struct dm_bufio_client *c)
{
struct dm_io_request io_req = {
- .bi_rw = REQ_FLUSH,
+ .bi_rw = WRITE_FLUSH,
.mem.type = DM_IO_KMEM,
.mem.ptr.addr = NULL,
.client = c->dm_io,
diff --git a/drivers/md/dm-cache-block-types.h b/drivers/md/dm-cache-block-types.h
new file mode 100644
index 00000000000..bed4ad4e1b7
--- /dev/null
+++ b/drivers/md/dm-cache-block-types.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#ifndef DM_CACHE_BLOCK_TYPES_H
+#define DM_CACHE_BLOCK_TYPES_H
+
+#include "persistent-data/dm-block-manager.h"
+
+/*----------------------------------------------------------------*/
+
+/*
+ * It's helpful to get sparse to differentiate between indexes into the
+ * origin device, indexes into the cache device, and indexes into the
+ * discard bitset.
+ */
+
+typedef dm_block_t __bitwise__ dm_oblock_t;
+typedef uint32_t __bitwise__ dm_cblock_t;
+typedef dm_block_t __bitwise__ dm_dblock_t;
+
+static inline dm_oblock_t to_oblock(dm_block_t b)
+{
+ return (__force dm_oblock_t) b;
+}
+
+static inline dm_block_t from_oblock(dm_oblock_t b)
+{
+ return (__force dm_block_t) b;
+}
+
+static inline dm_cblock_t to_cblock(uint32_t b)
+{
+ return (__force dm_cblock_t) b;
+}
+
+static inline uint32_t from_cblock(dm_cblock_t b)
+{
+ return (__force uint32_t) b;
+}
+
+static inline dm_dblock_t to_dblock(dm_block_t b)
+{
+ return (__force dm_dblock_t) b;
+}
+
+static inline dm_block_t from_dblock(dm_dblock_t b)
+{
+ return (__force dm_block_t) b;
+}
+
+#endif /* DM_CACHE_BLOCK_TYPES_H */
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
new file mode 100644
index 00000000000..fbd3625f274
--- /dev/null
+++ b/drivers/md/dm-cache-metadata.c
@@ -0,0 +1,1146 @@
+/*
+ * Copyright (C) 2012 Red Hat, Inc.
+ *
+ * This file is released under the GPL.
+ */
+
+#include "dm-cache-metadata.h"
+
+#include "persistent-data/dm-array.h"
+#include "persistent-data/dm-bitset.h"
+#include "persistent-data/dm-space-map.h"
+#include "persistent-data/dm-space-map-disk.h"
+#include "persistent-data/dm-transaction-manager.h"
+
+#include <linux/device-mapper.h>
+
+/*----------------------------------------------------------------*/
+
+#define DM_MSG_PREFIX "cache metadata"
+
+#define CACHE_SUPERBLOCK_MAGIC 06142003
+#define CACHE_SUPERBLOCK_LOCATION 0
+#define CACHE_VERSION 1
+#define CACHE_METADATA_CACHE_SIZE 64
+
+/*
+ * 3 for btree insert +
+ * 2 for btree lookup used within space map
+ */
+#define CACHE_MAX_CONCURRENT_LOCKS 5
+#define SPACE_MAP_ROOT_SIZE 128
+
+enum superblock_flag_bits {
+ /* for spotting crashes that would invalidate the dirty bitset */
+ CLEAN_SHUTDOWN,
+};
+
+/*
+ * Each mapping from cache block -> origin block carries a set of flags.
+ */
+enum mapping_bits {
+ /*
+ * A valid mapping. Because we're using an array we clear this
+ * flag for an non existant mapping.
+ */
+ M_VALID = 1,
+
+ /*
+ * The data on the cache is different from that on the origin.
+ */
+ M_DIRTY = 2
+};
+
+struct cache_disk_superblock {
+ __le32 csum;
+ __le32 flags;
+ __le64 blocknr;
+
+ __u8 uuid[16];
+ __le64 magic;
+ __le32 version;
+
+ __u8 policy_name[CACHE_POLICY_NAME_SIZE];
+ __le32 policy_hint_size;
+
+ __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE];
+ __le64 mapping_root;
+ __le64 hint_root;
+
+ __le64 discard_root;
+ __le64 discard_block_size;
+ __le64 discard_nr_blocks;
+
+ __le32 data_block_size;
+ __le32 metadata_block_size;
+ __le32 cache_blocks;
+
+ __le32 compat_flags;
+ __le32 compat_ro_flags;
+ __le32 incompat_flags;
+
+ __le32 read_hits;
+ __le32 read_misses;
+ __le32 write_hits;
+ __le32 write_misses;
+} __packed;
+
+struct dm_cache_metadata {
+ struct block_device *bdev;
+ struct dm_block_manager *bm;
+ struct dm_space_map *metadata_sm;
+ struct dm_transaction_manager *tm;
+
+ struct dm_array_info info;
+ struct dm_array_info hint_info;
+ struct dm_disk_bitset discard_info;
+
+ struct rw_semaphore root_lock;
+ dm_block_t root;
+ dm_block_t hint_root;
+ dm_block_t discard_root;
+
+ sector_t discard_block_size;
+ dm_dblock_t discard_nr_blocks;
+
+ sector_t data_block_size;
+ dm_cblock_t cache_blocks;
+ bool changed:1;
+ bool clean_when_opened:1;
+
+ char policy_name[CACHE_POLICY_NAME_SIZE];
+ size_t policy_hint_size;
+ struct dm_cache_statistics stats;
+};
+
+/*-------------------------------------------------------------------
+ * superblock validator
+ *-----------------------------------------------------------------*/
+
+#define SUPERBLOCK_CSUM_XOR 9031977
+
+static void sb_prepare_for_write(struct dm_block_validator *v,
+ struct dm_block *b,
+ size_t sb_block_size)
+{
+ struct cache_disk_superblock *disk_super = dm_block_data(b);
+
+ disk_super->blocknr = cpu_to_le64(dm_block_location(b));
+ disk_super->csum = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
+ sb_block_size - sizeof(__le32),
+ SUPERBLOCK_CSUM_XOR));
+}
+
+static int sb_check(struct dm_block_validator *v,
+ struct dm_block *b,
+ size_t sb_block_size)
+{
+ struct cache_disk_superblock *disk_super = dm_block_data(b);
+ __le32 csum_le;
+
+ if (dm_block_location(b) != le64_to_cpu(disk_super->blocknr)) {
+ DMERR("sb_check failed: blocknr %llu: wanted %llu",
+ le64_to_cpu(disk_super->blocknr),
+ (unsigned long long)dm_block_location(b));
+ return -ENOTBLK;
+ }
+
+ if (le64_to_cpu(disk_super->magic) != CACHE_SUPERBLOCK_MAGIC) {
+ DMERR("sb_check failed: magic %llu: wanted %llu",
+ le64_to_cpu(disk_super->magic),
+ (unsigned long long)CACHE_SUPERBLOCK_MAGIC);
+ return -EILSEQ;
+ }
+
+ csum_le = cpu_to_le32(dm_bm_checksum(&disk_super->flags,
+ sb_block_size - sizeof(__le32),
+ SUPERBLOCK_CSUM_XOR));
+ if (csum_le != disk_super->csum) {
+ DMERR("sb_check failed: csum %u: wanted %u",
+ le32_to_cpu(csum_le), le32_to_cpu(disk_super->csum));
+ return -EILSEQ;
+ }
+
+ return 0;
+}
+
+static struct dm_block_validator sb_validator = {
+ .name = "superblock",
+ .prepare_for_write = sb_prepare_for_write,
+ .check = sb_check
+};
+
+/*----------------------------------------------------------------*/
+
+static int superblock_read_lock(struct dm_cache_metadata *cmd,
+ struct dm_block **sblock)
+{
+ return dm_bm_read_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
+ &sb_validator, sblock);
+}
+
+static int superblock_lock_zero(struct dm_cache_metadata *cmd,
+ struct dm_block **sblock)
+{
+ return dm_bm_write_lock_zero(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
+ &sb_validator, sblock);
+}
+
+static int superblock_lock(struct dm_cache_metadata *cmd,
+ struct dm_block **sblock)
+{
+ return dm_bm_write_lock(cmd->bm, CACHE_SUPERBLOCK_LOCATION,
+ &sb_validator, sblock);
+}
+
+/*----------------------------------------------------------------*/
+
+static int __superblock_all_zeroes(struct dm_block_manager *bm, int *result)
+{
+ int r;
+ unsigned i;
+ struct dm_block *b;
+ __le64 *data_le, zero = cpu_to_le64(0);
+ unsigned sb_block_size = dm_bm_block_size(bm) / sizeof(__le64);
+
+ /*
+ * We can't use a validator here - it may be all zeroes.
+ */
+ r = dm_bm_read_lock(bm, CACHE_SUPERBLOCK_LOCATION, NULL, &b);
+ if (r)
+ return r;
+
+ data_le = dm_block_data(b);
+ *result = 1;
+ for (i = 0; i < sb_block_size; i++) {
+ if (data_le[i] != zero) {
+ *result = 0;
+ break;
+ }
+ }
+
+ return dm_bm_unlock(b);
+}
+
+static void __setup_mapping_info(struct dm_cache_metadata *cmd)
+{
+ struct dm_btree_value_type vt;
+
+ vt.context = NULL;
+ vt.size = sizeof(__le64);
+ vt.inc = NULL;
+ vt.dec = NULL;
+ vt.equal = NULL;
+ dm_array_info_init(&cmd->info, cmd->tm, &vt);
+
+ if (cmd->policy_hint_size) {
+ vt.size = sizeof(__le32);
+ dm_array_info_init(&cmd->hint_info, cmd->tm, &vt);
+ }
+}
+
+static int __write_initial_superblock(struct dm_cache_metadata *cmd)
+{
+ int r;
+ struct dm_block *sblock;
+ size_t metadata_len;
+ struct cache_disk_superblock *disk_super;
+ sector_t bdev_size = i_size_read(cmd->bdev->bd_inode) >> SECTOR_SHIFT;
+
+ /* FIXME: see if we can lose the max sectors limit */
+ if (bdev_size > DM_CACHE_METADATA_MAX_SECTORS)
+ bdev_size = DM_CACHE_METADATA_MAX_SECTORS;
+
+ r = dm_sm_root_size(cmd->metadata_sm, &metadata_len);
+ if (r < 0)
+ return r;
+
+ r = dm_tm_pre_commit(cmd->tm);
+ if (r < 0)
+ return r;
+
+ r = superblock_lock_zero(cmd, &sblock);
+ if (r)
+ return r;
+
+ disk_super = dm_block_data(sblock);
+ disk_super->flags = 0;
+ memset(disk_super->uuid, 0, sizeof(disk_super->uuid));
+ disk_super->magic = cpu_to_le64(CACHE_SUPERBLOCK_MAGIC);
+ disk_super->version = cpu_to_le32(CACHE_VERSION);
+ memset(disk_super->policy_name, 0, CACHE_POLICY_NAME_SIZE);
+ disk_super->policy_hint_size = 0;
+
+ r = dm_sm_copy