aboutsummaryrefslogtreecommitdiff
path: root/drivers/md/persistent-data
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/persistent-data')
-rw-r--r--drivers/md/persistent-data/Kconfig10
-rw-r--r--drivers/md/persistent-data/dm-array.c15
-rw-r--r--drivers/md/persistent-data/dm-bitset.c10
-rw-r--r--drivers/md/persistent-data/dm-bitset.h1
-rw-r--r--drivers/md/persistent-data/dm-block-manager.c23
-rw-r--r--drivers/md/persistent-data/dm-block-manager.h10
-rw-r--r--drivers/md/persistent-data/dm-btree.c33
-rw-r--r--drivers/md/persistent-data/dm-btree.h8
-rw-r--r--drivers/md/persistent-data/dm-space-map-common.c38
-rw-r--r--drivers/md/persistent-data/dm-space-map-disk.c18
-rw-r--r--drivers/md/persistent-data/dm-space-map-metadata.c155
-rw-r--r--drivers/md/persistent-data/dm-space-map-metadata.h11
-rw-r--r--drivers/md/persistent-data/dm-transaction-manager.c5
-rw-r--r--drivers/md/persistent-data/dm-transaction-manager.h17
14 files changed, 257 insertions, 97 deletions
diff --git a/drivers/md/persistent-data/Kconfig b/drivers/md/persistent-data/Kconfig
index 19b26879541..0c2dec7aec2 100644
--- a/drivers/md/persistent-data/Kconfig
+++ b/drivers/md/persistent-data/Kconfig
@@ -6,3 +6,13 @@ config DM_PERSISTENT_DATA
---help---
Library providing immutable on-disk data structure support for
device-mapper targets such as the thin provisioning target.
+
+config DM_DEBUG_BLOCK_STACK_TRACING
+ boolean "Keep stack trace of persistent data block lock holders"
+ depends on STACKTRACE_SUPPORT && DM_PERSISTENT_DATA
+ select STACKTRACE
+ ---help---
+ Enable this for messages that may help debug problems with the
+ block manager locking used by thin provisioning and caching.
+
+ If unsure, say N.
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c
index 172147eb1d4..1d75b1dc1e2 100644
--- a/drivers/md/persistent-data/dm-array.c
+++ b/drivers/md/persistent-data/dm-array.c
@@ -317,8 +317,16 @@ static int shadow_ablock(struct dm_array_info *info, dm_block_t *root,
* The shadow op will often be a noop. Only insert if it really
* copied data.
*/
- if (dm_block_location(*block) != b)
+ if (dm_block_location(*block) != b) {
+ /*
+ * dm_tm_shadow_block will have already decremented the old
+ * block, but it is still referenced by the btree. We
+ * increment to stop the insert decrementing it below zero
+ * when overwriting the old value.
+ */
+ dm_tm_inc(info->btree_info.tm, b);
r = insert_ablock(info, index, *block, root);
+ }
return r;
}
@@ -509,15 +517,18 @@ static int grow_add_tail_block(struct resize *resize)
static int grow_needs_more_blocks(struct resize *resize)
{
int r;
+ unsigned old_nr_blocks = resize->old_nr_full_blocks;
if (resize->old_nr_entries_in_last_block > 0) {
+ old_nr_blocks++;
+
r = grow_extend_tail_block(resize, resize->max_entries);
if (r)
return r;
}
r = insert_full_ablocks(resize->info, resize->size_of_block,
- resize->old_nr_full_blocks,
+ old_nr_blocks,
resize->new_nr_full_blocks,
resize->max_entries, resize->value,
&resize->root);
diff --git a/drivers/md/persistent-data/dm-bitset.c b/drivers/md/persistent-data/dm-bitset.c
index cd9a86d4cdf..36f7cc2c710 100644
--- a/drivers/md/persistent-data/dm-bitset.c
+++ b/drivers/md/persistent-data/dm-bitset.c
@@ -65,7 +65,7 @@ int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root,
int r;
__le64 value;
- if (!info->current_index_set)
+ if (!info->current_index_set || !info->dirty)
return 0;
value = cpu_to_le64(info->current_bits);
@@ -77,6 +77,8 @@ int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root,
return r;
info->current_index_set = false;
+ info->dirty = false;
+
return 0;
}
EXPORT_SYMBOL_GPL(dm_bitset_flush);
@@ -94,6 +96,8 @@ static int read_bits(struct dm_disk_bitset *info, dm_block_t root,
info->current_bits = le64_to_cpu(value);
info->current_index_set = true;
info->current_index = array_index;
+ info->dirty = false;
+
return 0;
}
@@ -126,6 +130,8 @@ int dm_bitset_set_bit(struct dm_disk_bitset *info, dm_block_t root,
return r;
set_bit(b, (unsigned long *) &info->current_bits);
+ info->dirty = true;
+
return 0;
}
EXPORT_SYMBOL_GPL(dm_bitset_set_bit);
@@ -141,6 +147,8 @@ int dm_bitset_clear_bit(struct dm_disk_bitset *info, dm_block_t root,
return r;
clear_bit(b, (unsigned long *) &info->current_bits);
+ info->dirty = true;
+
return 0;
}
EXPORT_SYMBOL_GPL(dm_bitset_clear_bit);
diff --git a/drivers/md/persistent-data/dm-bitset.h b/drivers/md/persistent-data/dm-bitset.h
index e1b9bea14aa..c2287d672ef 100644
--- a/drivers/md/persistent-data/dm-bitset.h
+++ b/drivers/md/persistent-data/dm-bitset.h
@@ -71,6 +71,7 @@ struct dm_disk_bitset {
uint64_t current_bits;
bool current_index_set:1;
+ bool dirty:1;
};
/*
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c
index a7e8bf29638..087411c95ff 100644
--- a/drivers/md/persistent-data/dm-block-manager.c
+++ b/drivers/md/persistent-data/dm-block-manager.c
@@ -104,7 +104,7 @@ static int __check_holder(struct block_lock *lock)
for (i = 0; i < MAX_HOLDERS; i++) {
if (lock->holders[i] == current) {
- DMERR("recursive lock detected in pool metadata");
+ DMERR("recursive lock detected in metadata");
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
DMERR("previously held here:");
print_stack_trace(lock->traces + i, 4);
@@ -595,25 +595,14 @@ int dm_bm_unlock(struct dm_block *b)
}
EXPORT_SYMBOL_GPL(dm_bm_unlock);
-int dm_bm_flush_and_unlock(struct dm_block_manager *bm,
- struct dm_block *superblock)
+int dm_bm_flush(struct dm_block_manager *bm)
{
- int r;
-
if (bm->read_only)
return -EPERM;
- r = dm_bufio_write_dirty_buffers(bm->bufio);
- if (unlikely(r)) {
- dm_bm_unlock(superblock);
- return r;
- }
-
- dm_bm_unlock(superblock);
-
return dm_bufio_write_dirty_buffers(bm->bufio);
}
-EXPORT_SYMBOL_GPL(dm_bm_flush_and_unlock);
+EXPORT_SYMBOL_GPL(dm_bm_flush);
void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b)
{
@@ -626,6 +615,12 @@ void dm_bm_set_read_only(struct dm_block_manager *bm)
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_only);
+void dm_bm_set_read_write(struct dm_block_manager *bm)
+{
+ bm->read_only = false;
+}
+EXPORT_SYMBOL_GPL(dm_bm_set_read_write);
+
u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor)
{
return crc32c(~(u32) 0, data, len) ^ init_xor;
diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h
index 9a82083a66b..1b95dfc1778 100644
--- a/drivers/md/persistent-data/dm-block-manager.h
+++ b/drivers/md/persistent-data/dm-block-manager.h
@@ -105,12 +105,11 @@ int dm_bm_unlock(struct dm_block *b);
*
* This method always blocks.
*/
-int dm_bm_flush_and_unlock(struct dm_block_manager *bm,
- struct dm_block *superblock);
+int dm_bm_flush(struct dm_block_manager *bm);
- /*
- * Request data be prefetched into the cache.
- */
+/*
+ * Request data is prefetched into the cache.
+ */
void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b);
/*
@@ -125,6 +124,7 @@ void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b);
* be returned if you do.
*/
void dm_bm_set_read_only(struct dm_block_manager *bm);
+void dm_bm_set_read_write(struct dm_block_manager *bm);
u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor);
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index 468e371ee9b..416060c2570 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -770,8 +770,8 @@ EXPORT_SYMBOL_GPL(dm_btree_insert_notify);
/*----------------------------------------------------------------*/
-static int find_highest_key(struct ro_spine *s, dm_block_t block,
- uint64_t *result_key, dm_block_t *next_block)
+static int find_key(struct ro_spine *s, dm_block_t block, bool find_highest,
+ uint64_t *result_key, dm_block_t *next_block)
{
int i, r;
uint32_t flags;
@@ -788,7 +788,11 @@ static int find_highest_key(struct ro_spine *s, dm_block_t block,
else
i--;
- *result_key = le64_to_cpu(ro_node(s)->keys[i]);
+ if (find_highest)
+ *result_key = le64_to_cpu(ro_node(s)->keys[i]);
+ else
+ *result_key = le64_to_cpu(ro_node(s)->keys[0]);
+
if (next_block || flags & INTERNAL_NODE)
block = value64(ro_node(s), i);
@@ -799,16 +803,16 @@ static int find_highest_key(struct ro_spine *s, dm_block_t block,
return 0;
}
-int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
- uint64_t *result_keys)
+static int dm_btree_find_key(struct dm_btree_info *info, dm_block_t root,
+ bool find_highest, uint64_t *result_keys)
{
int r = 0, count = 0, level;
struct ro_spine spine;
init_ro_spine(&spine, info);
for (level = 0; level < info->levels; level++) {
- r = find_highest_key(&spine, root, result_keys + level,
- level == info->levels - 1 ? NULL : &root);
+ r = find_key(&spine, root, find_highest, result_keys + level,
+ level == info->levels - 1 ? NULL : &root);
if (r == -ENODATA) {
r = 0;
break;
@@ -822,8 +826,23 @@ int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
return r ? r : count;
}
+
+int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *result_keys)
+{
+ return dm_btree_find_key(info, root, true, result_keys);
+}
EXPORT_SYMBOL_GPL(dm_btree_find_highest_key);
+int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *result_keys)
+{
+ return dm_btree_find_key(info, root, false, result_keys);
+}
+EXPORT_SYMBOL_GPL(dm_btree_find_lowest_key);
+
+/*----------------------------------------------------------------*/
+
/*
* FIXME: We shouldn't use a recursive algorithm when we have limited stack
* space. Also this only works for single level trees.
diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h
index 8672d159e0b..dacfc34180b 100644
--- a/drivers/md/persistent-data/dm-btree.h
+++ b/drivers/md/persistent-data/dm-btree.h
@@ -137,6 +137,14 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
/*
* Returns < 0 on failure. Otherwise the number of key entries that have
* been filled out. Remember trees can have zero entries, and as such have
+ * no lowest key.
+ */
+int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *result_keys);
+
+/*
+ * Returns < 0 on failure. Otherwise the number of key entries that have
+ * been filled out. Remember trees can have zero entries, and as such have
* no highest key.
*/
int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c
index 6058569fe86..aacbe70c2c2 100644
--- a/drivers/md/persistent-data/dm-space-map-common.c
+++ b/drivers/md/persistent-data/dm-space-map-common.c
@@ -245,6 +245,10 @@ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
return -EINVAL;
}
+ /*
+ * We need to set this before the dm_tm_new_block() call below.
+ */
+ ll->nr_blocks = nr_blocks;
for (i = old_blocks; i < blocks; i++) {
struct dm_block *b;
struct disk_index_entry idx;
@@ -252,6 +256,7 @@ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b);
if (r < 0)
return r;
+
idx.blocknr = cpu_to_le64(dm_block_location(b));
r = dm_tm_unlock(ll->tm, b);
@@ -266,7 +271,6 @@ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
return r;
}
- ll->nr_blocks = nr_blocks;
return 0;
}
@@ -381,7 +385,7 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
}
static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
- uint32_t (*mutator)(void *context, uint32_t old),
+ int (*mutator)(void *context, uint32_t old, uint32_t *new),
void *context, enum allocation_event *ev)
{
int r;
@@ -410,11 +414,17 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
if (old > 2) {
r = sm_ll_lookup_big_ref_count(ll, b, &old);
- if (r < 0)
+ if (r < 0) {
+ dm_tm_unlock(ll->tm, nb);
return r;
+ }
}
- ref_count = mutator(context, old);
+ r = mutator(context, old, &ref_count);
+ if (r) {
+ dm_tm_unlock(ll->tm, nb);
+ return r;
+ }
if (ref_count <= 2) {
sm_set_bitmap(bm_le, bit, ref_count);
@@ -465,9 +475,10 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
return ll->save_ie(ll, index, &ie_disk);
}
-static uint32_t set_ref_count(void *context, uint32_t old)
+static int set_ref_count(void *context, uint32_t old, uint32_t *new)
{
- return *((uint32_t *) context);
+ *new = *((uint32_t *) context);
+ return 0;
}
int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
@@ -476,9 +487,10 @@ int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
return sm_ll_mutate(ll, b, set_ref_count, &ref_count, ev);
}
-static uint32_t inc_ref_count(void *context, uint32_t old)
+static int inc_ref_count(void *context, uint32_t old, uint32_t *new)
{
- return old + 1;
+ *new = old + 1;
+ return 0;
}
int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
@@ -486,9 +498,15 @@ int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
return sm_ll_mutate(ll, b, inc_ref_count, NULL, ev);
}
-static uint32_t dec_ref_count(void *context, uint32_t old)
+static int dec_ref_count(void *context, uint32_t old, uint32_t *new)
{
- return old - 1;
+ if (!old) {
+ DMERR_LIMIT("unable to decrement a reference count below 0");
+ return -EINVAL;
+ }
+
+ *new = old - 1;
+ return 0;
}
int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c
index e735a6d5a79..cfbf9617e46 100644
--- a/drivers/md/persistent-data/dm-space-map-disk.c
+++ b/drivers/md/persistent-data/dm-space-map-disk.c
@@ -140,26 +140,10 @@ static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b)
static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b)
{
- int r;
- uint32_t old_count;
enum allocation_event ev;
struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
- r = sm_ll_dec(&smd->ll, b, &ev);
- if (!r && (ev == SM_FREE)) {
- /*
- * It's only free if it's also free in the last
- * transaction.
- */
- r = sm_ll_lookup(&smd->old_ll, b, &old_count);
- if (r)
- return r;
-
- if (!old_count)
- smd->nr_allocated_this_transaction--;
- }
-
- return r;
+ return sm_ll_dec(&smd->ll, b, &ev);
}
static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b)
diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c
index 1c959684cae..786b689bdfc 100644
--- a/drivers/md/persistent-data/dm-space-map-metadata.c
+++ b/drivers/md/persistent-data/dm-space-map-metadata.c
@@ -91,6 +91,69 @@ struct block_op {
dm_block_t block;
};
+struct bop_ring_buffer {
+ unsigned begin;
+ unsigned end;
+ struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1];
+};
+
+static void brb_init(struct bop_ring_buffer *brb)
+{
+ brb->begin = 0;
+ brb->end = 0;
+}
+
+static bool brb_empty(struct bop_ring_buffer *brb)
+{
+ return brb->begin == brb->end;
+}
+
+static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old)
+{
+ unsigned r = old + 1;
+ return (r >= (sizeof(brb->bops) / sizeof(*brb->bops))) ? 0 : r;
+}
+
+static int brb_push(struct bop_ring_buffer *brb,
+ enum block_op_type type, dm_block_t b)
+{
+ struct block_op *bop;
+ unsigned next = brb_next(brb, brb->end);
+
+ /*
+ * We don't allow the last bop to be filled, this way we can
+ * differentiate between full and empty.
+ */
+ if (next == brb->begin)
+ return -ENOMEM;
+
+ bop = brb->bops + brb->end;
+ bop->type = type;
+ bop->block = b;
+
+ brb->end = next;
+
+ return 0;
+}
+
+static int brb_pop(struct bop_ring_buffer *brb, struct block_op *result)
+{
+ struct block_op *bop;
+
+ if (brb_empty(brb))
+ return -ENODATA;
+
+ bop = brb->bops + brb->begin;
+ result->type = bop->type;
+ result->block = bop->block;
+
+ brb->begin = brb_next(brb, brb->begin);
+
+ return 0;
+}
+
+/*----------------------------------------------------------------*/
+
struct sm_metadata {
struct dm_space_map sm;
@@ -101,25 +164,20 @@ struct sm_metadata {
unsigned recursion_count;
unsigned allocated_this_transaction;
- unsigned nr_uncommitted;
- struct block_op uncommitted[MAX_RECURSIVE_ALLOCATIONS];
+ struct bop_ring_buffer uncommitted;
struct threshold threshold;
};
static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b)
{
- struct block_op *op;
+ int r = brb_push(&smm->uncommitted, type, b);
- if (smm->nr_uncommitted == MAX_RECURSIVE_ALLOCATIONS) {
+ if (r) {
DMERR("too many recursive allocations");
return -ENOMEM;
}
- op = smm->uncommitted + smm->nr_uncommitted++;
- op->type = type;
- op->block = b;
-
return 0;
}
@@ -158,11 +216,17 @@ static int out(struct sm_metadata *smm)
return -ENOMEM;
}
- if (smm->recursion_count == 1 && smm->nr_uncommitted) {
- while (smm->nr_uncommitted && !r) {
- smm->nr_uncommitted--;
- r = commit_bop(smm, smm->uncommitted +
- smm->nr_uncommitted);
+ if (smm->recursion_count == 1) {
+ while (!brb_empty(&smm->uncommitted)) {
+ struct block_op bop;
+
+ r = brb_pop(&smm->uncommitted, &bop);
+ if (r) {
+ DMERR("bug in bop ring buffer");
+ break;
+ }
+
+ r = commit_bop(smm, &bop);
if (r)
break;
}
@@ -217,7 +281,8 @@ static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
uint32_t *result)
{
- int r, i;
+ int r;
+ unsigned i;
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
unsigned adjustment = 0;
@@ -225,8 +290,10 @@ static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
* We may have some uncommitted adjustments to add. This list
* should always be really short.
*/
- for (i = 0; i < smm->nr_uncommitted; i++) {
- struct block_op *op = smm->uncommitted + i;
+ for (i = smm->uncommitted.begin;
+ i != smm->uncommitted.end;
+ i = brb_next(&smm->uncommitted, i)) {
+ struct block_op *op = smm->uncommitted.bops + i;
if (op->block != b)
continue;
@@ -254,7 +321,8 @@ static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
dm_block_t b, int *result)
{
- int r, i, adjustment = 0;
+ int r, adjustment = 0;
+ unsigned i;
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
uint32_t rc;
@@ -262,8 +330,11 @@ static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
* We may have some uncommitted adjustments to add. This list
* should always be really short.
*/
- for (i = 0; i < smm->nr_uncommitted; i++) {
- struct block_op *op = smm->uncommitted + i;
+ for (i = smm->uncommitted.begin;
+ i != smm->uncommitted.end;
+ i = brb_next(&smm->uncommitted, i)) {
+
+ struct block_op *op = smm->uncommitted.bops + i;
if (op->block != b)
continue;
@@ -384,12 +455,16 @@ static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
int r = sm_metadata_new_block_(sm, b);
- if (r)
- DMERR("unable to allocate new metadata block");
+ if (r) {
+ DMERR_LIMIT("unable to allocate new metadata block");
+ return r;
+ }
r = sm_metadata_get_nr_free(sm, &count);
- if (r)
- DMERR("couldn't get free block count");
+ if (r) {
+ DMERR_LIMIT("couldn't get free block count");
+ return r;
+ }
check_threshold(&smm->threshold, count);
@@ -604,20 +679,38 @@ static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
* Flick into a mode where all blocks get allocated in the new area.
*/
smm->begin = old_len;
- memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
+ memcpy(sm, &bootstrap_ops, sizeof(*sm));
/*
* Extend.
*/
r = sm_ll_extend(&smm->ll, extra_blocks);
+ if (r)
+ goto out;
/*
- * Switch back to normal behaviour.
+ * We repeatedly increment then commit until the commit doesn't
+ * allocate any new blocks.
*/
- memcpy(&smm->sm, &ops, sizeof(smm->sm));
- for (i = old_len; !r && i < smm->begin; i++)
- r = sm_ll_inc(&smm->ll, i, &ev);
+ do {
+ for (i = old_len; !r && i < smm->begin; i++) {
+ r = sm_ll_inc(&smm->ll, i, &ev);
+ if (r)
+ goto out;
+ }
+ old_len = smm->begin;
+
+ r = sm_ll_commit(&smm->ll);
+ if (r)
+ goto out;
+
+ } while (old_len != smm->begin);
+out:
+ /*
+ * Switch back to normal behaviour.
+ */
+ memcpy(sm, &ops, sizeof(*sm));
return r;
}
@@ -649,7 +742,7 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
smm->begin = superblock + 1;
smm->recursion_count = 0;
smm->allocated_this_transaction = 0;
- smm->nr_uncommitted = 0;
+ brb_init(&smm->uncommitted);
threshold_init(&smm->threshold);
memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
@@ -658,6 +751,8 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
if (r)
return r;
+ if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
+ nr_blocks = DM_SM_METADATA_MAX_BLOCKS;
r = sm_ll_extend(&smm->ll, nr_blocks);
if (r)
return r;
@@ -691,7 +786,7 @@ int dm_sm_metadata_open(struct dm_space_map *sm,
smm->begin = 0;
smm->recursion_count = 0;
smm->allocated_this_transaction = 0;
- smm->nr_uncommitted = 0;
+ brb_init(&smm->uncommitted);
threshold_init(&smm->threshold);
memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
diff --git a/drivers/md/persistent-data/dm-space-map-metadata.h b/drivers/md/persistent-data/dm-space-map-metadata.h
index 39bba0801cf..64df923974d 100644
--- a/drivers/md/persistent-data/dm-space-map-metadata.h
+++ b/drivers/md/persistent-data/dm-space-map-metadata.h
@@ -9,6 +9,17 @@
#include "dm-transaction-manager.h"
+#define DM_SM_METADATA_BLOCK_SIZE (4096 >> SECTOR_SHIFT)
+
+/*
+ * The metadata device is currently limited in size.
+ *
+ * We have one block of index, which can hold 255 index entries. Each
+ * index entry contains allocation info about ~16k metadata blocks.
+ */
+#define DM_SM_METADATA_MAX_BLOCKS (255 * ((1 << 14) - 64))
+#define DM_SM_METADATA_MAX_SECTORS (DM_SM_METADATA_MAX_BLOCKS * DM_SM_METADATA_BLOCK_SIZE)
+
/*
* Unfortunately we have to use two-phase construction due to the cycle
* between the tm and sm.
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
index 81da1a26042..3bc30a0ae3d 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.c
+++ b/drivers/md/persistent-data/dm-transaction-manager.c
@@ -154,7 +154,7 @@ int dm_tm_pre_commit(struct dm_transaction_manager *tm)
if (r < 0)
return r;
- return 0;
+ return dm_bm_flush(tm->bm);
}
EXPORT_SYMBOL_GPL(dm_tm_pre_commit);
@@ -164,8 +164,9 @@ int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root)
return -EWOULDBLOCK;
wipe_shadow_table(tm);
+ dm_bm_unlock(root);
- return dm_bm_flush_and_unlock(tm->bm, root);
+ return dm_bm_flush(tm->bm);
}
EXPORT_SYMBOL_GPL(dm_tm_commit);
diff --git a/drivers/md/persistent-data/dm-transaction-manager.h b/drivers/md/persistent-data/dm-transaction-manager.h
index b5b139076ca..2772ed2a781 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.h
+++ b/drivers/md/persistent-data/dm-transaction-manager.h
@@ -38,18 +38,17 @@ struct dm_transaction_manager *dm_tm_create_non_blocking_clone(struct dm_transac
/*
* We use a 2-phase commit here.
*
- * i) In the first phase the block manager is told to start flushing, and
- * the changes to the space map are written to disk. You should interrogate
- * your particular space map to get detail of its root node etc. to be
- * included in your superblock.
+ * i) Make all changes for the transaction *except* for the superblock.
+ * Then call dm_tm_pre_commit() to flush them to disk.
*
- * ii) @root will be committed last. You shouldn't use more than the
- * first 512 bytes of @root if you wish the transaction to survive a power
- * failure. You *must* have a write lock held on @root for both stage (i)
- * and (ii). The commit will drop the write lock.
+ * ii) Lock your superblock. Update. Then call dm_tm_commit() which will
+ * unlock the superblock and flush it. No other blocks should be updated
+ * during this period. Care should be taken to never unlock a partially
+ * updated superblock; perform any operations that could fail *before* you
+ * take the superblock lock.
*/
int dm_tm_pre_commit(struct dm_transaction_manager *tm);
-int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root);
+int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *superblock);
/*
* These methods are the only way to get hold of a writeable block.