about | summary | refs | log | tree | commit | diff
path: root/drivers/md/persistent-data
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/persistent-data')
-rw-r--r-- drivers/md/persistent-data/Kconfig 10
-rw-r--r-- drivers/md/persistent-data/dm-array.c 15
-rw-r--r-- drivers/md/persistent-data/dm-bitset.c 10
-rw-r--r-- drivers/md/persistent-data/dm-bitset.h 1
-rw-r--r-- drivers/md/persistent-data/dm-block-manager.c 28
-rw-r--r-- drivers/md/persistent-data/dm-block-manager.h 9
-rw-r--r-- drivers/md/persistent-data/dm-btree.c 61
-rw-r--r-- drivers/md/persistent-data/dm-btree.h 8
-rw-r--r-- drivers/md/persistent-data/dm-space-map-common.c 95
-rw-r--r-- drivers/md/persistent-data/dm-space-map-disk.c 18
-rw-r--r-- drivers/md/persistent-data/dm-space-map-metadata.c 155
-rw-r--r-- drivers/md/persistent-data/dm-space-map-metadata.h 11
-rw-r--r-- drivers/md/persistent-data/dm-transaction-manager.c 5
-rw-r--r-- drivers/md/persistent-data/dm-transaction-manager.h 17
14 files changed, 325 insertions, 118 deletions
diff --git a/drivers/md/persistent-data/Kconfig b/drivers/md/persistent-data/Kconfig
index 19b26879541..0c2dec7aec2 100644
--- a/drivers/md/persistent-data/Kconfig
+++ b/drivers/md/persistent-data/Kconfig
@@ -6,3 +6,13 @@ config DM_PERSISTENT_DATA
---help---
Library providing immutable on-disk data structure support for
device-mapper targets such as the thin provisioning target.
+
+config DM_DEBUG_BLOCK_STACK_TRACING
+ boolean "Keep stack trace of persistent data block lock holders"
+ depends on STACKTRACE_SUPPORT && DM_PERSISTENT_DATA
+ select STACKTRACE
+ ---help---
+ Enable this for messages that may help debug problems with the
+ block manager locking used by thin provisioning and caching.
+
+ If unsure, say N.
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c
index 172147eb1d4..1d75b1dc1e2 100644
--- a/drivers/md/persistent-data/dm-array.c
+++ b/drivers/md/persistent-data/dm-array.c
@@ -317,8 +317,16 @@ static int shadow_ablock(struct dm_array_info *info, dm_block_t *root,
* The shadow op will often be a noop. Only insert if it really
* copied data.
*/
- if (dm_block_location(*block) != b)
+ if (dm_block_location(*block) != b) {
+ /*
+ * dm_tm_shadow_block will have already decremented the old
+ * block, but it is still referenced by the btree. We
+ * increment to stop the insert decrementing it below zero
+ * when overwriting the old value.
+ */
+ dm_tm_inc(info->btree_info.tm, b);
r = insert_ablock(info, index, *block, root);
+ }
return r;
}
@@ -509,15 +517,18 @@ static int grow_add_tail_block(struct resize *resize)
static int grow_needs_more_blocks(struct resize *resize)
{
int r;
+ unsigned old_nr_blocks = resize->old_nr_full_blocks;
if (resize->old_nr_entries_in_last_block > 0) {
+ old_nr_blocks++;
+
r = grow_extend_tail_block(resize, resize->max_entries);
if (r)
return r;
}
r = insert_full_ablocks(resize->info, resize->size_of_block,
- resize->old_nr_full_blocks,
+ old_nr_blocks,
resize->new_nr_full_blocks,
resize->max_entries, resize->value,
&resize->root);
diff --git a/drivers/md/persistent-data/dm-bitset.c b/drivers/md/persistent-data/dm-bitset.c
index cd9a86d4cdf..36f7cc2c710 100644
--- a/drivers/md/persistent-data/dm-bitset.c
+++ b/drivers/md/persistent-data/dm-bitset.c
@@ -65,7 +65,7 @@ int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root,
int r;
__le64 value;
- if (!info->current_index_set)
+ if (!info->current_index_set || !info->dirty)
return 0;
value = cpu_to_le64(info->current_bits);
@@ -77,6 +77,8 @@ int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root,
return r;
info->current_index_set = false;
+ info->dirty = false;
+
return 0;
}
EXPORT_SYMBOL_GPL(dm_bitset_flush);
@@ -94,6 +96,8 @@ static int read_bits(struct dm_disk_bitset *info, dm_block_t root,
info->current_bits = le64_to_cpu(value);
info->current_index_set = true;
info->current_index = array_index;
+ info->dirty = false;
+
return 0;
}
@@ -126,6 +130,8 @@ int dm_bitset_set_bit(struct dm_disk_bitset *info, dm_block_t root,
return r;
set_bit(b, (unsigned long *) &info->current_bits);
+ info->dirty = true;
+
return 0;
}
EXPORT_SYMBOL_GPL(dm_bitset_set_bit);
@@ -141,6 +147,8 @@ int dm_bitset_clear_bit(struct dm_disk_bitset *info, dm_block_t root,
return r;
clear_bit(b, (unsigned long *) &info->current_bits);
+ info->dirty = true;
+
return 0;
}
EXPORT_SYMBOL_GPL(dm_bitset_clear_bit);
diff --git a/drivers/md/persistent-data/dm-bitset.h b/drivers/md/persistent-data/dm-bitset.h
index e1b9bea14aa..c2287d672ef 100644
--- a/drivers/md/persistent-data/dm-bitset.h
+++ b/drivers/md/persistent-data/dm-bitset.h
@@ -71,6 +71,7 @@ struct dm_disk_bitset {
uint64_t current_bits;
bool current_index_set:1;
+ bool dirty:1;
};
/*
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c
index 81b513890e2..087411c95ff 100644
--- a/drivers/md/persistent-data/dm-block-manager.c
+++ b/drivers/md/persistent-data/dm-block-manager.c
@@ -104,7 +104,7 @@ static int __check_holder(struct block_lock *lock)
for (i = 0; i < MAX_HOLDERS; i++) {
if (lock->holders[i] == current) {
- DMERR("recursive lock detected in pool metadata");
+ DMERR("recursive lock detected in metadata");
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
DMERR("previously held here:");
print_stack_trace(lock->traces + i, 4);
@@ -595,25 +595,19 @@ int dm_bm_unlock(struct dm_block *b)
}
EXPORT_SYMBOL_GPL(dm_bm_unlock);
-int dm_bm_flush_and_unlock(struct dm_block_manager *bm,
- struct dm_block *superblock)
+int dm_bm_flush(struct dm_block_manager *bm)
{
- int r;
-
if (bm->read_only)
return -EPERM;
- r = dm_bufio_write_dirty_buffers(bm->bufio);
- if (unlikely(r)) {
- dm_bm_unlock(superblock);
- return r;
- }
-
- dm_bm_unlock(superblock);
-
return dm_bufio_write_dirty_buffers(bm->bufio);
}
-EXPORT_SYMBOL_GPL(dm_bm_flush_and_unlock);
+EXPORT_SYMBOL_GPL(dm_bm_flush);
+
+void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b)
+{
+ dm_bufio_prefetch(bm->bufio, b, 1);
+}
void dm_bm_set_read_only(struct dm_block_manager *bm)
{
@@ -621,6 +615,12 @@ void dm_bm_set_read_only(struct dm_block_manager *bm)
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_only);
+void dm_bm_set_read_write(struct dm_block_manager *bm)
+{
+ bm->read_only = false;
+}
+EXPORT_SYMBOL_GPL(dm_bm_set_read_write);
+
u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor)
{
return crc32c(~(u32) 0, data, len) ^ init_xor;
diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h
index be5bff61be2..1b95dfc1778 100644
--- a/drivers/md/persistent-data/dm-block-manager.h
+++ b/drivers/md/persistent-data/dm-block-manager.h
@@ -105,8 +105,12 @@ int dm_bm_unlock(struct dm_block *b);
*
* This method always blocks.
*/
-int dm_bm_flush_and_unlock(struct dm_block_manager *bm,
- struct dm_block *superblock);
+int dm_bm_flush(struct dm_block_manager *bm);
+
+/*
+ * Request data is prefetched into the cache.
+ */
+void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b);
/*
* Switches the bm to a read only mode. Once read-only mode
@@ -120,6 +124,7 @@ int dm_bm_flush_and_unlock(struct dm_block_manager *bm,
* be returned if you do.
*/
void dm_bm_set_read_only(struct dm_block_manager *bm);
+void dm_bm_set_read_write(struct dm_block_manager *bm);
u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor);
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index 35865425e4b..416060c2570 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -161,6 +161,7 @@ struct frame {
};
struct del_stack {
+ struct dm_btree_info *info;
struct dm_transaction_manager *tm;
int top;
struct frame spine[MAX_SPINE_DEPTH];
@@ -183,6 +184,20 @@ static int unprocessed_frames(struct del_stack *s)
return s->top >= 0;
}
+static void prefetch_children(struct del_stack *s, struct frame *f)
+{
+ unsigned i;
+ struct dm_block_manager *bm = dm_tm_get_bm(s->tm);
+
+ for (i = 0; i < f->nr_children; i++)
+ dm_bm_prefetch(bm, value64(f->n, i));
+}
+
+static bool is_internal_level(struct dm_btree_info *info, struct frame *f)
+{
+ return f->level < (info->levels - 1);
+}
+
static int push_frame(struct del_stack *s, dm_block_t b, unsigned level)
{
int r;
@@ -205,6 +220,7 @@ static int push_frame(struct del_stack *s, dm_block_t b, unsigned level)
dm_tm_dec(s->tm, b);
else {
+ uint32_t flags;
struct frame *f = s->spine + ++s->top;
r = dm_tm_read_lock(s->tm, b, &btree_node_validator, &f->b);
@@ -217,6 +233,10 @@ static int push_frame(struct del_stack *s, dm_block_t b, unsigned level)
f->level = level;
f->nr_children = le32_to_cpu(f->n->header.nr_entries);
f->current_child = 0;
+
+ flags = le32_to_cpu(f->n->header.flags);
+ if (flags & INTERNAL_NODE || is_internal_level(s->info, f))
+ prefetch_children(s, f);
}
return 0;
@@ -230,11 +250,6 @@ static void pop_frame(struct del_stack *s)
dm_tm_unlock(s->tm, f->b);
}
-static bool is_internal_level(struct dm_btree_info *info, struct frame *f)
-{
- return f->level < (info->levels - 1);
-}
-
int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
{
int r;
@@ -243,6 +258,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
s = kmalloc(sizeof(*s), GFP_KERNEL);
if (!s)
return -ENOMEM;
+ s->info = info;
s->tm = info->tm;
s->top = -1;
@@ -287,7 +303,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
info->value_type.dec(info->value_type.context,
value_ptr(f->n, i));
}
- f->current_child = f->nr_children;
+ pop_frame(s);
}
}
@@ -754,8 +770,8 @@ EXPORT_SYMBOL_GPL(dm_btree_insert_notify);
/*----------------------------------------------------------------*/
-static int find_highest_key(struct ro_spine *s, dm_block_t block,
- uint64_t *result_key, dm_block_t *next_block)
+static int find_key(struct ro_spine *s, dm_block_t block, bool find_highest,
+ uint64_t *result_key, dm_block_t *next_block)
{
int i, r;
uint32_t flags;
@@ -772,7 +788,11 @@ static int find_highest_key(struct ro_spine *s, dm_block_t block,
else
i--;
- *result_key = le64_to_cpu(ro_node(s)->keys[i]);
+ if (find_highest)
+ *result_key = le64_to_cpu(ro_node(s)->keys[i]);
+ else
+ *result_key = le64_to_cpu(ro_node(s)->keys[0]);
+
if (next_block || flags & INTERNAL_NODE)
block = value64(ro_node(s), i);
@@ -783,16 +803,16 @@ static int find_highest_key(struct ro_spine *s, dm_block_t block,
return 0;
}
-int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
- uint64_t *result_keys)
+static int dm_btree_find_key(struct dm_btree_info *info, dm_block_t root,
+ bool find_highest, uint64_t *result_keys)
{
int r = 0, count = 0, level;
struct ro_spine spine;
init_ro_spine(&spine, info);
for (level = 0; level < info->levels; level++) {
- r = find_highest_key(&spine, root, result_keys + level,
- level == info->levels - 1 ? NULL : &root);
+ r = find_key(&spine, root, find_highest, result_keys + level,
+ level == info->levels - 1 ? NULL : &root);
if (r == -ENODATA) {
r = 0;
break;
@@ -806,8 +826,23 @@ int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
return r ? r : count;
}
+
+int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *result_keys)
+{
+ return dm_btree_find_key(info, root, true, result_keys);
+}
EXPORT_SYMBOL_GPL(dm_btree_find_highest_key);
+int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *result_keys)
+{
+ return dm_btree_find_key(info, root, false, result_keys);
+}
+EXPORT_SYMBOL_GPL(dm_btree_find_lowest_key);
+
+/*----------------------------------------------------------------*/
+
/*
* FIXME: We shouldn't use a recursive algorithm when we have limited stack
* space. Also this only works for single level trees.
diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h
index 8672d159e0b..dacfc34180b 100644
--- a/drivers/md/persistent-data/dm-btree.h
+++ b/drivers/md/persistent-data/dm-btree.h
@@ -137,6 +137,14 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
/*
* Returns < 0 on failure. Otherwise the number of key entries that have
* been filled out. Remember trees can have zero entries, and as such have
+ * no lowest key.
+ */
+int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *result_keys);
+
+/*
+ * Returns < 0 on failure. Otherwise the number of key entries that have
+ * been filled out. Remember trees can have zero entries, and as such have
* no highest key.
*/
int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c
index 3e7a88d99eb..aacbe70c2c2 100644
--- a/drivers/md/persistent-data/dm-space-map-common.c
+++ b/drivers/md/persistent-data/dm-space-map-common.c
@@ -245,6 +245,10 @@ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
return -EINVAL;
}
+ /*
+ * We need to set this before the dm_tm_new_block() call below.
+ */
+ ll->nr_blocks = nr_blocks;
for (i = old_blocks; i < blocks; i++) {
struct dm_block *b;
struct disk_index_entry idx;
@@ -252,6 +256,7 @@ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b);
if (r < 0)
return r;
+
idx.blocknr = cpu_to_le64(dm_block_location(b));
r = dm_tm_unlock(ll->tm, b);
@@ -266,7 +271,6 @@ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
return r;
}
- ll->nr_blocks = nr_blocks;
return 0;
}
@@ -292,16 +296,11 @@ int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result)
return dm_tm_unlock(ll->tm, blk);
}
-int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result)
+static int sm_ll_lookup_big_ref_count(struct ll_disk *ll, dm_block_t b,
+ uint32_t *result)
{
__le32 le_rc;
- int r = sm_ll_lookup_bitmap(ll, b, result);
-
- if (r)
- return r;
-
- if (*result != 3)
- return r;
+ int r;
r = dm_btree_lookup(&ll->ref_count_info, ll->ref_count_root, &b, &le_rc);
if (r < 0)
@@ -312,6 +311,19 @@ int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result)
return r;
}
+int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result)
+{
+ int r = sm_ll_lookup_bitmap(ll, b, result);
+
+ if (r)
+ return r;
+
+ if (*result != 3)
+ return r;
+
+ return sm_ll_lookup_big_ref_count(ll, b, result);
+}
+
int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
dm_block_t end, dm_block_t *result)
{
@@ -372,11 +384,12 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
return -ENOSPC;
}
-int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
- uint32_t ref_count, enum allocation_event *ev)
+static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
+ int (*mutator)(void *context, uint32_t old, uint32_t *new),
+ void *context, enum allocation_event *ev)
{
int r;
- uint32_t bit, old;
+ uint32_t bit, old, ref_count;
struct dm_block *nb;
dm_block_t index = b;
struct disk_index_entry ie_disk;
@@ -399,6 +412,20 @@ int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
bm_le = dm_bitmap_data(nb);
old = sm_lookup_bitmap(bm_le, bit);
+ if (old > 2) {
+ r = sm_ll_lookup_big_ref_count(ll, b, &old);
+ if (r < 0) {
+ dm_tm_unlock(ll->tm, nb);
+ return r;
+ }
+ }
+
+ r = mutator(context, old, &ref_count);
+ if (r) {
+ dm_tm_unlock(ll->tm, nb);
+ return r;
+ }
+
if (ref_count <= 2) {
sm_set_bitmap(bm_le, bit, ref_count);
@@ -448,31 +475,43 @@ int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
return ll->save_ie(ll, index, &ie_disk);
}
-int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
+static int set_ref_count(void *context, uint32_t old, uint32_t *new)
{
- int r;
- uint32_t rc;
-
- r = sm_ll_lookup(ll, b, &rc);
- if (r)
- return r;
+ *new = *((uint32_t *) context);
+ return 0;
+}
- return sm_ll_insert(ll, b, rc + 1, ev);
+int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
+ uint32_t ref_count, enum allocation_event *ev)
+{
+ return sm_ll_mutate(ll, b, set_ref_count, &ref_count, ev);
}
-int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
+static int inc_ref_count(void *context, uint32_t old, uint32_t *new)
{
- int r;
- uint32_t rc;
+ *new = old + 1;
+ return 0;
+}
- r = sm_ll_lookup(ll, b, &rc);
- if (r)
- return r;
+int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
+{
+ return sm_ll_mutate(ll, b, inc_ref_count, NULL, ev);
+}
- if (!rc)
+static int dec_ref_count(void *context, uint32_t old, uint32_t *new)
+{
+ if (!old) {
+ DMERR_LIMIT("unable to decrement a reference count below 0");
return -EINVAL;
+ }
- return sm_ll_insert(ll, b, rc - 1, ev);
+ *new = old - 1;
+ return 0;
+}
+
+int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
+{
+ return sm_ll_mutate(ll, b, dec_ref_count, NULL, ev);
}
int sm_ll_commit(struct ll_disk *ll)
diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c
index e735a6d5a79..cfbf9617e46 100644
--- a/drivers/md/persistent-data/dm-space-map-disk.c
+++ b/drivers/md/persistent-data/dm-space-map-disk.c
@@ -140,26 +140,10 @@ static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b)
static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b)
{
- int r;
- uint32_t old_count;
enum allocation_event ev;
struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
- r = sm_ll_dec(&smd->ll, b, &ev);
- if (!r && (ev == SM_FREE)) {
- /*
- * It's only free if it's also free in the last
- * transaction.
- */
- r = sm_ll_lookup(&smd->old_ll, b, &old_count);
- if (r)
- return r;
-
- if (!old_count)
- smd->nr_allocated_this_transaction--;
- }
-
- return r;
+ return sm_ll_dec(&smd->ll, b, &ev);
}
static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b)
diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c
index 1c959684cae..786b689bdfc 100644
--- a/drivers/md/persistent-data/dm-space-map-metadata.c
+++ b/drivers/md/persistent-data/dm-space-map-metadata.c
@@ -91,6 +91,69 @@ struct block_op {
dm_block_t block;
};
+struct bop_ring_buffer {
+ unsigned begin;
+ unsigned end;
+ struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1];
+};
+
+static void brb_init(struct bop_ring_buffer *brb)
+{
+ brb->begin = 0;
+ brb->end = 0;
+}
+
+static bool brb_empty(struct bop_ring_buffer *brb)
+{
+ return brb->begin == brb->end;
+}
+
+static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old)
+{
+ unsigned r = old + 1;
+ return (r >= (sizeof(brb->bops) / sizeof(*brb->bops))) ? 0 : r;
+}
+
+static int brb_push(struct bop_ring_buffer *brb,
+ enum block_op_type type, dm_block_t b)
+{
+ struct block_op *bop;
+ unsigned next = brb_next(brb, brb->end);
+
+ /*
+ * We don't allow the last bop to be filled, this way we can
+ * differentiate between full and empty.
+ */
+ if (next == brb->begin)
+ return -ENOMEM;
+
+ bop = brb->bops + brb->end;
+ bop->type = type;
+ bop->block = b;
+
+ brb->end = next;
+
+ return 0;
+}
+
+static int brb_pop(struct bop_ring_buffer *brb, struct block_op *result)
+{
+ struct block_op *bop;
+
+ if (brb_empty(brb))
+ return -ENODATA;
+
+ bop = brb->bops + brb->begin;
+ result->type = bop->type;
+ result->block = bop->block;
+
+ brb->begin = brb_next(brb, brb->begin);
+
+ return 0;
+}
+
+/*----------------------------------------------------------------*/
+
struct sm_metadata {
struct dm_space_map sm;
@@ -101,25 +164,20 @@ struct sm_metadata {
unsigned recursion_count;
unsigned allocated_this_transaction;
- unsigned nr_uncommitted;
- struct block_op uncommitted[MAX_RECURSIVE_ALLOCATIONS];
+ struct bop_ring_buffer uncommitted;
struct threshold threshold;
};
static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b)
{
- struct block_op *op;
+ int r = brb_push(&smm->uncommitted, type, b);
- if (smm->nr_uncommitted == MAX_RECURSIVE_ALLOCATIONS) {
+ if (r) {
DMERR("too many recursive allocations");
return -ENOMEM;
}
- op = smm->uncommitted + smm->nr_uncommitted++;
- op->type = type;
- op->block = b;
-
return 0;
}
@@ -158,11 +216,17 @@ static int out(struct sm_metadata *smm)
return -ENOMEM;
}
- if (smm->recursion_count == 1 && smm->nr_uncommitted) {
- while (smm->nr_uncommitted && !r) {
- smm->nr_uncommitted--;
- r = commit_bop(smm, smm->uncommitted +
- smm->nr_uncommitted);
+ if (smm->recursion_count == 1) {
+ while (!brb_empty(&smm->uncommitted)) {
+ struct block_op bop;
+
+ r = brb_pop(&smm->uncommitted, &bop);
+ if (r) {
+ DMERR("bug in bop ring buffer");
+ break;
+ }
+
+ r = commit_bop(smm, &bop);
if (r)
break;
}
@@ -217,7 +281,8 @@ static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
uint32_t *result)
{
- int r, i;
+ int r;
+ unsigned i;
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
unsigned adjustment = 0;
@@ -225,8 +290,10 @@ static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
* We may have some uncommitted adjustments to add. This list
* should always be really short.
*/
- for (i = 0; i < smm->nr_uncommitted; i++) {
- struct block_op *op = smm->uncommitted + i;
+ for (i = smm->uncommitted.begin;
+ i != smm->uncommitted.end;
+ i = brb_next(&smm->uncommitted, i)) {
+ struct block_op *op = smm->uncommitted.bops + i;
if (op->block != b)
continue;
@@ -254,7 +321,8 @@ static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
dm_block_t b, int *result)
{
- int r, i, adjustment = 0;
+ int r, adjustment = 0;
+ unsigned i;
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
uint32_t rc;
@@ -262,8 +330,11 @@ static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
* We may have some uncommitted adjustments to add. This list
* should always be really short.
*/
- for (i = 0; i < smm->nr_uncommitted; i++) {
- struct block_op *op = smm->uncommitted + i;
+ for (i = smm->uncommitted.begin;
+ i != smm->uncommitted.end;
+ i = brb_next(&smm->uncommitted, i)) {
+
+ struct block_op *op = smm->uncommitted.bops + i;
if (op->block != b)
continue;
@@ -384,12 +455,16 @@ static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
int r = sm_metadata_new_block_(sm, b);
- if (r)
- DMERR("unable to allocate new metadata block");
+ if (r) {
+ DMERR_LIMIT("unable to allocate new metadata block");
+ return r;
+ }
r = sm_metadata_get_nr_free(sm, &count);
- if (r)
- DMERR("couldn't get free block count");
+ if (r) {
+ DMERR_LIMIT("couldn't get free block count");
+ return r;
+ }
check_threshold(&smm->threshold, count);
@@ -604,20 +679,38 @@ static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
* Flick into a mode where all blocks get allocated in the new area.
*/
smm->begin = old_len;
- memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
+ memcpy(sm, &bootstrap_ops, sizeof(*sm));
/*
* Extend.
*/
r = sm_ll_extend(&smm->ll, extra_blocks);
+ if (r)
+ goto out;
/*
- * Switch back to normal behaviour.
+ * We repeatedly increment then commit until the commit doesn't
+ * allocate any new blocks.
*/
- memcpy(&smm->sm, &ops, sizeof(smm->sm));
- for (i = old_len; !r && i < smm->begin; i++)
- r = sm_ll_inc(&smm->ll, i, &ev);
+ do {
+ for (i = old_len; !r && i < smm->begin; i++) {
+ r = sm_ll_inc(&smm->ll, i, &ev);
+ if (r)
+ goto out;
+ }
+ old_len = smm->begin;
+
+ r = sm_ll_commit(&smm->ll);
+ if (r)
+ goto out;
+
+ } while (old_len != smm->begin);
+out:
+ /*
+ * Switch back to normal behaviour.
+ */
+ memcpy(sm, &ops, sizeof(*sm));
return r;
}
@@ -649,7 +742,7 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
smm->begin = superblock + 1;
smm->recursion_count = 0;
smm->allocated_this_transaction = 0;
- smm->nr_uncommitted = 0;
+ brb_init(&smm->uncommitted);
threshold_init(&smm->threshold);
memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
@@ -658,6 +751,8 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
if (r)
return r;
+ if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
+ nr_blocks = DM_SM_METADATA_MAX_BLOCKS;
r = sm_ll_extend(&smm->ll, nr_blocks);
if (r)
return r;
@@ -691,7 +786,7 @@ int dm_sm_metadata_open(struct dm_space_map *sm,
smm->begin = 0;
smm->recursion_count = 0;
smm->allocated_this_transaction = 0;
- smm->nr_uncommitted = 0;
+ brb_init(&smm->uncommitted);
threshold_init(&smm->threshold);
memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
diff --git a/drivers/md/persistent-data/dm-space-map-metadata.h b/drivers/md/persistent-data/dm-space-map-metadata.h
index 39bba0801cf..64df923974d 100644
--- a/drivers/md/persistent-data/dm-space-map-metadata.h
+++ b/drivers/md/persistent-data/dm-space-map-metadata.h
@@ -9,6 +9,17 @@
#include "dm-transaction-manager.h"
+#define DM_SM_METADATA_BLOCK_SIZE (4096 >> SECTOR_SHIFT)
+
+/*
+ * The metadata device is currently limited in size.
+ *
+ * We have one block of index, which can hold 255 index entries. Each
+ * index entry contains allocation info about ~16k metadata blocks.
+ */
+#define DM_SM_METADATA_MAX_BLOCKS (255 * ((1 << 14) - 64))
+#define DM_SM_METADATA_MAX_SECTORS (DM_SM_METADATA_MAX_BLOCKS * DM_SM_METADATA_BLOCK_SIZE)
+
/*
* Unfortunately we have to use two-phase construction due to the cycle
* between the tm and sm.
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
index 81da1a26042..3bc30a0ae3d 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.c
+++ b/drivers/md/persistent-data/dm-transaction-manager.c
@@ -154,7 +154,7 @@ int dm_tm_pre_commit(struct dm_transaction_manager *tm)
if (r < 0)
return r;
- return 0;
+ return dm_bm_flush(tm->bm);
}
EXPORT_SYMBOL_GPL(dm_tm_pre_commit);
@@ -164,8 +164,9 @@ int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root)
return -EWOULDBLOCK;
wipe_shadow_table(tm);
+ dm_bm_unlock(root);
- return dm_bm_flush_and_unlock(tm->bm, root);
+ return dm_bm_flush(tm->bm);
}
EXPORT_SYMBOL_GPL(dm_tm_commit);
diff --git a/drivers/md/persistent-data/dm-transaction-manager.h b/drivers/md/persistent-data/dm-transaction-manager.h
index b5b139076ca..2772ed2a781 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.h
+++ b/drivers/md/persistent-data/dm-transaction-manager.h
@@ -38,18 +38,17 @@ struct dm_transaction_manager *dm_tm_create_non_blocking_clone(struct dm_transac
/*
* We use a 2-phase commit here.
*
- * i) In the first phase the block manager is told to start flushing, and
- * the changes to the space map are written to disk. You should interrogate
- * your particular space map to get detail of its root node etc. to be
- * included in your superblock.
+ * i) Make all changes for the transaction *except* for the superblock.
+ * Then call dm_tm_pre_commit() to flush them to disk.
*
- * ii) @root will be committed last. You shouldn't use more than the
- * first 512 bytes of @root if you wish the transaction to survive a power
- * failure. You *must* have a write lock held on @root for both stage (i)
- * and (ii). The commit will drop the write lock.
+ * ii) Lock your superblock. Update. Then call dm_tm_commit() which will
+ * unlock the superblock and flush it. No other blocks should be updated
+ * during this period. Care should be taken to never unlock a partially
+ * updated superblock; perform any operations that could fail *before* you
+ * take the superblock lock.
*/
int dm_tm_pre_commit(struct dm_transaction_manager *tm);
-int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root);
+int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *superblock);
/*
* These methods are the only way to get hold of a writeable block.