aboutsummaryrefslogtreecommitdiff
path: root/drivers/md/persistent-data
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/persistent-data')
-rw-r--r--drivers/md/persistent-data/Kconfig10
-rw-r--r--drivers/md/persistent-data/dm-array.c15
-rw-r--r--drivers/md/persistent-data/dm-bitset.c10
-rw-r--r--drivers/md/persistent-data/dm-bitset.h1
-rw-r--r--drivers/md/persistent-data/dm-block-manager.c28
-rw-r--r--drivers/md/persistent-data/dm-block-manager.h9
-rw-r--r--drivers/md/persistent-data/dm-btree-remove.c46
-rw-r--r--drivers/md/persistent-data/dm-btree.c61
-rw-r--r--drivers/md/persistent-data/dm-btree.h8
-rw-r--r--drivers/md/persistent-data/dm-space-map-common.c95
-rw-r--r--drivers/md/persistent-data/dm-space-map-disk.c21
-rw-r--r--drivers/md/persistent-data/dm-space-map-metadata.c268
-rw-r--r--drivers/md/persistent-data/dm-space-map-metadata.h11
-rw-r--r--drivers/md/persistent-data/dm-space-map.h23
-rw-r--r--drivers/md/persistent-data/dm-transaction-manager.c5
-rw-r--r--drivers/md/persistent-data/dm-transaction-manager.h17
16 files changed, 482 insertions, 146 deletions
diff --git a/drivers/md/persistent-data/Kconfig b/drivers/md/persistent-data/Kconfig
index 19b26879541..0c2dec7aec2 100644
--- a/drivers/md/persistent-data/Kconfig
+++ b/drivers/md/persistent-data/Kconfig
@@ -6,3 +6,13 @@ config DM_PERSISTENT_DATA
---help---
Library providing immutable on-disk data structure support for
device-mapper targets such as the thin provisioning target.
+
+config DM_DEBUG_BLOCK_STACK_TRACING
+ boolean "Keep stack trace of persistent data block lock holders"
+ depends on STACKTRACE_SUPPORT && DM_PERSISTENT_DATA
+ select STACKTRACE
+ ---help---
+ Enable this for messages that may help debug problems with the
+ block manager locking used by thin provisioning and caching.
+
+ If unsure, say N.
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c
index 172147eb1d4..1d75b1dc1e2 100644
--- a/drivers/md/persistent-data/dm-array.c
+++ b/drivers/md/persistent-data/dm-array.c
@@ -317,8 +317,16 @@ static int shadow_ablock(struct dm_array_info *info, dm_block_t *root,
* The shadow op will often be a noop. Only insert if it really
* copied data.
*/
- if (dm_block_location(*block) != b)
+ if (dm_block_location(*block) != b) {
+ /*
+ * dm_tm_shadow_block will have already decremented the old
+ * block, but it is still referenced by the btree. We
+ * increment to stop the insert decrementing it below zero
+ * when overwriting the old value.
+ */
+ dm_tm_inc(info->btree_info.tm, b);
r = insert_ablock(info, index, *block, root);
+ }
return r;
}
@@ -509,15 +517,18 @@ static int grow_add_tail_block(struct resize *resize)
static int grow_needs_more_blocks(struct resize *resize)
{
int r;
+ unsigned old_nr_blocks = resize->old_nr_full_blocks;
if (resize->old_nr_entries_in_last_block > 0) {
+ old_nr_blocks++;
+
r = grow_extend_tail_block(resize, resize->max_entries);
if (r)
return r;
}
r = insert_full_ablocks(resize->info, resize->size_of_block,
- resize->old_nr_full_blocks,
+ old_nr_blocks,
resize->new_nr_full_blocks,
resize->max_entries, resize->value,
&resize->root);
diff --git a/drivers/md/persistent-data/dm-bitset.c b/drivers/md/persistent-data/dm-bitset.c
index cd9a86d4cdf..36f7cc2c710 100644
--- a/drivers/md/persistent-data/dm-bitset.c
+++ b/drivers/md/persistent-data/dm-bitset.c
@@ -65,7 +65,7 @@ int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root,
int r;
__le64 value;
- if (!info->current_index_set)
+ if (!info->current_index_set || !info->dirty)
return 0;
value = cpu_to_le64(info->current_bits);
@@ -77,6 +77,8 @@ int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root,
return r;
info->current_index_set = false;
+ info->dirty = false;
+
return 0;
}
EXPORT_SYMBOL_GPL(dm_bitset_flush);
@@ -94,6 +96,8 @@ static int read_bits(struct dm_disk_bitset *info, dm_block_t root,
info->current_bits = le64_to_cpu(value);
info->current_index_set = true;
info->current_index = array_index;
+ info->dirty = false;
+
return 0;
}
@@ -126,6 +130,8 @@ int dm_bitset_set_bit(struct dm_disk_bitset *info, dm_block_t root,
return r;
set_bit(b, (unsigned long *) &info->current_bits);
+ info->dirty = true;
+
return 0;
}
EXPORT_SYMBOL_GPL(dm_bitset_set_bit);
@@ -141,6 +147,8 @@ int dm_bitset_clear_bit(struct dm_disk_bitset *info, dm_block_t root,
return r;
clear_bit(b, (unsigned long *) &info->current_bits);
+ info->dirty = true;
+
return 0;
}
EXPORT_SYMBOL_GPL(dm_bitset_clear_bit);
diff --git a/drivers/md/persistent-data/dm-bitset.h b/drivers/md/persistent-data/dm-bitset.h
index e1b9bea14aa..c2287d672ef 100644
--- a/drivers/md/persistent-data/dm-bitset.h
+++ b/drivers/md/persistent-data/dm-bitset.h
@@ -71,6 +71,7 @@ struct dm_disk_bitset {
uint64_t current_bits;
bool current_index_set:1;
+ bool dirty:1;
};
/*
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c
index 81b513890e2..087411c95ff 100644
--- a/drivers/md/persistent-data/dm-block-manager.c
+++ b/drivers/md/persistent-data/dm-block-manager.c
@@ -104,7 +104,7 @@ static int __check_holder(struct block_lock *lock)
for (i = 0; i < MAX_HOLDERS; i++) {
if (lock->holders[i] == current) {
- DMERR("recursive lock detected in pool metadata");
+ DMERR("recursive lock detected in metadata");
#ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
DMERR("previously held here:");
print_stack_trace(lock->traces + i, 4);
@@ -595,25 +595,19 @@ int dm_bm_unlock(struct dm_block *b)
}
EXPORT_SYMBOL_GPL(dm_bm_unlock);
-int dm_bm_flush_and_unlock(struct dm_block_manager *bm,
- struct dm_block *superblock)
+int dm_bm_flush(struct dm_block_manager *bm)
{
- int r;
-
if (bm->read_only)
return -EPERM;
- r = dm_bufio_write_dirty_buffers(bm->bufio);
- if (unlikely(r)) {
- dm_bm_unlock(superblock);
- return r;
- }
-
- dm_bm_unlock(superblock);
-
return dm_bufio_write_dirty_buffers(bm->bufio);
}
-EXPORT_SYMBOL_GPL(dm_bm_flush_and_unlock);
+EXPORT_SYMBOL_GPL(dm_bm_flush);
+
+void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b)
+{
+ dm_bufio_prefetch(bm->bufio, b, 1);
+}
void dm_bm_set_read_only(struct dm_block_manager *bm)
{
@@ -621,6 +615,12 @@ void dm_bm_set_read_only(struct dm_block_manager *bm)
}
EXPORT_SYMBOL_GPL(dm_bm_set_read_only);
+void dm_bm_set_read_write(struct dm_block_manager *bm)
+{
+ bm->read_only = false;
+}
+EXPORT_SYMBOL_GPL(dm_bm_set_read_write);
+
u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor)
{
return crc32c(~(u32) 0, data, len) ^ init_xor;
diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h
index be5bff61be2..1b95dfc1778 100644
--- a/drivers/md/persistent-data/dm-block-manager.h
+++ b/drivers/md/persistent-data/dm-block-manager.h
@@ -105,8 +105,12 @@ int dm_bm_unlock(struct dm_block *b);
*
* This method always blocks.
*/
-int dm_bm_flush_and_unlock(struct dm_block_manager *bm,
- struct dm_block *superblock);
+int dm_bm_flush(struct dm_block_manager *bm);
+
+/*
+ * Request data is prefetched into the cache.
+ */
+void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b);
/*
* Switches the bm to a read only mode. Once read-only mode
@@ -120,6 +124,7 @@ int dm_bm_flush_and_unlock(struct dm_block_manager *bm,
* be returned if you do.
*/
void dm_bm_set_read_only(struct dm_block_manager *bm);
+void dm_bm_set_read_write(struct dm_block_manager *bm);
u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor);
diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c
index c4f28133ef8..b88757cd0d1 100644
--- a/drivers/md/persistent-data/dm-btree-remove.c
+++ b/drivers/md/persistent-data/dm-btree-remove.c
@@ -139,15 +139,8 @@ struct child {
struct btree_node *n;
};
-static struct dm_btree_value_type le64_type = {
- .context = NULL,
- .size = sizeof(__le64),
- .inc = NULL,
- .dec = NULL,
- .equal = NULL
-};
-
-static int init_child(struct dm_btree_info *info, struct btree_node *parent,
+static int init_child(struct dm_btree_info *info, struct dm_btree_value_type *vt,
+ struct btree_node *parent,
unsigned index, struct child *result)
{
int r, inc;
@@ -164,7 +157,7 @@ static int init_child(struct dm_btree_info *info, struct btree_node *parent,
result->n = dm_block_data(result->block);
if (inc)
- inc_children(info->tm, result->n, &le64_type);
+ inc_children(info->tm, result->n, vt);
*((__le64 *) value_ptr(parent, index)) =
cpu_to_le64(dm_block_location(result->block));
@@ -236,7 +229,7 @@ static void __rebalance2(struct dm_btree_info *info, struct btree_node *parent,
}
static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
- unsigned left_index)
+ struct dm_btree_value_type *vt, unsigned left_index)
{
int r;
struct btree_node *parent;
@@ -244,11 +237,11 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info,
parent = dm_block_data(shadow_current(s));
- r = init_child(info, parent, left_index, &left);
+ r = init_child(info, vt, parent, left_index, &left);
if (r)
return r;
- r = init_child(info, parent, left_index + 1, &right);
+ r = init_child(info, vt, parent, left_index + 1, &right);
if (r) {
exit_child(info, &left);
return r;
@@ -368,7 +361,7 @@ static void __rebalance3(struct dm_btree_info *info, struct btree_node *parent,
}
static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
- unsigned left_index)
+ struct dm_btree_value_type *vt, unsigned left_index)
{
int r;
struct btree_node *parent = dm_block_data(shadow_current(s));
@@ -377,17 +370,17 @@ static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info,
/*
* FIXME: fill out an array?
*/
- r = init_child(info, parent, left_index, &left);
+ r = init_child(info, vt, parent, left_index, &left);
if (r)
return r;
- r = init_child(info, parent, left_index + 1, &center);
+ r = init_child(info, vt, parent, left_index + 1, &center);
if (r) {
exit_child(info, &left);
return r;
}
- r = init_child(info, parent, left_index + 2, &right);
+ r = init_child(info, vt, parent, left_index + 2, &right);
if (r) {
exit_child(info, &left);
exit_child(info, &center);
@@ -434,7 +427,8 @@ static int get_nr_entries(struct dm_transaction_manager *tm,
}
static int rebalance_children(struct shadow_spine *s,
- struct dm_btree_info *info, uint64_t key)
+ struct dm_btree_info *info,
+ struct dm_btree_value_type *vt, uint64_t key)
{
int i, r, has_left_sibling, has_right_sibling;
uint32_t child_entries;
@@ -472,13 +466,13 @@ static int rebalance_children(struct shadow_spine *s,
has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1);
if (!has_left_sibling)
- r = rebalance2(s, info, i);
+ r = rebalance2(s, info, vt, i);
else if (!has_right_sibling)
- r = rebalance2(s, info, i - 1);
+ r = rebalance2(s, info, vt, i - 1);
else
- r = rebalance3(s, info, i - 1);
+ r = rebalance3(s, info, vt, i - 1);
return r;
}
@@ -529,7 +523,7 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info,
if (le32_to_cpu(n->header.flags) & LEAF_NODE)
return do_leaf(n, key, index);
- r = rebalance_children(s, info, key);
+ r = rebalance_children(s, info, vt, key);
if (r)
break;
@@ -550,6 +544,14 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info,
return r;
}
+static struct dm_btree_value_type le64_type = {
+ .context = NULL,
+ .size = sizeof(__le64),
+ .inc = NULL,
+ .dec = NULL,
+ .equal = NULL
+};
+
int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
uint64_t *keys, dm_block_t *new_root)
{
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index 35865425e4b..416060c2570 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -161,6 +161,7 @@ struct frame {
};
struct del_stack {
+ struct dm_btree_info *info;
struct dm_transaction_manager *tm;
int top;
struct frame spine[MAX_SPINE_DEPTH];
@@ -183,6 +184,20 @@ static int unprocessed_frames(struct del_stack *s)
return s->top >= 0;
}
+static void prefetch_children(struct del_stack *s, struct frame *f)
+{
+ unsigned i;
+ struct dm_block_manager *bm = dm_tm_get_bm(s->tm);
+
+ for (i = 0; i < f->nr_children; i++)
+ dm_bm_prefetch(bm, value64(f->n, i));
+}
+
+static bool is_internal_level(struct dm_btree_info *info, struct frame *f)
+{
+ return f->level < (info->levels - 1);
+}
+
static int push_frame(struct del_stack *s, dm_block_t b, unsigned level)
{
int r;
@@ -205,6 +220,7 @@ static int push_frame(struct del_stack *s, dm_block_t b, unsigned level)
dm_tm_dec(s->tm, b);
else {
+ uint32_t flags;
struct frame *f = s->spine + ++s->top;
r = dm_tm_read_lock(s->tm, b, &btree_node_validator, &f->b);
@@ -217,6 +233,10 @@ static int push_frame(struct del_stack *s, dm_block_t b, unsigned level)
f->level = level;
f->nr_children = le32_to_cpu(f->n->header.nr_entries);
f->current_child = 0;
+
+ flags = le32_to_cpu(f->n->header.flags);
+ if (flags & INTERNAL_NODE || is_internal_level(s->info, f))
+ prefetch_children(s, f);
}
return 0;
@@ -230,11 +250,6 @@ static void pop_frame(struct del_stack *s)
dm_tm_unlock(s->tm, f->b);
}
-static bool is_internal_level(struct dm_btree_info *info, struct frame *f)
-{
- return f->level < (info->levels - 1);
-}
-
int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
{
int r;
@@ -243,6 +258,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
s = kmalloc(sizeof(*s), GFP_KERNEL);
if (!s)
return -ENOMEM;
+ s->info = info;
s->tm = info->tm;
s->top = -1;
@@ -287,7 +303,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root)
info->value_type.dec(info->value_type.context,
value_ptr(f->n, i));
}
- f->current_child = f->nr_children;
+ pop_frame(s);
}
}
@@ -754,8 +770,8 @@ EXPORT_SYMBOL_GPL(dm_btree_insert_notify);
/*----------------------------------------------------------------*/
-static int find_highest_key(struct ro_spine *s, dm_block_t block,
- uint64_t *result_key, dm_block_t *next_block)
+static int find_key(struct ro_spine *s, dm_block_t block, bool find_highest,
+ uint64_t *result_key, dm_block_t *next_block)
{
int i, r;
uint32_t flags;
@@ -772,7 +788,11 @@ static int find_highest_key(struct ro_spine *s, dm_block_t block,
else
i--;
- *result_key = le64_to_cpu(ro_node(s)->keys[i]);
+ if (find_highest)
+ *result_key = le64_to_cpu(ro_node(s)->keys[i]);
+ else
+ *result_key = le64_to_cpu(ro_node(s)->keys[0]);
+
if (next_block || flags & INTERNAL_NODE)
block = value64(ro_node(s), i);
@@ -783,16 +803,16 @@ static int find_highest_key(struct ro_spine *s, dm_block_t block,
return 0;
}
-int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
- uint64_t *result_keys)
+static int dm_btree_find_key(struct dm_btree_info *info, dm_block_t root,
+ bool find_highest, uint64_t *result_keys)
{
int r = 0, count = 0, level;
struct ro_spine spine;
init_ro_spine(&spine, info);
for (level = 0; level < info->levels; level++) {
- r = find_highest_key(&spine, root, result_keys + level,
- level == info->levels - 1 ? NULL : &root);
+ r = find_key(&spine, root, find_highest, result_keys + level,
+ level == info->levels - 1 ? NULL : &root);
if (r == -ENODATA) {
r = 0;
break;
@@ -806,8 +826,23 @@ int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
return r ? r : count;
}
+
+int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *result_keys)
+{
+ return dm_btree_find_key(info, root, true, result_keys);
+}
EXPORT_SYMBOL_GPL(dm_btree_find_highest_key);
+int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *result_keys)
+{
+ return dm_btree_find_key(info, root, false, result_keys);
+}
+EXPORT_SYMBOL_GPL(dm_btree_find_lowest_key);
+
+/*----------------------------------------------------------------*/
+
/*
* FIXME: We shouldn't use a recursive algorithm when we have limited stack
* space. Also this only works for single level trees.
diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h
index 8672d159e0b..dacfc34180b 100644
--- a/drivers/md/persistent-data/dm-btree.h
+++ b/drivers/md/persistent-data/dm-btree.h
@@ -137,6 +137,14 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
/*
* Returns < 0 on failure. Otherwise the number of key entries that have
* been filled out. Remember trees can have zero entries, and as such have
+ * no lowest key.
+ */
+int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root,
+ uint64_t *result_keys);
+
+/*
+ * Returns < 0 on failure. Otherwise the number of key entries that have
+ * been filled out. Remember trees can have zero entries, and as such have
* no highest key.
*/
int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c
index 3e7a88d99eb..aacbe70c2c2 100644
--- a/drivers/md/persistent-data/dm-space-map-common.c
+++ b/drivers/md/persistent-data/dm-space-map-common.c
@@ -245,6 +245,10 @@ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
return -EINVAL;
}
+ /*
+ * We need to set this before the dm_tm_new_block() call below.
+ */
+ ll->nr_blocks = nr_blocks;
for (i = old_blocks; i < blocks; i++) {
struct dm_block *b;
struct disk_index_entry idx;
@@ -252,6 +256,7 @@ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b);
if (r < 0)
return r;
+
idx.blocknr = cpu_to_le64(dm_block_location(b));
r = dm_tm_unlock(ll->tm, b);
@@ -266,7 +271,6 @@ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
return r;
}
- ll->nr_blocks = nr_blocks;
return 0;
}
@@ -292,16 +296,11 @@ int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result)
return dm_tm_unlock(ll->tm, blk);
}
-int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result)
+static int sm_ll_lookup_big_ref_count(struct ll_disk *ll, dm_block_t b,
+ uint32_t *result)
{
__le32 le_rc;
- int r = sm_ll_lookup_bitmap(ll, b, result);
-
- if (r)
- return r;
-
- if (*result != 3)
- return r;
+ int r;
r = dm_btree_lookup(&ll->ref_count_info, ll->ref_count_root, &b, &le_rc);
if (r < 0)
@@ -312,6 +311,19 @@ int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result)
return r;
}
+int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result)
+{
+ int r = sm_ll_lookup_bitmap(ll, b, result);
+
+ if (r)
+ return r;
+
+ if (*result != 3)
+ return r;
+
+ return sm_ll_lookup_big_ref_count(ll, b, result);
+}
+
int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
dm_block_t end, dm_block_t *result)
{
@@ -372,11 +384,12 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
return -ENOSPC;
}
-int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
- uint32_t ref_count, enum allocation_event *ev)
+static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
+ int (*mutator)(void *context, uint32_t old, uint32_t *new),
+ void *context, enum allocation_event *ev)
{
int r;
- uint32_t bit, old;
+ uint32_t bit, old, ref_count;
struct dm_block *nb;
dm_block_t index = b;
struct disk_index_entry ie_disk;
@@ -399,6 +412,20 @@ int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
bm_le = dm_bitmap_data(nb);
old = sm_lookup_bitmap(bm_le, bit);
+ if (old > 2) {
+ r = sm_ll_lookup_big_ref_count(ll, b, &old);
+ if (r < 0) {
+ dm_tm_unlock(ll->tm, nb);
+ return r;
+ }
+ }
+
+ r = mutator(context, old, &ref_count);
+ if (r) {
+ dm_tm_unlock(ll->tm, nb);
+ return r;
+ }
+
if (ref_count <= 2) {
sm_set_bitmap(bm_le, bit, ref_count);
@@ -448,31 +475,43 @@ int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
return ll->save_ie(ll, index, &ie_disk);
}
-int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
+static int set_ref_count(void *context, uint32_t old, uint32_t *new)
{
- int r;
- uint32_t rc;
-
- r = sm_ll_lookup(ll, b, &rc);
- if (r)
- return r;
+ *new = *((uint32_t *) context);
+ return 0;
+}
- return sm_ll_insert(ll, b, rc + 1, ev);
+int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
+ uint32_t ref_count, enum allocation_event *ev)
+{
+ return sm_ll_mutate(ll, b, set_ref_count, &ref_count, ev);
}
-int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
+static int inc_ref_count(void *context, uint32_t old, uint32_t *new)
{
- int r;
- uint32_t rc;
+ *new = old + 1;
+ return 0;
+}
- r = sm_ll_lookup(ll, b, &rc);
- if (r)
- return r;
+int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
+{
+ return sm_ll_mutate(ll, b, inc_ref_count, NULL, ev);
+}
- if (!rc)
+static int dec_ref_count(void *context, uint32_t old, uint32_t *new)
+{
+ if (!old) {
+ DMERR_LIMIT("unable to decrement a reference count below 0");
return -EINVAL;
+ }
- return sm_ll_insert(ll, b, rc - 1, ev);
+ *new = old - 1;
+ return 0;
+}
+
+int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
+{
+ return sm_ll_mutate(ll, b, dec_ref_count, NULL, ev);
}
int sm_ll_commit(struct ll_disk *ll)
diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c
index f6d29e614ab..cfbf9617e46 100644
--- a/drivers/md/persistent-data/dm-space-map-disk.c
+++ b/drivers/md/persistent-data/dm-space-map-disk.c
@@ -140,26 +140,10 @@ static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b)
static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b)
{
- int r;
- uint32_t old_count;
enum allocation_event ev;
struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
- r = sm_ll_dec(&smd->ll, b, &ev);
- if (!r && (ev == SM_FREE)) {
- /*
- * It's only free if it's also free in the last
- * transaction.
- */
- r = sm_ll_lookup(&smd->old_ll, b, &old_count);
- if (r)
- return r;
-
- if (!old_count)
- smd->nr_allocated_this_transaction--;
- }
-
- return r;
+ return sm_ll_dec(&smd->ll, b, &ev);
}
static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b)
@@ -248,7 +232,8 @@ static struct dm_space_map ops = {
.new_block = sm_disk_new_block,
.commit = sm_disk_commit,
.root_size = sm_disk_root_size,
- .copy_root = sm_disk_copy_root
+ .copy_root = sm_disk_copy_root,
+ .register_threshold_callback = NULL
};
struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm,
diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c
index 906cf3df71a..786b689bdfc 100644
--- a/drivers/md/persistent-data/dm-space-map-metadata.c
+++ b/drivers/md/persistent-data/dm-space-map-metadata.c
@@ -17,6 +17,55 @@
/*----------------------------------------------------------------*/
/*
+ * An edge triggered threshold.
+ */
+struct threshold {
+ bool threshold_set;
+ bool value_set;
+ dm_block_t threshold;
+ dm_block_t current_value;
+ dm_sm_threshold_fn fn;
+ void *context;
+};
+
+static void threshold_init(struct threshold *t)
+{
+ t->threshold_set = false;
+ t->value_set = false;
+}
+
+static void set_threshold(struct threshold *t, dm_block_t value,
+ dm_sm_threshold_fn fn, void *context)
+{
+ t->threshold_set = true;
+ t->threshold = value;
+ t->fn = fn;
+ t->context = context;
+}
+
+static bool below_threshold(struct threshold *t, dm_block_t value)
+{
+ return t->threshold_set && value <= t->threshold;
+}
+
+static bool threshold_already_triggered(struct threshold *t)
+{
+ return t->value_set && below_threshold(t, t->current_value);
+}
+
+static void check_threshold(struct threshold *t, dm_block_t value)
+{
+ if (below_threshold(t, value) &&
+ !threshold_already_triggered(t))
+ t->fn(t->context);
+
+ t->value_set = true;
+ t->current_value = value;
+}
+
+/*----------------------------------------------------------------*/
+
+/*
* Space map interface.
*
* The low level disk format is written using the standard btree and
@@ -42,6 +91,69 @@ struct block_op {
dm_block_t block;
};
+struct bop_ring_buffer {
+ unsigned begin;
+ unsigned end;
+ struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1];
+};
+
+static void brb_init(struct bop_ring_buffer *brb)
+{
+ brb->begin = 0;
+ brb->end = 0;
+}
+
+static bool brb_empty(struct bop_ring_buffer *brb)
+{
+ return brb->begin == brb->end;
+}
+
+static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old)
+{
+ unsigned r = old + 1;
+ return (r >= (sizeof(brb->bops) / sizeof(*brb->bops))) ? 0 : r;
+}
+
+static int brb_push(struct bop_ring_buffer *brb,
+ enum block_op_type type, dm_block_t b)
+{
+ struct block_op *bop;
+ unsigned next = brb_next(brb, brb->end);
+
+ /*
+ * We don't allow the last bop to be filled, this way we can
+ * differentiate between full and empty.
+ */
+ if (next == brb->begin)
+ return -ENOMEM;
+
+ bop = brb->bops + brb->end;
+ bop->type = type;
+ bop->block = b;
+
+ brb->end = next;
+
+ return 0;
+}
+
+static int brb_pop(struct bop_ring_buffer *brb, struct block_op *result)
+{
+ struct block_op *bop;
+
+ if (brb_empty(brb))
+ return -ENODATA;
+
+ bop = brb->bops + brb->begin;
+ result->type = bop->type;
+ result->block = bop->block;
+
+ brb->begin = brb_next(brb, brb->begin);
+
+ return 0;
+}
+
+/*----------------------------------------------------------------*/
+
struct sm_metadata {
struct dm_space_map sm;
@@ -52,23 +164,20 @@ struct sm_metadata {
unsigned recursion_count;
unsigned allocated_this_transaction;
- unsigned nr_uncommitted;
- struct block_op uncommitted[MAX_RECURSIVE_ALLOCATIONS];
+ struct bop_ring_buffer uncommitted;
+
+ struct threshold threshold;
};
static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b)
{
- struct block_op *op;
+ int r = brb_push(&smm->uncommitted, type, b);
- if (smm->nr_uncommitted == MAX_RECURSIVE_ALLOCATIONS) {
+ if (r) {
DMERR("too many recursive allocations");
return -ENOMEM;
}
- op = smm->uncommitted + smm->nr_uncommitted++;
- op->type = type;
- op->block = b;
-
return 0;
}
@@ -107,11 +216,17 @@ static int out(struct sm_metadata *smm)
return -ENOMEM;
}
- if (smm->recursion_count == 1 && smm->nr_uncommitted) {
- while (smm->nr_uncommitted && !r) {
- smm->nr_uncommitted--;
- r = commit_bop(smm, smm->uncommitted +
- smm->nr_uncommitted);
+ if (smm->recursion_count == 1) {
+ while (!brb_empty(&smm->uncommitted)) {
+ struct block_op bop;
+
+ r = brb_pop(&smm->uncommitted, &bop);
+ if (r) {
+ DMERR("bug in bop ring buffer");
+ break;
+ }
+
+ r = commit_bop(smm, &bop);
if (r)
break;
}
@@ -144,12 +259,6 @@ static void sm_metadata_destroy(struct dm_space_map *sm)
kfree(smm);
}
-static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
-{
- DMERR("doesn't support extend");
- return -EINVAL;
-}
-
static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
{
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
@@ -172,7 +281,8 @@ static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
uint32_t *result)
{
- int r, i;
+ int r;
+ unsigned i;
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
unsigned adjustment = 0;
@@ -180,8 +290,10 @@ static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
* We may have some uncommitted adjustments to add. This list
* should always be really short.
*/
- for (i = 0; i < smm->nr_uncommitted; i++) {
- struct block_op *op = smm->uncommitted + i;
+ for (i = smm->uncommitted.begin;
+ i != smm->uncommitted.end;
+ i = brb_next(&smm->uncommitted, i)) {
+ struct block_op *op = smm->uncommitted.bops + i;
if (op->block != b)
continue;
@@ -209,7 +321,8 @@ static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
dm_block_t b, int *result)
{
- int r, i, adjustment = 0;
+ int r, adjustment = 0;
+ unsigned i;
struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
uint32_t rc;
@@ -217,8 +330,11 @@ static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
* We may have some uncommitted adjustments to add. This list
* should always be really short.
*/
- for (i = 0; i < smm->nr_uncommitted; i++) {
- struct block_op *op = smm->uncommitted + i;
+ for (i = smm->uncommitted.begin;
+ i != smm->uncommitted.end;
+ i = brb_next(&smm->uncommitted, i)) {
+
+ struct block_op *op = smm->uncommitted.bops + i;
if (op->block != b)
continue;
@@ -335,9 +451,23 @@ static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
{
+ dm_block_t count;
+ struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+
int r = sm_metadata_new_block_(sm, b);
- if (r)
- DMERR("unable to allocate new metadata block");
+ if (r) {
+ DMERR_LIMIT("unable to allocate new metadata block");
+ return r;
+ }
+
+ r = sm_metadata_get_nr_free(sm, &count);
+ if (r) {
+ DMERR_LIMIT("couldn't get free block count");
+ return r;
+ }
+
+ check_threshold(&smm->threshold, count);
+
return r;
}
@@ -357,6 +487,18 @@ static int sm_metadata_commit(struct dm_space_map *sm)
return 0;
}
+static int sm_metadata_register_threshold_callback(struct dm_space_map *sm,
+ dm_block_t threshold,
+ dm_sm_threshold_fn fn,
+ void *context)
+{
+ struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+
+ set_threshold(&smm->threshold, threshold, fn, context);
+
+ return 0;
+}
+
static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result)
{
*result = sizeof(struct disk_sm_root);
@@ -382,6 +524,8 @@ static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t
return 0;
}
+static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks);
+
static struct dm_space_map ops = {
.destroy = sm_metadata_destroy,
.extend = sm_metadata_extend,
@@ -395,7 +539,8 @@ static struct dm_space_map ops = {
.new_block = sm_metadata_new_block,
.commit = sm_metadata_commit,
.root_size = sm_metadata_root_size,
- .copy_root = sm_metadata_copy_root
+ .copy_root = sm_metadata_copy_root,
+ .register_threshold_callback = sm_metadata_register_threshold_callback
};
/*----------------------------------------------------------------*/
@@ -410,7 +555,7 @@ static void sm_bootstrap_destroy(struct dm_space_map *sm)
static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
- DMERR("boostrap doesn't support extend");
+ DMERR("bootstrap doesn't support extend");
return -EINVAL;
}
@@ -450,7 +595,7 @@ static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm,
static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b,
uint32_t count)
{
- DMERR("boostrap doesn't support set_count");
+ DMERR("bootstrap doesn't support set_count");
return -EINVAL;
}
@@ -491,7 +636,7 @@ static int sm_bootstrap_commit(struct dm_space_map *sm)
static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result)
{
- DMERR("boostrap doesn't support root_size");
+ DMERR("bootstrap doesn't support root_size");
return -EINVAL;
}
@@ -499,7 +644,7 @@ static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result)
static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where,
size_t max)
{
- DMERR("boostrap doesn't support copy_root");
+ DMERR("bootstrap doesn't support copy_root");
return -EINVAL;
}
@@ -517,11 +662,60 @@ static struct dm_space_map bootstrap_ops = {
.new_block = sm_bootstrap_new_block,
.commit = sm_bootstrap_commit,
.root_size = sm_bootstrap_root_size,
- .copy_root = sm_bootstrap_copy_root
+ .copy_root = sm_bootstrap_copy_root,
+ .register_threshold_callback = NULL
};
/*----------------------------------------------------------------*/
+static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
+{
+ int r, i;
+ enum allocation_event ev;
+ struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
+ dm_block_t old_len = smm->ll.nr_blocks;
+
+ /*
+ * Flick into a mode where all blocks get allocated in the new area.
+ */
+ smm->begin = old_len;
+ memcpy(sm, &bootstrap_ops, sizeof(*sm));
+
+ /*
+ * Extend.
+ */
+ r = sm_ll_extend(&smm->ll, extra_blocks);
+ if (r)
+ goto out;
+
+ /*
+ * We repeatedly increment then commit until the commit doesn't
+ * allocate any new blocks.
+ */
+ do {
+ for (i = old_len; !r && i < smm->begin; i++) {
+ r = sm_ll_inc(&smm->ll, i, &ev);
+ if (r)
+ goto out;
+ }
+ old_len = smm->begin;
+
+ r = sm_ll_commit(&smm->ll);
+ if (r)
+ goto out;
+
+ } while (old_len != smm->begin);
+
+out:
+ /*
+ * Switch back to normal behaviour.
+ */
+ memcpy(sm, &ops, sizeof(*sm));
+ return r;
+}
+
+/*----------------------------------------------------------------*/
+
struct dm_space_map *dm_sm_metadata_init(void)
{
struct sm_metadata *smm;
@@ -548,7 +742,8 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
smm->begin = superblock + 1;
smm->recursion_count = 0;
smm->allocated_this_transaction = 0;
- smm->nr_uncommitted = 0;
+ brb_init(&smm->uncommitted);
+ threshold_init(&smm->threshold);
memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
@@ -556,6 +751,8 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
if (r)
return r;
+ if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
+ nr_blocks = DM_SM_METADATA_MAX_BLOCKS;
r = sm_ll_extend(&smm->ll, nr_blocks);
if (r)
return r;
@@ -589,7 +786,8 @@ int dm_sm_metadata_open(struct dm_space_map *sm,
smm->begin = 0;
smm->recursion_count = 0;
smm->allocated_this_transaction = 0;
- smm->nr_uncommitted = 0;
+ brb_init(&smm->uncommitted);
+ threshold_init(&smm->threshold);
memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
return 0;
diff --git a/drivers/md/persistent-data/dm-space-map-metadata.h b/drivers/md/persistent-data/dm-space-map-metadata.h
index 39bba0801cf..64df923974d 100644
--- a/drivers/md/persistent-data/dm-space-map-metadata.h
+++ b/drivers/md/persistent-data/dm-space-map-metadata.h
@@ -9,6 +9,17 @@
#include "dm-transaction-manager.h"
+#define DM_SM_METADATA_BLOCK_SIZE (4096 >> SECTOR_SHIFT)
+
+/*
+ * The metadata device is currently limited in size.
+ *
+ * We have one block of index, which can hold 255 index entries. Each
+ * index entry contains allocation info about ~16k metadata blocks.
+ */
+#define DM_SM_METADATA_MAX_BLOCKS (255 * ((1 << 14) - 64))
+#define DM_SM_METADATA_MAX_SECTORS (DM_SM_METADATA_MAX_BLOCKS * DM_SM_METADATA_BLOCK_SIZE)
+
/*
* Unfortunately we have to use two-phase construction due to the cycle
* between the tm and sm.
diff --git a/drivers/md/persistent-data/dm-space-map.h b/drivers/md/persistent-data/dm-space-map.h
index 1cbfc6b1638..3e6d1153b7c 100644
--- a/drivers/md/persistent-data/dm-space-map.h
+++ b/drivers/md/persistent-data/dm-space-map.h
@@ -9,6 +9,8 @@
#include "dm-block-manager.h"
+typedef void (*dm_sm_threshold_fn)(void *context);
+
/*
* struct dm_space_map keeps a record of how many times each block in a device
* is referenced. It needs to be fixed on disk as part of the transaction.
@@ -59,6 +61,15 @@ struct dm_space_map {
*/
int (*root_size)(struct dm_space_map *sm, size_t *result);
int (*copy_root)(struct dm_space_map *sm, void *copy_to_here_le, size_t len);
+
+ /*
+ * You can register one threshold callback which is edge-triggered
+ * when the free space in the space map drops below the threshold.
+ */
+ int (*register_threshold_callback)(struct dm_space_map *sm,
+ dm_block_t threshold,
+ dm_sm_threshold_fn fn,
+ void *context);
};
/*----------------------------------------------------------------*/
@@ -131,4 +142,16 @@ static inline int dm_sm_copy_root(struct dm_space_map *sm, void *copy_to_here_le
return sm->copy_root(sm, copy_to_here_le, len);
}
+static inline int dm_sm_register_threshold_callback(struct dm_space_map *sm,
+ dm_block_t threshold,
+ dm_sm_threshold_fn fn,
+ void *context)
+{
+ if (sm->register_threshold_callback)
+ return sm->register_threshold_callback(sm, threshold, fn, context);
+
+ return -EINVAL;
+}
+
+
#endif /* _LINUX_DM_SPACE_MAP_H */
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
index 81da1a26042..3bc30a0ae3d 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.c
+++ b/drivers/md/persistent-data/dm-transaction-manager.c
@@ -154,7 +154,7 @@ int dm_tm_pre_commit(struct dm_transaction_manager *tm)
if (r < 0)
return r;
- return 0;
+ return dm_bm_flush(tm->bm);
}
EXPORT_SYMBOL_GPL(dm_tm_pre_commit);
@@ -164,8 +164,9 @@ int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root)
return -EWOULDBLOCK;
wipe_shadow_table(tm);
+ dm_bm_unlock(root);
- return dm_bm_flush_and_unlock(tm->bm, root);
+ return dm_bm_flush(tm->bm);
}
EXPORT_SYMBOL_GPL(dm_tm_commit);
diff --git a/drivers/md/persistent-data/dm-transaction-manager.h b/drivers/md/persistent-data/dm-transaction-manager.h
index b5b139076ca..2772ed2a781 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.h
+++ b/drivers/md/persistent-data/dm-transaction-manager.h
@@ -38,18 +38,17 @@ struct dm_transaction_manager *dm_tm_create_non_blocking_clone(struct dm_transac
/*
* We use a 2-phase commit here.
*
- * i) In the first phase the block manager is told to start flushing, and
- * the changes to the space map are written to disk. You should interrogate
- * your particular space map to get detail of its root node etc. to be
- * included in your superblock.
+ * i) Make all changes for the transaction *except* for the superblock.
+ * Then call dm_tm_pre_commit() to flush them to disk.
*
- * ii) @root will be committed last. You shouldn't use more than the
- * first 512 bytes of @root if you wish the transaction to survive a power
- * failure. You *must* have a write lock held on @root for both stage (i)
- * and (ii). The commit will drop the write lock.
+ * ii) Lock your superblock. Update. Then call dm_tm_commit() which will
+ * unlock the superblock and flush it. No other blocks should be updated
+ * during this period. Care should be taken to never unlock a partially
+ * updated superblock; perform any operations that could fail *before* you
+ * take the superblock lock.
*/
int dm_tm_pre_commit(struct dm_transaction_manager *tm);
-int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root);
+int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *superblock);
/*
* These methods are the only way to get hold of a writeable block.