diff options
Diffstat (limited to 'drivers/md/persistent-data')
| -rw-r--r-- | drivers/md/persistent-data/Kconfig | 10 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-array.c | 15 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-bitset.c | 10 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-bitset.h | 1 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-block-manager.c | 28 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-block-manager.h | 9 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-btree-remove.c | 46 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-btree.c | 61 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-btree.h | 8 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-space-map-common.c | 95 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-space-map-disk.c | 21 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-space-map-metadata.c | 268 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-space-map-metadata.h | 11 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-space-map.h | 23 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-transaction-manager.c | 5 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-transaction-manager.h | 17 |
16 files changed, 482 insertions, 146 deletions
diff --git a/drivers/md/persistent-data/Kconfig b/drivers/md/persistent-data/Kconfig index 19b26879541..0c2dec7aec2 100644 --- a/drivers/md/persistent-data/Kconfig +++ b/drivers/md/persistent-data/Kconfig @@ -6,3 +6,13 @@ config DM_PERSISTENT_DATA ---help--- Library providing immutable on-disk data structure support for device-mapper targets such as the thin provisioning target. + +config DM_DEBUG_BLOCK_STACK_TRACING + boolean "Keep stack trace of persistent data block lock holders" + depends on STACKTRACE_SUPPORT && DM_PERSISTENT_DATA + select STACKTRACE + ---help--- + Enable this for messages that may help debug problems with the + block manager locking used by thin provisioning and caching. + + If unsure, say N. diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c index 172147eb1d4..1d75b1dc1e2 100644 --- a/drivers/md/persistent-data/dm-array.c +++ b/drivers/md/persistent-data/dm-array.c @@ -317,8 +317,16 @@ static int shadow_ablock(struct dm_array_info *info, dm_block_t *root, * The shadow op will often be a noop. Only insert if it really * copied data. */ - if (dm_block_location(*block) != b) + if (dm_block_location(*block) != b) { + /* + * dm_tm_shadow_block will have already decremented the old + * block, but it is still referenced by the btree. We + * increment to stop the insert decrementing it below zero + * when overwriting the old value. 
+ */ + dm_tm_inc(info->btree_info.tm, b); r = insert_ablock(info, index, *block, root); + } return r; } @@ -509,15 +517,18 @@ static int grow_add_tail_block(struct resize *resize) static int grow_needs_more_blocks(struct resize *resize) { int r; + unsigned old_nr_blocks = resize->old_nr_full_blocks; if (resize->old_nr_entries_in_last_block > 0) { + old_nr_blocks++; + r = grow_extend_tail_block(resize, resize->max_entries); if (r) return r; } r = insert_full_ablocks(resize->info, resize->size_of_block, - resize->old_nr_full_blocks, + old_nr_blocks, resize->new_nr_full_blocks, resize->max_entries, resize->value, &resize->root); diff --git a/drivers/md/persistent-data/dm-bitset.c b/drivers/md/persistent-data/dm-bitset.c index cd9a86d4cdf..36f7cc2c710 100644 --- a/drivers/md/persistent-data/dm-bitset.c +++ b/drivers/md/persistent-data/dm-bitset.c @@ -65,7 +65,7 @@ int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root, int r; __le64 value; - if (!info->current_index_set) + if (!info->current_index_set || !info->dirty) return 0; value = cpu_to_le64(info->current_bits); @@ -77,6 +77,8 @@ int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root, return r; info->current_index_set = false; + info->dirty = false; + return 0; } EXPORT_SYMBOL_GPL(dm_bitset_flush); @@ -94,6 +96,8 @@ static int read_bits(struct dm_disk_bitset *info, dm_block_t root, info->current_bits = le64_to_cpu(value); info->current_index_set = true; info->current_index = array_index; + info->dirty = false; + return 0; } @@ -126,6 +130,8 @@ int dm_bitset_set_bit(struct dm_disk_bitset *info, dm_block_t root, return r; set_bit(b, (unsigned long *) &info->current_bits); + info->dirty = true; + return 0; } EXPORT_SYMBOL_GPL(dm_bitset_set_bit); @@ -141,6 +147,8 @@ int dm_bitset_clear_bit(struct dm_disk_bitset *info, dm_block_t root, return r; clear_bit(b, (unsigned long *) &info->current_bits); + info->dirty = true; + return 0; } EXPORT_SYMBOL_GPL(dm_bitset_clear_bit); diff --git 
a/drivers/md/persistent-data/dm-bitset.h b/drivers/md/persistent-data/dm-bitset.h index e1b9bea14aa..c2287d672ef 100644 --- a/drivers/md/persistent-data/dm-bitset.h +++ b/drivers/md/persistent-data/dm-bitset.h @@ -71,6 +71,7 @@ struct dm_disk_bitset { uint64_t current_bits; bool current_index_set:1; + bool dirty:1; }; /* diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 81b513890e2..087411c95ff 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -104,7 +104,7 @@ static int __check_holder(struct block_lock *lock) for (i = 0; i < MAX_HOLDERS; i++) { if (lock->holders[i] == current) { - DMERR("recursive lock detected in pool metadata"); + DMERR("recursive lock detected in metadata"); #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING DMERR("previously held here:"); print_stack_trace(lock->traces + i, 4); @@ -595,25 +595,19 @@ int dm_bm_unlock(struct dm_block *b) } EXPORT_SYMBOL_GPL(dm_bm_unlock); -int dm_bm_flush_and_unlock(struct dm_block_manager *bm, - struct dm_block *superblock) +int dm_bm_flush(struct dm_block_manager *bm) { - int r; - if (bm->read_only) return -EPERM; - r = dm_bufio_write_dirty_buffers(bm->bufio); - if (unlikely(r)) { - dm_bm_unlock(superblock); - return r; - } - - dm_bm_unlock(superblock); - return dm_bufio_write_dirty_buffers(bm->bufio); } -EXPORT_SYMBOL_GPL(dm_bm_flush_and_unlock); +EXPORT_SYMBOL_GPL(dm_bm_flush); + +void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b) +{ + dm_bufio_prefetch(bm->bufio, b, 1); +} void dm_bm_set_read_only(struct dm_block_manager *bm) { @@ -621,6 +615,12 @@ void dm_bm_set_read_only(struct dm_block_manager *bm) } EXPORT_SYMBOL_GPL(dm_bm_set_read_only); +void dm_bm_set_read_write(struct dm_block_manager *bm) +{ + bm->read_only = false; +} +EXPORT_SYMBOL_GPL(dm_bm_set_read_write); + u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor) { return crc32c(~(u32) 0, data, len) ^ 
init_xor; diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h index be5bff61be2..1b95dfc1778 100644 --- a/drivers/md/persistent-data/dm-block-manager.h +++ b/drivers/md/persistent-data/dm-block-manager.h @@ -105,8 +105,12 @@ int dm_bm_unlock(struct dm_block *b); * * This method always blocks. */ -int dm_bm_flush_and_unlock(struct dm_block_manager *bm, - struct dm_block *superblock); +int dm_bm_flush(struct dm_block_manager *bm); + +/* + * Request data is prefetched into the cache. + */ +void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b); /* * Switches the bm to a read only mode. Once read-only mode @@ -120,6 +124,7 @@ int dm_bm_flush_and_unlock(struct dm_block_manager *bm, * be returned if you do. */ void dm_bm_set_read_only(struct dm_block_manager *bm); +void dm_bm_set_read_write(struct dm_block_manager *bm); u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor); diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c index c4f28133ef8..b88757cd0d1 100644 --- a/drivers/md/persistent-data/dm-btree-remove.c +++ b/drivers/md/persistent-data/dm-btree-remove.c @@ -139,15 +139,8 @@ struct child { struct btree_node *n; }; -static struct dm_btree_value_type le64_type = { - .context = NULL, - .size = sizeof(__le64), - .inc = NULL, - .dec = NULL, - .equal = NULL -}; - -static int init_child(struct dm_btree_info *info, struct btree_node *parent, +static int init_child(struct dm_btree_info *info, struct dm_btree_value_type *vt, + struct btree_node *parent, unsigned index, struct child *result) { int r, inc; @@ -164,7 +157,7 @@ static int init_child(struct dm_btree_info *info, struct btree_node *parent, result->n = dm_block_data(result->block); if (inc) - inc_children(info->tm, result->n, &le64_type); + inc_children(info->tm, result->n, vt); *((__le64 *) value_ptr(parent, index)) = cpu_to_le64(dm_block_location(result->block)); @@ -236,7 +229,7 @@ static 
void __rebalance2(struct dm_btree_info *info, struct btree_node *parent, } static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, - unsigned left_index) + struct dm_btree_value_type *vt, unsigned left_index) { int r; struct btree_node *parent; @@ -244,11 +237,11 @@ static int rebalance2(struct shadow_spine *s, struct dm_btree_info *info, parent = dm_block_data(shadow_current(s)); - r = init_child(info, parent, left_index, &left); + r = init_child(info, vt, parent, left_index, &left); if (r) return r; - r = init_child(info, parent, left_index + 1, &right); + r = init_child(info, vt, parent, left_index + 1, &right); if (r) { exit_child(info, &left); return r; @@ -368,7 +361,7 @@ static void __rebalance3(struct dm_btree_info *info, struct btree_node *parent, } static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info, - unsigned left_index) + struct dm_btree_value_type *vt, unsigned left_index) { int r; struct btree_node *parent = dm_block_data(shadow_current(s)); @@ -377,17 +370,17 @@ static int rebalance3(struct shadow_spine *s, struct dm_btree_info *info, /* * FIXME: fill out an array? 
*/ - r = init_child(info, parent, left_index, &left); + r = init_child(info, vt, parent, left_index, &left); if (r) return r; - r = init_child(info, parent, left_index + 1, &center); + r = init_child(info, vt, parent, left_index + 1, &center); if (r) { exit_child(info, &left); return r; } - r = init_child(info, parent, left_index + 2, &right); + r = init_child(info, vt, parent, left_index + 2, &right); if (r) { exit_child(info, &left); exit_child(info, &center); @@ -434,7 +427,8 @@ static int get_nr_entries(struct dm_transaction_manager *tm, } static int rebalance_children(struct shadow_spine *s, - struct dm_btree_info *info, uint64_t key) + struct dm_btree_info *info, + struct dm_btree_value_type *vt, uint64_t key) { int i, r, has_left_sibling, has_right_sibling; uint32_t child_entries; @@ -472,13 +466,13 @@ static int rebalance_children(struct shadow_spine *s, has_right_sibling = i < (le32_to_cpu(n->header.nr_entries) - 1); if (!has_left_sibling) - r = rebalance2(s, info, i); + r = rebalance2(s, info, vt, i); else if (!has_right_sibling) - r = rebalance2(s, info, i - 1); + r = rebalance2(s, info, vt, i - 1); else - r = rebalance3(s, info, i - 1); + r = rebalance3(s, info, vt, i - 1); return r; } @@ -529,7 +523,7 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info, if (le32_to_cpu(n->header.flags) & LEAF_NODE) return do_leaf(n, key, index); - r = rebalance_children(s, info, key); + r = rebalance_children(s, info, vt, key); if (r) break; @@ -550,6 +544,14 @@ static int remove_raw(struct shadow_spine *s, struct dm_btree_info *info, return r; } +static struct dm_btree_value_type le64_type = { + .context = NULL, + .size = sizeof(__le64), + .inc = NULL, + .dec = NULL, + .equal = NULL +}; + int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, uint64_t *keys, dm_block_t *new_root) { diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 35865425e4b..416060c2570 100644 --- 
a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -161,6 +161,7 @@ struct frame { }; struct del_stack { + struct dm_btree_info *info; struct dm_transaction_manager *tm; int top; struct frame spine[MAX_SPINE_DEPTH]; @@ -183,6 +184,20 @@ static int unprocessed_frames(struct del_stack *s) return s->top >= 0; } +static void prefetch_children(struct del_stack *s, struct frame *f) +{ + unsigned i; + struct dm_block_manager *bm = dm_tm_get_bm(s->tm); + + for (i = 0; i < f->nr_children; i++) + dm_bm_prefetch(bm, value64(f->n, i)); +} + +static bool is_internal_level(struct dm_btree_info *info, struct frame *f) +{ + return f->level < (info->levels - 1); +} + static int push_frame(struct del_stack *s, dm_block_t b, unsigned level) { int r; @@ -205,6 +220,7 @@ static int push_frame(struct del_stack *s, dm_block_t b, unsigned level) dm_tm_dec(s->tm, b); else { + uint32_t flags; struct frame *f = s->spine + ++s->top; r = dm_tm_read_lock(s->tm, b, &btree_node_validator, &f->b); @@ -217,6 +233,10 @@ static int push_frame(struct del_stack *s, dm_block_t b, unsigned level) f->level = level; f->nr_children = le32_to_cpu(f->n->header.nr_entries); f->current_child = 0; + + flags = le32_to_cpu(f->n->header.flags); + if (flags & INTERNAL_NODE || is_internal_level(s->info, f)) + prefetch_children(s, f); } return 0; @@ -230,11 +250,6 @@ static void pop_frame(struct del_stack *s) dm_tm_unlock(s->tm, f->b); } -static bool is_internal_level(struct dm_btree_info *info, struct frame *f) -{ - return f->level < (info->levels - 1); -} - int dm_btree_del(struct dm_btree_info *info, dm_block_t root) { int r; @@ -243,6 +258,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) return -ENOMEM; + s->info = info; s->tm = info->tm; s->top = -1; @@ -287,7 +303,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) info->value_type.dec(info->value_type.context, value_ptr(f->n, i)); } - 
f->current_child = f->nr_children; + pop_frame(s); } } @@ -754,8 +770,8 @@ EXPORT_SYMBOL_GPL(dm_btree_insert_notify); /*----------------------------------------------------------------*/ -static int find_highest_key(struct ro_spine *s, dm_block_t block, - uint64_t *result_key, dm_block_t *next_block) +static int find_key(struct ro_spine *s, dm_block_t block, bool find_highest, + uint64_t *result_key, dm_block_t *next_block) { int i, r; uint32_t flags; @@ -772,7 +788,11 @@ static int find_highest_key(struct ro_spine *s, dm_block_t block, else i--; - *result_key = le64_to_cpu(ro_node(s)->keys[i]); + if (find_highest) + *result_key = le64_to_cpu(ro_node(s)->keys[i]); + else + *result_key = le64_to_cpu(ro_node(s)->keys[0]); + if (next_block || flags & INTERNAL_NODE) block = value64(ro_node(s), i); @@ -783,16 +803,16 @@ static int find_highest_key(struct ro_spine *s, dm_block_t block, return 0; } -int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, - uint64_t *result_keys) +static int dm_btree_find_key(struct dm_btree_info *info, dm_block_t root, + bool find_highest, uint64_t *result_keys) { int r = 0, count = 0, level; struct ro_spine spine; init_ro_spine(&spine, info); for (level = 0; level < info->levels; level++) { - r = find_highest_key(&spine, root, result_keys + level, - level == info->levels - 1 ? NULL : &root); + r = find_key(&spine, root, find_highest, result_keys + level, + level == info->levels - 1 ? NULL : &root); if (r == -ENODATA) { r = 0; break; @@ -806,8 +826,23 @@ int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, return r ? 
r : count; } + +int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, + uint64_t *result_keys) +{ + return dm_btree_find_key(info, root, true, result_keys); +} EXPORT_SYMBOL_GPL(dm_btree_find_highest_key); +int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root, + uint64_t *result_keys) +{ + return dm_btree_find_key(info, root, false, result_keys); +} +EXPORT_SYMBOL_GPL(dm_btree_find_lowest_key); + +/*----------------------------------------------------------------*/ + /* * FIXME: We shouldn't use a recursive algorithm when we have limited stack * space. Also this only works for single level trees. diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h index 8672d159e0b..dacfc34180b 100644 --- a/drivers/md/persistent-data/dm-btree.h +++ b/drivers/md/persistent-data/dm-btree.h @@ -137,6 +137,14 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, /* * Returns < 0 on failure. Otherwise the number of key entries that have * been filled out. Remember trees can have zero entries, and as such have + * no lowest key. + */ +int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root, + uint64_t *result_keys); + +/* + * Returns < 0 on failure. Otherwise the number of key entries that have + * been filled out. Remember trees can have zero entries, and as such have * no highest key. */ int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index 3e7a88d99eb..aacbe70c2c2 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -245,6 +245,10 @@ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks) return -EINVAL; } + /* + * We need to set this before the dm_tm_new_block() call below. 
+ */ + ll->nr_blocks = nr_blocks; for (i = old_blocks; i < blocks; i++) { struct dm_block *b; struct disk_index_entry idx; @@ -252,6 +256,7 @@ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks) r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b); if (r < 0) return r; + idx.blocknr = cpu_to_le64(dm_block_location(b)); r = dm_tm_unlock(ll->tm, b); @@ -266,7 +271,6 @@ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks) return r; } - ll->nr_blocks = nr_blocks; return 0; } @@ -292,16 +296,11 @@ int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result) return dm_tm_unlock(ll->tm, blk); } -int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result) +static int sm_ll_lookup_big_ref_count(struct ll_disk *ll, dm_block_t b, + uint32_t *result) { __le32 le_rc; - int r = sm_ll_lookup_bitmap(ll, b, result); - - if (r) - return r; - - if (*result != 3) - return r; + int r; r = dm_btree_lookup(&ll->ref_count_info, ll->ref_count_root, &b, &le_rc); if (r < 0) @@ -312,6 +311,19 @@ int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result) return r; } +int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result) +{ + int r = sm_ll_lookup_bitmap(ll, b, result); + + if (r) + return r; + + if (*result != 3) + return r; + + return sm_ll_lookup_big_ref_count(ll, b, result); +} + int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, dm_block_t end, dm_block_t *result) { @@ -372,11 +384,12 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, return -ENOSPC; } -int sm_ll_insert(struct ll_disk *ll, dm_block_t b, - uint32_t ref_count, enum allocation_event *ev) +static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b, + int (*mutator)(void *context, uint32_t old, uint32_t *new), + void *context, enum allocation_event *ev) { int r; - uint32_t bit, old; + uint32_t bit, old, ref_count; struct dm_block *nb; dm_block_t index = b; struct disk_index_entry ie_disk; @@ -399,6 +412,20 @@ int 
sm_ll_insert(struct ll_disk *ll, dm_block_t b, bm_le = dm_bitmap_data(nb); old = sm_lookup_bitmap(bm_le, bit); + if (old > 2) { + r = sm_ll_lookup_big_ref_count(ll, b, &old); + if (r < 0) { + dm_tm_unlock(ll->tm, nb); + return r; + } + } + + r = mutator(context, old, &ref_count); + if (r) { + dm_tm_unlock(ll->tm, nb); + return r; + } + if (ref_count <= 2) { sm_set_bitmap(bm_le, bit, ref_count); @@ -448,31 +475,43 @@ int sm_ll_insert(struct ll_disk *ll, dm_block_t b, return ll->save_ie(ll, index, &ie_disk); } -int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev) +static int set_ref_count(void *context, uint32_t old, uint32_t *new) { - int r; - uint32_t rc; - - r = sm_ll_lookup(ll, b, &rc); - if (r) - return r; + *new = *((uint32_t *) context); + return 0; +} - return sm_ll_insert(ll, b, rc + 1, ev); +int sm_ll_insert(struct ll_disk *ll, dm_block_t b, + uint32_t ref_count, enum allocation_event *ev) +{ + return sm_ll_mutate(ll, b, set_ref_count, &ref_count, ev); } -int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev) +static int inc_ref_count(void *context, uint32_t old, uint32_t *new) { - int r; - uint32_t rc; + *new = old + 1; + return 0; +} - r = sm_ll_lookup(ll, b, &rc); - if (r) - return r; +int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev) +{ + return sm_ll_mutate(ll, b, inc_ref_count, NULL, ev); +} - if (!rc) +static int dec_ref_count(void *context, uint32_t old, uint32_t *new) +{ + if (!old) { + DMERR_LIMIT("unable to decrement a reference count below 0"); return -EINVAL; + } - return sm_ll_insert(ll, b, rc - 1, ev); + *new = old - 1; + return 0; +} + +int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev) +{ + return sm_ll_mutate(ll, b, dec_ref_count, NULL, ev); } int sm_ll_commit(struct ll_disk *ll) diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c index f6d29e614ab..cfbf9617e46 100644 --- 
a/drivers/md/persistent-data/dm-space-map-disk.c +++ b/drivers/md/persistent-data/dm-space-map-disk.c @@ -140,26 +140,10 @@ static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b) static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b) { - int r; - uint32_t old_count; enum allocation_event ev; struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - r = sm_ll_dec(&smd->ll, b, &ev); - if (!r && (ev == SM_FREE)) { - /* - * It's only free if it's also free in the last - * transaction. - */ - r = sm_ll_lookup(&smd->old_ll, b, &old_count); - if (r) - return r; - - if (!old_count) - smd->nr_allocated_this_transaction--; - } - - return r; + return sm_ll_dec(&smd->ll, b, &ev); } static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b) @@ -248,7 +232,8 @@ static struct dm_space_map ops = { .new_block = sm_disk_new_block, .commit = sm_disk_commit, .root_size = sm_disk_root_size, - .copy_root = sm_disk_copy_root + .copy_root = sm_disk_copy_root, + .register_threshold_callback = NULL }; struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm, diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 906cf3df71a..786b689bdfc 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -17,6 +17,55 @@ /*----------------------------------------------------------------*/ /* + * An edge triggered threshold. 
+ */ +struct threshold { + bool threshold_set; + bool value_set; + dm_block_t threshold; + dm_block_t current_value; + dm_sm_threshold_fn fn; + void *context; +}; + +static void threshold_init(struct threshold *t) +{ + t->threshold_set = false; + t->value_set = false; +} + +static void set_threshold(struct threshold *t, dm_block_t value, + dm_sm_threshold_fn fn, void *context) +{ + t->threshold_set = true; + t->threshold = value; + t->fn = fn; + t->context = context; +} + +static bool below_threshold(struct threshold *t, dm_block_t value) +{ + return t->threshold_set && value <= t->threshold; +} + +static bool threshold_already_triggered(struct threshold *t) +{ + return t->value_set && below_threshold(t, t->current_value); +} + +static void check_threshold(struct threshold *t, dm_block_t value) +{ + if (below_threshold(t, value) && + !threshold_already_triggered(t)) + t->fn(t->context); + + t->value_set = true; + t->current_value = value; +} + +/*----------------------------------------------------------------*/ + +/* * Space map interface. * * The low level disk format is written using the standard btree and @@ -42,6 +91,69 @@ struct block_op { dm_block_t block; }; +struct bop_ring_buffer { + unsigned begin; + unsigned end; + struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1]; +}; + +static void brb_init(struct bop_ring_buffer *brb) +{ + brb->begin = 0; + brb->end = 0; +} + +static bool brb_empty(struct bop_ring_buffer *brb) +{ + return brb->begin == brb->end; +} + +static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old) +{ + unsigned r = old + 1; + return (r >= (sizeof(brb->bops) / sizeof(*brb->bops))) ? 0 : r; +} + +static int brb_push(struct bop_ring_buffer *brb, + enum block_op_type type, dm_block_t b) +{ + struct block_op *bop; + unsigned next = brb_next(brb, brb->end); + + /* + * We don't allow the last bop to be filled, this way we can + * differentiate between full and empty. 
+ */ + if (next == brb->begin) + return -ENOMEM; + + bop = brb->bops + brb->end; + bop->type = type; + bop->block = b; + + brb->end = next; + + return 0; +} + +static int brb_pop(struct bop_ring_buffer *brb, struct block_op *result) +{ + struct block_op *bop; + + if (brb_empty(brb)) + return -ENODATA; + + bop = brb->bops + brb->begin; + result->type = bop->type; + result->block = bop->block; + + brb->begin = brb_next(brb, brb->begin); + + return 0; +} + +/*----------------------------------------------------------------*/ + struct sm_metadata { struct dm_space_map sm; @@ -52,23 +164,20 @@ struct sm_metadata { unsigned recursion_count; unsigned allocated_this_transaction; - unsigned nr_uncommitted; - struct block_op uncommitted[MAX_RECURSIVE_ALLOCATIONS]; + struct bop_ring_buffer uncommitted; + + struct threshold threshold; }; static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b) { - struct block_op *op; + int r = brb_push(&smm->uncommitted, type, b); - if (smm->nr_uncommitted == MAX_RECURSIVE_ALLOCATIONS) { + if (r) { DMERR("too many recursive allocations"); return -ENOMEM; } - op = smm->uncommitted + smm->nr_uncommitted++; - op->type = type; - op->block = b; - return 0; } @@ -107,11 +216,17 @@ static int out(struct sm_metadata *smm) return -ENOMEM; } - if (smm->recursion_count == 1 && smm->nr_uncommitted) { - while (smm->nr_uncommitted && !r) { - smm->nr_uncommitted--; - r = commit_bop(smm, smm->uncommitted + - smm->nr_uncommitted); + if (smm->recursion_count == 1) { + while (!brb_empty(&smm->uncommitted)) { + struct block_op bop; + + r = brb_pop(&smm->uncommitted, &bop); + if (r) { + DMERR("bug in bop ring buffer"); + break; + } + + r = commit_bop(smm, &bop); if (r) break; } @@ -144,12 +259,6 @@ static void sm_metadata_destroy(struct dm_space_map *sm) kfree(smm); } -static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) -{ - DMERR("doesn't support extend"); - return -EINVAL; -} - static int 
sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count) { struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); @@ -172,7 +281,8 @@ static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count) static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b, uint32_t *result) { - int r, i; + int r; + unsigned i; struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); unsigned adjustment = 0; @@ -180,8 +290,10 @@ static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b, * We may have some uncommitted adjustments to add. This list * should always be really short. */ - for (i = 0; i < smm->nr_uncommitted; i++) { - struct block_op *op = smm->uncommitted + i; + for (i = smm->uncommitted.begin; + i != smm->uncommitted.end; + i = brb_next(&smm->uncommitted, i)) { + struct block_op *op = smm->uncommitted.bops + i; if (op->block != b) continue; @@ -209,7 +321,8 @@ static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b, static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm, dm_block_t b, int *result) { - int r, i, adjustment = 0; + int r, adjustment = 0; + unsigned i; struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); uint32_t rc; @@ -217,8 +330,11 @@ static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm, * We may have some uncommitted adjustments to add. This list * should always be really short. 
*/ - for (i = 0; i < smm->nr_uncommitted; i++) { - struct block_op *op = smm->uncommitted + i; + for (i = smm->uncommitted.begin; + i != smm->uncommitted.end; + i = brb_next(&smm->uncommitted, i)) { + + struct block_op *op = smm->uncommitted.bops + i; if (op->block != b) continue; @@ -335,9 +451,23 @@ static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b) static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b) { + dm_block_t count; + struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); + int r = sm_metadata_new_block_(sm, b); - if (r) - DMERR("unable to allocate new metadata block"); + if (r) { + DMERR_LIMIT("unable to allocate new metadata block"); + return r; + } + + r = sm_metadata_get_nr_free(sm, &count); + if (r) { + DMERR_LIMIT("couldn't get free block count"); + return r; + } + + check_threshold(&smm->threshold, count); + return r; } @@ -357,6 +487,18 @@ static int sm_metadata_commit(struct dm_space_map *sm) return 0; } +static int sm_metadata_register_threshold_callback(struct dm_space_map *sm, + dm_block_t threshold, + dm_sm_threshold_fn fn, + void *context) +{ + struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); + + set_threshold(&smm->threshold, threshold, fn, context); + + return 0; +} + static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result) { *result = sizeof(struct disk_sm_root); @@ -382,6 +524,8 @@ static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t return 0; } +static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks); + static struct dm_space_map ops = { .destroy = sm_metadata_destroy, .extend = sm_metadata_extend, @@ -395,7 +539,8 @@ static struct dm_space_map ops = { .new_block = sm_metadata_new_block, .commit = sm_metadata_commit, .root_size = sm_metadata_root_size, - .copy_root = sm_metadata_copy_root + .copy_root = sm_metadata_copy_root, + .register_threshold_callback = 
sm_metadata_register_threshold_callback }; /*----------------------------------------------------------------*/ @@ -410,7 +555,7 @@ static void sm_bootstrap_destroy(struct dm_space_map *sm) static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks) { - DMERR("boostrap doesn't support extend"); + DMERR("bootstrap doesn't support extend"); return -EINVAL; } @@ -450,7 +595,7 @@ static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm, static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b, uint32_t count) { - DMERR("boostrap doesn't support set_count"); + DMERR("bootstrap doesn't support set_count"); return -EINVAL; } @@ -491,7 +636,7 @@ static int sm_bootstrap_commit(struct dm_space_map *sm) static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result) { - DMERR("boostrap doesn't support root_size"); + DMERR("bootstrap doesn't support root_size"); return -EINVAL; } @@ -499,7 +644,7 @@ static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result) static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where, size_t max) { - DMERR("boostrap doesn't support copy_root"); + DMERR("bootstrap doesn't support copy_root"); return -EINVAL; } @@ -517,11 +662,60 @@ static struct dm_space_map bootstrap_ops = { .new_block = sm_bootstrap_new_block, .commit = sm_bootstrap_commit, .root_size = sm_bootstrap_root_size, - .copy_root = sm_bootstrap_copy_root + .copy_root = sm_bootstrap_copy_root, + .register_threshold_callback = NULL }; /*----------------------------------------------------------------*/ +static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) +{ + int r, i; + enum allocation_event ev; + struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); + dm_block_t old_len = smm->ll.nr_blocks; + + /* + * Flick into a mode where all blocks get allocated in the new area. + */ + smm->begin = old_len; + memcpy(sm, &bootstrap_ops, sizeof(*sm)); + + /* + * Extend. 
+ */ + r = sm_ll_extend(&smm->ll, extra_blocks); + if (r) + goto out; + + /* + * We repeatedly increment then commit until the commit doesn't + * allocate any new blocks. + */ + do { + for (i = old_len; !r && i < smm->begin; i++) { + r = sm_ll_inc(&smm->ll, i, &ev); + if (r) + goto out; + } + old_len = smm->begin; + + r = sm_ll_commit(&smm->ll); + if (r) + goto out; + + } while (old_len != smm->begin); + +out: + /* + * Switch back to normal behaviour. + */ + memcpy(sm, &ops, sizeof(*sm)); + return r; +} + +/*----------------------------------------------------------------*/ + struct dm_space_map *dm_sm_metadata_init(void) { struct sm_metadata *smm; @@ -548,7 +742,8 @@ int dm_sm_metadata_create(struct dm_space_map *sm, smm->begin = superblock + 1; smm->recursion_count = 0; smm->allocated_this_transaction = 0; - smm->nr_uncommitted = 0; + brb_init(&smm->uncommitted); + threshold_init(&smm->threshold); memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); @@ -556,6 +751,8 @@ int dm_sm_metadata_create(struct dm_space_map *sm, if (r) return r; + if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS) + nr_blocks = DM_SM_METADATA_MAX_BLOCKS; r = sm_ll_extend(&smm->ll, nr_blocks); if (r) return r; @@ -589,7 +786,8 @@ int dm_sm_metadata_open(struct dm_space_map *sm, smm->begin = 0; smm->recursion_count = 0; smm->allocated_this_transaction = 0; - smm->nr_uncommitted = 0; + brb_init(&smm->uncommitted); + threshold_init(&smm->threshold); memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll)); return 0; diff --git a/drivers/md/persistent-data/dm-space-map-metadata.h b/drivers/md/persistent-data/dm-space-map-metadata.h index 39bba0801cf..64df923974d 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.h +++ b/drivers/md/persistent-data/dm-space-map-metadata.h @@ -9,6 +9,17 @@ #include "dm-transaction-manager.h" +#define DM_SM_METADATA_BLOCK_SIZE (4096 >> SECTOR_SHIFT) + +/* + * The metadata device is currently limited in size. 
+ * + * We have one block of index, which can hold 255 index entries. Each + * index entry contains allocation info about ~16k metadata blocks. + */ +#define DM_SM_METADATA_MAX_BLOCKS (255 * ((1 << 14) - 64)) +#define DM_SM_METADATA_MAX_SECTORS (DM_SM_METADATA_MAX_BLOCKS * DM_SM_METADATA_BLOCK_SIZE) + /* * Unfortunately we have to use two-phase construction due to the cycle * between the tm and sm. diff --git a/drivers/md/persistent-data/dm-space-map.h b/drivers/md/persistent-data/dm-space-map.h index 1cbfc6b1638..3e6d1153b7c 100644 --- a/drivers/md/persistent-data/dm-space-map.h +++ b/drivers/md/persistent-data/dm-space-map.h @@ -9,6 +9,8 @@ #include "dm-block-manager.h" +typedef void (*dm_sm_threshold_fn)(void *context); + /* * struct dm_space_map keeps a record of how many times each block in a device * is referenced. It needs to be fixed on disk as part of the transaction. @@ -59,6 +61,15 @@ struct dm_space_map { */ int (*root_size)(struct dm_space_map *sm, size_t *result); int (*copy_root)(struct dm_space_map *sm, void *copy_to_here_le, size_t len); + + /* + * You can register one threshold callback which is edge-triggered + * when the free space in the space map drops below the threshold. 
+ */ + int (*register_threshold_callback)(struct dm_space_map *sm, + dm_block_t threshold, + dm_sm_threshold_fn fn, + void *context); }; /*----------------------------------------------------------------*/ @@ -131,4 +142,16 @@ static inline int dm_sm_copy_root(struct dm_space_map *sm, void *copy_to_here_le return sm->copy_root(sm, copy_to_here_le, len); } +static inline int dm_sm_register_threshold_callback(struct dm_space_map *sm, + dm_block_t threshold, + dm_sm_threshold_fn fn, + void *context) +{ + if (sm->register_threshold_callback) + return sm->register_threshold_callback(sm, threshold, fn, context); + + return -EINVAL; +} + + #endif /* _LINUX_DM_SPACE_MAP_H */ diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c index 81da1a26042..3bc30a0ae3d 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.c +++ b/drivers/md/persistent-data/dm-transaction-manager.c @@ -154,7 +154,7 @@ int dm_tm_pre_commit(struct dm_transaction_manager *tm) if (r < 0) return r; - return 0; + return dm_bm_flush(tm->bm); } EXPORT_SYMBOL_GPL(dm_tm_pre_commit); @@ -164,8 +164,9 @@ int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root) return -EWOULDBLOCK; wipe_shadow_table(tm); + dm_bm_unlock(root); - return dm_bm_flush_and_unlock(tm->bm, root); + return dm_bm_flush(tm->bm); } EXPORT_SYMBOL_GPL(dm_tm_commit); diff --git a/drivers/md/persistent-data/dm-transaction-manager.h b/drivers/md/persistent-data/dm-transaction-manager.h index b5b139076ca..2772ed2a781 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.h +++ b/drivers/md/persistent-data/dm-transaction-manager.h @@ -38,18 +38,17 @@ struct dm_transaction_manager *dm_tm_create_non_blocking_clone(struct dm_transac /* * We use a 2-phase commit here. * - * i) In the first phase the block manager is told to start flushing, and - * the changes to the space map are written to disk. 
You should interrogate - * your particular space map to get detail of its root node etc. to be - * included in your superblock. + * i) Make all changes for the transaction *except* for the superblock. + * Then call dm_tm_pre_commit() to flush them to disk. * - * ii) @root will be committed last. You shouldn't use more than the - * first 512 bytes of @root if you wish the transaction to survive a power - * failure. You *must* have a write lock held on @root for both stage (i) - * and (ii). The commit will drop the write lock. + * ii) Lock your superblock. Update. Then call dm_tm_commit() which will + * unlock the superblock and flush it. No other blocks should be updated + * during this period. Care should be taken to never unlock a partially + * updated superblock; perform any operations that could fail *before* you + * take the superblock lock. */ int dm_tm_pre_commit(struct dm_transaction_manager *tm); -int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root); +int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *superblock); /* * These methods are the only way to get hold of a writeable block. |
