diff options
Diffstat (limited to 'drivers/md/persistent-data')
| -rw-r--r-- | drivers/md/persistent-data/Kconfig | 10 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-array.c | 15 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-bitset.c | 10 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-bitset.h | 1 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-block-manager.c | 28 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-block-manager.h | 9 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-btree.c | 61 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-btree.h | 8 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-space-map-common.c | 95 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-space-map-disk.c | 18 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-space-map-metadata.c | 155 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-space-map-metadata.h | 11 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-transaction-manager.c | 5 | ||||
| -rw-r--r-- | drivers/md/persistent-data/dm-transaction-manager.h | 17 |
14 files changed, 325 insertions, 118 deletions
diff --git a/drivers/md/persistent-data/Kconfig b/drivers/md/persistent-data/Kconfig index 19b26879541..0c2dec7aec2 100644 --- a/drivers/md/persistent-data/Kconfig +++ b/drivers/md/persistent-data/Kconfig @@ -6,3 +6,13 @@ config DM_PERSISTENT_DATA ---help--- Library providing immutable on-disk data structure support for device-mapper targets such as the thin provisioning target. + +config DM_DEBUG_BLOCK_STACK_TRACING + boolean "Keep stack trace of persistent data block lock holders" + depends on STACKTRACE_SUPPORT && DM_PERSISTENT_DATA + select STACKTRACE + ---help--- + Enable this for messages that may help debug problems with the + block manager locking used by thin provisioning and caching. + + If unsure, say N. diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c index 172147eb1d4..1d75b1dc1e2 100644 --- a/drivers/md/persistent-data/dm-array.c +++ b/drivers/md/persistent-data/dm-array.c @@ -317,8 +317,16 @@ static int shadow_ablock(struct dm_array_info *info, dm_block_t *root, * The shadow op will often be a noop. Only insert if it really * copied data. */ - if (dm_block_location(*block) != b) + if (dm_block_location(*block) != b) { + /* + * dm_tm_shadow_block will have already decremented the old + * block, but it is still referenced by the btree. We + * increment to stop the insert decrementing it below zero + * when overwriting the old value. + */ + dm_tm_inc(info->btree_info.tm, b); r = insert_ablock(info, index, *block, root); + } return r; } @@ -509,15 +517,18 @@ static int grow_add_tail_block(struct resize *resize) static int grow_needs_more_blocks(struct resize *resize) { int r; + unsigned old_nr_blocks = resize->old_nr_full_blocks; if (resize->old_nr_entries_in_last_block > 0) { + old_nr_blocks++; + r = grow_extend_tail_block(resize, resize->max_entries); if (r) return r; } r = insert_full_ablocks(resize->info, resize->size_of_block, - resize->old_nr_full_blocks, + old_nr_blocks, resize->new_nr_full_blocks, resize->max_entries, resize->value, &resize->root); diff --git a/drivers/md/persistent-data/dm-bitset.c b/drivers/md/persistent-data/dm-bitset.c index cd9a86d4cdf..36f7cc2c710 100644 --- a/drivers/md/persistent-data/dm-bitset.c +++ b/drivers/md/persistent-data/dm-bitset.c @@ -65,7 +65,7 @@ int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root, int r; __le64 value; - if (!info->current_index_set) + if (!info->current_index_set || !info->dirty) return 0; value = cpu_to_le64(info->current_bits); @@ -77,6 +77,8 @@ int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root, return r; info->current_index_set = false; + info->dirty = false; + return 0; } EXPORT_SYMBOL_GPL(dm_bitset_flush); @@ -94,6 +96,8 @@ static int read_bits(struct dm_disk_bitset *info, dm_block_t root, info->current_bits = le64_to_cpu(value); info->current_index_set = true; info->current_index = array_index; + info->dirty = false; + return 0; } @@ -126,6 +130,8 @@ int dm_bitset_set_bit(struct dm_disk_bitset *info, dm_block_t root, return r; set_bit(b, (unsigned long *) &info->current_bits); + info->dirty = true; + return 0; } EXPORT_SYMBOL_GPL(dm_bitset_set_bit); @@ -141,6 +147,8 @@ int dm_bitset_clear_bit(struct dm_disk_bitset *info, dm_block_t root, return r; clear_bit(b, (unsigned long *) &info->current_bits); + info->dirty = true; + return 0; } EXPORT_SYMBOL_GPL(dm_bitset_clear_bit); diff --git a/drivers/md/persistent-data/dm-bitset.h b/drivers/md/persistent-data/dm-bitset.h index e1b9bea14aa..c2287d672ef 100644 --- a/drivers/md/persistent-data/dm-bitset.h +++ b/drivers/md/persistent-data/dm-bitset.h @@ -71,6 +71,7 @@ struct dm_disk_bitset { uint64_t current_bits; bool current_index_set:1; + bool dirty:1; }; /* diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c index 81b513890e2..087411c95ff 100644 --- a/drivers/md/persistent-data/dm-block-manager.c +++ b/drivers/md/persistent-data/dm-block-manager.c @@ -104,7 +104,7 @@ static int __check_holder(struct block_lock *lock) for (i = 0; i < MAX_HOLDERS; i++) { if (lock->holders[i] == current) { - DMERR("recursive lock detected in pool metadata"); + DMERR("recursive lock detected in metadata"); #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING DMERR("previously held here:"); print_stack_trace(lock->traces + i, 4); @@ -595,25 +595,19 @@ int dm_bm_unlock(struct dm_block *b) } EXPORT_SYMBOL_GPL(dm_bm_unlock); -int dm_bm_flush_and_unlock(struct dm_block_manager *bm, - struct dm_block *superblock) +int dm_bm_flush(struct dm_block_manager *bm) { - int r; - if (bm->read_only) return -EPERM; - r = dm_bufio_write_dirty_buffers(bm->bufio); - if (unlikely(r)) { - dm_bm_unlock(superblock); - return r; - } - - dm_bm_unlock(superblock); - return dm_bufio_write_dirty_buffers(bm->bufio); } -EXPORT_SYMBOL_GPL(dm_bm_flush_and_unlock); +EXPORT_SYMBOL_GPL(dm_bm_flush); + +void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b) +{ + dm_bufio_prefetch(bm->bufio, b, 1); +} void dm_bm_set_read_only(struct dm_block_manager *bm) { @@ -621,6 +615,12 @@ void dm_bm_set_read_only(struct dm_block_manager *bm) } EXPORT_SYMBOL_GPL(dm_bm_set_read_only); +void dm_bm_set_read_write(struct dm_block_manager *bm) +{ + bm->read_only = false; +} +EXPORT_SYMBOL_GPL(dm_bm_set_read_write); + u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor) { return crc32c(~(u32) 0, data, len) ^ init_xor; diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h index be5bff61be2..1b95dfc1778 100644 --- a/drivers/md/persistent-data/dm-block-manager.h +++ b/drivers/md/persistent-data/dm-block-manager.h @@ -105,8 +105,12 @@ int dm_bm_unlock(struct dm_block *b); * * This method always blocks. */ -int dm_bm_flush_and_unlock(struct dm_block_manager *bm, - struct dm_block *superblock); +int dm_bm_flush(struct dm_block_manager *bm); + +/* + * Request data is prefetched into the cache. + */ +void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b); /* * Switches the bm to a read only mode. Once read-only mode @@ -120,6 +124,7 @@ int dm_bm_flush_and_unlock(struct dm_block_manager *bm, * be returned if you do. */ void dm_bm_set_read_only(struct dm_block_manager *bm); +void dm_bm_set_read_write(struct dm_block_manager *bm); u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor); diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c index 35865425e4b..416060c2570 100644 --- a/drivers/md/persistent-data/dm-btree.c +++ b/drivers/md/persistent-data/dm-btree.c @@ -161,6 +161,7 @@ struct frame { }; struct del_stack { + struct dm_btree_info *info; struct dm_transaction_manager *tm; int top; struct frame spine[MAX_SPINE_DEPTH]; @@ -183,6 +184,20 @@ static int unprocessed_frames(struct del_stack *s) return s->top >= 0; } +static void prefetch_children(struct del_stack *s, struct frame *f) +{ + unsigned i; + struct dm_block_manager *bm = dm_tm_get_bm(s->tm); + + for (i = 0; i < f->nr_children; i++) + dm_bm_prefetch(bm, value64(f->n, i)); +} + +static bool is_internal_level(struct dm_btree_info *info, struct frame *f) +{ + return f->level < (info->levels - 1); +} + static int push_frame(struct del_stack *s, dm_block_t b, unsigned level) { int r; @@ -205,6 +220,7 @@ static int push_frame(struct del_stack *s, dm_block_t b, unsigned level) dm_tm_dec(s->tm, b); else { + uint32_t flags; struct frame *f = s->spine + ++s->top; r = dm_tm_read_lock(s->tm, b, &btree_node_validator, &f->b); @@ -217,6 +233,10 @@ static int push_frame(struct del_stack *s, dm_block_t b, unsigned level) f->level = level; f->nr_children = le32_to_cpu(f->n->header.nr_entries); f->current_child = 0; + + flags = le32_to_cpu(f->n->header.flags); + if (flags & INTERNAL_NODE || is_internal_level(s->info, f)) + prefetch_children(s, f); } return 0; @@ -230,11 +250,6 @@ static void pop_frame(struct del_stack *s) dm_tm_unlock(s->tm, f->b); } -static bool is_internal_level(struct dm_btree_info *info, struct frame *f) -{ - return f->level < (info->levels - 1); -} - int dm_btree_del(struct dm_btree_info *info, dm_block_t root) { int r; @@ -243,6 +258,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) return -ENOMEM; + s->info = info; s->tm = info->tm; s->top = -1; @@ -287,7 +303,7 @@ int dm_btree_del(struct dm_btree_info *info, dm_block_t root) info->value_type.dec(info->value_type.context, value_ptr(f->n, i)); } - f->current_child = f->nr_children; + pop_frame(s); } } @@ -754,8 +770,8 @@ EXPORT_SYMBOL_GPL(dm_btree_insert_notify); /*----------------------------------------------------------------*/ -static int find_highest_key(struct ro_spine *s, dm_block_t block, - uint64_t *result_key, dm_block_t *next_block) +static int find_key(struct ro_spine *s, dm_block_t block, bool find_highest, + uint64_t *result_key, dm_block_t *next_block) { int i, r; uint32_t flags; @@ -772,7 +788,11 @@ static int find_highest_key(struct ro_spine *s, dm_block_t block, else i--; - *result_key = le64_to_cpu(ro_node(s)->keys[i]); + if (find_highest) + *result_key = le64_to_cpu(ro_node(s)->keys[i]); + else + *result_key = le64_to_cpu(ro_node(s)->keys[0]); + if (next_block || flags & INTERNAL_NODE) block = value64(ro_node(s), i); @@ -783,16 +803,16 @@ static int find_highest_key(struct ro_spine *s, dm_block_t block, return 0; } -int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, - uint64_t *result_keys) +static int dm_btree_find_key(struct dm_btree_info *info, dm_block_t root, + bool find_highest, uint64_t *result_keys) { int r = 0, count = 0, level; struct ro_spine spine; init_ro_spine(&spine, info); for (level = 0; level < info->levels; level++) { - r = find_highest_key(&spine, root, result_keys + level, - level == info->levels - 1 ? NULL : &root); + r = find_key(&spine, root, find_highest, result_keys + level, + level == info->levels - 1 ? NULL : &root); if (r == -ENODATA) { r = 0; break; @@ -806,8 +826,23 @@ int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, return r ? r : count; } + +int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, + uint64_t *result_keys) +{ + return dm_btree_find_key(info, root, true, result_keys); +} EXPORT_SYMBOL_GPL(dm_btree_find_highest_key); +int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root, + uint64_t *result_keys) +{ + return dm_btree_find_key(info, root, false, result_keys); +} +EXPORT_SYMBOL_GPL(dm_btree_find_lowest_key); + +/*----------------------------------------------------------------*/ + /* * FIXME: We shouldn't use a recursive algorithm when we have limited stack * space. Also this only works for single level trees. diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h index 8672d159e0b..dacfc34180b 100644 --- a/drivers/md/persistent-data/dm-btree.h +++ b/drivers/md/persistent-data/dm-btree.h @@ -137,6 +137,14 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root, /* * Returns < 0 on failure. Otherwise the number of key entries that have * been filled out. Remember trees can have zero entries, and as such have + * no lowest key. + */ +int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root, + uint64_t *result_keys); + +/* + * Returns < 0 on failure. Otherwise the number of key entries that have + * been filled out. Remember trees can have zero entries, and as such have * no highest key. */ int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root, diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c index 3e7a88d99eb..aacbe70c2c2 100644 --- a/drivers/md/persistent-data/dm-space-map-common.c +++ b/drivers/md/persistent-data/dm-space-map-common.c @@ -245,6 +245,10 @@ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks) return -EINVAL; } + /* + * We need to set this before the dm_tm_new_block() call below. + */ + ll->nr_blocks = nr_blocks; for (i = old_blocks; i < blocks; i++) { struct dm_block *b; struct disk_index_entry idx; @@ -252,6 +256,7 @@ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks) r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b); if (r < 0) return r; + idx.blocknr = cpu_to_le64(dm_block_location(b)); r = dm_tm_unlock(ll->tm, b); @@ -266,7 +271,6 @@ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks) return r; } - ll->nr_blocks = nr_blocks; return 0; } @@ -292,16 +296,11 @@ int sm_ll_lookup_bitmap(struct ll_disk *ll, dm_block_t b, uint32_t *result) return dm_tm_unlock(ll->tm, blk); } -int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result) +static int sm_ll_lookup_big_ref_count(struct ll_disk *ll, dm_block_t b, + uint32_t *result) { __le32 le_rc; - int r = sm_ll_lookup_bitmap(ll, b, result); - - if (r) - return r; - - if (*result != 3) - return r; + int r; r = dm_btree_lookup(&ll->ref_count_info, ll->ref_count_root, &b, &le_rc); if (r < 0) @@ -312,6 +311,19 @@ int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result) return r; } +int sm_ll_lookup(struct ll_disk *ll, dm_block_t b, uint32_t *result) +{ + int r = sm_ll_lookup_bitmap(ll, b, result); + + if (r) + return r; + + if (*result != 3) + return r; + + return sm_ll_lookup_big_ref_count(ll, b, result); +} + int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, dm_block_t end, dm_block_t *result) { @@ -372,11 +384,12 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin, return -ENOSPC; } -int sm_ll_insert(struct ll_disk *ll, dm_block_t b, - uint32_t ref_count, enum allocation_event *ev) +static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b, + int (*mutator)(void *context, uint32_t old, uint32_t *new), + void *context, enum allocation_event *ev) { int r; - uint32_t bit, old; + uint32_t bit, old, ref_count; struct dm_block *nb; dm_block_t index = b; struct disk_index_entry ie_disk; @@ -399,6 +412,20 @@ int sm_ll_insert(struct ll_disk *ll, dm_block_t b, bm_le = dm_bitmap_data(nb); old = sm_lookup_bitmap(bm_le, bit); + if (old > 2) { + r = sm_ll_lookup_big_ref_count(ll, b, &old); + if (r < 0) { + dm_tm_unlock(ll->tm, nb); + return r; + } + } + + r = mutator(context, old, &ref_count); + if (r) { + dm_tm_unlock(ll->tm, nb); + return r; + } + if (ref_count <= 2) { sm_set_bitmap(bm_le, bit, ref_count); @@ -448,31 +475,43 @@ int sm_ll_insert(struct ll_disk *ll, dm_block_t b, return ll->save_ie(ll, index, &ie_disk); } -int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev) +static int set_ref_count(void *context, uint32_t old, uint32_t *new) { - int r; - uint32_t rc; - - r = sm_ll_lookup(ll, b, &rc); - if (r) - return r; + *new = *((uint32_t *) context); + return 0; +} - return sm_ll_insert(ll, b, rc + 1, ev); +int sm_ll_insert(struct ll_disk *ll, dm_block_t b, + uint32_t ref_count, enum allocation_event *ev) +{ + return sm_ll_mutate(ll, b, set_ref_count, &ref_count, ev); } -int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev) +static int inc_ref_count(void *context, uint32_t old, uint32_t *new) { - int r; - uint32_t rc; + *new = old + 1; + return 0; +} - r = sm_ll_lookup(ll, b, &rc); - if (r) - return r; +int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev) +{ + return sm_ll_mutate(ll, b, inc_ref_count, NULL, ev); +} - if (!rc) +static int dec_ref_count(void *context, uint32_t old, uint32_t *new) +{ + if (!old) { + DMERR_LIMIT("unable to decrement a reference count below 0"); return -EINVAL; + } - return sm_ll_insert(ll, b, rc - 1, ev); + *new = old - 1; + return 0; +} + +int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev) +{ + return sm_ll_mutate(ll, b, dec_ref_count, NULL, ev); } int sm_ll_commit(struct ll_disk *ll) diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c index e735a6d5a79..cfbf9617e46 100644 --- a/drivers/md/persistent-data/dm-space-map-disk.c +++ b/drivers/md/persistent-data/dm-space-map-disk.c @@ -140,26 +140,10 @@ static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b) static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b) { - int r; - uint32_t old_count; enum allocation_event ev; struct sm_disk *smd = container_of(sm, struct sm_disk, sm); - r = sm_ll_dec(&smd->ll, b, &ev); - if (!r && (ev == SM_FREE)) { - /* - * It's only free if it's also free in the last - * transaction. - */ - r = sm_ll_lookup(&smd->old_ll, b, &old_count); - if (r) - return r; - - if (!old_count) - smd->nr_allocated_this_transaction--; - } - - return r; + return sm_ll_dec(&smd->ll, b, &ev); } static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b) diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c index 1c959684cae..786b689bdfc 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.c +++ b/drivers/md/persistent-data/dm-space-map-metadata.c @@ -91,6 +91,69 @@ struct block_op { dm_block_t block; }; +struct bop_ring_buffer { + unsigned begin; + unsigned end; + struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1]; +}; + +static void brb_init(struct bop_ring_buffer *brb) +{ + brb->begin = 0; + brb->end = 0; +} + +static bool brb_empty(struct bop_ring_buffer *brb) +{ + return brb->begin == brb->end; +} + +static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old) +{ + unsigned r = old + 1; + return (r >= (sizeof(brb->bops) / sizeof(*brb->bops))) ? 0 : r; +} + +static int brb_push(struct bop_ring_buffer *brb, + enum block_op_type type, dm_block_t b) +{ + struct block_op *bop; + unsigned next = brb_next(brb, brb->end); + + /* + * We don't allow the last bop to be filled, this way we can + * differentiate between full and empty. + */ + if (next == brb->begin) + return -ENOMEM; + + bop = brb->bops + brb->end; + bop->type = type; + bop->block = b; + + brb->end = next; + + return 0; +} + +static int brb_pop(struct bop_ring_buffer *brb, struct block_op *result) +{ + struct block_op *bop; + + if (brb_empty(brb)) + return -ENODATA; + + bop = brb->bops + brb->begin; + result->type = bop->type; + result->block = bop->block; + + brb->begin = brb_next(brb, brb->begin); + + return 0; +} + +/*----------------------------------------------------------------*/ + struct sm_metadata { struct dm_space_map sm; @@ -101,25 +164,20 @@ struct sm_metadata { unsigned recursion_count; unsigned allocated_this_transaction; - unsigned nr_uncommitted; - struct block_op uncommitted[MAX_RECURSIVE_ALLOCATIONS]; + struct bop_ring_buffer uncommitted; struct threshold threshold; }; static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b) { - struct block_op *op; + int r = brb_push(&smm->uncommitted, type, b); - if (smm->nr_uncommitted == MAX_RECURSIVE_ALLOCATIONS) { + if (r) { DMERR("too many recursive allocations"); return -ENOMEM; } - op = smm->uncommitted + smm->nr_uncommitted++; - op->type = type; - op->block = b; - return 0; } @@ -158,11 +216,17 @@ static int out(struct sm_metadata *smm) return -ENOMEM; } - if (smm->recursion_count == 1 && smm->nr_uncommitted) { - while (smm->nr_uncommitted && !r) { - smm->nr_uncommitted--; - r = commit_bop(smm, smm->uncommitted + - smm->nr_uncommitted); + if (smm->recursion_count == 1) { + while (!brb_empty(&smm->uncommitted)) { + struct block_op bop; + + r = brb_pop(&smm->uncommitted, &bop); + if (r) { + DMERR("bug in bop ring buffer"); + break; + } + + r = commit_bop(smm, &bop); if (r) break; } @@ -217,7 +281,8 @@ static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count) static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b, uint32_t *result) { - int r, i; + int r; + unsigned i; struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); unsigned adjustment = 0; @@ -225,8 +290,10 @@ static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b, * We may have some uncommitted adjustments to add. This list * should always be really short. */ - for (i = 0; i < smm->nr_uncommitted; i++) { - struct block_op *op = smm->uncommitted + i; + for (i = smm->uncommitted.begin; + i != smm->uncommitted.end; + i = brb_next(&smm->uncommitted, i)) { + struct block_op *op = smm->uncommitted.bops + i; if (op->block != b) continue; @@ -254,7 +321,8 @@ static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b, static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm, dm_block_t b, int *result) { - int r, i, adjustment = 0; + int r, adjustment = 0; + unsigned i; struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); uint32_t rc; @@ -262,8 +330,11 @@ static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm, * We may have some uncommitted adjustments to add. This list * should always be really short. */ - for (i = 0; i < smm->nr_uncommitted; i++) { - struct block_op *op = smm->uncommitted + i; + for (i = smm->uncommitted.begin; + i != smm->uncommitted.end; + i = brb_next(&smm->uncommitted, i)) { + + struct block_op *op = smm->uncommitted.bops + i; if (op->block != b) continue; @@ -384,12 +455,16 @@ static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b) struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm); int r = sm_metadata_new_block_(sm, b); - if (r) - DMERR("unable to allocate new metadata block"); + if (r) { + DMERR_LIMIT("unable to allocate new metadata block"); + return r; + } r = sm_metadata_get_nr_free(sm, &count); - if (r) - DMERR("couldn't get free block count"); + if (r) { + DMERR_LIMIT("couldn't get free block count"); + return r; + } check_threshold(&smm->threshold, count); @@ -604,20 +679,38 @@ static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks) * Flick into a mode where all blocks get allocated in the new area. */ smm->begin = old_len; - memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); + memcpy(sm, &bootstrap_ops, sizeof(*sm)); /* * Extend. */ r = sm_ll_extend(&smm->ll, extra_blocks); + if (r) + goto out; /* - * Switch back to normal behaviour. + * We repeatedly increment then commit until the commit doesn't + * allocate any new blocks. */ - memcpy(&smm->sm, &ops, sizeof(smm->sm)); - for (i = old_len; !r && i < smm->begin; i++) - r = sm_ll_inc(&smm->ll, i, &ev); + do { + for (i = old_len; !r && i < smm->begin; i++) { + r = sm_ll_inc(&smm->ll, i, &ev); + if (r) + goto out; + } + old_len = smm->begin; + + r = sm_ll_commit(&smm->ll); + if (r) + goto out; + + } while (old_len != smm->begin); +out: + /* + * Switch back to normal behaviour. + */ + memcpy(sm, &ops, sizeof(*sm)); return r; } @@ -649,7 +742,7 @@ int dm_sm_metadata_create(struct dm_space_map *sm, smm->begin = superblock + 1; smm->recursion_count = 0; smm->allocated_this_transaction = 0; - smm->nr_uncommitted = 0; + brb_init(&smm->uncommitted); threshold_init(&smm->threshold); memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm)); @@ -658,6 +751,8 @@ int dm_sm_metadata_create(struct dm_space_map *sm, if (r) return r; + if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS) + nr_blocks = DM_SM_METADATA_MAX_BLOCKS; r = sm_ll_extend(&smm->ll, nr_blocks); if (r) return r; @@ -691,7 +786,7 @@ int dm_sm_metadata_open(struct dm_space_map *sm, smm->begin = 0; smm->recursion_count = 0; smm->allocated_this_transaction = 0; - smm->nr_uncommitted = 0; + brb_init(&smm->uncommitted); threshold_init(&smm->threshold); memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll)); diff --git a/drivers/md/persistent-data/dm-space-map-metadata.h b/drivers/md/persistent-data/dm-space-map-metadata.h index 39bba0801cf..64df923974d 100644 --- a/drivers/md/persistent-data/dm-space-map-metadata.h +++ b/drivers/md/persistent-data/dm-space-map-metadata.h @@ -9,6 +9,17 @@ #include "dm-transaction-manager.h" +#define DM_SM_METADATA_BLOCK_SIZE (4096 >> SECTOR_SHIFT) + +/* + * The metadata device is currently limited in size. + * + * We have one block of index, which can hold 255 index entries. Each + * index entry contains allocation info about ~16k metadata blocks. + */ +#define DM_SM_METADATA_MAX_BLOCKS (255 * ((1 << 14) - 64)) +#define DM_SM_METADATA_MAX_SECTORS (DM_SM_METADATA_MAX_BLOCKS * DM_SM_METADATA_BLOCK_SIZE) + /* * Unfortunately we have to use two-phase construction due to the cycle * between the tm and sm. diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c index 81da1a26042..3bc30a0ae3d 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.c +++ b/drivers/md/persistent-data/dm-transaction-manager.c @@ -154,7 +154,7 @@ int dm_tm_pre_commit(struct dm_transaction_manager *tm) if (r < 0) return r; - return 0; + return dm_bm_flush(tm->bm); } EXPORT_SYMBOL_GPL(dm_tm_pre_commit); @@ -164,8 +164,9 @@ int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root) return -EWOULDBLOCK; wipe_shadow_table(tm); + dm_bm_unlock(root); - return dm_bm_flush_and_unlock(tm->bm, root); + return dm_bm_flush(tm->bm); } EXPORT_SYMBOL_GPL(dm_tm_commit); diff --git a/drivers/md/persistent-data/dm-transaction-manager.h b/drivers/md/persistent-data/dm-transaction-manager.h index b5b139076ca..2772ed2a781 100644 --- a/drivers/md/persistent-data/dm-transaction-manager.h +++ b/drivers/md/persistent-data/dm-transaction-manager.h @@ -38,18 +38,17 @@ struct dm_transaction_manager *dm_tm_create_non_blocking_clone(struct dm_transac /* * We use a 2-phase commit here. * - * i) In the first phase the block manager is told to start flushing, and - * the changes to the space map are written to disk. You should interrogate - * your particular space map to get detail of its root node etc. to be - * included in your superblock. + * i) Make all changes for the transaction *except* for the superblock. + * Then call dm_tm_pre_commit() to flush them to disk. * - * ii) @root will be committed last. You shouldn't use more than the - * first 512 bytes of @root if you wish the transaction to survive a power - * failure. You *must* have a write lock held on @root for both stage (i) - * and (ii). The commit will drop the write lock. + * ii) Lock your superblock. Update. Then call dm_tm_commit() which will + * unlock the superblock and flush it. No other blocks should be updated + * during this period. Care should be taken to never unlock a partially + * updated superblock; perform any operations that could fail *before* you + * take the superblock lock. */ int dm_tm_pre_commit(struct dm_transaction_manager *tm); -int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root); +int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *superblock); /* * These methods are the only way to get hold of a writeable block. |
