Diffstat (limited to 'drivers/md/persistent-data')
 -rw-r--r--  drivers/md/persistent-data/Kconfig                    |  10
 -rw-r--r--  drivers/md/persistent-data/dm-array.c                 |  15
 -rw-r--r--  drivers/md/persistent-data/dm-bitset.c                |  10
 -rw-r--r--  drivers/md/persistent-data/dm-bitset.h                |   1
 -rw-r--r--  drivers/md/persistent-data/dm-block-manager.c         |  23
 -rw-r--r--  drivers/md/persistent-data/dm-block-manager.h         |  10
 -rw-r--r--  drivers/md/persistent-data/dm-btree.c                 |  33
 -rw-r--r--  drivers/md/persistent-data/dm-btree.h                 |   8
 -rw-r--r--  drivers/md/persistent-data/dm-space-map-common.c      |  38
 -rw-r--r--  drivers/md/persistent-data/dm-space-map-disk.c        |  18
 -rw-r--r--  drivers/md/persistent-data/dm-space-map-metadata.c    | 155
 -rw-r--r--  drivers/md/persistent-data/dm-space-map-metadata.h    |  11
 -rw-r--r--  drivers/md/persistent-data/dm-transaction-manager.c   |   5
 -rw-r--r--  drivers/md/persistent-data/dm-transaction-manager.h   |  17
14 files changed, 257 insertions(+), 97 deletions(-)
diff --git a/drivers/md/persistent-data/Kconfig b/drivers/md/persistent-data/Kconfig
index 19b26879541..0c2dec7aec2 100644
--- a/drivers/md/persistent-data/Kconfig
+++ b/drivers/md/persistent-data/Kconfig
@@ -6,3 +6,13 @@ config DM_PERSISTENT_DATA
        ---help---
 	 Library providing immutable on-disk data structure support for
 	 device-mapper targets such as the thin provisioning target.
+
+config DM_DEBUG_BLOCK_STACK_TRACING
+       boolean "Keep stack trace of persistent data block lock holders"
+       depends on STACKTRACE_SUPPORT && DM_PERSISTENT_DATA
+       select STACKTRACE
+       ---help---
+	 Enable this for messages that may help debug problems with the
+	 block manager locking used by thin provisioning and caching.
+
+	 If unsure, say N.
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c
index 172147eb1d4..1d75b1dc1e2 100644
--- a/drivers/md/persistent-data/dm-array.c
+++ b/drivers/md/persistent-data/dm-array.c
@@ -317,8 +317,16 @@ static int shadow_ablock(struct dm_array_info *info, dm_block_t *root,
 	 * The shadow op will often be a noop.  Only insert if it really
 	 * copied data.
 	 */
-	if (dm_block_location(*block) != b)
+	if (dm_block_location(*block) != b) {
+		/*
+		 * dm_tm_shadow_block will have already decremented the old
+		 * block, but it is still referenced by the btree.  We
+		 * increment to stop the insert decrementing it below zero
+		 * when overwriting the old value.
+		 */
+		dm_tm_inc(info->btree_info.tm, b);
 		r = insert_ablock(info, index, *block, root);
+	}
 
 	return r;
 }
@@ -509,15 +517,18 @@ static int grow_add_tail_block(struct resize *resize)
 static int grow_needs_more_blocks(struct resize *resize)
 {
 	int r;
+	unsigned old_nr_blocks = resize->old_nr_full_blocks;
 
 	if (resize->old_nr_entries_in_last_block > 0) {
+		old_nr_blocks++;
+
 		r = grow_extend_tail_block(resize, resize->max_entries);
 		if (r)
 			return r;
 	}
 
 	r = insert_full_ablocks(resize->info, resize->size_of_block,
-				resize->old_nr_full_blocks,
+				old_nr_blocks,
 				resize->new_nr_full_blocks,
 				resize->max_entries, resize->value,
 				&resize->root);
diff --git a/drivers/md/persistent-data/dm-bitset.c b/drivers/md/persistent-data/dm-bitset.c
index cd9a86d4cdf..36f7cc2c710 100644
--- a/drivers/md/persistent-data/dm-bitset.c
+++ b/drivers/md/persistent-data/dm-bitset.c
@@ -65,7 +65,7 @@ int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root,
 	int r;
 	__le64 value;
 
-	if (!info->current_index_set)
+	if (!info->current_index_set || !info->dirty)
 		return 0;
 
 	value = cpu_to_le64(info->current_bits);
@@ -77,6 +77,8 @@ int dm_bitset_flush(struct dm_disk_bitset *info, dm_block_t root,
 		return r;
 
 	info->current_index_set = false;
+	info->dirty = false;
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(dm_bitset_flush);
@@ -94,6 +96,8 @@ static int read_bits(struct dm_disk_bitset *info, dm_block_t root,
 	info->current_bits = le64_to_cpu(value);
 	info->current_index_set = true;
 	info->current_index = array_index;
+	info->dirty = false;
+
 	return 0;
 }
 
@@ -126,6 +130,8 @@ int dm_bitset_set_bit(struct dm_disk_bitset *info, dm_block_t root,
 		return r;
 
 	set_bit(b, (unsigned long *) &info->current_bits);
+	info->dirty = true;
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(dm_bitset_set_bit);
@@ -141,6 +147,8 @@ int dm_bitset_clear_bit(struct dm_disk_bitset *info, dm_block_t root,
 		return r;
 
 	clear_bit(b, (unsigned long *) &info->current_bits);
+	info->dirty = true;
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(dm_bitset_clear_bit);
diff --git a/drivers/md/persistent-data/dm-bitset.h b/drivers/md/persistent-data/dm-bitset.h
index e1b9bea14aa..c2287d672ef 100644
--- a/drivers/md/persistent-data/dm-bitset.h
+++ b/drivers/md/persistent-data/dm-bitset.h
@@ -71,6 +71,7 @@ struct dm_disk_bitset {
 	uint64_t current_bits;
 
 	bool current_index_set:1;
+	bool dirty:1;
 };
 
 /*
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c
index a7e8bf29638..087411c95ff 100644
--- a/drivers/md/persistent-data/dm-block-manager.c
+++ b/drivers/md/persistent-data/dm-block-manager.c
@@ -104,7 +104,7 @@ static int __check_holder(struct block_lock *lock)
 
 	for (i = 0; i < MAX_HOLDERS; i++) {
 		if (lock->holders[i] == current) {
-			DMERR("recursive lock detected in pool metadata");
+			DMERR("recursive lock detected in metadata");
 #ifdef CONFIG_DM_DEBUG_BLOCK_STACK_TRACING
 			DMERR("previously held here:");
 			print_stack_trace(lock->traces + i, 4);
@@ -595,25 +595,14 @@ int dm_bm_unlock(struct dm_block *b)
 }
 EXPORT_SYMBOL_GPL(dm_bm_unlock);
 
-int dm_bm_flush_and_unlock(struct dm_block_manager *bm,
-			   struct dm_block *superblock)
+int dm_bm_flush(struct dm_block_manager *bm)
 {
-	int r;
-
 	if (bm->read_only)
 		return -EPERM;
 
-	r = dm_bufio_write_dirty_buffers(bm->bufio);
-	if (unlikely(r)) {
-		dm_bm_unlock(superblock);
-		return r;
-	}
-
-	dm_bm_unlock(superblock);
-
 	return dm_bufio_write_dirty_buffers(bm->bufio);
 }
-EXPORT_SYMBOL_GPL(dm_bm_flush_and_unlock);
+EXPORT_SYMBOL_GPL(dm_bm_flush);
 
 void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b)
 {
@@ -626,6 +615,12 @@ void dm_bm_set_read_only(struct dm_block_manager *bm)
 }
 EXPORT_SYMBOL_GPL(dm_bm_set_read_only);
 
+void dm_bm_set_read_write(struct dm_block_manager *bm)
+{
+	bm->read_only = false;
+}
+EXPORT_SYMBOL_GPL(dm_bm_set_read_write);
+
 u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor)
 {
 	return crc32c(~(u32) 0, data, len) ^ init_xor;
diff --git a/drivers/md/persistent-data/dm-block-manager.h b/drivers/md/persistent-data/dm-block-manager.h
index 9a82083a66b..1b95dfc1778 100644
--- a/drivers/md/persistent-data/dm-block-manager.h
+++ b/drivers/md/persistent-data/dm-block-manager.h
@@ -105,12 +105,11 @@ int dm_bm_unlock(struct dm_block *b);
  *
  * This method always blocks.
  */
-int dm_bm_flush_and_unlock(struct dm_block_manager *bm,
-			   struct dm_block *superblock);
+int dm_bm_flush(struct dm_block_manager *bm);
 
- /*
-  * Request data be prefetched into the cache.
-  */
+/*
+ * Request data is prefetched into the cache.
+ */
 void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b);
 
 /*
@@ -125,6 +124,7 @@ void dm_bm_prefetch(struct dm_block_manager *bm, dm_block_t b);
  * be returned if you do.
  */
 void dm_bm_set_read_only(struct dm_block_manager *bm);
+void dm_bm_set_read_write(struct dm_block_manager *bm);
 
 u32 dm_bm_checksum(const void *data, size_t len, u32 init_xor);
 
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index 468e371ee9b..416060c2570 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -770,8 +770,8 @@ EXPORT_SYMBOL_GPL(dm_btree_insert_notify);
 
 /*----------------------------------------------------------------*/
 
-static int find_highest_key(struct ro_spine *s, dm_block_t block,
-			    uint64_t *result_key, dm_block_t *next_block)
+static int find_key(struct ro_spine *s, dm_block_t block, bool find_highest,
+		    uint64_t *result_key, dm_block_t *next_block)
 {
 	int i, r;
 	uint32_t flags;
@@ -788,7 +788,11 @@ static int find_highest_key(struct ro_spine *s, dm_block_t block,
 		else
 			i--;
 
-		*result_key = le64_to_cpu(ro_node(s)->keys[i]);
+		if (find_highest)
+			*result_key = le64_to_cpu(ro_node(s)->keys[i]);
+		else
+			*result_key = le64_to_cpu(ro_node(s)->keys[0]);
+
 		if (next_block || flags & INTERNAL_NODE)
 			block = value64(ro_node(s), i);
 
@@ -799,16 +803,16 @@ static int find_highest_key(struct ro_spine *s, dm_block_t block,
 	return 0;
 }
 
-int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
-			      uint64_t *result_keys)
+static int dm_btree_find_key(struct dm_btree_info *info, dm_block_t root,
+			     bool find_highest, uint64_t *result_keys)
 {
 	int r = 0, count = 0, level;
 	struct ro_spine spine;
 
 	init_ro_spine(&spine, info);
 	for (level = 0; level < info->levels; level++) {
-		r = find_highest_key(&spine, root, result_keys + level,
-				     level == info->levels - 1 ? NULL : &root);
+		r = find_key(&spine, root, find_highest, result_keys + level,
+			     level == info->levels - 1 ? NULL : &root);
 		if (r == -ENODATA) {
 			r = 0;
 			break;
@@ -822,8 +826,23 @@ int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
 
 	return r ? r : count;
 }
+
+int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
+			      uint64_t *result_keys)
+{
+	return dm_btree_find_key(info, root, true, result_keys);
+}
 EXPORT_SYMBOL_GPL(dm_btree_find_highest_key);
 
+int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root,
+			     uint64_t *result_keys)
+{
+	return dm_btree_find_key(info, root, false, result_keys);
+}
+EXPORT_SYMBOL_GPL(dm_btree_find_lowest_key);
+
+/*----------------------------------------------------------------*/
+
 /*
  * FIXME: We shouldn't use a recursive algorithm when we have limited stack
  * space.  Also this only works for single level trees.
diff --git a/drivers/md/persistent-data/dm-btree.h b/drivers/md/persistent-data/dm-btree.h
index 8672d159e0b..dacfc34180b 100644
--- a/drivers/md/persistent-data/dm-btree.h
+++ b/drivers/md/persistent-data/dm-btree.h
@@ -137,6 +137,14 @@ int dm_btree_remove(struct dm_btree_info *info, dm_block_t root,
 /*
  * Returns < 0 on failure.  Otherwise the number of key entries that have
  * been filled out.  Remember trees can have zero entries, and as such have
+ * no lowest key.
+ */
+int dm_btree_find_lowest_key(struct dm_btree_info *info, dm_block_t root,
+			     uint64_t *result_keys);
+
+/*
+ * Returns < 0 on failure.  Otherwise the number of key entries that have
+ * been filled out.  Remember trees can have zero entries, and as such have
  * no highest key.
  */
 int dm_btree_find_highest_key(struct dm_btree_info *info, dm_block_t root,
diff --git a/drivers/md/persistent-data/dm-space-map-common.c b/drivers/md/persistent-data/dm-space-map-common.c
index 6058569fe86..aacbe70c2c2 100644
--- a/drivers/md/persistent-data/dm-space-map-common.c
+++ b/drivers/md/persistent-data/dm-space-map-common.c
@@ -245,6 +245,10 @@ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
 		return -EINVAL;
 	}
 
+	/*
+	 * We need to set this before the dm_tm_new_block() call below.
+	 */
+	ll->nr_blocks = nr_blocks;
 	for (i = old_blocks; i < blocks; i++) {
 		struct dm_block *b;
 		struct disk_index_entry idx;
@@ -252,6 +256,7 @@ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
 		r = dm_tm_new_block(ll->tm, &dm_sm_bitmap_validator, &b);
 		if (r < 0)
 			return r;
+
 		idx.blocknr = cpu_to_le64(dm_block_location(b));
 
 		r = dm_tm_unlock(ll->tm, b);
@@ -266,7 +271,6 @@ int sm_ll_extend(struct ll_disk *ll, dm_block_t extra_blocks)
 			return r;
 	}
 
-	ll->nr_blocks = nr_blocks;
 	return 0;
 }
 
@@ -381,7 +385,7 @@ int sm_ll_find_free_block(struct ll_disk *ll, dm_block_t begin,
 }
 
 static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
-			uint32_t (*mutator)(void *context, uint32_t old),
+			int (*mutator)(void *context, uint32_t old, uint32_t *new),
 			void *context, enum allocation_event *ev)
 {
 	int r;
@@ -410,11 +414,17 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
 
 	if (old > 2) {
 		r = sm_ll_lookup_big_ref_count(ll, b, &old);
-		if (r < 0)
+		if (r < 0) {
+			dm_tm_unlock(ll->tm, nb);
 			return r;
+		}
 	}
 
-	ref_count = mutator(context, old);
+	r = mutator(context, old, &ref_count);
+	if (r) {
+		dm_tm_unlock(ll->tm, nb);
+		return r;
+	}
 
 	if (ref_count <= 2) {
 		sm_set_bitmap(bm_le, bit, ref_count);
@@ -465,9 +475,10 @@ static int sm_ll_mutate(struct ll_disk *ll, dm_block_t b,
 	return ll->save_ie(ll, index, &ie_disk);
 }
 
-static uint32_t set_ref_count(void *context, uint32_t old)
+static int set_ref_count(void *context, uint32_t old, uint32_t *new)
 {
-	return *((uint32_t *) context);
+	*new = *((uint32_t *) context);
+	return 0;
 }
 
 int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
@@ -476,9 +487,10 @@ int sm_ll_insert(struct ll_disk *ll, dm_block_t b,
 	return sm_ll_mutate(ll, b, set_ref_count, &ref_count, ev);
 }
 
-static uint32_t inc_ref_count(void *context, uint32_t old)
+static int inc_ref_count(void *context, uint32_t old, uint32_t *new)
 {
-	return old + 1;
+	*new = old + 1;
+	return 0;
 }
 
 int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
@@ -486,9 +498,15 @@ int sm_ll_inc(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
 	return sm_ll_mutate(ll, b, inc_ref_count, NULL, ev);
 }
 
-static uint32_t dec_ref_count(void *context, uint32_t old)
+static int dec_ref_count(void *context, uint32_t old, uint32_t *new)
 {
-	return old - 1;
+	if (!old) {
+		DMERR_LIMIT("unable to decrement a reference count below 0");
+		return -EINVAL;
+	}
+
+	*new = old - 1;
+	return 0;
 }
 
 int sm_ll_dec(struct ll_disk *ll, dm_block_t b, enum allocation_event *ev)
diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c
index e735a6d5a79..cfbf9617e46 100644
--- a/drivers/md/persistent-data/dm-space-map-disk.c
+++ b/drivers/md/persistent-data/dm-space-map-disk.c
@@ -140,26 +140,10 @@ static int sm_disk_inc_block(struct dm_space_map *sm, dm_block_t b)
 
 static int sm_disk_dec_block(struct dm_space_map *sm, dm_block_t b)
 {
-	int r;
-	uint32_t old_count;
 	enum allocation_event ev;
 	struct sm_disk *smd = container_of(sm, struct sm_disk, sm);
 
-	r = sm_ll_dec(&smd->ll, b, &ev);
-	if (!r && (ev == SM_FREE)) {
-		/*
-		 * It's only free if it's also free in the last
-		 * transaction.
-		 */
-		r = sm_ll_lookup(&smd->old_ll, b, &old_count);
-		if (r)
-			return r;
-
-		if (!old_count)
-			smd->nr_allocated_this_transaction--;
-	}
-
-	return r;
+	return sm_ll_dec(&smd->ll, b, &ev);
 }
 
 static int sm_disk_new_block(struct dm_space_map *sm, dm_block_t *b)
diff --git a/drivers/md/persistent-data/dm-space-map-metadata.c b/drivers/md/persistent-data/dm-space-map-metadata.c
index 1c959684cae..786b689bdfc 100644
--- a/drivers/md/persistent-data/dm-space-map-metadata.c
+++ b/drivers/md/persistent-data/dm-space-map-metadata.c
@@ -91,6 +91,69 @@ struct block_op {
 	dm_block_t block;
 };
 
+struct bop_ring_buffer {
+	unsigned begin;
+	unsigned end;
+	struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1];
+};
+
+static void brb_init(struct bop_ring_buffer *brb)
+{
+	brb->begin = 0;
+	brb->end = 0;
+}
+
+static bool brb_empty(struct bop_ring_buffer *brb)
+{
+	return brb->begin == brb->end;
+}
+
+static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old)
+{
+	unsigned r = old + 1;
+	return (r >= (sizeof(brb->bops) / sizeof(*brb->bops))) ? 0 : r;
+}
+
+static int brb_push(struct bop_ring_buffer *brb,
+		    enum block_op_type type, dm_block_t b)
+{
+	struct block_op *bop;
+	unsigned next = brb_next(brb, brb->end);
+
+	/*
+	 * We don't allow the last bop to be filled, this way we can
+	 * differentiate between full and empty.
+	 */
+	if (next == brb->begin)
+		return -ENOMEM;
+
+	bop = brb->bops + brb->end;
+	bop->type = type;
+	bop->block = b;
+
+	brb->end = next;
+
+	return 0;
+}
+
+static int brb_pop(struct bop_ring_buffer *brb, struct block_op *result)
+{
+	struct block_op *bop;
+
+	if (brb_empty(brb))
+		return -ENODATA;
+
+	bop = brb->bops + brb->begin;
+	result->type = bop->type;
+	result->block = bop->block;
+
+	brb->begin = brb_next(brb, brb->begin);
+
+	return 0;
+}
+
+/*----------------------------------------------------------------*/
+
 struct sm_metadata {
 	struct dm_space_map sm;
 
@@ -101,25 +164,20 @@ struct sm_metadata {
 
 	unsigned recursion_count;
 	unsigned allocated_this_transaction;
-	unsigned nr_uncommitted;
-	struct block_op uncommitted[MAX_RECURSIVE_ALLOCATIONS];
+	struct bop_ring_buffer uncommitted;
 
 	struct threshold threshold;
 };
 
 static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b)
 {
-	struct block_op *op;
+	int r = brb_push(&smm->uncommitted, type, b);
 
-	if (smm->nr_uncommitted == MAX_RECURSIVE_ALLOCATIONS) {
+	if (r) {
 		DMERR("too many recursive allocations");
 		return -ENOMEM;
 	}
 
-	op = smm->uncommitted + smm->nr_uncommitted++;
-	op->type = type;
-	op->block = b;
-
 	return 0;
 }
 
@@ -158,11 +216,17 @@ static int out(struct sm_metadata *smm)
 		return -ENOMEM;
 	}
 
-	if (smm->recursion_count == 1 && smm->nr_uncommitted) {
-		while (smm->nr_uncommitted && !r) {
-			smm->nr_uncommitted--;
-			r = commit_bop(smm, smm->uncommitted +
-				       smm->nr_uncommitted);
+	if (smm->recursion_count == 1) {
+		while (!brb_empty(&smm->uncommitted)) {
+			struct block_op bop;
+
+			r = brb_pop(&smm->uncommitted, &bop);
+			if (r) {
+				DMERR("bug in bop ring buffer");
+				break;
+			}
+
+			r = commit_bop(smm, &bop);
 			if (r)
 				break;
 		}
@@ -217,7 +281,8 @@ static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
 static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
 				 uint32_t *result)
 {
-	int r, i;
+	int r;
+	unsigned i;
 	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 	unsigned adjustment = 0;
 
@@ -225,8 +290,10 @@ static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
 	 * We may have some uncommitted adjustments to add.  This list
 	 * should always be really short.
 	 */
-	for (i = 0; i < smm->nr_uncommitted; i++) {
-		struct block_op *op = smm->uncommitted + i;
+	for (i = smm->uncommitted.begin;
+	     i != smm->uncommitted.end;
+	     i = brb_next(&smm->uncommitted, i)) {
+		struct block_op *op = smm->uncommitted.bops + i;
 
 		if (op->block != b)
 			continue;
@@ -254,7 +321,8 @@ static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
 static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
 					      dm_block_t b, int *result)
 {
-	int r, i, adjustment = 0;
+	int r, adjustment = 0;
+	unsigned i;
 	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 	uint32_t rc;
 
@@ -262,8 +330,11 @@ static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
 	 * We may have some uncommitted adjustments to add.  This list
 	 * should always be really short.
 	 */
-	for (i = 0; i < smm->nr_uncommitted; i++) {
-		struct block_op *op = smm->uncommitted + i;
+	for (i = smm->uncommitted.begin;
+	     i != smm->uncommitted.end;
+	     i = brb_next(&smm->uncommitted, i)) {
+
+		struct block_op *op = smm->uncommitted.bops + i;
 		if (op->block != b)
 			continue;
 
@@ -384,12 +455,16 @@ static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
 	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
 
 	int r = sm_metadata_new_block_(sm, b);
-	if (r)
-		DMERR("unable to allocate new metadata block");
+	if (r) {
+		DMERR_LIMIT("unable to allocate new metadata block");
+		return r;
+	}
 
 	r = sm_metadata_get_nr_free(sm, &count);
-	if (r)
-		DMERR("couldn't get free block count");
+	if (r) {
+		DMERR_LIMIT("couldn't get free block count");
+		return r;
+	}
 
 	check_threshold(&smm->threshold, count);
 
@@ -604,20 +679,38 @@ static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
 	 * Flick into a mode where all blocks get allocated in the new area.
 	 */
 	smm->begin = old_len;
-	memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
+	memcpy(sm, &bootstrap_ops, sizeof(*sm));
 
 	/*
 	 * Extend.
 	 */
 	r = sm_ll_extend(&smm->ll, extra_blocks);
+	if (r)
+		goto out;
 
 	/*
-	 * Switch back to normal behaviour.
+	 * We repeatedly increment then commit until the commit doesn't
+	 * allocate any new blocks.
 	 */
-	memcpy(&smm->sm, &ops, sizeof(smm->sm));
-	for (i = old_len; !r && i < smm->begin; i++)
-		r = sm_ll_inc(&smm->ll, i, &ev);
+	do {
+		for (i = old_len; !r && i < smm->begin; i++) {
+			r = sm_ll_inc(&smm->ll, i, &ev);
+			if (r)
+				goto out;
+		}
+		old_len = smm->begin;
+
+		r = sm_ll_commit(&smm->ll);
+		if (r)
+			goto out;
+
+	} while (old_len != smm->begin);
 
+out:
+	/*
+	 * Switch back to normal behaviour.
+	 */
+	memcpy(sm, &ops, sizeof(*sm));
 	return r;
 }
 
@@ -649,7 +742,7 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
 	smm->begin = superblock + 1;
 	smm->recursion_count = 0;
 	smm->allocated_this_transaction = 0;
-	smm->nr_uncommitted = 0;
+	brb_init(&smm->uncommitted);
 	threshold_init(&smm->threshold);
 
 	memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));
@@ -658,6 +751,8 @@ int dm_sm_metadata_create(struct dm_space_map *sm,
 	if (r)
 		return r;
 
+	if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
+		nr_blocks = DM_SM_METADATA_MAX_BLOCKS;
 	r = sm_ll_extend(&smm->ll, nr_blocks);
 	if (r)
 		return r;
@@ -691,7 +786,7 @@ int dm_sm_metadata_open(struct dm_space_map *sm,
 	smm->begin = 0;
 	smm->recursion_count = 0;
 	smm->allocated_this_transaction = 0;
-	smm->nr_uncommitted = 0;
+	brb_init(&smm->uncommitted);
 	threshold_init(&smm->threshold);
 
 	memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
diff --git a/drivers/md/persistent-data/dm-space-map-metadata.h b/drivers/md/persistent-data/dm-space-map-metadata.h
index 39bba0801cf..64df923974d 100644
--- a/drivers/md/persistent-data/dm-space-map-metadata.h
+++ b/drivers/md/persistent-data/dm-space-map-metadata.h
@@ -9,6 +9,17 @@
 
 #include "dm-transaction-manager.h"
 
+#define DM_SM_METADATA_BLOCK_SIZE (4096 >> SECTOR_SHIFT)
+
+/*
+ * The metadata device is currently limited in size.
+ *
+ * We have one block of index, which can hold 255 index entries.  Each
+ * index entry contains allocation info about ~16k metadata blocks.
+ */
+#define DM_SM_METADATA_MAX_BLOCKS (255 * ((1 << 14) - 64))
+#define DM_SM_METADATA_MAX_SECTORS (DM_SM_METADATA_MAX_BLOCKS * DM_SM_METADATA_BLOCK_SIZE)
+
 /*
  * Unfortunately we have to use two-phase construction due to the cycle
  * between the tm and sm.
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
index 81da1a26042..3bc30a0ae3d 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.c
+++ b/drivers/md/persistent-data/dm-transaction-manager.c
@@ -154,7 +154,7 @@ int dm_tm_pre_commit(struct dm_transaction_manager *tm)
 	if (r < 0)
 		return r;
 
-	return 0;
+	return dm_bm_flush(tm->bm);
 }
 EXPORT_SYMBOL_GPL(dm_tm_pre_commit);
 
@@ -164,8 +164,9 @@ int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root)
 		return -EWOULDBLOCK;
 
 	wipe_shadow_table(tm);
+	dm_bm_unlock(root);
 
-	return dm_bm_flush_and_unlock(tm->bm, root);
+	return dm_bm_flush(tm->bm);
 }
 EXPORT_SYMBOL_GPL(dm_tm_commit);
 
diff --git a/drivers/md/persistent-data/dm-transaction-manager.h b/drivers/md/persistent-data/dm-transaction-manager.h
index b5b139076ca..2772ed2a781 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.h
+++ b/drivers/md/persistent-data/dm-transaction-manager.h
@@ -38,18 +38,17 @@ struct dm_transaction_manager *dm_tm_create_non_blocking_clone(struct dm_transac
 /*
  * We use a 2-phase commit here.
  *
- * i) In the first phase the block manager is told to start flushing, and
- * the changes to the space map are written to disk.  You should interrogate
- * your particular space map to get detail of its root node etc. to be
- * included in your superblock.
+ * i) Make all changes for the transaction *except* for the superblock.
+ * Then call dm_tm_pre_commit() to flush them to disk.
  *
- * ii) @root will be committed last.  You shouldn't use more than the
- * first 512 bytes of @root if you wish the transaction to survive a power
- * failure.  You *must* have a write lock held on @root for both stage (i)
- * and (ii).  The commit will drop the write lock.
+ * ii) Lock your superblock.  Update.  Then call dm_tm_commit() which will
+ * unlock the superblock and flush it.  No other blocks should be updated
+ * during this period.  Care should be taken to never unlock a partially
+ * updated superblock; perform any operations that could fail *before* you
+ * take the superblock lock.
 */
 int dm_tm_pre_commit(struct dm_transaction_manager *tm);
-int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *root);
+int dm_tm_commit(struct dm_transaction_manager *tm, struct dm_block *superblock);
 
 /*
  * These methods are the only way to get hold of a writeable block.
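
The struct bop_ring_buffer added to dm-space-map-metadata.c above leaves one slot permanently unused, so that begin == end means "empty" while next(end) == begin means "full". The standalone sketch below is not part of the patch: it mirrors that discipline in plain userspace C so the invariant can be exercised outside the kernel, and every name in it (ring, ring_push, ring_pop, NR_SLOTS) is made up for the illustration.

#include <assert.h>
#include <stdio.h>

#define NR_SLOTS 8	/* stands in for MAX_RECURSIVE_ALLOCATIONS + 1 */

struct ring {
	unsigned begin;
	unsigned end;
	int slots[NR_SLOTS];
};

static unsigned ring_next(unsigned i)
{
	/* wrap around, the same shape as brb_next() in the patch */
	return (i + 1 >= NR_SLOTS) ? 0 : i + 1;
}

static int ring_push(struct ring *r, int v)
{
	unsigned next = ring_next(r->end);

	/* never fill the last slot; this is what distinguishes full from empty */
	if (next == r->begin)
		return -1;

	r->slots[r->end] = v;
	r->end = next;
	return 0;
}

static int ring_pop(struct ring *r, int *v)
{
	if (r->begin == r->end)
		return -1;	/* empty */

	*v = r->slots[r->begin];
	r->begin = ring_next(r->begin);
	return 0;
}

int main(void)
{
	struct ring r = { 0, 0, { 0 } };
	int i, v;

	/* only NR_SLOTS - 1 pushes can succeed before the buffer reports full */
	for (i = 0; i < NR_SLOTS - 1; i++)
		assert(ring_push(&r, i) == 0);
	assert(ring_push(&r, 99) != 0);

	while (ring_pop(&r, &v) == 0)
		printf("%d\n", v);

	return 0;
}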

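The reworked comment in dm-transaction-manager.h describes the revised two-phase commit, under which dm_tm_commit() now unlocks the superblock itself and then flushes via dm_bm_flush(). The sketch below shows how a caller might follow that sequence; it is an assumption for illustration only, the helper name commit_transaction and its parameters are hypothetical, and real users (for example the thin-provisioning metadata code) add their own superblock validation and root bookkeeping.

#include "dm-block-manager.h"
#include "dm-transaction-manager.h"

static int commit_transaction(struct dm_transaction_manager *tm,
			      struct dm_block_manager *bm,
			      dm_block_t sb_location,
			      struct dm_block_validator *sb_validator)
{
	int r;
	struct dm_block *sblock;

	/* i) everything except the superblock is already updated; flush it */
	r = dm_tm_pre_commit(tm);
	if (r < 0)
		return r;

	/*
	 * ii) lock the superblock last; anything that could fail should
	 * have been done before this point.
	 */
	r = dm_bm_write_lock(bm, sb_location, sb_validator, &sblock);
	if (r < 0)
		return r;

	/* ... copy new space map roots etc. into dm_block_data(sblock) ... */

	/* dm_tm_commit() unlocks the superblock and flushes it */
	return dm_tm_commit(tm, sblock);
}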