diff options
Diffstat (limited to 'drivers/md/dm-thin-metadata.c')
| -rw-r--r-- | drivers/md/dm-thin-metadata.c | 1041 |
1 files changed, 725 insertions, 316 deletions
diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index 237571af77f..e9d33ad59df 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2011 Red Hat, Inc. + * Copyright (C) 2011-2012 Red Hat, Inc. * * This file is released under the GPL. */ @@ -76,10 +76,16 @@ #define THIN_SUPERBLOCK_MAGIC 27022010 #define THIN_SUPERBLOCK_LOCATION 0 -#define THIN_VERSION 1 +#define THIN_VERSION 2 #define THIN_METADATA_CACHE_SIZE 64 #define SECTOR_TO_BLOCK_SHIFT 3 +/* + * 3 for btree insert + + * 2 for btree lookup used within space map + */ +#define THIN_MAX_CONCURRENT_LOCKS 5 + /* This should be plenty */ #define SPACE_MAP_ROOT_SIZE 128 @@ -172,13 +178,27 @@ struct dm_pool_metadata { struct rw_semaphore root_lock; uint32_t time; - int need_commit; dm_block_t root; dm_block_t details_root; struct list_head thin_devices; uint64_t trans_id; unsigned long flags; sector_t data_block_size; + bool read_only:1; + + /* + * Set if a transaction has to be aborted but the attempt to roll back + * to the previous (good) transaction failed. The only pool metadata + * operation possible in this state is the closing of the device. + */ + bool fail_io:1; + + /* + * Reading the space map roots can fail, so we read it into these + * buffers before the superblock is locked and updated. + */ + __u8 data_space_map_root[SPACE_MAP_ROOT_SIZE]; + __u8 metadata_space_map_root[SPACE_MAP_ROOT_SIZE]; }; struct dm_thin_device { @@ -187,7 +207,8 @@ struct dm_thin_device { dm_thin_id id; int open_count; - int changed; + bool changed:1; + bool aborted_with_changes:1; uint64_t mapped_blocks; uint64_t transaction_id; uint32_t creation_time; @@ -266,7 +287,7 @@ static void unpack_block_time(uint64_t v, dm_block_t *b, uint32_t *t) *t = v & ((1 << 24) - 1); } -static void data_block_inc(void *context, void *value_le) +static void data_block_inc(void *context, const void *value_le) { struct dm_space_map *sm = context; __le64 v_le; @@ -278,7 +299,7 @@ static void data_block_inc(void *context, void *value_le) dm_sm_inc_block(sm, b); } -static void data_block_dec(void *context, void *value_le) +static void data_block_dec(void *context, const void *value_le) { struct dm_space_map *sm = context; __le64 v_le; @@ -290,7 +311,7 @@ static void data_block_dec(void *context, void *value_le) dm_sm_dec_block(sm, b); } -static int data_block_equal(void *context, void *value1_le, void *value2_le) +static int data_block_equal(void *context, const void *value1_le, const void *value2_le) { __le64 v1_le, v2_le; uint64_t b1, b2; @@ -304,7 +325,7 @@ static int data_block_equal(void *context, void *value1_le, void *value2_le) return b1 == b2; } -static void subtree_inc(void *context, void *value) +static void subtree_inc(void *context, const void *value) { struct dm_btree_info *info = context; __le64 root_le; @@ -315,7 +336,7 @@ static void subtree_inc(void *context, void *value) dm_tm_inc(info->tm, root); } -static void subtree_dec(void *context, void *value) +static void subtree_dec(void *context, const void *value) { struct dm_btree_info *info = context; __le64 root_le; @@ -327,7 +348,7 @@ static void subtree_dec(void *context, void *value) DMERR("btree delete failed\n"); } -static int subtree_equal(void *context, void *value1_le, void *value2_le) +static int subtree_equal(void *context, const void *value1_le, const void *value2_le) { __le64 v1_le, v2_le; memcpy(&v1_le, value1_le, sizeof(v1_le)); @@ -338,7 +359,21 @@ static int subtree_equal(void *context, void *value1_le, void *value2_le) /*----------------------------------------------------------------*/ -static int superblock_all_zeroes(struct dm_block_manager *bm, int *result) +static int superblock_lock_zero(struct dm_pool_metadata *pmd, + struct dm_block **sblock) +{ + return dm_bm_write_lock_zero(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &sb_validator, sblock); +} + +static int superblock_lock(struct dm_pool_metadata *pmd, + struct dm_block **sblock) +{ + return dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &sb_validator, sblock); +} + +static int __superblock_all_zeroes(struct dm_block_manager *bm, int *result) { int r; unsigned i; @@ -365,72 +400,9 @@ static int superblock_all_zeroes(struct dm_block_manager *bm, int *result) return dm_bm_unlock(b); } -static int init_pmd(struct dm_pool_metadata *pmd, - struct dm_block_manager *bm, - dm_block_t nr_blocks, int create) +static void __setup_btree_details(struct dm_pool_metadata *pmd) { - int r; - struct dm_space_map *sm, *data_sm; - struct dm_transaction_manager *tm; - struct dm_block *sblock; - - if (create) { - r = dm_tm_create_with_sm(bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &tm, &sm, &sblock); - if (r < 0) { - DMERR("tm_create_with_sm failed"); - return r; - } - - data_sm = dm_sm_disk_create(tm, nr_blocks); - if (IS_ERR(data_sm)) { - DMERR("sm_disk_create failed"); - dm_tm_unlock(tm, sblock); - r = PTR_ERR(data_sm); - goto bad; - } - } else { - struct thin_disk_superblock *disk_super = NULL; - size_t space_map_root_offset = - offsetof(struct thin_disk_superblock, metadata_space_map_root); - - r = dm_tm_open_with_sm(bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, space_map_root_offset, - SPACE_MAP_ROOT_SIZE, &tm, &sm, &sblock); - if (r < 0) { - DMERR("tm_open_with_sm failed"); - return r; - } - - disk_super = dm_block_data(sblock); - data_sm = dm_sm_disk_open(tm, disk_super->data_space_map_root, - sizeof(disk_super->data_space_map_root)); - if (IS_ERR(data_sm)) { - DMERR("sm_disk_open failed"); - r = PTR_ERR(data_sm); - goto bad; - } - } - - - r = dm_tm_unlock(tm, sblock); - if (r < 0) { - DMERR("couldn't unlock superblock"); - goto bad_data_sm; - } - - pmd->bm = bm; - pmd->metadata_sm = sm; - pmd->data_sm = data_sm; - pmd->tm = tm; - pmd->nb_tm = dm_tm_create_non_blocking_clone(tm); - if (!pmd->nb_tm) { - DMERR("could not create clone tm"); - r = -ENOMEM; - goto bad_data_sm; - } - - pmd->info.tm = tm; + pmd->info.tm = pmd->tm; pmd->info.levels = 2; pmd->info.value_type.context = pmd->data_sm; pmd->info.value_type.size = sizeof(__le64); @@ -441,15 +413,15 @@ static int init_pmd(struct dm_pool_metadata *pmd, memcpy(&pmd->nb_info, &pmd->info, sizeof(pmd->nb_info)); pmd->nb_info.tm = pmd->nb_tm; - pmd->tl_info.tm = tm; + pmd->tl_info.tm = pmd->tm; pmd->tl_info.levels = 1; - pmd->tl_info.value_type.context = &pmd->info; + pmd->tl_info.value_type.context = &pmd->bl_info; pmd->tl_info.value_type.size = sizeof(__le64); pmd->tl_info.value_type.inc = subtree_inc; pmd->tl_info.value_type.dec = subtree_dec; pmd->tl_info.value_type.equal = subtree_equal; - pmd->bl_info.tm = tm; + pmd->bl_info.tm = pmd->tm; pmd->bl_info.levels = 1; pmd->bl_info.value_type.context = pmd->data_sm; pmd->bl_info.value_type.size = sizeof(__le64); @@ -457,92 +429,310 @@ static int init_pmd(struct dm_pool_metadata *pmd, pmd->bl_info.value_type.dec = data_block_dec; pmd->bl_info.value_type.equal = data_block_equal; - pmd->details_info.tm = tm; + pmd->details_info.tm = pmd->tm; pmd->details_info.levels = 1; pmd->details_info.value_type.context = NULL; pmd->details_info.value_type.size = sizeof(struct disk_device_details); pmd->details_info.value_type.inc = NULL; pmd->details_info.value_type.dec = NULL; pmd->details_info.value_type.equal = NULL; +} - pmd->root = 0; +static int save_sm_roots(struct dm_pool_metadata *pmd) +{ + int r; + size_t len; - init_rwsem(&pmd->root_lock); - pmd->time = 0; - pmd->need_commit = 0; - pmd->details_root = 0; - pmd->trans_id = 0; - pmd->flags = 0; - INIT_LIST_HEAD(&pmd->thin_devices); + r = dm_sm_root_size(pmd->metadata_sm, &len); + if (r < 0) + return r; - return 0; + r = dm_sm_copy_root(pmd->metadata_sm, &pmd->metadata_space_map_root, len); + if (r < 0) + return r; -bad_data_sm: - dm_sm_destroy(data_sm); -bad: - dm_tm_destroy(tm); - dm_sm_destroy(sm); + r = dm_sm_root_size(pmd->data_sm, &len); + if (r < 0) + return r; - return r; + return dm_sm_copy_root(pmd->data_sm, &pmd->data_space_map_root, len); } -static int __begin_transaction(struct dm_pool_metadata *pmd) +static void copy_sm_roots(struct dm_pool_metadata *pmd, + struct thin_disk_superblock *disk) +{ + memcpy(&disk->metadata_space_map_root, + &pmd->metadata_space_map_root, + sizeof(pmd->metadata_space_map_root)); + + memcpy(&disk->data_space_map_root, + &pmd->data_space_map_root, + sizeof(pmd->data_space_map_root)); +} + +static int __write_initial_superblock(struct dm_pool_metadata *pmd) { int r; - u32 features; - struct thin_disk_superblock *disk_super; struct dm_block *sblock; + struct thin_disk_superblock *disk_super; + sector_t bdev_size = i_size_read(pmd->bdev->bd_inode) >> SECTOR_SHIFT; - /* - * __maybe_commit_transaction() resets these - */ - WARN_ON(pmd->need_commit); + if (bdev_size > THIN_METADATA_MAX_SECTORS) + bdev_size = THIN_METADATA_MAX_SECTORS; - /* - * We re-read the superblock every time. Shouldn't need to do this - * really. - */ - r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &sblock); + r = dm_sm_commit(pmd->data_sm); + if (r < 0) + return r; + + r = save_sm_roots(pmd); + if (r < 0) + return r; + + r = dm_tm_pre_commit(pmd->tm); + if (r < 0) + return r; + + r = superblock_lock_zero(pmd, &sblock); if (r) return r; disk_super = dm_block_data(sblock); - pmd->time = le32_to_cpu(disk_super->time); - pmd->root = le64_to_cpu(disk_super->data_mapping_root); - pmd->details_root = le64_to_cpu(disk_super->device_details_root); - pmd->trans_id = le64_to_cpu(disk_super->trans_id); - pmd->flags = le32_to_cpu(disk_super->flags); - pmd->data_block_size = le32_to_cpu(disk_super->data_block_size); + disk_super->flags = 0; + memset(disk_super->uuid, 0, sizeof(disk_super->uuid)); + disk_super->magic = cpu_to_le64(THIN_SUPERBLOCK_MAGIC); + disk_super->version = cpu_to_le32(THIN_VERSION); + disk_super->time = 0; + disk_super->trans_id = 0; + disk_super->held_root = 0; + + copy_sm_roots(pmd, disk_super); + + disk_super->data_mapping_root = cpu_to_le64(pmd->root); + disk_super->device_details_root = cpu_to_le64(pmd->details_root); + disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE); + disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT); + disk_super->data_block_size = cpu_to_le32(pmd->data_block_size); + + return dm_tm_commit(pmd->tm, sblock); +} + +static int __format_metadata(struct dm_pool_metadata *pmd) +{ + int r; + + r = dm_tm_create_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &pmd->tm, &pmd->metadata_sm); + if (r < 0) { + DMERR("tm_create_with_sm failed"); + return r; + } + + pmd->data_sm = dm_sm_disk_create(pmd->tm, 0); + if (IS_ERR(pmd->data_sm)) { + DMERR("sm_disk_create failed"); + r = PTR_ERR(pmd->data_sm); + goto bad_cleanup_tm; + } + + pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm); + if (!pmd->nb_tm) { + DMERR("could not create non-blocking clone tm"); + r = -ENOMEM; + goto bad_cleanup_data_sm; + } + + __setup_btree_details(pmd); + + r = dm_btree_empty(&pmd->info, &pmd->root); + if (r < 0) + goto bad_cleanup_nb_tm; + + r = dm_btree_empty(&pmd->details_info, &pmd->details_root); + if (r < 0) { + DMERR("couldn't create devices root"); + goto bad_cleanup_nb_tm; + } + + r = __write_initial_superblock(pmd); + if (r) + goto bad_cleanup_nb_tm; + + return 0; + +bad_cleanup_nb_tm: + dm_tm_destroy(pmd->nb_tm); +bad_cleanup_data_sm: + dm_sm_destroy(pmd->data_sm); +bad_cleanup_tm: + dm_tm_destroy(pmd->tm); + dm_sm_destroy(pmd->metadata_sm); + + return r; +} + +static int __check_incompat_features(struct thin_disk_superblock *disk_super, + struct dm_pool_metadata *pmd) +{ + uint32_t features; features = le32_to_cpu(disk_super->incompat_flags) & ~THIN_FEATURE_INCOMPAT_SUPP; if (features) { - DMERR("could not access metadata due to " - "unsupported optional features (%lx).", + DMERR("could not access metadata due to unsupported optional features (%lx).", (unsigned long)features); - r = -EINVAL; - goto out; + return -EINVAL; } /* * Check for read-only metadata to skip the following RDWR checks. */ if (get_disk_ro(pmd->bdev->bd_disk)) - goto out; + return 0; features = le32_to_cpu(disk_super->compat_ro_flags) & ~THIN_FEATURE_COMPAT_RO_SUPP; if (features) { - DMERR("could not access metadata RDWR due to " - "unsupported optional features (%lx).", + DMERR("could not access metadata RDWR due to unsupported optional features (%lx).", (unsigned long)features); + return -EINVAL; + } + + return 0; +} + +static int __open_metadata(struct dm_pool_metadata *pmd) +{ + int r; + struct dm_block *sblock; + struct thin_disk_superblock *disk_super; + + r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &sb_validator, &sblock); + if (r < 0) { + DMERR("couldn't read superblock"); + return r; + } + + disk_super = dm_block_data(sblock); + + /* Verify the data block size hasn't changed */ + if (le32_to_cpu(disk_super->data_block_size) != pmd->data_block_size) { + DMERR("changing the data block size (from %u to %llu) is not supported", + le32_to_cpu(disk_super->data_block_size), + (unsigned long long)pmd->data_block_size); r = -EINVAL; + goto bad_unlock_sblock; } -out: + r = __check_incompat_features(disk_super, pmd); + if (r < 0) + goto bad_unlock_sblock; + + r = dm_tm_open_with_sm(pmd->bm, THIN_SUPERBLOCK_LOCATION, + disk_super->metadata_space_map_root, + sizeof(disk_super->metadata_space_map_root), + &pmd->tm, &pmd->metadata_sm); + if (r < 0) { + DMERR("tm_open_with_sm failed"); + goto bad_unlock_sblock; + } + + pmd->data_sm = dm_sm_disk_open(pmd->tm, disk_super->data_space_map_root, + sizeof(disk_super->data_space_map_root)); + if (IS_ERR(pmd->data_sm)) { + DMERR("sm_disk_open failed"); + r = PTR_ERR(pmd->data_sm); + goto bad_cleanup_tm; + } + + pmd->nb_tm = dm_tm_create_non_blocking_clone(pmd->tm); + if (!pmd->nb_tm) { + DMERR("could not create non-blocking clone tm"); + r = -ENOMEM; + goto bad_cleanup_data_sm; + } + + __setup_btree_details(pmd); + return dm_bm_unlock(sblock); + +bad_cleanup_data_sm: + dm_sm_destroy(pmd->data_sm); +bad_cleanup_tm: + dm_tm_destroy(pmd->tm); + dm_sm_destroy(pmd->metadata_sm); +bad_unlock_sblock: dm_bm_unlock(sblock); + + return r; +} + +static int __open_or_format_metadata(struct dm_pool_metadata *pmd, bool format_device) +{ + int r, unformatted; + + r = __superblock_all_zeroes(pmd->bm, &unformatted); + if (r) + return r; + + if (unformatted) + return format_device ? __format_metadata(pmd) : -EPERM; + + return __open_metadata(pmd); +} + +static int __create_persistent_data_objects(struct dm_pool_metadata *pmd, bool format_device) +{ + int r; + + pmd->bm = dm_block_manager_create(pmd->bdev, THIN_METADATA_BLOCK_SIZE << SECTOR_SHIFT, + THIN_METADATA_CACHE_SIZE, + THIN_MAX_CONCURRENT_LOCKS); + if (IS_ERR(pmd->bm)) { + DMERR("could not create block manager"); + return PTR_ERR(pmd->bm); + } + + r = __open_or_format_metadata(pmd, format_device); + if (r) + dm_block_manager_destroy(pmd->bm); + return r; } +static void __destroy_persistent_data_objects(struct dm_pool_metadata *pmd) +{ + dm_sm_destroy(pmd->data_sm); + dm_sm_destroy(pmd->metadata_sm); + dm_tm_destroy(pmd->nb_tm); + dm_tm_destroy(pmd->tm); + dm_block_manager_destroy(pmd->bm); +} + +static int __begin_transaction(struct dm_pool_metadata *pmd) +{ + int r; + struct thin_disk_superblock *disk_super; + struct dm_block *sblock; + + /* + * We re-read the superblock every time. Shouldn't need to do this + * really. + */ + r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &sb_validator, &sblock); + if (r) + return r; + + disk_super = dm_block_data(sblock); + pmd->time = le32_to_cpu(disk_super->time); + pmd->root = le64_to_cpu(disk_super->data_mapping_root); + pmd->details_root = le64_to_cpu(disk_super->device_details_root); + pmd->trans_id = le64_to_cpu(disk_super->trans_id); + pmd->flags = le32_to_cpu(disk_super->flags); + pmd->data_block_size = le32_to_cpu(disk_super->data_block_size); + + dm_bm_unlock(sblock); + return 0; +} + static int __write_changed_details(struct dm_pool_metadata *pmd) { int r; @@ -573,8 +763,6 @@ static int __write_changed_details(struct dm_pool_metadata *pmd) list_del(&td->list); kfree(td); } - - pmd->need_commit = 1; } return 0; @@ -582,9 +770,6 @@ static int __write_changed_details(struct dm_pool_metadata *pmd) static int __commit_transaction(struct dm_pool_metadata *pmd) { - /* - * FIXME: Associated pool should be made read-only on failure. - */ int r; size_t metadata_len, data_len; struct thin_disk_superblock *disk_super; @@ -597,31 +782,31 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) r = __write_changed_details(pmd); if (r < 0) - goto out; - - if (!pmd->need_commit) - goto out; + return r; r = dm_sm_commit(pmd->data_sm); if (r < 0) - goto out; + return r; r = dm_tm_pre_commit(pmd->tm); if (r < 0) - goto out; + return r; r = dm_sm_root_size(pmd->metadata_sm, &metadata_len); if (r < 0) - goto out; + return r; - r = dm_sm_root_size(pmd->metadata_sm, &data_len); + r = dm_sm_root_size(pmd->data_sm, &data_len); if (r < 0) - goto out; + return r; - r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &sblock); + r = save_sm_roots(pmd); + if (r < 0) + return r; + + r = superblock_lock(pmd, &sblock); if (r) - goto out; + return r; disk_super = dm_block_data(sblock); disk_super->time = cpu_to_le32(pmd->time); @@ -630,38 +815,17 @@ static int __commit_transaction(struct dm_pool_metadata *pmd) disk_super->trans_id = cpu_to_le64(pmd->trans_id); disk_super->flags = cpu_to_le32(pmd->flags); - r = dm_sm_copy_root(pmd->metadata_sm, &disk_super->metadata_space_map_root, - metadata_len); - if (r < 0) - goto out_locked; - - r = dm_sm_copy_root(pmd->data_sm, &disk_super->data_space_map_root, - data_len); - if (r < 0) - goto out_locked; - - r = dm_tm_commit(pmd->tm, sblock); - if (!r) - pmd->need_commit = 0; + copy_sm_roots(pmd, disk_super); -out: - return r; - -out_locked: - dm_bm_unlock(sblock); - return r; + return dm_tm_commit(pmd->tm, sblock); } struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, - sector_t data_block_size) + sector_t data_block_size, + bool format_device) { int r; - struct thin_disk_superblock *disk_super; struct dm_pool_metadata *pmd; - sector_t bdev_size = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; - struct dm_block_manager *bm; - int create; - struct dm_block *sblock; pmd = kmalloc(sizeof(*pmd), GFP_KERNEL); if (!pmd) { @@ -669,87 +833,28 @@ struct dm_pool_metadata *dm_pool_metadata_open(struct block_device *bdev, return ERR_PTR(-ENOMEM); } - /* - * Max hex locks: - * 3 for btree insert + - * 2 for btree lookup used within space map - */ - bm = dm_block_manager_create(bdev, THIN_METADATA_BLOCK_SIZE, - THIN_METADATA_CACHE_SIZE, 5); - if (!bm) { - DMERR("could not create block manager"); - kfree(pmd); - return ERR_PTR(-ENOMEM); - } - - r = superblock_all_zeroes(bm, &create); - if (r) { - dm_block_manager_destroy(bm); - kfree(pmd); - return ERR_PTR(r); - } - + init_rwsem(&pmd->root_lock); + pmd->time = 0; + INIT_LIST_HEAD(&pmd->thin_devices); + pmd->read_only = false; + pmd->fail_io = false; + pmd->bdev = bdev; + pmd->data_block_size = data_block_size; - r = init_pmd(pmd, bm, 0, create); + r = __create_persistent_data_objects(pmd, format_device); if (r) { - dm_block_manager_destroy(bm); kfree(pmd); return ERR_PTR(r); } - pmd->bdev = bdev; - - if (!create) { - r = __begin_transaction(pmd); - if (r < 0) - goto bad; - return pmd; - } - - /* - * Create. - */ - r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &sblock); - if (r) - goto bad; - - disk_super = dm_block_data(sblock); - disk_super->magic = cpu_to_le64(THIN_SUPERBLOCK_MAGIC); - disk_super->version = cpu_to_le32(THIN_VERSION); - disk_super->time = 0; - disk_super->metadata_block_size = cpu_to_le32(THIN_METADATA_BLOCK_SIZE >> SECTOR_SHIFT); - disk_super->metadata_nr_blocks = cpu_to_le64(bdev_size >> SECTOR_TO_BLOCK_SHIFT); - disk_super->data_block_size = cpu_to_le32(data_block_size); - - r = dm_bm_unlock(sblock); - if (r < 0) - goto bad; - - r = dm_btree_empty(&pmd->info, &pmd->root); - if (r < 0) - goto bad; - - r = dm_btree_empty(&pmd->details_info, &pmd->details_root); - if (r < 0) { - DMERR("couldn't create devices root"); - goto bad; - } - pmd->flags = 0; - pmd->need_commit = 1; - r = dm_pool_commit_metadata(pmd); + r = __begin_transaction(pmd); if (r < 0) { - DMERR("%s: dm_pool_commit_metadata() failed, error = %d", - __func__, r); - goto bad; + if (dm_pool_metadata_close(pmd) < 0) + DMWARN("%s: dm_pool_metadata_close() failed.", __func__); + return ERR_PTR(r); } return pmd; - -bad: - if (dm_pool_metadata_close(pmd) < 0) - DMWARN("%s: dm_pool_metadata_close() failed.", __func__); - return ERR_PTR(r); } int dm_pool_metadata_close(struct dm_pool_metadata *pmd) @@ -775,18 +880,17 @@ int dm_pool_metadata_close(struct dm_pool_metadata *pmd) return -EBUSY; } - r = __commit_transaction(pmd); - if (r < 0) - DMWARN("%s: __commit_transaction() failed, error = %d", - __func__, r); + if (!pmd->read_only && !pmd->fail_io) { + r = __commit_transaction(pmd); + if (r < 0) + DMWARN("%s: __commit_transaction() failed, error = %d", + __func__, r); + } - dm_tm_destroy(pmd->tm); - dm_tm_destroy(pmd->nb_tm); - dm_block_manager_destroy(pmd->bm); - dm_sm_destroy(pmd->metadata_sm); - dm_sm_destroy(pmd->data_sm); - kfree(pmd); + if (!pmd->fail_io) + __destroy_persistent_data_objects(pmd); + kfree(pmd); return 0; } @@ -847,6 +951,7 @@ static int __open_device(struct dm_pool_metadata *pmd, (*td)->id = dev; (*td)->open_count = 1; (*td)->changed = changed; + (*td)->aborted_with_changes = false; (*td)->mapped_blocks = le64_to_cpu(details_le.mapped_blocks); (*td)->transaction_id = le64_to_cpu(details_le.transaction_id); (*td)->creation_time = le32_to_cpu(details_le.creation_time); @@ -908,10 +1013,11 @@ static int __create_thin(struct dm_pool_metadata *pmd, int dm_pool_create_thin(struct dm_pool_metadata *pmd, dm_thin_id dev) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __create_thin(pmd, dev); + if (!pmd->fail_io) + r = __create_thin(pmd, dev); up_write(&pmd->root_lock); return r; @@ -998,10 +1104,11 @@ int dm_pool_create_snap(struct dm_pool_metadata *pmd, dm_thin_id dev, dm_thin_id origin) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __create_snap(pmd, dev, origin); + if (!pmd->fail_io) + r = __create_snap(pmd, dev, origin); up_write(&pmd->root_lock); return r; @@ -1034,18 +1141,17 @@ static int __delete_device(struct dm_pool_metadata *pmd, dm_thin_id dev) if (r) return r; - pmd->need_commit = 1; - return 0; } int dm_pool_delete_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __delete_device(pmd, dev); + if (!pmd->fail_io) + r = __delete_device(pmd, dev); up_write(&pmd->root_lock); return r; @@ -1055,39 +1161,168 @@ int dm_pool_set_metadata_transaction_id(struct dm_pool_metadata *pmd, uint64_t current_id, uint64_t new_id) { + int r = -EINVAL; + down_write(&pmd->root_lock); + + if (pmd->fail_io) + goto out; + if (pmd->trans_id != current_id) { - up_write(&pmd->root_lock); DMERR("mismatched transaction id"); - return -EINVAL; + goto out; } pmd->trans_id = new_id; - pmd->need_commit = 1; + r = 0; + +out: up_write(&pmd->root_lock); - return 0; + return r; } int dm_pool_get_metadata_transaction_id(struct dm_pool_metadata *pmd, uint64_t *result) { + int r = -EINVAL; + down_read(&pmd->root_lock); - *result = pmd->trans_id; + if (!pmd->fail_io) { + *result = pmd->trans_id; + r = 0; + } up_read(&pmd->root_lock); + return r; +} + +static int __reserve_metadata_snap(struct dm_pool_metadata *pmd) +{ + int r, inc; + struct thin_disk_superblock *disk_super; + struct dm_block *copy, *sblock; + dm_block_t held_root; + + /* + * Copy the superblock. + */ + dm_sm_inc_block(pmd->metadata_sm, THIN_SUPERBLOCK_LOCATION); + r = dm_tm_shadow_block(pmd->tm, THIN_SUPERBLOCK_LOCATION, + &sb_validator, ©, &inc); + if (r) + return r; + + BUG_ON(!inc); + + held_root = dm_block_location(copy); + disk_super = dm_block_data(copy); + + if (le64_to_cpu(disk_super->held_root)) { + DMWARN("Pool metadata snapshot already exists: release this before taking another."); + + dm_tm_dec(pmd->tm, held_root); + dm_tm_unlock(pmd->tm, copy); + return -EBUSY; + } + + /* + * Wipe the spacemap since we're not publishing this. + */ + memset(&disk_super->data_space_map_root, 0, + sizeof(disk_super->data_space_map_root)); + memset(&disk_super->metadata_space_map_root, 0, + sizeof(disk_super->metadata_space_map_root)); + + /* + * Increment the data structures that need to be preserved. + */ + dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->data_mapping_root)); + dm_tm_inc(pmd->tm, le64_to_cpu(disk_super->device_details_root)); + dm_tm_unlock(pmd->tm, copy); + + /* + * Write the held root into the superblock. + */ + r = superblock_lock(pmd, &sblock); + if (r) { + dm_tm_dec(pmd->tm, held_root); + return r; + } + + disk_super = dm_block_data(sblock); + disk_super->held_root = cpu_to_le64(held_root); + dm_bm_unlock(sblock); return 0; } -static int __get_held_metadata_root(struct dm_pool_metadata *pmd, - dm_block_t *result) +int dm_pool_reserve_metadata_snap(struct dm_pool_metadata *pmd) +{ + int r = -EINVAL; + + down_write(&pmd->root_lock); + if (!pmd->fail_io) + r = __reserve_metadata_snap(pmd); + up_write(&pmd->root_lock); + + return r; +} + +static int __release_metadata_snap(struct dm_pool_metadata *pmd) +{ + int r; + struct thin_disk_superblock *disk_super; + struct dm_block *sblock, *copy; + dm_block_t held_root; + + r = superblock_lock(pmd, &sblock); + if (r) + return r; + + disk_super = dm_block_data(sblock); + held_root = le64_to_cpu(disk_super->held_root); + disk_super->held_root = cpu_to_le64(0); + + dm_bm_unlock(sblock); + + if (!held_root) { + DMWARN("No pool metadata snapshot found: nothing to release."); + return -EINVAL; + } + + r = dm_tm_read_lock(pmd->tm, held_root, &sb_validator, ©); + if (r) + return r; + + disk_super = dm_block_data(copy); + dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->data_mapping_root)); + dm_sm_dec_block(pmd->metadata_sm, le64_to_cpu(disk_super->device_details_root)); + dm_sm_dec_block(pmd->metadata_sm, held_root); + + return dm_tm_unlock(pmd->tm, copy); +} + +int dm_pool_release_metadata_snap(struct dm_pool_metadata *pmd) +{ + int r = -EINVAL; + + down_write(&pmd->root_lock); + if (!pmd->fail_io) + r = __release_metadata_snap(pmd); + up_write(&pmd->root_lock); + + return r; +} + +static int __get_metadata_snap(struct dm_pool_metadata *pmd, + dm_block_t *result) { int r; struct thin_disk_superblock *disk_super; struct dm_block *sblock; - r = dm_bm_write_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, - &sb_validator, &sblock); + r = dm_bm_read_lock(pmd->bm, THIN_SUPERBLOCK_LOCATION, + &sb_validator, &sblock); if (r) return r; @@ -1097,13 +1332,14 @@ static int __get_held_metadata_root(struct dm_pool_metadata *pmd, return dm_bm_unlock(sblock); } -int dm_pool_get_held_metadata_root(struct dm_pool_metadata *pmd, - dm_block_t *result) +int dm_pool_get_metadata_snap(struct dm_pool_metadata *pmd, + dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = __get_held_metadata_root(pmd, result); + if (!pmd->fail_io) + r = __get_metadata_snap(pmd, result); up_read(&pmd->root_lock); return r; @@ -1112,10 +1348,11 @@ int dm_pool_get_held_metadata_root(struct dm_pool_metadata *pmd, int dm_pool_open_thin_device(struct dm_pool_metadata *pmd, dm_thin_id dev, struct dm_thin_device **td) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); - r = __open_device(pmd, dev, 0, td); + if (!pmd->fail_io) + r = __open_device(pmd, dev, 0, td); up_write(&pmd->root_lock); return r; @@ -1135,7 +1372,13 @@ dm_thin_id dm_thin_dev_id(struct dm_thin_device *td) return td->id; } -static int __snapshotted_since(struct dm_thin_device *td, uint32_t time) +/* + * Check whether @time (of block creation) is older than @td's last snapshot. + * If so then the associated block is shared with the last snapshot device. + * Any block on a device created *after* the device last got snapshotted is + * necessarily not shared. + */ +static bool __snapshotted_since(struct dm_thin_device *td, uint32_t time) { return td->snapshotted_time > time; } @@ -1143,28 +1386,31 @@ static int __snapshotted_since(struct dm_thin_device *td, uint32_t time) int dm_thin_find_block(struct dm_thin_device *td, dm_block_t block, int can_block, struct dm_thin_lookup_result *result) { - int r; + int r = -EINVAL; uint64_t block_time = 0; __le64 value; struct dm_pool_metadata *pmd = td->pmd; dm_block_t keys[2] = { td->id, block }; + struct dm_btree_info *info; if (can_block) { down_read(&pmd->root_lock); - r = dm_btree_lookup(&pmd->info, pmd->root, keys, &value); - if (!r) - block_time = le64_to_cpu(value); - up_read(&pmd->root_lock); - - } else if (down_read_trylock(&pmd->root_lock)) { - r = dm_btree_lookup(&pmd->nb_info, pmd->root, keys, &value); - if (!r) - block_time = le64_to_cpu(value); - up_read(&pmd->root_lock); - - } else + info = &pmd->info; + } else if (down_read_trylock(&pmd->root_lock)) + info = &pmd->nb_info; + else return -EWOULDBLOCK; + if (pmd->fail_io) + goto out; + + r = dm_btree_lookup(info, pmd->root, keys, &value); + if (!r) + block_time = le64_to_cpu(value); + +out: + up_read(&pmd->root_lock); + if (!r) { dm_block_t exception_block; uint32_t exception_time; @@ -1185,7 +1431,6 @@ static int __insert(struct dm_thin_device *td, dm_block_t block, struct dm_pool_metadata *pmd = td->pmd; dm_block_t keys[2] = { td->id, block }; - pmd->need_commit = 1; value = cpu_to_le64(pack_block_time(data_block, pmd->time)); __dm_bless_for_disk(&value); @@ -1194,10 +1439,9 @@ static int __insert(struct dm_thin_device *td, dm_block_t block, if (r) return r; - if (inserted) { + td->changed = 1; + if (inserted) td->mapped_blocks++; - td->changed = 1; - } return 0; } @@ -1205,10 +1449,11 @@ static int __insert(struct dm_thin_device *td, dm_block_t block, int dm_thin_insert_block(struct dm_thin_device *td, dm_block_t block, dm_block_t data_block) { - int r; + int r = -EINVAL; down_write(&td->pmd->root_lock); - r = __insert(td, block, data_block); + if (!td->pmd->fail_io) + r = __insert(td, block, data_block); up_write(&td->pmd->root_lock); return r; @@ -1226,31 +1471,82 @@ static int __remove(struct dm_thin_device *td, dm_block_t block) td->mapped_blocks--; td->changed = 1; - pmd->need_commit = 1; return 0; } int dm_thin_remove_block(struct dm_thin_device *td, dm_block_t block) { - int r; + int r = -EINVAL; down_write(&td->pmd->root_lock); - r = __remove(td, block); + if (!td->pmd->fail_io) + r = __remove(td, block); up_write(&td->pmd->root_lock); return r; } -int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) +int dm_pool_block_is_used(struct dm_pool_metadata *pmd, dm_block_t b, bool *result) { int r; + uint32_t ref_count; - down_write(&pmd->root_lock); + down_read(&pmd->root_lock); + r = dm_sm_get_count(pmd->data_sm, b, &ref_count); + if (!r) + *result = (ref_count != 0); + up_read(&pmd->root_lock); + + return r; +} + +bool dm_thin_changed_this_transaction(struct dm_thin_device *td) +{ + int r; + + down_read(&td->pmd->root_lock); + r = td->changed; + up_read(&td->pmd->root_lock); + + return r; +} + +bool dm_pool_changed_this_transaction(struct dm_pool_metadata *pmd) +{ + bool r = false; + struct dm_thin_device *td, *tmp; - r = dm_sm_new_block(pmd->data_sm, result); - pmd->need_commit = 1; + down_read(&pmd->root_lock); + list_for_each_entry_safe(td, tmp, &pmd->thin_devices, list) { + if (td->changed) { + r = td->changed; + break; + } + } + up_read(&pmd->root_lock); + + return r; +} + +bool dm_thin_aborted_changes(struct dm_thin_device *td) +{ + bool r; + + down_read(&td->pmd->root_lock); + r = td->aborted_with_changes; + up_read(&td->pmd->root_lock); + + return r; +} + +int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) +{ + int r = -EINVAL; + down_write(&pmd->root_lock); + if (!pmd->fail_io) + r = dm_sm_new_block(pmd->data_sm, result); up_write(&pmd->root_lock); return r; @@ -1258,9 +1554,11 @@ int dm_pool_alloc_data_block(struct dm_pool_metadata *pmd, dm_block_t *result) int dm_pool_commit_metadata(struct dm_pool_metadata *pmd) { - int r; + int r = -EINVAL; down_write(&pmd->root_lock); + if (pmd->fail_io) + goto out; r = __commit_transaction(pmd); if (r <= 0) @@ -1275,12 +1573,41 @@ out: return r; } +static void __set_abort_with_changes_flags(struct dm_pool_metadata *pmd) +{ + struct dm_thin_device *td; + + list_for_each_entry(td, &pmd->thin_devices, list) + td->aborted_with_changes = td->changed; +} + +int dm_pool_abort_metadata(struct dm_pool_metadata *pmd) +{ + int r = -EINVAL; + + down_write(&pmd->root_lock); + if (pmd->fail_io) + goto out; + + __set_abort_with_changes_flags(pmd); + __destroy_persistent_data_objects(pmd); + r = __create_persistent_data_objects(pmd, false); + if (r) + pmd->fail_io = true; + +out: + up_write(&pmd->root_lock); + + return r; +} + int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = dm_sm_get_nr_free(pmd->data_sm, result); + if (!pmd->fail_io) + r = dm_sm_get_nr_free(pmd->data_sm, result); up_read(&pmd->root_lock); return r; @@ -1289,10 +1616,11 @@ int dm_pool_get_free_block_count(struct dm_pool_metadata *pmd, dm_block_t *resul int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = dm_sm_get_nr_free(pmd->metadata_sm, result); + if (!pmd->fail_io) + r = dm_sm_get_nr_free(pmd->metadata_sm, result); up_read(&pmd->root_lock); return r; @@ -1301,10 +1629,11 @@ int dm_pool_get_free_metadata_block_count(struct dm_pool_metadata *pmd, int dm_pool_get_metadata_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = dm_sm_get_nr_blocks(pmd->metadata_sm, result); + if (!pmd->fail_io) + r = dm_sm_get_nr_blocks(pmd->metadata_sm, result); up_read(&pmd->root_lock); return r; @@ -1321,10 +1650,11 @@ int dm_pool_get_data_block_size(struct dm_pool_metadata *pmd, sector_t *result) int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) { - int r; + int r = -EINVAL; down_read(&pmd->root_lock); - r = dm_sm_get_nr_blocks(pmd->data_sm, result); + if (!pmd->fail_io) + r = dm_sm_get_nr_blocks(pmd->data_sm, result); up_read(&pmd->root_lock); return r; @@ -1332,13 +1662,17 @@ int dm_pool_get_data_dev_size(struct dm_pool_metadata *pmd, dm_block_t *result) int dm_thin_get_mapped_count(struct dm_thin_device *td, dm_block_t *result) { + int r = -EINVAL; struct dm_pool_metadata *pmd = td->pmd; down_read(&pmd->root_lock); - *result = td->mapped_blocks; + if (!pmd->fail_io) { + *result = td->mapped_blocks; + r = 0; + } up_read(&pmd->root_lock); - return 0; + return r; } static int __highest_block(struct dm_thin_device *td, dm_block_t *result) @@ -1360,22 +1694,23 @@ static int __highest_block(struct dm_thin_device *td, dm_block_t *result) int dm_thin_get_highest_mapped_block(struct dm_thin_device *td, dm_block_t *result) { - int r; + int r = -EINVAL; struct dm_pool_metadata *pmd = td->pmd; down_read(&pmd->root_lock); - r = __highest_block(td, result); + if (!pmd->fail_io) + r = __highest_block(td, result); up_read(&pmd->root_lock); return r; } -static int __resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) +static int __resize_space_map(struct dm_space_map *sm, dm_block_t new_count) { int r; dm_block_t old_count; - r = dm_sm_get_nr_blocks(pmd->data_sm, &old_count); + r = dm_sm_get_nr_blocks(sm, &old_count); if (r) return r; @@ -1383,24 +1718,98 @@ static int __resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) return 0; if (new_count < old_count) { - DMERR("cannot reduce size of data device"); + DMERR("cannot reduce size of space map"); return -EINVAL; } - r = dm_sm_extend(pmd->data_sm, new_count - old_count); - if (!r) - pmd->need_commit = 1; + return dm_sm_extend(sm, new_count - old_count); +} + +int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) +{ + int r = -EINVAL; + + down_write(&pmd->root_lock); + if (!pmd->fail_io) + r = __resize_space_map(pmd->data_sm, new_count); + up_write(&pmd->root_lock); return r; } -int dm_pool_resize_data_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) +int dm_pool_resize_metadata_dev(struct dm_pool_metadata *pmd, dm_block_t new_count) +{ + int r = -EINVAL; + + down_write(&pmd->root_lock); + if (!pmd->fail_io) + r = __resize_space_map(pmd->metadata_sm, new_count); + up_write(&pmd->root_lock); + + return r; +} + +void dm_pool_metadata_read_only(struct dm_pool_metadata *pmd) +{ + down_write(&pmd->root_lock); + pmd->read_only = true; + dm_bm_set_read_only(pmd->bm); + up_write(&pmd->root_lock); +} + +void dm_pool_metadata_read_write(struct dm_pool_metadata *pmd) +{ + down_write(&pmd->root_lock); + pmd->read_only = false; + dm_bm_set_read_write(pmd->bm); + up_write(&pmd->root_lock); +} + +int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd, + dm_block_t threshold, + dm_sm_threshold_fn fn, + void *context) { int r; down_write(&pmd->root_lock); - r = __resize_data_dev(pmd, new_count); + r = dm_sm_register_threshold_callback(pmd->metadata_sm, threshold, fn, context); up_write(&pmd->root_lock); return r; } + +int dm_pool_metadata_set_needs_check(struct dm_pool_metadata *pmd) +{ + int r; + struct dm_block *sblock; + struct thin_disk_superblock *disk_super; + + down_write(&pmd->root_lock); + pmd->flags |= THIN_METADATA_NEEDS_CHECK_FLAG; + + r = superblock_lock(pmd, &sblock); + if (r) { + DMERR("couldn't read superblock"); + goto out; + } + + disk_super = dm_block_data(sblock); + disk_super->flags = cpu_to_le32(pmd->flags); + + dm_bm_unlock(sblock); +out: + up_write(&pmd->root_lock); + return r; +} + +bool dm_pool_metadata_needs_check(struct dm_pool_metadata *pmd) +{ + bool needs_check; + + down_read(&pmd->root_lock); + needs_check = pmd->flags & THIN_METADATA_NEEDS_CHECK_FLAG; + up_read(&pmd->root_lock); + + return needs_check; +} |
