diff options
Diffstat (limited to 'fs/btrfs/dev-replace.c')
| -rw-r--r-- | fs/btrfs/dev-replace.c | 171 |
1 files changed, 119 insertions, 52 deletions
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 70681686e8d..eea26e1b2fd 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -26,7 +26,6 @@ #include <linux/kthread.h> #include <linux/math64.h> #include <asm/div64.h> -#include "compat.h" #include "ctree.h" #include "extent_map.h" #include "disk-io.h" @@ -37,8 +36,8 @@ #include "check-integrity.h" #include "rcu-string.h" #include "dev-replace.h" +#include "sysfs.h" -static u64 btrfs_get_seconds_since_1970(void); static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, int scrub_ret); static void btrfs_dev_replace_update_device_in_mapping_tree( @@ -104,7 +103,8 @@ no_valid_dev_replace_entry_found: ptr = btrfs_item_ptr(eb, slot, struct btrfs_dev_replace_item); if (item_size != sizeof(struct btrfs_dev_replace_item)) { - pr_warn("btrfs: dev_replace entry found has unexpected size, ignore entry\n"); + btrfs_warn(fs_info, + "dev_replace entry found has unexpected size, ignore entry"); goto no_valid_dev_replace_entry_found; } @@ -147,13 +147,19 @@ no_valid_dev_replace_entry_found: if (!dev_replace->srcdev && !btrfs_test_opt(dev_root, DEGRADED)) { ret = -EIO; - pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?\n", - src_devid); + btrfs_warn(fs_info, + "cannot mount because device replace operation is ongoing and"); + btrfs_warn(fs_info, + "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?", + src_devid); } if (!dev_replace->tgtdev && !btrfs_test_opt(dev_root, DEGRADED)) { ret = -EIO; - pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "tgtdev (devid %llu) is missing, need to run btrfs dev scan?\n", + btrfs_warn(fs_info, + "cannot mount because device replace operation is ongoing and"); + btrfs_warn(fs_info, + "tgtdev (devid %llu) is missing, need to run 'btrfs dev scan'?", BTRFS_DEV_REPLACE_DEVID); } if (dev_replace->tgtdev) { @@ -212,7 +218,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, } ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); if (ret < 0) { - pr_warn("btrfs: error %d while searching for dev_replace item!\n", + btrfs_warn(fs_info, "error %d while searching for dev_replace item!", ret); goto out; } @@ -232,7 +238,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, */ ret = btrfs_del_item(trans, dev_root, path); if (ret != 0) { - pr_warn("btrfs: delete too small dev_replace item failed %d!\n", + btrfs_warn(fs_info, "delete too small dev_replace item failed %d!", ret); goto out; } @@ -245,7 +251,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, dev_root, path, &key, sizeof(*ptr)); if (ret < 0) { - pr_warn("btrfs: insert dev_replace item failed %d!\n", + btrfs_warn(fs_info, "insert dev_replace item failed %d!", ret); goto out; } @@ -296,13 +302,6 @@ void btrfs_after_dev_replace_commit(struct btrfs_fs_info *fs_info) dev_replace->cursor_left_last_write_of_item; } -static u64 btrfs_get_seconds_since_1970(void) -{ - struct timespec t = CURRENT_TIME_SEC; - - return t.tv_sec; -} - int btrfs_dev_replace_start(struct btrfs_root *root, struct btrfs_ioctl_dev_replace_args *args) { @@ -314,8 +313,8 @@ int btrfs_dev_replace_start(struct btrfs_root *root, struct btrfs_device *src_device = NULL; if (btrfs_fs_incompat(fs_info, RAID56)) { - pr_warn("btrfs: dev_replace cannot yet handle RAID5/RAID6\n"); - return -EINVAL; + btrfs_warn(fs_info, "dev_replace cannot yet handle RAID5/RAID6"); + return -EOPNOTSUPP; } switch (args->start.cont_reading_from_srcdev_mode) { @@ -334,7 +333,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, ret = btrfs_init_dev_replace_tgtdev(root, args->start.tgtdev_name, &tgt_device); if (ret) { - pr_err("btrfs: target device %s is invalid!\n", + btrfs_err(fs_info, "target device %s is invalid!", args->start.tgtdev_name); mutex_unlock(&fs_info->volume_mutex); return -EINVAL; @@ -350,7 +349,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, } if (tgt_device->total_bytes < src_device->total_bytes) { - pr_err("btrfs: target device is smaller than source device!\n"); + btrfs_err(fs_info, "target device is smaller than source device!"); ret = -EINVAL; goto leave_no_lock; } @@ -375,7 +374,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, dev_replace->tgtdev = tgt_device; printk_in_rcu(KERN_INFO - "btrfs: dev_replace from %s (devid %llu) to %s) started\n", + "BTRFS: dev_replace from %s (devid %llu) to %s started\n", src_device->missing ? "<missing disk>" : rcu_str_deref(src_device->name), src_device->devid, @@ -390,7 +389,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, * go to the tgtdev as well (refer to btrfs_map_block()). */ dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED; - dev_replace->time_started = btrfs_get_seconds_since_1970(); + dev_replace->time_started = get_seconds(); dev_replace->cursor_left = 0; dev_replace->committed_cursor_left = 0; dev_replace->cursor_left_last_write_of_item = 0; @@ -400,7 +399,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root, args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR; btrfs_dev_replace_unlock(dev_replace); - btrfs_wait_all_ordered_extents(root->fs_info); + btrfs_wait_ordered_roots(root->fs_info, -1); /* force writing the updated state information to disk */ trans = btrfs_start_transaction(root, 0); @@ -433,6 +432,35 @@ leave_no_lock: return ret; } +/* + * blocked until all flighting bios are finished. + */ +static void btrfs_rm_dev_replace_blocked(struct btrfs_fs_info *fs_info) +{ + s64 writers; + DEFINE_WAIT(wait); + + set_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state); + do { + prepare_to_wait(&fs_info->replace_wait, &wait, + TASK_UNINTERRUPTIBLE); + writers = percpu_counter_sum(&fs_info->bio_counter); + if (writers) + schedule(); + finish_wait(&fs_info->replace_wait, &wait); + } while (writers); +} + +/* + * we have removed target device, it is safe to allow new bios request. + */ +static void btrfs_rm_dev_replace_unblocked(struct btrfs_fs_info *fs_info) +{ + clear_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state); + if (waitqueue_active(&fs_info->replace_wait)) + wake_up(&fs_info->replace_wait); +} + static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, int scrub_ret) { @@ -460,22 +488,16 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, src_device = dev_replace->srcdev; btrfs_dev_replace_unlock(dev_replace); - /* replace old device with new one in mapping tree */ - if (!scrub_ret) - btrfs_dev_replace_update_device_in_mapping_tree(fs_info, - src_device, - tgt_device); - /* * flush all outstanding I/O and inode extent mappings before the * copy operation is declared as being finished */ - ret = btrfs_start_all_delalloc_inodes(root->fs_info, 0); + ret = btrfs_start_delalloc_roots(root->fs_info, 0, -1); if (ret) { mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); return ret; } - btrfs_wait_all_ordered_extents(root->fs_info); + btrfs_wait_ordered_roots(root->fs_info, -1); trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { @@ -486,6 +508,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, WARN_ON(ret); /* keep away write_all_supers() during the finishing procedure */ + mutex_lock(&root->fs_info->chunk_mutex); mutex_lock(&root->fs_info->fs_devices->device_list_mutex); btrfs_dev_replace_lock(dev_replace); dev_replace->replace_state = @@ -493,18 +516,24 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, : BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED; dev_replace->tgtdev = NULL; dev_replace->srcdev = NULL; - dev_replace->time_stopped = btrfs_get_seconds_since_1970(); + dev_replace->time_stopped = get_seconds(); dev_replace->item_needs_writeback = 1; - if (scrub_ret) { + /* replace old device with new one in mapping tree */ + if (!scrub_ret) { + btrfs_dev_replace_update_device_in_mapping_tree(fs_info, + src_device, + tgt_device); + } else { printk_in_rcu(KERN_ERR - "btrfs: btrfs_scrub_dev(%s, %llu, %s) failed %d\n", + "BTRFS: btrfs_scrub_dev(%s, %llu, %s) failed %d\n", src_device->missing ? "<missing disk>" : rcu_str_deref(src_device->name), src_device->devid, rcu_str_deref(tgt_device->name), scrub_ret); btrfs_dev_replace_unlock(dev_replace); mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); + mutex_unlock(&root->fs_info->chunk_mutex); if (tgt_device) btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device); mutex_unlock(&dev_replace->lock_finishing_cancel_unmount); @@ -513,7 +542,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, } printk_in_rcu(KERN_INFO - "btrfs: dev_replace from %s (devid %llu) to %s) finished\n", + "BTRFS: dev_replace from %s (devid %llu) to %s) finished\n", src_device->missing ? "<missing disk>" : rcu_str_deref(src_device->name), src_device->devid, @@ -534,11 +563,16 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, fs_info->fs_devices->latest_bdev = tgt_device->bdev; list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list); + /* replace the sysfs entry */ + btrfs_kobj_rm_device(fs_info, src_device); + btrfs_kobj_add_device(fs_info, tgt_device); + + btrfs_rm_dev_replace_blocked(fs_info); + btrfs_rm_dev_replace_srcdev(fs_info, src_device); - if (src_device->bdev) { - /* zero out the old super */ - btrfs_scratch_superblock(src_device); - } + + btrfs_rm_dev_replace_unblocked(fs_info); + /* * this is again a consistent state where no dev_replace procedure * is running, the target device is part of the filesystem, the @@ -548,6 +582,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info, */ btrfs_dev_replace_unlock(dev_replace); mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); + mutex_unlock(&root->fs_info->chunk_mutex); /* write back the superblocks */ trans = btrfs_start_transaction(root, 0); @@ -653,6 +688,9 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info) u64 result; int ret; + if (fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + mutex_lock(&dev_replace->lock_finishing_cancel_unmount); btrfs_dev_replace_lock(dev_replace); switch (dev_replace->replace_state) { @@ -671,7 +709,7 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info) break; } dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED; - dev_replace->time_stopped = btrfs_get_seconds_since_1970(); + dev_replace->time_stopped = get_seconds(); dev_replace->item_needs_writeback = 1; btrfs_dev_replace_unlock(dev_replace); btrfs_scrub_cancel(fs_info); @@ -706,9 +744,9 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info) case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED: dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED; - dev_replace->time_stopped = btrfs_get_seconds_since_1970(); + dev_replace->time_stopped = get_seconds(); dev_replace->item_needs_writeback = 1; - pr_info("btrfs: suspending dev_replace for unmount\n"); + btrfs_info(fs_info, "suspending dev_replace for unmount"); break; } @@ -737,8 +775,9 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info) break; } if (!dev_replace->tgtdev || !dev_replace->tgtdev->bdev) { - pr_info("btrfs: cannot continue dev_replace, tgtdev is missing\n" - "btrfs: you may cancel the operation after 'mount -o degraded'\n"); + btrfs_info(fs_info, "cannot continue dev_replace, tgtdev is missing"); + btrfs_info(fs_info, + "you may cancel the operation after 'mount -o degraded'"); btrfs_dev_replace_unlock(dev_replace); return 0; } @@ -764,14 +803,14 @@ static int btrfs_dev_replace_kthread(void *data) kfree(status_args); do_div(progress, 10); printk_in_rcu(KERN_INFO - "btrfs: continuing dev_replace from %s (devid %llu) to %s @%u%%\n", - dev_replace->srcdev->missing ? "<missing disk>" : - rcu_str_deref(dev_replace->srcdev->name), - dev_replace->srcdev->devid, - dev_replace->tgtdev ? - rcu_str_deref(dev_replace->tgtdev->name) : - "<missing target disk>", - (unsigned int)progress); + "BTRFS: continuing dev_replace from %s (devid %llu) to %s @%u%%\n", + dev_replace->srcdev->missing ? "<missing disk>" : + rcu_str_deref(dev_replace->srcdev->name), + dev_replace->srcdev->devid, + dev_replace->tgtdev ? + rcu_str_deref(dev_replace->tgtdev->name) : + "<missing target disk>", + (unsigned int)progress); } btrfs_dev_replace_continue_on_mount(fs_info); atomic_set(&fs_info->mutually_exclusive_operation_running, 0); @@ -863,3 +902,31 @@ void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace) mutex_unlock(&dev_replace->lock_management_lock); } } + +void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info) +{ + percpu_counter_inc(&fs_info->bio_counter); +} + +void btrfs_bio_counter_dec(struct btrfs_fs_info *fs_info) +{ + percpu_counter_dec(&fs_info->bio_counter); + + if (waitqueue_active(&fs_info->replace_wait)) + wake_up(&fs_info->replace_wait); +} + +void btrfs_bio_counter_inc_blocked(struct btrfs_fs_info *fs_info) +{ + DEFINE_WAIT(wait); +again: + percpu_counter_inc(&fs_info->bio_counter); + if (test_bit(BTRFS_FS_STATE_DEV_REPLACING, &fs_info->fs_state)) { + btrfs_bio_counter_dec(fs_info); + wait_event(fs_info->replace_wait, + !test_bit(BTRFS_FS_STATE_DEV_REPLACING, + &fs_info->fs_state)); + goto again; + } + +} |
