diff options
Diffstat (limited to 'fs/btrfs/volumes.c')
| -rw-r--r-- | fs/btrfs/volumes.c | 399 |
1 files changed, 275 insertions, 124 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 0052ca8264d..6cb82f62cb7 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -28,7 +28,6 @@ #include <linux/raid/pq.h> #include <linux/semaphore.h> #include <asm/div64.h> -#include "compat.h" #include "ctree.h" #include "extent_map.h" #include "disk-io.h" @@ -41,6 +40,7 @@ #include "rcu-string.h" #include "math.h" #include "dev-replace.h" +#include "sysfs.h" static int init_first_rw_device(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -126,7 +126,7 @@ static void btrfs_kobject_uevent(struct block_device *bdev, ret = kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, action); if (ret) - pr_warn("Sending event '%d' to kobject: '%s' (%p): failed\n", + pr_warn("BTRFS: Sending event '%d' to kobject: '%s' (%p): failed\n", action, kobject_name(&disk_to_dev(bdev->bd_disk)->kobj), &disk_to_dev(bdev->bd_disk)->kobj); @@ -201,7 +201,7 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder, if (IS_ERR(*bdev)) { ret = PTR_ERR(*bdev); - printk(KERN_INFO "btrfs: open %s failed\n", device_path); + printk(KERN_INFO "BTRFS: open %s failed\n", device_path); goto error; } @@ -416,7 +416,8 @@ loop_lock: device->running_pending = 1; spin_unlock(&device->io_lock); - btrfs_requeue_work(&device->work); + btrfs_queue_work(fs_info->submit_workers, + &device->work); goto done; } /* unplug every 64 requests just for good measure */ @@ -448,6 +449,14 @@ static void pending_bios_fn(struct btrfs_work *work) run_scheduled_bios(device); } +/* + * Add new device to list of registered devices + * + * Returns: + * 1 - first time device is seen + * 0 - device already known + * < 0 - error + */ static noinline int device_list_add(const char *path, struct btrfs_super_block *disk_super, u64 devid, struct btrfs_fs_devices **fs_devices_ret) @@ -455,6 +464,7 @@ static noinline int device_list_add(const char *path, struct btrfs_device *device; struct btrfs_fs_devices *fs_devices; struct rcu_string *name; + int ret = 0; u64 found_transid = btrfs_super_generation(disk_super); fs_devices = find_fsid(disk_super->fsid); @@ -495,6 +505,7 @@ static noinline int device_list_add(const char *path, fs_devices->num_devices++; mutex_unlock(&fs_devices->device_list_mutex); + ret = 1; device->fs_devices = fs_devices; } else if (!device->name || strcmp(device->name->str, path)) { name = rcu_string_strdup(path, GFP_NOFS); @@ -513,7 +524,8 @@ static noinline int device_list_add(const char *path, fs_devices->latest_trans = found_transid; } *fs_devices_ret = fs_devices; - return 0; + + return ret; } static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) @@ -543,12 +555,14 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) * This is ok to do without rcu read locked because we hold the * uuid mutex so nothing we touch in here is going to disappear. */ - name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS); - if (!name) { - kfree(device); - goto error; + if (orig_dev->name) { + name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS); + if (!name) { + kfree(device); + goto error; + } + rcu_assign_pointer(device->name, name); } - rcu_assign_pointer(device->name, name); list_add(&device->dev_list, &fs_devices->devices); device->fs_devices = fs_devices; @@ -666,7 +680,8 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) if (device->bdev) fs_devices->open_devices--; - if (device->writeable && !device->is_tgtdev_for_dev_replace) { + if (device->writeable && + device->devid != BTRFS_DEV_REPLACE_DEVID) { list_del_init(&device->dev_alloc_list); fs_devices->rw_devices--; } @@ -796,7 +811,8 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, fs_devices->rotating = 1; fs_devices->open_devices++; - if (device->writeable && !device->is_tgtdev_for_dev_replace) { + if (device->writeable && + device->devid != BTRFS_DEV_REPLACE_DEVID) { fs_devices->rw_devices++; list_add(&device->dev_alloc_list, &fs_devices->alloc_list); @@ -908,17 +924,19 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, transid = btrfs_super_generation(disk_super); total_devices = btrfs_super_num_devices(disk_super); - if (disk_super->label[0]) { - if (disk_super->label[BTRFS_LABEL_SIZE - 1]) - disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0'; - printk(KERN_INFO "device label %s ", disk_super->label); - } else { - printk(KERN_INFO "device fsid %pU ", disk_super->fsid); - } - - printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path); - ret = device_list_add(path, disk_super, devid, fs_devices_ret); + if (ret > 0) { + if (disk_super->label[0]) { + if (disk_super->label[BTRFS_LABEL_SIZE - 1]) + disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0'; + printk(KERN_INFO "BTRFS: device label %s ", disk_super->label); + } else { + printk(KERN_INFO "BTRFS: device fsid %pU ", disk_super->fsid); + } + + printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path); + ret = 0; + } if (!ret && fs_devices_ret) (*fs_devices_ret)->total_devices = total_devices; @@ -1437,6 +1455,22 @@ out: return ret; } +/* + * Function to update ctime/mtime for a given device path. + * Mainly used for ctime/mtime based probe like libblkid. + */ +static void update_dev_time(char *path_name) +{ + struct file *filp; + + filp = filp_open(path_name, O_RDWR, 0); + if (!filp) + return; + file_update_time(filp); + filp_close(filp, NULL); + return; +} + static int btrfs_rm_dev_item(struct btrfs_root *root, struct btrfs_device *device) { @@ -1646,8 +1680,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) if (device->bdev == root->fs_info->fs_devices->latest_bdev) root->fs_info->fs_devices->latest_bdev = next_device->bdev; - if (device->bdev) + if (device->bdev) { device->fs_devices->open_devices--; + /* remove sysfs entry */ + btrfs_kobj_rm_device(root->fs_info, device); + } call_rcu(&device->rcu, free_device); @@ -1659,11 +1696,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) struct btrfs_fs_devices *fs_devices; fs_devices = root->fs_info->fs_devices; while (fs_devices) { - if (fs_devices->seed == cur_devices) + if (fs_devices->seed == cur_devices) { + fs_devices->seed = cur_devices->seed; break; + } fs_devices = fs_devices->seed; } - fs_devices->seed = cur_devices->seed; cur_devices->seed = NULL; lock_chunks(root); __btrfs_close_devices(cur_devices); @@ -1679,20 +1717,55 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) * remove it from the devices list and zero out the old super */ if (clear_super && disk_super) { + u64 bytenr; + int i; + /* make sure this device isn't detected as part of * the FS anymore */ memset(&disk_super->magic, 0, sizeof(disk_super->magic)); set_buffer_dirty(bh); sync_dirty_buffer(bh); + + /* clear the mirror copies of super block on the disk + * being removed, 0th copy is been taken care above and + * the below would take of the rest + */ + for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) { + bytenr = btrfs_sb_offset(i); + if (bytenr + BTRFS_SUPER_INFO_SIZE >= + i_size_read(bdev->bd_inode)) + break; + + brelse(bh); + bh = __bread(bdev, bytenr / 4096, + BTRFS_SUPER_INFO_SIZE); + if (!bh) + continue; + + disk_super = (struct btrfs_super_block *)bh->b_data; + + if (btrfs_super_bytenr(disk_super) != bytenr || + btrfs_super_magic(disk_super) != BTRFS_MAGIC) { + continue; + } + memset(&disk_super->magic, 0, + sizeof(disk_super->magic)); + set_buffer_dirty(bh); + sync_dirty_buffer(bh); + } } ret = 0; - /* Notify udev that device has changed */ - if (bdev) + if (bdev) { + /* Notify udev that device has changed */ btrfs_kobject_uevent(bdev, KOBJ_CHANGE); + /* Update ctime/mtime for device path for libblkid */ + update_dev_time(device_path); + } + error_brelse: brelse(bh); if (bdev) @@ -1715,6 +1788,7 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, struct btrfs_device *srcdev) { WARN_ON(!mutex_is_locked(&fs_info->fs_devices->device_list_mutex)); + list_del_rcu(&srcdev->dev_list); list_del_rcu(&srcdev->dev_alloc_list); fs_info->fs_devices->num_devices--; @@ -1724,9 +1798,13 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, } if (srcdev->can_discard) fs_info->fs_devices->num_can_discard--; - if (srcdev->bdev) + if (srcdev->bdev) { fs_info->fs_devices->open_devices--; + /* zero out the old super */ + btrfs_scratch_superblock(srcdev); + } + call_rcu(&srcdev->rcu, free_device); } @@ -1807,7 +1885,7 @@ int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, } if (!*device) { - pr_err("btrfs: no missing device found\n"); + btrfs_err(root->fs_info, "no missing device found"); return -ENOENT; } @@ -1863,7 +1941,6 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) fs_devices->seeding = 0; fs_devices->num_devices = 0; fs_devices->open_devices = 0; - fs_devices->total_devices = 0; fs_devices->seed = seed_devices; generate_random_uuid(fs_devices->fsid); @@ -2035,6 +2112,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) device->in_fs_metadata = 1; device->is_tgtdev_for_dev_replace = 0; device->mode = FMODE_EXCL; + device->dev_stats_valid = 1; set_blocksize(device->bdev, 4096); if (seeding_dev) { @@ -2071,9 +2149,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) total_bytes = btrfs_super_num_devices(root->fs_info->super_copy); btrfs_set_super_num_devices(root->fs_info->super_copy, total_bytes + 1); + + /* add sysfs device entry */ + btrfs_kobj_add_device(root->fs_info, device); + mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); if (seeding_dev) { + char fsid_buf[BTRFS_UUID_UNPARSED_SIZE]; ret = init_first_rw_device(trans, root, device); if (ret) { btrfs_abort_transaction(trans, root, ret); @@ -2084,6 +2167,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) btrfs_abort_transaction(trans, root, ret); goto error_trans; } + + /* Sprouting would change fsid of the mounted root, + * so rename the fsid on the sysfs + */ + snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU", + root->fs_info->fsid); + if (kobject_rename(&root->fs_info->super_kobj, fsid_buf)) + goto error_trans; } else { ret = btrfs_add_device(trans, root, device); if (ret) { @@ -2125,12 +2216,15 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) ret = btrfs_commit_transaction(trans, root); } + /* Update ctime/mtime for libblkid */ + update_dev_time(device_path); return ret; error_trans: unlock_chunks(root); btrfs_end_transaction(trans, root); rcu_string_free(device->name); + btrfs_kobj_rm_device(root->fs_info, device); kfree(device); error: blkdev_put(bdev, FMODE_EXCL); @@ -2202,6 +2296,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, device->in_fs_metadata = 1; device->is_tgtdev_for_dev_replace = 1; device->mode = FMODE_EXCL; + device->dev_stats_valid = 1; set_blocksize(device->bdev, 4096); device->fs_devices = fs_info->fs_devices; list_add(&device->dev_list, &fs_info->fs_devices->devices); @@ -2468,9 +2563,6 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, remove_extent_mapping(em_tree, em); write_unlock(&em_tree->lock); - kfree(map); - em->bdev = NULL; - /* once for the tree */ free_extent_map(em); /* once for us */ @@ -2544,8 +2636,7 @@ again: failed = 0; retried = true; goto again; - } else if (failed && retried) { - WARN_ON(1); + } else if (WARN_ON(failed && retried)) { ret = -ENOSPC; } error: @@ -2901,6 +2992,16 @@ static int should_balance_chunk(struct btrfs_root *root, return 0; } + /* + * limited by count, must be the last filter + */ + if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT)) { + if (bargs->limit == 0) + return 0; + else + bargs->limit--; + } + return 1; } @@ -2923,6 +3024,9 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) int ret; int enospc_errors = 0; bool counting = true; + u64 limit_data = bctl->data.limit; + u64 limit_meta = bctl->meta.limit; + u64 limit_sys = bctl->sys.limit; /* step one make some room on all the devices */ devices = &fs_info->fs_devices->devices; @@ -2961,6 +3065,11 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) memset(&bctl->stat, 0, sizeof(bctl->stat)); spin_unlock(&fs_info->balance_lock); again: + if (!counting) { + bctl->data.limit = limit_data; + bctl->meta.limit = limit_meta; + bctl->sys.limit = limit_sys; + } key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; key.offset = (u64)-1; key.type = BTRFS_CHUNK_ITEM_KEY; @@ -3045,7 +3154,7 @@ loop: error: btrfs_free_path(path); if (enospc_errors) { - printk(KERN_INFO "btrfs: %d enospc errors during balance\n", + btrfs_info(fs_info, "%d enospc errors during balance", enospc_errors); if (!ret) ret = -ENOSPC; @@ -3131,8 +3240,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl, if (!(bctl->flags & BTRFS_BALANCE_DATA) || !(bctl->flags & BTRFS_BALANCE_METADATA) || memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) { - printk(KERN_ERR "btrfs: with mixed groups data and " - "metadata balance options must be the same\n"); + btrfs_err(fs_info, "with mixed groups data and " + "metadata balance options must be the same"); ret = -EINVAL; goto out; } @@ -3158,8 +3267,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl, if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) && (!alloc_profile_is_valid(bctl->data.target, 1) || (bctl->data.target & ~allowed))) { - printk(KERN_ERR "btrfs: unable to start balance with target " - "data profile %llu\n", + btrfs_err(fs_info, "unable to start balance with target " + "data profile %llu", bctl->data.target); ret = -EINVAL; goto out; @@ -3167,8 +3276,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl, if ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) && (!alloc_profile_is_valid(bctl->meta.target, 1) || (bctl->meta.target & ~allowed))) { - printk(KERN_ERR "btrfs: unable to start balance with target " - "metadata profile %llu\n", + btrfs_err(fs_info, + "unable to start balance with target metadata profile %llu", bctl->meta.target); ret = -EINVAL; goto out; @@ -3176,8 +3285,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl, if ((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) && (!alloc_profile_is_valid(bctl->sys.target, 1) || (bctl->sys.target & ~allowed))) { - printk(KERN_ERR "btrfs: unable to start balance with target " - "system profile %llu\n", + btrfs_err(fs_info, + "unable to start balance with target system profile %llu", bctl->sys.target); ret = -EINVAL; goto out; @@ -3186,7 +3295,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl, /* allow dup'ed data chunks only in mixed mode */ if (!mixed && (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) && (bctl->data.target & BTRFS_BLOCK_GROUP_DUP)) { - printk(KERN_ERR "btrfs: dup for data is not allowed\n"); + btrfs_err(fs_info, "dup for data is not allowed"); ret = -EINVAL; goto out; } @@ -3206,11 +3315,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl, (fs_info->avail_metadata_alloc_bits & allowed) && !(bctl->meta.target & allowed))) { if (bctl->flags & BTRFS_BALANCE_FORCE) { - printk(KERN_INFO "btrfs: force reducing metadata " - "integrity\n"); + btrfs_info(fs_info, "force reducing metadata integrity"); } else { - printk(KERN_ERR "btrfs: balance will reduce metadata " - "integrity, use force if you want this\n"); + btrfs_err(fs_info, "balance will reduce metadata " + "integrity, use force if you want this"); ret = -EINVAL; goto out; } @@ -3296,7 +3404,7 @@ static int balance_kthread(void *data) mutex_lock(&fs_info->balance_mutex); if (fs_info->balance_ctl) { - printk(KERN_INFO "btrfs: continuing balance\n"); + btrfs_info(fs_info, "continuing balance"); ret = btrfs_balance(fs_info->balance_ctl, NULL); } @@ -3318,7 +3426,7 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info) spin_unlock(&fs_info->balance_lock); if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) { - printk(KERN_INFO "btrfs: force skipping balance\n"); + btrfs_info(fs_info, "force skipping balance"); return 0; } @@ -3417,6 +3525,9 @@ int btrfs_pause_balance(struct btrfs_fs_info *fs_info) int btrfs_cancel_balance(struct btrfs_fs_info *fs_info) { + if (fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + mutex_lock(&fs_info->balance_mutex); if (!fs_info->balance_ctl) { mutex_unlock(&fs_info->balance_mutex); @@ -3482,7 +3593,7 @@ static int btrfs_uuid_scan_kthread(void *data) path->keep_locks = 1; while (1) { - ret = btrfs_search_forward(root, &key, &max_key, path, 0); + ret = btrfs_search_forward(root, &key, path, 0); if (ret) { if (ret > 0) ret = 0; @@ -3533,7 +3644,7 @@ update_tree: BTRFS_UUID_KEY_SUBVOL, key.objectid); if (ret < 0) { - pr_warn("btrfs: uuid_tree_add failed %d\n", + btrfs_warn(fs_info, "uuid_tree_add failed %d", ret); break; } @@ -3545,7 +3656,7 @@ update_tree: BTRFS_UUID_KEY_RECEIVED_SUBVOL, key.objectid); if (ret < 0) { - pr_warn("btrfs: uuid_tree_add failed %d\n", + btrfs_warn(fs_info, "uuid_tree_add failed %d", ret); break; } @@ -3580,7 +3691,7 @@ out: if (trans && !IS_ERR(trans)) btrfs_end_transaction(trans, fs_info->uuid_root); if (ret) - pr_warn("btrfs: btrfs_uuid_scan_kthread failed %d\n", ret); + btrfs_warn(fs_info, "btrfs_uuid_scan_kthread failed %d", ret); else fs_info->update_uuid_tree_gen = 1; up(&fs_info->uuid_tree_rescan_sem); @@ -3644,7 +3755,7 @@ static int btrfs_uuid_rescan_kthread(void *data) */ ret = btrfs_uuid_tree_iterate(fs_info, btrfs_check_uuid_tree_entry); if (ret < 0) { - pr_warn("btrfs: iterating uuid_tree failed %d\n", ret); + btrfs_warn(fs_info, "iterating uuid_tree failed %d", ret); up(&fs_info->uuid_tree_rescan_sem); return ret; } @@ -3685,7 +3796,7 @@ int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid"); if (IS_ERR(task)) { /* fs_info->update_uuid_tree_gen remains 0 in all error case */ - pr_warn("btrfs: failed to start uuid_scan task\n"); + btrfs_warn(fs_info, "failed to start uuid_scan task"); up(&fs_info->uuid_tree_rescan_sem); return PTR_ERR(task); } @@ -3701,7 +3812,7 @@ int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info) task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid"); if (IS_ERR(task)) { /* fs_info->update_uuid_tree_gen remains 0 in all error case */ - pr_warn("btrfs: failed to start uuid_rescan task\n"); + btrfs_warn(fs_info, "failed to start uuid_rescan task"); up(&fs_info->uuid_tree_rescan_sem); return PTR_ERR(task); } @@ -3858,7 +3969,8 @@ static int btrfs_add_system_chunk(struct btrfs_root *root, u8 *ptr; array_size = btrfs_super_sys_array_size(super_copy); - if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) + if (array_size + item_size + sizeof(disk_key) + > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) return -EFBIG; ptr = super_copy->sys_chunk_array + array_size; @@ -3963,6 +4075,16 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) btrfs_set_fs_incompat(info, RAID56); } +#define BTRFS_MAX_DEVS(r) ((BTRFS_LEAF_DATA_SIZE(r) \ + - sizeof(struct btrfs_item) \ + - sizeof(struct btrfs_chunk)) \ + / sizeof(struct btrfs_stripe) + 1) + +#define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE \ + - 2 * sizeof(struct btrfs_disk_key) \ + - 2 * sizeof(struct btrfs_chunk)) \ + / sizeof(struct btrfs_stripe) + 1) + static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 start, u64 type) @@ -4012,6 +4134,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (type & BTRFS_BLOCK_GROUP_DATA) { max_stripe_size = 1024 * 1024 * 1024; max_chunk_size = 10 * max_stripe_size; + if (!devs_max) + devs_max = BTRFS_MAX_DEVS(info->chunk_root); } else if (type & BTRFS_BLOCK_GROUP_METADATA) { /* for larger filesystems, use larger metadata chunks */ if (fs_devices->total_rw_bytes > 50ULL * 1024 * 1024 * 1024) @@ -4019,11 +4143,15 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, else max_stripe_size = 256 * 1024 * 1024; max_chunk_size = max_stripe_size; + if (!devs_max) + devs_max = BTRFS_MAX_DEVS(info->chunk_root); } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { max_stripe_size = 32 * 1024 * 1024; max_chunk_size = 2 * max_stripe_size; + if (!devs_max) + devs_max = BTRFS_MAX_DEVS_SYS_CHUNK; } else { - printk(KERN_ERR "btrfs: invalid chunk type 0x%llx requested\n", + btrfs_err(info, "invalid chunk type 0x%llx requested", type); BUG_ON(1); } @@ -4055,7 +4183,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (!device->writeable) { WARN(1, KERN_ERR - "btrfs: read-only device in alloc_list\n"); + "BTRFS: read-only device in alloc_list\n"); continue; } @@ -4190,9 +4318,11 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, em = alloc_extent_map(); if (!em) { + kfree(map); ret = -ENOMEM; goto error; } + set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags); em->bdev = (struct block_device *)map; em->start = start; em->len = num_bytes; @@ -4235,7 +4365,6 @@ error_del_extent: /* One for the tree reference */ free_extent_map(em); error: - kfree(map); kfree(devices_info); return ret; } @@ -4271,7 +4400,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, if (em->start != chunk_offset || em->len != chunk_size) { btrfs_crit(extent_root->fs_info, "found a bad mapping, wanted" - " %Lu-%Lu, found %Lu-%Lu\n", chunk_offset, + " %Lu-%Lu, found %Lu-%Lu", chunk_offset, chunk_size, em->start, em->len); free_extent_map(em); return -EINVAL; @@ -4447,7 +4576,6 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) write_unlock(&tree->map_tree.lock); if (!em) break; - kfree(em->bdev); /* once for us */ free_extent_map(em); /* once for the tree */ @@ -4473,15 +4601,16 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) * and exit, so return 1 so the callers don't try to use other copies. */ if (!em) { - btrfs_crit(fs_info, "No mapping for %Lu-%Lu\n", logical, + btrfs_crit(fs_info, "No mapping for %Lu-%Lu", logical, logical+len); return 1; } if (em->start > logical || em->start + em->len < logical) { btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got " - "%Lu-%Lu\n", logical, logical+len, em->start, + "%Lu-%Lu", logical, logical+len, em->start, em->start + em->len); + free_extent_map(em); return 1; } @@ -4660,8 +4789,9 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, if (em->start > logical || em->start + em->len < logical) { btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, " - "found %Lu-%Lu\n", logical, em->start, + "found %Lu-%Lu", logical, em->start, em->start + em->len); + free_extent_map(em); return -EINVAL; } @@ -4889,7 +5019,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, num_stripes = map->num_stripes; max_errors = nr_parity_stripes(map); - raid_map = kmalloc(sizeof(u64) * num_stripes, + raid_map = kmalloc_array(num_stripes, sizeof(u64), GFP_NOFS); if (!raid_map) { ret = -ENOMEM; @@ -5181,13 +5311,13 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, read_unlock(&em_tree->lock); if (!em) { - printk(KERN_ERR "btrfs: couldn't find em for chunk %Lu\n", + printk(KERN_ERR "BTRFS: couldn't find em for chunk %Lu\n", chunk_start); return -EIO; } if (em->start != chunk_start) { - printk(KERN_ERR "btrfs: bad chunk start, em=%Lu, wanted=%Lu\n", + printk(KERN_ERR "BTRFS: bad chunk start, em=%Lu, wanted=%Lu\n", em->start, chunk_start); free_extent_map(em); return -EIO; @@ -5249,9 +5379,19 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, return 0; } +static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int err) +{ + if (likely(bbio->flags & BTRFS_BIO_ORIG_BIO_SUBMITTED)) + bio_endio_nodec(bio, err); + else + bio_endio(bio, err); + kfree(bbio); +} + static void btrfs_end_bio(struct bio *bio, int err) { struct btrfs_bio *bbio = bio->bi_private; + struct btrfs_device *dev = bbio->stripes[0].dev; int is_orig_bio = 0; if (err) { @@ -5259,7 +5399,6 @@ static void btrfs_end_bio(struct bio *bio, int err) if (err == -EIO || err == -EREMOTEIO) { unsigned int stripe_index = btrfs_io_bio(bio)->stripe_index; - struct btrfs_device *dev; BUG_ON(stripe_index >= bbio->num_stripes); dev = bbio->stripes[stripe_index].dev; @@ -5281,11 +5420,14 @@ static void btrfs_end_bio(struct bio *bio, int err) if (bio == bbio->orig_bio) is_orig_bio = 1; + btrfs_bio_counter_dec(bbio->fs_info); + if (atomic_dec_and_test(&bbio->stripes_pending)) { if (!is_orig_bio) { bio_put(bio); bio = bbio->orig_bio; } + bio->bi_private = bbio->private; bio->bi_end_io = bbio->end_io; btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; @@ -5302,21 +5444,13 @@ static void btrfs_end_bio(struct bio *bio, int err) set_bit(BIO_UPTODATE, &bio->bi_flags); err = 0; } - kfree(bbio); - bio_endio(bio, err); + btrfs_end_bbio(bbio, bio, err); } else if (!is_orig_bio) { bio_put(bio); } } -struct async_sched { - struct bio *bio; - int rw; - struct btrfs_fs_info *info; - struct btrfs_work work; -}; - /* * see run_scheduled_bios for a description of why bios are collected for * async submit. @@ -5373,8 +5507,8 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root, spin_unlock(&device->io_lock); if (should_queue) - btrfs_queue_worker(&root->fs_info->submit_workers, - &device->work); + btrfs_queue_work(root->fs_info->submit_workers, + &device->work); } static int bio_size_ok(struct block_device *bdev, struct bio *bio, @@ -5382,17 +5516,15 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio, { struct bio_vec *prev; struct request_queue *q = bdev_get_queue(bdev); - unsigned short max_sectors = queue_max_sectors(q); + unsigned int max_sectors = queue_max_sectors(q); struct bvec_merge_data bvm = { .bi_bdev = bdev, .bi_sector = sector, .bi_rw = bio->bi_rw, }; - if (bio->bi_vcnt == 0) { - WARN_ON(1); + if (WARN_ON(bio->bi_vcnt == 0)) return 1; - } prev = &bio->bi_io_vec[bio->bi_vcnt - 1]; if (bio_sectors(bio) > max_sectors) @@ -5401,7 +5533,7 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio, if (!q->merge_bvec_fn) return 1; - bvm.bi_size = bio->bi_size - prev->bv_len; + bvm.bi_size = bio->bi_iter.bi_size - prev->bv_len; if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) return 0; return 1; @@ -5416,7 +5548,7 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, bio->bi_private = bbio; btrfs_io_bio(bio)->stripe_index = dev_nr; bio->bi_end_io = btrfs_end_bio; - bio->bi_sector = physical >> 9; + bio->bi_iter.bi_sector = physical >> 9; #ifdef DEBUG { struct rcu_string *name; @@ -5431,6 +5563,9 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, } #endif bio->bi_bdev = dev->bdev; + + btrfs_bio_counter_inc_noblocked(root->fs_info); + if (async) btrfs_schedule_bio(root, dev, rw, bio); else @@ -5454,7 +5589,7 @@ again: while (bvec <= (first_bio->bi_io_vec + first_bio->bi_vcnt - 1)) { if (bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset) < bvec->bv_len) { - u64 len = bio->bi_size; + u64 len = bio->bi_iter.bi_size; atomic_inc(&bbio->stripes_pending); submit_stripe_bio(root, bbio, bio, physical, dev_nr, @@ -5473,12 +5608,15 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) { atomic_inc(&bbio->error); if (atomic_dec_and_test(&bbio->stripes_pending)) { + /* Shoud be the original bio. */ + WARN_ON(bio != bbio->orig_bio); + bio->bi_private = bbio->private; bio->bi_end_io = bbio->end_io; btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; - bio->bi_sector = logical >> 9; - kfree(bbio); - bio_endio(bio, -EIO); + bio->bi_iter.bi_sector = logical >> 9; + + btrfs_end_bbio(bbio, bio, -EIO); } } @@ -5487,7 +5625,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, { struct btrfs_device *dev; struct bio *first_bio = bio; - u64 logical = (u64)bio->bi_sector << 9; + u64 logical = (u64)bio->bi_iter.bi_sector << 9; u64 length = 0; u64 map_length; u64 *raid_map = NULL; @@ -5496,31 +5634,41 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, int total_devs = 1; struct btrfs_bio *bbio = NULL; - length = bio->bi_size; + length = bio->bi_iter.bi_size; map_length = length; + btrfs_bio_counter_inc_blocked(root->fs_info); ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, mirror_num, &raid_map); - if (ret) /* -ENOMEM */ + if (ret) { + btrfs_bio_counter_dec(root->fs_info); return ret; + } total_devs = bbio->num_stripes; bbio->orig_bio = first_bio; bbio->private = first_bio->bi_private; bbio->end_io = first_bio->bi_end_io; + bbio->fs_info = root->fs_info; atomic_set(&bbio->stripes_pending, bbio->num_stripes); if (raid_map) { /* In this case, map_length has been set to the length of a single stripe; not the whole write */ if (rw & WRITE) { - return raid56_parity_write(root, bio, bbio, - raid_map, map_length); + ret = raid56_parity_write(root, bio, bbio, + raid_map, map_length); } else { - return raid56_parity_recover(root, bio, bbio, - raid_map, map_length, - mirror_num); + ret = raid56_parity_recover(root, bio, bbio, + raid_map, map_length, + mirror_num); } + /* + * FIXME, replace dosen't support raid56 yet, please fix + * it in the future. + */ + btrfs_bio_counter_dec(root->fs_info); + return ret; } if (map_length < length) { @@ -5555,6 +5703,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, BUG_ON(!bio); /* -ENOMEM */ } else { bio = first_bio; + bbio->flags |= BTRFS_BIO_ORIG_BIO_SUBMITTED; } submit_stripe_bio(root, bbio, bio, @@ -5562,6 +5711,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, async_submit); dev_nr++; } + btrfs_bio_counter_dec(root->fs_info); return 0; } @@ -5625,10 +5775,8 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, struct btrfs_device *dev; u64 tmp; - if (!devid && !fs_info) { - WARN_ON(1); + if (WARN_ON(!devid && !fs_info)) return ERR_PTR(-EINVAL); - } dev = __alloc_device(); if (IS_ERR(dev)) @@ -5652,7 +5800,7 @@ struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, else generate_random_uuid(dev->uuid); - dev->work.func = pending_bios_fn; + btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL); return dev; } @@ -5697,6 +5845,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, return -ENOMEM; } + set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags); em->bdev = (struct block_device *)map; em->start = logical; em->len = length; @@ -5721,7 +5870,6 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, map->stripes[i].dev = btrfs_find_device(root->fs_info, devid, uuid, NULL); if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) { - kfree(map); free_extent_map(em); return -EIO; } @@ -5729,7 +5877,6 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, map->stripes[i].dev = add_missing_dev(root, devid, uuid); if (!map->stripes[i].dev) { - kfree(map); free_extent_map(em); return -EIO; } @@ -6021,10 +6168,14 @@ void btrfs_init_devices_late(struct btrfs_fs_info *fs_info) struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_device *device; - mutex_lock(&fs_devices->device_list_mutex); - list_for_each_entry(device, &fs_devices->devices, dev_list) - device->dev_root = fs_info->dev_root; - mutex_unlock(&fs_devices->device_list_mutex); + while (fs_devices) { + mutex_lock(&fs_devices->device_list_mutex); + list_for_each_entry(device, &fs_devices->devices, dev_list) + device->dev_root = fs_info->dev_root; + mutex_unlock(&fs_devices->device_list_mutex); + + fs_devices = fs_devices->seed; + } } static void __btrfs_reset_dev_stats(struct btrfs_device *dev) @@ -6115,7 +6266,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, BUG_ON(!path); ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); if (ret < 0) { - printk_in_rcu(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n", + printk_in_rcu(KERN_WARNING "BTRFS: " + "error %d while searching for dev_stats item for device %s!\n", ret, rcu_str_deref(device->name)); goto out; } @@ -6125,7 +6277,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, /* need to delete old one and insert a new one */ ret = btrfs_del_item(trans, dev_root, path); if (ret != 0) { - printk_in_rcu(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n", + printk_in_rcu(KERN_WARNING "BTRFS: " + "delete too small dev_stats item for device %s failed %d!\n", rcu_str_deref(device->name), ret); goto out; } @@ -6138,7 +6291,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, dev_root, path, &key, sizeof(*ptr)); if (ret < 0) { - printk_in_rcu(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n", + printk_in_rcu(KERN_WARNING "BTRFS: " + "insert dev_stats item for device %s failed %d!\n", rcu_str_deref(device->name), ret); goto out; } @@ -6191,16 +6345,14 @@ static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev) { if (!dev->dev_stats_valid) return; - printk_ratelimited_in_rcu(KERN_ERR - "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", + printk_ratelimited_in_rcu(KERN_ERR "BTRFS: " + "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", rcu_str_deref(dev->name), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), - btrfs_dev_stat_read(dev, - BTRFS_DEV_STAT_CORRUPTION_ERRS), - btrfs_dev_stat_read(dev, - BTRFS_DEV_STAT_GENERATION_ERRS)); + btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS), + btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS)); } static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) @@ -6213,7 +6365,8 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) if (i == BTRFS_DEV_STAT_VALUES_MAX) return; /* all values == 0, suppress message */ - printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", + printk_in_rcu(KERN_INFO "BTRFS: " + "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", rcu_str_deref(dev->name), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), @@ -6234,12 +6387,10 @@ int btrfs_get_dev_stats(struct btrfs_root *root, mutex_unlock(&fs_devices->device_list_mutex); if (!dev) { - printk(KERN_WARNING - "btrfs: get dev_stats failed, device not found\n"); + btrfs_warn(root->fs_info, "get dev_stats failed, device not found"); return -ENODEV; } else if (!dev->dev_stats_valid) { - printk(KERN_WARNING - "btrfs: get dev_stats failed, not yet valid\n"); + btrfs_warn(root->fs_info, "get dev_stats failed, not yet valid"); return -ENODEV; } else if (stats->flags & BTRFS_DEV_STATS_RESET) { for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) { |
