diff options
Diffstat (limited to 'fs/btrfs/volumes.c')
| -rw-r--r-- | fs/btrfs/volumes.c | 988 |
1 files changed, 727 insertions, 261 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 78b871753cb..6cb82f62cb7 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -26,8 +26,8 @@ #include <linux/ratelimit.h> #include <linux/kthread.h> #include <linux/raid/pq.h> +#include <linux/semaphore.h> #include <asm/div64.h> -#include "compat.h" #include "ctree.h" #include "extent_map.h" #include "disk-io.h" @@ -40,6 +40,7 @@ #include "rcu-string.h" #include "math.h" #include "dev-replace.h" +#include "sysfs.h" static int init_first_rw_device(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -62,6 +63,48 @@ static void unlock_chunks(struct btrfs_root *root) mutex_unlock(&root->fs_info->chunk_mutex); } +static struct btrfs_fs_devices *__alloc_fs_devices(void) +{ + struct btrfs_fs_devices *fs_devs; + + fs_devs = kzalloc(sizeof(*fs_devs), GFP_NOFS); + if (!fs_devs) + return ERR_PTR(-ENOMEM); + + mutex_init(&fs_devs->device_list_mutex); + + INIT_LIST_HEAD(&fs_devs->devices); + INIT_LIST_HEAD(&fs_devs->alloc_list); + INIT_LIST_HEAD(&fs_devs->list); + + return fs_devs; +} + +/** + * alloc_fs_devices - allocate struct btrfs_fs_devices + * @fsid: a pointer to UUID for this FS. If NULL a new UUID is + * generated. + * + * Return: a pointer to a new &struct btrfs_fs_devices on success; + * ERR_PTR() on error. Returned struct is not linked onto any lists and + * can be destroyed with kfree() right away. + */ +static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid) +{ + struct btrfs_fs_devices *fs_devs; + + fs_devs = __alloc_fs_devices(); + if (IS_ERR(fs_devs)) + return fs_devs; + + if (fsid) + memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE); + else + generate_random_uuid(fs_devs->fsid); + + return fs_devs; +} + static void free_fs_devices(struct btrfs_fs_devices *fs_devices) { struct btrfs_device *device; @@ -83,7 +126,7 @@ static void btrfs_kobject_uevent(struct block_device *bdev, ret = kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, action); if (ret) - pr_warn("Sending event '%d' to kobject: '%s' (%p): failed\n", + pr_warn("BTRFS: Sending event '%d' to kobject: '%s' (%p): failed\n", action, kobject_name(&disk_to_dev(bdev->bd_disk)->kobj), &disk_to_dev(bdev->bd_disk)->kobj); @@ -101,6 +144,27 @@ void btrfs_cleanup_fs_uuids(void) } } +static struct btrfs_device *__alloc_device(void) +{ + struct btrfs_device *dev; + + dev = kzalloc(sizeof(*dev), GFP_NOFS); + if (!dev) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&dev->dev_list); + INIT_LIST_HEAD(&dev->dev_alloc_list); + + spin_lock_init(&dev->io_lock); + + spin_lock_init(&dev->reada_lock); + atomic_set(&dev->reada_in_flight, 0); + INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT); + INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT); + + return dev; +} + static noinline struct btrfs_device *__find_device(struct list_head *head, u64 devid, u8 *uuid) { @@ -137,7 +201,7 @@ btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder, if (IS_ERR(*bdev)) { ret = PTR_ERR(*bdev); - printk(KERN_INFO "btrfs: open %s failed\n", device_path); + printk(KERN_INFO "BTRFS: open %s failed\n", device_path); goto error; } @@ -352,7 +416,8 @@ loop_lock: device->running_pending = 1; spin_unlock(&device->io_lock); - btrfs_requeue_work(&device->work); + btrfs_queue_work(fs_info->submit_workers, + &device->work); goto done; } /* unplug every 64 requests just for good measure */ @@ -384,6 +449,14 @@ static void pending_bios_fn(struct btrfs_work *work) run_scheduled_bios(device); } +/* + * Add new device to list of registered devices + * + * Returns: + * 1 - first time device is seen + * 0 - device already known + * < 0 - error + */ static noinline int device_list_add(const char *path, struct btrfs_super_block *disk_super, u64 devid, struct btrfs_fs_devices **fs_devices_ret) @@ -391,20 +464,19 @@ static noinline int device_list_add(const char *path, struct btrfs_device *device; struct btrfs_fs_devices *fs_devices; struct rcu_string *name; + int ret = 0; u64 found_transid = btrfs_super_generation(disk_super); fs_devices = find_fsid(disk_super->fsid); if (!fs_devices) { - fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); - if (!fs_devices) - return -ENOMEM; - INIT_LIST_HEAD(&fs_devices->devices); - INIT_LIST_HEAD(&fs_devices->alloc_list); + fs_devices = alloc_fs_devices(disk_super->fsid); + if (IS_ERR(fs_devices)) + return PTR_ERR(fs_devices); + list_add(&fs_devices->list, &fs_uuids); - memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE); fs_devices->latest_devid = devid; fs_devices->latest_trans = found_transid; - mutex_init(&fs_devices->device_list_mutex); + device = NULL; } else { device = __find_device(&fs_devices->devices, devid, @@ -414,17 +486,12 @@ static noinline int device_list_add(const char *path, if (fs_devices->opened) return -EBUSY; - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) { + device = btrfs_alloc_device(NULL, &devid, + disk_super->dev_item.uuid); + if (IS_ERR(device)) { /* we can safely leave the fs_devices entry around */ - return -ENOMEM; + return PTR_ERR(device); } - device->devid = devid; - device->dev_stats_valid = 0; - device->work.func = pending_bios_fn; - memcpy(device->uuid, disk_super->dev_item.uuid, - BTRFS_UUID_SIZE); - spin_lock_init(&device->io_lock); name = rcu_string_strdup(path, GFP_NOFS); if (!name) { @@ -432,22 +499,14 @@ static noinline int device_list_add(const char *path, return -ENOMEM; } rcu_assign_pointer(device->name, name); - INIT_LIST_HEAD(&device->dev_alloc_list); - - /* init readahead state */ - spin_lock_init(&device->reada_lock); - device->reada_curr_zone = NULL; - atomic_set(&device->reada_in_flight, 0); - device->reada_next = 0; - INIT_RADIX_TREE(&device->reada_zones, GFP_NOFS & ~__GFP_WAIT); - INIT_RADIX_TREE(&device->reada_extents, GFP_NOFS & ~__GFP_WAIT); mutex_lock(&fs_devices->device_list_mutex); list_add_rcu(&device->dev_list, &fs_devices->devices); + fs_devices->num_devices++; mutex_unlock(&fs_devices->device_list_mutex); + ret = 1; device->fs_devices = fs_devices; - fs_devices->num_devices++; } else if (!device->name || strcmp(device->name->str, path)) { name = rcu_string_strdup(path, GFP_NOFS); if (!name) @@ -465,7 +524,8 @@ static noinline int device_list_add(const char *path, fs_devices->latest_trans = found_transid; } *fs_devices_ret = fs_devices; - return 0; + + return ret; } static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) @@ -474,44 +534,35 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig) struct btrfs_device *device; struct btrfs_device *orig_dev; - fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); - if (!fs_devices) - return ERR_PTR(-ENOMEM); + fs_devices = alloc_fs_devices(orig->fsid); + if (IS_ERR(fs_devices)) + return fs_devices; - INIT_LIST_HEAD(&fs_devices->devices); - INIT_LIST_HEAD(&fs_devices->alloc_list); - INIT_LIST_HEAD(&fs_devices->list); - mutex_init(&fs_devices->device_list_mutex); fs_devices->latest_devid = orig->latest_devid; fs_devices->latest_trans = orig->latest_trans; fs_devices->total_devices = orig->total_devices; - memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid)); /* We have held the volume lock, it is safe to get the devices. */ list_for_each_entry(orig_dev, &orig->devices, dev_list) { struct rcu_string *name; - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) + device = btrfs_alloc_device(NULL, &orig_dev->devid, + orig_dev->uuid); + if (IS_ERR(device)) goto error; /* * This is ok to do without rcu read locked because we hold the * uuid mutex so nothing we touch in here is going to disappear. */ - name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS); - if (!name) { - kfree(device); - goto error; + if (orig_dev->name) { + name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS); + if (!name) { + kfree(device); + goto error; + } + rcu_assign_pointer(device->name, name); } - rcu_assign_pointer(device->name, name); - - device->devid = orig_dev->devid; - device->work.func = pending_bios_fn; - memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid)); - spin_lock_init(&device->io_lock); - INIT_LIST_HEAD(&device->dev_list); - INIT_LIST_HEAD(&device->dev_alloc_list); list_add(&device->dev_list, &fs_devices->devices); device->fs_devices = fs_devices; @@ -629,30 +680,30 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) if (device->bdev) fs_devices->open_devices--; - if (device->writeable && !device->is_tgtdev_for_dev_replace) { + if (device->writeable && + device->devid != BTRFS_DEV_REPLACE_DEVID) { list_del_init(&device->dev_alloc_list); fs_devices->rw_devices--; } if (device->can_discard) fs_devices->num_can_discard--; + if (device->missing) + fs_devices->missing_devices--; - new_device = kmalloc(sizeof(*new_device), GFP_NOFS); - BUG_ON(!new_device); /* -ENOMEM */ - memcpy(new_device, device, sizeof(*new_device)); + new_device = btrfs_alloc_device(NULL, &device->devid, + device->uuid); + BUG_ON(IS_ERR(new_device)); /* -ENOMEM */ /* Safe because we are under uuid_mutex */ if (device->name) { name = rcu_string_strdup(device->name->str, GFP_NOFS); - BUG_ON(device->name && !name); /* -ENOMEM */ + BUG_ON(!name); /* -ENOMEM */ rcu_assign_pointer(new_device->name, name); } - new_device->bdev = NULL; - new_device->writeable = 0; - new_device->in_fs_metadata = 0; - new_device->can_discard = 0; - spin_lock_init(&new_device->io_lock); + list_replace_rcu(&device->dev_list, &new_device->dev_list); + new_device->fs_devices = device->fs_devices; call_rcu(&device->rcu, free_device); } @@ -760,7 +811,8 @@ static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, fs_devices->rotating = 1; fs_devices->open_devices++; - if (device->writeable && !device->is_tgtdev_for_dev_replace) { + if (device->writeable && + device->devid != BTRFS_DEV_REPLACE_DEVID) { fs_devices->rw_devices++; list_add(&device->dev_alloc_list, &fs_devices->alloc_list); @@ -865,25 +917,26 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder, disk_super = p + (bytenr & ~PAGE_CACHE_MASK); if (btrfs_super_bytenr(disk_super) != bytenr || - disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) + btrfs_super_magic(disk_super) != BTRFS_MAGIC) goto error_unmap; devid = btrfs_stack_device_id(&disk_super->dev_item); transid = btrfs_super_generation(disk_super); total_devices = btrfs_super_num_devices(disk_super); - if (disk_super->label[0]) { - if (disk_super->label[BTRFS_LABEL_SIZE - 1]) - disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0'; - printk(KERN_INFO "device label %s ", disk_super->label); - } else { - printk(KERN_INFO "device fsid %pU ", disk_super->fsid); - } - - printk(KERN_CONT "devid %llu transid %llu %s\n", - (unsigned long long)devid, (unsigned long long)transid, path); - ret = device_list_add(path, disk_super, devid, fs_devices_ret); + if (ret > 0) { + if (disk_super->label[0]) { + if (disk_super->label[BTRFS_LABEL_SIZE - 1]) + disk_super->label[BTRFS_LABEL_SIZE - 1] = '\0'; + printk(KERN_INFO "BTRFS: device label %s ", disk_super->label); + } else { + printk(KERN_INFO "BTRFS: device fsid %pU ", disk_super->fsid); + } + + printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path); + ret = 0; + } if (!ret && fs_devices_ret) (*fs_devices_ret)->total_devices = total_devices; @@ -1278,8 +1331,7 @@ static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset); write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, - (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent), - BTRFS_UUID_SIZE); + btrfs_dev_extent_chunk_tree_uuid(extent), BTRFS_UUID_SIZE); btrfs_set_dev_extent_length(leaf, extent, num_bytes); btrfs_mark_buffer_dirty(leaf); @@ -1307,15 +1359,14 @@ static u64 find_next_chunk(struct btrfs_fs_info *fs_info) return ret; } -static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid) +static noinline int find_next_devid(struct btrfs_fs_info *fs_info, + u64 *devid_ret) { int ret; struct btrfs_key key; struct btrfs_key found_key; struct btrfs_path *path; - root = root->fs_info->chunk_root; - path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -1324,20 +1375,21 @@ static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid) key.type = BTRFS_DEV_ITEM_KEY; key.offset = (u64)-1; - ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0); if (ret < 0) goto error; BUG_ON(ret == 0); /* Corruption */ - ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID, + ret = btrfs_previous_item(fs_info->chunk_root, path, + BTRFS_DEV_ITEMS_OBJECTID, BTRFS_DEV_ITEM_KEY); if (ret) { - *objectid = 1; + *devid_ret = 1; } else { btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); - *objectid = found_key.offset + 1; + *devid_ret = found_key.offset + 1; } ret = 0; error: @@ -1391,9 +1443,9 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans, btrfs_set_device_bandwidth(leaf, dev_item, 0); btrfs_set_device_start_offset(leaf, dev_item, 0); - ptr = (unsigned long)btrfs_device_uuid(dev_item); + ptr = btrfs_device_uuid(dev_item); write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); - ptr = (unsigned long)btrfs_device_fsid(dev_item); + ptr = btrfs_device_fsid(dev_item); write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE); btrfs_mark_buffer_dirty(leaf); @@ -1403,6 +1455,22 @@ out: return ret; } +/* + * Function to update ctime/mtime for a given device path. + * Mainly used for ctime/mtime based probe like libblkid. + */ +static void update_dev_time(char *path_name) +{ + struct file *filp; + + filp = filp_open(path_name, O_RDWR, 0); + if (!filp) + return; + file_update_time(filp); + filp_close(filp, NULL); + return; +} + static int btrfs_rm_dev_item(struct btrfs_root *root, struct btrfs_device *device) { @@ -1562,7 +1630,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) clear_super = true; } + mutex_unlock(&uuid_mutex); ret = btrfs_shrink_device(device, 0); + mutex_lock(&uuid_mutex); if (ret) goto error_undo; @@ -1586,7 +1656,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) /* * the device list mutex makes sure that we don't change * the device list while someone else is writing out all - * the device supers. + * the device supers. Whoever is writing all supers, should + * lock the device list mutex before getting the number of + * devices in the super block (super_copy). Conversely, + * whoever updates the number of devices in the super block + * (super_copy) should hold the device list mutex. */ cur_devices = device->fs_devices; @@ -1606,24 +1680,28 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) if (device->bdev == root->fs_info->fs_devices->latest_bdev) root->fs_info->fs_devices->latest_bdev = next_device->bdev; - if (device->bdev) + if (device->bdev) { device->fs_devices->open_devices--; + /* remove sysfs entry */ + btrfs_kobj_rm_device(root->fs_info, device); + } call_rcu(&device->rcu, free_device); - mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1; btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices); + mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); if (cur_devices->open_devices == 0) { struct btrfs_fs_devices *fs_devices; fs_devices = root->fs_info->fs_devices; while (fs_devices) { - if (fs_devices->seed == cur_devices) + if (fs_devices->seed == cur_devices) { + fs_devices->seed = cur_devices->seed; break; + } fs_devices = fs_devices->seed; } - fs_devices->seed = cur_devices->seed; cur_devices->seed = NULL; lock_chunks(root); __btrfs_close_devices(cur_devices); @@ -1639,20 +1717,55 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) * remove it from the devices list and zero out the old super */ if (clear_super && disk_super) { + u64 bytenr; + int i; + /* make sure this device isn't detected as part of * the FS anymore */ memset(&disk_super->magic, 0, sizeof(disk_super->magic)); set_buffer_dirty(bh); sync_dirty_buffer(bh); + + /* clear the mirror copies of super block on the disk + * being removed, 0th copy is been taken care above and + * the below would take of the rest + */ + for (i = 1; i < BTRFS_SUPER_MIRROR_MAX; i++) { + bytenr = btrfs_sb_offset(i); + if (bytenr + BTRFS_SUPER_INFO_SIZE >= + i_size_read(bdev->bd_inode)) + break; + + brelse(bh); + bh = __bread(bdev, bytenr / 4096, + BTRFS_SUPER_INFO_SIZE); + if (!bh) + continue; + + disk_super = (struct btrfs_super_block *)bh->b_data; + + if (btrfs_super_bytenr(disk_super) != bytenr || + btrfs_super_magic(disk_super) != BTRFS_MAGIC) { + continue; + } + memset(&disk_super->magic, 0, + sizeof(disk_super->magic)); + set_buffer_dirty(bh); + sync_dirty_buffer(bh); + } } ret = 0; - /* Notify udev that device has changed */ - if (bdev) + if (bdev) { + /* Notify udev that device has changed */ btrfs_kobject_uevent(bdev, KOBJ_CHANGE); + /* Update ctime/mtime for device path for libblkid */ + update_dev_time(device_path); + } + error_brelse: brelse(bh); if (bdev) @@ -1675,6 +1788,7 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, struct btrfs_device *srcdev) { WARN_ON(!mutex_is_locked(&fs_info->fs_devices->device_list_mutex)); + list_del_rcu(&srcdev->dev_list); list_del_rcu(&srcdev->dev_alloc_list); fs_info->fs_devices->num_devices--; @@ -1684,9 +1798,13 @@ void btrfs_rm_dev_replace_srcdev(struct btrfs_fs_info *fs_info, } if (srcdev->can_discard) fs_info->fs_devices->num_can_discard--; - if (srcdev->bdev) + if (srcdev->bdev) { fs_info->fs_devices->open_devices--; + /* zero out the old super */ + btrfs_scratch_superblock(srcdev); + } + call_rcu(&srcdev->rcu, free_device); } @@ -1767,7 +1885,7 @@ int btrfs_find_device_missing_or_by_path(struct btrfs_root *root, } if (!*device) { - pr_err("btrfs: no missing device found\n"); + btrfs_err(root->fs_info, "no missing device found"); return -ENOENT; } @@ -1793,9 +1911,9 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) if (!fs_devices->seeding) return -EINVAL; - seed_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); - if (!seed_devices) - return -ENOMEM; + seed_devices = __alloc_fs_devices(); + if (IS_ERR(seed_devices)) + return PTR_ERR(seed_devices); old_devices = clone_fs_devices(fs_devices); if (IS_ERR(old_devices)) { @@ -1814,7 +1932,6 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) mutex_lock(&root->fs_info->fs_devices->device_list_mutex); list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices, synchronize_rcu); - mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list); list_for_each_entry(device, &seed_devices->devices, dev_list) { @@ -1824,12 +1941,13 @@ static int btrfs_prepare_sprout(struct btrfs_root *root) fs_devices->seeding = 0; fs_devices->num_devices = 0; fs_devices->open_devices = 0; - fs_devices->total_devices = 0; fs_devices->seed = seed_devices; generate_random_uuid(fs_devices->fsid); memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); + mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); + super_flags = btrfs_super_flags(disk_super) & ~BTRFS_SUPER_FLAG_SEEDING; btrfs_set_super_flags(disk_super, super_flags); @@ -1889,11 +2007,9 @@ next_slot: dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); devid = btrfs_device_id(leaf, dev_item); - read_extent_buffer(leaf, dev_uuid, - (unsigned long)btrfs_device_uuid(dev_item), + read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE); - read_extent_buffer(leaf, fs_uuid, - (unsigned long)btrfs_device_fsid(dev_item), + read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE); device = btrfs_find_device(root->fs_info, devid, dev_uuid, fs_uuid); @@ -1956,10 +2072,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) } mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) { + device = btrfs_alloc_device(root->fs_info, NULL, NULL); + if (IS_ERR(device)) { /* we can safely leave the fs_devices entry around */ - ret = -ENOMEM; + ret = PTR_ERR(device); goto error; } @@ -1971,13 +2087,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) } rcu_assign_pointer(device->name, name); - ret = find_next_devid(root, &device->devid); - if (ret) { - rcu_string_free(device->name); - kfree(device); - goto error; - } - trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { rcu_string_free(device->name); @@ -1992,9 +2101,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) if (blk_queue_discard(q)) device->can_discard = 1; device->writeable = 1; - device->work.func = pending_bios_fn; - generate_random_uuid(device->uuid); - spin_lock_init(&device->io_lock); device->generation = trans->transid; device->io_width = root->sectorsize; device->io_align = root->sectorsize; @@ -2006,6 +2112,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) device->in_fs_metadata = 1; device->is_tgtdev_for_dev_replace = 0; device->mode = FMODE_EXCL; + device->dev_stats_valid = 1; set_blocksize(device->bdev, 4096); if (seeding_dev) { @@ -2042,9 +2149,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) total_bytes = btrfs_super_num_devices(root->fs_info->super_copy); btrfs_set_super_num_devices(root->fs_info->super_copy, total_bytes + 1); + + /* add sysfs device entry */ + btrfs_kobj_add_device(root->fs_info, device); + mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); if (seeding_dev) { + char fsid_buf[BTRFS_UUID_UNPARSED_SIZE]; ret = init_first_rw_device(trans, root, device); if (ret) { btrfs_abort_transaction(trans, root, ret); @@ -2055,6 +2167,14 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) btrfs_abort_transaction(trans, root, ret); goto error_trans; } + + /* Sprouting would change fsid of the mounted root, + * so rename the fsid on the sysfs + */ + snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU", + root->fs_info->fsid); + if (kobject_rename(&root->fs_info->super_kobj, fsid_buf)) + goto error_trans; } else { ret = btrfs_add_device(trans, root, device); if (ret) { @@ -2096,12 +2216,15 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) ret = btrfs_commit_transaction(trans, root); } + /* Update ctime/mtime for libblkid */ + update_dev_time(device_path); return ret; error_trans: unlock_chunks(root); btrfs_end_transaction(trans, root); rcu_string_free(device->name); + btrfs_kobj_rm_device(root->fs_info, device); kfree(device); error: blkdev_put(bdev, FMODE_EXCL); @@ -2121,6 +2244,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, struct btrfs_fs_info *fs_info = root->fs_info; struct list_head *devices; struct rcu_string *name; + u64 devid = BTRFS_DEV_REPLACE_DEVID; int ret = 0; *device_out = NULL; @@ -2142,9 +2266,9 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, } } - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) { - ret = -ENOMEM; + device = btrfs_alloc_device(NULL, &devid, NULL); + if (IS_ERR(device)) { + ret = PTR_ERR(device); goto error; } @@ -2161,10 +2285,6 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, device->can_discard = 1; mutex_lock(&root->fs_info->fs_devices->device_list_mutex); device->writeable = 1; - device->work.func = pending_bios_fn; - generate_random_uuid(device->uuid); - device->devid = BTRFS_DEV_REPLACE_DEVID; - spin_lock_init(&device->io_lock); device->generation = 0; device->io_width = root->sectorsize; device->io_align = root->sectorsize; @@ -2176,6 +2296,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path, device->in_fs_metadata = 1; device->is_tgtdev_for_dev_replace = 1; device->mode = FMODE_EXCL; + device->dev_stats_valid = 1; set_blocksize(device->bdev, 4096); device->fs_devices = fs_info->fs_devices; list_add(&device->dev_list, &fs_info->fs_devices->devices); @@ -2442,9 +2563,6 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, remove_extent_mapping(em_tree, em); write_unlock(&em_tree->lock); - kfree(map); - em->bdev = NULL; - /* once for the tree */ free_extent_map(em); /* once for us */ @@ -2518,8 +2636,7 @@ again: failed = 0; retried = true; goto again; - } else if (failed && retried) { - WARN_ON(1); + } else if (WARN_ON(failed && retried)) { ret = -ENOSPC; } error: @@ -2875,6 +2992,16 @@ static int should_balance_chunk(struct btrfs_root *root, return 0; } + /* + * limited by count, must be the last filter + */ + if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT)) { + if (bargs->limit == 0) + return 0; + else + bargs->limit--; + } + return 1; } @@ -2897,6 +3024,9 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) int ret; int enospc_errors = 0; bool counting = true; + u64 limit_data = bctl->data.limit; + u64 limit_meta = bctl->meta.limit; + u64 limit_sys = bctl->sys.limit; /* step one make some room on all the devices */ devices = &fs_info->fs_devices->devices; @@ -2935,6 +3065,11 @@ static int __btrfs_balance(struct btrfs_fs_info *fs_info) memset(&bctl->stat, 0, sizeof(bctl->stat)); spin_unlock(&fs_info->balance_lock); again: + if (!counting) { + bctl->data.limit = limit_data; + bctl->meta.limit = limit_meta; + bctl->sys.limit = limit_sys; + } key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; key.offset = (u64)-1; key.type = BTRFS_CHUNK_ITEM_KEY; @@ -2971,10 +3106,6 @@ again: if (found_key.objectid != key.objectid) break; - /* chunk zero is special */ - if (found_key.offset == 0) - break; - chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); if (!counting) { @@ -3010,6 +3141,8 @@ again: spin_unlock(&fs_info->balance_lock); } loop: + if (found_key.offset == 0) + break; key.offset = found_key.offset - 1; } @@ -3021,7 +3154,7 @@ loop: error: btrfs_free_path(path); if (enospc_errors) { - printk(KERN_INFO "btrfs: %d enospc errors during balance\n", + btrfs_info(fs_info, "%d enospc errors during balance", enospc_errors); if (!ret) ret = -ENOSPC; @@ -3074,9 +3207,6 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info) atomic_set(&fs_info->mutually_exclusive_operation_running, 0); } -void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, - struct btrfs_ioctl_balance_args *bargs); - /* * Should be called with both balance and volume mutexes held */ @@ -3110,8 +3240,8 @@ int btrfs_balance(struct btrfs_balance_control *bctl, if (!(bctl->flags & BTRFS_BALANCE_DATA) || !(bctl->flags & BTRFS_BALANCE_METADATA) || memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) { - printk(KERN_ERR "btrfs: with mixed groups data and " - "metadata balance options must be the same\n"); + btrfs_err(fs_info, "with mixed groups data and " + "metadata balance options must be the same"); ret = -EINVAL; goto out; } @@ -3137,27 +3267,27 @@ int btrfs_balance(struct btrfs_balance_control *bctl, if ((bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) && (!alloc_profile_is_valid(bctl->data.target, 1) || (bctl->data.target & ~allowed))) { - printk(KERN_ERR "btrfs: unable to start balance with target " - "data profile %llu\n", - (unsigned long long)bctl->data.target); + btrfs_err(fs_info, "unable to start balance with target " + "data profile %llu", + bctl->data.target); ret = -EINVAL; goto out; } if ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) && (!alloc_profile_is_valid(bctl->meta.target, 1) || (bctl->meta.target & ~allowed))) { - printk(KERN_ERR "btrfs: unable to start balance with target " - "metadata profile %llu\n", - (unsigned long long)bctl->meta.target); + btrfs_err(fs_info, + "unable to start balance with target metadata profile %llu", + bctl->meta.target); ret = -EINVAL; goto out; } if ((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) && (!alloc_profile_is_valid(bctl->sys.target, 1) || (bctl->sys.target & ~allowed))) { - printk(KERN_ERR "btrfs: unable to start balance with target " - "system profile %llu\n", - (unsigned long long)bctl->sys.target); + btrfs_err(fs_info, + "unable to start balance with target system profile %llu", + bctl->sys.target); ret = -EINVAL; goto out; } @@ -3165,7 +3295,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl, /* allow dup'ed data chunks only in mixed mode */ if (!mixed && (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) && (bctl->data.target & BTRFS_BLOCK_GROUP_DUP)) { - printk(KERN_ERR "btrfs: dup for data is not allowed\n"); + btrfs_err(fs_info, "dup for data is not allowed"); ret = -EINVAL; goto out; } @@ -3185,11 +3315,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl, (fs_info->avail_metadata_alloc_bits & allowed) && !(bctl->meta.target & allowed))) { if (bctl->flags & BTRFS_BALANCE_FORCE) { - printk(KERN_INFO "btrfs: force reducing metadata " - "integrity\n"); + btrfs_info(fs_info, "force reducing metadata integrity"); } else { - printk(KERN_ERR "btrfs: balance will reduce metadata " - "integrity, use force if you want this\n"); + btrfs_err(fs_info, "balance will reduce metadata " + "integrity, use force if you want this"); ret = -EINVAL; goto out; } @@ -3275,7 +3404,7 @@ static int balance_kthread(void *data) mutex_lock(&fs_info->balance_mutex); if (fs_info->balance_ctl) { - printk(KERN_INFO "btrfs: continuing balance\n"); + btrfs_info(fs_info, "continuing balance"); ret = btrfs_balance(fs_info->balance_ctl, NULL); } @@ -3297,12 +3426,12 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info) spin_unlock(&fs_info->balance_lock); if (btrfs_test_opt(fs_info->tree_root, SKIP_BALANCE)) { - printk(KERN_INFO "btrfs: force skipping balance\n"); + btrfs_info(fs_info, "force skipping balance"); return 0; } tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); - return PTR_RET(tsk); + return PTR_ERR_OR_ZERO(tsk); } int btrfs_recover_balance(struct btrfs_fs_info *fs_info) @@ -3396,6 +3525,9 @@ int btrfs_pause_balance(struct btrfs_fs_info *fs_info) int btrfs_cancel_balance(struct btrfs_fs_info *fs_info) { + if (fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; + mutex_lock(&fs_info->balance_mutex); if (!fs_info->balance_ctl) { mutex_unlock(&fs_info->balance_mutex); @@ -3430,6 +3562,264 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info) return 0; } +static int btrfs_uuid_scan_kthread(void *data) +{ + struct btrfs_fs_info *fs_info = data; + struct btrfs_root *root = fs_info->tree_root; + struct btrfs_key key; + struct btrfs_key max_key; + struct btrfs_path *path = NULL; + int ret = 0; + struct extent_buffer *eb; + int slot; + struct btrfs_root_item root_item; + u32 item_size; + struct btrfs_trans_handle *trans = NULL; + + path = btrfs_alloc_path(); + if (!path) { + ret = -ENOMEM; + goto out; + } + + key.objectid = 0; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = 0; + + max_key.objectid = (u64)-1; + max_key.type = BTRFS_ROOT_ITEM_KEY; + max_key.offset = (u64)-1; + + path->keep_locks = 1; + + while (1) { + ret = btrfs_search_forward(root, &key, path, 0); + if (ret) { + if (ret > 0) + ret = 0; + break; + } + + if (key.type != BTRFS_ROOT_ITEM_KEY || + (key.objectid < BTRFS_FIRST_FREE_OBJECTID && + key.objectid != BTRFS_FS_TREE_OBJECTID) || + key.objectid > BTRFS_LAST_FREE_OBJECTID) + goto skip; + + eb = path->nodes[0]; + slot = path->slots[0]; + item_size = btrfs_item_size_nr(eb, slot); + if (item_size < sizeof(root_item)) + goto skip; + + read_extent_buffer(eb, &root_item, + btrfs_item_ptr_offset(eb, slot), + (int)sizeof(root_item)); + if (btrfs_root_refs(&root_item) == 0) + goto skip; + + if (!btrfs_is_empty_uuid(root_item.uuid) || + !btrfs_is_empty_uuid(root_item.received_uuid)) { + if (trans) + goto update_tree; + + btrfs_release_path(path); + /* + * 1 - subvol uuid item + * 1 - received_subvol uuid item + */ + trans = btrfs_start_transaction(fs_info->uuid_root, 2); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + break; + } + continue; + } else { + goto skip; + } +update_tree: + if (!btrfs_is_empty_uuid(root_item.uuid)) { + ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, + root_item.uuid, + BTRFS_UUID_KEY_SUBVOL, + key.objectid); + if (ret < 0) { + btrfs_warn(fs_info, "uuid_tree_add failed %d", + ret); + break; + } + } + + if (!btrfs_is_empty_uuid(root_item.received_uuid)) { + ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, + root_item.received_uuid, + BTRFS_UUID_KEY_RECEIVED_SUBVOL, + key.objectid); + if (ret < 0) { + btrfs_warn(fs_info, "uuid_tree_add failed %d", + ret); + break; + } + } + +skip: + if (trans) { + ret = btrfs_end_transaction(trans, fs_info->uuid_root); + trans = NULL; + if (ret) + break; + } + + btrfs_release_path(path); + if (key.offset < (u64)-1) { + key.offset++; + } else if (key.type < BTRFS_ROOT_ITEM_KEY) { + key.offset = 0; + key.type = BTRFS_ROOT_ITEM_KEY; + } else if (key.objectid < (u64)-1) { + key.offset = 0; + key.type = BTRFS_ROOT_ITEM_KEY; + key.objectid++; + } else { + break; + } + cond_resched(); + } + +out: + btrfs_free_path(path); + if (trans && !IS_ERR(trans)) + btrfs_end_transaction(trans, fs_info->uuid_root); + if (ret) + btrfs_warn(fs_info, "btrfs_uuid_scan_kthread failed %d", ret); + else + fs_info->update_uuid_tree_gen = 1; + up(&fs_info->uuid_tree_rescan_sem); + return 0; +} + +/* + * Callback for btrfs_uuid_tree_iterate(). + * returns: + * 0 check succeeded, the entry is not outdated. + * < 0 if an error occured. + * > 0 if the check failed, which means the caller shall remove the entry. + */ +static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info, + u8 *uuid, u8 type, u64 subid) +{ + struct btrfs_key key; + int ret = 0; + struct btrfs_root *subvol_root; + + if (type != BTRFS_UUID_KEY_SUBVOL && + type != BTRFS_UUID_KEY_RECEIVED_SUBVOL) + goto out; + + key.objectid = subid; + key.type = BTRFS_ROOT_ITEM_KEY; + key.offset = (u64)-1; + subvol_root = btrfs_read_fs_root_no_name(fs_info, &key); + if (IS_ERR(subvol_root)) { + ret = PTR_ERR(subvol_root); + if (ret == -ENOENT) + ret = 1; + goto out; + } + + switch (type) { + case BTRFS_UUID_KEY_SUBVOL: + if (memcmp(uuid, subvol_root->root_item.uuid, BTRFS_UUID_SIZE)) + ret = 1; + break; + case BTRFS_UUID_KEY_RECEIVED_SUBVOL: + if (memcmp(uuid, subvol_root->root_item.received_uuid, + BTRFS_UUID_SIZE)) + ret = 1; + break; + } + +out: + return ret; +} + +static int btrfs_uuid_rescan_kthread(void *data) +{ + struct btrfs_fs_info *fs_info = (struct btrfs_fs_info *)data; + int ret; + + /* + * 1st step is to iterate through the existing UUID tree and + * to delete all entries that contain outdated data. + * 2nd step is to add all missing entries to the UUID tree. + */ + ret = btrfs_uuid_tree_iterate(fs_info, btrfs_check_uuid_tree_entry); + if (ret < 0) { + btrfs_warn(fs_info, "iterating uuid_tree failed %d", ret); + up(&fs_info->uuid_tree_rescan_sem); + return ret; + } + return btrfs_uuid_scan_kthread(data); +} + +int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info) +{ + struct btrfs_trans_handle *trans; + struct btrfs_root *tree_root = fs_info->tree_root; + struct btrfs_root *uuid_root; + struct task_struct *task; + int ret; + + /* + * 1 - root node + * 1 - root item + */ + trans = btrfs_start_transaction(tree_root, 2); + if (IS_ERR(trans)) + return PTR_ERR(trans); + + uuid_root = btrfs_create_tree(trans, fs_info, + BTRFS_UUID_TREE_OBJECTID); + if (IS_ERR(uuid_root)) { + btrfs_abort_transaction(trans, tree_root, + PTR_ERR(uuid_root)); + return PTR_ERR(uuid_root); + } + + fs_info->uuid_root = uuid_root; + + ret = btrfs_commit_transaction(trans, tree_root); + if (ret) + return ret; + + down(&fs_info->uuid_tree_rescan_sem); + task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid"); + if (IS_ERR(task)) { + /* fs_info->update_uuid_tree_gen remains 0 in all error case */ + btrfs_warn(fs_info, "failed to start uuid_scan task"); + up(&fs_info->uuid_tree_rescan_sem); + return PTR_ERR(task); + } + + return 0; +} + +int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info) +{ + struct task_struct *task; + + down(&fs_info->uuid_tree_rescan_sem); + task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid"); + if (IS_ERR(task)) { + /* fs_info->update_uuid_tree_gen remains 0 in all error case */ + btrfs_warn(fs_info, "failed to start uuid_rescan task"); + up(&fs_info->uuid_tree_rescan_sem); + return PTR_ERR(task); + } + + return 0; +} + /* * shrinking a device means finding all of the device extents past * the new size, and then following the back refs to the chunks. @@ -3579,7 +3969,8 @@ static int btrfs_add_system_chunk(struct btrfs_root *root, u8 *ptr; array_size = btrfs_super_sys_array_size(super_copy); - if (array_size + item_size > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) + if (array_size + item_size + sizeof(disk_key) + > BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) return -EFBIG; ptr = super_copy->sys_chunk_array + array_size; @@ -3684,6 +4075,16 @@ static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type) btrfs_set_fs_incompat(info, RAID56); } +#define BTRFS_MAX_DEVS(r) ((BTRFS_LEAF_DATA_SIZE(r) \ + - sizeof(struct btrfs_item) \ + - sizeof(struct btrfs_chunk)) \ + / sizeof(struct btrfs_stripe) + 1) + +#define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE \ + - 2 * sizeof(struct btrfs_disk_key) \ + - 2 * sizeof(struct btrfs_chunk)) \ + / sizeof(struct btrfs_stripe) + 1) + static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, struct btrfs_root *extent_root, u64 start, u64 type) @@ -3733,6 +4134,8 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (type & BTRFS_BLOCK_GROUP_DATA) { max_stripe_size = 1024 * 1024 * 1024; max_chunk_size = 10 * max_stripe_size; + if (!devs_max) + devs_max = BTRFS_MAX_DEVS(info->chunk_root); } else if (type & BTRFS_BLOCK_GROUP_METADATA) { /* for larger filesystems, use larger metadata chunks */ if (fs_devices->total_rw_bytes > 50ULL * 1024 * 1024 * 1024) @@ -3740,11 +4143,15 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, else max_stripe_size = 256 * 1024 * 1024; max_chunk_size = max_stripe_size; + if (!devs_max) + devs_max = BTRFS_MAX_DEVS(info->chunk_root); } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) { max_stripe_size = 32 * 1024 * 1024; max_chunk_size = 2 * max_stripe_size; + if (!devs_max) + devs_max = BTRFS_MAX_DEVS_SYS_CHUNK; } else { - printk(KERN_ERR "btrfs: invalid chunk type 0x%llx requested\n", + btrfs_err(info, "invalid chunk type 0x%llx requested", type); BUG_ON(1); } @@ -3776,7 +4183,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, if (!device->writeable) { WARN(1, KERN_ERR - "btrfs: read-only device in alloc_list\n"); + "BTRFS: read-only device in alloc_list\n"); continue; } @@ -3911,9 +4318,11 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, em = alloc_extent_map(); if (!em) { + kfree(map); ret = -ENOMEM; goto error; } + set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags); em->bdev = (struct block_device *)map; em->start = start; em->len = num_bytes; @@ -3956,7 +4365,6 @@ error_del_extent: /* One for the tree reference */ free_extent_map(em); error: - kfree(map); kfree(devices_info); return ret; } @@ -3992,7 +4400,7 @@ int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans, if (em->start != chunk_offset || em->len != chunk_size) { btrfs_crit(extent_root->fs_info, "found a bad mapping, wanted" - " %Lu-%Lu, found %Lu-%Lu\n", chunk_offset, + " %Lu-%Lu, found %Lu-%Lu", chunk_offset, chunk_size, em->start, em->len); free_extent_map(em); return -EINVAL; @@ -4168,7 +4576,6 @@ void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree) write_unlock(&tree->map_tree.lock); if (!em) break; - kfree(em->bdev); /* once for us */ free_extent_map(em); /* once for the tree */ @@ -4194,15 +4601,16 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len) * and exit, so return 1 so the callers don't try to use other copies. */ if (!em) { - btrfs_emerg(fs_info, "No mapping for %Lu-%Lu\n", logical, + btrfs_crit(fs_info, "No mapping for %Lu-%Lu", logical, logical+len); return 1; } if (em->start > logical || em->start + em->len < logical) { - btrfs_emerg(fs_info, "Invalid mapping for %Lu-%Lu, got " - "%Lu-%Lu\n", logical, logical+len, em->start, + btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got " + "%Lu-%Lu", logical, logical+len, em->start, em->start + em->len); + free_extent_map(em); return 1; } @@ -4375,15 +4783,15 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, if (!em) { btrfs_crit(fs_info, "unable to find logical %llu len %llu", - (unsigned long long)logical, - (unsigned long long)*length); + logical, *length); return -EINVAL; } if (em->start > logical || em->start + em->len < logical) { btrfs_crit(fs_info, "found a bad mapping, wanted %Lu, " - "found %Lu-%Lu\n", logical, em->start, + "found %Lu-%Lu", logical, em->start, em->start + em->len); + free_extent_map(em); return -EINVAL; } @@ -4611,7 +5019,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, num_stripes = map->num_stripes; max_errors = nr_parity_stripes(map); - raid_map = kmalloc(sizeof(u64) * num_stripes, + raid_map = kmalloc_array(num_stripes, sizeof(u64), GFP_NOFS); if (!raid_map) { ret = -ENOMEM; @@ -4671,6 +5079,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw, } bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS); if (!bbio) { + kfree(raid_map); ret = -ENOMEM; goto out; } @@ -4902,13 +5311,13 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, read_unlock(&em_tree->lock); if (!em) { - printk(KERN_ERR "btrfs: couldn't find em for chunk %Lu\n", + printk(KERN_ERR "BTRFS: couldn't find em for chunk %Lu\n", chunk_start); return -EIO; } if (em->start != chunk_start) { - printk(KERN_ERR "btrfs: bad chunk start, em=%Lu, wanted=%Lu\n", + printk(KERN_ERR "BTRFS: bad chunk start, em=%Lu, wanted=%Lu\n", em->start, chunk_start); free_extent_map(em); return -EIO; @@ -4970,9 +5379,19 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, return 0; } +static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int err) +{ + if (likely(bbio->flags & BTRFS_BIO_ORIG_BIO_SUBMITTED)) + bio_endio_nodec(bio, err); + else + bio_endio(bio, err); + kfree(bbio); +} + static void btrfs_end_bio(struct bio *bio, int err) { struct btrfs_bio *bbio = bio->bi_private; + struct btrfs_device *dev = bbio->stripes[0].dev; int is_orig_bio = 0; if (err) { @@ -4980,7 +5399,6 @@ static void btrfs_end_bio(struct bio *bio, int err) if (err == -EIO || err == -EREMOTEIO) { unsigned int stripe_index = btrfs_io_bio(bio)->stripe_index; - struct btrfs_device *dev; BUG_ON(stripe_index >= bbio->num_stripes); dev = bbio->stripes[stripe_index].dev; @@ -5002,11 +5420,14 @@ static void btrfs_end_bio(struct bio *bio, int err) if (bio == bbio->orig_bio) is_orig_bio = 1; + btrfs_bio_counter_dec(bbio->fs_info); + if (atomic_dec_and_test(&bbio->stripes_pending)) { if (!is_orig_bio) { bio_put(bio); bio = bbio->orig_bio; } + bio->bi_private = bbio->private; bio->bi_end_io = bbio->end_io; btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; @@ -5023,21 +5444,13 @@ static void btrfs_end_bio(struct bio *bio, int err) set_bit(BIO_UPTODATE, &bio->bi_flags); err = 0; } - kfree(bbio); - bio_endio(bio, err); + btrfs_end_bbio(bbio, bio, err); } else if (!is_orig_bio) { bio_put(bio); } } -struct async_sched { - struct bio *bio; - int rw; - struct btrfs_fs_info *info; - struct btrfs_work work; -}; - /* * see run_scheduled_bios for a description of why bios are collected for * async submit. @@ -5094,8 +5507,8 @@ static noinline void btrfs_schedule_bio(struct btrfs_root *root, spin_unlock(&device->io_lock); if (should_queue) - btrfs_queue_worker(&root->fs_info->submit_workers, - &device->work); + btrfs_queue_work(root->fs_info->submit_workers, + &device->work); } static int bio_size_ok(struct block_device *bdev, struct bio *bio, @@ -5103,17 +5516,15 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio, { struct bio_vec *prev; struct request_queue *q = bdev_get_queue(bdev); - unsigned short max_sectors = queue_max_sectors(q); + unsigned int max_sectors = queue_max_sectors(q); struct bvec_merge_data bvm = { .bi_bdev = bdev, .bi_sector = sector, .bi_rw = bio->bi_rw, }; - if (bio->bi_vcnt == 0) { - WARN_ON(1); + if (WARN_ON(bio->bi_vcnt == 0)) return 1; - } prev = &bio->bi_io_vec[bio->bi_vcnt - 1]; if (bio_sectors(bio) > max_sectors) @@ -5122,7 +5533,7 @@ static int bio_size_ok(struct block_device *bdev, struct bio *bio, if (!q->merge_bvec_fn) return 1; - bvm.bi_size = bio->bi_size - prev->bv_len; + bvm.bi_size = bio->bi_iter.bi_size - prev->bv_len; if (q->merge_bvec_fn(q, &bvm, prev) < prev->bv_len) return 0; return 1; @@ -5137,7 +5548,7 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, bio->bi_private = bbio; btrfs_io_bio(bio)->stripe_index = dev_nr; bio->bi_end_io = btrfs_end_bio; - bio->bi_sector = physical >> 9; + bio->bi_iter.bi_sector = physical >> 9; #ifdef DEBUG { struct rcu_string *name; @@ -5152,6 +5563,9 @@ static void submit_stripe_bio(struct btrfs_root *root, struct btrfs_bio *bbio, } #endif bio->bi_bdev = dev->bdev; + + btrfs_bio_counter_inc_noblocked(root->fs_info); + if (async) btrfs_schedule_bio(root, dev, rw, bio); else @@ -5175,7 +5589,7 @@ again: while (bvec <= (first_bio->bi_io_vec + first_bio->bi_vcnt - 1)) { if (bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset) < bvec->bv_len) { - u64 len = bio->bi_size; + u64 len = bio->bi_iter.bi_size; atomic_inc(&bbio->stripes_pending); submit_stripe_bio(root, bbio, bio, physical, dev_nr, @@ -5194,12 +5608,15 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) { atomic_inc(&bbio->error); if (atomic_dec_and_test(&bbio->stripes_pending)) { + /* Shoud be the original bio. */ + WARN_ON(bio != bbio->orig_bio); + bio->bi_private = bbio->private; bio->bi_end_io = bbio->end_io; btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; - bio->bi_sector = logical >> 9; - kfree(bbio); - bio_endio(bio, -EIO); + bio->bi_iter.bi_sector = logical >> 9; + + btrfs_end_bbio(bbio, bio, -EIO); } } @@ -5208,7 +5625,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, { struct btrfs_device *dev; struct bio *first_bio = bio; - u64 logical = (u64)bio->bi_sector << 9; + u64 logical = (u64)bio->bi_iter.bi_sector << 9; u64 length = 0; u64 map_length; u64 *raid_map = NULL; @@ -5217,38 +5634,46 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, int total_devs = 1; struct btrfs_bio *bbio = NULL; - length = bio->bi_size; + length = bio->bi_iter.bi_size; map_length = length; + btrfs_bio_counter_inc_blocked(root->fs_info); ret = __btrfs_map_block(root->fs_info, rw, logical, &map_length, &bbio, mirror_num, &raid_map); - if (ret) /* -ENOMEM */ + if (ret) { + btrfs_bio_counter_dec(root->fs_info); return ret; + } total_devs = bbio->num_stripes; bbio->orig_bio = first_bio; bbio->private = first_bio->bi_private; bbio->end_io = first_bio->bi_end_io; + bbio->fs_info = root->fs_info; atomic_set(&bbio->stripes_pending, bbio->num_stripes); if (raid_map) { /* In this case, map_length has been set to the length of a single stripe; not the whole write */ if (rw & WRITE) { - return raid56_parity_write(root, bio, bbio, - raid_map, map_length); + ret = raid56_parity_write(root, bio, bbio, + raid_map, map_length); } else { - return raid56_parity_recover(root, bio, bbio, - raid_map, map_length, - mirror_num); + ret = raid56_parity_recover(root, bio, bbio, + raid_map, map_length, + mirror_num); } + /* + * FIXME, replace dosen't support raid56 yet, please fix + * it in the future. + */ + btrfs_bio_counter_dec(root->fs_info); + return ret; } if (map_length < length) { btrfs_crit(root->fs_info, "mapping failed logical %llu bio len %llu len %llu", - (unsigned long long)logical, - (unsigned long long)length, - (unsigned long long)map_length); + logical, length, map_length); BUG(); } @@ -5278,6 +5703,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, BUG_ON(!bio); /* -ENOMEM */ } else { bio = first_bio; + bbio->flags |= BTRFS_BIO_ORIG_BIO_SUBMITTED; } submit_stripe_bio(root, bbio, bio, @@ -5285,6 +5711,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, async_submit); dev_nr++; } + btrfs_bio_counter_dec(root->fs_info); return 0; } @@ -5314,23 +5741,70 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root, struct btrfs_device *device; struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; - device = kzalloc(sizeof(*device), GFP_NOFS); - if (!device) + device = btrfs_alloc_device(NULL, &devid, dev_uuid); + if (IS_ERR(device)) return NULL; - list_add(&device->dev_list, - &fs_devices->devices); - device->devid = devid; - device->work.func = pending_bios_fn; + + list_add(&device->dev_list, &fs_devices->devices); device->fs_devices = fs_devices; - device->missing = 1; fs_devices->num_devices++; + + device->missing = 1; fs_devices->missing_devices++; - spin_lock_init(&device->io_lock); - INIT_LIST_HEAD(&device->dev_alloc_list); - memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE); + return device; } +/** + * btrfs_alloc_device - allocate struct btrfs_device + * @fs_info: used only for generating a new devid, can be NULL if + * devid is provided (i.e. @devid != NULL). + * @devid: a pointer to devid for this device. If NULL a new devid + * is generated. + * @uuid: a pointer to UUID for this device. If NULL a new UUID + * is generated. + * + * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR() + * on error. Returned struct is not linked onto any lists and can be + * destroyed with kfree() right away. + */ +struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info, + const u64 *devid, + const u8 *uuid) +{ + struct btrfs_device *dev; + u64 tmp; + + if (WARN_ON(!devid && !fs_info)) + return ERR_PTR(-EINVAL); + + dev = __alloc_device(); + if (IS_ERR(dev)) + return dev; + + if (devid) + tmp = *devid; + else { + int ret; + + ret = find_next_devid(fs_info, &tmp); + if (ret) { + kfree(dev); + return ERR_PTR(ret); + } + } + dev->devid = tmp; + + if (uuid) + memcpy(dev->uuid, uuid, BTRFS_UUID_SIZE); + else + generate_random_uuid(dev->uuid); + + btrfs_init_work(&dev->work, pending_bios_fn, NULL, NULL); + + return dev; +} + static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, struct extent_buffer *leaf, struct btrfs_chunk *chunk) @@ -5371,6 +5845,7 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, return -ENOMEM; } + set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags); em->bdev = (struct block_device *)map; em->start = logical; em->len = length; @@ -5395,7 +5870,6 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, map->stripes[i].dev = btrfs_find_device(root->fs_info, devid, uuid, NULL); if (!map->stripes[i].dev && !btrfs_test_opt(root, DEGRADED)) { - kfree(map); free_extent_map(em); return -EIO; } @@ -5403,7 +5877,6 @@ static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key, map->stripes[i].dev = add_missing_dev(root, devid, uuid); if (!map->stripes[i].dev) { - kfree(map); free_extent_map(em); return -EIO; } @@ -5437,7 +5910,7 @@ static void fill_device_from_item(struct extent_buffer *leaf, WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID); device->is_tgtdev_for_dev_replace = 0; - ptr = (unsigned long)btrfs_device_uuid(dev_item); + ptr = btrfs_device_uuid(dev_item); read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); } @@ -5500,11 +5973,9 @@ static int read_one_dev(struct btrfs_root *root, u8 dev_uuid[BTRFS_UUID_SIZE]; devid = btrfs_device_id(leaf, dev_item); - read_extent_buffer(leaf, dev_uuid, - (unsigned long)btrfs_device_uuid(dev_item), + read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item), BTRFS_UUID_SIZE); - read_extent_buffer(leaf, fs_uuid, - (unsigned long)btrfs_device_fsid(dev_item), + read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item), BTRFS_UUID_SIZE); if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) { @@ -5519,8 +5990,7 @@ static int read_one_dev(struct btrfs_root *root, return -EIO; if (!device) { - btrfs_warn(root->fs_info, "devid %llu missing", - (unsigned long long)devid); + btrfs_warn(root->fs_info, "devid %llu missing", devid); device = add_missing_dev(root, devid, dev_uuid); if (!device) return -ENOMEM; @@ -5644,14 +6114,15 @@ int btrfs_read_chunk_tree(struct btrfs_root *root) mutex_lock(&uuid_mutex); lock_chunks(root); - /* first we search for all of the device items, and then we - * read in all of the chunk items. This way we can create chunk - * mappings that reference all of the devices that are afound + /* + * Read all device items, and then all the chunk items. All + * device items are found before any chunk item (their object id + * is smaller than the lowest possible object id for a chunk + * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID). */ key.objectid = BTRFS_DEV_ITEMS_OBJECTID; key.offset = 0; key.type = 0; -again: ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); if (ret < 0) goto error; @@ -5667,17 +6138,13 @@ again: break; } btrfs_item_key_to_cpu(leaf, &found_key, slot); - if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { - if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID) - break; - if (found_key.type == BTRFS_DEV_ITEM_KEY) { - struct btrfs_dev_item *dev_item; - dev_item = btrfs_item_ptr(leaf, slot, + if (found_key.type == BTRFS_DEV_ITEM_KEY) { + struct btrfs_dev_item *dev_item; + dev_item = btrfs_item_ptr(leaf, slot, struct btrfs_dev_item); - ret = read_one_dev(root, leaf, dev_item); - if (ret) - goto error; - } + ret = read_one_dev(root, leaf, dev_item); + if (ret) + goto error; } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) { struct btrfs_chunk *chunk; chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk); @@ -5687,11 +6154,6 @@ again: } path->slots[0]++; } - if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) { - key.objectid = 0; - btrfs_release_path(path); - goto again; - } ret = 0; error: unlock_chunks(root); @@ -5706,10 +6168,14 @@ void btrfs_init_devices_late(struct btrfs_fs_info *fs_info) struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; struct btrfs_device *device; - mutex_lock(&fs_devices->device_list_mutex); - list_for_each_entry(device, &fs_devices->devices, dev_list) - device->dev_root = fs_info->dev_root; - mutex_unlock(&fs_devices->device_list_mutex); + while (fs_devices) { + mutex_lock(&fs_devices->device_list_mutex); + list_for_each_entry(device, &fs_devices->devices, dev_list) + device->dev_root = fs_info->dev_root; + mutex_unlock(&fs_devices->device_list_mutex); + + fs_devices = fs_devices->seed; + } } static void __btrfs_reset_dev_stats(struct btrfs_device *dev) @@ -5800,7 +6266,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, BUG_ON(!path); ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1); if (ret < 0) { - printk_in_rcu(KERN_WARNING "btrfs: error %d while searching for dev_stats item for device %s!\n", + printk_in_rcu(KERN_WARNING "BTRFS: " + "error %d while searching for dev_stats item for device %s!\n", ret, rcu_str_deref(device->name)); goto out; } @@ -5810,7 +6277,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, /* need to delete old one and insert a new one */ ret = btrfs_del_item(trans, dev_root, path); if (ret != 0) { - printk_in_rcu(KERN_WARNING "btrfs: delete too small dev_stats item for device %s failed %d!\n", + printk_in_rcu(KERN_WARNING "BTRFS: " + "delete too small dev_stats item for device %s failed %d!\n", rcu_str_deref(device->name), ret); goto out; } @@ -5823,7 +6291,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans, ret = btrfs_insert_empty_item(trans, dev_root, path, &key, sizeof(*ptr)); if (ret < 0) { - printk_in_rcu(KERN_WARNING "btrfs: insert dev_stats item for device %s failed %d!\n", + printk_in_rcu(KERN_WARNING "BTRFS: " + "insert dev_stats item for device %s failed %d!\n", rcu_str_deref(device->name), ret); goto out; } @@ -5876,16 +6345,14 @@ static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev) { if (!dev->dev_stats_valid) return; - printk_ratelimited_in_rcu(KERN_ERR - "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", + printk_ratelimited_in_rcu(KERN_ERR "BTRFS: " + "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", rcu_str_deref(dev->name), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS), - btrfs_dev_stat_read(dev, - BTRFS_DEV_STAT_CORRUPTION_ERRS), - btrfs_dev_stat_read(dev, - BTRFS_DEV_STAT_GENERATION_ERRS)); + btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS), + btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS)); } static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) @@ -5898,7 +6365,8 @@ static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev) if (i == BTRFS_DEV_STAT_VALUES_MAX) return; /* all values == 0, suppress message */ - printk_in_rcu(KERN_INFO "btrfs: bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", + printk_in_rcu(KERN_INFO "BTRFS: " + "bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u\n", rcu_str_deref(dev->name), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS), btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS), @@ -5919,12 +6387,10 @@ int btrfs_get_dev_stats(struct btrfs_root *root, mutex_unlock(&fs_devices->device_list_mutex); if (!dev) { - printk(KERN_WARNING - "btrfs: get dev_stats failed, device not found\n"); + btrfs_warn(root->fs_info, "get dev_stats failed, device not found"); return -ENODEV; } else if (!dev->dev_stats_valid) { - printk(KERN_WARNING - "btrfs: get dev_stats failed, not yet valid\n"); + btrfs_warn(root->fs_info, "get dev_stats failed, not yet valid"); return -ENODEV; } else if (stats->flags & BTRFS_DEV_STATS_RESET) { for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) { |
