diff options
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r-- | fs/btrfs/volumes.c | 1103 |
1 files changed, 834 insertions, 269 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 80a27284dbf..d6f1996de62 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -40,6 +40,12 @@ struct map_lookup { struct btrfs_bio_stripe stripes[]; }; +static int init_first_rw_device(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_device *device); +static int btrfs_relocate_sys_chunks(struct btrfs_root *root); + + #define map_lookup_size(n) (sizeof(struct map_lookup) + \ (sizeof(struct btrfs_bio_stripe) * (n))) @@ -69,25 +75,31 @@ static void unlock_chunks(struct btrfs_root *root) int btrfs_cleanup_fs_uuids(void) { struct btrfs_fs_devices *fs_devices; - struct list_head *uuid_cur; - struct list_head *devices_cur; struct btrfs_device *dev; - list_for_each(uuid_cur, &fs_uuids) { - fs_devices = list_entry(uuid_cur, struct btrfs_fs_devices, - list); + while (!list_empty(&fs_uuids)) { + fs_devices = list_entry(fs_uuids.next, + struct btrfs_fs_devices, list); + list_del(&fs_devices->list); while(!list_empty(&fs_devices->devices)) { - devices_cur = fs_devices->devices.next; - dev = list_entry(devices_cur, struct btrfs_device, - dev_list); + dev = list_entry(fs_devices->devices.next, + struct btrfs_device, dev_list); if (dev->bdev) { close_bdev_excl(dev->bdev); fs_devices->open_devices--; } + fs_devices->num_devices--; + if (dev->writeable) + fs_devices->rw_devices--; list_del(&dev->dev_list); + list_del(&dev->dev_alloc_list); kfree(dev->name); kfree(dev); } + WARN_ON(fs_devices->num_devices); + WARN_ON(fs_devices->open_devices); + WARN_ON(fs_devices->rw_devices); + kfree(fs_devices); } return 0; } @@ -257,6 +269,9 @@ static noinline int device_list_add(const char *path, disk_super->dev_item.uuid); } if (!device) { + if (fs_devices->opened) + return -EBUSY; + device = kzalloc(sizeof(*device), GFP_NOFS); if (!device) { /* we can safely leave the fs_devices entry around */ @@ -273,8 +288,9 @@ static noinline int device_list_add(const char *path, kfree(device); return -ENOMEM; } + INIT_LIST_HEAD(&device->dev_alloc_list); list_add(&device->dev_list, &fs_devices->devices); - list_add(&device->dev_alloc_list, &fs_devices->alloc_list); + device->fs_devices = fs_devices; fs_devices->num_devices++; } @@ -288,58 +304,94 @@ static noinline int device_list_add(const char *path, int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices) { - struct list_head *head = &fs_devices->devices; + struct list_head *tmp; struct list_head *cur; struct btrfs_device *device; + int seed_devices = 0; mutex_lock(&uuid_mutex); again: - list_for_each(cur, head) { + list_for_each_safe(cur, tmp, &fs_devices->devices) { device = list_entry(cur, struct btrfs_device, dev_list); - if (!device->in_fs_metadata) { - struct block_device *bdev; - list_del(&device->dev_list); - list_del(&device->dev_alloc_list); + if (device->in_fs_metadata) + continue; + + if (device->bdev) { + close_bdev_excl(device->bdev); + device->bdev = NULL; + fs_devices->open_devices--; + } + if (device->writeable) { + list_del_init(&device->dev_alloc_list); + device->writeable = 0; + fs_devices->rw_devices--; + } + if (!seed_devices) { + list_del_init(&device->dev_list); fs_devices->num_devices--; - if (device->bdev) { - bdev = device->bdev; - fs_devices->open_devices--; - mutex_unlock(&uuid_mutex); - close_bdev_excl(bdev); - mutex_lock(&uuid_mutex); - } kfree(device->name); kfree(device); - goto again; } } + + if (fs_devices->seed) { + fs_devices = fs_devices->seed; + seed_devices = 1; + goto again; + } + mutex_unlock(&uuid_mutex); return 0; } -int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) +static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices) { - struct list_head *head = &fs_devices->devices; + struct btrfs_fs_devices *seed_devices; struct list_head *cur; struct btrfs_device *device; +again: + if (--fs_devices->opened > 0) + return 0; - mutex_lock(&uuid_mutex); - list_for_each(cur, head) { + list_for_each(cur, &fs_devices->devices) { device = list_entry(cur, struct btrfs_device, dev_list); if (device->bdev) { close_bdev_excl(device->bdev); fs_devices->open_devices--; } + if (device->writeable) { + list_del_init(&device->dev_alloc_list); + fs_devices->rw_devices--; + } + device->bdev = NULL; + device->writeable = 0; device->in_fs_metadata = 0; } - fs_devices->mounted = 0; - mutex_unlock(&uuid_mutex); + fs_devices->opened = 0; + fs_devices->seeding = 0; + fs_devices->sprouted = 0; + + seed_devices = fs_devices->seed; + fs_devices->seed = NULL; + if (seed_devices) { + fs_devices = seed_devices; + goto again; + } return 0; } -int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, - int flags, void *holder) +int btrfs_close_devices(struct btrfs_fs_devices *fs_devices) +{ + int ret; + + mutex_lock(&uuid_mutex); + ret = __btrfs_close_devices(fs_devices); + mutex_unlock(&uuid_mutex); + return ret; +} + +int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices, void *holder) { struct block_device *bdev; struct list_head *head = &fs_devices->devices; @@ -350,24 +402,18 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, struct btrfs_super_block *disk_super; u64 latest_devid = 0; u64 latest_transid = 0; - u64 transid; u64 devid; + int seeding = 1; int ret = 0; - mutex_lock(&uuid_mutex); - if (fs_devices->mounted) - goto out; - list_for_each(cur, head) { device = list_entry(cur, struct btrfs_device, dev_list); if (device->bdev) continue; - if (!device->name) continue; - bdev = open_bdev_excl(device->name, flags, holder); - + bdev = open_bdev_excl(device->name, MS_RDONLY, holder); if (IS_ERR(bdev)) { printk("open %s failed\n", device->name); goto error; @@ -387,16 +433,32 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, if (devid != device->devid) goto error_brelse; - transid = btrfs_super_generation(disk_super); - if (!latest_transid || transid > latest_transid) { + if (memcmp(device->uuid, disk_super->dev_item.uuid, + BTRFS_UUID_SIZE)) + goto error_brelse; + + device->generation = btrfs_super_generation(disk_super); + if (!latest_transid || device->generation > latest_transid) { latest_devid = devid; - latest_transid = transid; + latest_transid = device->generation; latest_bdev = bdev; } + if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) { + device->writeable = 0; + } else { + device->writeable = !bdev_read_only(bdev); + seeding = 0; + } + device->bdev = bdev; device->in_fs_metadata = 0; fs_devices->open_devices++; + if (device->writeable) { + fs_devices->rw_devices++; + list_add(&device->dev_alloc_list, + &fs_devices->alloc_list); + } continue; error_brelse: @@ -410,11 +472,32 @@ error: ret = -EIO; goto out; } - fs_devices->mounted = 1; + fs_devices->seeding = seeding; + fs_devices->opened = 1; fs_devices->latest_bdev = latest_bdev; fs_devices->latest_devid = latest_devid; fs_devices->latest_trans = latest_transid; + fs_devices->total_rw_bytes = 0; out: + return ret; +} + +int btrfs_open_devices(struct btrfs_fs_devices *fs_devices, + int flags, void *holder) +{ + int ret; + + mutex_lock(&uuid_mutex); + if (fs_devices->opened) { + if (fs_devices->sprouted) { + ret = -EBUSY; + } else { + fs_devices->opened++; + ret = 0; + } + } else { + ret = __btrfs_open_devices(fs_devices, holder); + } mutex_unlock(&uuid_mutex); return ret; } @@ -481,12 +564,12 @@ error: */ static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, - struct btrfs_path *path, u64 num_bytes, u64 *start) { struct btrfs_key key; struct btrfs_root *root = device->dev_root; struct btrfs_dev_extent *dev_extent = NULL; + struct btrfs_path *path; u64 hole_size = 0; u64 last_byte = 0; u64 search_start = 0; @@ -496,8 +579,11 @@ static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans, int start_found; struct extent_buffer *l; - start_found = 0; + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; path->reada = 2; + start_found = 0; /* FIXME use last free of some kind */ @@ -581,7 +667,6 @@ check_pending: /* we have to make sure we didn't find an extent that has already * been allocated by the map tree or the original allocation */ - btrfs_release_path(root, path); BUG_ON(*start < search_start); if (*start + num_bytes > search_end) { @@ -589,10 +674,10 @@ check_pending: goto error; } /* check for pending inserts here */ - return 0; + ret = 0; error: - btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } @@ -644,11 +729,10 @@ int btrfs_free_dev_extent(struct btrfs_trans_handle *trans, return ret; } -int noinline btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, +int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, struct btrfs_device *device, u64 chunk_tree, u64 chunk_objectid, - u64 chunk_offset, - u64 num_bytes, u64 *start) + u64 chunk_offset, u64 start, u64 num_bytes) { int ret; struct btrfs_path *path; @@ -662,13 +746,8 @@ int noinline btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - ret = find_free_dev_extent(trans, device, path, num_bytes, start); - if (ret) { - goto err; - } - key.objectid = device->devid; - key.offset = *start; + key.offset = start; key.type = BTRFS_DEV_EXTENT_KEY; ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*extent)); @@ -687,7 +766,6 @@ int noinline btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans, btrfs_set_dev_extent_length(leaf, extent, num_bytes); btrfs_mark_buffer_dirty(leaf); -err: btrfs_free_path(path); return ret; } @@ -735,12 +813,18 @@ error: return ret; } -static noinline int find_next_devid(struct btrfs_root *root, - struct btrfs_path *path, u64 *objectid) +static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid) { int ret; struct btrfs_key key; struct btrfs_key found_key; + struct btrfs_path *path; + + root = root->fs_info->chunk_root; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; key.objectid = BTRFS_DEV_ITEMS_OBJECTID; key.type = BTRFS_DEV_ITEM_KEY; @@ -763,7 +847,7 @@ static noinline int find_next_devid(struct btrfs_root *root, } ret = 0; error: - btrfs_release_path(root, path); + btrfs_free_path(path); return ret; } @@ -781,7 +865,6 @@ int btrfs_add_device(struct btrfs_trans_handle *trans, struct extent_buffer *leaf; struct btrfs_key key; unsigned long ptr; - u64 free_devid = 0; root = root->fs_info->chunk_root; @@ -789,13 +872,9 @@ int btrfs_add_device(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - ret = find_next_devid(root, path, &free_devid); - if (ret) - goto out; - key.objectid = BTRFS_DEV_ITEMS_OBJECTID; key.type = BTRFS_DEV_ITEM_KEY; - key.offset = free_devid; + key.offset = device->devid; ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*dev_item)); @@ -805,8 +884,8 @@ int btrfs_add_device(struct btrfs_trans_handle *trans, leaf = path->nodes[0]; dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item); - device->devid = free_devid; btrfs_set_device_id(leaf, dev_item, device->devid); + btrfs_set_device_generation(leaf, dev_item, 0); btrfs_set_device_type(leaf, dev_item, device->type); btrfs_set_device_io_align(leaf, dev_item, device->io_align); btrfs_set_device_io_width(leaf, dev_item, device->io_width); @@ -819,9 +898,11 @@ int btrfs_add_device(struct btrfs_trans_handle *trans, ptr = (unsigned long)btrfs_device_uuid(dev_item); write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE); + ptr = (unsigned long)btrfs_device_fsid(dev_item); + write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE); btrfs_mark_buffer_dirty(leaf); - ret = 0; + ret = 0; out: btrfs_free_path(path); return ret; @@ -832,11 +913,7 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, { int ret; struct btrfs_path *path; - struct block_device *bdev = device->bdev; - struct btrfs_device *next_dev; struct btrfs_key key; - u64 total_bytes; - struct btrfs_fs_devices *fs_devices; struct btrfs_trans_handle *trans; root = root->fs_info->chunk_root; @@ -863,25 +940,6 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, ret = btrfs_del_item(trans, root, path); if (ret) goto out; - - /* - * at this point, the device is zero sized. We want to - * remove it from the devices list and zero out the old super - */ - list_del_init(&device->dev_list); - list_del_init(&device->dev_alloc_list); - fs_devices = root->fs_info->fs_devices; - - next_dev = list_entry(fs_devices->devices.next, struct btrfs_device, - dev_list); - if (bdev == root->fs_info->sb->s_bdev) - root->fs_info->sb->s_bdev = next_dev->bdev; - if (bdev == fs_devices->latest_bdev) - fs_devices->latest_bdev = next_dev->bdev; - - total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy); - btrfs_set_super_num_devices(&root->fs_info->super_copy, - total_bytes - 1); out: btrfs_free_path(path); unlock_chunks(root); @@ -892,11 +950,14 @@ out: int btrfs_rm_device(struct btrfs_root *root, char *device_path) { struct btrfs_device *device; + struct btrfs_device *next_device; struct block_device *bdev; struct buffer_head *bh = NULL; struct btrfs_super_block *disk_super; u64 all_avail; u64 devid; + u64 num_devices; + u8 *dev_uuid; int ret = 0; mutex_lock(&uuid_mutex); @@ -907,14 +968,14 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) root->fs_info->avail_metadata_alloc_bits; if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && - btrfs_super_num_devices(&root->fs_info->super_copy) <= 4) { + root->fs_info->fs_devices->rw_devices <= 4) { printk("btrfs: unable to go below four devices on raid10\n"); ret = -EINVAL; goto out; } if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) && - btrfs_super_num_devices(&root->fs_info->super_copy) <= 2) { + root->fs_info->fs_devices->rw_devices <= 2) { printk("btrfs: unable to go below two devices on raid1\n"); ret = -EINVAL; goto out; @@ -941,15 +1002,15 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) printk("btrfs: no missing devices found to remove\n"); goto out; } - } else { - bdev = open_bdev_excl(device_path, 0, + bdev = open_bdev_excl(device_path, MS_RDONLY, root->fs_info->bdev_holder); if (IS_ERR(bdev)) { ret = PTR_ERR(bdev); goto out; } + set_blocksize(bdev, 4096); bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096); if (!bh) { ret = -EIO; @@ -957,45 +1018,97 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) } disk_super = (struct btrfs_super_block *)bh->b_data; if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC, - sizeof(disk_super->magic))) { - ret = -ENOENT; - goto error_brelse; - } - if (memcmp(disk_super->fsid, root->fs_info->fsid, - BTRFS_FSID_SIZE)) { + sizeof(disk_super->magic))) { ret = -ENOENT; goto error_brelse; } devid = le64_to_cpu(disk_super->dev_item.devid); - device = btrfs_find_device(root, devid, NULL); + dev_uuid = disk_super->dev_item.uuid; + device = btrfs_find_device(root, devid, dev_uuid, + disk_super->fsid); if (!device) { ret = -ENOENT; goto error_brelse; } + } + if (device->writeable && root->fs_info->fs_devices->rw_devices == 1) { + printk("btrfs: unable to remove the only writeable device\n"); + ret = -EINVAL; + goto error_brelse; + } + + if (device->writeable) { + list_del_init(&device->dev_alloc_list); + root->fs_info->fs_devices->rw_devices--; } - root->fs_info->fs_devices->num_devices--; - root->fs_info->fs_devices->open_devices--; ret = btrfs_shrink_device(device, 0); if (ret) goto error_brelse; - ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device); if (ret) goto error_brelse; - if (bh) { + device->in_fs_metadata = 0; + if (device->fs_devices == root->fs_info->fs_devices) { + list_del_init(&device->dev_list); + root->fs_info->fs_devices->num_devices--; + if (device->bdev) + device->fs_devices->open_devices--; + } + + next_device = list_entry(root->fs_info->fs_devices->devices.next, + struct btrfs_device, dev_list); + if (device->bdev == root->fs_info->sb->s_bdev) + root->fs_info->sb->s_bdev = next_device->bdev; + if (device->bdev == root->fs_info->fs_devices->latest_bdev) + root->fs_info->fs_devices->latest_bdev = next_device->bdev; + + num_devices = btrfs_super_num_devices(&root->fs_info->super_copy) - 1; + btrfs_set_super_num_devices(&root->fs_info->super_copy, num_devices); + + if (device->fs_devices != root->fs_info->fs_devices) { + BUG_ON(device->writeable); + brelse(bh); + if (bdev) + close_bdev_excl(bdev); + + if (device->bdev) { + close_bdev_excl(device->bdev); + device->bdev = NULL; + device->fs_devices->open_devices--; + } + if (device->fs_devices->open_devices == 0) { + struct btrfs_fs_devices *fs_devices; + fs_devices = root->fs_info->fs_devices; + while (fs_devices) { + if (fs_devices->seed == device->fs_devices) + break; + fs_devices = fs_devices->seed; + } + fs_devices->seed = device->fs_devices->seed; + device->fs_devices->seed = NULL; + __btrfs_close_devices(device->fs_devices); + } + ret = 0; + goto out; + } + + /* + * at this point, the device is zero sized. We want to + * remove it from the devices list and zero out the old super + */ + if (device->writeable) { /* make sure this device isn't detected as part of * the FS anymore */ memset(&disk_super->magic, 0, sizeof(disk_super->magic)); set_buffer_dirty(bh); sync_dirty_buffer(bh); - - brelse(bh); } + brelse(bh); if (device->bdev) { /* one close for the device struct or super_block */ @@ -1021,6 +1134,129 @@ out: return ret; } +/* + * does all the dirty work required for changing file system's UUID. + */ +static int btrfs_prepare_sprout(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; + struct btrfs_fs_devices *old_devices; + struct btrfs_super_block *disk_super = &root->fs_info->super_copy; + struct btrfs_device *device; + u64 super_flags; + + BUG_ON(!mutex_is_locked(&uuid_mutex)); + if (!fs_devices->seeding || fs_devices->opened != 1) + return -EINVAL; + + old_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS); + if (!old_devices) + return -ENOMEM; + + memcpy(old_devices, fs_devices, sizeof(*old_devices)); + old_devices->opened = 1; + old_devices->sprouted = 1; + INIT_LIST_HEAD(&old_devices->devices); + INIT_LIST_HEAD(&old_devices->alloc_list); + list_splice_init(&fs_devices->devices, &old_devices->devices); + list_splice_init(&fs_devices->alloc_list, &old_devices->alloc_list); + list_for_each_entry(device, &old_devices->devices, dev_list) { + device->fs_devices = old_devices; + } + list_add(&old_devices->list, &fs_uuids); + + fs_devices->seeding = 0; + fs_devices->num_devices = 0; + fs_devices->open_devices = 0; + fs_devices->seed = old_devices; + + generate_random_uuid(fs_devices->fsid); + memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); + memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE); + super_flags = btrfs_super_flags(disk_super) & + ~BTRFS_SUPER_FLAG_SEEDING; + btrfs_set_super_flags(disk_super, super_flags); + + return 0; +} + +/* + * strore the expected generation for seed devices in device items. + */ +static int btrfs_finish_sprout(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_dev_item *dev_item; + struct btrfs_device *device; + struct btrfs_key key; + u8 fs_uuid[BTRFS_UUID_SIZE]; + u8 dev_uuid[BTRFS_UUID_SIZE]; + u64 devid; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + root = root->fs_info->chunk_root; + key.objectid = BTRFS_DEV_ITEMS_OBJECTID; + key.offset = 0; + key.type = BTRFS_DEV_ITEM_KEY; + + while (1) { + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret < 0) + goto error; + + leaf = path->nodes[0]; +next_slot: + if (path->slots[0] >= btrfs_header_nritems(leaf)) { + ret = btrfs_next_leaf(root, path); + if (ret > 0) + break; + if (ret < 0) + goto error; + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + btrfs_release_path(root, path); + continue; + } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (key.objectid != BTRFS_DEV_ITEMS_OBJECTID || + key.type != BTRFS_DEV_ITEM_KEY) + break; + + dev_item = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_dev_item); + devid = btrfs_device_id(leaf, dev_item); + read_extent_buffer(leaf, dev_uuid, + (unsigned long)btrfs_device_uuid(dev_item), + BTRFS_UUID_SIZE); + read_extent_buffer(leaf, fs_uuid, + (unsigned long)btrfs_device_fsid(dev_item), + BTRFS_UUID_SIZE); + device = btrfs_find_device(root, devid, dev_uuid, fs_uuid); + BUG_ON(!device); + + if (device->fs_devices->seeding) { + btrfs_set_device_generation(leaf, dev_item, + device->generation); + btrfs_mark_buffer_dirty(leaf); + } + + path->slots[0]++; + goto next_slot; + } + ret = 0; +error: + btrfs_free_path(path); + return ret; +} + int btrfs_init_new_device(struct btrfs_root *root, char *device_path) { struct btrfs_trans_handle *trans; @@ -1028,26 +1264,34 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) struct block_device *bdev; struct list_head *cur; struct list_head *devices; + struct super_block *sb = root->fs_info->sb; u64 total_bytes; + int seeding_dev = 0; int ret = 0; + if ((sb->s_flags & MS_RDONLY) && !root->fs_info->fs_devices->seeding) + return -EINVAL; bdev = open_bdev_excl(device_path, 0, root->fs_info->bdev_holder); if (!bdev) { return -EIO; } + if (root->fs_info->fs_devices->seeding) { + seeding_dev = 1; + down_write(&sb->s_umount); + mutex_lock(&uuid_mutex); + } + filemap_write_and_wait(bdev->bd_inode->i_mapping); mutex_lock(&root->fs_info->volume_mutex); - trans = btrfs_start_transaction(root, 1); - lock_chunks(root); devices = &root->fs_info->fs_devices->devices; list_for_each(cur, devices) { device = list_entry(cur, struct btrfs_device, dev_list); if (device->bdev == bdev) { ret = -EEXIST; - goto out; + goto error; } } @@ -1055,18 +1299,31 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) if (!device) { /* we can safely leave the fs_devices entry around */ ret = -ENOMEM; - goto out_close_bdev; + goto error; } - device->barriers = 1; - device->work.func = pending_bios_fn; - generate_random_uuid(device->uuid); - spin_lock_init(&device->io_lock); device->name = kstrdup(device_path, GFP_NOFS); if (!device->name) { kfree(device); - goto out_close_bdev; + ret = -ENOMEM; + goto error; } + + ret = find_next_devid(root, &device->devid); + if (ret) { + kfree(device); + goto error; + } + + trans = btrfs_start_transaction(root, 1); + lock_chunks(root); + + device->barriers = 1; + device->writeable = 1; + device->work.func = pending_bios_fn; + generate_random_uuid(device->uuid); + spin_lock_init(&device->io_lock); + device->generation = trans->transid; device->io_width = root->sectorsize; device->io_align = root->sectorsize; device->sector_size = root->sectorsize; @@ -1074,12 +1331,22 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) device->dev_root = root->fs_info->dev_root; device->bdev = bdev; device->in_fs_metadata = 1; + set_blocksize(device->bdev, 4096); - ret = btrfs_add_device(trans, root, device); - if (ret) - goto out_close_bdev; + if (seeding_dev) { + sb->s_flags &= ~MS_RDONLY; + ret = btrfs_prepare_sprout(trans, root); + BUG_ON(ret); + } - set_blocksize(device->bdev, 4096); + device->fs_devices = root->fs_info->fs_devices; + list_add(&device->dev_list, &root->fs_info->fs_devices->devices); + list_add(&device->dev_alloc_list, + &root->fs_info->fs_devices->alloc_list); + root->fs_info->fs_devices->num_devices++; + root->fs_info->fs_devices->open_devices++; + root->fs_info->fs_devices->rw_devices++; + root->fs_info->fs_devices->total_rw_bytes += device->total_bytes; total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy); btrfs_set_super_total_bytes(&root->fs_info->super_copy, @@ -1089,20 +1356,34 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) btrfs_set_super_num_devices(&root->fs_info->super_copy, total_bytes + 1); - list_add(&device->dev_list, &root->fs_info->fs_devices->devices); - list_add(&device->dev_alloc_list, - &root->fs_info->fs_devices->alloc_list); - root->fs_info->fs_devices->num_devices++; - root->fs_info->fs_devices->open_devices++; -out: + if (seeding_dev) { + ret = init_first_rw_device(trans, root, device); + BUG_ON(ret); + ret = btrfs_finish_sprout(trans, root); + BUG_ON(ret); + } else { + ret = btrfs_add_device(trans, root, device); + } + unlock_chunks(root); - btrfs_end_transaction(trans, root); - mutex_unlock(&root->fs_info->volume_mutex); + btrfs_commit_transaction(trans, root); - return ret; + if (seeding_dev) { + mutex_unlock(&uuid_mutex); + up_write(&sb->s_umount); -out_close_bdev: + ret = btrfs_relocate_sys_chunks(root); + BUG_ON(ret); + } +out: + mutex_unlock(&root->fs_info->volume_mutex); + return ret; +error: close_bdev_excl(bdev); + if (seeding_dev) { + mutex_unlock(&uuid_mutex); + up_write(&sb->s_umount); + } goto out; } @@ -1160,7 +1441,15 @@ static int __btrfs_grow_device(struct btrfs_trans_handle *trans, u64 old_total = btrfs_super_total_bytes(super_copy); u64 diff = new_size - device->total_bytes; + if (!device->writeable) + return -EACCES; + if (new_size <= device->total_bytes) + return -EINVAL; + btrfs_set_super_total_bytes(super_copy, old_total + diff); + device->fs_devices->total_rw_bytes += diff; + + device->total_bytes = new_size; return btrfs_update_device(trans, device); } @@ -1248,7 +1537,6 @@ int btrfs_del_sys_chunk(struct btrfs_root *root, u64 chunk_objectid, u64 return ret; } - int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset) @@ -1328,6 +1616,64 @@ int btrfs_relocate_chunk(struct btrfs_root *root, return 0; } +static int btrfs_relocate_sys_chunks(struct btrfs_root *root) +{ + struct btrfs_root *chunk_root = root->fs_info->chunk_root; + struct btrfs_path *path; + struct extent_buffer *leaf; + struct btrfs_chunk *chunk; + struct btrfs_key key; + struct btrfs_key found_key; + u64 chunk_tree = chunk_root->root_key.objectid; + u64 chunk_type; + int ret; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID; + key.offset = (u64)-1; + key.type = BTRFS_CHUNK_ITEM_KEY; + + while (1) { + ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0); + if (ret < 0) + goto error; + BUG_ON(ret == 0); + + ret = btrfs_previous_item(chunk_root, path, key.objectid, + key.type); + if (ret < 0) + goto error; + if (ret > 0) + break; + + leaf = path->nodes[0]; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + + chunk = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_chunk); + chunk_type = btrfs_chunk_type(leaf, chunk); + btrfs_release_path(chunk_root, path); + + if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) { + ret = btrfs_relocate_chunk(chunk_root, chunk_tree, + found_key.objectid, + found_key.offset); + BUG_ON(ret); + } + + if (found_key.offset == 0) + break; + key.offset = found_key.offset - 1; + } + ret = 0; +error: + btrfs_free_path(path); + return ret; +} + static u64 div_factor(u64 num, int factor) { if (factor == 10) @@ -1337,7 +1683,6 @@ static u64 div_factor(u64 num, int factor) return num; } - int btrfs_balance(struct btrfs_root *dev_root) { int ret; @@ -1353,6 +1698,8 @@ int btrfs_balance(struct btrfs_root *dev_root) struct btrfs_trans_handle *trans; struct btrfs_key found_key; + if (dev_root->fs_info->sb->s_flags & MS_RDONLY) + return -EROFS; mutex_lock(&dev_root->fs_info->volume_mutex); dev_root = dev_root->fs_info->dev_root; @@ -1363,7 +1710,8 @@ int btrfs_balance(struct btrfs_root *dev_root) old_size = device->total_bytes; size_to_free = div_factor(old_size, 1); size_to_free = min(size_to_free, (u64)1 * 1024 * 1024); - if (device->total_bytes - device->bytes_used > size_to_free) + if (!device->writeable || + device->total_bytes - device->bytes_used > size_to_free) continue; ret = btrfs_shrink_device(device, old_size - size_to_free); @@ -1453,6 +1801,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) u64 old_total = btrfs_super_total_bytes(super_copy); u64 diff = device->total_bytes - new_size; + if (new_size >= device->total_bytes) + return -EINVAL; path = btrfs_alloc_path(); if (!path) @@ -1469,6 +1819,8 @@ int btrfs_shrink_device(struct btrfs_device *device, u64 new_size) lock_chunks(root); device->total_bytes = new_size; + if (device->writeable) + device->fs_devices->total_rw_bytes -= diff; ret = btrfs_update_device(trans, device); if (ret) { unlock_chunks(root); @@ -1561,32 +1913,27 @@ static u64 noinline chunk_bytes_by_type(u64 type, u64 calc_size, return calc_size * num_stripes; } - -int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, - struct btrfs_root *extent_root, u64 *start, - u64 *num_bytes, u64 type) +static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, + struct map_lookup **map_ret, + u64 *num_bytes, u64 *stripe_size, + u64 start, u64 type) { - u64 dev_offset; struct btrfs_fs_info *info = extent_root->fs_info; - struct btrfs_root *chunk_root = extent_root->fs_info->chunk_root; - struct btrfs_path *path; - struct btrfs_stripe *stripes; struct btrfs_device *device = NULL; - struct btrfs_chunk *chunk; - struct list_head private_devs; - struct list_head *dev_list; + struct btrfs_fs_devices *fs_devices = info->fs_devices; struct list_head *cur; + struct map_lookup *map = NULL; struct extent_map_tree *em_tree; - struct map_lookup *map; struct extent_map *em; + struct list_head private_devs; int min_stripe_size = 1 * 1024 * 1024; - u64 physical; u64 calc_size = 1024 * 1024 * 1024; u64 max_chunk_size = calc_size; u64 min_free; u64 avail; u64 max_avail = 0; - u64 percent_max; + u64 dev_offset; int num_stripes = 1; int min_stripes = 1; int sub_stripes = 0; @@ -1594,19 +1941,17 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, int ret; int index; int stripe_len = 64 * 1024; - struct btrfs_key key; if ((type & BTRFS_BLOCK_GROUP_RAID1) && (type & BTRFS_BLOCK_GROUP_DUP)) { WARN_ON(1); type &= ~BTRFS_BLOCK_GROUP_DUP; } - dev_list = &extent_root->fs_info->fs_devices->alloc_list; - if (list_empty(dev_list)) + if (list_empty(&fs_devices->alloc_list)) return -ENOSPC; if (type & (BTRFS_BLOCK_GROUP_RAID0)) { - num_stripes = extent_root->fs_info->fs_devices->open_devices; + num_stripes = fs_devices->rw_devices; min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_DUP)) { @@ -1614,14 +1959,13 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID1)) { - num_stripes = min_t(u64, 2, - extent_root->fs_info->fs_devices->open_devices); + num_stripes = min_t(u64, 2, fs_devices->rw_devices); if (num_stripes < 2) return -ENOSPC; min_stripes = 2; } if (type & (BTRFS_BLOCK_GROUP_RAID10)) { - num_stripes = extent_root->fs_info->fs_devices->open_devices; + num_stripes = fs_devices->rw_devices; if (num_stripes < 4) return -ENOSPC; num_stripes &= ~(u32)1; @@ -1641,15 +1985,19 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans, min_stripe_size = 1 * 1024 * 1024; } - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - /* we don't want a chunk larger than 10% of the FS */ - percent_max = div_factor(btrfs_super_total_bytes(&info->super_copy), 1); - max_chunk_size = min(percent_max, max_chunk_size); + /* we don't want a chunk larger than 10% of writeable space */ + max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1) |