aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs/volumes.c
diff options
context:
space:
mode:
author Chris Mason <chris.mason@oracle.com> 2008-10-16 10:12:58 -0400
committer Chris Mason <chris.mason@oracle.com> 2008-10-16 10:12:58 -0400
commit 26ce34a9c47334ff7984769e4661b2f1883594ff (patch)
tree fbc1da082a0b9f4adc7a2560811bbd6e6c47e420 /fs/btrfs/volumes.c
parent 3fa8749e584b55f1180411ab1b51117190bac1e5 (diff)
parent 37d3cdddf5378606f6eab99982ca16819745d8f4 (diff)
Merge branch 'master' of ssh://mason@master.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
Diffstat (limited to 'fs/btrfs/volumes.c')
-rw-r--r-- fs/btrfs/volumes.c 2549
1 files changed, 2549 insertions, 0 deletions
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
new file mode 100644
index 00000000000..2eed7f91f51
--- /dev/null
+++ b/fs/btrfs/volumes.c
@@ -0,0 +1,2549 @@
+/*
+ * Copyright (C) 2007 Oracle. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 02111-1307, USA.
+ */
+#include <linux/sched.h>
+#include <linux/bio.h>
+#include <linux/buffer_head.h>
+#include <linux/blkdev.h>
+#include <linux/random.h>
+#include <asm/div64.h>
+#include "ctree.h"
+#include "extent_map.h"
+#include "disk-io.h"
+#include "transaction.h"
+#include "print-tree.h"
+#include "volumes.h"
+#include "async-thread.h"
+
+/*
+ * Mapping record with a fixed header of geometry fields followed by a
+ * variable number of btrfs_bio_stripe entries. The total allocation
+ * size for n stripes is given by map_lookup_size(n).
+ */
+struct map_lookup {
+ u64 type;
+ int io_align;
+ int io_width;
+ int stripe_len;
+ int sector_size;
+ int num_stripes;
+ int sub_stripes;
+ /* flexible array member; presumably num_stripes entries -- confirm */
+ struct btrfs_bio_stripe stripes[];
+};
+
+/* size in bytes of a struct map_lookup followed by n btrfs_bio_stripe */
+#define map_lookup_size(n) (sizeof(struct map_lookup) + \
+ (sizeof(struct btrfs_bio_stripe) * (n)))
+
+/*
+ * uuid_mutex is taken by the scan, open, close and remove paths in
+ * this file; it is also exported through btrfs_lock_volumes().
+ */
+static DEFINE_MUTEX(uuid_mutex);
+/* list of every struct btrfs_fs_devices we know of, linked via ->list */
+static LIST_HEAD(fs_uuids);
+
+/* take the file-local uuid_mutex on behalf of code outside this file */
+void btrfs_lock_volumes(void)
+{
+ mutex_lock(&uuid_mutex);
+}
+
+/* release the uuid_mutex taken by btrfs_lock_volumes() */
+void btrfs_unlock_volumes(void)
+{
+ mutex_unlock(&uuid_mutex);
+}
+
+/*
+ * take both fs-wide allocation locks: alloc_mutex first and then
+ * chunk_mutex. unlock_chunks() drops them in the reverse order.
+ */
+static void lock_chunks(struct btrfs_root *root)
+{
+	struct btrfs_fs_info *info = root->fs_info;
+
+	mutex_lock(&info->alloc_mutex);
+	mutex_lock(&info->chunk_mutex);
+}
+
+/*
+ * drop the locks taken by lock_chunks(): chunk_mutex first and then
+ * alloc_mutex.
+ */
+static void unlock_chunks(struct btrfs_root *root)
+{
+	struct btrfs_fs_info *info = root->fs_info;
+
+	mutex_unlock(&info->chunk_mutex);
+	mutex_unlock(&info->alloc_mutex);
+}
+
+/*
+ * walk every filesystem on fs_uuids and tear down all of its devices:
+ * close the block device if one is open, unlink the device from the
+ * per-fs list and free it. Always returns 0.
+ *
+ * NOTE(review): the btrfs_fs_devices structs themselves stay on
+ * fs_uuids and are not freed here -- confirm that is intended.
+ */
+int btrfs_cleanup_fs_uuids(void)
+{
+	struct btrfs_fs_devices *fs_devices;
+	struct btrfs_device *dev;
+	struct btrfs_device *next;
+
+	list_for_each_entry(fs_devices, &fs_uuids, list) {
+		list_for_each_entry_safe(dev, next, &fs_devices->devices,
+					 dev_list) {
+			if (dev->bdev) {
+				close_bdev_excl(dev->bdev);
+				fs_devices->open_devices--;
+			}
+			list_del(&dev->dev_list);
+			kfree(dev->name);
+			kfree(dev);
+		}
+	}
+	return 0;
+}
+
+/*
+ * find the device with the given devid on the list at head. When uuid
+ * is non-NULL the device uuid has to match as well. Returns NULL if
+ * nothing matches.
+ */
+static noinline struct btrfs_device *__find_device(struct list_head *head,
+						   u64 devid, u8 *uuid)
+{
+	struct btrfs_device *candidate;
+
+	list_for_each_entry(candidate, head, dev_list) {
+		if (candidate->devid != devid)
+			continue;
+		if (uuid && memcmp(candidate->uuid, uuid, BTRFS_UUID_SIZE))
+			continue;
+		return candidate;
+	}
+	return NULL;
+}
+
+/*
+ * look through fs_uuids for the filesystem whose fsid matches the
+ * BTRFS_FSID_SIZE bytes at fsid. Returns NULL when it is not known.
+ */
+static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
+{
+	struct btrfs_fs_devices *known;
+
+	list_for_each_entry(known, &fs_uuids, list) {
+		if (!memcmp(fsid, known->fsid, BTRFS_FSID_SIZE))
+			return known;
+	}
+	return NULL;
+}
+
+/*
+ * we try to collect pending bios for a device so we don't get a large
+ * number of procs sending bios down to the same device. This greatly
+ * improves the scheduler's ability to collect and merge the bios.
+ *
+ * But, it also turns into a long list of bios to process and that is sure
+ * to eventually make the worker thread block. The solution here is to
+ * make some progress and then put this work struct back at the end of
+ * the list if the block device is congested. This way, multiple devices
+ * can make progress from a single worker thread.
+ *
+ * Always returns 0.
+ */
+static int noinline run_scheduled_bios(struct btrfs_device *device)
+{
+ struct bio *pending;
+ struct backing_dev_info *bdi;
+ struct btrfs_fs_info *fs_info;
+ struct bio *tail;
+ struct bio *cur;
+ int again = 0;
+ unsigned long num_run = 0;
+ unsigned long limit;
+
+ bdi = device->bdev->bd_inode->i_mapping->backing_dev_info;
+ fs_info = device->dev_root->fs_info;
+ /*
+ * waiters on async_submit_wait are woken once the async bio count
+ * falls below two thirds of the submit limit
+ */
+ limit = btrfs_async_submit_limit(fs_info);
+ limit = limit * 2 / 3;
+
+loop:
+ spin_lock(&device->io_lock);
+
+ /* take all the bios off the list at once and process them
+ * later on (without the lock held). But, remember the
+ * tail and other pointers so the bios can be properly reinserted
+ * into the list if we hit congestion
+ */
+ pending = device->pending_bios;
+ tail = device->pending_bio_tail;
+ WARN_ON(pending && !tail);
+ device->pending_bios = NULL;
+ device->pending_bio_tail = NULL;
+
+ /*
+ * if pending was null this time around, no bios need processing
+ * at all and we can stop. Otherwise it'll loop back up again
+ * and do an additional check so no bios are missed.
+ *
+ * device->running_pending is used to synchronize with the
+ * schedule_bio code.
+ */
+ if (pending) {
+ again = 1;
+ device->running_pending = 1;
+ } else {
+ again = 0;
+ device->running_pending = 0;
+ }
+ spin_unlock(&device->io_lock);
+
+ while(pending) {
+ /* detach the head bio from the private chain before submitting */
+ cur = pending;
+ pending = pending->bi_next;
+ cur->bi_next = NULL;
+ atomic_dec(&fs_info->nr_async_bios);
+
+ if (atomic_read(&fs_info->nr_async_bios) < limit &&
+ waitqueue_active(&fs_info->async_submit_wait))
+ wake_up(&fs_info->async_submit_wait);
+
+ BUG_ON(atomic_read(&cur->bi_cnt) == 0);
+ /* extra reference held across submit_bio() */
+ bio_get(cur);
+ submit_bio(cur->bi_rw, cur);
+ bio_put(cur);
+ num_run++;
+
+ /*
+ * we made progress, there is more work to do and the bdi
+ * is now congested. Back off and let other work structs
+ * run instead
+ */
+ if (pending && bdi_write_congested(bdi)) {
+ struct bio *old_head;
+
+ spin_lock(&device->io_lock);
+
+ /*
+ * our unsubmitted remainder goes back at the head of
+ * the device list. If bios were queued while we ran,
+ * chain them behind our tail; otherwise our tail is
+ * the list tail.
+ */
+ old_head = device->pending_bios;
+ device->pending_bios = pending;
+ if (device->pending_bio_tail)
+ tail->bi_next = old_head;
+ else
+ device->pending_bio_tail = tail;
+
+ spin_unlock(&device->io_lock);
+ btrfs_requeue_work(&device->work);
+ goto done;
+ }
+ }
+ if (again)
+ goto loop;
+done:
+ return 0;
+}
+
+/*
+ * btrfs_work callback: the work struct is embedded in a btrfs_device,
+ * so recover the device and run its queued bios.
+ */
+void pending_bios_fn(struct btrfs_work *work)
+{
+	run_scheduled_bios(container_of(work, struct btrfs_device, work));
+}
+
+/*
+ * record the device at path, described by disk_super, in the in-memory
+ * lists. A btrfs_fs_devices is created for the fsid when none exists
+ * yet, and a btrfs_device is created unless one with the same devid
+ * and device uuid is already on that filesystem's list. The
+ * filesystem's latest_devid/latest_trans follow the highest super
+ * generation seen.
+ *
+ * On success *fs_devices_ret points at the filesystem entry and 0 is
+ * returned; -ENOMEM is returned when an allocation fails.
+ */
+static noinline int device_list_add(const char *path,
+			   struct btrfs_super_block *disk_super,
+			   u64 devid, struct btrfs_fs_devices **fs_devices_ret)
+{
+	struct btrfs_fs_devices *fs_devices;
+	struct btrfs_device *device = NULL;
+	u64 found_transid = btrfs_super_generation(disk_super);
+
+	fs_devices = find_fsid(disk_super->fsid);
+	if (fs_devices) {
+		device = __find_device(&fs_devices->devices, devid,
+				       disk_super->dev_item.uuid);
+	} else {
+		/* first device seen for this fsid */
+		fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
+		if (!fs_devices)
+			return -ENOMEM;
+		INIT_LIST_HEAD(&fs_devices->devices);
+		INIT_LIST_HEAD(&fs_devices->alloc_list);
+		list_add(&fs_devices->list, &fs_uuids);
+		memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
+		fs_devices->latest_devid = devid;
+		fs_devices->latest_trans = found_transid;
+	}
+
+	if (device)
+		goto update_latest;
+
+	device = kzalloc(sizeof(*device), GFP_NOFS);
+	if (!device) {
+		/* we can safely leave the fs_devices entry around */
+		return -ENOMEM;
+	}
+	device->name = kstrdup(path, GFP_NOFS);
+	if (!device->name) {
+		kfree(device);
+		return -ENOMEM;
+	}
+	device->devid = devid;
+	device->barriers = 1;
+	device->work.func = pending_bios_fn;
+	spin_lock_init(&device->io_lock);
+	memcpy(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE);
+
+	list_add(&device->dev_list, &fs_devices->devices);
+	list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
+	fs_devices->num_devices++;
+
+update_latest:
+	if (found_transid > fs_devices->latest_trans) {
+		fs_devices->latest_devid = devid;
+		fs_devices->latest_trans = found_transid;
+	}
+	*fs_devices_ret = fs_devices;
+	return 0;
+}
+
+/*
+ * drop every device on fs_devices->devices that is not flagged
+ * in_fs_metadata: unlink it from both lists, close its block device if
+ * one is open, and free it. Always returns 0.
+ */
+int btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices)
+{
+ struct list_head *head = &fs_devices->devices;
+ struct list_head *cur;
+ struct btrfs_device *device;
+
+ mutex_lock(&uuid_mutex);
+again:
+ list_for_each(cur, head) {
+ device = list_entry(cur, struct btrfs_device, dev_list);
+ if (!device->in_fs_metadata) {
+ struct block_device *bdev;
+ list_del(&device->dev_list);
+ list_del(&device->dev_alloc_list);
+ fs_devices->num_devices--;
+ if (device->bdev) {
+ bdev = device->bdev;
+ fs_devices->open_devices--;
+ /* uuid_mutex is dropped around the close */
+ mutex_unlock(&uuid_mutex);
+ close_bdev_excl(bdev);
+ mutex_lock(&uuid_mutex);
+ }
+ kfree(device->name);
+ kfree(device);
+ /*
+ * the entry we were standing on is gone (and the lock may
+ * have been dropped), so rescan from the list head
+ */
+ goto again;
+ }
+ }
+ mutex_unlock(&uuid_mutex);
+ return 0;
+}
+
+/*
+ * close every open block device of the filesystem under uuid_mutex,
+ * clear each device's bdev pointer and in_fs_metadata flag, and mark
+ * the filesystem as not mounted. The device structs themselves stay
+ * on the list. Always returns 0.
+ */
+int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
+{
+	struct btrfs_device *device;
+
+	mutex_lock(&uuid_mutex);
+	list_for_each_entry(device, &fs_devices->devices, dev_list) {
+		if (device->bdev) {
+			close_bdev_excl(device->bdev);
+			fs_devices->open_devices--;
+			device->bdev = NULL;
+		}
+		device->in_fs_metadata = 0;
+	}
+	fs_devices->mounted = 0;
+	mutex_unlock(&uuid_mutex);
+	return 0;
+}
+
+/*
+ * open every not-yet-open device on fs_devices->devices that has a
+ * name. For each one the super block is read and the magic and devid
+ * are checked against what was recorded at scan time; devices that
+ * fail to open or fail the checks are skipped (and closed again).
+ *
+ * The device with the highest super generation becomes latest_bdev /
+ * latest_devid / latest_trans of the filesystem.
+ *
+ * Returns 0 if the filesystem is already marked mounted or at least
+ * one device is open afterwards, -EIO when no device is open.
+ */
+int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
+		       int flags, void *holder)
+{
+	struct block_device *bdev;
+	struct list_head *head = &fs_devices->devices;
+	struct list_head *cur;
+	struct btrfs_device *device;
+	struct block_device *latest_bdev = NULL;
+	struct buffer_head *bh;
+	struct btrfs_super_block *disk_super;
+	u64 latest_devid = 0;
+	u64 latest_transid = 0;
+	u64 transid;
+	u64 devid;
+	int ret = 0;
+
+	mutex_lock(&uuid_mutex);
+	if (fs_devices->mounted)
+		goto out;
+
+	list_for_each(cur, head) {
+		device = list_entry(cur, struct btrfs_device, dev_list);
+		if (device->bdev)
+			continue;
+
+		if (!device->name)
+			continue;
+
+		bdev = open_bdev_excl(device->name, flags, holder);
+
+		if (IS_ERR(bdev)) {
+			printk("open %s failed\n", device->name);
+			goto error;
+		}
+		set_blocksize(bdev, 4096);
+
+		bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
+		if (!bh)
+			goto error_close;
+
+		disk_super = (struct btrfs_super_block *)bh->b_data;
+		if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
+			    sizeof(disk_super->magic)))
+			goto error_brelse;
+
+		devid = le64_to_cpu(disk_super->dev_item.devid);
+		if (devid != device->devid)
+			goto error_brelse;
+
+		transid = btrfs_super_generation(disk_super);
+		if (!latest_transid || transid > latest_transid) {
+			latest_devid = devid;
+			latest_transid = transid;
+			latest_bdev = bdev;
+		}
+
+		device->bdev = bdev;
+		device->in_fs_metadata = 0;
+		fs_devices->open_devices++;
+		/*
+		 * everything we need has been copied out of the super
+		 * block; drop the buffer reference taken by __bread()
+		 * so it is not leaked for every device we keep open
+		 */
+		brelse(bh);
+		continue;
+
+error_brelse:
+		brelse(bh);
+error_close:
+		close_bdev_excl(bdev);
+error:
+		continue;
+	}
+	if (fs_devices->open_devices == 0) {
+		ret = -EIO;
+		goto out;
+	}
+	fs_devices->mounted = 1;
+	fs_devices->latest_bdev = latest_bdev;
+	fs_devices->latest_devid = latest_devid;
+	fs_devices->latest_trans = latest_transid;
+out:
+	mutex_unlock(&uuid_mutex);
+	return ret;
+}
+
+/*
+ * open the block device at path, read its super block and, when the
+ * btrfs magic matches, register the device through device_list_add().
+ * The device is closed again before returning; only the in-memory
+ * record is kept.
+ *
+ * Returns the result of device_list_add() on a btrfs device, -EINVAL
+ * when the magic does not match, -EIO when the super block cannot be
+ * read, or the error from opening the device / setting the block size.
+ */
+int btrfs_scan_one_device(const char *path, int flags, void *holder,
+ struct btrfs_fs_devices **fs_devices_ret)
+{
+ struct btrfs_super_block *disk_super;
+ struct block_device *bdev;
+ struct buffer_head *bh;
+ int ret;
+ u64 devid;
+ u64 transid;
+
+ mutex_lock(&uuid_mutex);
+
+ bdev = open_bdev_excl(path, flags, holder);
+
+ if (IS_ERR(bdev)) {
+ ret = PTR_ERR(bdev);
+ goto error;
+ }
+
+ ret = set_blocksize(bdev, 4096);
+ if (ret)
+ goto error_close;
+ bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
+ if (!bh) {
+ ret = -EIO;
+ goto error_close;
+ }
+ disk_super = (struct btrfs_super_block *)bh->b_data;
+ if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
+ sizeof(disk_super->magic))) {
+ ret = -EINVAL;
+ goto error_brelse;
+ }
+ devid = le64_to_cpu(disk_super->dev_item.devid);
+ transid = btrfs_super_generation(disk_super);
+ if (disk_super->label[0])
+ printk("device label %s ", disk_super->label);
+ else {
+ /* FIXME, make a real uuid parser */
+ printk("device fsid %llx-%llx ",
+ *(unsigned long long *)disk_super->fsid,
+ *(unsigned long long *)(disk_super->fsid + 8));
+ }
+ printk("devid %Lu transid %Lu %s\n", devid, transid, path);
+ ret = device_list_add(path, disk_super, devid, fs_devices_ret);
+
+ /* the success path falls through the same unwind as the errors */
+error_brelse:
+ brelse(bh);
+error_close:
+ close_bdev_excl(bdev);
+error:
+ mutex_unlock(&uuid_mutex);
+ return ret;
+}
+
+/*
+ * this uses a pretty simple search, the expectation is that it is
+ * called very infrequently and that a given device has a small number
+ * of extents
+ *
+ * Walks the dev extent items of the device in key order looking for a
+ * gap of at least num_bytes between the end of one extent and the
+ * start of the next, or free room after the last extent.
+ *
+ * On success 0 is returned and *start holds the byte offset of the
+ * hole. -ENOSPC is returned when nothing fits below
+ * device->total_bytes; a negative value from the tree search is passed
+ * through. The path is released before returning in all cases.
+ */
+static noinline int find_free_dev_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_device *device,
+ struct btrfs_path *path,
+ u64 num_bytes, u64 *start)
+{
+ struct btrfs_key key;
+ struct btrfs_root *root = device->dev_root;
+ struct btrfs_dev_extent *dev_extent = NULL;
+ u64 hole_size = 0;
+ u64 last_byte = 0;
+ u64 search_start = 0;
+ u64 search_end = device->total_bytes;
+ int ret;
+ int slot = 0;
+ int start_found;
+ struct extent_buffer *l;
+
+ start_found = 0;
+ path->reada = 2;
+
+ /* FIXME use last free of some kind */
+
+ /* we don't want to overwrite the superblock on the drive,
+ * so we make sure to start at an offset of at least 1MB
+ */
+ search_start = max((u64)1024 * 1024, search_start);
+
+ /* honor fs_info->alloc_start when the request still fits behind it */
+ if (root->fs_info->alloc_start + num_bytes <= device->total_bytes)
+ search_start = max(root->fs_info->alloc_start, search_start);
+
+ key.objectid = device->devid;
+ key.offset = search_start;
+ key.type = BTRFS_DEV_EXTENT_KEY;
+ ret = btrfs_search_slot(trans, root, &key, path, 0, 0);
+ if (ret < 0)
+ goto error;
+ /* step back one item before starting the forward walk */
+ ret = btrfs_previous_item(root, path, 0, key.type);
+ if (ret < 0)
+ goto error;
+ l = path->nodes[0];
+ btrfs_item_key_to_cpu(l, &key, path->slots[0]);
+ while (1) {
+ l = path->nodes[0];
+ slot = path->slots[0];
+ if (slot >= btrfs_header_nritems(l)) {
+ ret = btrfs_next_leaf(root, path);
+ if (ret == 0)
+ continue;
+ if (ret < 0)
+ goto error;
+no_more_items:
+ /*
+ * ran out of items for this device: with no extent seen
+ * the hole starts at search_start, otherwise right after
+ * the last extent (but never before search_start)
+ */
+ if (!start_found) {
+ if (search_start >= search_end) {
+ ret = -ENOSPC;
+ goto error;
+ }
+ *start = search_start;
+ start_found = 1;
+ goto check_pending;
+ }
+ *start = last_byte > search_start ?
+ last_byte : search_start;
+ if (search_end <= *start) {
+ ret = -ENOSPC;
+ goto error;
+ }
+ goto check_pending;
+ }
+ btrfs_item_key_to_cpu(l, &key, slot);
+
+ if (key.objectid < device->devid)
+ goto next;
+
+ if (key.objectid > device->devid)
+ goto no_more_items;
+
+ /*
+ * gap between the end of the previous extent and this key.
+ * NOTE(review): this runs before the key type is checked
+ * below -- confirm only dev extent keys can show up under a
+ * devid objectid in this tree.
+ */
+ if (key.offset >= search_start && key.offset > last_byte &&
+ start_found) {
+ if (last_byte < search_start)
+ last_byte = search_start;
+ hole_size = key.offset - last_byte;
+ if (key.offset > last_byte &&
+ hole_size >= num_bytes) {
+ *start = last_byte;
+ goto check_pending;
+ }
+ }
+ if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) {
+ goto next;
+ }
+
+ /* remember where this extent ends */
+ start_found = 1;
+ dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
+ last_byte = key.offset + btrfs_dev_extent_length(l, dev_extent);
+next:
+ path->slots[0]++;
+ cond_resched();
+ }
+check_pending:
+ /* we have to make sure we didn't find an extent that has already
+ * been allocated by the map tree or the original allocation
+ */
+ btrfs_release_path(root, path);
+ BUG_ON(*start < search_start);
+
+ if (*start + num_bytes > search_end) {
+ ret = -ENOSPC;
+ goto error;
+ }
+ /* check for pending inserts here */
+ return 0;
+
+error:
+ btrfs_release_path(root, path);
+ return ret;
+}
+
+/*
+ * delete the dev extent item of the device that starts at, or covers,
+ * byte offset start and subtract its length from device->bytes_used.
+ *
+ * Returns 0 on success or -ENOMEM when no path can be allocated. Any
+ * other failure (search error, no covering extent, delete error) hits
+ * a BUG_ON.
+ */
+int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
+ struct btrfs_device *device,
+ u64 start)
+{
+ int ret;
+ struct btrfs_path *path;
+ struct btrfs_root *root = device->dev_root;
+ struct btrfs_key key;
+ struct btrfs_key found_key;
+ struct extent_buffer *leaf = NULL;
+ struct btrfs_dev_extent *extent = NULL;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ key.objectid = device->devid;
+ key.offset = start;
+ key.type = BTRFS_DEV_EXTENT_KEY;
+
+ ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ if (ret > 0) {
+ /*
+ * no item at exactly this offset: fall back to the previous
+ * dev extent of the device and insist that it covers start
+ */
+ ret = btrfs_previous_item(root, path, key.objectid,
+ BTRFS_DEV_EXTENT_KEY);
+ BUG_ON(ret);
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+ extent = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_dev_extent);
+ BUG_ON(found_key.offset > start || found_key.offset +
+ btrfs_dev_extent_length(leaf, extent) < start);
+ ret = 0;
+ } else if (ret == 0) {
+ leaf = path->nodes[0];
+ extent = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_dev_extent);
+ }
+ /* a negative return from the search ends up here */
+ BUG_ON(ret);
+
+ if (device->bytes_used > 0)
+ device->bytes_used -= btrfs_dev_extent_length(leaf, extent);
+ ret = btrfs_del_item(trans, root, path);
+ BUG_ON(ret);
+
+ btrfs_free_path(path);
+ return ret;
+}
+
+/*
+ * find a free range of num_bytes on the device and insert a dev
+ * extent item for it that points back at the owning chunk
+ * (chunk_tree, chunk_objectid, chunk_offset) and carries the chunk
+ * tree uuid.
+ *
+ * On success 0 is returned and *start holds the device offset that
+ * was picked. -ENOMEM is returned when no path can be allocated and
+ * errors from find_free_dev_extent() are passed through; a failed
+ * item insert is a BUG.
+ */
+int noinline btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
+			   struct btrfs_device *device,
+			   u64 chunk_tree, u64 chunk_objectid,
+			   u64 chunk_offset,
+			   u64 num_bytes, u64 *start)
+{
+	struct btrfs_root *root = device->dev_root;
+	struct btrfs_dev_extent *extent;
+	struct extent_buffer *leaf;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	unsigned long uuid_ptr;
+	int ret;
+
+	WARN_ON(!device->in_fs_metadata);
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ret = find_free_dev_extent(trans, device, path, num_bytes, start);
+	if (ret)
+		goto out_free_path;
+
+	key.objectid = device->devid;
+	key.type = BTRFS_DEV_EXTENT_KEY;
+	key.offset = *start;
+	ret = btrfs_insert_empty_item(trans, root, path, &key,
+				      sizeof(*extent));
+	BUG_ON(ret);
+
+	/* fill in the freshly inserted item */
+	leaf = path->nodes[0];
+	extent = btrfs_item_ptr(leaf, path->slots[0],
+				struct btrfs_dev_extent);
+	btrfs_set_dev_extent_chunk_tree(leaf, extent, chunk_tree);
+	btrfs_set_dev_extent_chunk_objectid(leaf, extent, chunk_objectid);
+	btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
+
+	uuid_ptr = (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent);
+	write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
+			    uuid_ptr, BTRFS_UUID_SIZE);
+
+	btrfs_set_dev_extent_length(leaf, extent, num_bytes);
+	btrfs_mark_buffer_dirty(leaf);
+
+out_free_path:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * compute the logical offset right behind the last chunk item stored
+ * under objectid: the key offset of that item plus its chunk length.
+ * *offset is set to 0 when no chunk item precedes the search key or
+ * the one found belongs to a different objectid.
+ *
+ * Returns 0 on success or the negative error from the tree search,
+ * in which case *offset is left alone.
+ */
+static noinline int find_next_chunk(struct btrfs_root *root,
+				    u64 objectid, u64 *offset)
+{
+	struct btrfs_path *path;
+	struct btrfs_key search_key;
+	struct btrfs_key last_key;
+	struct btrfs_chunk *chunk;
+	struct extent_buffer *leaf;
+	int ret;
+
+	path = btrfs_alloc_path();
+	BUG_ON(!path);
+
+	/* aim past the highest possible chunk offset, then walk back */
+	search_key.objectid = objectid;
+	search_key.type = BTRFS_CHUNK_ITEM_KEY;
+	search_key.offset = (u64)-1;
+
+	ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+
+	BUG_ON(ret == 0);
+
+	*offset = 0;
+	ret = btrfs_previous_item(root, path, 0, BTRFS_CHUNK_ITEM_KEY);
+	if (ret == 0) {
+		leaf = path->nodes[0];
+		btrfs_item_key_to_cpu(leaf, &last_key, path->slots[0]);
+		if (last_key.objectid == objectid) {
+			chunk = btrfs_item_ptr(leaf, path->slots[0],
+					       struct btrfs_chunk);
+			*offset = last_key.offset +
+				  btrfs_chunk_length(leaf, chunk);
+		}
+	}
+	ret = 0;
+out:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * pick the next device id: one more than the key offset of the last
+ * dev item under BTRFS_DEV_ITEMS_OBJECTID, or 1 when there is none.
+ *
+ * Returns 0 on success or the negative error from the tree search.
+ * The caller's path is released before returning.
+ */
+static noinline int find_next_devid(struct btrfs_root *root,
+				    struct btrfs_path *path, u64 *objectid)
+{
+	struct btrfs_key search_key;
+	struct btrfs_key last_key;
+	int ret;
+
+	/* aim past the highest possible devid, then walk back */
+	search_key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+	search_key.type = BTRFS_DEV_ITEM_KEY;
+	search_key.offset = (u64)-1;
+
+	ret = btrfs_search_slot(NULL, root, &search_key, path, 0, 0);
+	if (ret < 0)
+		goto out;
+
+	BUG_ON(ret == 0);
+
+	ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
+				  BTRFS_DEV_ITEM_KEY);
+	if (ret == 0) {
+		btrfs_item_key_to_cpu(path->nodes[0], &last_key,
+				      path->slots[0]);
+		*objectid = last_key.offset + 1;
+	} else {
+		*objectid = 1;
+	}
+	ret = 0;
+out:
+	btrfs_release_path(root, path);
+	return ret;
+}
+
+/*
+ * the device information is stored in the chunk root
+ * the btrfs_device struct should be fully filled in
+ *
+ * A fresh devid is picked with find_next_devid() and stored in
+ * device->devid, then a dev item is inserted and filled from the
+ * struct. Group, seek speed and bandwidth are written as 0.
+ *
+ * Returns 0 on success, -ENOMEM when no path can be allocated, or the
+ * error from the devid search / item insert.
+ */
+int btrfs_add_device(struct btrfs_trans_handle *trans,
+		     struct btrfs_root *root,
+		     struct btrfs_device *device)
+{
+	struct btrfs_root *chunk_root = root->fs_info->chunk_root;
+	struct btrfs_dev_item *dev_item;
+	struct extent_buffer *leaf;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	u64 new_devid = 0;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	ret = find_next_devid(chunk_root, path, &new_devid);
+	if (ret)
+		goto out_free_path;
+
+	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+	key.type = BTRFS_DEV_ITEM_KEY;
+	key.offset = new_devid;
+
+	ret = btrfs_insert_empty_item(trans, chunk_root, path, &key,
+				      sizeof(*dev_item));
+	if (ret)
+		goto out_free_path;
+
+	leaf = path->nodes[0];
+	dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
+
+	/* the in-memory device only learns its id once the item exists */
+	device->devid = new_devid;
+
+	btrfs_set_device_id(leaf, dev_item, device->devid);
+	btrfs_set_device_type(leaf, dev_item, device->type);
+	btrfs_set_device_io_align(leaf, dev_item, device->io_align);
+	btrfs_set_device_io_width(leaf, dev_item, device->io_width);
+	btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
+	btrfs_set_device_total_bytes(leaf, dev_item, device->total_bytes);
+	btrfs_set_device_bytes_used(leaf, dev_item, device->bytes_used);
+	btrfs_set_device_group(leaf, dev_item, 0);
+	btrfs_set_device_seek_speed(leaf, dev_item, 0);
+	btrfs_set_device_bandwidth(leaf, dev_item, 0);
+
+	write_extent_buffer(leaf, device->uuid,
+			    (unsigned long)btrfs_device_uuid(dev_item),
+			    BTRFS_UUID_SIZE);
+	btrfs_mark_buffer_dirty(leaf);
+	ret = 0;
+
+out_free_path:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * delete the dev item of the device from the chunk root inside its own
+ * transaction, unlink the device from the in-memory lists, repoint
+ * sb->s_bdev and latest_bdev if they referred to this device, and
+ * drop the device count in the super block copy by one.
+ *
+ * Returns 0 on success, -ENOMEM when no path can be allocated,
+ * -ENOENT when the dev item does not exist, or the error from the
+ * search / delete. The transaction is committed on every path after
+ * it was started.
+ */
+static int btrfs_rm_dev_item(struct btrfs_root *root,
+ struct btrfs_device *device)
+{
+ int ret;
+ struct btrfs_path *path;
+ struct block_device *bdev = device->bdev;
+ struct btrfs_device *next_dev;
+ struct btrfs_key key;
+ u64 total_bytes;
+ struct btrfs_fs_devices *fs_devices;
+ struct btrfs_trans_handle *trans;
+
+ root = root->fs_info->chunk_root;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+ /* NOTE(review): the returned handle is not checked -- confirm */
+ trans = btrfs_start_transaction(root, 1);
+ key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+ key.type = BTRFS_DEV_ITEM_KEY;
+ key.offset = device->devid;
+ lock_chunks(root);
+
+ ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ if (ret < 0)
+ goto out;
+
+ if (ret > 0) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ ret = btrfs_del_item(trans, root, path);
+ if (ret)
+ goto out;
+
+ /*
+ * at this point, the device is zero sized. We want to
+ * remove it from the devices list and zero out the old super
+ */
+ list_del_init(&device->dev_list);
+ list_del_init(&device->dev_alloc_list);
+ fs_devices = root->fs_info->fs_devices;
+
+ /*
+ * first remaining device takes over wherever the removed bdev was
+ * referenced. NOTE(review): assumes the list is not empty here.
+ */
+ next_dev = list_entry(fs_devices->devices.next, struct btrfs_device,
+ dev_list);
+ if (bdev == root->fs_info->sb->s_bdev)
+ root->fs_info->sb->s_bdev = next_dev->bdev;
+ if (bdev == fs_devices->latest_bdev)
+ fs_devices->latest_bdev = next_dev->bdev;
+
+ /* despite its name, total_bytes holds the device count here */
+ total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
+ btrfs_set_super_num_devices(&root->fs_info->super_copy,
+ total_bytes - 1);
+out:
+ btrfs_free_path(path);
+ unlock_chunks(root);
+ btrfs_commit_transaction(trans, root);
+ return ret;
+}
+
+/*
+ * remove a device from a mounted filesystem. device_path is either a
+ * block device path or the literal string "missing", which selects
+ * the first device that is in the fs metadata but has no bdev.
+ *
+ * The device is shrunk to zero, its dev item is deleted, the magic in
+ * its super block is wiped (when the device could be read) and the
+ * in-memory device is freed. Runs under uuid_mutex and volume_mutex.
+ *
+ * Returns 0 on success, -EINVAL when removal would go below the
+ * device count required by raid1/raid10, -ENOENT when the path is not
+ * a member of this filesystem, -EIO when its super block cannot be
+ * read, or an error from opening, shrinking or item removal.
+ */
+int btrfs_rm_device(struct btrfs_root *root, char *device_path)
+{
+ struct btrfs_device *device;
+ struct block_device *bdev;
+ struct buffer_head *bh = NULL;
+ struct btrfs_super_block *disk_super;
+ u64 all_avail;
+ u64 devid;
+ int ret = 0;
+
+ mutex_lock(&uuid_mutex);
+ mutex_lock(&root->fs_info->volume_mutex);
+
+ /* union of every block group profile in use on this fs */
+ all_avail = root->fs_info->avail_data_alloc_bits |
+ root->fs_info->avail_system_alloc_bits |
+ root->fs_info->avail_metadata_alloc_bits;
+
+ if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
+ btrfs_super_num_devices(&root->fs_info->super_copy) <= 4) {
+ printk("btrfs: unable to go below four devices on raid10\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
+ btrfs_super_num_devices(&root->fs_info->super_copy) <= 2) {
+ printk("btrfs: unable to go below two devices on raid1\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (strcmp(device_path, "missing") == 0) {
+ struct list_head *cur;
+ struct list_head *devices;
+ struct btrfs_device *tmp;
+
+ device = NULL;
+ devices = &root->fs_info->fs_devices->devices;
+ list_for_each(cur, devices) {
+ tmp = list_entry(cur, struct btrfs_device, dev_list);
+ if (tmp->in_fs_metadata && !tmp->bdev) {
+ device = tmp;
+ break;
+ }
+ }
+ bdev = NULL;
+ bh = NULL;
+ disk_super = NULL;
+ if (!device) {
+ /*
+ * NOTE(review): ret is still 0 here, so the caller sees
+ * success even though nothing was removed -- confirm.
+ */
+ printk("btrfs: no missing devices found to remove\n");
+ goto out;
+ }
+
+ } else {
+ bdev = open_bdev_excl(device_path, 0,
+ root->fs_info->bdev_holder);
+ if (IS_ERR(bdev)) {
+ ret = PTR_ERR(bdev);
+ goto out;
+ }
+
+ bh = __bread(bdev, BTRFS_SUPER_INFO_OFFSET / 4096, 4096);
+ if (!bh) {
+ ret = -EIO;
+ goto error_close;
+ }
+ disk_super = (struct btrfs_super_block *)bh->b_data;
+ if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
+ sizeof(disk_super->magic))) {
+ ret = -ENOENT;
+ goto error_brelse;
+ }
+ if (memcmp(disk_super->fsid, root->fs_info->fsid,
+ BTRFS_FSID_SIZE)) {
+ ret = -ENOENT;
+ goto error_brelse;
+ }
+ devid = le64_to_cpu(disk_super->dev_item.devid);
+ device = btrfs_find_device(root, devid, NULL);
+ if (!device) {
+ ret = -ENOENT;
+ goto error_brelse;
+ }
+
+ }
+ /*
+ * NOTE(review): both counters are dropped before the shrink and
+ * item removal below and are not restored if those fail;
+ * open_devices is also dropped for a "missing" device that has no
+ * bdev -- confirm.
+ */
+ root->fs_info->fs_devices->num_devices--;
+ root->fs_info->fs_devices->open_devices--;
+
+ ret = btrfs_shrink_device(device, 0);
+ if (ret)
+ goto error_brelse;
+
+
+ ret = btrfs_rm_dev_item(root->fs_info->chunk_root, device);
+ if (ret)
+ goto error_brelse;
+
+ if (bh) {
+ /* make sure this device isn't detected as part of
+ * the FS anymore
+ */
+ memset(&disk_super->magic, 0, sizeof(disk_super->magic));
+ set_buffer_dirty(bh);
+ sync_dirty_buffer(bh);
+
+ brelse(bh);
+ }
+
+ if (device->bdev) {
+ /* one close for the device struct or super_block */
+ close_bdev_excl(device->bdev);
+ }
+ if (bdev) {
+ /* one close for us */
+ close_bdev_excl(bdev);
+ }
+ kfree(device->name);
+ kfree(device);
+ ret = 0;
+ goto out;
+
+error_brelse:
+ brelse(bh);
+error_close:
+ if (bdev)
+ close_bdev_excl(bdev);
+out:
+ mutex_unlock(&root->fs_info->volume_mutex);
+ mutex_unlock(&uuid_mutex);
+ return ret;
+}
+
+/*
+ * add the block device at device_path to a mounted filesystem: open
+ * it, build a btrfs_device with a random uuid and the root's sector
+ * size, insert its dev item through btrfs_add_device(), account for
+ * it in the super block copy (total bytes and device count) and link
+ * it into the in-memory device lists.
+ *
+ * Returns 0 on success, the error from opening the device, -EEXIST
+ * when the block device already belongs to this filesystem, -ENOMEM
+ * on allocation failure, or the error from btrfs_add_device(). On
+ * every failure after the open, our reference on the block device is
+ * dropped again and a partially built device is freed.
+ */
+int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_device *device;
+	struct block_device *bdev;
+	struct list_head *cur;
+	struct list_head *devices;
+	u64 total_bytes;
+	int ret = 0;
+
+	/*
+	 * failure is reported through an error pointer, exactly as the
+	 * other open_bdev_excl() callers in this file check for it
+	 */
+	bdev = open_bdev_excl(device_path, 0, root->fs_info->bdev_holder);
+	if (IS_ERR(bdev))
+		return PTR_ERR(bdev);
+
+	filemap_write_and_wait(bdev->bd_inode->i_mapping);
+	mutex_lock(&root->fs_info->volume_mutex);
+
+	trans = btrfs_start_transaction(root, 1);
+	lock_chunks(root);
+	devices = &root->fs_info->fs_devices->devices;
+	list_for_each(cur, devices) {
+		device = list_entry(cur, struct btrfs_device, dev_list);
+		if (device->bdev == bdev) {
+			/* drop the extra open we just took */
+			ret = -EEXIST;
+			goto out_close_bdev;
+		}
+	}
+
+	device = kzalloc(sizeof(*device), GFP_NOFS);
+	if (!device) {
+		/* we can safely leave the fs_devices entry around */
+		ret = -ENOMEM;
+		goto out_close_bdev;
+	}
+
+	device->barriers = 1;
+	device->work.func = pending_bios_fn;
+	generate_random_uuid(device->uuid);
+	spin_lock_init(&device->io_lock);
+	device->name = kstrdup(device_path, GFP_NOFS);
+	if (!device->name) {
+		ret = -ENOMEM;
+		goto out_free_device;
+	}
+	device->io_width = root->sectorsize;
+	device->io_align = root->sectorsize;
+	device->sector_size = root->sectorsize;
+	device->total_bytes = i_size_read(bdev->bd_inode);
+	device->dev_root = root->fs_info->dev_root;
+	device->bdev = bdev;
+	device->in_fs_metadata = 1;
+
+	ret = btrfs_add_device(trans, root, device);
+	if (ret)
+		goto out_free_device;
+
+	set_blocksize(device->bdev, 4096);
+
+	total_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
+	btrfs_set_super_total_bytes(&root->fs_info->super_copy,
+				    total_bytes + device->total_bytes);
+
+	/* total_bytes is reused as the device count here */
+	total_bytes = btrfs_super_num_devices(&root->fs_info->super_copy);
+	btrfs_set_super_num_devices(&root->fs_info->super_copy,
+				    total_bytes + 1);
+
+	list_add(&device->dev_list, &root->fs_info->fs_devices->devices);
+	list_add(&device->dev_alloc_list,
+		 &root->fs_info->fs_devices->alloc_list);
+	root->fs_info->fs_devices->num_devices++;
+	root->fs_info->fs_devices->open_devices++;
+out:
+	unlock_chunks(root);
+	btrfs_end_transaction(trans, root);
+	mutex_unlock(&root->fs_info->volume_mutex);
+
+	return ret;
+
+out_free_device:
+	/* the device was never linked into any list; name may be NULL */
+	kfree(device->name);
+	kfree(device);
+out_close_bdev:
+	close_bdev_excl(bdev);
+	goto out;
+}
+
+/*
+ * copy the id, type, io alignment/width, sector size, total bytes and
+ * bytes used of the in-memory device into its existing dev item in
+ * the chunk root and mark the leaf dirty.
+ *
+ * Returns 0 on success, -ENOMEM when no path can be allocated,
+ * -ENOENT when the dev item does not exist, or the negative error
+ * from the tree search.
+ */
+int noinline btrfs_update_device(struct btrfs_trans_handle *trans,
+				 struct btrfs_device *device)
+{
+	struct btrfs_root *chunk_root = device->dev_root->fs_info->chunk_root;
+	struct btrfs_dev_item *item;
+	struct extent_buffer *leaf;
+	struct btrfs_path *path;
+	struct btrfs_key key;
+	int ret;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
+	key.type = BTRFS_DEV_ITEM_KEY;
+	key.offset = device->devid;
+
+	ret = btrfs_search_slot(trans, chunk_root, &key, path, 0, 1);
+	if (ret > 0)
+		ret = -ENOENT;
+	if (ret < 0)
+		goto out_free_path;
+
+	leaf = path->nodes[0];
+	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
+
+	btrfs_set_device_id(leaf, item, device->devid);
+	btrfs_set_device_type(leaf, item, device->type);
+	btrfs_set_device_io_align(leaf, item, device->io_align);
+	btrfs_set_device_io_width(leaf, item, device->io_width);
+	btrfs_set_device_sector_size(leaf, item, device->sector_size);
+	btrfs_set_device_total_bytes(leaf, item, device->total_bytes);
+	btrfs_set_device_bytes_used(leaf, item, device->bytes_used);
+	btrfs_mark_buffer_dirty(leaf);
+
+out_free_path:
+	btrfs_free_path(path);
+	return ret;
+}
+
+/*
+ * grow the filesystem-wide total in the super block copy by the
+ * difference between new_size and the device's current total_bytes,
+ * then write the device back through btrfs_update_device() and
+ * return its result.
+ *
+ * NOTE(review): device->total_bytes itself is not changed here --
+ * confirm the caller is expected to update it.
+ */
+static int __btrfs_grow_device(struct btrfs_trans_handle *trans,
+			       struct btrfs_device *device, u64 new_size)
+{
+	struct btrfs_super_block *super_copy;
+	u64 grow_by;
+	u64 fs_total;
+
+	super_copy = &device->dev_root->fs_info->super_copy;
+	fs_total = btrfs_super_total_bytes(super_copy);
+	grow_by = new_size - device->total_bytes;
+
+	btrfs_set_super_total_bytes(super_copy, fs_total + grow_by);
+	return btrfs_update_device(trans, device);
+}
+
+/*
+ * locked wrapper around __btrfs_grow_device(): takes the chunk locks
+ * of the device's root for the duration of the call and passes the
+ * result through.
+ */
+int btrfs_grow_device(struct btrfs_trans_handle *trans,
+		      struct btrfs_device *device, u64 new_size)
+{
+	struct btrfs_root *dev_root = device->dev_root;
+	int ret;
+
+	lock_chunks(dev_root);
+	ret = __btrfs_grow_device(trans, device, new_size);
+	unlock_chunks(dev_root);
+
+	return ret;
+}
+
+static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,