aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--fs/btrfs/disk-io.c1116
1 files changed, 643 insertions, 473 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index a8f652dc940..6b092a1c4e3 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -30,6 +30,7 @@
#include <linux/slab.h>
#include <linux/migrate.h>
#include <linux/ratelimit.h>
+#include <linux/uuid.h>
#include <asm/unaligned.h>
#include "compat.h"
#include "ctree.h"
@@ -46,6 +47,7 @@
#include "check-integrity.h"
#include "rcu-string.h"
#include "dev-replace.h"
+#include "raid56.h"
#ifdef CONFIG_X86
#include <asm/cpufeature.h>
@@ -56,17 +58,20 @@ static void end_workqueue_fn(struct btrfs_work *work);
static void free_fs_root(struct btrfs_root *root);
static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
int read_only);
-static void btrfs_destroy_ordered_operations(struct btrfs_root *root);
+static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
+ struct btrfs_root *root);
static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
struct btrfs_root *root);
-static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t);
+static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t);
static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root);
static int btrfs_destroy_marked_extents(struct btrfs_root *root,
struct extent_io_tree *dirty_pages,
int mark);
static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
struct extent_io_tree *pinned_extents);
+static int btrfs_cleanup_transaction(struct btrfs_root *root);
+static void btrfs_error_commit_super(struct btrfs_root *root);
/*
* end_io_wq structs are used to do processing in task context when an IO is
@@ -147,7 +152,7 @@ static struct btrfs_lockdep_keyset {
{ .id = BTRFS_DEV_TREE_OBJECTID, .name_stem = "dev" },
{ .id = BTRFS_FS_TREE_OBJECTID, .name_stem = "fs" },
{ .id = BTRFS_CSUM_TREE_OBJECTID, .name_stem = "csum" },
- { .id = BTRFS_ORPHAN_OBJECTID, .name_stem = "orphan" },
+ { .id = BTRFS_QUOTA_TREE_OBJECTID, .name_stem = "quota" },
{ .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" },
{ .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" },
{ .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" },
@@ -220,7 +225,7 @@ static struct extent_map *btree_get_extent(struct inode *inode,
em->bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
write_lock(&em_tree->lock);
- ret = add_extent_mapping(em_tree, em);
+ ret = add_extent_mapping(em_tree, em, 0);
if (ret == -EEXIST) {
free_extent_map(em);
em = lookup_extent_mapping(em_tree, start, len);
@@ -236,7 +241,7 @@ out:
return em;
}
-u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len)
+u32 btrfs_csum_data(char *data, u32 seed, size_t len)
{
return crc32c(seed, data, len);
}
@@ -272,7 +277,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
if (err)
return 1;
cur_len = min(len, map_len - (offset - map_start));
- crc = btrfs_csum_data(root, kaddr + offset - map_start,
+ crc = btrfs_csum_data(kaddr + offset - map_start,
crc, cur_len);
len -= cur_len;
offset += cur_len;
@@ -352,6 +357,49 @@ out:
}
/*
+ * Return 0 if the superblock checksum type matches the checksum value of that
+ * algorithm. Pass the raw disk superblock data.
+ */
+static int btrfs_check_super_csum(char *raw_disk_sb)
+{
+ struct btrfs_super_block *disk_sb =
+ (struct btrfs_super_block *)raw_disk_sb;
+ u16 csum_type = btrfs_super_csum_type(disk_sb);
+ int ret = 0;
+
+ if (csum_type == BTRFS_CSUM_TYPE_CRC32) {
+ u32 crc = ~(u32)0;
+ const int csum_size = sizeof(crc);
+ char result[csum_size];
+
+ /*
+ * The super_block structure does not span the whole
+ * BTRFS_SUPER_INFO_SIZE range, we expect that the unused space
+ * is filled with zeros and is included in the checkum.
+ */
+ crc = btrfs_csum_data(raw_disk_sb + BTRFS_CSUM_SIZE,
+ crc, BTRFS_SUPER_INFO_SIZE - BTRFS_CSUM_SIZE);
+ btrfs_csum_final(crc, result);
+
+ if (memcmp(raw_disk_sb, result, csum_size))
+ ret = 1;
+
+ if (ret && btrfs_super_generation(disk_sb) < 10) {
+ printk(KERN_WARNING "btrfs: super block crcs don't match, older mkfs detected\n");
+ ret = 0;
+ }
+ }
+
+ if (csum_type >= ARRAY_SIZE(btrfs_csum_sizes)) {
+ printk(KERN_ERR "btrfs: unsupported checksum algorithm %u\n",
+ csum_type);
+ ret = 1;
+ }
+
+ return ret;
+}
+
+/*
* helper to read a given tree block, doing retries as required when
* the checksums don't match and we have alternate mirrors to try.
*/
@@ -420,7 +468,7 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root,
static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
{
struct extent_io_tree *tree;
- u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
+ u64 start = page_offset(page);
u64 found_start;
struct extent_buffer *eb;
@@ -528,41 +576,6 @@ static noinline int check_leaf(struct btrfs_root *root,
return 0;
}
-struct extent_buffer *find_eb_for_page(struct extent_io_tree *tree,
- struct page *page, int max_walk)
-{
- struct extent_buffer *eb;
- u64 start = page_offset(page);
- u64 target = start;
- u64 min_start;
-
- if (start < max_walk)
- min_start = 0;
- else
- min_start = start - max_walk;
-
- while (start >= min_start) {
- eb = find_extent_buffer(tree, start, 0);
- if (eb) {
- /*
- * we found an extent buffer and it contains our page
- * horray!
- */
- if (eb->start <= target &&
- eb->start + eb->len > target)
- return eb;
-
- /* we found an extent buffer that wasn't for us */
- free_extent_buffer(eb);
- return NULL;
- }
- if (start == 0)
- break;
- start -= PAGE_CACHE_SIZE;
- }
- return NULL;
-}
-
static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
struct extent_state *state, int mirror)
{
@@ -611,6 +624,12 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
goto err;
}
found_level = btrfs_header_level(eb);
+ if (found_level >= BTRFS_MAX_LEVEL) {
+ btrfs_info(root->fs_info, "bad tree block level %d\n",
+ (int)btrfs_header_level(eb));
+ ret = -EIO;
+ goto err;
+ }
btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
eb, found_level);
@@ -634,13 +653,19 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
if (!ret)
set_extent_buffer_uptodate(eb);
err:
- if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) {
- clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags);
+ if (reads_done &&
+ test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
btree_readahead_hook(root, eb, eb->start, ret);
- }
- if (ret)
+ if (ret) {
+ /*
+ * our io error hook is going to dec the io pages
+ * again, we have to make sure it has something
+ * to decrement
+ */
+ atomic_inc(&eb->io_pages);
clear_extent_buffer_uptodate(eb);
+ }
free_extent_buffer(eb);
out:
return ret;
@@ -654,6 +679,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror)
eb = (struct extent_buffer *)page->private;
set_bit(EXTENT_BUFFER_IOERR, &eb->bflags);
eb->read_mirror = failed_mirror;
+ atomic_dec(&eb->io_pages);
if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
btree_readahead_hook(root, eb, eb->start, -EIO);
return -EIO; /* we fixed nothing */
@@ -670,17 +696,23 @@ static void end_workqueue_bio(struct bio *bio, int err)
end_io_wq->work.flags = 0;
if (bio->bi_rw & REQ_WRITE) {
- if (end_io_wq->metadata == 1)
+ if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA)
btrfs_queue_worker(&fs_info->endio_meta_write_workers,
&end_io_wq->work);
- else if (end_io_wq->metadata == 2)
+ else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE)
btrfs_queue_worker(&fs_info->endio_freespace_worker,
&end_io_wq->work);
+ else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
+ btrfs_queue_worker(&fs_info->endio_raid56_workers,
+ &end_io_wq->work);
else
btrfs_queue_worker(&fs_info->endio_write_workers,
&end_io_wq->work);
} else {
- if (end_io_wq->metadata)
+ if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56)
+ btrfs_queue_worker(&fs_info->endio_raid56_workers,
+ &end_io_wq->work);
+ else if (end_io_wq->metadata)
btrfs_queue_worker(&fs_info->endio_meta_workers,
&end_io_wq->work);
else
@@ -695,6 +727,7 @@ static void end_workqueue_bio(struct bio *bio, int err)
* 0 - if data
* 1 - if normal metadta
* 2 - if writing to the free space cache area
+ * 3 - raid parity work
*/
int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
int metadata)
@@ -946,18 +979,20 @@ static int btree_writepages(struct address_space *mapping,
struct writeback_control *wbc)
{
struct extent_io_tree *tree;
+ struct btrfs_fs_info *fs_info;
+ int ret;
+
tree = &BTRFS_I(mapping->host)->io_tree;
if (wbc->sync_mode == WB_SYNC_NONE) {
- struct btrfs_root *root = BTRFS_I(mapping->host)->root;
- u64 num_dirty;
- unsigned long thresh = 32 * 1024 * 1024;
if (wbc->for_kupdate)
return 0;
+ fs_info = BTRFS_I(mapping->host)->root->fs_info;
/* this is a bit racy, but that's ok */
- num_dirty = root->fs_info->dirty_metadata_bytes;
- if (num_dirty < thresh)
+ ret = percpu_counter_compare(&fs_info->dirty_metadata_bytes,
+ BTRFS_DIRTY_METADATA_THRESH);
+ if (ret < 0)
return 0;
}
return btree_write_cache_pages(mapping, wbc);
@@ -974,17 +1009,12 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags)
{
if (PageWriteback(page) || PageDirty(page))
return 0;
- /*
- * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing
- * slab allocation from alloc_extent_state down the callchain where
- * it'd hit a BUG_ON as those flags are not allowed.
- */
- gfp_flags &= ~GFP_SLAB_BUG_MASK;
- return try_release_extent_buffer(page, gfp_flags);
+ return try_release_extent_buffer(page);
}
-static void btree_invalidatepage(struct page *page, unsigned long offset)
+static void btree_invalidatepage(struct page *page, unsigned int offset,
+ unsigned int length)
{
struct extent_io_tree *tree;
tree = &BTRFS_I(page->mapping->host)->io_tree;
@@ -1125,24 +1155,16 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct extent_buffer *buf)
{
+ struct btrfs_fs_info *fs_info = root->fs_info;
+
if (btrfs_header_generation(buf) ==
- root->fs_info->running_transaction->transid) {
+ fs_info->running_transaction->transid) {
btrfs_assert_tree_locked(buf);
if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) {
- spin_lock(&root->fs_info->delalloc_lock);
- if (root->fs_info->dirty_metadata_bytes >= buf->len)
- root->fs_info->dirty_metadata_bytes -= buf->len;
- else {
- spin_unlock(&root->fs_info->delalloc_lock);
- btrfs_panic(root->fs_info, -EOVERFLOW,
- "Can't clear %lu bytes from "
- " dirty_mdatadata_bytes (%llu)",
- buf->len,
- root->fs_info->dirty_metadata_bytes);
- }
- spin_unlock(&root->fs_info->delalloc_lock);
-
+ __percpu_counter_add(&fs_info->dirty_metadata_bytes,
+ -buf->len,
+ fs_info->dirty_metadata_batch);
/* ugh, clear_extent_buffer_dirty needs to lock the page */
btrfs_set_lock_blocking(buf);
clear_extent_buffer_dirty(buf);
@@ -1170,6 +1192,8 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->objectid = objectid;
root->last_trans = 0;
root->highest_objectid = 0;
+ root->nr_delalloc_inodes = 0;
+ root->nr_ordered_extents = 0;
root->name = NULL;
root->inode_tree = RB_ROOT;
INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
@@ -1178,9 +1202,19 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
INIT_LIST_HEAD(&root->dirty_list);
INIT_LIST_HEAD(&root->root_list);
+ INIT_LIST_HEAD(&root->delalloc_inodes);
+ INIT_LIST_HEAD(&root->delalloc_root);
+ INIT_LIST_HEAD(&root->ordered_extents);
+ INIT_LIST_HEAD(&root->ordered_root);
+ INIT_LIST_HEAD(&root->logged_list[0]);
+ INIT_LIST_HEAD(&root->logged_list[1]);
spin_lock_init(&root->orphan_lock);
spin_lock_init(&root->inode_lock);
+ spin_lock_init(&root->delalloc_lock);
+ spin_lock_init(&root->ordered_extent_lock);
spin_lock_init(&root->accounting_lock);
+ spin_lock_init(&root->log_extents_lock[0]);
+ spin_lock_init(&root->log_extents_lock[1]);
mutex_init(&root->objectid_mutex);
mutex_init(&root->log_mutex);
init_waitqueue_head(&root->log_writer_wait);
@@ -1191,6 +1225,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
atomic_set(&root->log_writers, 0);
atomic_set(&root->log_batch, 0);
atomic_set(&root->orphan_inodes, 0);
+ atomic_set(&root->refs, 1);
root->log_transid = 0;
root->last_log_commit = 0;
extent_io_tree_init(&root->dirty_log_pages,
@@ -1209,39 +1244,6 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
spin_lock_init(&root->root_item_lock);
}
-static int __must_check find_and_setup_root(struct btrfs_root *tree_root,
- struct btrfs_fs_info *fs_info,
- u64 objectid,
- struct btrfs_root *root)
-{
- int ret;
- u32 blocksize;
- u64 generation;
-
- __setup_root(tree_root->nodesize, tree_root->leafsize,
- tree_root->sectorsize, tree_root->stripesize,
- root, fs_info, objectid);
- ret = btrfs_find_last_root(tree_root, objectid,
- &root->root_item, &root->root_key);
- if (ret > 0)
- return -ENOENT;
- else if (ret < 0)
- return ret;
-
- generation = btrfs_root_generation(&root->root_item);
- blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
- root->commit_root = NULL;
- root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
- blocksize, generation);
- if (!root->node || !btrfs_buffer_uptodate(root->node, generation, 0)) {
- free_extent_buffer(root->node);
- root->node = NULL;
- return -EIO;
- }
- root->commit_root = btrfs_root_node(root);
- return 0;
-}
-
static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info)
{
struct btrfs_root *root = kzalloc(sizeof(*root), GFP_NOFS);
@@ -1260,6 +1262,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
struct btrfs_key key;
int ret = 0;
u64 bytenr;
+ uuid_le uuid;
root = btrfs_alloc_root(fs_info);
if (!root)
@@ -1276,6 +1279,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
0, objectid, NULL, 0, 0, 0);
if (IS_ERR(leaf)) {
ret = PTR_ERR(leaf);
+ leaf = NULL;
goto fail;
}
@@ -1308,6 +1312,8 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
btrfs_set_root_used(&root->root_item, leaf->len);
btrfs_set_root_last_snapshot(&root->root_item, 0);
btrfs_set_root_dirid(&root->root_item, 0);
+ uuid_le_gen(&uuid);
+ memcpy(root->root_item.uuid, uuid.b, BTRFS_UUID_SIZE);
root->root_item.drop_level = 0;
key.objectid = objectid;
@@ -1319,11 +1325,16 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
btrfs_tree_unlock(leaf);
+ return root;
+
fail:
- if (ret)
- return ERR_PTR(ret);
+ if (leaf) {
+ btrfs_tree_unlock(leaf);
+ free_extent_buffer(leaf);
+ }
+ kfree(root);
- return root;
+ return ERR_PTR(ret);
}
static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
@@ -1417,63 +1428,73 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
return 0;
}
-struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root,
- struct btrfs_key *location)
+struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
+ struct btrfs_key *key)
{
struct btrfs_root *root;
struct btrfs_fs_info *fs_info = tree_root->fs_info;
struct btrfs_path *path;
- struct extent_buffer *l;
u64 generation;
u32 blocksize;
- int ret = 0;
- int slot;
+ int ret;
- root = btrfs_alloc_root(fs_info);
- if (!root)
+ path = btrfs_alloc_path();
+ if (!path)
return ERR_PTR(-ENOMEM);
- if (location->offset == (u64)-1) {
- ret = find_and_setup_root(tree_root, fs_info,
- location->objectid, root);
- if (ret) {
- kfree(root);
- return ERR_PTR(ret);
- }
- goto out;
+
+ root = btrfs_alloc_root(fs_info);
+ if (!root) {
+ ret = -ENOMEM;
+ goto alloc_fail;
}
__setup_root(tree_root->nodesize, tree_root->leafsize,
tree_root->sectorsize, tree_root->stripesize,
- root, fs_info, location->objectid);
+ root, fs_info, key->objectid);
- path = btrfs_alloc_path();
- if (!path) {
- kfree(root);
- return ERR_PTR(-ENOMEM);
- }
- ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0);
- if (ret == 0) {
- l = path->nodes[0];
- slot = path->slots[0];
- btrfs_read_root_item(tree_root, l, slot, &root->root_item);
- memcpy(&root->root_key, location, sizeof(*location));
- }
- btrfs_free_path(path);
+ ret = btrfs_find_root(tree_root, key, path,
+ &root->root_item, &root->root_key);
if (ret) {
- kfree(root);
if (ret > 0)
ret = -ENOENT;
- return ERR_PTR(ret);
+ goto find_fail;
}
generation = btrfs_root_generation(&root->root_item);
blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
blocksize, generation);
+ if (!root->node) {
+ ret = -ENOMEM;
+ goto find_fail;
+ } else if (!btrfs_buffer_uptodate(root->node, generation, 0)) {
+ ret = -EIO;
+ goto read_fail;
+ }
root->commit_root = btrfs_root_node(root);
- BUG_ON(!root->node); /* -ENOMEM */
out:
- if (location->objectid != BTRFS_TREE_LOG_OBJECTID) {
+ btrfs_free_path(path);
+ return root;
+
+read_fail:
+ free_extent_buffer(root->node);
+find_fail:
+ kfree(root);
+alloc_fail:
+ root = ERR_PTR(ret);
+ goto out;
+}
+
+struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
+ struct btrfs_key *location)
+{
+ struct btrfs_root *root;
+
+ root = btrfs_read_tree_root(tree_root, location);
+ if (IS_ERR(root))
+ return root;
+
+ if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
root->ref_cows = 1;
btrfs_check_and_init_root_item(&root->root_item);
}
@@ -1481,6 +1502,66 @@ out:
return root;
}
+int btrfs_init_fs_root(struct btrfs_root *root)
+{
+ int ret;
+
+ root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
+ root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
+ GFP_NOFS);
+ if (!root->free_ino_pinned || !root->free_ino_ctl) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ btrfs_init_free_ino_ctl(root);
+ mutex_init(&root->fs_commit_mutex);
+ spin_lock_init(&root->cache_lock);
+ init_waitqueue_head(&root->cache_wait);
+
+ ret = get_anon_bdev(&root->anon_dev);
+ if (ret)
+ goto fail;
+ return 0;
+fail:
+ kfree(root->free_ino_ctl);
+ kfree(root->free_ino_pinned);
+ return ret;
+}
+
+struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
+ u64 root_id)
+{
+ struct btrfs_root *root;
+
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ root = radix_tree_lookup(&fs_info->fs_roots_radix,
+ (unsigned long)root_id);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
+ return root;
+}
+
+int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
+ struct btrfs_root *root)
+{
+ int ret;
+
+ ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+ if (ret)
+ return ret;
+
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ ret = radix_tree_insert(&fs_info->fs_roots_radix,
+ (unsigned long)root->root_key.objectid,
+ root);
+ if (ret == 0)
+ root->in_radix = 1;
+ spin_unlock(&fs_info->fs_roots_radix_lock);
+ radix_tree_preload_end();
+
+ return ret;
+}
+
struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
struct btrfs_key *location)
{
@@ -1501,58 +1582,30 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
return fs_info->quota_root ? fs_info->quota_root :
ERR_PTR(-ENOENT);
again:
- spin_lock(&fs_info->fs_roots_radix_lock);
- root = radix_tree_lookup(&fs_info->fs_roots_radix,
- (unsigned long)location->objectid);
- spin_unlock(&fs_info->fs_roots_radix_lock);
+ root = btrfs_lookup_fs_root(fs_info, location->objectid);
if (root)
return root;
- root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location);
+ root = btrfs_read_fs_root(fs_info->tree_root, location);
if (IS_ERR(root))
return root;
- root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS);
- root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned),
- GFP_NOFS);
- if (!root->free_ino_pinned || !root->free_ino_ctl) {
- ret = -ENOMEM;
+ if (btrfs_root_refs(&root->root_item) == 0) {
+ ret = -ENOENT;
goto fail;
}
- btrfs_init_free_ino_ctl(root);
- mutex_init(&root->fs_commit_mutex);
- spin_lock_init(&root->cache_lock);
- init_waitqueue_head(&root->cache_wait);
-
- ret = get_anon_bdev(&root->anon_dev);
+ ret = btrfs_init_fs_root(root);
if (ret)
goto fail;
- if (btrfs_root_refs(&root->root_item) == 0) {
- ret = -ENOENT;
- goto fail;
- }
-
ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid);
if (ret < 0)
goto fail;
if (ret == 0)
root->orphan_item_inserted = 1;
- ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
- if (ret)
- goto fail;
-
- spin_lock(&fs_info->fs_roots_radix_lock);
- ret = radix_tree_insert(&fs_info->fs_roots_radix,
- (unsigned long)root->root_key.objectid,
- root);
- if (ret == 0)
- root->in_radix = 1;
-
- spin_unlock(&fs_info->fs_roots_radix_lock);
- radix_tree_preload_end();
+ ret = btrfs_insert_fs_root(fs_info, root);
if (ret) {
if (ret == -EEXIST) {
free_fs_root(root);
@@ -1560,10 +1613,6 @@ again:
}
goto fail;
}
-
- ret = btrfs_find_dead_roots(fs_info->tree_root,
- root->root_key.objectid);
- WARN_ON(ret);
return root;
fail:
free_fs_root(root);
@@ -1635,17 +1684,38 @@ static void end_workqueue_fn(struct btrfs_work *work)
static int cleaner_kthread(void *arg)
{
struct btrfs_root *root = arg;
+ int again;
do {
- if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
- mutex_trylock(&root->fs_info->cleaner_mutex)) {
- btrfs_run_delayed_iputs(root);
- btrfs_clean_old_snapshots(root);
+ again = 0;
+
+ /* Make the cleaner go to sleep early. */
+ if (btrfs_need_cleaner_sleep(root))
+ goto sleep;
+
+ if (!mutex_trylock(&root->fs_info->cleaner_mutex))
+ goto sleep;
+
+ /*
+ * Avoid the problem that we change the status of the fs
+ * during the above check and trylock.
+ */
+ if (btrfs_need_cleaner_sleep(root)) {
mutex_unlock(&root->fs_info->cleaner_mutex);
- btrfs_run_defrag_inodes(root->fs_info);
+ goto sleep;
}
- if (!try_to_freeze()) {
+ btrfs_run_delayed_iputs(root);
+ again = btrfs_clean_one_deleted_snapshot(root);
+ mutex_unlock(&root->fs_info->cleaner_mutex);
+
+ /*
+ * The defragger has dealt with the R/O remount and umount,
+ * needn't do anything special here.
+ */
+ btrfs_run_defrag_inodes(root->fs_info);
+sleep:
+ if (!try_to_freeze() && !again) {
set_current_state(TASK_INTERRUPTIBLE);
if (!kthread_should_stop())
schedule();
@@ -1678,7 +1748,7 @@ static int transaction_kthread(void *arg)
}
now = get_seconds();
- if (!cur->blocked &&
+ if (cur->state < TRANS_STATE_BLOCKED &&
(now < cur->start_time || now - cur->start_time < 30)) {
spin_unlock(&root->fs_info->trans_lock);
delay = HZ * 5;
@@ -1914,35 +1984,60 @@ static noinline int next_root_backup(struct btrfs_fs_info *info,
return 0;
}
+/* helper to cleanup workers */
+static void btrfs_stop_all_workers(struct btrfs_fs_info *fs_info)
+{
+ btrfs_stop_workers(&fs_info->generic_worker);
+ btrfs_stop_workers(&fs_info->fixup_workers);
+ btrfs_stop_workers(&fs_info->delalloc_workers);
+ btrfs_stop_workers(&fs_info->workers);
+ btrfs_stop_workers(&fs_info->endio_workers);
+ btrfs_stop_workers(&fs_info->endio_meta_workers);
+ btrfs_stop_workers(&fs_info->endio_raid56_workers);
+ btrfs_stop_workers(&fs_info->rmw_workers);
+ btrfs_stop_workers(&fs_info->endio_meta_write_workers);
+ btrfs_stop_workers(&fs_info->endio_write_workers);
+ btrfs_stop_workers(&fs_info->endio_freespace_worker);
+ btrfs_stop_workers(&fs_info->submit_workers);
+ btrfs_stop_workers(&fs_info->delayed_workers);
+ btrfs_stop_workers(&fs_info->caching_workers);
+ btrfs_stop_workers(&fs_info->readahead_workers);
+ btrfs_stop_workers(&fs_info->flush_workers);
+ btrfs_stop_workers(&fs_info->qgroup_rescan_workers);
+}
+
/* helper to cleanup tree roots */
static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
{
free_extent_buffer(info->tree_root->node);
free_extent_buffer(info->tree_root->commit_root);
- free_extent_buffer(info->dev_root->node);
- free_extent_buffer(info->dev_root->commit_root);
- free_extent_buffer(info->extent_root->node);
- free_extent_buffer(info->extent_root->commit_root);
- free_extent_buffer(info->csum_root->node);
- free_extent_buffer(info->csum_root->commit_root);
- if (info->quota_root) {
- free_extent_buffer(info->quota_root->node);
- free_extent_buffer(info->quota_root->commit_root);
- }
-
info->tree_root->node = NULL;
info->tree_root->commit_root = NULL;
- info->dev_root->node = NULL;
- info->dev_root->commit_root = NULL;
- info->extent_root->node = NULL;
- info->extent_root->commit_root = NULL;
- info->csum_root->node = NULL;
- info->csum_root->commit_root = NULL;
+
+ if (info->dev_root) {
+ free_extent_buffer(info->dev_root->node);
+ free_extent_buffer(info->dev_root->commit_root);
+ info->dev_root->node = NULL;
+ info->dev_root->commit_root = NULL;
+ }
+ if (info->extent_root) {
+ free_extent_buffer(info->extent_root->node);
+ free_extent_buffer(info->extent_root->commit_root);
+ info->extent_root->node = NULL;
+ info->extent_root->commit_root = NULL;
+ }
+ if (info->csum_root) {
+ free_extent_buffer(info->csum_root->node);
+ free_extent_buffer(info->csum_root->commit_root);
+ info->csum_root->node = NULL;
+ info->csum_root->commit_root = NULL;
+ }
if (info->quota_root) {
+ free_extent_buffer(info->quota_root->node);
+ free_extent_buffer(info->quota_root->commit_root);
info->quota_root->node = NULL;
info->quota_root->commit_root = NULL;
}
-
if (chunk_root) {
free_extent_buffer(info->chunk_root->node);
free_extent_buffer(info->chunk_root->commit_root);
@@ -1951,6 +2046,36 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
}
}
+static void del_fs_roots(struct btrfs_fs_info *fs_info)
+{
+ int ret;
+ struct btrfs_root *gang[8];
+ int i;
+
+ while (!list_empty(&fs_info->dead_roots)) {
+ gang[0] = list_entry(fs_info->dead_roots.next,
+ struct btrfs_root, root_list);
+ list_del(&gang[0]->root_list);
+
+ if (gang[0]->in_radix) {
+ btrfs_drop_and_free_fs_root(fs_info, gang[0]);
+ } else {
+ free_extent_buffer(gang[0]->node);
+ free_extent_buffer(gang[0]->commit_root);
+ btrfs_put_fs_root(gang[0]);
+ }
+ }
+
+ while (1) {
+ ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
+ (void **)gang, 0,
+ ARRAY_SIZE(gang));
+ if (!ret)
+ break;
+ for (i = 0; i < ret; i++)
+ btrfs_drop_and_free_fs_root(fs_info, gang[i]);
+ }
+}
int open_ctree(struct super_block *sb,
struct btrfs_fs_devices *fs_devices,
@@ -1980,14 +2105,8 @@ int open_ctree(struct super_block *sb,
int backup_index = 0;
tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info);
- extent_root = fs_info->extent_root = btrfs_alloc_root(fs_info);
- csum_root = fs_info->csum_root = btrfs_alloc_root(fs_info);
chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
- dev_root = fs_info->dev_root = btrfs_alloc_root(fs_info);
- quota_root = fs_info->quota_root = btrfs_alloc_root(fs_info);
-
- if (!tree_root || !extent_root || !csum_root ||
- !chunk_root || !dev_root || !quota_root) {
+ if (!tree_root || !chunk_root) {
err = -ENOMEM;
goto fail;
}
@@ -2004,10 +2123,24 @@ int open_ctree(struct super_block *sb,
goto fail_srcu;
}
+ ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0);
+ if (ret) {
+ err = ret;
+ goto fail_bdi;
+ }
+ fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE *
+ (1 + ilog2(nr_cpu_ids));
+
+ ret = percpu_counter_init(&fs_info->delalloc_bytes, 0);
+ if (ret) {
+ err = ret;
+ goto fail_dirty_metadata_bytes;
+ }
+
fs_info->btree_inode = new_inode(sb);
if (!fs_info->btree_inode) {
err = -ENOMEM;
- goto fail_bdi;
+ goto fail_delalloc_bytes;
}
mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);
@@ -2016,18 +2149,19 @@ int open_ctree(struct super_block *sb,
INIT_LIST_HEAD(&fs_info->trans_list);
INIT_LIST_HEAD(&fs_info->dead_roots);
INIT_LIST_HEAD(&fs_info->delayed_iputs);
- INIT_LIST_HEAD(&fs_info->delalloc_inodes);
- INIT_LIST_HEAD(&fs_info->ordered_operations);
+ INIT_LIST_HEAD(&fs_info->delalloc_roots);
INIT_LIST_HEAD(&fs_info->caching_block_groups);
- spin_lock_init(&fs_info->delalloc_lock);
+ spin_lock_init(&fs_info->delalloc_root_lock);
spin_lock_init(&fs_info->trans_lock);
spin_lock_init(&fs_info->fs_roots_radix_lock);
spin_lock_init(&fs_info->delayed_iput_lock);
spin_lock_init(&fs_info->defrag_inodes_lock);
spin_lock_init(&fs_info->free_chunk_lock);
spin_lock_init(&fs_info->tree_mod_seq_lock);
+ spin_lock_init(&fs_info->super_lock);
rwlock_init(&fs_info->tree_mod_log_lock);
mutex_init(&fs_info->reloc_mutex);
+ seqlock_init(&fs_info->profiles_lock);
init_completion(&fs_info->kobj_unregister);
INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots);
@@ -2048,12 +2182,11 @@ int open_ctree(struct super_block *sb,
atomic_set(&fs_info->async_submit_draining, 0);
atomic_set(&fs_info->nr_async_bios, 0);
atomic_set(&fs_info->defrag_running, 0);
- atomic_set(&fs_info->tree_mod_seq, 0);
+ atomic64_set(&fs_info->tree_mod_seq, 0);
fs_info->sb = sb;
fs_info->max_inline = 8192 * 1024;
fs_info->metadata_ratio = 0;
fs_info->defrag_inodes = RB_ROOT;
- fs_info->trans_no_join = 0;
fs_info->free_chunk_space = 0;
fs_info->tree_mod_log = RB_ROOT;
@@ -2064,8 +2197,8 @@ int open_ctree(struct super_block *sb,
fs_info->thread_pool_size = min_t(unsigned long,
num_online_cpus() + 2, 8);
- INIT_LIST_HEAD(&fs_info->ordered_extents);
- spin_lock_init(&fs_info->ordered_extent_lock);
+ INIT_LIST_HEAD(&fs_info->ordered_roots);
+ spin_lock_init(&fs_info->ordered_root_lock);
fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
GFP_NOFS);
if (!fs_info->delayed_root) {
@@ -2126,6 +2259,7 @@ int open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->block_group_cache_lock);
fs_info->block_group_cache_tree = RB_ROOT;
+ fs_info->first_logical_byte = (u64)-1;
extent_io_tree_init(&fs_info->freed_extents[0],
fs_info->btree_inode->i_mapping);
@@ -2151,11 +2285,14 @@ int open_ctree(struct super_block *sb,
mutex_init(&fs_info->dev_replace.lock);
spin_lock_init(&fs_info->qgroup_lock);
+ mutex_init(&fs_info->qgroup_ioctl_lock);
fs_info->qgroup_tree = RB_ROOT;
INIT_LIST_HEAD(&fs_info->dirty_qgroups);
fs_info->qgroup_seq = 1;
fs_info->quota_enabled = 0;
fs_info->pending_quota_state = 0;
+ fs_info->qgroup_ulist = NULL;
+ mutex_init(&fs_info->qgroup_rescan_lock);
btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
btrfs_init_free_cluster(&fs_info->data_alloc_cluster);
@@ -2165,16 +2302,41 @@ int open_ctree(struct super_block *sb,
init_waitqueue_head(&fs_info->transaction_blocked_wait);
init_waitqueue_head(&fs_info->async_submit_wait);
+ ret = btrfs_alloc_stripe_hash_table(fs_info);
+ if (ret) {
+ err = ret;
+ goto fail_alloc;
+ }
+
__setup_root(4096, 4096, 4096, 4096, tree_root,
fs_info, BTRFS_ROOT_TREE_OBJECTID);
invalidate_bdev(fs_devices->latest_bdev);
+
+ /*
+ * Read super block and check the signature bytes only
+ */
bh = btrfs_read_dev_super(fs_devices->latest_bdev);
if (!bh) {
err = -EINVAL;
goto fail_alloc;
}
+ /*
+ * We want to check superblock checksum, the type is stored inside.
+ * Pass the whole disk block of size BTRFS_SUPER_INFO_SIZE (4k).
+ */
+ if (btrfs_check_super_csum(bh->b_data)) {
+ printk(KERN_ERR "btrfs: superblock checksum mismatch\n");
+ err = -EINVAL;
+ goto fail_alloc;
+ }
+
+ /*
+ * super_copy is zeroed at allocation time and we never touch the
+ * following bytes up to INFO_SIZE, the checksum is calculated from
+ * the whole block of INFO_SIZE
+ */
memcpy(fs_info->super_copy, bh->b_data, sizeof(*fs_info->super_copy));
memcpy(fs_info->super_for_commit, fs_info->super_copy,
sizeof(*fs_info->super_for_commit));
@@ -2182,20 +2344,21 @@ int open_ctree(struct super_block *sb,
memcpy(fs_info->fsid, fs_info->super_copy->fsid, BTRFS_FSID_SIZE);
- disk_super = fs_info->super_copy;
- if (!btrfs_super_root(disk_super))
- goto fail_alloc;
-
- /* check FS state, whether FS is broken. */
- fs_info->fs_state |= btrfs_super_flags(disk_super);
-
ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY);
if (ret) {
printk(KERN_ERR "btrfs: superblock contains fatal errors\n");
- err = ret;
+ err = -EINVAL;
goto fail_alloc;
}
+ disk_super = fs_info->super_copy;
+ if (!btrfs_super_root(disk_super))
+ goto fail_alloc;
+
+ /* check FS state, whether FS is broken. */
+ if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_ERROR)
+ set_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state);
+
/*
* run through our array of backup supers and setup
* our ring pointer to the oldest one
@@ -2247,6 +2410,9 @@ int open_ctree(struct super_block *sb,
if (tree_root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO;
+ if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
+ printk(KERN_ERR "btrfs: has skinny extents\n");
+
/*
* flag our filesystem as having big metadata blocks if
* they are bigger than the page size
@@ -2261,6 +2427,8 @@ int open_ctree(struct super_block *sb,
leafsize = btrfs_super_leafsize(disk_super);
sectorsize = btrfs_super_sectorsize(disk_super);
stripesize = btrfs_super_stripesize(disk_super);
+ fs_info->dirty_metadata_batch = leafsize * (1 + ilog2(nr_cpu_ids));
+ fs_info->delalloc_batch = sectorsize * 512 * (1 + ilog2(nr_cpu_ids));
/*
* mixed block groups end up with duplicate but slightly offset
@@ -2274,6 +2442,10 @@ int open_ctree(struct super_block *sb,
goto fail_alloc;
}
+ /*
+ * Needn't use the lock because there is no other task which will
+ * update the flag.
+ */
btrfs_set_super_incompat_flags(disk_super, features);
features = btrfs_super_compat_ro_flags(disk_super) &
@@ -2332,6 +2504,12 @@ int open_ctree(struct super_block *sb,
btrfs_init_workers(&fs_info->endio_meta_write_workers,
"endio-meta-write", fs_info->thread_pool_size,
&fs_info->generic_worker);
+ btrfs_init_workers(&fs_info->endio_raid56_workers,
+ "endio-raid56", fs_info->thread_pool_size,
+ &fs_info->generic_worker);
+ btrfs_init_workers(&fs_info->rmw_workers,
+ "rmw", fs_info->thread_pool_size,
+ &fs_info->generic_worker);
btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
fs_info->thread_pool_size,
&fs_info->generic_worker);
@@ -2343,6 +2521,8 @@ int open_ctree(struct super_block *sb,
btrfs_init_workers(&fs_info->readahead_workers, "readahead",
fs_info->thread_pool_size,
&fs_info->generic_worker);
+ btrfs_init_workers(&fs_info->qgroup_rescan_workers, "qgroup-rescan", 1,
+ &fs_info->generic_worker);
/*
* endios are largely parallel and should have a very
@@ -2350,6 +2530,8 @@ int open_ctree(struct super_block *sb,
*/
fs_info->endio_workers.idle_thresh = 4;
fs_info->endio_meta_workers.idle_thresh = 4;
+ fs_info->endio_raid56_workers.idle_thresh = 4;
+ fs_info->rmw_workers.idle_thresh = 2;
fs_info->endio_write_workers.idle_thresh = 2;
fs_info->endio_meta_write_workers.idle_thresh = 2;
@@ -2366,6 +2548,8 @@ int open_ctree(struct super_block *sb,
ret |= btrfs_start_workers(&fs_info->fixup_workers);
ret |= btrfs_start_workers(&fs_info->endio_workers);
ret |= btrfs_start_workers(&fs_info->endio_meta_workers);
+ ret |= btrfs_start_workers(&fs_info->rmw_workers);
+ ret |= btrfs_start_workers(&fs_info->endio_raid56_workers);
ret |= btrfs_start_workers(&fs_info->endio_meta_write_workers);
ret |= btrfs_start_workers(&fs_info->endio_write_workers);
ret |= btrfs_start_workers(&fs_info->endio_freespace_worker);
@@ -2373,6 +2557,7 @@ int open_ctree(struct super_block *sb,
ret |= btrfs_start_workers(&fs_info->caching_workers);
ret |= btrfs_start_workers(&fs_info->readahead_workers);
ret |= btrfs_start_workers(&fs_info->flush_workers);
+ ret |= btrfs_start_workers(&fs_info->qgroup_rescan_workers);
if (ret) {
err = -ENOMEM;
goto fail_sb_buffer;
@@ -2390,8 +2575,7 @@ int open_ctree(struct super_block *sb,
sb->s_blocksize = sectorsize;
sb->s_blocksize_bits = blksize_bits(sectorsize);
- if (strncmp((char *)(&disk_super->magic), BTRFS_MAGIC,
- sizeof(disk_super->magic))) {
+ if (disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) {
printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id);
goto fail_sb_buffer;
}
@@ -2421,8 +2605,8 @@ int open_ctree(struct super_block *sb,
chunk_root->node = read_tree_block(chunk_root,
btrfs_super_chunk_root(disk_super),
blocksize, generation);
- BUG_ON(!chunk_root->node); /* -ENOMEM */
- if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
+ if (!chunk_root->node ||
+ !test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) {
printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n",
sb->s_id);
goto fail_tree_roots;
@@ -2472,33 +2656,44 @@ retry_root_backup:
btrfs_set_root_node(&tree_root->root_item, tree_root->node);
tree_root->commit_root = btrfs_root_node(tree_root);
- ret = find_and_setup_root(tree_root, fs_info,
- BTRFS_EXTENT_TREE_OBJECTID, extent_root);
- if (ret)
+ location.objectid = BTRFS_EXTENT_TREE_OBJECTID;
+ location.type = BTRFS_ROOT_ITEM_KEY;
+ location.offset = 0;
+
+ extent_root = btrfs_read_tree_root(tree_root, &location);
+ if (IS_ERR(extent_root)) {
+ ret = PTR_ERR(extent_root);
goto recovery_tree_root;
+ }
extent_root->track_dirty = 1;
+ fs_info->extent_root = extent_root;
- ret = find_and_setup_root(tree_root, fs_info,
- BTRFS_DEV_TREE_OBJECTID, dev_root);
- if (ret)
+ location.objectid = BTRFS_DEV_TREE_OBJECTID;
+ dev_root = btrfs_read_tree_root(tree_root, &location);
+ if (IS_ERR(dev_root)) {
+ ret = PTR_ERR(dev_root);
goto recovery_tree_root;
+ }
dev_root->track_dirty = 1;
+ fs_info->dev_root = dev_root;
+ btrfs_init_devices_late(fs_info);
- ret = find_and_setup_root(tree_root, fs_info,
- BTRFS_CSUM_TREE_OBJECTID, csum_root);
- if (ret)
+ location.objectid = BTRFS_CSUM_TREE_OBJECTID;
+ csum_root = btrfs_read_tree_root(tree_root, &location);
+ if (IS_ERR(csum_root)) {
+ ret = PTR_ERR(csum_root);
goto recovery_tree_root;
+ }
csum_root->track_dirty = 1;
+ fs_info->csum_root = csum_root;
- ret = find_and_setup_root(tree_root, fs_info,
- BTRFS_QUOTA_TREE_OBJECTID, quota_root);
- if (ret) {
- kfree(quota_root);
- quota_root = fs_info->quota_root = NULL;
- } else {
+ location.objectid = BTRFS_QUOTA_TREE_OBJECTID;
+ quota_root = btrfs_read_tree_root(tree_root, &location);
+ if (!IS_ERR(quota_root)) {
quota_root->track_dirty = 1;
fs_info->quota_enabled = 1;
fs_info->pending_quota_state = 1;
+ fs_info->quota_root = quota_root;
}
fs_info->generation = generation;
@@ -2607,6 +2802,13 @@ retry_root_backup:
log_tree_root->node = read_tree_block(tree_root, bytenr,
blocksize,
generation + 1);
+ if (!log_tree_root->node ||
+ !extent_buffer_uptodate(log_tree_root->node)) {
+ printk(KERN_ERR "btrfs: failed to read log tree\n");
+ free_extent_buffer(log_tree_root->node);
+ kfree(log_tree_root);
+ goto fail_trans_kthread;
+ }
/* returns with log_tree_root freed on success */
ret = btrfs_recover_log_trees(log_tree_root);
if (ret) {
@@ -2644,11 +2846,9 @@ retry_root_backup:
location.objectid = BTRFS_FS_TREE_OBJECTID;
location.type = BTRFS_ROOT_ITEM_KEY;
- location.offset = (u64)-1;
+ location.offset = 0;
fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location);
- if (!fs_info->fs_root)
- goto fail_qgroup;
if (IS_ERR(fs_info->fs_root)) {
err = PTR_ERR(fs_info->fs_root);
goto fail_qgroup;
@@ -2680,12 +2880,16 @@ retry_root_backup:
return ret;
}
+ btrfs_qgroup_rescan_resume(fs_info);
+
return 0;
fail_qgroup:
btrfs_free_qgroup_config(fs_info);
fail_trans_kthread:
kthread_stop(fs_info->transaction_kthread);
+ btrfs_cleanup_transaction(fs_info->tree_root);
+ del_fs_roots(fs_info);
fail_cleaner:
kthread_stop(fs_info->cleaner_kthread);
@@ -2694,40 +2898,32 @@ fail_cleaner:
* kthreads
*/
filemap_write_and_wait(fs_info->btree_inode->i_mapping);
- invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
fail_block_groups:
+ btrfs_put_block_group_cache(fs_info);
btrfs_free_block_groups(fs_info);
fail_tree_roots:
free_root_pointers(fs_info, 1);
+ invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
fail_sb_buffer:
- btrfs_stop_workers(&fs_info->generic_worker);
- btrfs_stop_workers(&fs_info->readahead_workers);
- btrfs_stop_workers(&fs_info->fixup_workers);
- btrfs_stop_workers(&fs_info->delalloc_workers);
- btrfs_stop_workers(&fs_info->workers);
- btrfs_stop_workers(&fs_info->endio_workers);
- btrfs_stop_workers(&fs_info->endio_meta_workers);
- btrfs_stop_workers(&fs_info->endio_meta_write_workers);
- btrfs_stop_workers(&fs_info->endio_write_workers);
- btrfs_stop_workers(&fs_info->endio_freespace_worker);
- btrfs_stop_workers(&fs_info->submit_workers);
- btrfs_stop_workers(&fs_info->delayed_workers);
- btrfs_stop_workers(&fs_info->caching_workers);
- btrfs_stop_workers(&fs_info->flush_workers);
+ btrfs_stop_all_workers(fs_info);
fail_alloc:
fail_iput:
btrfs_mapping_tree_free(&fs_info->mapping_tree);
- invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
iput(fs_info->btree_inode);
+fail_delalloc_bytes:
+ percpu_counter_destroy(&fs_info->delalloc_bytes);
+fail_dirty_metadata_bytes:
+ percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
fail_bdi:
bdi_destroy(&fs_info->bdi);
fail_srcu:
cleanup_srcu_struct(&fs_info->subvol_srcu);
fail:
+ btrfs_free_stripe_hash_table(fs_info);
btrfs_close_devices(fs_info->fs_devices);
return err;
@@ -2795,8 +2991,7 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
super = (struct btrfs_super_block *)bh->b_data;
if (btrfs_super_bytenr(super) != bytenr ||
- strncmp((char *)(&super->magic), BTRFS_MAGIC,
- sizeof(super->magic))) {
+ super->magic != cpu_to_le64(BTRFS_MAGIC)) {
brelse(bh);
continue;
}
@@ -2845,7 +3040,10 @@ static int write_dev_supers(struct btrfs_device *device,
if (wait) {
bh = __find_get_block(device->bdev, bytenr / 4096,
BTRFS_SUPER_INFO_SIZE);
- BUG_ON(!bh);
+ if (!bh) {
+ errors++;
+ continue;
+ }
wait_on_buffer(bh);
if (!buffer_uptodate(bh))
errors++;
@@ -2860,7 +3058,7 @@ static int write_dev_supers(struct btrfs_device *device,
btrfs_set_super_bytenr(sb, bytenr);
crc = ~(u32)0;
- crc = btrfs_csum_data(NULL, (char *)sb +
+ crc = btrfs_csum_data((char *)sb +
BTRFS_CSUM_SIZE, crc,
BTRFS_SUPER_INFO_SIZE -
BTRFS_CSUM_SIZE);
@@ -2872,6 +3070,13 @@ static int write_dev_supers(struct btrfs_device *device,
*/
bh = __getblk(device->bdev, bytenr / 4096,
BTRFS_SUPER_INFO_SIZE);
+ if (!bh) {
+ printk(KERN_ERR "btrfs: couldn't get super "
+ "buffer head for bytenr %Lu\n", bytenr);
+ errors++;
+ continue;
+ }
+
memcpy(bh->b_data, sb, BTRFS_SUPER_INFO_SIZE);
/* one reference for submit_bh */
@@ -2954,7 +3159,7 @@ static int write_dev_flush(struct btrfs_device *device, int wait)
* caller
*/
device->flush_bio = NULL;
- bio = bio_alloc(GFP_NOFS, 0);
+ bio = btrfs_io_bio_alloc(GFP_NOFS, 0);
if (!bio)
return -ENOMEM;
@@ -3076,11 +3281,16 @@ int btrfs_calc_num_tolerated_disk_barrier_failures(
((flags & BTRFS_BLOCK_GROUP_PROFILE_MASK)
== 0)))
num_tolerated_disk_barrier_failures = 0;
- else if (num_tolerated_disk_barrier_failures > 1
- &&
- (flags & (BTRFS_BLOCK_GROUP_RAID1 |
- BTRFS_BLOCK_GROUP_RAID10)))
- num_tolerated_disk_barrier_failures = 1;
+ else if (num_tolerated_disk_barrier_failures > 1) {
+ if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
+ BTRFS_BLOCK_GROUP_RAID5 |
+ BTRFS_BLOCK_GROUP_RAID10)) {
+ num_tolerated_disk_barrier_failures = 1;
+ } else if (flags &
+ BTRFS_BLOCK_GROUP_RAID6) {
+ num_tolerated_disk_barrier_failures = 2;
+ }
+ }
}
}
up_read(&sinfo->groups_sem);
@@ -3089,7 +3299,7 @@ int btrfs_calc_num_tolerated_disk_barrier_failures(
return num_tolerated_disk_barrier_failures;
}
-int write_all_supers(struct btrfs_root *root, int max_mirrors)
+static int write_all_supers(struct btrfs_root *root, int max_mirrors)
{
struct list_head *head;
struct btrfs_device *dev;
@@ -3185,7 +3395,9 @@ int write_ctree_super(struct btrfs_trans_handle *trans,
return ret;
}
-void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
+/* Drop a fs root from the radix tree and free it. */
+void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info,
+ struct btrfs_root *root)
{
spin_lock(&fs_info->fs_roots_radix_lock);
radix_tree_delete(&fs_info->fs_roots_radix,
@@ -3195,6 +3407,11 @@ void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
if (btrfs_root_refs(&root->root_item) == 0)
synchronize_srcu(&fs_info->subvol_srcu);
+ if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state)) {
+ btrfs_free_log(NULL, root);
+ btrfs_free_log_root_tree(NULL, fs_info);
+ }
+
__btrfs_remove_free_space_cache(root->free_ino_pinned);
__btrfs_remove_free_space_cache(root->free_ino_ctl);
free_fs_root(root);
@@ -3211,38 +3428,12 @@ static void free_fs_root(struct btrfs_root *root)
kfree(root->free_ino_ctl);
kfree(root->free_ino_pinned);
kfree(root->name);
- kfree(root);
+ btrfs_put_fs_root(root);
}
-static void del_fs_roots(struct btrfs_fs_info *fs_info)
+void btrfs_free_fs_root(struct btrfs_root *root)
{
- int ret;
- struct btrfs_root *gang[8];
- int i;
-
- while (!list_empty(&fs_info->dead_roots)) {
- gang[0] = list_entry(fs_info->dead_roots.next,
- struct btrfs_root, root_list);
- list_del(&gang[0]->root_list);
-
- if (gang[0]->in_radix) {
- btrfs_free_fs_root(fs_info, gang[0]);
- } else {
- free_extent_buffer(gang[0]->node);
- free_extent_buffer(gang[0]->commit_root);
- kfree(gang[0]);
- }
- }
-
- while (1) {
- ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
- (void **)gang, 0,
- ARRAY_SIZE(gang));
- if (!ret)
- break;
- for (i = 0; i < ret; i++)
- btrfs_free_fs_root(fs_info, gang[i]);
- }
+ free_fs_root(root);
}
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
@@ -3280,8 +3471,8 @@ int btrfs_commit_super(struct btrfs_root *root)
mutex_lock(&root->fs_info->cleaner_mutex);
btrfs_run_delayed_iputs(root);
- btrfs_clean_old_snapshots(root);
mutex_unlock(&root->fs_info->cleaner_mutex);
+ wake_up_process(root->fs_info->cleaner_kthread);
/* wait until ongoing cleanup work done */
down_write(&root->fs_info->cleanup_work_sem);
@@ -3339,7 +3530,7 @@ int close_ctree(struct btrfs_root *root)
printk(KERN_ERR "btrfs: commit super ret %d\n", ret);
}
- if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
+ if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
btrfs_error_commit_super(root);
btrfs_put_block_group_cache(fs_info);
@@ -3352,46 +3543,20 @@ int close_ctree(struct btrfs_root *root)
btrfs_free_qgroup_config(root->fs_info);
- if (fs_info->delalloc_bytes) {
- printk(KERN_INFO "btrfs: at unmount delalloc count %llu\n",
- (unsigned long long)fs_info->delalloc_bytes);
- }
-
- free_extent_buffer(fs_info->extent_root->node);
- free_extent_buffer(fs_info->extent_root->commit_root);
- free_extent_buffer(fs_info->tree_root->node);
- free_extent_buffer(fs_info->tree_root->commit_root);
- free_extent_buffer(fs_info->chunk_root->node);
- free_extent_buffer(fs_info->chunk_root->commit_root);
- free_extent_buffer(fs_info->dev_root->node);
- free_extent_buffer(fs_info->dev_root->commit_root);
- free_extent_buffer(fs_info->csum_root->node);
- free_extent_buffer(fs_info->csum_root->commit_root);
- if (fs_info->quota_root) {
- free_extent_buffer(fs_info->quota_root->node);
- free_extent_buffer(fs_info->quota_root->commit_root);
+ if (percpu_counter_sum(&fs_info->delalloc_bytes)) {
+ printk(KERN_INFO "btrfs: at unmount delalloc count %lld\n",
+ percpu_counter_sum(&fs_info->delalloc_bytes));
}
btrfs_free_block_groups(fs_info);
+ btrfs_stop_all_workers(fs_info);
+
del_fs_roots(fs_info);
- iput(fs_info->btree_inode);
+ free_root_pointers(fs_info, 1);
- btrfs_stop_workers(&fs_info->generic_worker);
- btrfs_stop_workers(&fs_info->fixup_workers);
- btrfs_stop_workers(&fs_info->delalloc_workers);
- btrfs_stop_workers(&fs_info->workers);
- btrfs_stop_workers(&fs_info->endio_workers);
- btrfs_stop_workers(&fs_info->endio_meta_workers);
- btrfs_stop_workers(&fs_info->endio_meta_write_workers);
- btrfs_stop_workers(&fs_info->endio_write_workers);
- btrfs_stop_workers(&fs_info->endio_freespace_worker);
- btrfs_stop_workers(&fs_info->submit_workers);
- btrfs_stop_workers(&fs_info->delayed_workers);
- btrfs_stop_workers(&fs_info->caching_workers);
- btrfs_stop_workers(&fs_info->readahead_workers);
- btrfs_stop_workers(&fs_info->flush_workers);
+ iput(fs_info->btree_inode);
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
if (btrfs_test_opt(root, CHECK_INTEGRITY))
@@ -3401,9 +3566,13 @@ int close_ctree(struct btrfs_root *root)
btrfs_close_devices(fs_info->fs_devices);
btrfs_mapping_tree_free(&fs_info->mapping_tree);
+ percpu_counter_destroy(&fs_info->dirty_metadata_bytes);
+ percpu_counter_destroy(&fs_info->delalloc_bytes);
bdi_destroy(&fs_info->bdi);
cleanup_srcu_struct(&fs_info->subvol_srcu);
+ btrfs_free_stripe_hash_table(fs_info);
+
return 0;
}
@@ -3443,11 +3612,10 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
(unsigned long long)transid,
(unsigned long long)root->fs_info->generation);
was_dirty = set_extent_buffer_dirty(buf);
- if (!was_dirty) {
- spin_lock(&root->fs_info->delalloc_lock);
- root->fs_info->dirty_metadata_bytes += buf->len;
- spin_unlock(&root->fs_info->delalloc_lock);
- }
+ if (!was_dirty)
+ __percpu_counter_add(&root->fs_info->dirty_metadata_bytes,
+ buf->len,
+ root->fs_info->dirty_metadata_batch);
}
static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
@@ -3457,8 +3625,7 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
* looks as though older kernels can get into trouble with
* this code, they end up stuck in balance_dirty_pages forever
*/
- u64 num_dirty;
- unsigned long thresh = 32 * 1024 * 1024;
+ int ret;
if (current->flags & PF_MEMALLOC)
return;
@@ -3466,9 +3633,9 @@ static void __btrfs_btree_balance_dirty(struct btrfs_root *root,
if (flush_delayed)
btrfs_balance_delayed_items(root);
- num_dirty = root->fs_info->dirty_metadata_bytes;
-
- if (num_dirty > thresh) {
+ ret = percpu_counter_compare(&root->fs_info->dirty_metadata_bytes,
+ BTRFS_DIRTY_METADATA_THRESH);
+ if (ret > 0) {
balance_dirty_pages_ratelimited(
root->fs_info->btree_inode->i_mapping);
}
@@ -3494,18 +3661,13 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid)
static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
int read_only)
{
- if (btrfs_super_csum_type(fs_info->super_copy) >= ARRAY_SIZE(btrfs_csum_sizes)) {
- printk(KERN_ERR "btrfs: unsupported checksum algorithm\n");
- return -EINVAL;
- }
-
- if (read_only)
- return 0;
-
+ /*
+ * Placeholder for checks
+ */
return 0;
}
-void btrfs_error_commit_super(struct btrfs_root *root)
+static void btrfs_error_commit_super(struct btrfs_root *root)
{
mutex_lock(&root->fs_info->cleaner_mutex);
btrfs_run_delayed_iputs(root);
@@ -3518,7 +3680,8 @@ void btrfs_error_commit_super(struct btrfs_root *root)
btrfs_cleanup_transaction(root);
}
-static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
+static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
+ struct btrfs_root *root)
{
struct btrfs_inode *btrfs_inode;
struct list_head splice;
@@ -3526,54 +3689,59 @@ static void btrfs_destroy_ordered_operations(struct btrfs_root *root)
INIT_LIST_HEAD(&splice);
mutex_lock(&root->fs_info->ordered_operations_mutex);
- spin_lock(&root->fs_info->ordered_extent_lock);
+ spin_lock(&root->fs_info->ordered_root_lock);
- list_splice_init(&root->fs_info->ordered_operations, &splice);
+ list_splice_init(&t->ordered_operations, &splice);
while (!list_empty(&splice)) {
btrfs_inode = list_entry(splice.next, struct btrfs_inode,
ordered_operations);
list_del_init(&btrfs_inode->ordered_operations);
+ spin_unlock(&root->fs_info->ordered_root_lock);
btrfs_invalidate_inodes(btrfs_inode->root);
+
+ spin_lock(&root->fs_info->ordered_root_lock);
}
- spin_unlock(&root->fs_info->ordered_extent_lock);
+ spin_unlock(&root->fs_info->ordered_root_lock);
mutex_unlock(&root->fs_info->ordered_operations_mutex);
}
static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
{
- struct list_head splice;
struct btrfs_ordered_extent *ordered;
- struct inode *inode;
- INIT_LIST_HEAD(&splice);
-
- spin_lock(&root->fs_info->ordered_extent_lock);
-
- list_splice_init(&root->fs_info->ordered_extents, &splice);
- while (!list_empty(&splice)) {
- ordered = list_entry(splice.next, struct btrfs_ordered_extent,
- root_extent_list);
+ spin_lock(&root->ordered_extent_lock);
+ /*
+ * This will just short circuit the ordered completion stuff which will
+ * make sure the ordered extent gets properly cleaned up.
+ */
+ list_for_each_entry(ordered, &root->ordered_extents,
+ root_extent_list)
+ set_bit(BTRFS_ORDERED_IOERR, &ordered->flags);
+ spin_unlock(&root->ordered_extent_lock);
+}
- list_del_init(&ordered->root_extent_list);
- atomic_inc(&ordered->refs);
+static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_root *root;
+ struct list_head splice;
- /* the inode may be getting freed (in sys_unlink path). */
- inode = igrab(ordered->inode);
+ INIT_LIST_HEAD(&splice);
- spin_unlock(&root->fs_info->ordered_extent_lock);
- if (inode)
- iput(inode);
+ spin_lock(&fs_info->ordered_root_lock);
+ list_splice_init(&fs_info->ordered_roots, &splice);
+ while (!list_empty(&splice)) {
+ root = list_first_entry(&splice, struct btrfs_root,
+ ordered_root);
+ list_del_init(&root->ordered_root);
- atomic_set(&ordered->refs, 1);
- btrfs_put_ordered_extent(ordered);
+ btrfs_destroy_ordered_extents(root);
- spin_lock(&root->fs_info->ordered_extent_lock);
+ cond_resched_lock(&fs_info->ordered_root_lock);
}
-
- spin_unlock(&root->fs_info->ordered_extent_lock);
+ spin_unlock(&fs_info->ordered_root_lock);
}
int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
@@ -3594,11 +3762,12 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
}
while ((node = rb_first(&delayed_refs->root)) != NULL) {
- ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
+ struct btrfs_delayed_ref_head *head = NULL;
+ bool pin_bytes = false;
+ ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
atomic_set(&ref->refs, 1);
if (btrfs_delayed_ref_is_head(ref)) {
- struct btrfs_delayed_ref_head *head;
head = btrfs_delayed_node_to_head(ref);
if (!mutex_trylock(&head->mutex)) {
@@ -3614,17 +3783,25 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
continue;
}
- kfree(head->extent_op);
+ if (head->must_insert_reserved)
+ pin_bytes = true;
+ btrfs_free_delayed_extent_op(head->extent_op);
delayed_refs->num_heads--;
if (list_empty(&head->cluster))
delayed_refs->num_heads_ready--;
list_del_init(&head->cluster);
}
+
ref->in_tree = 0;
rb_erase(&ref->rb_node, &delayed_refs->root);
delayed_refs->num_entries--;
-
spin_unlock(&delayed_refs->lock);
+ if (head) {
+ if (pin_bytes)
+ btrfs_pin_extent(root, ref->bytenr,
+ ref->num_bytes, 1);
+ mutex_unlock(&head->mutex);
+ }
btrfs_put_delayed_ref(ref);
cond_resched();
@@ -3636,7 +3813,7 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
return ret;
}
-static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
+static void btrfs_evict_pending_snapshots(struct btrfs_transaction *t)
{
struct btrfs_pending_snapshot *snapshot;
struct list_head splice;
@@ -3649,10 +3826,8 @@ static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t)
snapshot = list_entry(splice.next,
struct btrfs_pending_snapshot,
list);
-
+ snapshot->error = -ECANCELED;
list_del_init(&snapshot->list);
-
- kfree(snapshot);
}
}
@@ -3663,19 +3838,49 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root)
INIT_LIST_HEAD(&splice);
- spin_lock(&root->fs_info->delalloc_lock);
- list_splice_init(&root->fs_info->delalloc_inodes, &splice);
+ spin_lock(&root->delalloc_lock);
+ list_splice_init(&root->delalloc_inodes, &splice);
while (!list_empty(&splice)) {
- btrfs_inode = list_entry(splice.next, struct btrfs_inode,
- delalloc_inodes);
+ btrfs_inode = list_first_entry(&splice, struct btrfs_inode,
+ delalloc_inodes);
list_del_init(&btrfs_inode->delalloc_inodes);
+ clear_bit(BTRFS_INODE_IN_DELALLOC_LIST,
+ &btrfs_inode->runtime_flags);
+ spin_unlock(&root->delalloc_lock);
btrfs_invalidate_inodes(btrfs_inode->root);
+
+ spin_lock(&root->delalloc_lock);
}
- spin_unlock(&root->fs_info->delalloc_lock);
+ spin_unlock(&root->delalloc_lock);
+}
+
+static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info)
+{
+ struct btrfs_root *root;
+ struct list_head splice;
+
+ INIT_LIST_HEAD(&splice);
+
+ spin_lock(&fs_info->delalloc_root_lock);
+ list_splice_init(&fs_info->delalloc_roots, &splice);
+ while (!list_empty(&splice)) {
+ root = list_first_entry(&splice, struct btrfs_root,
+ delalloc_root);
+ list_del_init(&root->delalloc_root);
+ root = btrfs_grab_fs_root(root);
+ BUG_ON(!root);
+ spin_unlock(&fs_info->delalloc_root_lock);
+
+ btrfs_destroy_delalloc_inodes(root);
+ btrfs_put_fs_root(root);
+
+ spin_lock(&fs_info->delalloc_root_lock);
+ }
+ spin_unlock(&fs_info->delalloc_root_lock);
}
static int btrfs_destroy_marked_extents(struct btrfs_root *root,
@@ -3683,13 +3888,9 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
int mark)
{
int ret;
- struct page *page;
- struct inode *btree_inode = root->fs_info->btree_inode;
struct extent_buffer *eb;
u64 start = 0;
u64 end;
- u64 offset;
- unsigned long index;
while (1) {
ret = find_first_extent_bit(dirty_pages, start, &start, &end,
@@ -3699,36 +3900,17 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
while (start <= end) {
- index = start >> PAGE_CACHE_SHIFT;
- start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
- page = find_get_page(btree_inode->i_mapping, index);
- if (!page)
+ eb = btrfs_find_tree_block(root, start,
+ root->leafsize);
+ start += root->leafsize;
+ if (!eb)
continue;
- offset = page_offset(page);
-
- spin_lock(&dirty_pages->buffer_lock);
- eb = radix_tree_lookup(
- &(&BTRFS_I(page->mapping->host)->io_tree)->buffer,
- offset >> PAGE_CACHE_SHIFT);
- spin_unlock(&dirty_pages->buffer_lock);
- if (eb)
- ret = test_and_clear_bit(EXTENT_BUFFER_DIRTY,
- &eb->bflags);
- if (PageWriteback(page))
- end_page_writeback(page);
-
- lock_page(page);
- if (PageDirty(page)) {
- clear_page_dirty_for_io(page);
- spin_lock_irq(&page->mapping->tree_lock);
- radix_tree_tag_clear(&page->mapping->page_tree,
- page_index(page),
- PAGECACHE_TAG_DIRTY);
- spin_unlock_irq(&page->mapping->tree_lock);
- }
+ wait_on_extent_buffer_writeback(eb);
- unlock_page(page);
- page_cache_release(page);
+ if (test_and_clear_bit(EXTENT_BUFFER_DIRTY,
+ &eb->bflags))
+ clear_extent_buffer_dirty(eb);
+ free_extent_buffer_stale(eb);
}
}
@@ -3782,34 +3964,32 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
cur_trans->dirty_pages.dirty_bytes);
- /* FIXME: cleanup wait for commit */
- cur_trans->in_commit = 1;
- cur_trans->blocked = 1;
+ cur_trans->state = TRANS_STATE_COMMIT_START;
wake_up(&root->fs_info->transaction_blocked_wait);
- cur_trans->blocked = 0;
- wake_up(&root->fs_info->transaction_wait);
+ btrfs_evict_pending_snapshots(cur_trans);
- cur_trans->commit_done = 1;
- wake_up(&cur_trans->commit_wait);
+ cur_trans->state = TRANS_STATE_UNBLOCKED;
+ wake_up(&root->fs_info->transaction_wait);
btrfs_destroy_delayed_inodes(root);
btrfs_assert_delayed_root_empty(root);
- btrfs_destroy_pending_snapshots(cur_trans);
-
btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages,
EXTENT_DIRTY);
btrfs_destroy_pinned_extent(root,
root->fs_info->pinned_extents);
+ cur_trans->state =TRANS_STATE_COMPLETED;
+ wake_up(&cur_trans->commit_wait);
+
/*
memset(cur_trans, 0, sizeof(*cur_trans));
kmem_cache_free(btrfs_transaction_cachep, cur_trans);
*/
}
-int btrfs_cleanup_transaction(struct btrfs_root *root)
+static int btrfs_cleanup_transaction(struct btrfs_root *root)
{
struct btrfs_transaction *t;
LIST_HEAD(list);
@@ -3818,51 +3998,39 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
spin_lock(&root->fs_info->trans_lock);
list_splice_init(&root->fs_info->trans_list, &list);
- root->fs_info->trans_no_join = 1;
+ root->fs_info->running_transaction = NULL;
spin_unlock(&root->fs_info->trans_lock);
while (!list_empty(&list)) {
t = list_entry(list.next, struct btrfs_transaction, list);
- if (!t)
- break;
- btrfs_destroy_ordered_operations(root);
+ btrfs_destroy_ordered_operations(t, root);
- btrfs_destroy_ordered_extents(root);
+ btrfs_destroy_all_ordered_extents(root->fs_info);
btrfs_destroy_delayed_refs(t, root);
- btrfs_block_rsv_release(root,
- &root->fs_info->trans_block_rsv,
- t->dirty_pages.dirty_bytes);
-
- /* FIXME: cleanup wait for commit */
- t->in_commit = 1;
- t->blocked = 1;
+ /*
+ * FIXME: cleanup wait for commit
+ * We needn't acquire the lock here, because we are during
+ * the umount, there is no other task which will change it.
+ */
+ t->state = TRANS_STATE_COMMIT_START;
smp_mb();
if (waitqueue_active(&root->fs_info->transaction_blocked_wait))
wake_up(&root->fs_info->transaction_blocked_wait);
- t->blocked = 0;
+ btrfs_evict_pending_snapshots(t);
+
+ t->state = TRANS_STATE_UNBLOCKED;
smp_mb();
if (waitqueue_active(&root->fs_info->transaction_wait))
wake_up(&root->fs_info->transaction_wait);
- t->commit_done = 1;
- smp_mb();
- if (waitqueue_active(&t->commit_wait))
- wake_up(&t->commit_wait);
-
btrfs_destroy_delayed_inodes(root);
btrfs_assert_delayed_root_empty(root);
- btrfs_destroy_pending_snapshots(t);
-
- btrfs_destroy_delalloc_inodes(root);
-
- spin_lock(&root->fs_info->trans_lock);
- root->fs_info->running_transaction = NULL;
- spin_unlock(&root->fs_info->trans_lock);
+ btrfs_destroy_all_delalloc_inodes(root->fs_info);
btrfs_destroy_marked_extents(root, &t->dirty_pages,
EXTENT_DIRTY);
@@ -3870,15 +4038,17 @@ int btrfs_cleanup_transaction(struct btrfs_root *root)
btrfs_destroy_pinned_extent(root,
root->fs_info->pinned_extents);
+ t->state = TRANS_STATE_COMPLETED;
+ smp_mb();
+ if (waitqueue_active(&t->commit_wait))
+ wake_up(&t->commit_wait);
+
atomic_set(&t->use_count, 0);
list_del_init(&t->list);
memset(t, 0, sizeof(*t));
kmem_cache_free(btrfs_transaction_cachep, t);
}
- spin_lock(&root->fs_info->trans_lock);
- root->fs_info->trans_no_join = 0;
- spin_unlock(&root->fs_info->trans_lock);
mutex_unlock(&root->fs_info->transaction_kthread_mutex);
return 0;