From d47992f86b307985b3215bcf141d56d1849d71df Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Tue, 21 May 2013 23:17:23 -0400 Subject: mm: change invalidatepage prototype to accept length Currently there is no way to truncate a partial page where the end truncate point is not at the end of the page. This is because it was not needed and the functionality was enough for the file system truncate operation to work properly. However, more file systems now support the punch hole feature and they can benefit from mm supporting truncating a page just up to a certain point. Specifically, with this functionality truncate_inode_pages_range() can be changed so it supports truncating a partial page at the end of the range (currently it will BUG_ON() if 'end' is not at the end of the page). This commit changes the invalidatepage() address space operation prototype to accept the range to be invalidated and updates all the instances of it. We also change block_invalidatepage() in the same way and actually make use of the new length argument, implementing range invalidation. Actual file system implementations will follow, except the file systems where the changes are really simple and should not change the behaviour in any way. Implementation for truncate_page_range(), which will be able to accept page unaligned ranges, will follow as well. Signed-off-by: Lukas Czerner Cc: Andrew Morton Cc: Hugh Dickins --- fs/btrfs/disk-io.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e7b3cb5286a..40c7bc30007 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1013,7 +1013,8 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags) return try_release_extent_buffer(page); } -static void btree_invalidatepage(struct page *page, unsigned long offset) +static void btree_invalidatepage(struct page *page, unsigned int offset, + unsigned int length) { struct extent_io_tree *tree; tree = &BTRFS_I(page->mapping->host)->io_tree; -- cgit v1.2.3-18-g5258 From 15b0a89d71ae2573ed1c3b168f08320527eed34e Mon Sep 17 00:00:00 2001 From: Henrik Nordvik Date: Mon, 29 Apr 2013 18:09:23 +0000 Subject: Btrfs: fix check on same raid type flag twice The code checked for the raid 5 flag in two else-if branches, so the second branch could never be reached. Probably a copy-paste bug. Signed-off-by: Henrik Nordvik Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b8b60b660c8..aecf788ed81 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3258,7 +3258,7 @@ int btrfs_calc_num_tolerated_disk_barrier_failures( BTRFS_BLOCK_GROUP_RAID10)) { num_tolerated_disk_barrier_failures = 1; } else if (flags & - BTRFS_BLOCK_GROUP_RAID5) { + BTRFS_BLOCK_GROUP_RAID6) { num_tolerated_disk_barrier_failures = 2; } } -- cgit v1.2.3-18-g5258 From 1e8f915868c59be4d6e49d9aff928454a5d5d569 Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Mon, 6 May 2013 11:03:27 +0000 Subject: Btrfs: introduce qgroup_ulist to avoid frequently allocating/freeing ulist When doing qgroup accounting, we call ulist_alloc()/ulist_free() every time we want to walk the qgroup tree. By introducing 'qgroup_ulist', we only need to call ulist_alloc()/ulist_free() once.
This reduces the sys time spent allocating memory; see the measurements below (fsstress -p 4 -n 10000 -d $dir). With this patch: real 0m50.153s user 0m0.081s sys 0m6.294s real 0m51.113s user 0m0.092s sys 0m6.220s real 0m52.610s user 0m0.096s sys 0m6.125s avg 6.213 ----------------------------------------------------- Without the patch: real 0m54.825s user 0m0.061s sys 0m10.665s real 1m6.401s user 0m0.089s sys 0m11.218s real 1m13.768s user 0m0.087s sys 0m10.665s avg 10.849 We can see the sys time is reduced by ~43%. Signed-off-by: Wang Shilong Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index aecf788ed81..df5169b7aad 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2274,6 +2274,7 @@ int open_ctree(struct super_block *sb, fs_info->qgroup_seq = 1; fs_info->quota_enabled = 0; fs_info->pending_quota_state = 0; + fs_info->qgroup_ulist = NULL; mutex_init(&fs_info->qgroup_rescan_lock); btrfs_init_free_cluster(&fs_info->meta_alloc_cluster); -- cgit v1.2.3-18-g5258 From b1b195969fe6d936f8c8bb63abf7efd2cc4cd5cf Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Mon, 13 May 2013 14:42:57 +0000 Subject: Btrfs: cleanup, btrfs_read_fs_root_no_name() doesn't return NULL No need to check for NULL in send.c and disk-io.c. Signed-off-by: Stefan Behrens Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index df5169b7aad..bc2ea9b5304 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2821,8 +2821,6 @@ retry_root_backup: location.offset = (u64)-1; fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); - if (!fs_info->fs_root) - goto fail_qgroup; if (IS_ERR(fs_info->fs_root)) { err = PTR_ERR(fs_info->fs_root); goto fail_qgroup; -- cgit v1.2.3-18-g5258 From d027824564c5fcee19109530b87c94c9908e910a Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Tue, 14 May 2013 10:20:40 +0000 Subject: Btrfs: remove unnecessary ->s_umount in cleaner_kthread() In order to avoid the R/O remount, we acquired the ->s_umount lock while we deleted the dead snapshots and subvolumes. But it is unnecessary, because we have cleaner_mutex. We use cleaner_mutex to protect the process of the dead snapshots/subvolumes deletion. And when we remount the fs to be R/O, we also acquire this mutex to do cleanup after we change the status of the fs. That is, this lock can serialize the above operations: the cleaner can be aware of the status of the fs, and if the cleaner is deleting the dead snapshots/subvolumes, the remount task will wait for it. So it is safe to remove ->s_umount in cleaner_kthread(). Cc: David Sterba Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index bc2ea9b5304..7a54b8e7d12 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1673,24 +1673,40 @@ static void end_workqueue_fn(struct btrfs_work *work) bio_endio(bio, error); } +/* + * If we remount the fs to be R/O, the cleaner needn't do anything except + * sleeping. This function is used to check the status of the fs. 
+ */ +static inline int need_cleaner_sleep(struct btrfs_root *root) +{ + return root->fs_info->sb->s_flags & MS_RDONLY; +} + static int cleaner_kthread(void *arg) { struct btrfs_root *root = arg; + int again; do { - int again = 0; - - if (!(root->fs_info->sb->s_flags & MS_RDONLY) && - down_read_trylock(&root->fs_info->sb->s_umount)) { - if (mutex_trylock(&root->fs_info->cleaner_mutex)) { - btrfs_run_delayed_iputs(root); - again = btrfs_clean_one_deleted_snapshot(root); - mutex_unlock(&root->fs_info->cleaner_mutex); - } - btrfs_run_defrag_inodes(root->fs_info); - up_read(&root->fs_info->sb->s_umount); - } + again = 0; + /* Make the cleaner go to sleep early. */ + if (need_cleaner_sleep(root)) + goto sleep; + + if (!mutex_trylock(&root->fs_info->cleaner_mutex)) + goto sleep; + + btrfs_run_delayed_iputs(root); + again = btrfs_clean_one_deleted_snapshot(root); + mutex_unlock(&root->fs_info->cleaner_mutex); + + /* + * The defragger has dealt with the R/O remount, needn't + * do anything special here. + */ + btrfs_run_defrag_inodes(root->fs_info); +sleep: if (!try_to_freeze() && !again) { set_current_state(TASK_INTERRUPTIBLE); if (!kthread_should_stop()) -- cgit v1.2.3-18-g5258 From 05323cd13503937e71d5c6ef2debf69e51a9634f Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Tue, 14 May 2013 10:20:41 +0000 Subject: Btrfs: make the cleaner complete early when the fs is going to be umounted Cc: David Sterba Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 7a54b8e7d12..06f2c011db0 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1674,12 +1674,14 @@ static void end_workqueue_fn(struct btrfs_work *work) } /* - * If we remount the fs to be R/O, the cleaner needn't do anything except - * sleeping. This function is used to check the status of the fs. + * If we remount the fs to be R/O or umount the fs, the cleaner needn't do + * anything except sleeping. This function is used to check the status of + * the fs. */ static inline int need_cleaner_sleep(struct btrfs_root *root) { - return root->fs_info->sb->s_flags & MS_RDONLY; + return (root->fs_info->sb->s_flags & MS_RDONLY || + btrfs_fs_closing(root->fs_info)); } static int cleaner_kthread(void *arg) @@ -1702,8 +1704,8 @@ mutex_unlock(&root->fs_info->cleaner_mutex); /* - * The defragger has dealt with the R/O remount, needn't - * do anything special here. + * The defragger has dealt with the R/O remount and umount, + * needn't do anything special here. */ btrfs_run_defrag_inodes(root->fs_info); sleep: -- cgit v1.2.3-18-g5258 From dc7f370c05dd024697d4d6c68f91fd04fe8fad1e Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Tue, 14 May 2013 10:20:42 +0000 Subject: Btrfs: move the R/O check out of btrfs_clean_one_deleted_snapshot() If the fs is remounted to be R/O, it is unnecessary to call btrfs_clean_one_deleted_snapshot(), so move the R/O check out of this function. Besides that, it makes the check logic in the caller clearer. 
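In other words, the caller settles into a classic check -> trylock -> recheck shape. A condensed sketch of the resulting loop body (the helper name cleaner_loop_once() is invented for illustration; in the tree this logic stays inlined in cleaner_kthread(), see the diffs around this commit):

/* Sketch: check cheaply, take the lock, then recheck under the lock. */
static int cleaner_loop_once(struct btrfs_root *root)
{
	int again = 0;

	/* Cheap early-out: nothing to clean on an R/O fs. */
	if (need_cleaner_sleep(root))
		goto sleep;

	if (!mutex_trylock(&root->fs_info->cleaner_mutex))
		goto sleep;

	/* The fs may have gone R/O between the check and the trylock. */
	if (need_cleaner_sleep(root)) {
		mutex_unlock(&root->fs_info->cleaner_mutex);
		goto sleep;
	}

	btrfs_run_delayed_iputs(root);
	again = btrfs_clean_one_deleted_snapshot(root);
	mutex_unlock(&root->fs_info->cleaner_mutex);
sleep:
	return again;
}
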
Cc: David Sterba Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 06f2c011db0..5b9b4eb36e5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1699,6 +1699,15 @@ static int cleaner_kthread(void *arg) if (!mutex_trylock(&root->fs_info->cleaner_mutex)) goto sleep; + /* + * Avoid the problem that we change the status of the fs + * during the above check and trylock. + */ + if (need_cleaner_sleep(root)) { + mutex_unlock(&root->fs_info->cleaner_mutex); + goto sleep; + } + btrfs_run_delayed_iputs(root); again = btrfs_clean_one_deleted_snapshot(root); mutex_unlock(&root->fs_info->cleaner_mutex); -- cgit v1.2.3-18-g5258 From babbf170c781f24095336c82ebf18ad272ddb773 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Tue, 14 May 2013 10:20:43 +0000 Subject: Btrfs: make the snap/subv deletion end more early when the fs is R/O The snapshot/subvolume deletion might take a long time, which would make the remount task wait for a long time. This patch improves the problem: we break off the deletion if the fs is remounted to be R/O. It will make the users happy. Cc: David Sterba Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5b9b4eb36e5..8dbd908a3a9 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1673,17 +1673,6 @@ static void end_workqueue_fn(struct btrfs_work *work) bio_endio(bio, error); } -/* - * If we remount the fs to be R/O or umount the fs, the cleaner needn't do - * anything except sleeping. This function is used to check the status of - * the fs. - */ -static inline int need_cleaner_sleep(struct btrfs_root *root) -{ - return (root->fs_info->sb->s_flags & MS_RDONLY || - btrfs_fs_closing(root->fs_info)); -} - static int cleaner_kthread(void *arg) { struct btrfs_root *root = arg; @@ -1693,7 +1682,7 @@ again = 0; /* Make the cleaner go to sleep early. */ - if (need_cleaner_sleep(root)) + if (btrfs_need_cleaner_sleep(root)) goto sleep; if (!mutex_trylock(&root->fs_info->cleaner_mutex)) goto sleep; @@ -1703,7 +1692,7 @@ * Avoid the problem that we change the status of the fs * during the above check and trylock. */ - if (need_cleaner_sleep(root)) { + if (btrfs_need_cleaner_sleep(root)) { mutex_unlock(&root->fs_info->cleaner_mutex); goto sleep; } -- cgit v1.2.3-18-g5258 From cb517eabba4f109810dba2e5f37b0dcf22103065 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 15 May 2013 07:48:19 +0000 Subject: Btrfs: cleanup the similar code of the fs root read There are several functions whose code is similar, such as btrfs_find_last_root() and btrfs_read_fs_root_no_radix(). Besides that, some functions are invoked twice unnecessarily; for example, we are sure that all roots found in btrfs_find_orphan_roots() have their orphan items, so it is unnecessary to check the orphan item again. So clean it up. 
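After the split, reading a fs root decomposes into four small steps: look it up in the radix tree, read it from disk, initialize its in-memory state, and insert it into the radix tree. A rough sketch of how the new helpers compose (the wrapper name is invented, and the root_refs check, orphan-item lookup and -EEXIST retry of the real btrfs_read_fs_root_no_name() are omitted):

/* Simplified composition of the helpers introduced in this patch. */
static struct btrfs_root *read_fs_root_sketch(struct btrfs_fs_info *fs_info,
					      struct btrfs_key *location)
{
	struct btrfs_root *root;
	int ret;

	root = btrfs_lookup_fs_root(fs_info, location->objectid);
	if (root)
		return root;	/* already cached in the radix tree */

	root = btrfs_read_fs_root(fs_info->tree_root, location);
	if (IS_ERR(root))
		return root;	/* btrfs_read_tree_root() failed */

	ret = btrfs_init_fs_root(root);	/* free-ino caches, anon bdev, ... */
	if (!ret)
		ret = btrfs_insert_fs_root(fs_info, root);
	if (ret) {
		free_fs_root(root);
		return ERR_PTR(ret);
	}
	return root;
}
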
Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 282 +++++++++++++++++++++++++++-------------------------- 1 file changed, 146 insertions(+), 136 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8dbd908a3a9..c65a5aac1e4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1234,39 +1234,6 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, spin_lock_init(&root->root_item_lock); } -static int __must_check find_and_setup_root(struct btrfs_root *tree_root, - struct btrfs_fs_info *fs_info, - u64 objectid, - struct btrfs_root *root) -{ - int ret; - u32 blocksize; - u64 generation; - - __setup_root(tree_root->nodesize, tree_root->leafsize, - tree_root->sectorsize, tree_root->stripesize, - root, fs_info, objectid); - ret = btrfs_find_last_root(tree_root, objectid, - &root->root_item, &root->root_key); - if (ret > 0) - return -ENOENT; - else if (ret < 0) - return ret; - - generation = btrfs_root_generation(&root->root_item); - blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); - root->commit_root = NULL; - root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), - blocksize, generation); - if (!root->node || !btrfs_buffer_uptodate(root->node, generation, 0)) { - free_extent_buffer(root->node); - root->node = NULL; - return -EIO; - } - root->commit_root = btrfs_root_node(root); - return 0; -} - static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info) { struct btrfs_root *root = kzalloc(sizeof(*root), GFP_NOFS); @@ -1451,70 +1418,73 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans, return 0; } -struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, - struct btrfs_key *location) +struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root, + struct btrfs_key *key) { struct btrfs_root *root; struct btrfs_fs_info *fs_info = tree_root->fs_info; struct btrfs_path *path; - struct extent_buffer *l; u64 generation; u32 blocksize; - int ret = 0; - int slot; + int ret; - root = btrfs_alloc_root(fs_info); - if (!root) + path = btrfs_alloc_path(); + if (!path) return ERR_PTR(-ENOMEM); - if (location->offset == (u64)-1) { - ret = find_and_setup_root(tree_root, fs_info, - location->objectid, root); - if (ret) { - kfree(root); - return ERR_PTR(ret); - } - goto out; + + root = btrfs_alloc_root(fs_info); + if (!root) { + ret = -ENOMEM; + goto alloc_fail; } __setup_root(tree_root->nodesize, tree_root->leafsize, tree_root->sectorsize, tree_root->stripesize, - root, fs_info, location->objectid); + root, fs_info, key->objectid); - path = btrfs_alloc_path(); - if (!path) { - kfree(root); - return ERR_PTR(-ENOMEM); - } - ret = btrfs_search_slot(NULL, tree_root, location, path, 0, 0); - if (ret == 0) { - l = path->nodes[0]; - slot = path->slots[0]; - btrfs_read_root_item(l, slot, &root->root_item); - memcpy(&root->root_key, location, sizeof(*location)); - } - btrfs_free_path(path); + ret = btrfs_find_root(tree_root, key, path, + &root->root_item, &root->root_key); if (ret) { - kfree(root); if (ret > 0) ret = -ENOENT; - return ERR_PTR(ret); + goto find_fail; } generation = btrfs_root_generation(&root->root_item); blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), blocksize, generation); - if (!root->node || !extent_buffer_uptodate(root->node)) { - ret = (!root->node) ? 
-ENOMEM : -EIO; - - free_extent_buffer(root->node); - kfree(root); - return ERR_PTR(ret); + if (!root->node) { + ret = -ENOMEM; + goto find_fail; + } else if (!btrfs_buffer_uptodate(root->node, generation, 0)) { + ret = -EIO; + goto read_fail; } - root->commit_root = btrfs_root_node(root); out: - if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { + btrfs_free_path(path); + return root; + +read_fail: + free_extent_buffer(root->node); +find_fail: + kfree(root); +alloc_fail: + root = ERR_PTR(ret); + goto out; +} + +struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root, + struct btrfs_key *location) +{ + struct btrfs_root *root; + + root = btrfs_read_tree_root(tree_root, location); + if (IS_ERR(root)) + return root; + + if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { root->ref_cows = 1; btrfs_check_and_init_root_item(&root->root_item); } @@ -1522,6 +1492,66 @@ out: return root; } +int btrfs_init_fs_root(struct btrfs_root *root) +{ + int ret; + + root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); + root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), + GFP_NOFS); + if (!root->free_ino_pinned || !root->free_ino_ctl) { + ret = -ENOMEM; + goto fail; + } + + btrfs_init_free_ino_ctl(root); + mutex_init(&root->fs_commit_mutex); + spin_lock_init(&root->cache_lock); + init_waitqueue_head(&root->cache_wait); + + ret = get_anon_bdev(&root->anon_dev); + if (ret) + goto fail; + return 0; +fail: + kfree(root->free_ino_ctl); + kfree(root->free_ino_pinned); + return ret; +} + +struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info, + u64 root_id) +{ + struct btrfs_root *root; + + spin_lock(&fs_info->fs_roots_radix_lock); + root = radix_tree_lookup(&fs_info->fs_roots_radix, + (unsigned long)root_id); + spin_unlock(&fs_info->fs_roots_radix_lock); + return root; +} + +int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_root *root) +{ + int ret; + + ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); + if (ret) + return ret; + + spin_lock(&fs_info->fs_roots_radix_lock); + ret = radix_tree_insert(&fs_info->fs_roots_radix, + (unsigned long)root->root_key.objectid, + root); + if (ret == 0) + root->in_radix = 1; + spin_unlock(&fs_info->fs_roots_radix_lock); + radix_tree_preload_end(); + + return ret; +} + struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, struct btrfs_key *location) { @@ -1542,58 +1572,30 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info, return fs_info->quota_root ? 
fs_info->quota_root : ERR_PTR(-ENOENT); again: - spin_lock(&fs_info->fs_roots_radix_lock); - root = radix_tree_lookup(&fs_info->fs_roots_radix, - (unsigned long)location->objectid); - spin_unlock(&fs_info->fs_roots_radix_lock); + root = btrfs_lookup_fs_root(fs_info, location->objectid); if (root) return root; - root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); + root = btrfs_read_fs_root(fs_info->tree_root, location); if (IS_ERR(root)) return root; - root->free_ino_ctl = kzalloc(sizeof(*root->free_ino_ctl), GFP_NOFS); - root->free_ino_pinned = kzalloc(sizeof(*root->free_ino_pinned), - GFP_NOFS); - if (!root->free_ino_pinned || !root->free_ino_ctl) { - ret = -ENOMEM; + if (btrfs_root_refs(&root->root_item) == 0) { + ret = -ENOENT; goto fail; } - btrfs_init_free_ino_ctl(root); - mutex_init(&root->fs_commit_mutex); - spin_lock_init(&root->cache_lock); - init_waitqueue_head(&root->cache_wait); - - ret = get_anon_bdev(&root->anon_dev); + ret = btrfs_init_fs_root(root); if (ret) goto fail; - if (btrfs_root_refs(&root->root_item) == 0) { - ret = -ENOENT; - goto fail; - } - ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); if (ret < 0) goto fail; if (ret == 0) root->orphan_item_inserted = 1; - ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); - if (ret) - goto fail; - - spin_lock(&fs_info->fs_roots_radix_lock); - ret = radix_tree_insert(&fs_info->fs_roots_radix, - (unsigned long)root->root_key.objectid, - root); - if (ret == 0) - root->in_radix = 1; - - spin_unlock(&fs_info->fs_roots_radix_lock); - radix_tree_preload_end(); + ret = btrfs_insert_fs_root(fs_info, root); if (ret) { if (ret == -EEXIST) { free_fs_root(root); @@ -1601,10 +1603,6 @@ again: } goto fail; } - - ret = btrfs_find_dead_roots(fs_info->tree_root, - root->root_key.objectid); - WARN_ON(ret); return root; fail: free_fs_root(root); @@ -2050,7 +2048,7 @@ static void del_fs_roots(struct btrfs_fs_info *fs_info) list_del(&gang[0]->root_list); if (gang[0]->in_radix) { - btrfs_free_fs_root(fs_info, gang[0]); + btrfs_drop_and_free_fs_root(fs_info, gang[0]); } else { free_extent_buffer(gang[0]->node); free_extent_buffer(gang[0]->commit_root); @@ -2065,7 +2063,7 @@ static void del_fs_roots(struct btrfs_fs_info *fs_info) if (!ret) break; for (i = 0; i < ret; i++) - btrfs_free_fs_root(fs_info, gang[i]); + btrfs_drop_and_free_fs_root(fs_info, gang[i]); } } @@ -2097,14 +2095,8 @@ int open_ctree(struct super_block *sb, int backup_index = 0; tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info); - extent_root = fs_info->extent_root = btrfs_alloc_root(fs_info); - csum_root = fs_info->csum_root = btrfs_alloc_root(fs_info); chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info); - dev_root = fs_info->dev_root = btrfs_alloc_root(fs_info); - quota_root = fs_info->quota_root = btrfs_alloc_root(fs_info); - - if (!tree_root || !extent_root || !csum_root || - !chunk_root || !dev_root || !quota_root) { + if (!tree_root || !chunk_root) { err = -ENOMEM; goto fail; } @@ -2655,33 +2647,44 @@ retry_root_backup: btrfs_set_root_node(&tree_root->root_item, tree_root->node); tree_root->commit_root = btrfs_root_node(tree_root); - ret = find_and_setup_root(tree_root, fs_info, - BTRFS_EXTENT_TREE_OBJECTID, extent_root); - if (ret) + location.objectid = BTRFS_EXTENT_TREE_OBJECTID; + location.type = BTRFS_ROOT_ITEM_KEY; + location.offset = 0; + + extent_root = btrfs_read_tree_root(tree_root, &location); + if (IS_ERR(extent_root)) { + ret = PTR_ERR(extent_root); goto recovery_tree_root; + } extent_root->track_dirty = 1; + 
fs_info->extent_root = extent_root; - ret = find_and_setup_root(tree_root, fs_info, - BTRFS_DEV_TREE_OBJECTID, dev_root); - if (ret) + location.objectid = BTRFS_DEV_TREE_OBJECTID; + dev_root = btrfs_read_tree_root(tree_root, &location); + if (IS_ERR(dev_root)) { + ret = PTR_ERR(dev_root); goto recovery_tree_root; + } dev_root->track_dirty = 1; + fs_info->dev_root = dev_root; + btrfs_init_devices_late(fs_info); - ret = find_and_setup_root(tree_root, fs_info, - BTRFS_CSUM_TREE_OBJECTID, csum_root); - if (ret) + location.objectid = BTRFS_CSUM_TREE_OBJECTID; + csum_root = btrfs_read_tree_root(tree_root, &location); + if (IS_ERR(csum_root)) { + ret = PTR_ERR(csum_root); goto recovery_tree_root; + } csum_root->track_dirty = 1; + fs_info->csum_root = csum_root; - ret = find_and_setup_root(tree_root, fs_info, - BTRFS_QUOTA_TREE_OBJECTID, quota_root); - if (ret) { - kfree(quota_root); - quota_root = fs_info->quota_root = NULL; - } else { + location.objectid = BTRFS_QUOTA_TREE_OBJECTID; + quota_root = btrfs_read_tree_root(tree_root, &location); + if (!IS_ERR(quota_root)) { quota_root->track_dirty = 1; fs_info->quota_enabled = 1; fs_info->pending_quota_state = 1; + fs_info->quota_root = quota_root; } fs_info->generation = generation; @@ -2834,7 +2837,7 @@ retry_root_backup: location.objectid = BTRFS_FS_TREE_OBJECTID; location.type = BTRFS_ROOT_ITEM_KEY; - location.offset = (u64)-1; + location.offset = 0; fs_info->fs_root = btrfs_read_fs_root_no_name(fs_info, &location); if (IS_ERR(fs_info->fs_root)) { @@ -3381,7 +3384,9 @@ int write_ctree_super(struct btrfs_trans_handle *trans, return ret; } -void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) +/* Drop a fs root from the radix tree and free it. */ +void btrfs_drop_and_free_fs_root(struct btrfs_fs_info *fs_info, + struct btrfs_root *root) { spin_lock(&fs_info->fs_roots_radix_lock); radix_tree_delete(&fs_info->fs_roots_radix, @@ -3415,6 +3420,11 @@ static void free_fs_root(struct btrfs_root *root) kfree(root); } +void btrfs_free_fs_root(struct btrfs_root *root) +{ + free_fs_root(root); +} + int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) { u64 root_objectid = 0; -- cgit v1.2.3-18-g5258 From b0feb9d96e71a88d7eec56f41b8f23e92af889b0 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 15 May 2013 07:48:20 +0000 Subject: Btrfs: introduce grab/put functions for the root of the fs/file tree The grab/put functions will be used in the next patch, which needs to grab the root object and ensure it is not freed. We use a reference counter instead of the srcu lock to avoid blocking the memory reclaim task, which invokes synchronize_srcu(). 
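The disk-io.c side of the diff only shows the users of the new pair (atomic_set(&root->refs, 1) when a root is set up, and btrfs_put_fs_root() where kfree() used to be); the helpers themselves land outside this file. Their plausible shape, given here as an illustration rather than a quote of the patch:

/* Illustration only: the actual definitions are not part of this diff. */
static inline struct btrfs_root *btrfs_grab_fs_root(struct btrfs_root *root)
{
	/* Take a reference only if the root is still alive. */
	if (atomic_inc_not_zero(&root->refs))
		return root;
	return NULL;
}

static inline void btrfs_put_fs_root(struct btrfs_root *root)
{
	/* Free the root once the last reference is dropped. */
	if (atomic_dec_and_test(&root->refs))
		kfree(root);
}
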
Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index c65a5aac1e4..90b643e07f3 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1216,6 +1216,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, atomic_set(&root->log_writers, 0); atomic_set(&root->log_batch, 0); atomic_set(&root->orphan_inodes, 0); + atomic_set(&root->refs, 1); root->log_transid = 0; root->last_log_commit = 0; extent_io_tree_init(&root->dirty_log_pages, @@ -2052,7 +2053,7 @@ static void del_fs_roots(struct btrfs_fs_info *fs_info) } else { free_extent_buffer(gang[0]->node); free_extent_buffer(gang[0]->commit_root); - kfree(gang[0]); + btrfs_put_fs_root(gang[0]); } } @@ -3417,7 +3418,7 @@ static void free_fs_root(struct btrfs_root *root) kfree(root->free_ino_ctl); kfree(root->free_ino_pinned); kfree(root->name); - kfree(root); + btrfs_put_fs_root(root); } void btrfs_free_fs_root(struct btrfs_root *root) -- cgit v1.2.3-18-g5258 From eb73c1b7cea7d533288ef5297a0ea0e159db85b0 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 15 May 2013 07:48:22 +0000 Subject: Btrfs: introduce per-subvolume delalloc inode list When we create a snapshot, we currently need to flush all delalloc inodes in the fs, although just flushing the inodes in the source tree would be enough. So we introduce a per-subvolume delalloc inode list. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 49 +++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 39 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 90b643e07f3..2748c7ccdd5 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1191,6 +1191,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->objectid = objectid; root->last_trans = 0; root->highest_objectid = 0; + root->nr_delalloc_inodes = 0; root->name = NULL; root->inode_tree = RB_ROOT; INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC); @@ -1199,10 +1200,13 @@ INIT_LIST_HEAD(&root->dirty_list); INIT_LIST_HEAD(&root->root_list); + INIT_LIST_HEAD(&root->delalloc_inodes); + INIT_LIST_HEAD(&root->delalloc_root); INIT_LIST_HEAD(&root->logged_list[0]); INIT_LIST_HEAD(&root->logged_list[1]); spin_lock_init(&root->orphan_lock); spin_lock_init(&root->inode_lock); + spin_lock_init(&root->delalloc_lock); spin_lock_init(&root->accounting_lock); spin_lock_init(&root->log_extents_lock[0]); spin_lock_init(&root->log_extents_lock[1]); @@ -2140,9 +2144,9 @@ int open_ctree(struct super_block *sb, INIT_LIST_HEAD(&fs_info->trans_list); INIT_LIST_HEAD(&fs_info->dead_roots); INIT_LIST_HEAD(&fs_info->delayed_iputs); - INIT_LIST_HEAD(&fs_info->delalloc_inodes); + INIT_LIST_HEAD(&fs_info->delalloc_roots); INIT_LIST_HEAD(&fs_info->caching_block_groups); - spin_lock_init(&fs_info->delalloc_lock); + spin_lock_init(&fs_info->delalloc_root_lock); spin_lock_init(&fs_info->trans_lock); spin_lock_init(&fs_info->fs_roots_radix_lock); spin_lock_init(&fs_info->delayed_iput_lock); @@ -3803,24 +3807,49 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root) INIT_LIST_HEAD(&splice); - spin_lock(&root->fs_info->delalloc_lock); - list_splice_init(&root->fs_info->delalloc_inodes, &splice); while 
(!list_empty(&splice)) { - btrfs_inode = list_entry(splice.next, struct btrfs_inode, - delalloc_inodes); + btrfs_inode = list_first_entry(&splice, struct btrfs_inode, + delalloc_inodes); list_del_init(&btrfs_inode->delalloc_inodes); clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, &btrfs_inode->runtime_flags); - spin_unlock(&root->fs_info->delalloc_lock); + spin_unlock(&root->delalloc_lock); btrfs_invalidate_inodes(btrfs_inode->root); - spin_lock(&root->fs_info->delalloc_lock); + spin_lock(&root->delalloc_lock); } - spin_unlock(&root->fs_info->delalloc_lock); + spin_unlock(&root->delalloc_lock); +} + +static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info) +{ + struct btrfs_root *root; + struct list_head splice; + + INIT_LIST_HEAD(&splice); + + spin_lock(&fs_info->delalloc_root_lock); + list_splice_init(&fs_info->delalloc_roots, &splice); + while (!list_empty(&splice)) { + root = list_first_entry(&splice, struct btrfs_root, + delalloc_root); + list_del_init(&root->delalloc_root); + root = btrfs_grab_fs_root(root); + BUG_ON(!root); + spin_unlock(&fs_info->delalloc_root_lock); + + btrfs_destroy_delalloc_inodes(root); + btrfs_put_fs_root(root); + + spin_lock(&fs_info->delalloc_root_lock); + } + spin_unlock(&fs_info->delalloc_root_lock); } static int btrfs_destroy_marked_extents(struct btrfs_root *root, @@ -3974,7 +4003,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) btrfs_destroy_delayed_inodes(root); btrfs_assert_delayed_root_empty(root); - btrfs_destroy_delalloc_inodes(root); + btrfs_destroy_all_delalloc_inodes(root->fs_info); spin_lock(&root->fs_info->trans_lock); root->fs_info->running_transaction = NULL; -- cgit v1.2.3-18-g5258 From 199c2a9c3d1389db7f7a211e64f6809d352ce5f6 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 15 May 2013 07:48:23 +0000 Subject: Btrfs: introduce per-subvolume ordered extent list The reason we introduce the per-subvolume ordered extent list is the same as for the per-subvolume delalloc inode list. 
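The diff below only initializes the new fields, since the declarations live in ctree.h; roughly, the two-level structure looks like this (an approximation for illustration, with field types guessed from the initializers, not the exact header hunks):

/* Approximate shape of the fields added outside this diff. */
struct btrfs_fs_info {
	/* ... */
	spinlock_t ordered_root_lock;
	struct list_head ordered_roots;   /* roots that have ordered extents */
};

struct btrfs_root {
	/* ... */
	spinlock_t ordered_extent_lock;
	struct list_head ordered_extents; /* this subvolume's ordered extents */
	struct list_head ordered_root;    /* our entry in fs_info->ordered_roots */
	u64 nr_ordered_extents;
};

Walking every ordered extent thus becomes a two-level iteration: take ordered_root_lock to walk ordered_roots, then each root's ordered_extent_lock to walk its ordered_extents, which is exactly what btrfs_destroy_all_ordered_extents() below does.
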
Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 45 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 2748c7ccdd5..0f873872d1f 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1192,6 +1192,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, root->last_trans = 0; root->highest_objectid = 0; root->nr_delalloc_inodes = 0; + root->nr_ordered_extents = 0; root->name = NULL; root->inode_tree = RB_ROOT; INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC); @@ -1202,11 +1203,14 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, INIT_LIST_HEAD(&root->root_list); INIT_LIST_HEAD(&root->delalloc_inodes); INIT_LIST_HEAD(&root->delalloc_root); + INIT_LIST_HEAD(&root->ordered_extents); + INIT_LIST_HEAD(&root->ordered_root); INIT_LIST_HEAD(&root->logged_list[0]); INIT_LIST_HEAD(&root->logged_list[1]); spin_lock_init(&root->orphan_lock); spin_lock_init(&root->inode_lock); spin_lock_init(&root->delalloc_lock); + spin_lock_init(&root->ordered_extent_lock); spin_lock_init(&root->accounting_lock); spin_lock_init(&root->log_extents_lock[0]); spin_lock_init(&root->log_extents_lock[1]); @@ -2193,8 +2197,8 @@ int open_ctree(struct super_block *sb, fs_info->thread_pool_size = min_t(unsigned long, num_online_cpus() + 2, 8); - INIT_LIST_HEAD(&fs_info->ordered_extents); - spin_lock_init(&fs_info->ordered_extent_lock); + INIT_LIST_HEAD(&fs_info->ordered_roots); + spin_lock_init(&fs_info->ordered_root_lock); fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root), GFP_NOFS); if (!fs_info->delayed_root) { @@ -3683,7 +3687,7 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, INIT_LIST_HEAD(&splice); mutex_lock(&root->fs_info->ordered_operations_mutex); - spin_lock(&root->fs_info->ordered_extent_lock); + spin_lock(&root->fs_info->ordered_root_lock); list_splice_init(&t->ordered_operations, &splice); while (!list_empty(&splice)) { @@ -3691,14 +3695,14 @@ static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t, ordered_operations); list_del_init(&btrfs_inode->ordered_operations); - spin_unlock(&root->fs_info->ordered_extent_lock); + spin_unlock(&root->fs_info->ordered_root_lock); btrfs_invalidate_inodes(btrfs_inode->root); - spin_lock(&root->fs_info->ordered_extent_lock); + spin_lock(&root->fs_info->ordered_root_lock); } - spin_unlock(&root->fs_info->ordered_extent_lock); + spin_unlock(&root->fs_info->ordered_root_lock); mutex_unlock(&root->fs_info->ordered_operations_mutex); } @@ -3706,15 +3710,36 @@ static void btrfs_destroy_ordered_extents(struct btrfs_root *root) { struct btrfs_ordered_extent *ordered; - spin_lock(&root->fs_info->ordered_extent_lock); + spin_lock(&root->ordered_extent_lock); /* * This will just short circuit the ordered completion stuff which will * make sure the ordered extent gets properly cleaned up. 
*/ - list_for_each_entry(ordered, &root->fs_info->ordered_extents, + list_for_each_entry(ordered, &root->ordered_extents, root_extent_list) set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); - spin_unlock(&root->fs_info->ordered_extent_lock); + spin_unlock(&root->ordered_extent_lock); +} + +static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info) +{ + struct btrfs_root *root; + struct list_head splice; + + INIT_LIST_HEAD(&splice); + + spin_lock(&fs_info->ordered_root_lock); + list_splice_init(&fs_info->ordered_roots, &splice); + while (!list_empty(&splice)) { + root = list_first_entry(&splice, struct btrfs_root, + ordered_root); + list_del_init(&root->ordered_root); + + btrfs_destroy_ordered_extents(root); + + cond_resched_lock(&fs_info->ordered_root_lock); + } + spin_unlock(&fs_info->ordered_root_lock); } int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, @@ -3977,7 +4002,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) btrfs_destroy_ordered_operations(t, root); - btrfs_destroy_ordered_extents(root); + btrfs_destroy_all_ordered_extents(root->fs_info); btrfs_destroy_delayed_refs(t, root); -- cgit v1.2.3-18-g5258 From ac6738792fe4478df2da9c1f41e3540e9ef79604 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Wed, 15 May 2013 07:48:25 +0000 Subject: Btrfs: cleanup unnecessary assignment when cleaning up all the residual transaction When we umount a fs with serious errors, we will invoke btrfs_cleanup_transactions() to clean up the residual transaction. At this time, it is impossible to start a new transaction, so we needn't assign trans_no_join to 1, and also needn't clear the running transaction every time we destroy a residual transaction. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 0f873872d1f..885245f5acd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3994,7 +3994,7 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) spin_lock(&root->fs_info->trans_lock); list_splice_init(&root->fs_info->trans_list, &list); - root->fs_info->trans_no_join = 1; + root->fs_info->running_transaction = NULL; spin_unlock(&root->fs_info->trans_lock); while (!list_empty(&list)) { @@ -4030,10 +4030,6 @@ btrfs_destroy_all_delalloc_inodes(root->fs_info); - spin_lock(&root->fs_info->trans_lock); - root->fs_info->running_transaction = NULL; - spin_unlock(&root->fs_info->trans_lock); - btrfs_destroy_marked_extents(root, &t->dirty_pages, EXTENT_DIRTY); @@ -4046,9 +4042,6 @@ kmem_cache_free(btrfs_transaction_cachep, t); } - spin_lock(&root->fs_info->trans_lock); - root->fs_info->trans_no_join = 0; - spin_unlock(&root->fs_info->trans_lock); mutex_unlock(&root->fs_info->transaction_kthread_mutex); return 0; -- cgit v1.2.3-18-g5258 From 4a9d8bdee368de78ace8b36da4eb2186afea162d Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Fri, 17 May 2013 03:53:43 +0000 Subject: Btrfs: make the state of the transaction more readable We used 3 variables to track the state of the transaction; it was complex and wasted memory. Besides that, it was hard to understand which types of transaction handles should be blocked in each transaction state, so the developers often made mistakes. This patch improves the above problem. 
In this patch, we define 6 states for the transaction: enum btrfs_trans_state { TRANS_STATE_RUNNING = 0, TRANS_STATE_BLOCKED = 1, TRANS_STATE_COMMIT_START = 2, TRANS_STATE_COMMIT_DOING = 3, TRANS_STATE_UNBLOCKED = 4, TRANS_STATE_COMPLETED = 5, TRANS_STATE_MAX = 6, } and just use 1 variable to track those states. In order to make the blocked handle types for each state clearer, we introduce an array: unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = { [TRANS_STATE_RUNNING] = 0U, [TRANS_STATE_BLOCKED] = (__TRANS_USERSPACE | __TRANS_START), [TRANS_STATE_COMMIT_START] = (__TRANS_USERSPACE | __TRANS_START | __TRANS_ATTACH), [TRANS_STATE_COMMIT_DOING] = (__TRANS_USERSPACE | __TRANS_START | __TRANS_ATTACH | __TRANS_JOIN), [TRANS_STATE_UNBLOCKED] = (__TRANS_USERSPACE | __TRANS_START | __TRANS_ATTACH | __TRANS_JOIN | __TRANS_JOIN_NOLOCK), [TRANS_STATE_COMPLETED] = (__TRANS_USERSPACE | __TRANS_START | __TRANS_ATTACH | __TRANS_JOIN | __TRANS_JOIN_NOLOCK), } It is very intuitive. Besides that, because we remove ->in_commit from the transaction structure, the lock ->commit_lock which was used to protect it is unnecessary, so remove ->commit_lock. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 885245f5acd..b9eaa0f2114 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1747,7 +1747,7 @@ static int transaction_kthread(void *arg) } now = get_seconds(); - if (!cur->blocked && + if (cur->state < TRANS_STATE_BLOCKED && (now < cur->start_time || now - cur->start_time < 30)) { spin_unlock(&root->fs_info->trans_lock); delay = HZ * 5; @@ -2186,7 +2186,6 @@ int open_ctree(struct super_block *sb, fs_info->max_inline = 8192 * 1024; fs_info->metadata_ratio = 0; fs_info->defrag_inodes = RB_ROOT; - fs_info->trans_no_join = 0; fs_info->free_chunk_space = 0; fs_info->tree_mod_log = RB_ROOT; @@ -3958,19 +3957,14 @@ void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv, cur_trans->dirty_pages.dirty_bytes); - /* FIXME: cleanup wait for commit */ - cur_trans->in_commit = 1; - cur_trans->blocked = 1; + cur_trans->state = TRANS_STATE_COMMIT_START; wake_up(&root->fs_info->transaction_blocked_wait); btrfs_evict_pending_snapshots(cur_trans); - cur_trans->blocked = 0; + cur_trans->state = TRANS_STATE_UNBLOCKED; wake_up(&root->fs_info->transaction_wait); - cur_trans->commit_done = 1; - wake_up(&cur_trans->commit_wait); - btrfs_destroy_delayed_inodes(root); btrfs_assert_delayed_root_empty(root); @@ -3979,6 +3973,9 @@ btrfs_destroy_pinned_extent(root, root->fs_info->pinned_extents); + cur_trans->state =TRANS_STATE_COMPLETED; + wake_up(&cur_trans->commit_wait); + /* memset(cur_trans, 0, sizeof(*cur_trans)); kmem_cache_free(btrfs_transaction_cachep, cur_trans); @@ -4006,25 +4003,23 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) btrfs_destroy_delayed_refs(t, root); - /* FIXME: cleanup wait for commit */ - t->in_commit = 1; - t->blocked = 1; + /* + * FIXME: cleanup wait for commit + * We needn't acquire the lock here, because we are during + * the umount, there is no other task which will change it. 
+ */ + t->state = TRANS_STATE_COMMIT_START; smp_mb(); if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) wake_up(&root->fs_info->transaction_blocked_wait); btrfs_evict_pending_snapshots(t); - t->blocked = 0; + t->state = TRANS_STATE_UNBLOCKED; smp_mb(); if (waitqueue_active(&root->fs_info->transaction_wait)) wake_up(&root->fs_info->transaction_wait); - t->commit_done = 1; - smp_mb(); - if (waitqueue_active(&t->commit_wait)) - wake_up(&t->commit_wait); - btrfs_destroy_delayed_inodes(root); btrfs_assert_delayed_root_empty(root); @@ -4036,6 +4031,11 @@ btrfs_destroy_pinned_extent(root, root->fs_info->pinned_extents); + t->state = TRANS_STATE_COMPLETED; + smp_mb(); + if (waitqueue_active(&t->commit_wait)) + wake_up(&t->commit_wait); + atomic_set(&t->use_count, 0); list_del_init(&t->list); memset(t, 0, sizeof(*t)); -- cgit v1.2.3-18-g5258 From b382a324b60f4923e9fc8e11f023e4f493c51318 Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Tue, 28 May 2013 15:47:24 +0000 Subject: Btrfs: fix qgroup rescan resume on mount When called during mount, we cannot start the rescan worker thread until open_ctree is done. This commit restructures the qgroup rescan internals to enable a clean deferral of the rescan resume operation. First of all, the struct qgroup_rescan is removed, saving us a malloc and some initialization synchronization problems. Its only element (the worker struct) now lives within fs_info just as the rest of the rescan code. Then setting up a rescan worker is split into several reusable stages. Currently we have three different rescan startup scenarios: (A) rescan ioctl (B) rescan resume by mount (C) rescan by quota enable Each case needs its own combination of the four following steps: (1) set the progress [A, C: zero; B: state of umount] (2) commit the transaction [A] (3) set the counters [A, C: zero; B: state of umount] (4) start worker [A, B, C] qgroup_rescan_init does step (1). There's no extra function added to commit a transaction, we've got that already. qgroup_rescan_zero_tracking does step (3). Step (4) is nothing more than a call to the generic btrfs_queue_worker. We also get rid of a double check for the rescan progress during btrfs_qgroup_account_ref, which is no longer required due to having step 2 from the list above. As a side effect, this commit prepares to move the rescan start code from btrfs_run_qgroups (which is run during commit) to a less time-critical section. Signed-off-by: Jan Schmidt Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b9eaa0f2114..9b7020197c7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2879,6 +2879,8 @@ retry_root_backup: return ret; } + btrfs_qgroup_rescan_resume(fs_info); + return 0; fail_qgroup: -- cgit v1.2.3-18-g5258 From e78417d1921c538ea195537c7bea1b31a6a55961 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Mon, 3 Jun 2013 16:42:36 -0400 Subject: Btrfs: do not pin while under spin lock When testing a corrupted fs I noticed I was getting sleep-while-atomic errors when the transaction aborted. This is because btrfs_pin_extent may need to allocate memory and we are calling this under the spin lock. Fix this by moving it out and doing the pin after dropping the spin lock but before dropping the mutex, the same way it works when delayed refs run normally. 
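Distilled, the fix follows the usual record-under-lock, act-after-unlock pattern; a sketch of the hunk below, with the surrounding rbtree walk elided:

bool pin_bytes = false;

spin_lock(&delayed_refs->lock);
/* ... unlink the ref/head from the delayed-refs rbtree ... */
if (head && head->must_insert_reserved)
	pin_bytes = true;	/* only remember the decision here */
spin_unlock(&delayed_refs->lock);

if (head) {
	if (pin_bytes)		/* btrfs_pin_extent() may allocate memory */
		btrfs_pin_extent(root, ref->bytenr, ref->num_bytes, 1);
	mutex_unlock(&head->mutex);
}
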
Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/disk-io.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/disk-io.c') diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 9b7020197c7..3c2886ca7d8 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3762,6 +3762,7 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, while ((node = rb_first(&delayed_refs->root)) != NULL) { struct btrfs_delayed_ref_head *head = NULL; + bool pin_bytes = false; ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); atomic_set(&ref->refs, 1); @@ -3782,8 +3783,7 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, } if (head->must_insert_reserved) - btrfs_pin_extent(root, ref->bytenr, - ref->num_bytes, 1); + pin_bytes = true; btrfs_free_delayed_extent_op(head->extent_op); delayed_refs->num_heads--; if (list_empty(&head->cluster)) @@ -3794,9 +3794,13 @@ int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, ref->in_tree = 0; rb_erase(&ref->rb_node, &delayed_refs->root); delayed_refs->num_entries--; - if (head) - mutex_unlock(&head->mutex); spin_unlock(&delayed_refs->lock); + if (head) { + if (pin_bytes) + btrfs_pin_extent(root, ref->bytenr, + ref->num_bytes, 1); + mutex_unlock(&head->mutex); + } btrfs_put_delayed_ref(ref); cond_resched(); -- cgit v1.2.3-18-g5258