diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-11 14:23:12 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-06-11 14:23:12 -0700 |
commit | a525890cb6a2949b644d212ae290b658967d3919 (patch) | |
tree | a2c6c1f6cefff89b235bf6556212527e47a5d50b | |
parent | 3bb66d7f8cc31537a3170c9bb82b38e538b984c5 (diff) | |
parent | b263c2c8bf13c273485bd99dbbeba79c844409dd (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (23 commits)
Btrfs: fix extent_buffer leak during tree log replay
Btrfs: fix oops when btrfs_inherit_iflags called with a NULL dir
Btrfs: fix -o nodatasum printk spelling
Btrfs: check duplicate backrefs for both data and metadata
Btrfs: init worker struct fields before kthread-run
Btrfs: pin buffers during write_dev_supers
Btrfs: avoid races between super writeout and device list updates
Fix btrfs when ACLs are configured out
Btrfs: fdatasync should skip metadata writeout
Btrfs: remove crc32c.h and use libcrc32c directly.
Btrfs: implement FS_IOC_GETFLAGS/SETFLAGS/GETVERSION
Btrfs: autodetect SSD devices
Btrfs: add mount -o ssd_spread to spread allocations out
Btrfs: avoid allocation clusters that are too spread out
Btrfs: Add mount -o nossd
Btrfs: avoid IO stalls behind congested devices in a multi-device FS
Btrfs: don't allow WRITE_SYNC bios to starve out regular writes
Btrfs: fix metadata dirty throttling limits
Btrfs: reduce mount -o ssd CPU usage
Btrfs: balance btree more often
...
-rw-r--r-- | fs/btrfs/Makefile | 4 | ||||
-rw-r--r-- | fs/btrfs/acl.c | 5 | ||||
-rw-r--r-- | fs/btrfs/async-thread.c | 2 | ||||
-rw-r--r-- | fs/btrfs/btrfs_inode.h | 4 | ||||
-rw-r--r-- | fs/btrfs/compression.c | 6 | ||||
-rw-r--r-- | fs/btrfs/crc32c.h | 29 | ||||
-rw-r--r-- | fs/btrfs/ctree.c | 698 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 330 | ||||
-rw-r--r-- | fs/btrfs/delayed-ref.c | 509 | ||||
-rw-r--r-- | fs/btrfs/delayed-ref.h | 85 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 164 | ||||
-rw-r--r-- | fs/btrfs/export.c | 4 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 2610 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 18 | ||||
-rw-r--r-- | fs/btrfs/file.c | 78 | ||||
-rw-r--r-- | fs/btrfs/free-space-cache.c | 10 | ||||
-rw-r--r-- | fs/btrfs/free-space-cache.h | 1 | ||||
-rw-r--r-- | fs/btrfs/hash.h | 4 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 159 | ||||
-rw-r--r-- | fs/btrfs/ioctl.c | 197 | ||||
-rw-r--r-- | fs/btrfs/print-tree.c | 155 | ||||
-rw-r--r-- | fs/btrfs/relocation.c | 3711 | ||||
-rw-r--r-- | fs/btrfs/root-tree.c | 17 | ||||
-rw-r--r-- | fs/btrfs/super.c | 53 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 410 | ||||
-rw-r--r-- | fs/btrfs/transaction.h | 12 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 103 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 69 | ||||
-rw-r--r-- | fs/btrfs/volumes.h | 12 |
29 files changed, 7306 insertions, 2153 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index 94212844a9b..a35eb36b32f 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -6,5 +6,5 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ transaction.o inode.o file.o tree-defrag.o \ extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ - ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \ - compression.o delayed-ref.o + export.o tree-log.o acl.o free-space-cache.o zlib.o \ + compression.o delayed-ref.o relocation.o diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index cbba000dccb..603972576f0 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -351,9 +351,4 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir) return 0; } -int btrfs_check_acl(struct inode *inode, int mask) -{ - return 0; -} - #endif /* CONFIG_FS_POSIX_ACL */ diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 502c3d61de6..7f88628a1a7 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c @@ -294,10 +294,10 @@ int btrfs_start_workers(struct btrfs_workers *workers, int num_workers) INIT_LIST_HEAD(&worker->worker_list); spin_lock_init(&worker->lock); atomic_set(&worker->num_pending, 0); + worker->workers = workers; worker->task = kthread_run(worker_loop, worker, "btrfs-%s-%d", workers->name, workers->num_workers + i); - worker->workers = workers; if (IS_ERR(worker->task)) { kfree(worker); ret = PTR_ERR(worker->task); diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index b30986f00b9..acb4f351758 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -72,6 +72,9 @@ struct btrfs_inode { */ struct list_head ordered_operations; + /* node for the red-black tree that links inodes in subvolume root */ + struct rb_node rb_node; + /* the space_info for where this inode's data allocations are done */ struct btrfs_space_info *space_info; @@ -154,5 +157,4 @@ static inline void btrfs_i_size_write(struct inode *inode, u64 size) BTRFS_I(inode)->disk_i_size = size; } - #endif diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index ab07627084f..de1e2fd3208 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -123,7 +123,7 @@ static int check_compressed_csum(struct inode *inode, u32 csum; u32 *cb_sum = &cb->sums; - if (btrfs_test_flag(inode, NODATASUM)) + if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) return 0; for (i = 0; i < cb->nr_pages; i++) { @@ -670,7 +670,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, */ atomic_inc(&cb->pending_bios); - if (!btrfs_test_flag(inode, NODATASUM)) { + if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { btrfs_lookup_bio_sums(root, inode, comp_bio, sums); } @@ -697,7 +697,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio, 0); BUG_ON(ret); - if (!btrfs_test_flag(inode, NODATASUM)) + if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) btrfs_lookup_bio_sums(root, inode, comp_bio, sums); ret = btrfs_map_bio(root, READ, comp_bio, mirror_num, 0); diff --git a/fs/btrfs/crc32c.h b/fs/btrfs/crc32c.h deleted file mode 100644 index 6e1b3de3670..00000000000 --- a/fs/btrfs/crc32c.h +++ /dev/null @@ -1,29 +0,0 @@ -/* - * Copyright (C) 2008 Oracle. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * - * You should have received a copy of the GNU General Public - * License along with this program; if not, write to the - * Free Software Foundation, Inc., 59 Temple Place - Suite 330, - * Boston, MA 021110-1307, USA. - */ - -#ifndef __BTRFS_CRC32C__ -#define __BTRFS_CRC32C__ -#include <linux/crc32c.h> - -/* - * this file used to do more for selecting the HW version of crc32c, - * perhaps it will one day again soon. - */ -#define btrfs_crc32c(seed, data, length) crc32c(seed, data, length) -#endif - diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index fedf8b9f03a..60a45f3a4e9 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -197,14 +197,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, u32 nritems; int ret = 0; int level; - struct btrfs_root *new_root; - - new_root = kmalloc(sizeof(*new_root), GFP_NOFS); - if (!new_root) - return -ENOMEM; - - memcpy(new_root, root, sizeof(*new_root)); - new_root->root_key.objectid = new_root_objectid; + struct btrfs_disk_key disk_key; WARN_ON(root->ref_cows && trans->transid != root->fs_info->running_transaction->transid); @@ -212,28 +205,37 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, level = btrfs_header_level(buf); nritems = btrfs_header_nritems(buf); + if (level == 0) + btrfs_item_key(buf, &disk_key, 0); + else + btrfs_node_key(buf, &disk_key, 0); - cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0, - new_root_objectid, trans->transid, - level, buf->start, 0); - if (IS_ERR(cow)) { - kfree(new_root); + cow = btrfs_alloc_free_block(trans, root, buf->len, 0, + new_root_objectid, &disk_key, level, + buf->start, 0); + if (IS_ERR(cow)) return PTR_ERR(cow); - } copy_extent_buffer(cow, buf, 0, 0, cow->len); btrfs_set_header_bytenr(cow, cow->start); btrfs_set_header_generation(cow, trans->transid); - btrfs_set_header_owner(cow, new_root_objectid); - btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); + btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV); + btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN | + BTRFS_HEADER_FLAG_RELOC); + if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) + btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC); + else + btrfs_set_header_owner(cow, new_root_objectid); write_extent_buffer(cow, root->fs_info->fsid, (unsigned long)btrfs_header_fsid(cow), BTRFS_FSID_SIZE); WARN_ON(btrfs_header_generation(buf) > trans->transid); - ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL); - kfree(new_root); + if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID) + ret = btrfs_inc_ref(trans, root, cow, 1); + else + ret = btrfs_inc_ref(trans, root, cow, 0); if (ret) return ret; @@ -244,6 +246,125 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, } /* + * check if the tree block can be shared by multiple trees + */ +int btrfs_block_can_be_shared(struct btrfs_root *root, + struct extent_buffer *buf) +{ + /* + * Tree blocks not in refernece counted trees and tree roots + * are never shared. If a block was allocated after the last + * snapshot and the block was not allocated by tree relocation, + * we know the block is not shared. + */ + if (root->ref_cows && + buf != root->node && buf != root->commit_root && + (btrfs_header_generation(buf) <= + btrfs_root_last_snapshot(&root->root_item) || + btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) + return 1; +#ifdef BTRFS_COMPAT_EXTENT_TREE_V0 + if (root->ref_cows && + btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) + return 1; +#endif + return 0; +} + +static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, + struct extent_buffer *cow) +{ + u64 refs; + u64 owner; + u64 flags; + u64 new_flags = 0; + int ret; + + /* + * Backrefs update rules: + * + * Always use full backrefs for extent pointers in tree block + * allocated by tree relocation. + * + * If a shared tree block is no longer referenced by its owner + * tree (btrfs_header_owner(buf) == root->root_key.objectid), + * use full backrefs for extent pointers in tree block. + * + * If a tree block is been relocating + * (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID), + * use full backrefs for extent pointers in tree block. + * The reason for this is some operations (such as drop tree) + * are only allowed for blocks use full backrefs. + */ + + if (btrfs_block_can_be_shared(root, buf)) { + ret = btrfs_lookup_extent_info(trans, root, buf->start, + buf->len, &refs, &flags); + BUG_ON(ret); + BUG_ON(refs == 0); + } else { + refs = 1; + if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || + btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) + flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; + else + flags = 0; + } + + owner = btrfs_header_owner(buf); + BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID && + !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); + + if (refs > 1) { + if ((owner == root->root_key.objectid || + root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && + !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) { + ret = btrfs_inc_ref(trans, root, buf, 1); + BUG_ON(ret); + + if (root->root_key.objectid == + BTRFS_TREE_RELOC_OBJECTID) { + ret = btrfs_dec_ref(trans, root, buf, 0); + BUG_ON(ret); + ret = btrfs_inc_ref(trans, root, cow, 1); + BUG_ON(ret); + } + new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; + } else { + + if (root->root_key.objectid == + BTRFS_TREE_RELOC_OBJECTID) + ret = btrfs_inc_ref(trans, root, cow, 1); + else + ret = btrfs_inc_ref(trans, root, cow, 0); + BUG_ON(ret); + } + if (new_flags != 0) { + ret = btrfs_set_disk_extent_flags(trans, root, + buf->start, + buf->len, + new_flags, 0); + BUG_ON(ret); + } + } else { + if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) { + if (root->root_key.objectid == + BTRFS_TREE_RELOC_OBJECTID) + ret = btrfs_inc_ref(trans, root, cow, 1); + else + ret = btrfs_inc_ref(trans, root, cow, 0); + BUG_ON(ret); + ret = btrfs_dec_ref(trans, root, buf, 1); + BUG_ON(ret); + } + clean_tree_block(trans, root, buf); + } + return 0; +} + +/* * does the dirty work in cow of a single block. The parent block (if * supplied) is updated to point to the new cow copy. The new buffer is marked * dirty and returned locked. If you modify the block it needs to be marked @@ -262,34 +383,39 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, struct extent_buffer **cow_ret, u64 search_start, u64 empty_size) { - u64 parent_start; + struct btrfs_disk_key disk_key; struct extent_buffer *cow; - u32 nritems; - int ret = 0; int level; int unlock_orig = 0; + u64 parent_start; if (*cow_ret == buf) unlock_orig = 1; btrfs_assert_tree_locked(buf); - if (parent) - parent_start = parent->start; - else - parent_start = 0; - WARN_ON(root->ref_cows && trans->transid != root->fs_info->running_transaction->transid); WARN_ON(root->ref_cows && trans->transid != root->last_trans); level = btrfs_header_level(buf); - nritems = btrfs_header_nritems(buf); - cow = btrfs_alloc_free_block(trans, root, buf->len, - parent_start, root->root_key.objectid, - trans->transid, level, - search_start, empty_size); + if (level == 0) + btrfs_item_key(buf, &disk_key, 0); + else + btrfs_node_key(buf, &disk_key, 0); + + if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { + if (parent) + parent_start = parent->start; + else + parent_start = 0; + } else + parent_start = 0; + + cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start, + root->root_key.objectid, &disk_key, + level, search_start, empty_size); if (IS_ERR(cow)) return PTR_ERR(cow); @@ -298,83 +424,53 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, copy_extent_buffer(cow, buf, 0, 0, cow->len); btrfs_set_header_bytenr(cow, cow->start); btrfs_set_header_generation(cow, trans->transid); - btrfs_set_header_owner(cow, root->root_key.objectid); - btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN); + btrfs_set_header_backref_rev(cow, BTRFS_MIXED_BACKREF_REV); + btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN | + BTRFS_HEADER_FLAG_RELOC); + if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) + btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC); + else + btrfs_set_header_owner(cow, root->root_key.objectid); write_extent_buffer(cow, root->fs_info->fsid, (unsigned long)btrfs_header_fsid(cow), BTRFS_FSID_SIZE); - WARN_ON(btrfs_header_generation(buf) > trans->transid); - if (btrfs_header_generation(buf) != trans->transid) { - u32 nr_extents; - ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents); - if (ret) - return ret; - - ret = btrfs_cache_ref(trans, root, buf, nr_extents); - WARN_ON(ret); - } else if (btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID) { - /* - * There are only two places that can drop reference to - * tree blocks owned by living reloc trees, one is here, - * the other place is btrfs_drop_subtree. In both places, - * we check reference count while tree block is locked. - * Furthermore, if reference count is one, it won't get - * increased by someone else. - */ - u32 refs; - ret = btrfs_lookup_extent_ref(trans, root, buf->start, - buf->len, &refs); - BUG_ON(ret); - if (refs == 1) { - ret = btrfs_update_ref(trans, root, buf, cow, - 0, nritems); - clean_tree_block(trans, root, buf); - } else { - ret = btrfs_inc_ref(trans, root, buf, cow, NULL); - } - BUG_ON(ret); - } else { - ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems); - if (ret) - return ret; - clean_tree_block(trans, root, buf); - } - - if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { - ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start); - WARN_ON(ret); - } + update_ref_for_cow(trans, root, buf, cow); if (buf == root->node) { WARN_ON(parent && parent != buf); + if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID || + btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV) + parent_start = buf->start; + else + parent_start = 0; spin_lock(&root->node_lock); root->node = cow; extent_buffer_get(cow); spin_unlock(&root->node_lock); - if (buf != root->commit_root) { - btrfs_free_extent(trans, root, buf->start, - buf->len, buf->start, - root->root_key.objectid, - btrfs_header_generation(buf), - level, 1); - } + btrfs_free_extent(trans, root, buf->start, buf->len, + parent_start, root->root_key.objectid, + level, 0); free_extent_buffer(buf); add_root_to_dirty_list(root); } else { + if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) + parent_start = parent->start; + else + parent_start = 0; + + WARN_ON(trans->transid != btrfs_header_generation(parent)); btrfs_set_node_blockptr(parent, parent_slot, cow->start); - WARN_ON(trans->transid == 0); btrfs_set_node_ptr_generation(parent, parent_slot, trans->transid); btrfs_mark_buffer_dirty(parent); - WARN_ON(btrfs_header_generation(parent) != trans->transid); btrfs_free_extent(trans, root, buf->start, buf->len, - parent_start, btrfs_header_owner(parent), - btrfs_header_generation(parent), level, 1); + parent_start, root->root_key.objectid, + level, 0); } if (unlock_orig) btrfs_tree_unlock(buf); @@ -384,6 +480,18 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, return 0; } +static inline int should_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf) +{ + if (btrfs_header_generation(buf) == trans->transid && + !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) && + !(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID && + btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC))) + return 0; + return 1; +} + /* * cows a single block, see __btrfs_cow_block for the real work. * This version of it has extra checks so that a block isn't cow'd more than @@ -411,9 +519,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, WARN_ON(1); } - if (btrfs_header_generation(buf) == trans->transid && - btrfs_header_owner(buf) == root->root_key.objectid && - !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { + if (!should_cow_block(trans, root, buf)) { *cow_ret = buf; return 0; } @@ -469,7 +575,7 @@ static int comp_keys(struct btrfs_disk_key *disk, struct btrfs_key *k2) /* * same as comp_keys only with two btrfs_key's */ -static int comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2) +int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2) { if (k1->objectid > k2->objectid) return 1; @@ -845,6 +951,12 @@ static int bin_search(struct extent_buffer *eb, struct btrfs_key *key, return -1; } +int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, + int level, int *slot) +{ + return bin_search(eb, key, level, slot); +} + /* given a node and slot number, this reads the blocks it points to. The * extent buffer is returned with a reference taken (but unlocked). * NULL is returned on error. @@ -921,13 +1033,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, root->node = child; spin_unlock(&root->node_lock); - ret = btrfs_update_extent_ref(trans, root, child->start, - child->len, - mid->start, child->start, - root->root_key.objectid, - trans->transid, level - 1); - BUG_ON(ret); - add_root_to_dirty_list(root); btrfs_tree_unlock(child); @@ -938,9 +1043,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, /* once for the path */ free_extent_buffer(mid); ret = btrfs_free_extent(trans, root, mid->start, mid->len, - mid->start, root->root_key.objectid, - btrfs_header_generation(mid), - level, 1); + 0, root->root_key.objectid, level, 1); /* once for the root ptr */ free_extent_buffer(mid); return ret; @@ -949,8 +1052,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, BTRFS_NODEPTRS_PER_BLOCK(root) / 4) return 0; - if (trans->transaction->delayed_refs.flushing && - btrfs_header_nritems(mid) > 2) + if (btrfs_header_nritems(mid) > 2) return 0; if (btrfs_header_nritems(mid) < 2) @@ -998,7 +1100,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, ret = wret; if (btrfs_header_nritems(right) == 0) { u64 bytenr = right->start; - u64 generation = btrfs_header_generation(parent); u32 blocksize = right->len; clean_tree_block(trans, root, right); @@ -1010,9 +1111,9 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, if (wret) ret = wret; wret = btrfs_free_extent(trans, root, bytenr, - blocksize, parent->start, - btrfs_header_owner(parent), - generation, level, 1); + blocksize, 0, + root->root_key.objectid, + level, 0); if (wret) ret = wret; } else { @@ -1047,7 +1148,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, } if (btrfs_header_nritems(mid) == 0) { /* we've managed to empty the middle node, drop it */ - u64 root_gen = btrfs_header_generation(parent); u64 bytenr = mid->start; u32 blocksize = mid->len; @@ -1059,9 +1159,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, if (wret) ret = wret; wret = btrfs_free_extent(trans, root, bytenr, blocksize, - parent->start, - btrfs_header_owner(parent), - root_gen, level, 1); + 0, root->root_key.objectid, + level, 0); if (wret) ret = wret; } else { @@ -1437,7 +1536,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level) { int i; - if (path->keep_locks || path->lowest_level) + if (path->keep_locks) return; for (i = level; i < BTRFS_MAX_LEVEL; i++) { @@ -1552,7 +1651,7 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans, } b = p->nodes[level]; } else if (ins_len < 0 && btrfs_header_nritems(b) < - BTRFS_NODEPTRS_PER_BLOCK(root) / 4) { + BTRFS_NODEPTRS_PER_BLOCK(root) / 2) { int sret; sret = reada_for_balance(root, p, level); @@ -1614,10 +1713,17 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root lowest_unlock = 2; again: - if (p->skip_locking) - b = btrfs_root_node(root); - else - b = btrfs_lock_root_node(root); + if (p->search_commit_root) { + b = root->commit_root; + extent_buffer_get(b); + if (!p->skip_locking) + btrfs_tree_lock(b); + } else { + if (p->skip_locking) + b = btrfs_root_node(root); + else + b = btrfs_lock_root_node(root); + } while (b) { level = btrfs_header_level(b); @@ -1638,11 +1744,9 @@ again: * then we don't want to set the path blocking, * so we test it here */ - if (btrfs_header_generation(b) == trans->transid && - btrfs_header_owner(b) == root->root_key.objectid && - !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) { + if (!should_cow_block(trans, root, b)) goto cow_done; - } + btrfs_set_path_blocking(p); wret = btrfs_cow_block(trans, root, b, @@ -1764,138 +1868,6 @@ done: return ret; } -int btrfs_merge_path(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - struct btrfs_key *node_keys, - u64 *nodes, int lowest_level) -{ - struct extent_buffer *eb; - struct extent_buffer *parent; - struct btrfs_key key; - u64 bytenr; - u64 generation; - u32 blocksize; - int level; - int slot; - int key_match; - int ret; - - eb = btrfs_lock_root_node(root); - ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb); - BUG_ON(ret); - - btrfs_set_lock_blocking(eb); - - parent = eb; - while (1) { - level = btrfs_header_level(parent); - if (level == 0 || level <= lowest_level) - break; - - ret = bin_search(parent, &node_keys[lowest_level], level, - &slot); - if (ret && slot > 0) - slot--; - - bytenr = btrfs_node_blockptr(parent, slot); - if (nodes[level - 1] == bytenr) - break; - - blocksize = btrfs_level_size(root, level - 1); - generation = btrfs_node_ptr_generation(parent, slot); - btrfs_node_key_to_cpu(eb, &key, slot); - key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key)); - - if (generation == trans->transid) { - eb = read_tree_block(root, bytenr, blocksize, - generation); - btrfs_tree_lock(eb); - btrfs_set_lock_blocking(eb); - } - - /* - * if node keys match and node pointer hasn't been modified - * in the running transaction, we can merge the path. for - * blocks owened by reloc trees, the node pointer check is - * skipped, this is because these blocks are fully controlled - * by the space balance code, no one else can modify them. - */ - if (!nodes[level - 1] || !key_match || - (generation == trans->transid && - btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID)) { - if (level == 1 || level == lowest_level + 1) { - if (generation == trans->transid) { - btrfs_tree_unlock(eb); - free_extent_buffer(eb); - } - break; - } - - if (generation != trans->transid) { - eb = read_tree_block(root, bytenr, blocksize, - generation); - btrfs_tree_lock(eb); - btrfs_set_lock_blocking(eb); - } - - ret = btrfs_cow_block(trans, root, eb, parent, slot, - &eb); - BUG_ON(ret); - - if (root->root_key.objectid == - BTRFS_TREE_RELOC_OBJECTID) { - if (!nodes[level - 1]) { - nodes[level - 1] = eb->start; - memcpy(&node_keys[level - 1], &key, - sizeof(node_keys[0])); - } else { - WARN_ON(1); - } - } - - btrfs_tree_unlock(parent); - free_extent_buffer(parent); - parent = eb; - continue; - } - - btrfs_set_node_blockptr(parent, slot, nodes[level - 1]); - btrfs_set_node_ptr_generation(parent, slot, trans->transid); - btrfs_mark_buffer_dirty(parent); - - ret = btrfs_inc_extent_ref(trans, root, - nodes[level - 1], - blocksize, parent->start, - btrfs_header_owner(parent), - btrfs_header_generation(parent), - level - 1); - BUG_ON(ret); - - /* - * If the block was created in the running transaction, - * it's possible this is the last reference to it, so we - * should drop the subtree. - */ - if (generation == trans->transid) { - ret = btrfs_drop_subtree(trans, root, eb, parent); - BUG_ON(ret); - btrfs_tree_unlock(eb); - free_extent_buffer(eb); - } else { - ret = btrfs_free_extent(trans, root, bytenr, - blocksize, parent->start, - btrfs_header_owner(parent), - btrfs_header_generation(parent), - level - 1, 1); - BUG_ON(ret); - } - break; - } - btrfs_tree_unlock(parent); - free_extent_buffer(parent); - return 0; -} - /* * adjust the pointers going up the tree, starting at level * making sure the right key of each node is points to 'key'. @@ -2021,9 +1993,6 @@ static int push_node_left(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(src); btrfs_mark_buffer_dirty(dst); - ret = btrfs_update_ref(trans, root, src, dst, dst_nritems, push_items); - BUG_ON(ret); - return ret; } @@ -2083,9 +2052,6 @@ static int balance_node_right(struct btrfs_trans_handle *trans, btrfs_mark_buffer_dirty(src); btrfs_mark_buffer_dirty(dst); - ret = btrfs_update_ref(trans, root, src, dst, 0, push_items); - BUG_ON(ret); - return ret; } @@ -2105,7 +2071,6 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, struct extent_buffer *c; struct extent_buffer *old; struct btrfs_disk_key lower_key; - int ret; BUG_ON(path->nodes[level]); BUG_ON(path->nodes[level-1] != root->node); @@ -2117,16 +2082,17 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, btrfs_node_key(lower, &lower_key, 0); c = btrfs_alloc_free_block(trans, root, root->nodesize, 0, - root->root_key.objectid, trans->transid, + root->root_key.objectid, &lower_key, level, root->node->start, 0); if (IS_ERR(c)) return PTR_ERR(c); - memset_extent_buffer(c, 0, 0, root->nodesize); + memset_ex |