aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs/ctree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r--fs/btrfs/ctree.c1419
1 files changed, 759 insertions, 660 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index eea5da7a2b9..aeab453b8e2 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -37,16 +37,10 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *dst_buf,
struct extent_buffer *src_buf);
-static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct btrfs_path *path, int level, int slot);
-static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
+static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
+ int level, int slot);
+static int tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb);
-struct extent_buffer *read_old_tree_block(struct btrfs_root *root, u64 bytenr,
- u32 blocksize, u64 parent_transid,
- u64 time_seq);
-struct extent_buffer *btrfs_find_old_tree_block(struct btrfs_root *root,
- u64 bytenr, u32 blocksize,
- u64 time_seq);
struct btrfs_path *btrfs_alloc_path(void)
{
@@ -208,7 +202,7 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
* tree until you end up with a lock on the root. A locked buffer
* is returned, with a reference held.
*/
-struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
+static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
{
struct extent_buffer *eb;
@@ -230,7 +224,8 @@ struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
static void add_root_to_dirty_list(struct btrfs_root *root)
{
spin_lock(&root->fs_info->trans_lock);
- if (root->track_dirty && list_empty(&root->dirty_list)) {
+ if (test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state) &&
+ list_empty(&root->dirty_list)) {
list_add(&root->dirty_list,
&root->fs_info->dirty_cowonly_roots);
}
@@ -252,9 +247,10 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
int level;
struct btrfs_disk_key disk_key;
- WARN_ON(root->ref_cows && trans->transid !=
- root->fs_info->running_transaction->transid);
- WARN_ON(root->ref_cows && trans->transid != root->last_trans);
+ WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+ trans->transid != root->fs_info->running_transaction->transid);
+ WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+ trans->transid != root->last_trans);
level = btrfs_header_level(buf);
if (level == 0)
@@ -279,8 +275,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
else
btrfs_set_header_owner(cow, new_root_objectid);
- write_extent_buffer(cow, root->fs_info->fsid,
- (unsigned long)btrfs_header_fsid(cow),
+ write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(),
BTRFS_FSID_SIZE);
WARN_ON(btrfs_header_generation(buf) > trans->transid);
@@ -361,6 +356,14 @@ static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
}
/*
+ * Pull a new tree mod seq number for our operation.
+ */
+static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
+{
+ return atomic64_inc_return(&fs_info->tree_mod_seq);
+}
+
+/*
* This adds a new blocker to the tree mod log's blocker list if the @elem
* passed does not already have a sequence number set. So when a caller expects
* to record tree modifications, it should ensure to set elem->seq to zero
@@ -371,19 +374,16 @@ static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem)
{
- u64 seq;
-
tree_mod_log_write_lock(fs_info);
spin_lock(&fs_info->tree_mod_seq_lock);
if (!elem->seq) {
elem->seq = btrfs_inc_tree_mod_seq(fs_info);
list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
}
- seq = btrfs_inc_tree_mod_seq(fs_info);
spin_unlock(&fs_info->tree_mod_seq_lock);
tree_mod_log_write_unlock(fs_info);
- return seq;
+ return elem->seq;
}
void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
@@ -443,6 +443,8 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
* the index is the shifted logical of the *new* root node for root replace
* operations, or the shifted logical of the affected block for all other
* operations.
+ *
+ * Note: must be called with write lock (tree_mod_log_write_lock).
*/
static noinline int
__tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
@@ -452,7 +454,9 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
struct rb_node *parent = NULL;
struct tree_mod_elem *cur;
- BUG_ON(!tm || !tm->seq);
+ BUG_ON(!tm);
+
+ tm->seq = btrfs_inc_tree_mod_seq(fs_info);
tm_root = &fs_info->tree_mod_log;
new = &tm_root->rb_node;
@@ -467,10 +471,8 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
new = &((*new)->rb_left);
else if (cur->seq > tm->seq)
new = &((*new)->rb_right);
- else {
- kfree(tm);
+ else
return -EEXIST;
- }
}
rb_link_node(&tm->node, parent, new);
@@ -493,11 +495,7 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
return 1;
tree_mod_log_write_lock(fs_info);
- if (list_empty(&fs_info->tree_mod_seq_list)) {
- /*
- * someone emptied the list while we were waiting for the lock.
- * we must not add to the list when no blocker exists.
- */
+ if (list_empty(&(fs_info)->tree_mod_seq_list)) {
tree_mod_log_write_unlock(fs_info);
return 1;
}
@@ -505,40 +503,28 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
return 0;
}
-/*
- * This allocates memory and gets a tree modification sequence number.
- *
- * Returns <0 on error.
- * Returns >0 (the added sequence number) on success.
- */
-static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
- struct tree_mod_elem **tm_ret)
+/* Similar to tree_mod_dont_log, but doesn't acquire any locks. */
+static inline int tree_mod_need_log(const struct btrfs_fs_info *fs_info,
+ struct extent_buffer *eb)
{
- struct tree_mod_elem *tm;
-
- /*
- * once we switch from spin locks to something different, we should
- * honor the flags parameter here.
- */
- tm = *tm_ret = kzalloc(sizeof(*tm), GFP_ATOMIC);
- if (!tm)
- return -ENOMEM;
+ smp_mb();
+ if (list_empty(&(fs_info)->tree_mod_seq_list))
+ return 0;
+ if (eb && btrfs_header_level(eb) == 0)
+ return 0;
- tm->seq = btrfs_inc_tree_mod_seq(fs_info);
- return tm->seq;
+ return 1;
}
-static inline int
-__tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb, int slot,
- enum mod_log_op op, gfp_t flags)
+static struct tree_mod_elem *
+alloc_tree_mod_elem(struct extent_buffer *eb, int slot,
+ enum mod_log_op op, gfp_t flags)
{
- int ret;
struct tree_mod_elem *tm;
- ret = tree_mod_alloc(fs_info, flags, &tm);
- if (ret < 0)
- return ret;
+ tm = kzalloc(sizeof(*tm), flags);
+ if (!tm)
+ return NULL;
tm->index = eb->start >> PAGE_CACHE_SHIFT;
if (op != MOD_LOG_KEY_ADD) {
@@ -548,39 +534,37 @@ __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
tm->op = op;
tm->slot = slot;
tm->generation = btrfs_node_ptr_generation(eb, slot);
+ RB_CLEAR_NODE(&tm->node);
- return __tree_mod_log_insert(fs_info, tm);
+ return tm;
}
static noinline int
-tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb, int slot,
- enum mod_log_op op, gfp_t flags)
+tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *eb, int slot,
+ enum mod_log_op op, gfp_t flags)
{
+ struct tree_mod_elem *tm;
int ret;
- if (tree_mod_dont_log(fs_info, eb))
+ if (!tree_mod_need_log(fs_info, eb))
return 0;
- ret = __tree_mod_log_insert_key(fs_info, eb, slot, op, flags);
+ tm = alloc_tree_mod_elem(eb, slot, op, flags);
+ if (!tm)
+ return -ENOMEM;
- tree_mod_log_write_unlock(fs_info);
- return ret;
-}
+ if (tree_mod_dont_log(fs_info, eb)) {
+ kfree(tm);
+ return 0;
+ }
-static noinline int
-tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
- int slot, enum mod_log_op op)
-{
- return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS);
-}
+ ret = __tree_mod_log_insert(fs_info, tm);
+ tree_mod_log_write_unlock(fs_info);
+ if (ret)
+ kfree(tm);
-static noinline int
-tree_mod_log_insert_key_locked(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb, int slot,
- enum mod_log_op op)
-{
- return __tree_mod_log_insert_key(fs_info, eb, slot, op, GFP_NOFS);
+ return ret;
}
static noinline int
@@ -588,27 +572,24 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int dst_slot, int src_slot,
int nr_items, gfp_t flags)
{
- struct tree_mod_elem *tm;
- int ret;
+ struct tree_mod_elem *tm = NULL;
+ struct tree_mod_elem **tm_list = NULL;
+ int ret = 0;
int i;
+ int locked = 0;
- if (tree_mod_dont_log(fs_info, eb))
+ if (!tree_mod_need_log(fs_info, eb))
return 0;
- /*
- * When we override something during the move, we log these removals.
- * This can only happen when we move towards the beginning of the
- * buffer, i.e. dst_slot < src_slot.
- */
- for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
- ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot,
- MOD_LOG_KEY_REMOVE_WHILE_MOVING);
- BUG_ON(ret < 0);
- }
+ tm_list = kzalloc(nr_items * sizeof(struct tree_mod_elem *), flags);
+ if (!tm_list)
+ return -ENOMEM;
- ret = tree_mod_alloc(fs_info, flags, &tm);
- if (ret < 0)
- goto out;
+ tm = kzalloc(sizeof(*tm), flags);
+ if (!tm) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
tm->index = eb->start >> PAGE_CACHE_SHIFT;
tm->slot = src_slot;
@@ -616,44 +597,110 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
tm->move.nr_items = nr_items;
tm->op = MOD_LOG_MOVE_KEYS;
+ for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
+ tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot,
+ MOD_LOG_KEY_REMOVE_WHILE_MOVING, flags);
+ if (!tm_list[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+ }
+
+ if (tree_mod_dont_log(fs_info, eb))
+ goto free_tms;
+ locked = 1;
+
+ /*
+ * When we override something during the move, we log these removals.
+ * This can only happen when we move towards the beginning of the
+ * buffer, i.e. dst_slot < src_slot.
+ */
+ for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
+ ret = __tree_mod_log_insert(fs_info, tm_list[i]);
+ if (ret)
+ goto free_tms;
+ }
+
ret = __tree_mod_log_insert(fs_info, tm);
-out:
+ if (ret)
+ goto free_tms;
tree_mod_log_write_unlock(fs_info);
+ kfree(tm_list);
+
+ return 0;
+free_tms:
+ for (i = 0; i < nr_items; i++) {
+ if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
+ rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
+ kfree(tm_list[i]);
+ }
+ if (locked)
+ tree_mod_log_write_unlock(fs_info);
+ kfree(tm_list);
+ kfree(tm);
+
return ret;
}
-static inline void
-__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
+static inline int
+__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
+ struct tree_mod_elem **tm_list,
+ int nritems)
{
- int i;
- u32 nritems;
+ int i, j;
int ret;
- if (btrfs_header_level(eb) == 0)
- return;
-
- nritems = btrfs_header_nritems(eb);
for (i = nritems - 1; i >= 0; i--) {
- ret = tree_mod_log_insert_key_locked(fs_info, eb, i,
- MOD_LOG_KEY_REMOVE_WHILE_FREEING);
- BUG_ON(ret < 0);
+ ret = __tree_mod_log_insert(fs_info, tm_list[i]);
+ if (ret) {
+ for (j = nritems - 1; j > i; j--)
+ rb_erase(&tm_list[j]->node,
+ &fs_info->tree_mod_log);
+ return ret;
+ }
}
+
+ return 0;
}
static noinline int
tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
struct extent_buffer *old_root,
- struct extent_buffer *new_root, gfp_t flags)
+ struct extent_buffer *new_root, gfp_t flags,
+ int log_removal)
{
- struct tree_mod_elem *tm;
- int ret;
+ struct tree_mod_elem *tm = NULL;
+ struct tree_mod_elem **tm_list = NULL;
+ int nritems = 0;
+ int ret = 0;
+ int i;
- if (tree_mod_dont_log(fs_info, NULL))
+ if (!tree_mod_need_log(fs_info, NULL))
return 0;
- ret = tree_mod_alloc(fs_info, flags, &tm);
- if (ret < 0)
- goto out;
+ if (log_removal && btrfs_header_level(old_root) > 0) {
+ nritems = btrfs_header_nritems(old_root);
+ tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
+ flags);
+ if (!tm_list) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+ for (i = 0; i < nritems; i++) {
+ tm_list[i] = alloc_tree_mod_elem(old_root, i,
+ MOD_LOG_KEY_REMOVE_WHILE_FREEING, flags);
+ if (!tm_list[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+ }
+ }
+
+ tm = kzalloc(sizeof(*tm), flags);
+ if (!tm) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
tm->index = new_root->start >> PAGE_CACHE_SHIFT;
tm->old_root.logical = old_root->start;
@@ -661,9 +708,29 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
tm->generation = btrfs_header_generation(old_root);
tm->op = MOD_LOG_ROOT_REPLACE;
- ret = __tree_mod_log_insert(fs_info, tm);
-out:
+ if (tree_mod_dont_log(fs_info, NULL))
+ goto free_tms;
+
+ if (tm_list)
+ ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
+ if (!ret)
+ ret = __tree_mod_log_insert(fs_info, tm);
+
tree_mod_log_write_unlock(fs_info);
+ if (ret)
+ goto free_tms;
+ kfree(tm_list);
+
+ return ret;
+
+free_tms:
+ if (tm_list) {
+ for (i = 0; i < nritems; i++)
+ kfree(tm_list[i]);
+ kfree(tm_list);
+ }
+ kfree(tm);
+
return ret;
}
@@ -733,34 +800,75 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
return __tree_mod_log_search(fs_info, start, min_seq, 0);
}
-static noinline void
+static noinline int
tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
struct extent_buffer *src, unsigned long dst_offset,
unsigned long src_offset, int nr_items)
{
- int ret;
+ int ret = 0;
+ struct tree_mod_elem **tm_list = NULL;
+ struct tree_mod_elem **tm_list_add, **tm_list_rem;
int i;
+ int locked = 0;
- if (tree_mod_dont_log(fs_info, NULL))
- return;
+ if (!tree_mod_need_log(fs_info, NULL))
+ return 0;
- if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) {
- tree_mod_log_write_unlock(fs_info);
- return;
+ if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
+ return 0;
+
+ tm_list = kzalloc(nr_items * 2 * sizeof(struct tree_mod_elem *),
+ GFP_NOFS);
+ if (!tm_list)
+ return -ENOMEM;
+
+ tm_list_add = tm_list;
+ tm_list_rem = tm_list + nr_items;
+ for (i = 0; i < nr_items; i++) {
+ tm_list_rem[i] = alloc_tree_mod_elem(src, i + src_offset,
+ MOD_LOG_KEY_REMOVE, GFP_NOFS);
+ if (!tm_list_rem[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+
+ tm_list_add[i] = alloc_tree_mod_elem(dst, i + dst_offset,
+ MOD_LOG_KEY_ADD, GFP_NOFS);
+ if (!tm_list_add[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
}
+ if (tree_mod_dont_log(fs_info, NULL))
+ goto free_tms;
+ locked = 1;
+
for (i = 0; i < nr_items; i++) {
- ret = tree_mod_log_insert_key_locked(fs_info, src,
- i + src_offset,
- MOD_LOG_KEY_REMOVE);
- BUG_ON(ret < 0);
- ret = tree_mod_log_insert_key_locked(fs_info, dst,
- i + dst_offset,
- MOD_LOG_KEY_ADD);
- BUG_ON(ret < 0);
+ ret = __tree_mod_log_insert(fs_info, tm_list_rem[i]);
+ if (ret)
+ goto free_tms;
+ ret = __tree_mod_log_insert(fs_info, tm_list_add[i]);
+ if (ret)
+ goto free_tms;
}
tree_mod_log_write_unlock(fs_info);
+ kfree(tm_list);
+
+ return 0;
+
+free_tms:
+ for (i = 0; i < nr_items * 2; i++) {
+ if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
+ rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
+ kfree(tm_list[i]);
+ }
+ if (locked)
+ tree_mod_log_write_unlock(fs_info);
+ kfree(tm_list);
+
+ return ret;
}
static inline void
@@ -779,30 +887,68 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
{
int ret;
- ret = tree_mod_log_insert_key_mask(fs_info, eb, slot,
- MOD_LOG_KEY_REPLACE,
- atomic ? GFP_ATOMIC : GFP_NOFS);
+ ret = tree_mod_log_insert_key(fs_info, eb, slot,
+ MOD_LOG_KEY_REPLACE,
+ atomic ? GFP_ATOMIC : GFP_NOFS);
BUG_ON(ret < 0);
}
-static noinline void
+static noinline int
tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
{
- if (tree_mod_dont_log(fs_info, eb))
- return;
+ struct tree_mod_elem **tm_list = NULL;
+ int nritems = 0;
+ int i;
+ int ret = 0;
- __tree_mod_log_free_eb(fs_info, eb);
+ if (btrfs_header_level(eb) == 0)
+ return 0;
+
+ if (!tree_mod_need_log(fs_info, NULL))
+ return 0;
+
+ nritems = btrfs_header_nritems(eb);
+ tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
+ GFP_NOFS);
+ if (!tm_list)
+ return -ENOMEM;
+ for (i = 0; i < nritems; i++) {
+ tm_list[i] = alloc_tree_mod_elem(eb, i,
+ MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
+ if (!tm_list[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+ }
+
+ if (tree_mod_dont_log(fs_info, eb))
+ goto free_tms;
+
+ ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
tree_mod_log_write_unlock(fs_info);
+ if (ret)
+ goto free_tms;
+ kfree(tm_list);
+
+ return 0;
+
+free_tms:
+ for (i = 0; i < nritems; i++)
+ kfree(tm_list[i]);
+ kfree(tm_list);
+
+ return ret;
}
static noinline void
tree_mod_log_set_root_pointer(struct btrfs_root *root,
- struct extent_buffer *new_root_node)
+ struct extent_buffer *new_root_node,
+ int log_removal)
{
int ret;
ret = tree_mod_log_insert_root(root->fs_info, root->node,
- new_root_node, GFP_NOFS);
+ new_root_node, GFP_NOFS, log_removal);
BUG_ON(ret < 0);
}
@@ -818,14 +964,14 @@ int btrfs_block_can_be_shared(struct btrfs_root *root,
* snapshot and the block was not allocated by tree relocation,
* we know the block is not shared.
*/
- if (root->ref_cows &&
+ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
buf != root->node && buf != root->commit_root &&
(btrfs_header_generation(buf) <=
btrfs_root_last_snapshot(&root->root_item) ||
btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
return 1;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (root->ref_cows &&
+ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
return 1;
#endif
@@ -863,7 +1009,8 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
if (btrfs_block_can_be_shared(root, buf)) {
ret = btrfs_lookup_extent_info(trans, root, buf->start,
- buf->len, &refs, &flags);
+ btrfs_header_level(buf), 1,
+ &refs, &flags);
if (ret)
return ret;
if (refs == 0) {
@@ -909,10 +1056,12 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
BUG_ON(ret); /* -ENOMEM */
}
if (new_flags != 0) {
+ int level = btrfs_header_level(buf);
+
ret = btrfs_set_disk_extent_flags(trans, root,
buf->start,
buf->len,
- new_flags, 0);
+ new_flags, level, 0);
if (ret)
return ret;
}
@@ -927,7 +1076,6 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
ret = btrfs_dec_ref(trans, root, buf, 1, 1);
BUG_ON(ret); /* -ENOMEM */
}
- tree_mod_log_free_eb(root->fs_info, buf);
clean_tree_block(trans, root, buf);
*last_ref = 1;
}
@@ -965,9 +1113,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_assert_tree_locked(buf);
- WARN_ON(root->ref_cows && trans->transid !=
- root->fs_info->running_transaction->transid);
- WARN_ON(root->ref_cows && trans->transid != root->last_trans);
+ WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+ trans->transid != root->fs_info->running_transaction->transid);
+ WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+ trans->transid != root->last_trans);
level = btrfs_header_level(buf);
@@ -1003,8 +1152,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
else
btrfs_set_header_owner(cow, root->root_key.objectid);
- write_extent_buffer(cow, root->fs_info->fsid,
- (unsigned long)btrfs_header_fsid(cow),
+ write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(),
BTRFS_FSID_SIZE);
ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
@@ -1013,8 +1161,11 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
return ret;
}
- if (root->ref_cows)
- btrfs_reloc_cow_block(trans, root, buf, cow);
+ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
+ ret = btrfs_reloc_cow_block(trans, root, buf, cow);
+ if (ret)
+ return ret;
+ }
if (buf == root->node) {
WARN_ON(parent && parent != buf);
@@ -1025,7 +1176,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
parent_start = 0;
extent_buffer_get(cow);
- tree_mod_log_set_root_pointer(root, cow);
+ tree_mod_log_set_root_pointer(root, cow, 1);
rcu_assign_pointer(root->node, cow);
btrfs_free_tree_block(trans, root, buf, parent_start,
@@ -1040,12 +1191,19 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
WARN_ON(trans->transid != btrfs_header_generation(parent));
tree_mod_log_insert_key(root->fs_info, parent, parent_slot,
- MOD_LOG_KEY_REPLACE);
+ MOD_LOG_KEY_REPLACE, GFP_NOFS);
btrfs_set_node_blockptr(parent, parent_slot,
cow->start);
btrfs_set_node_ptr_generation(parent, parent_slot,
trans->transid);
btrfs_mark_buffer_dirty(parent);
+ if (last_ref) {
+ ret = tree_mod_log_free_eb(root->fs_info, buf);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
+ }
btrfs_free_tree_block(trans, root, buf, parent_start,
last_ref);
}
@@ -1063,15 +1221,15 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
*/
static struct tree_mod_elem *
__tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
- struct btrfs_root *root, u64 time_seq)
+ struct extent_buffer *eb_root, u64 time_seq)
{
struct tree_mod_elem *tm;
struct tree_mod_elem *found = NULL;
- u64 root_logical = root->node->start;
+ u64 root_logical = eb_root->start;
int looped = 0;
if (!time_seq)
- return 0;
+ return NULL;
/*
* the very last operation that's logged for a root is the replacement
@@ -1082,7 +1240,7 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
tm = tree_mod_log_search_oldest(fs_info, root_logical,
time_seq);
if (!looped && !tm)
- return 0;
+ return NULL;
/*
* if there are no tree operation for the oldest root, we simply
* return it. this should only happen if that (old) root is at
@@ -1101,7 +1259,6 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
found = tm;
root_logical = tm->old_root.logical;
- BUG_ON(root_logical == root->node->start);
looped = 1;
}
@@ -1118,8 +1275,8 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
* time_seq).
*/
static void
-__tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
- struct tree_mod_elem *first_tm)
+__tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
+ u64 time_seq, struct tree_mod_elem *first_tm)
{
u32 n;
struct rb_node *next;
@@ -1129,6 +1286,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
unsigned long p_size = sizeof(struct btrfs_key_ptr);
n = btrfs_header_nritems(eb);
+ tree_mod_log_read_lock(fs_info);
while (tm && tm->seq >= time_seq) {
/*
* all the operations are recorded with the operator used for
@@ -1138,6 +1296,7 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
switch (tm->op) {
case MOD_LOG_KEY_REMOVE_WHILE_FREEING:
BUG_ON(tm->slot < n);
+ /* Fallthrough */
case MOD_LOG_KEY_REMOVE_WHILE_MOVING:
case MOD_LOG_KEY_REMOVE:
btrfs_set_node_key(eb, &tm->key, tm->slot);
@@ -1182,12 +1341,20 @@ __tree_mod_log_rewind(struct extent_buffer *eb, u64 time_seq,
if (tm->index != first_tm->index)
break;
}
+ tree_mod_log_read_unlock(fs_info);
btrfs_set_header_nritems(eb, n);
}
+/*
+ * Called with eb read locked. If the buffer cannot be rewinded, the same buffer
+ * is returned. If rewind operations happen, a fresh buffer is returned. The
+ * returned buffer is always read-locked. If the returned buffer is not the
+ * input buffer, the lock on the input buffer is released and the input buffer
+ * is freed (its refcount is decremented).
+ */
static struct extent_buffer *
-tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
- u64 time_seq)
+tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
+ struct extent_buffer *eb, u64 time_seq)
{
struct extent_buffer *eb_rewin;
struct tree_mod_elem *tm;
@@ -1202,11 +1369,18 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
if (!tm)
return eb;
+ btrfs_set_path_blocking(path);
+ btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+
if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
BUG_ON(tm->slot != 0);
eb_rewin = alloc_dummy_extent_buffer(eb->start,
fs_info->tree_root->nodesize);
- BUG_ON(!eb_rewin);
+ if (!eb_rewin) {
+ btrfs_tree_read_unlock_blocking(eb);
+ free_extent_buffer(eb);
+ return NULL;
+ }
btrfs_set_header_bytenr(eb_rewin, eb->start);
btrfs_set_header_backref_rev(eb_rewin,
btrfs_header_backref_rev(eb));
@@ -1214,15 +1388,22 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
btrfs_set_header_level(eb_rewin, btrfs_header_level(eb));
} else {
eb_rewin = btrfs_clone_extent_buffer(eb);
- BUG_ON(!eb_rewin);
+ if (!eb_rewin) {
+ btrfs_tree_read_unlock_blocking(eb);
+ free_extent_buffer(eb);
+ return NULL;
+ }
}
- extent_buffer_get(eb_rewin);
+ btrfs_clear_path_blocking(path, NULL, BTRFS_READ_LOCK);
+ btrfs_tree_read_unlock_blocking(eb);
free_extent_buffer(eb);
- __tree_mod_log_rewind(eb_rewin, time_seq, tm);
+ extent_buffer_get(eb_rewin);
+ btrfs_tree_read_lock(eb_rewin);
+ __tree_mod_log_rewind(fs_info, eb_rewin, time_seq, tm);
WARN_ON(btrfs_header_nritems(eb_rewin) >
- BTRFS_NODEPTRS_PER_BLOCK(fs_info->fs_root));
+ BTRFS_NODEPTRS_PER_BLOCK(fs_info->tree_root));
return eb_rewin;
}
@@ -1238,48 +1419,50 @@ static inline struct extent_buffer *
get_old_root(struct btrfs_root *root, u64 time_seq)
{
struct tree_mod_elem *tm;
- struct extent_buffer *eb;
+ struct extent_buffer *eb = NULL;
+ struct extent_buffer *eb_root;
struct extent_buffer *old;
struct tree_mod_root *old_root = NULL;
u64 old_generation = 0;
u64 logical;
u32 blocksize;
- eb = btrfs_read_lock_root_node(root);
- tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
+ eb_root = btrfs_read_lock_root_node(root);
+ tm = __tree_mod_log_oldest_root(root->fs_info, eb_root, time_seq);
if (!tm)
- return root->node;
+ return eb_root;
if (tm->op == MOD_LOG_ROOT_REPLACE) {
old_root = &tm->old_root;
old_generation = tm->generation;
logical = old_root->logical;
} else {
- logical = root->node->start;
+ logical = eb_root->start;
}
tm = tree_mod_log_search(root->fs_info, logical, time_seq);
if (old_root && tm && tm->op != MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
- btrfs_tree_read_unlock(root->node);
- free_extent_buffer(root->node);
+ btrfs_tree_read_unlock(eb_root);
+ free_extent_buffer(eb_root);
blocksize = btrfs_level_size(root, old_root->level);
old = read_tree_block(root, logical, blocksize, 0);
- if (!old) {
- pr_warn("btrfs: failed to read tree block %llu from get_old_root\n",
- logical);
- WARN_ON(1);
+ if (WARN_ON(!old || !extent_buffer_uptodate(old))) {
+ free_extent_buffer(old);
+ btrfs_warn(root->fs_info,
+ "failed to read tree block %llu from get_old_root", logical);
} else {
eb = btrfs_clone_extent_buffer(old);
free_extent_buffer(old);
}
} else if (old_root) {
- btrfs_tree_read_unlock(root->node);
- free_extent_buffer(root->node);
+ btrfs_tree_read_unlock(eb_root);
+ free_extent_buffer(eb_root);
eb = alloc_dummy_extent_buffer(logical, root->nodesize);
} else {
- eb = btrfs_clone_extent_buffer(root->node);
- btrfs_tree_read_unlock(root->node);
- free_extent_buffer(root->node);
+ btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK);
+ eb = btrfs_clone_extent_buffer(eb_root);
+ btrfs_tree_read_unlock_blocking(eb_root);
+ free_extent_buffer(eb_root);
}
if (!eb)
@@ -1289,12 +1472,12 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
if (old_root) {
btrfs_set_header_bytenr(eb, eb->start);
btrfs_set_header_backref_rev(eb, BTRFS_MIXED_BACKREF_REV);
- btrfs_set_header_owner(eb, root->root_key.objectid);
+ btrfs_set_header_owner(eb, btrfs_header_owner(eb_root));
btrfs_set_header_level(eb, old_root->level);
btrfs_set_header_generation(eb, old_generation);
}
if (tm)
- __tree_mod_log_rewind(eb, time_seq, tm);
+ __tree_mod_log_rewind(root->fs_info, eb, time_seq, tm);
else
WARN_ON(btrfs_header_level(eb) != 0);
WARN_ON(btrfs_header_nritems(eb) > BTRFS_NODEPTRS_PER_BLOCK(root));
@@ -1306,15 +1489,15 @@ int btrfs_old_root_level(struct btrfs_root *root, u64 time_seq)
{
struct tree_mod_elem *tm;
int level;
+ struct extent_buffer *eb_root = btrfs_root_node(root);
- tm = __tree_mod_log_oldest_root(root->fs_info, root, time_seq);
+ tm = __tree_mod_log_oldest_root(root->fs_info, eb_root, time_seq);
if (tm && tm->op == MOD_LOG_ROOT_REPLACE) {
level = tm->old_root.level;
} else {
- rcu_read_lock();
- level = btrfs_header_level(root->node);
- rcu_read_unlock();
+ level = btrfs_header_level(eb_root);
}
+ free_extent_buffer(eb_root);
return level;
}
@@ -1323,6 +1506,10 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf)
{
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+ if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
+ return 0;
+#endif
/* ensure we can see the force_cow */
smp_rmb();
@@ -1341,7 +1528,7 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans,
!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
!(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
- !root->force_cow)
+ !test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
return 0;
return 1;
}
@@ -1361,14 +1548,12 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
if (trans->transaction != root->fs_info->running_transaction)
WARN(1, KERN_CRIT "trans %llu running %llu\n",
- (unsigned long long)trans->transid,
- (unsigned long long)
+ trans->transid,
root->fs_info->running_transaction->transid);
if (trans->transid != root->fs_info->generation)
WARN(1, KERN_CRIT "trans %llu running %llu\n",
- (unsigned long long)trans->transid,
- (unsigned long long)root->fs_info->generation);
+ trans->transid, root->fs_info->generation);
if (!should_cow_block(trans, root, buf)) {
*cow_ret = buf;
@@ -1441,7 +1626,7 @@ int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
*/
int btrfs_realloc_node(struct btrfs_trans_handle *trans,
struct btrfs_root *root, struct extent_buffer *parent,
- int start_slot, int cache_only, u64 *last_ret,
+ int start_slot, u64 *last_ret,
struct btrfs_key *progress)
{
struct extent_buffer *cur;
@@ -1461,8 +1646,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
struct btrfs_disk_key disk_key;
parent_level = btrfs_header_level(parent);
- if (cache_only && parent_level != 1)
- return 0;
WARN_ON(trans->transaction != root->fs_info->running_transaction);
WARN_ON(trans->transid != root->fs_info->generation);
@@ -1508,15 +1691,13 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans,
else
uptodate = 0;
if (!cur || !uptodate) {
- if (cache_only) {
- free_extent_buffer(cur);
- continue;
- }
if (!cur) {
cur = read_tree_block(root, blocknr,
blocksize, gen);
- if (!cur)
+ if (!cur || !extent_buffer_uptodate(cur)) {
+ free_extent_buffer(cur);
return -EIO;
+ }
} else if (!uptodate) {
err = btrfs_read_buffer(cur, gen);
if (err) {
@@ -1681,6 +1862,8 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
struct extent_buffer *parent, int slot)
{
int level = btrfs_header_level(parent);
+ struct extent_buffer *eb;
+
if (slot < 0)
return NULL;
if (slot >= btrfs_header_nritems(parent))
@@ -1688,9 +1871,15 @@ static noinline struct extent_buffer *read_node_slot(struct btrfs_root *root,
BUG_ON(level == 0);
- return read_tree_block(root, btrfs_node_blockptr(parent, slot),
- btrfs_level_size(root, level - 1),
- btrfs_node_ptr_generation(parent, slot));
+ eb = read_tree_block(root, btrfs_node_blockptr(parent, slot),
+ btrfs_level_size(root, level - 1),
+ btrfs_node_ptr_generation(parent, slot));
+ if (eb && !extent_buffer_uptodate(eb)) {
+ free_extent_buffer(eb);
+ eb = NULL;
+ }
+
+ return eb;
}
/*
@@ -1755,8 +1944,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
goto enospc;
}
- tree_mod_log_free_eb(root->fs_info, root->node);
- tree_mod_log_set_root_pointer(root, child);
+ tree_mod_log_set_root_pointer(root, child, 1);
rcu_assign_pointer(root->node, child);
add_root_to_dirty_list(root);
@@ -1820,7 +2008,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (btrfs_header_nritems(right) == 0) {
clean_tree_block(trans, root, right);
btrfs_tree_unlock(right);
- del_ptr(trans, root, path, level + 1, pslot + 1);
+ del_ptr(root, path, level + 1, pslot + 1);
root_sub_used(root, right->len);
btrfs_free_tree_block(trans, root, right, 0, 1);
free_extent_buffer_stale(right);
@@ -1864,7 +2052,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
if (btrfs_header_nritems(mid) == 0) {
clean_tree_block(trans, root, mid);
btrfs_tree_unlock(mid);
- del_ptr(trans, root, path, level + 1, pslot);
+ del_ptr(root, path, level + 1, pslot);
root_sub_used(root, mid->len);
btrfs_free_tree_block(trans, root, mid, 0, 1);
free_extent_buffer_stale(mid);
@@ -2119,12 +2307,8 @@ static void reada_for_search(struct btrfs_root *root,
}
}
-/*
- * returns -EAGAIN if it had to drop the path, or zero if everything was in
- * cache
- */
-static noinline int reada_for_balance(struct btrfs_root *root,
- struct btrfs_path *path, int level)
+static noinline void reada_for_balance(struct btrfs_root *root,
+ struct btrfs_path *path, int level)
{
int slot;
int nritems;
@@ -2133,12 +2317,11 @@ static noinline int reada_for_balance(struct btrfs_root *root,
u64 gen;
u64 block1 = 0;
u64 block2 = 0;
- int ret = 0;
int blocksize;
parent = path->nodes[level + 1];
if (!parent)
- return 0;
+ return;
nritems = btrfs_header_nritems(parent);
slot = path->slots[level + 1];
@@ -2165,28 +2348,11 @@ static noinline int reada_for_balance(struct btrfs_root *root,
block2 = 0;
free_extent_buffer(eb);
}
- if (block1 || block2) {
- ret = -EAGAIN;
-
- /* release the whole path */
- btrfs_release_path(path);
- /* read the blocks */
- if (block1)
- readahead_tree_block(root, block1, blocksize, 0);
- if (block2)
- readahead_tree_block(root, block2, blocksize, 0);
-
- if (block1) {
- eb = read_tree_block(root, block1, blocksize, 0);
- free_extent_buffer(eb);
- }
- if (block2) {
- eb = read_tree_block(root, block2, blocksize, 0);
- free_extent_buffer(eb);
- }
- }
- return ret;
+ if (block1)
+ readahead_tree_block(root, block1, blocksize, 0);
+ if (block2)
+ readahead_tree_block(root, block2, blocksize, 0);
}
@@ -2212,9 +2378,6 @@ static noinline void unlock_up(struct btrfs_path *path, int level,
int no_skips = 0;
struct extent_buffer *t;
- if (path->really_keep_locks)
- return;
-
for (i = level; i < BTRFS_MAX_LEVEL; i++) {
if (!path->nodes[i])
break;
@@ -2262,7 +2425,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
{
int i;
- if (path->keep_locks || path->really_keep_locks)
+ if (path->keep_locks)
return;
for (i = level; i < BTRFS_MAX_LEVEL; i++) {
@@ -2303,35 +2466,28 @@ read_block_for_search(struct btrfs_trans_handle *trans,
tmp = btrfs_find_tree_block(root, blocknr, blocksize);
if (tmp) {
/* first we do an atomic uptodate check */
- if (btrfs_buffer_uptodate(tmp, 0, 1) > 0) {
- if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
- /*
- * we found an up to date block without
- * sleeping, return
- * right away
- */
- *eb_ret = tmp;
- return 0;
- }
- /* the pages were up to date, but we failed
- * the generation number check. Do a full
- * read for the generation number that is correct.
- * We must do this without dropping locks so
- * we can trust our generation number
- */
- free_extent_buffer(tmp);
- btrfs_set_path_blocking(p);
+ if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
+ *eb_ret = tmp;
+ return 0;
+ }
- /* now we're allowed to do a blocking uptodate check */
- tmp = read_tree_block(root, blocknr, blocksize, gen);
- if (tmp && btrfs_buffer_uptodate(tmp, gen, 0) > 0) {
- *eb_ret = tmp;
- return 0;
- }
- free_extent_buffer(tmp);
- btrfs_release_path(p);
- return -EIO;
+ /* the pages were up to date, but we failed
+ * the generation number check. Do a full
+ * read for the generation number that is correct.
+ * We must do this without dropping locks so
+ * we can trust our generation number
+ */
+ btrfs_set_path_blocking(p);
+
+ /* now we're allowed to do a blocking uptodate check */
+ ret = btrfs_read_buffer(tmp, gen);
+ if (!ret) {
+ *eb_ret = tmp;
+ return 0;
}
+ free_extent_buffer(tmp);
+ btrfs_release_path(p);
+ return -EIO;
}
/*
@@ -2392,11 +2548,8 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
goto again;
}
- sret = reada_for_balance(root, p, level);
- if (sret)
- goto again;
-
btrfs_set_path_blocking(p);
+ reada_for_balance(root, p, level);
sret = split_node(trans, root, p, level);
btrfs_clear_path_blocking(p, NULL, 0);
@@ -2416,11 +2569,8 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans,
goto again;
}
- sret = reada_for_balance(root, p, level);
- if (sret)
- goto again;
-
btrfs_set_path_blocking(p);
+ reada_for_balance(root, p, level);
sret = balance_level(trans, root, p, level);
btrfs_clear_path_blocking(p, NULL, 0);
@@ -2443,6 +2593,83 @@ done:
return ret;
}
+static void key_search_validate(struct extent_buffer *b,
+ struct btrfs_key *key,
+ int level)
+{
+#ifdef CONFIG_BTRFS_ASSERT
+ struct btrfs_disk_key disk_key;
+
+ btrfs_cpu_key_to_disk(&disk_key, key);
+
+ if (level == 0)
+ ASSERT(!memcmp_extent_buffer(b, &disk_key,
+ offsetof(struct btrfs_leaf, items[0].key),
+ sizeof(disk_key)));
+ else
+ ASSERT(!memcmp_extent_buffer(b, &disk_key,
+ offsetof(struct btrfs_node, ptrs[0].key),
+ sizeof(disk_key)));
+#endif
+}
+
+static int key_search(struct extent_buffer *b, struct btrfs_key *key,
+ int level, int *prev_cmp, int *slot)
+{
+ if (*prev_cmp != 0) {
+ *prev_cmp = bin_search(b, key, level, slot);
+ return *prev_cmp;
+ }
+
+ key_search_validate(b, key, level);
+ *slot = 0;
+
+ return 0;
+}
+
+int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path,
+ u64 iobjectid, u64 ioff, u8 key_type,
+ struct btrfs_key *found_key)
+{
+ int ret;
+ struct btrfs_key key;
+ struct extent_buffer *eb;
+ struct btrfs_path *path;
+
+ key.type = key_type;
+ key.objectid = iobjectid;
+ key.offset = ioff;
+
+ if (found_path == NULL) {
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ } else
+ path = found_path;
+
+ ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
+ if ((ret < 0) || (found_key == NULL)) {
+ if (path != found_path)
+ btrfs_free_path(path);
+ return ret;
+ }
+
+ eb = path->nodes[0];
+ if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
+ ret = btrfs_next_leaf(fs_root, path);
+ if (ret)
+ return ret;
+ eb = path->nodes[0];
+ }
+
+ btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
+ if (found_key->type != key.type ||
+ found_key->objectid != key.objectid)
+ return 1;
+
+ return 0;
+}
+
/*
* look for key in the tree. path is filled in with nodes along the way
* if key is found, we return zero and you can find the item in the leaf
@@ -2471,10 +2698,12 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
int write_lock_level = 0;
u8 lowest_level = 0;
int min_write_lock_level;
+ int prev_cmp;
lowest_level = p->lowest_level;
WARN_ON(lowest_level && ins_len > 0);
WARN_ON(p->nodes[0] != NULL);
+ BUG_ON(!cow && ins_len);
if (ins_len < 0) {
lowest_unlock = 2;
@@ -2495,12 +2724,13 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
if (!cow)
write_lock_level = -1;
- if (cow && (p->really_keep_locks || p->keep_locks || p->lowest_level))
+ if (cow && (p->keep_locks || p->lowest_level))
write_lock_level = BTRFS_MAX_LEVEL;
min_write_lock_level = write_lock_level;
again:
+ prev_cmp = -1;
/*
* we try very hard to do read locks on the root
*/
@@ -2511,9 +2741,13 @@ again:
* the commit roots are read only
* so we always do read locks
*/
+ if (p->need_commit_sem)
+ down_read(&root->fs_info->commit_root_sem);
b = root->commit_root;
extent_buffer_get(b);
level = btrfs_header_level(b);
+ if (p->need_commit_sem)
+ up_read(&root->fs_info->commit_root_sem);
if (!p->skip_locking)
btrfs_tree_read_lock(b);
} else {
@@ -2582,8 +2816,6 @@ again:
}
}
cow_done:
- BUG_ON(!cow && ins_len);
-
p->nodes[level] = b;
btrfs_clear_path_blocking(p, NULL, 0);
@@ -2593,15 +2825,21 @@ cow_done:
* It is safe to drop the lock on our parent before we
* go through the expensive btree search on b.
*
- * If cow is true, then we might be changing slot zero,
- * which may require changing the parent. So, we can't
- * drop the lock until after we know which slot we're
- * operating on.
+ * If we're inserting or deleting (ins_len != 0), then we might
+ * be changing slot zero, which may require changing the parent.
+ * So, we can't drop the lock until after we know which slot
+ * we're operating on.
*/
- if (!cow)
- btrfs_unlock_up_safe(p, level + 1);
+ if (!ins_len && !p->keep_locks) {
+ int u = level + 1;
+
+ if (u < BTRFS_MAX_LEVEL && p->locks[u]) {
+ btrfs_tree_unlock_rw(p->nodes[u], p->locks[u]);
+ p->locks[u] = 0;
+ }
+ }
- ret = bin_search(b, key, level, &slot);
+ ret = key_search(b, key, level, &prev_cmp, &slot);
if (level != 0) {
int dec = 0;
@@ -2627,7 +2865,7 @@ cow_done:
* which means we must have a write lock
* on the parent
*/
- if (slot == 0 && cow &&
+ if (slot == 0 && ins_len &&
write_lock_level < level + 1) {
write_lock_level = level + 1;
btrfs_release_path(p);
@@ -2736,6 +2974,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
int level;
int lowest_unlock = 1;
u8 lowest_level = 0;
+ int prev_cmp = -1;
lowest_level = p->lowest_level;
WARN_ON(p->nodes[0] != NULL);
@@ -2763,7 +3002,12 @@ again:
*/
btrfs_unlock_up_safe(p, level + 1);
- ret = bin_search(b, key, level, &slot);
+ /*
+ * Since we can unwind eb's we want to do a real search every
+ * time.
+ */
+ prev_cmp = -1;
+ ret = key_search(b, key, level, &prev_cmp, &slot);
if (level != 0) {
int dec = 0;
@@ -2797,15 +3041,13 @@ again:
btrfs_clear_path_blocking(p, b,
BTRFS_READ_LOCK);
}
+ b = tree_mod_log_rewind(root->fs_info, p, b, time_seq);
+ if (!b) {
+ ret = -ENOMEM;
+ goto done;
+ }
p->locks[level] = BTRFS_READ_LOCK;
p->nodes[level] = b;
- b = tree_mod_log_rewind(root->fs_info, b, time_seq);
- if (b != p->nodes[level]) {
- btrfs_tree_unlock_rw(p->nodes[level],
- p->locks[level]);
- p->locks[level] = 0;
- p->nodes[level] = b;
- }
} else {
p->slots[level] = slot;
unlock_up(p, level, lowest_unlock, 0, NULL);
@@ -2876,7 +3118,9 @@ again:
if (ret < 0)
return ret;
if (!ret) {
- p->slots[0] = btrfs_header_nritems(leaf) - 1;
+ leaf = p->nodes[0];
+ if (p->slots[0] == btrfs_header_nritems(leaf))
+ p->slots[0]--;
return 0;
}
if (!return_any)
@@ -2904,8 +3148,7 @@ again:
* higher levels
*
*/
-static void fixup_low_keys(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
+static void fixup_low_keys(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_disk_key *key, int level)
{
int i;
@@ -2930,8 +3173,7 @@ static void fixup_low_keys(struct btrfs_trans_handle *trans,
* This function isn't completely safe. It's the caller's responsibility
* that the new key won't break the order
*/
-void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
+void btrfs_set_item_key_safe(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *new_key)
{
struct btrfs_disk_key disk_key;
@@ -2953,7 +3195,7 @@ void btrfs_set_item_key_safe(struct btrfs_trans_handle *trans,
btrfs_set_item_key(eb, &disk_key, slot);
btrfs_mark_buffer_dirty(eb);
if (slot == 0)
- fixup_low_keys(trans, root, path, &disk_key, 1);
+ fixup_low_keys(root, path, &disk_key, 1);
}
/*
@@ -2999,8 +3241,12 @@ static int push_node_left(struct btrfs_trans_handle *trans,
} else
push_items = min(src_nritems - 8, push_items);
- tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
- push_items);
+ ret = tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
+ push_items);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
copy_extent_buffer(dst, src,
btrfs_node_key_ptr_offset(dst_nritems),
btrfs_node_key_ptr_offset(0),
@@ -3070,8 +3316,12 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
(dst_nritems) *
sizeof(struct btrfs_key_ptr));
- tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
- src_nritems - push_items, push_items);
+ ret = tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
+ src_nritems - push_items, push_items);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
copy_extent_buffer(dst, src,
btrfs_node_key_ptr_offset(0),
btrfs_node_key_ptr_offset(src_nritems - push_items),
@@ -3128,13 +3378,11 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
btrfs_set_header_owner(c, root->root_key.objectid);
- write_extent_buffer(c, root->fs_info->fsid,
- (unsigned long)btrfs_header_fsid(c),
+ write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(),
BTRFS_FSID_SIZE);
write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
- (unsigned long)btrfs_header_chunk_tree_uuid(c),
- BTRFS_UUID_SIZE);
+ btrfs_header_chunk_tree_uuid(c), BTRFS_UUID_SIZE);
btrfs_set_node_key(c, &lower_key, 0);
btrfs_set_node_blockptr(c, 0, lower->start);
@@ -3146,7 +3394,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(c);
old = root->node;
- tree_mod_log_set_root_pointer(root, c);
+ tree_mod_log_set_root_pointer(root, c, 0);
rcu_assign_pointer(root->node, c);
/* the super has an extra ref to root->node */
@@ -3193,7 +3441,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
}
if (level) {
ret = tree_mod_log_insert_key(root->fs_info, lower, slot,
- MOD_LOG_KEY_ADD);
+ MOD_LOG_KEY_ADD, GFP_NOFS);
BUG_ON(ret < 0);
}
btrfs_set_node_key(lower, key, slot);
@@ -3227,7 +3475,16 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
c = path->nodes[level];
WARN_ON(btrfs_header_generation(c) != trans->transid);
if (c == root->node) {
- /* trying to split the root, lets make a new one */
+ /*
+ * trying to split the root, lets make a new one
+ *
+ * tree mod log: We don't log_removal old root in
+ * insert_new_root, because that root buffer will be kept as a
+ * normal node. We are going to log removal of half of the
+ * elements below with tree_mod_log_eb_copy. We're holding a
+ * tree lock on the buffer, which is why we cannot race with
+ * other tree_mod_log users.
+ */
ret = insert_new_root(trans, root, path, level + 1);
if (ret)
return ret;
@@ -3260,13 +3517,17 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
btrfs_set_header_owner(split, root->root_key.objectid);
write_extent_buffer(split, root->fs_info->fsid,
- (unsigned long)btrfs_header_fsid(split),
- BTRFS_FSID_SIZE);
+ btrfs_header_fsid(), BTRFS_FSID_SIZE);
write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
- (unsigned long)btrfs_header_chunk_tree_uuid(split),
+ btrfs_header_chunk_tree_uuid(split),
BTRFS_UUID_SIZE);
- tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid);
+ ret = tree_mod_log_eb_copy(root->fs_info, split, c, 0,
+ mid, c_nritems - mid);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
copy_extent_buffer(split, c,
btrfs_node_key_ptr_offset(0),
btrfs_node_key_ptr_offset(mid),
@@ -3311,8 +3572,8 @@ static int leaf_space_used(struct extent_buffer *l, int start, int nr)
if (!nr)
return 0;
btrfs_init_map_token(&token);
- start_item = btrfs_item_nr(l, start);
- end_item = btrfs_item_nr(l, end);
+ start_item = btrfs_item_nr(start);
+ end_item = btrfs_item_nr(end);
data_len = btrfs_token_item_offset(l, start_item, &token) +
btrfs_token_item_size(l, start_item, &token);
data_len = data_len - btrfs_token_item_offset(l, end_item, &token);
@@ -3333,8 +3594,8 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
int ret;
ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
if (ret < 0) {
- printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, "
- "used %d nritems %d\n",
+ btrfs_crit(root->fs_info,
+ "leaf free space ret %d, leaf data size %lu, used %d nritems %d",
ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
leaf_space_used(leaf, 0, nritems), nritems);
}
@@ -3380,7 +3641,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
slot = path->slots[1];
i = left_nritems - 1;
while (i >= nr) {
- item = btrfs_item_nr(left, i);
+ item = btrfs_item_nr(i);
if (!empty && push_items > 0) {
if (path->slots[0] > i)
@@ -3444,7 +3705,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
btrfs_set_header_nritems(right, right_nritems);
push_space = BTRFS_LEAF_DATA_SIZE(root);
for (i = 0; i < right_nritems; i++) {
- item = btrfs_item_nr(right, i);
+ item = btrfs_item_nr(i);
push_space -= btrfs_token_item_size(right, item, &token);
btrfs_set_token_item_offset(right, item, push_space, &token);
}
@@ -3542,6 +3803,19 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (left_nritems == 0)
goto out_unlock;
+ if (path->slots[0] == left_nritems && !empty) {
+ /* Key greater than all keys in the leaf, right neighbor has
+ * enough room for it and we're not emptying our leaf to delete
+ * it, therefore use right neighbor to insert the new item and
+ * no need to touch/dirty our left leaft. */
+ btrfs_tree_unlock(left);
+ free_extent_buffer(left);
+ path->nodes[0] = right;
+ path->slots[0] = 0;
+ path->slots[1]++;
+ return 0;
+ }
+
return __push_leaf_right(trans, root, path, min_data_size, empty,
right, free_space, left_nritems, min_slot);
out_unlock:
@@ -3586,7 +3860,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
nr = min(right_nritems - 1, max_slot);
for (i = 0; i < nr; i++) {
- item = btrfs_item_nr(right, i);
+ item = btrfs_item_nr(i);
if (!empty && push_items > 0) {
if (path->slots[0] < i)
@@ -3613,8 +3887,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
ret = 1;
goto out;
}
- if (!empty && push_items == btrfs_header_nritems(right))
- WARN_ON(1);
+ WARN_ON(!empty && push_items == btrfs_header_nritems(right));
/* push data from right to left */
copy_extent_buffer(left, right,
@@ -3637,7 +3910,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
u32 ioff;
- item = btrfs_item_nr(left, i);
+ item = btrfs_item_nr(i);
ioff = btrfs_token_item_offset(left, item, &token);
btrfs_set_token_item_offset(left, item,
@@ -3668,7 +3941,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
btrfs_set_header_nritems(right, right_nritems);
push_space = BTRFS_LEAF_DATA_SIZE(root);
for (i = 0; i < right_nritems; i++) {
- item = btrfs_item_nr(right, i);
+ item = btrfs_item_nr(i);
push_space = push_space - btrfs_token_item_size(right,
item, &token);
@@ -3682,7 +3955,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
clean_tree_block(trans, root, right);
btrfs_item_key(right, &disk_key, 0);
- fixup_low_keys(trans, root, path, &disk_key, 1);
+ fixup_low_keys(root, path, &disk_key, 1);
/* then fixup the leaf pointer in the path */
if (path->slots[0] < push_items) {
@@ -3809,7 +4082,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
btrfs_item_end_nr(l, mid);
for (i = 0; i < nritems; i++) {
- struct btrfs_item *item = btrfs_item_nr(right, i);
+ struct btrfs_item *item = btrfs_item_nr(i);
u32 ioff;
ioff = btrfs_token_item_offset(right, item, &token);
@@ -3859,14 +4132,17 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
int progress = 0;
int slot;
u32 nritems;
+ int space_needed = data_size;
slot = path->slots[0];
+ if (slot < btrfs_header_nritems(path->nodes[0]))
+ space_needed -= btrfs_leaf_free_space(root, path->nodes[0]);
/*
* try to push all the items after our slot into the
* right leaf
*/
- ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
+ ret = push_leaf_right(trans, root, path, 1, space_needed, 0, slot);
if (ret < 0)
return ret;
@@ -3886,7 +4162,7 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
/* try to push all the items before our slot into the next leaf */
slot = path->slots[0];
- ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
+ ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot);
if (ret < 0)
return ret;
@@ -3929,14 +4205,19 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
return -EOVERFLOW;
/* first try to make some room by pushing left and right */
- if (data_size) {
- wret = push_leaf_right(trans, root, path, data_size,
- data_size, 0, 0);
+ if (data_size && path->nodes[1]) {
+ int space_needed = data_size;
+
+ if (slot < btrfs_header_nritems(l))
+ space_needed -= btrfs_leaf_free_space(root, l);
+
+ wret = push_leaf_right(trans, root, path, space_needed,
+ space_needed, 0, 0);
if (wret < 0)
return wret;
if (wret) {
- wret = push_leaf_left(trans, root, path, data_size,
- data_size, 0, (u32)-1);
+ wret = push_leaf_left(trans, root, path, space_needed,
+ space_needed, 0, (u32)-1);
if (wret < 0)
return wret;
}
@@ -3990,7 +4271,7 @@ again:
data_size > BTRFS_LEAF_DATA_SIZE(root)) {
if (data_size && !tried_avoid_double)
goto push_for_double;
- split = 2 ;
+ split = 2;
}
}
}
@@ -4016,11 +4297,10 @@ again:
btrfs_set_header_owner(right, root->root_key.objectid);
btrfs_set_header_level(right, 0);
write_extent_buffer(right, root->fs_info->fsid,
- (unsigned long)btrfs_header_fsid(right),
- BTRFS_FSID_SIZE);
+ btrfs_header_fsid(), BTRFS_FSID_SIZE);
write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
- (unsigned long)btrfs_header_chunk_tree_uuid(right),
+ btrfs_header_chunk_tree_uuid(right),
BTRFS_UUID_SIZE);
if (split == 0) {
@@ -4042,8 +4322,7 @@ again:
path->nodes[0] = right;
path->slots[0] = 0;
if (path->slots[1] == 0)
- fixup_low_keys(trans, root, path,
- &disk_key, 1);
+ fixup_low_keys(root, path, &disk_key, 1);
}
btrfs_mark_buffer_dirty(right);
return ret;
@@ -4153,7 +4432,7 @@ static noinline int split_item(struct btrfs_trans_handle *trans,
btrfs_set_path_blocking(path);
- item = btrfs_item_nr(leaf, path->slots[0]);
+ item = btrfs_item_nr(path->slots[0]);
orig_offset = btrfs_item_offset(leaf, item);
item_size = btrfs_item_size(leaf, item);
@@ -4176,7 +4455,7 @@ static noinline int split_item(struct btrfs_trans_handle *trans,
btrfs_cpu_key_to_disk(&disk_key, new_key);
btrfs_set_item_key(leaf, &disk_key, slot);
- new_item = btrfs_item_nr(leaf, slot);
+ new_item = btrfs_item_nr(slot);
btrfs_set_item_offset(leaf, new_item, orig_offset);
btrfs_set_item_size(leaf, new_item, item_size - split_offset);
@@ -4259,7 +4538,7 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
return ret;
path->slots[0]++;
- setup_items_for_insert(trans, root, path, new_key, &item_size,
+ setup_items_for_insert(root, path, new_key, &item_size,
item_size, item_size +
sizeof(struct btrfs_item), 1);
leaf = path->nodes[0];
@@ -4276,9 +4555,7 @@ int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
* off the end of the item or if we shift the item to chop bytes off
* the front.
*/
-void btrfs_truncate_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
+void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
u32 new_size, int from_end)
{
int slot;
@@ -4317,7 +4594,7 @@ void btrfs_truncate_item(struct btrfs_trans_handle *trans,
/* first correct the data pointers */
for (i = slot; i < nritems; i++) {
u32 ioff;
- item = btrfs_item_nr(leaf, i);
+ item = btrfs_item_nr(i);
ioff = btrfs_token_item_offset(leaf, item, &token);
btrfs_set_token_item_offset(leaf, item,
@@ -4362,10 +4639,10 @@ void btrfs_truncate_item(struct btrfs_trans_handle *trans,
btrfs_set_disk_key_offset(&disk_key, offset + size_diff);
btrfs_set_item_key(leaf, &disk_key, slot);
if (slot == 0)
- fixup_low_keys(trans, root, path, &disk_key, 1);
+ fixup_low_keys(root, path, &disk_key, 1);
}
- item = btrfs_item_nr(leaf, slot);
+ item = btrfs_item_nr(slot);
btrfs_set_item_size(leaf, item, new_size);
btrfs_mark_buffer_dirty(leaf);
@@ -4376,10 +4653,9 @@ void btrfs_truncate_item(struct btrfs_trans_handle *trans,
}
/*
- * make the item pointed to by the path bigger, data_size is the new size.
+ * make the item pointed to by the path bigger, data_size is the added size.
*/
-void btrfs_extend_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
+void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
u32 data_size)
{
int slot;
@@ -4409,7 +4685,7 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans,
BUG_ON(slot < 0);
if (slot >= nritems) {
btrfs_print_leaf(root, leaf);
- printk(KERN_CRIT "slot %d too large, nritems %d\n",
+ btrfs_crit(root->fs_info, "slot %d too large, nritems %d",
slot, nritems);
BUG_ON(1);
}
@@ -4420,7 +4696,7 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans,
/* first correct the data pointers */
for (i = slot; i < nritems; i++) {
u32 ioff;
- item = btrfs_item_nr(leaf, i);
+ item = btrfs_item_nr(i);
ioff = btrfs_token_item_offset(leaf, item, &token);
btrfs_set_token_item_offset(leaf, item,
@@ -4434,7 +4710,7 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans,
data_end = old_data;
old_size = btrfs_item_size_nr(leaf, slot);
- item = btrfs_item_nr(leaf, slot);
+ item = btrfs_item_nr(slot);
btrfs_set_item_size(leaf, item, old_size + data_size);
btrfs_mark_buffer_dirty(leaf);
@@ -4449,8 +4725,7 @@ void btrfs_extend_item(struct btrfs_trans_handle *trans,
* to save stack depth by doing the bulk of the work in a function
* that doesn't call btrfs_search_slot
*/
-void setup_items_for_insert(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
+void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
struct btrfs_key *cpu_key, u32 *data_size,
u32 total_data, u32 total_size, int nr)
{
@@ -4473,7 +4748,7 @@ void setup_items_for_insert(struct btrfs_trans_handle *trans,
if (btrfs_leaf_free_space(root, leaf) < total_size) {
btrfs_print_leaf(root, leaf);
- printk(KERN_CRIT "not enough freespace need %u have %d\n",
+ btrfs_crit(root->fs_info, "not enough freespace need %u have %d",
total_size, btrfs_leaf_free_space(root, leaf));
BUG();
}
@@ -4483,7 +4758,7 @@ void setup_items_for_insert(struct btrfs_trans_handle *trans,
if (old_data < data_end) {
btrfs_print_leaf(root, leaf);
- printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
+ btrfs_crit(root->fs_info, "slot %d old_data %d data_end %d",
slot, old_data, data_end);
BUG_ON(1);
}
@@ -4494,7 +4769,7 @@ void setup_items_for_insert(struct btrfs_trans_handle *trans,
for (i = slot; i < nritems; i++) {
u32 ioff;
- item = btrfs_item_nr(leaf, i);
+ item = btrfs_item_nr( i);
ioff = btrfs_token_item_offset(leaf, item, &token);
btrfs_set_token_item_offset(leaf, item,
ioff - total_data, &token);
@@ -4515,7 +4790,7 @@ void setup_items_for_insert(struct btrfs_trans_handle *trans,
for (i = 0; i < nr; i++) {
btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
btrfs_set_item_key(leaf, &disk_key, slot + i);
- item = btrfs_item_nr(leaf, slot + i);
+ item = btrfs_item_nr(slot + i);
btrfs_set_token_item_offset(leaf, item,
data_end - data_size[i], &token);
data_end -= data_size[i];
@@ -4526,7 +4801,7 @@ void setup_items_for_insert(struct btrfs_trans_handle *trans,
if (slot == 0) {
btrfs_cpu_key_to_disk(&disk_key, cpu_key);
- fixup_low_keys(trans, root, path, &disk_key, 1);
+ fixup_low_keys(root, path, &disk_key, 1);
}
btrfs_unlock_up_safe(path, 1);
btrfs_mark_buffer_dirty(leaf);
@@ -4566,7 +4841,7 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
slot = path->slots[0];
BUG_ON(slot < 0);
- setup_items_for_insert(trans, root, path, cpu_key, data_size,
+ setup_items_for_insert(root, path, cpu_key, data_size,
total_data, total_size, nr);
return 0;
}
@@ -4604,8 +4879,8 @@ int btrfs_insert_item(struct btrfs_trans_handle *trans, struct btrfs_root
* the tree should have been previously balanced so the deletion does not
* empty a node.
*/
-static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
- struct btrfs_path *path, int level, int slot)
+static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
+ int level, int slot)
{
struct extent_buffer *parent = path->nodes[level];
u32 nritems;
@@ -4623,7 +4898,7 @@ static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
(nritems - slot - 1));
} else if (level) {
ret = tree_mod_log_insert_key(root->fs_info, parent, slot,
- MOD_LOG_KEY_REMOVE);
+ MOD_LOG_KEY_REMOVE, GFP_NOFS);
BUG_ON(ret < 0);
}
@@ -4637,7 +4912,7 @@ static void del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_disk_key disk_key;
btrfs_node_key(parent, &disk_key, 0);
- fixup_low_keys(trans, root, path, &disk_key, level + 1);
+ fixup_low_keys(root, path, &disk_key, level + 1);
}
btrfs_mark_buffer_dirty(parent);
}
@@ -4658,7 +4933,7 @@ static noinline void btrfs_del_leaf(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf)
{
WARN_ON(btrfs_header_generation(leaf) != trans->transid);
- del_ptr(trans, root, path, 1, path->slots[1]);
+ del_ptr(root, path, 1, path->slots[1]);
/*
* btrfs_free_extent is expensive, we want to make sure we
@@ -4710,7 +4985,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
for (i = slot + nr; i < nritems; i++) {
u32 ioff;
- item = btrfs_item_nr(leaf, i);
+ item = btrfs_item_nr(i);
ioff = btrfs_token_item_offset(leaf, item, &token);
btrfs_set_token_item_offset(leaf, item,
ioff + dsize, &token);
@@ -4739,7 +5014,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_disk_key disk_key;
btrfs_item_key(leaf, &disk_key, 0);
- fixup_low_keys(trans, root, path, &disk_key, 1);
+ fixup_low_keys(root, path, &disk_key, 1);
}
/* delete the leaf if it is mostly empty */
@@ -4803,14 +5078,18 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
- if (key.offset > 0)
+ if (key.offset > 0) {
key.offset--;
- else if (key.type > 0)
+ } else if (key.type > 0) {
key.type--;
- else if (key.objectid > 0)
+ key.offset = (u64)-1;
+ } else if (key.objectid > 0) {
key.objectid--;
- else
+ key.type = (u8)-1;
+ key.offset = (u64)-1;
+ } else {
return 1;
+ }
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -4818,15 +5097,25 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
return ret;
btrfs_item_key(path->nodes[0], &found_key, 0);
ret = comp_keys(&found_key, &key);
- if (ret < 0)
+ /*
+ * We might have had an item with the previous key in the tree right
+ * before we released our path. And after we released our path, that
+ * item might have been pushed to the first slot (0) of the leaf we
+ * were holding due to a tree balance. Alternatively, an item with the
+ * previous key can exist as the only element of a leaf (big fat item).
+ * Therefore account for these 2 cases, so that our callers (like
+ * btrfs_previous_item) don't miss an existing item with a key matching
+ * the previous key we computed above.
+ */
+ if (ret <= 0)
return 0;
return 1;
}
/*
* A helper function to walk down the tree starting at min_key, and looking
- * for nodes or leaves that are either in cache or have a minimum
- * transaction id. This is used by the btree defrag code, and tree logging
+ * for nodes or leaves that are have a minimum transaction id.
+ * This is used by the btree defrag code, and tree logging
*
* This does not cow, but it does stuff the starting key it finds back
* into min_key, so you can call btrfs_search_slot with cow=1 on the
@@ -4846,8 +5135,7 @@ int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
* was nothing in the tree that matched the search criteria.
*/
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
- struct btrfs_key *max_key,
- struct btrfs_path *path, int cache_only,
+ struct btrfs_path *path,
u64 min_trans)
{
struct extent_buffer *cur;
@@ -4887,43 +5175,18 @@ again:
if (sret && slot > 0)
slot--;
/*
- * check this node pointer against the cache_only and
- * min_trans parameters. If it isn't in cache or is too
- * old, skip to the next one.
+ * check this node pointer against the min_trans parameters.
+ * If it is too old, old, skip to the next one.
*/
while (slot < nritems) {
- u64 blockptr;
u64 gen;
- struct extent_buffer *tmp;
- struct btrfs_disk_key disk_key;
- blockptr = btrfs_node_blockptr(cur, slot);
gen = btrfs_node_ptr_generation(cur, slot);
if (gen < min_trans) {
slot++;
continue;
}
- if (!cache_only)
- break;
-
- if (max_key) {
- btrfs_node_key(cur, &disk_key, slot);
- if (comp_keys(&disk_key, max_key) >= 0) {
- ret = 1;
- goto out;
- }
- }
-
- tmp = btrfs_find_tree_block(root, blockptr,
- btrfs_level_size(root, level - 1));
-
- if (tmp && btrfs_buffer_uptodate(tmp, gen, 1) > 0) {
- free_extent_buffer(tmp);
- break;
- }
- if (tmp)
- free_extent_buffer(tmp);
- slot++;
+ break;
}
find_next_key:
/*
@@ -4934,7 +5197,7 @@ find_next_key:
path->slots[level] = slot;
btrfs_set_path_blocking(path);
sret = btrfs_find_next_key(root, path, min_key, level,
- cache_only, min_trans);
+ min_trans);
if (sret == 0) {
btrfs_release_path(path);
goto again;
@@ -5083,7 +5346,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
{
int ret;
int cmp;
- struct btrfs_trans_handle *trans = NULL;
struct btrfs_path *left_path = NULL;
struct btrfs_path *right_path = NULL;
struct btrfs_key left_key;
@@ -5099,9 +5361,8 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
int advance_right;
u64 left_blockptr;
u64 right_blockptr;
- u64 left_start_ctransid;
- u64 right_start_ctransid;
- u64 ctransid;
+ u64 left_gen;
+ u64 right_gen;
left_path = btrfs_alloc_path();
if (!left_path) {
@@ -5125,21 +5386,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
right_path->search_commit_root = 1;
right_path->skip_locking = 1;
- spin_lock(&left_root->root_item_lock);
- left_start_ctransid = btrfs_root_ctransid(&left_root->root_item);
- spin_unlock(&left_root->root_item_lock);
-
- spin_lock(&right_root->root_item_lock);
- right_start_ctransid = btrfs_root_ctransid(&right_root->root_item);
- spin_unlock(&right_root->root_item_lock);
-
- trans = btrfs_join_transaction(left_root);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- trans = NULL;
- goto out;
- }
-
/*
* Strategy: Go to the first items of both trees. Then do
*
@@ -5176,6 +5422,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
* the right if possible or go up and right.
*/
+ down_read(&left_root->fs_info->commit_root_sem);
left_level = btrfs_header_level(left_root->commit_root);
left_root_level = left_level;
left_path->nodes[left_level] = left_root->commit_root;
@@ -5185,6 +5432,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
right_root_level = right_level;
right_path->nodes[right_level] = right_root->commit_root;
extent_buffer_get(right_path->nodes[right_level]);
+ up_read(&left_root->fs_info->commit_root_sem);
if (left_level == 0)
btrfs_item_key_to_cpu(left_path->nodes[left_level],
@@ -5203,67 +5451,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
advance_left = advance_right = 0;
while (1) {
- /*
- * We need to make sure the transaction does not get committed
- * while we do anything on commit roots. This means, we need to
- * join and leave transactions for every item that we process.
- */
- if (trans && btrfs_should_end_transaction(trans, left_root)) {
- btrfs_release_path(left_path);
- btrfs_release_path(right_path);
-
- ret = btrfs_end_transaction(trans, left_root);
- trans = NULL;
- if (ret < 0)
- goto out;
- }
- /* now rejoin the transaction */
- if (!trans) {
- trans = btrfs_join_transaction(left_root);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- trans = NULL;
- goto out;
- }
-
- spin_lock(&left_root->root_item_lock);
- ctransid = btrfs_root_ctransid(&left_root->root_item);
- spin_unlock(&left_root->root_item_lock);
- if (ctransid != left_start_ctransid)
- left_start_ctransid = 0;
-
- spin_lock(&right_root->root_item_lock);
- ctransid = btrfs_root_ctransid(&right_root->root_item);
- spin_unlock(&right_root->root_item_lock);
- if (ctransid != right_start_ctransid)
- right_start_ctransid = 0;
-
- if (!left_start_ctransid || !right_start_ctransid) {
- WARN(1, KERN_WARNING
- "btrfs: btrfs_compare_tree detected "
- "a change in one of the trees while "
- "iterating. This is probably a "
- "bug.\n");
- ret = -EIO;
- goto out;
- }
-
- /*
- * the commit root may have changed, so start again
- * where we stopped
- */
- left_path->lowest_level = left_level;
- right_path->lowest_level = right_level;
- ret = btrfs_search_slot(NULL, left_root,
- &left_key, left_path, 0, 0);
- if (ret < 0)
- goto out;
- ret = btrfs_search_slot(NULL, right_root,
- &right_key, right_path, 0, 0);
- if (ret < 0)
- goto out;
- }
-
if (advance_left && !left_end_reached) {
ret = tree_advance(left_root, left_path, &left_level,
left_root_level,
@@ -5333,19 +5520,20 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
goto out;
advance_right = ADVANCE;
} else {
+ enum btrfs_compare_tree_result cmp;
+
WARN_ON(!extent_buffer_uptodate(left_path->nodes[0]));
ret = tree_compare_item(left_root, left_path,
right_path, tmp_buf);
- if (ret) {
- WARN_ON(!extent_buffer_uptodate(left_path->nodes[0]));
- ret = changed_cb(left_root, right_root,
- left_path, right_path,
- &left_key,
- BTRFS_COMPARE_TREE_CHANGED,
- ctx);
- if (ret < 0)
- goto out;
- }
+ if (ret)
+ cmp = BTRFS_COMPARE_TREE_CHANGED;
+ else
+ cmp = BTRFS_COMPARE_TREE_SAME;
+ ret = changed_cb(left_root, right_root,
+ left_path, right_path,
+ &left_key, cmp, ctx);
+ if (ret < 0)
+ goto out;
advance_left = ADVANCE;
advance_right = ADVANCE;
}
@@ -5362,7 +5550,14 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
right_blockptr = btrfs_node_blockptr(
right_path->nodes[right_level],
right_path->slots[right_level]);
- if (left_blockptr == right_blockptr) {
+ left_gen = btrfs_node_ptr_generation(
+ left_path->nodes[left_level],
+ left_path->slots[left_level]);
+ right_gen = btrfs_node_ptr_generation(
+ right_path->nodes[right_level],
+ right_path->slots[right_level]);
+ if (left_blockptr == right_blockptr &&
+ left_gen == right_gen) {
/*
* As we're on a shared block, don't
* allow to go deeper.
@@ -5385,22 +5580,13 @@ out:
btrfs_free_path(left_path);
btrfs_free_path(right_path);
kfree(tmp_buf);
-
- if (trans) {
- if (!ret)
- ret = btrfs_end_transaction(trans, left_root);
- else
- btrfs_end_transaction(trans, left_root);
- }
-
return ret;
}
/*
* this is similar to btrfs_next_leaf, but does not try to preserve
* and fixup the path. It looks for and returns the next key in the
- * tree based on the current path and the cache_only and min_trans
- * parameters.
+ * tree based on the current path and the min_trans parameters.
*
* 0 is returned if another key is found, < 0 if there are any errors
* and 1 is returned if there are no higher keys in the tree
@@ -5409,8 +5595,7 @@ out:
* calling this function.
*/
int btrfs_find_next_key(struct btrfs_root *root, struct btrfs_path *path,
- struct btrfs_key *key, int level,
- int cache_only, u64 min_trans)
+ struct btrfs_key *key, int level, u64 min_trans)
{
int slot;
struct extent_buffer *c;
@@ -5461,22 +5646,8 @@ next:
if (level == 0)
btrfs_item_key_to_cpu(c, key, slot);
else {
- u64 blockptr = btrfs_node_blockptr(c, slot);
u64 gen = btrfs_node_ptr_generation(c, slot);
- if (cache_only) {
- struct extent_buffer *cur;
- cur = btrfs_find_tree_block(root, blockptr,
- btrfs_level_size(root, level - 1));
- if (!cur ||
- btrfs_buffer_uptodate(cur, gen, 1) <= 0) {
- slot++;
- if (cur)
- free_extent_buffer(cur);
- goto next;
- }
- free_extent_buffer(cur);
- }
if (gen < min_trans) {
slot++;
goto next;
@@ -5498,139 +5669,6 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path)
return btrfs_next_old_leaf(root, path, 0);
}
-/* Release the path up to but not including the given level */
-static void btrfs_release_level(struct btrfs_path *path, int level)
-{
- int i;
-
- for (i = 0; i < level; i++) {
- path->slots[i] = 0;
- if (!path->nodes[i])
- continue;
- if (path->locks[i]) {
- btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
- path->locks[i] = 0;
- }
- free_extent_buffer(path->nodes[i]);
- path->nodes[i] = NULL;
- }
-}
-
-/*
- * This function assumes 2 things
- *
- * 1) You are using path->keep_locks
- * 2) You are not inserting items.
- *
- * If either of these are not true do not use this function. If you need a next
- * leaf with either of these not being true then this function can be easily
- * adapted to do that, but at the moment these are the limitations.
- */
-int btrfs_next_leaf_write(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct btrfs_path *path,
- int del)
-{
- struct extent_buffer *b;
- struct btrfs_key key;
- u32 nritems;
- int level = 1;
- int slot;
- int ret = 1;
- int write_lock_level = BTRFS_MAX_LEVEL;
- int ins_len = del ? -1 : 0;
-
- WARN_ON(!(path->keep_locks || path->really_keep_locks));
-
- nritems = btrfs_header_nritems(path->nodes[0]);
- btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1);
-
- while (path->nodes[level]) {
- nritems = btrfs_header_nritems(path->nodes[level]);
- if (!(path->locks[level] & BTRFS_WRITE_LOCK)) {
-search:
- btrfs_release_path(path);
- ret = btrfs_search_slot(trans, root, &key, path,
- ins_len, 1);
- if (ret < 0)
- goto out;
- level = 1;
- continue;
- }
-
- if (path->slots[level] >= nritems - 1) {
- level++;
- continue;
- }
-
- btrfs_release_level(path, level);
- break;
- }
-
- if (!path->nodes[level]) {
- ret = 1;
- goto out;
- }
-
- path->slots[level]++;
- b = path->nodes[level];
-
- while (b) {
- level = btrfs_header_level(b);
-
- if (!should_cow_block(trans, root, b))
- goto cow_done;
-
- btrfs_set_path_blocking(path);
- ret = btrfs_cow_block(trans, root, b,
- path->nodes[level + 1],
- path->slots[level + 1], &b);
- if (ret)
- goto out;
-cow_done:
- path->nodes[level] = b;
- btrfs_clear_path_blocking(path, NULL, 0);
- if (level != 0) {
- ret = setup_nodes_for_search(trans, root, path, b,
- level, ins_len,
- &write_lock_level);
- if (ret == -EAGAIN)
- goto search;
- if (ret)
- goto out;
-
- b = path->nodes[level];
- slot = path->slots[level];
-
- ret = read_block_for_search(trans, root, path,
- &b, level, slot, &key, 0);
- if (ret == -EAGAIN)
- goto search;
- if (ret)
- goto out;
- level = btrfs_header_level(b);
- if (!btrfs_try_tree_write_lock(b)) {
- btrfs_set_path_blocking(path);
- btrfs_tree_lock(b);
- btrfs_clear_path_blocking(path, b,
- BTRFS_WRITE_LOCK);
- }
- path->locks[level] = BTRFS_WRITE_LOCK;
- path->nodes[level] = b;
- path->slots[level] = 0;
- } else {
- path->slots[level] = 0;
- ret = 0;
- break;
- }
- }
-
-out:
- if (ret)
- btrfs_release_path(path);
-
- return ret;
-}
-
int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
u64 time_seq)
{
@@ -5680,6 +5718,24 @@ again:
ret = 0;
goto done;
}
+ /*
+ * So the above check misses one case:
+ * - after releasing the path above, someone has removed the item that
+ * used to be at the very end of the block, and balance between leafs
+ * gets another one with bigger key.offset to replace it.
+ *
+ * This one should be returned as well, or we can get leaf corruption
+ * later(esp. in __btrfs_drop_extents()).
+ *
+ * And a bit more explanation about this check,
+ * with ret > 0, the key isn't found, the path points to the slot
+ * where it should be inserted, so the path->slots[0] item must be the
+ * bigger one.
+ */
+ if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) {
+ ret = 0;
+ goto done;
+ }
while (level < BTRFS_MAX_LEVEL) {
if (!path->nodes[level]) {
@@ -5828,3 +5884,46 @@ int btrfs_previous_item(struct btrfs_root *root,
}
return 1;
}
+
+/*
+ * search in extent tree to find a previous Metadata/Data extent item with
+ * min objecitd.
+ *
+ * returns 0 if something is found, 1 if nothing was found and < 0 on error
+ */
+int btrfs_previous_extent_item(struct btrfs_root *root,
+ struct btrfs_path *path, u64 min_objectid)
+{
+ struct btrfs_key found_key;
+ struct extent_buffer *leaf;
+ u32 nritems;
+ int ret;
+
+ while (1) {
+ if (path->slots[0] == 0) {
+ btrfs_set_path_blocking(path);
+ ret = btrfs_prev_leaf(root, path);
+ if (ret != 0)
+ return ret;
+ } else {
+ path->slots[0]--;
+ }
+ leaf = path->nodes[0];
+ nritems = btrfs_header_nritems(leaf);
+ if (nritems == 0)
+ return 1;
+ if (path->slots[0] == nritems)
+ path->slots[0]--;
+
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+ if (found_key.objectid < min_objectid)
+ break;
+ if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
+ found_key.type == BTRFS_METADATA_ITEM_KEY)
+ return 0;
+ if (found_key.objectid == min_objectid &&
+ found_key.type < BTRFS_EXTENT_ITEM_KEY)
+ break;
+ }
+ return 1;
+}