aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs/ctree.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/ctree.c')
-rw-r--r--fs/btrfs/ctree.c834
1 files changed, 542 insertions, 292 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 61b5bcd57b7..aeab453b8e2 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -39,9 +39,8 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
struct extent_buffer *src_buf);
static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
int level, int slot);
-static void tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
+static int tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb);
-static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
struct btrfs_path *btrfs_alloc_path(void)
{
@@ -225,7 +224,8 @@ static struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
static void add_root_to_dirty_list(struct btrfs_root *root)
{
spin_lock(&root->fs_info->trans_lock);
- if (root->track_dirty && list_empty(&root->dirty_list)) {
+ if (test_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state) &&
+ list_empty(&root->dirty_list)) {
list_add(&root->dirty_list,
&root->fs_info->dirty_cowonly_roots);
}
@@ -247,9 +247,10 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
int level;
struct btrfs_disk_key disk_key;
- WARN_ON(root->ref_cows && trans->transid !=
- root->fs_info->running_transaction->transid);
- WARN_ON(root->ref_cows && trans->transid != root->last_trans);
+ WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+ trans->transid != root->fs_info->running_transaction->transid);
+ WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+ trans->transid != root->last_trans);
level = btrfs_header_level(buf);
if (level == 0)
@@ -274,7 +275,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
else
btrfs_set_header_owner(cow, new_root_objectid);
- write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow),
+ write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(),
BTRFS_FSID_SIZE);
WARN_ON(btrfs_header_generation(buf) > trans->transid);
@@ -355,44 +356,14 @@ static inline void tree_mod_log_write_unlock(struct btrfs_fs_info *fs_info)
}
/*
- * Increment the upper half of tree_mod_seq, set lower half zero.
- *
- * Must be called with fs_info->tree_mod_seq_lock held.
+ * Pull a new tree mod seq number for our operation.
*/
-static inline u64 btrfs_inc_tree_mod_seq_major(struct btrfs_fs_info *fs_info)
-{
- u64 seq = atomic64_read(&fs_info->tree_mod_seq);
- seq &= 0xffffffff00000000ull;
- seq += 1ull << 32;
- atomic64_set(&fs_info->tree_mod_seq, seq);
- return seq;
-}
-
-/*
- * Increment the lower half of tree_mod_seq.
- *
- * Must be called with fs_info->tree_mod_seq_lock held. The way major numbers
- * are generated should not technically require a spin lock here. (Rationale:
- * incrementing the minor while incrementing the major seq number is between its
- * atomic64_read and atomic64_set calls doesn't duplicate sequence numbers, it
- * just returns a unique sequence number as usual.) We have decided to leave
- * that requirement in here and rethink it once we notice it really imposes a
- * problem on some workload.
- */
-static inline u64 btrfs_inc_tree_mod_seq_minor(struct btrfs_fs_info *fs_info)
+static inline u64 btrfs_inc_tree_mod_seq(struct btrfs_fs_info *fs_info)
{
return atomic64_inc_return(&fs_info->tree_mod_seq);
}
/*
- * return the last minor in the previous major tree_mod_seq number
- */
-u64 btrfs_tree_mod_seq_prev(u64 seq)
-{
- return (seq & 0xffffffff00000000ull) - 1ull;
-}
-
-/*
* This adds a new blocker to the tree mod log's blocker list if the @elem
* passed does not already have a sequence number set. So when a caller expects
* to record tree modifications, it should ensure to set elem->seq to zero
@@ -403,19 +374,16 @@ u64 btrfs_tree_mod_seq_prev(u64 seq)
u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
struct seq_list *elem)
{
- u64 seq;
-
tree_mod_log_write_lock(fs_info);
spin_lock(&fs_info->tree_mod_seq_lock);
if (!elem->seq) {
- elem->seq = btrfs_inc_tree_mod_seq_major(fs_info);
+ elem->seq = btrfs_inc_tree_mod_seq(fs_info);
list_add_tail(&elem->list, &fs_info->tree_mod_seq_list);
}
- seq = btrfs_inc_tree_mod_seq_minor(fs_info);
spin_unlock(&fs_info->tree_mod_seq_lock);
tree_mod_log_write_unlock(fs_info);
- return seq;
+ return elem->seq;
}
void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
@@ -475,6 +443,8 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
* the index is the shifted logical of the *new* root node for root replace
* operations, or the shifted logical of the affected block for all other
* operations.
+ *
+ * Note: must be called with write lock (tree_mod_log_write_lock).
*/
static noinline int
__tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
@@ -483,27 +453,10 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
struct rb_node **new;
struct rb_node *parent = NULL;
struct tree_mod_elem *cur;
- int ret = 0;
BUG_ON(!tm);
- tree_mod_log_write_lock(fs_info);
- if (list_empty(&fs_info->tree_mod_seq_list)) {
- tree_mod_log_write_unlock(fs_info);
- /*
- * Ok we no longer care about logging modifications, free up tm
- * and return 0. Any callers shouldn't be using tm after
- * calling tree_mod_log_insert, but if they do we can just
- * change this to return a special error code to let the callers
- * do their own thing.
- */
- kfree(tm);
- return 0;
- }
-
- spin_lock(&fs_info->tree_mod_seq_lock);
- tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
- spin_unlock(&fs_info->tree_mod_seq_lock);
+ tm->seq = btrfs_inc_tree_mod_seq(fs_info);
tm_root = &fs_info->tree_mod_log;
new = &tm_root->rb_node;
@@ -518,18 +471,13 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
new = &((*new)->rb_left);
else if (cur->seq > tm->seq)
new = &((*new)->rb_right);
- else {
- ret = -EEXIST;
- kfree(tm);
- goto out;
- }
+ else
+ return -EEXIST;
}
rb_link_node(&tm->node, parent, new);
rb_insert_color(&tm->node, tm_root);
-out:
- tree_mod_log_write_unlock(fs_info);
- return ret;
+ return 0;
}
/*
@@ -545,19 +493,38 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
return 1;
if (eb && btrfs_header_level(eb) == 0)
return 1;
+
+ tree_mod_log_write_lock(fs_info);
+ if (list_empty(&(fs_info)->tree_mod_seq_list)) {
+ tree_mod_log_write_unlock(fs_info);
+ return 1;
+ }
+
return 0;
}
-static inline int
-__tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
- struct extent_buffer *eb, int slot,
- enum mod_log_op op, gfp_t flags)
+/* Similar to tree_mod_dont_log, but doesn't acquire any locks. */
+static inline int tree_mod_need_log(const struct btrfs_fs_info *fs_info,
+ struct extent_buffer *eb)
+{
+ smp_mb();
+ if (list_empty(&(fs_info)->tree_mod_seq_list))
+ return 0;
+ if (eb && btrfs_header_level(eb) == 0)
+ return 0;
+
+ return 1;
+}
+
+static struct tree_mod_elem *
+alloc_tree_mod_elem(struct extent_buffer *eb, int slot,
+ enum mod_log_op op, gfp_t flags)
{
struct tree_mod_elem *tm;
tm = kzalloc(sizeof(*tm), flags);
if (!tm)
- return -ENOMEM;
+ return NULL;
tm->index = eb->start >> PAGE_CACHE_SHIFT;
if (op != MOD_LOG_KEY_ADD) {
@@ -567,8 +534,9 @@ __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
tm->op = op;
tm->slot = slot;
tm->generation = btrfs_node_ptr_generation(eb, slot);
+ RB_CLEAR_NODE(&tm->node);
- return __tree_mod_log_insert(fs_info, tm);
+ return tm;
}
static noinline int
@@ -576,10 +544,27 @@ tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int slot,
enum mod_log_op op, gfp_t flags)
{
- if (tree_mod_dont_log(fs_info, eb))
+ struct tree_mod_elem *tm;
+ int ret;
+
+ if (!tree_mod_need_log(fs_info, eb))
return 0;
- return __tree_mod_log_insert_key(fs_info, eb, slot, op, flags);
+ tm = alloc_tree_mod_elem(eb, slot, op, flags);
+ if (!tm)
+ return -ENOMEM;
+
+ if (tree_mod_dont_log(fs_info, eb)) {
+ kfree(tm);
+ return 0;
+ }
+
+ ret = __tree_mod_log_insert(fs_info, tm);
+ tree_mod_log_write_unlock(fs_info);
+ if (ret)
+ kfree(tm);
+
+ return ret;
}
static noinline int
@@ -587,53 +572,95 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
struct extent_buffer *eb, int dst_slot, int src_slot,
int nr_items, gfp_t flags)
{
- struct tree_mod_elem *tm;
- int ret;
+ struct tree_mod_elem *tm = NULL;
+ struct tree_mod_elem **tm_list = NULL;
+ int ret = 0;
int i;
+ int locked = 0;
- if (tree_mod_dont_log(fs_info, eb))
+ if (!tree_mod_need_log(fs_info, eb))
return 0;
+ tm_list = kzalloc(nr_items * sizeof(struct tree_mod_elem *), flags);
+ if (!tm_list)
+ return -ENOMEM;
+
+ tm = kzalloc(sizeof(*tm), flags);
+ if (!tm) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+
+ tm->index = eb->start >> PAGE_CACHE_SHIFT;
+ tm->slot = src_slot;
+ tm->move.dst_slot = dst_slot;
+ tm->move.nr_items = nr_items;
+ tm->op = MOD_LOG_MOVE_KEYS;
+
+ for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
+ tm_list[i] = alloc_tree_mod_elem(eb, i + dst_slot,
+ MOD_LOG_KEY_REMOVE_WHILE_MOVING, flags);
+ if (!tm_list[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+ }
+
+ if (tree_mod_dont_log(fs_info, eb))
+ goto free_tms;
+ locked = 1;
+
/*
* When we override something during the move, we log these removals.
* This can only happen when we move towards the beginning of the
* buffer, i.e. dst_slot < src_slot.
*/
for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
- ret = __tree_mod_log_insert_key(fs_info, eb, i + dst_slot,
- MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS);
- BUG_ON(ret < 0);
+ ret = __tree_mod_log_insert(fs_info, tm_list[i]);
+ if (ret)
+ goto free_tms;
}
- tm = kzalloc(sizeof(*tm), flags);
- if (!tm)
- return -ENOMEM;
+ ret = __tree_mod_log_insert(fs_info, tm);
+ if (ret)
+ goto free_tms;
+ tree_mod_log_write_unlock(fs_info);
+ kfree(tm_list);
- tm->index = eb->start >> PAGE_CACHE_SHIFT;
- tm->slot = src_slot;
- tm->move.dst_slot = dst_slot;
- tm->move.nr_items = nr_items;
- tm->op = MOD_LOG_MOVE_KEYS;
+ return 0;
+free_tms:
+ for (i = 0; i < nr_items; i++) {
+ if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
+ rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
+ kfree(tm_list[i]);
+ }
+ if (locked)
+ tree_mod_log_write_unlock(fs_info);
+ kfree(tm_list);
+ kfree(tm);
- return __tree_mod_log_insert(fs_info, tm);
+ return ret;
}
-static inline void
-__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
+static inline int
+__tree_mod_log_free_eb(struct btrfs_fs_info *fs_info,
+ struct tree_mod_elem **tm_list,
+ int nritems)
{
- int i;
- u32 nritems;
+ int i, j;
int ret;
- if (btrfs_header_level(eb) == 0)
- return;
-
- nritems = btrfs_header_nritems(eb);
for (i = nritems - 1; i >= 0; i--) {
- ret = __tree_mod_log_insert_key(fs_info, eb, i,
- MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
- BUG_ON(ret < 0);
+ ret = __tree_mod_log_insert(fs_info, tm_list[i]);
+ if (ret) {
+ for (j = nritems - 1; j > i; j--)
+ rb_erase(&tm_list[j]->node,
+ &fs_info->tree_mod_log);
+ return ret;
+ }
}
+
+ return 0;
}
static noinline int
@@ -642,17 +669,38 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
struct extent_buffer *new_root, gfp_t flags,
int log_removal)
{
- struct tree_mod_elem *tm;
+ struct tree_mod_elem *tm = NULL;
+ struct tree_mod_elem **tm_list = NULL;
+ int nritems = 0;
+ int ret = 0;
+ int i;
- if (tree_mod_dont_log(fs_info, NULL))
+ if (!tree_mod_need_log(fs_info, NULL))
return 0;
- if (log_removal)
- __tree_mod_log_free_eb(fs_info, old_root);
+ if (log_removal && btrfs_header_level(old_root) > 0) {
+ nritems = btrfs_header_nritems(old_root);
+ tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
+ flags);
+ if (!tm_list) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+ for (i = 0; i < nritems; i++) {
+ tm_list[i] = alloc_tree_mod_elem(old_root, i,
+ MOD_LOG_KEY_REMOVE_WHILE_FREEING, flags);
+ if (!tm_list[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+ }
+ }
tm = kzalloc(sizeof(*tm), flags);
- if (!tm)
- return -ENOMEM;
+ if (!tm) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
tm->index = new_root->start >> PAGE_CACHE_SHIFT;
tm->old_root.logical = old_root->start;
@@ -660,7 +708,30 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
tm->generation = btrfs_header_generation(old_root);
tm->op = MOD_LOG_ROOT_REPLACE;
- return __tree_mod_log_insert(fs_info, tm);
+ if (tree_mod_dont_log(fs_info, NULL))
+ goto free_tms;
+
+ if (tm_list)
+ ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
+ if (!ret)
+ ret = __tree_mod_log_insert(fs_info, tm);
+
+ tree_mod_log_write_unlock(fs_info);
+ if (ret)
+ goto free_tms;
+ kfree(tm_list);
+
+ return ret;
+
+free_tms:
+ if (tm_list) {
+ for (i = 0; i < nritems; i++)
+ kfree(tm_list[i]);
+ kfree(tm_list);
+ }
+ kfree(tm);
+
+ return ret;
}
static struct tree_mod_elem *
@@ -729,31 +800,75 @@ tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq)
return __tree_mod_log_search(fs_info, start, min_seq, 0);
}
-static noinline void
+static noinline int
tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
struct extent_buffer *src, unsigned long dst_offset,
unsigned long src_offset, int nr_items)
{
- int ret;
+ int ret = 0;
+ struct tree_mod_elem **tm_list = NULL;
+ struct tree_mod_elem **tm_list_add, **tm_list_rem;
int i;
+ int locked = 0;
- if (tree_mod_dont_log(fs_info, NULL))
- return;
+ if (!tree_mod_need_log(fs_info, NULL))
+ return 0;
if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
- return;
+ return 0;
+ tm_list = kzalloc(nr_items * 2 * sizeof(struct tree_mod_elem *),
+ GFP_NOFS);
+ if (!tm_list)
+ return -ENOMEM;
+
+ tm_list_add = tm_list;
+ tm_list_rem = tm_list + nr_items;
for (i = 0; i < nr_items; i++) {
- ret = __tree_mod_log_insert_key(fs_info, src,
- i + src_offset,
- MOD_LOG_KEY_REMOVE, GFP_NOFS);
- BUG_ON(ret < 0);
- ret = __tree_mod_log_insert_key(fs_info, dst,
- i + dst_offset,
- MOD_LOG_KEY_ADD,
- GFP_NOFS);
- BUG_ON(ret < 0);
+ tm_list_rem[i] = alloc_tree_mod_elem(src, i + src_offset,
+ MOD_LOG_KEY_REMOVE, GFP_NOFS);
+ if (!tm_list_rem[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+
+ tm_list_add[i] = alloc_tree_mod_elem(dst, i + dst_offset,
+ MOD_LOG_KEY_ADD, GFP_NOFS);
+ if (!tm_list_add[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+ }
+
+ if (tree_mod_dont_log(fs_info, NULL))
+ goto free_tms;
+ locked = 1;
+
+ for (i = 0; i < nr_items; i++) {
+ ret = __tree_mod_log_insert(fs_info, tm_list_rem[i]);
+ if (ret)
+ goto free_tms;
+ ret = __tree_mod_log_insert(fs_info, tm_list_add[i]);
+ if (ret)
+ goto free_tms;
+ }
+
+ tree_mod_log_write_unlock(fs_info);
+ kfree(tm_list);
+
+ return 0;
+
+free_tms:
+ for (i = 0; i < nr_items * 2; i++) {
+ if (tm_list[i] && !RB_EMPTY_NODE(&tm_list[i]->node))
+ rb_erase(&tm_list[i]->node, &fs_info->tree_mod_log);
+ kfree(tm_list[i]);
}
+ if (locked)
+ tree_mod_log_write_unlock(fs_info);
+ kfree(tm_list);
+
+ return ret;
}
static inline void
@@ -772,18 +887,58 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
{
int ret;
- ret = __tree_mod_log_insert_key(fs_info, eb, slot,
+ ret = tree_mod_log_insert_key(fs_info, eb, slot,
MOD_LOG_KEY_REPLACE,
atomic ? GFP_ATOMIC : GFP_NOFS);
BUG_ON(ret < 0);
}
-static noinline void
+static noinline int
tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
{
+ struct tree_mod_elem **tm_list = NULL;
+ int nritems = 0;
+ int i;
+ int ret = 0;
+
+ if (btrfs_header_level(eb) == 0)
+ return 0;
+
+ if (!tree_mod_need_log(fs_info, NULL))
+ return 0;
+
+ nritems = btrfs_header_nritems(eb);
+ tm_list = kzalloc(nritems * sizeof(struct tree_mod_elem *),
+ GFP_NOFS);
+ if (!tm_list)
+ return -ENOMEM;
+
+ for (i = 0; i < nritems; i++) {
+ tm_list[i] = alloc_tree_mod_elem(eb, i,
+ MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
+ if (!tm_list[i]) {
+ ret = -ENOMEM;
+ goto free_tms;
+ }
+ }
+
if (tree_mod_dont_log(fs_info, eb))
- return;
- __tree_mod_log_free_eb(fs_info, eb);
+ goto free_tms;
+
+ ret = __tree_mod_log_free_eb(fs_info, tm_list, nritems);
+ tree_mod_log_write_unlock(fs_info);
+ if (ret)
+ goto free_tms;
+ kfree(tm_list);
+
+ return 0;
+
+free_tms:
+ for (i = 0; i < nritems; i++)
+ kfree(tm_list[i]);
+ kfree(tm_list);
+
+ return ret;
}
static noinline void
@@ -809,14 +964,14 @@ int btrfs_block_can_be_shared(struct btrfs_root *root,
* snapshot and the block was not allocated by tree relocation,
* we know the block is not shared.
*/
- if (root->ref_cows &&
+ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
buf != root->node && buf != root->commit_root &&
(btrfs_header_generation(buf) <=
btrfs_root_last_snapshot(&root->root_item) ||
btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
return 1;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
- if (root->ref_cows &&
+ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
btrfs_header_backref_rev(buf) < BTRFS_MIXED_BACKREF_REV)
return 1;
#endif
@@ -958,9 +1113,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_assert_tree_locked(buf);
- WARN_ON(root->ref_cows && trans->transid !=
- root->fs_info->running_transaction->transid);
- WARN_ON(root->ref_cows && trans->transid != root->last_trans);
+ WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+ trans->transid != root->fs_info->running_transaction->transid);
+ WARN_ON(test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&
+ trans->transid != root->last_trans);
level = btrfs_header_level(buf);
@@ -996,7 +1152,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
else
btrfs_set_header_owner(cow, root->root_key.objectid);
- write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow),
+ write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(),
BTRFS_FSID_SIZE);
ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
@@ -1005,7 +1161,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
return ret;
}
- if (root->ref_cows) {
+ if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {
ret = btrfs_reloc_cow_block(trans, root, buf, cow);
if (ret)
return ret;
@@ -1041,8 +1197,13 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_set_node_ptr_generation(parent, parent_slot,
trans->transid);
btrfs_mark_buffer_dirty(parent);
- if (last_ref)
- tree_mod_log_free_eb(root->fs_info, buf);
+ if (last_ref) {
+ ret = tree_mod_log_free_eb(root->fs_info, buf);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
+ }
btrfs_free_tree_block(trans, root, buf, parent_start,
last_ref);
}
@@ -1285,11 +1446,10 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
free_extent_buffer(eb_root);
blocksize = btrfs_level_size(root, old_root->level);
old = read_tree_block(root, logical, blocksize, 0);
- if (!old || !extent_buffer_uptodate(old)) {
+ if (WARN_ON(!old || !extent_buffer_uptodate(old))) {
free_extent_buffer(old);
- pr_warn("btrfs: failed to read tree block %llu from get_old_root\n",
- logical);
- WARN_ON(1);
+ btrfs_warn(root->fs_info,
+ "failed to read tree block %llu from get_old_root", logical);
} else {
eb = btrfs_clone_extent_buffer(old);
free_extent_buffer(old);
@@ -1346,6 +1506,10 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *buf)
{
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+ if (unlikely(test_bit(BTRFS_ROOT_DUMMY_ROOT, &root->state)))
+ return 0;
+#endif
/* ensure we can see the force_cow */
smp_rmb();
@@ -1364,7 +1528,7 @@ static inline int should_cow_block(struct btrfs_trans_handle *trans,
!btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN) &&
!(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID &&
btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)) &&
- !root->force_cow)
+ !test_bit(BTRFS_ROOT_FORCE_COW, &root->state))
return 0;
return 1;
}
@@ -2463,6 +2627,49 @@ static int key_search(struct extent_buffer *b, struct btrfs_key *key,
return 0;
}
+int btrfs_find_item(struct btrfs_root *fs_root, struct btrfs_path *found_path,
+ u64 iobjectid, u64 ioff, u8 key_type,
+ struct btrfs_key *found_key)
+{
+ int ret;
+ struct btrfs_key key;
+ struct extent_buffer *eb;
+ struct btrfs_path *path;
+
+ key.type = key_type;
+ key.objectid = iobjectid;
+ key.offset = ioff;
+
+ if (found_path == NULL) {
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+ } else
+ path = found_path;
+
+ ret = btrfs_search_slot(NULL, fs_root, &key, path, 0, 0);
+ if ((ret < 0) || (found_key == NULL)) {
+ if (path != found_path)
+ btrfs_free_path(path);
+ return ret;
+ }
+
+ eb = path->nodes[0];
+ if (ret && path->slots[0] >= btrfs_header_nritems(eb)) {
+ ret = btrfs_next_leaf(fs_root, path);
+ if (ret)
+ return ret;
+ eb = path->nodes[0];
+ }
+
+ btrfs_item_key_to_cpu(eb, found_key, path->slots[0]);
+ if (found_key->type != key.type ||
+ found_key->objectid != key.objectid)
+ return 1;
+
+ return 0;
+}
+
/*
* look for key in the tree. path is filled in with nodes along the way
* if key is found, we return zero and you can find the item in the leaf
@@ -2496,6 +2703,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
lowest_level = p->lowest_level;
WARN_ON(lowest_level && ins_len > 0);
WARN_ON(p->nodes[0] != NULL);
+ BUG_ON(!cow && ins_len);
if (ins_len < 0) {
lowest_unlock = 2;
@@ -2533,9 +2741,13 @@ again:
* the commit roots are read only
* so we always do read locks
*/
+ if (p->need_commit_sem)
+ down_read(&root->fs_info->commit_root_sem);
b = root->commit_root;
extent_buffer_get(b);
level = btrfs_header_level(b);
+ if (p->need_commit_sem)
+ up_read(&root->fs_info->commit_root_sem);
if (!p->skip_locking)
btrfs_tree_read_lock(b);
} else {
@@ -2604,8 +2816,6 @@ again:
}
}
cow_done:
- BUG_ON(!cow && ins_len);
-
p->nodes[level] = b;
btrfs_clear_path_blocking(p, NULL, 0);
@@ -2615,13 +2825,19 @@ cow_done:
* It is safe to drop the lock on our parent before we
* go through the expensive btree search on b.
*
- * If cow is true, then we might be changing slot zero,
- * which may require changing the parent. So, we can't
- * drop the lock until after we know which slot we're
- * operating on.
+ * If we're inserting or deleting (ins_len != 0), then we might
+ * be changing slot zero, which may require changing the parent.
+ * So, we can't drop the lock until after we know which slot
+ * we're operating on.
*/
- if (!cow)
- btrfs_unlock_up_safe(p, level + 1);
+ if (!ins_len && !p->keep_locks) {
+ int u = level + 1;
+
+ if (u < BTRFS_MAX_LEVEL && p->locks[u]) {
+ btrfs_tree_unlock_rw(p->nodes[u], p->locks[u]);
+ p->locks[u] = 0;
+ }
+ }
ret = key_search(b, key, level, &prev_cmp, &slot);
@@ -2649,7 +2865,7 @@ cow_done:
* which means we must have a write lock
* on the parent
*/
- if (slot == 0 && cow &&
+ if (slot == 0 && ins_len &&
write_lock_level < level + 1) {
write_lock_level = level + 1;
btrfs_release_path(p);
@@ -2758,7 +2974,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
int level;
int lowest_unlock = 1;
u8 lowest_level = 0;
- int prev_cmp;
+ int prev_cmp = -1;
lowest_level = p->lowest_level;
WARN_ON(p->nodes[0] != NULL);
@@ -2769,7 +2985,6 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
}
again:
- prev_cmp = -1;
b = get_old_root(root, time_seq);
level = btrfs_header_level(b);
p->locks[level] = BTRFS_READ_LOCK;
@@ -2787,6 +3002,11 @@ again:
*/
btrfs_unlock_up_safe(p, level + 1);
+ /*
+ * Since we can unwind eb's we want to do a real search every
+ * time.
+ */
+ prev_cmp = -1;
ret = key_search(b, key, level, &prev_cmp, &slot);
if (level != 0) {
@@ -2898,7 +3118,9 @@ again:
if (ret < 0)
return ret;
if (!ret) {
- p->slots[0] = btrfs_header_nritems(leaf) - 1;
+ leaf = p->nodes[0];
+ if (p->slots[0] == btrfs_header_nritems(leaf))
+ p->slots[0]--;
return 0;
}
if (!return_any)
@@ -3019,8 +3241,12 @@ static int push_node_left(struct btrfs_trans_handle *trans,
} else
push_items = min(src_nritems - 8, push_items);
- tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
- push_items);
+ ret = tree_mod_log_eb_copy(root->fs_info, dst, src, dst_nritems, 0,
+ push_items);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
copy_extent_buffer(dst, src,
btrfs_node_key_ptr_offset(dst_nritems),
btrfs_node_key_ptr_offset(0),
@@ -3090,8 +3316,12 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
(dst_nritems) *
sizeof(struct btrfs_key_ptr));
- tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
- src_nritems - push_items, push_items);
+ ret = tree_mod_log_eb_copy(root->fs_info, dst, src, 0,
+ src_nritems - push_items, push_items);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
copy_extent_buffer(dst, src,
btrfs_node_key_ptr_offset(0),
btrfs_node_key_ptr_offset(src_nritems - push_items),
@@ -3148,7 +3378,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
btrfs_set_header_owner(c, root->root_key.objectid);
- write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(c),
+ write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(),
BTRFS_FSID_SIZE);
write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
@@ -3287,12 +3517,17 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
btrfs_set_header_owner(split, root->root_key.objectid);
write_extent_buffer(split, root->fs_info->fsid,
- btrfs_header_fsid(split), BTRFS_FSID_SIZE);
+ btrfs_header_fsid(), BTRFS_FSID_SIZE);
write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
btrfs_header_chunk_tree_uuid(split),
BTRFS_UUID_SIZE);
- tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid);
+ ret = tree_mod_log_eb_copy(root->fs_info, split, c, 0,
+ mid, c_nritems - mid);
+ if (ret) {
+ btrfs_abort_transaction(trans, root, ret);
+ return ret;
+ }
copy_extent_buffer(split, c,
btrfs_node_key_ptr_offset(0),
btrfs_node_key_ptr_offset(mid),
@@ -3337,8 +3572,8 @@ static int leaf_space_used(struct extent_buffer *l, int start, int nr)
if (!nr)
return 0;
btrfs_init_map_token(&token);
- start_item = btrfs_item_nr(l, start);
- end_item = btrfs_item_nr(l, end);
+ start_item = btrfs_item_nr(start);
+ end_item = btrfs_item_nr(end);
data_len = btrfs_token_item_offset(l, start_item, &token) +
btrfs_token_item_size(l, start_item, &token);
data_len = data_len - btrfs_token_item_offset(l, end_item, &token);
@@ -3359,8 +3594,8 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root,
int ret;
ret = BTRFS_LEAF_DATA_SIZE(root) - leaf_space_used(leaf, 0, nritems);
if (ret < 0) {
- printk(KERN_CRIT "leaf free space ret %d, leaf data size %lu, "
- "used %d nritems %d\n",
+ btrfs_crit(root->fs_info,
+ "leaf free space ret %d, leaf data size %lu, used %d nritems %d",
ret, (unsigned long) BTRFS_LEAF_DATA_SIZE(root),
leaf_space_used(leaf, 0, nritems), nritems);
}
@@ -3406,7 +3641,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
slot = path->slots[1];
i = left_nritems - 1;
while (i >= nr) {
- item = btrfs_item_nr(left, i);
+ item = btrfs_item_nr(i);
if (!empty && push_items > 0) {
if (path->slots[0] > i)
@@ -3470,7 +3705,7 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans,
btrfs_set_header_nritems(right, right_nritems);
push_space = BTRFS_LEAF_DATA_SIZE(root);
for (i = 0; i < right_nritems; i++) {
- item = btrfs_item_nr(right, i);
+ item = btrfs_item_nr(i);
push_space -= btrfs_token_item_size(right, item, &token);
btrfs_set_token_item_offset(right, item, push_space, &token);
}
@@ -3568,6 +3803,19 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root
if (left_nritems == 0)
goto out_unlock;
+ if (path->slots[0] == left_nritems && !empty) {
+ /* Key greater than all keys in the leaf, right neighbor has
+ * enough room for it and we're not emptying our leaf to delete
+ * it, therefore use right neighbor to insert the new item and
+ * no need to touch/dirty our left leaft. */
+ btrfs_tree_unlock(left);
+ free_extent_buffer(left);
+ path->nodes[0] = right;
+ path->slots[0] = 0;
+ path->slots[1]++;
+ return 0;
+ }
+
return __push_leaf_right(trans, root, path, min_data_size, empty,
right, free_space, left_nritems, min_slot);
out_unlock:
@@ -3612,7 +3860,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
nr = min(right_nritems - 1, max_slot);
for (i = 0; i < nr; i++) {
- item = btrfs_item_nr(right, i);
+ item = btrfs_item_nr(i);
if (!empty && push_items > 0) {
if (path->slots[0] < i)
@@ -3639,8 +3887,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
ret = 1;
goto out;
}
- if (!empty && push_items == btrfs_header_nritems(right))
- WARN_ON(1);
+ WARN_ON(!empty && push_items == btrfs_header_nritems(right));
/* push data from right to left */
copy_extent_buffer(left, right,
@@ -3663,7 +3910,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
for (i = old_left_nritems; i < old_left_nritems + push_items; i++) {
u32 ioff;
- item = btrfs_item_nr(left, i);
+ item = btrfs_item_nr(i);
ioff = btrfs_token_item_offset(left, item, &token);
btrfs_set_token_item_offset(left, item,
@@ -3694,7 +3941,7 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
btrfs_set_header_nritems(right, right_nritems);
push_space = BTRFS_LEAF_DATA_SIZE(root);
for (i = 0; i < right_nritems; i++) {
- item = btrfs_item_nr(right, i);
+ item = btrfs_item_nr(i);
push_space = push_space - btrfs_token_item_size(right,
item, &token);
@@ -3835,7 +4082,7 @@ static noinline void copy_for_split(struct btrfs_trans_handle *trans,
btrfs_item_end_nr(l, mid);
for (i = 0; i < nritems; i++) {
- struct btrfs_item *item = btrfs_item_nr(right, i);
+ struct btrfs_item *item = btrfs_item_nr(i);
u32 ioff;
ioff = btrfs_token_item_offset(right, item, &token);
@@ -3885,14 +4132,17 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
int progress = 0;
int slot;
u32 nritems;
+ int space_needed = data_size;
slot = path->slots[0];
+ if (slot < btrfs_header_nritems(path->nodes[0]))
+ space_needed -= btrfs_leaf_free_space(root, path->nodes[0]);
/*
* try to push all the items after our slot into the
* right leaf
*/
- ret = push_leaf_right(trans, root, path, 1, data_size, 0, slot);
+ ret = push_leaf_right(trans, root, path, 1, space_needed, 0, slot);
if (ret < 0)
return ret;
@@ -3912,7 +4162,7 @@ static noinline int push_for_double_split(struct btrfs_trans_handle *trans,
/* try to push all the items before our slot into the next leaf */
slot = path->slots[0];
- ret = push_leaf_left(trans, root, path, 1, data_size, 0, slot);
+ ret = push_leaf_left(trans, root, path, 1, space_needed, 0, slot);
if (ret < 0)
return ret;
@@ -3956,13 +4206,18 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans,
/* first try to make some room by pushing left and right */
if (data_size && path->nodes[1]) {
- wret = push_leaf_right(trans, root, path, data_size,
- data_size, 0, 0);
+ int space_needed = data_size;
+
+ if (slot < btrfs_header_nritems(l))
+ space_needed -= btrfs_leaf_free_space(root, l);
+
+ wret = push_leaf_right(trans, root, path, space_needed,
+ space_needed, 0, 0);
if (wret < 0)
return wret;
if (wret) {
- wret = push_leaf_left(trans, root, path, data_size,
- data_size, 0, (u32)-1);
+ wret = push_leaf_left(trans, root, path, space_needed,
+ space_needed, 0, (u32)-1);
if (wret < 0)
return wret;
}
@@ -4016,7 +4271,7 @@ again:
data_size > BTRFS_LEAF_DATA_SIZE(root)) {
if (data_size && !tried_avoid_double)
goto push_for_double;
- split = 2 ;
+ split = 2;
}
}
}
@@ -4042,7 +4297,7 @@ again:
btrfs_set_header_owner(right, root->root_key.objectid);
btrfs_set_header_level(right, 0);
write_extent_buffer(right, root->fs_info->fsid,
- btrfs_header_fsid(right), BTRFS_FSID_SIZE);
+ btrfs_header_fsid(), BTRFS_FSID_SIZE);
write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
btrfs_header_chunk_tree_uuid(right),
@@ -4177,7 +4432,7 @@ static noinline int split_item(struct btrfs_trans_handle *trans,
btrfs_set_path_blocking(path);
- item = btrfs_item_nr(leaf, path->slots[0]);
+ item = btrfs_item_nr(path->slots[0]);
orig_offset = btrfs_item_offset(leaf, item);
item_size = btrfs_item_size(leaf, item);
@@ -4200,7 +4455,7 @@ static noinline int split_item(struct btrfs_trans_handle *trans,
btrfs_cpu_key_to_disk(&disk_key, new_key);
btrfs_set_item_key(leaf, &disk_key, slot);
- new_item = btrfs_item_nr(leaf, slot);
+ new_item = btrfs_item_nr(slot);
btrfs_set_item_offset(leaf, new_item, orig_offset);
btrfs_set_item_size(leaf, new_item, item_size - split_offset);
@@ -4339,7 +4594,7 @@ void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
/* first correct the data pointers */
for (i = slot; i < nritems; i++) {
u32 ioff;
- item = btrfs_item_nr(leaf, i);
+ item = btrfs_item_nr(i);
ioff = btrfs_token_item_offset(leaf, item, &token);
btrfs_set_token_item_offset(leaf, item,
@@ -4387,7 +4642,7 @@ void btrfs_truncate_item(struct btrfs_root *root, struct btrfs_path *path,
fixup_low_keys(root, path, &disk_key, 1);
}
- item = btrfs_item_nr(leaf, slot);
+ item = btrfs_item_nr(slot);
btrfs_set_item_size(leaf, item, new_size);
btrfs_mark_buffer_dirty(leaf);
@@ -4430,7 +4685,7 @@ void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
BUG_ON(slot < 0);
if (slot >= nritems) {
btrfs_print_leaf(root, leaf);
- printk(KERN_CRIT "slot %d too large, nritems %d\n",
+ btrfs_crit(root->fs_info, "slot %d too large, nritems %d",
slot, nritems);
BUG_ON(1);
}
@@ -4441,7 +4696,7 @@ void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
/* first correct the data pointers */
for (i = slot; i < nritems; i++) {
u32 ioff;
- item = btrfs_item_nr(leaf, i);
+ item = btrfs_item_nr(i);
ioff = btrfs_token_item_offset(leaf, item, &token);
btrfs_set_token_item_offset(leaf, item,
@@ -4455,7 +4710,7 @@ void btrfs_extend_item(struct btrfs_root *root, struct btrfs_path *path,
data_end = old_data;
old_size = btrfs_item_size_nr(leaf, slot);
- item = btrfs_item_nr(leaf, slot);
+ item = btrfs_item_nr(slot);
btrfs_set_item_size(leaf, item, old_size + data_size);
btrfs_mark_buffer_dirty(leaf);
@@ -4493,7 +4748,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
if (btrfs_leaf_free_space(root, leaf) < total_size) {
btrfs_print_leaf(root, leaf);
- printk(KERN_CRIT "not enough freespace need %u have %d\n",
+ btrfs_crit(root->fs_info, "not enough freespace need %u have %d",
total_size, btrfs_leaf_free_space(root, leaf));
BUG();
}
@@ -4503,7 +4758,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
if (old_data < data_end) {
btrfs_print_leaf(root, leaf);
- printk(KERN_CRIT "slot %d old_data %d data_end %d\n",
+ btrfs_crit(root->fs_info, "slot %d old_data %d data_end %d",
slot, old_data, data_end);
BUG_ON(1);
}
@@ -4514,7 +4769,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
for (i = slot; i < nritems; i++) {
u32 ioff;
- item = btrfs_item_nr(leaf, i);
+ item = btrfs_item_nr( i);
ioff = btrfs_token_item_offset(leaf, item, &token);
btrfs_set_token_item_offset(leaf, item,
ioff - total_data, &token);
@@ -4535,7 +4790,7 @@ void setup_items_for_insert(struct btrfs_root *root, struct btrfs_path *path,
for (i = 0; i < nr; i++) {
btrfs_cpu_key_to_disk(&disk_key, cpu_key + i);
btrfs_set_item_key(leaf, &disk_key, slot + i);
- item = btrfs_item_nr(leaf, slot + i);
+ item = btrfs_item_nr(slot + i);
btrfs_set_token_item_offset(leaf, item,
data_end - data_size[i], &token);
data_end -= data_size[i];
@@ -4730,7 +4985,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
for (i = slot + nr; i < nritems; i++) {
u32 ioff;
- item = btrfs_item_nr(leaf, i);
+ item = btrfs_item_nr(i);
ioff = btrfs_token_item_offset(leaf, item, &token);
btrfs_set_token_item_offset(leaf, item,
ioff + dsize, &token);
@@ -4815,7 +5070,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
* This may release the path, and so you may lose any locks held at the
* time you call it.
*/
-static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
+int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
{
struct btrfs_key key;
struct btrfs_disk_key found_key;
@@ -4823,14 +5078,18 @@ static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
btrfs_item_key_to_cpu(path->nodes[0], &key, 0);
- if (key.offset > 0)
+ if (key.offset > 0) {
key.offset--;
- else if (key.type > 0)
+ } else if (key.type > 0) {
key.type--;
- else if (key.objectid > 0)
+ key.offset = (u64)-1;
+ } else if (key.objectid > 0) {
key.objectid--;
- else
+ key.type = (u8)-1;
+ key.offset = (u64)-1;
+ } else {
return 1;
+ }
btrfs_release_path(path);
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
@@ -4838,7 +5097,17 @@ static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
return ret;
btrfs_item_key(path->nodes[0], &found_key, 0);
ret = comp_keys(&found_key, &key);
- if (ret < 0)
+ /*
+ * We might have had an item with the previous key in the tree right
+ * before we released our path. And after we released our path, that
+ * item might have been pushed to the first slot (0) of the leaf we
+ * were holding due to a tree balance. Alternatively, an item with the
+ * previous key can exist as the only element of a leaf (big fat item).
+ * Therefore account for these 2 cases, so that our callers (like
+ * btrfs_previous_item) don't miss an existing item with a key matching
+ * the previous key we computed above.
+ */
+ if (ret <= 0)
return 0;
return 1;
}
@@ -4866,7 +5135,6 @@ static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
* was nothing in the tree that matched the search criteria.
*/
int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
- struct btrfs_key *max_key,
struct btrfs_path *path,
u64 min_trans)
{
@@ -4911,10 +5179,8 @@ again:
* If it is too old, old, skip to the next one.
*/
while (slot < nritems) {
- u64 blockptr;
u64 gen;
- blockptr = btrfs_node_blockptr(cur, slot);
gen = btrfs_node_ptr_generation(cur, slot);
if (gen < min_trans) {
slot++;
@@ -5080,7 +5346,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
{
int ret;
int cmp;
- struct btrfs_trans_handle *trans = NULL;
struct btrfs_path *left_path = NULL;
struct btrfs_path *right_path = NULL;
struct btrfs_key left_key;
@@ -5096,9 +5361,8 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
int advance_right;
u64 left_blockptr;
u64 right_blockptr;
- u64 left_start_ctransid;
- u64 right_start_ctransid;
- u64 ctransid;
+ u64 left_gen;
+ u64 right_gen;
left_path = btrfs_alloc_path();
if (!left_path) {
@@ -5122,21 +5386,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
right_path->search_commit_root = 1;
right_path->skip_locking = 1;
- spin_lock(&left_root->root_item_lock);
- left_start_ctransid = btrfs_root_ctransid(&left_root->root_item);
- spin_unlock(&left_root->root_item_lock);
-
- spin_lock(&right_root->root_item_lock);
- right_start_ctransid = btrfs_root_ctransid(&right_root->root_item);
- spin_unlock(&right_root->root_item_lock);
-
- trans = btrfs_join_transaction(left_root);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- trans = NULL;
- goto out;
- }
-
/*
* Strategy: Go to the first items of both trees. Then do
*
@@ -5173,6 +5422,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
* the right if possible or go up and right.
*/
+ down_read(&left_root->fs_info->commit_root_sem);
left_level = btrfs_header_level(left_root->commit_root);
left_root_level = left_level;
left_path->nodes[left_level] = left_root->commit_root;
@@ -5182,6 +5432,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
right_root_level = right_level;
right_path->nodes[right_level] = right_root->commit_root;
extent_buffer_get(right_path->nodes[right_level]);
+ up_read(&left_root->fs_info->commit_root_sem);
if (left_level == 0)
btrfs_item_key_to_cpu(left_path->nodes[left_level],
@@ -5200,67 +5451,6 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
advance_left = advance_right = 0;
while (1) {
- /*
- * We need to make sure the transaction does not get committed
- * while we do anything on commit roots. This means, we need to
- * join and leave transactions for every item that we process.
- */
- if (trans && btrfs_should_end_transaction(trans, left_root)) {
- btrfs_release_path(left_path);
- btrfs_release_path(right_path);
-
- ret = btrfs_end_transaction(trans, left_root);
- trans = NULL;
- if (ret < 0)
- goto out;
- }
- /* now rejoin the transaction */
- if (!trans) {
- trans = btrfs_join_transaction(left_root);
- if (IS_ERR(trans)) {
- ret = PTR_ERR(trans);
- trans = NULL;
- goto out;
- }
-
- spin_lock(&left_root->root_item_lock);
- ctransid = btrfs_root_ctransid(&left_root->root_item);
- spin_unlock(&left_root->root_item_lock);
- if (ctransid != left_start_ctransid)
- left_start_ctransid = 0;
-
- spin_lock(&right_root->root_item_lock);
- ctransid = btrfs_root_ctransid(&right_root->root_item);
- spin_unlock(&right_root->root_item_lock);
- if (ctransid != right_start_ctransid)
- right_start_ctransid = 0;
-
- if (!left_start_ctransid || !right_start_ctransid) {
- WARN(1, KERN_WARNING
- "btrfs: btrfs_compare_tree detected "
- "a change in one of the trees while "
- "iterating. This is probably a "
- "bug.\n");
- ret = -EIO;
- goto out;
- }
-
- /*
- * the commit root may have changed, so start again
- * where we stopped
- */
- left_path->lowest_level = left_level;
- right_path->lowest_level = right_level;
- ret = btrfs_search_slot(NULL, left_root,
- &left_key, left_path, 0, 0);
- if (ret < 0)
- goto out;
- ret = btrfs_search_slot(NULL, right_root,
- &right_key, right_path, 0, 0);
- if (ret < 0)
- goto out;
- }
-
if (advance_left && !left_end_reached) {
ret = tree_advance(left_root, left_path, &left_level,
left_root_level,
@@ -5360,7 +5550,14 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
right_blockptr = btrfs_node_blockptr(
right_path->nodes[right_level],
right_path->slots[right_level]);
- if (left_blockptr == right_blockptr) {
+ left_gen = btrfs_node_ptr_generation(
+ left_path->nodes[left_level],
+ left_path->slots[left_level]);
+ right_gen = btrfs_node_ptr_generation(
+ right_path->nodes[right_level],
+ right_path->slots[right_level]);
+ if (left_blockptr == right_blockptr &&
+ left_gen == right_gen) {
/*
* As we're on a shared block, don't
* allow to go deeper.
@@ -5383,14 +5580,6 @@ out:
btrfs_free_path(left_path);
btrfs_free_path(right_path);
kfree(tmp_buf);
-
- if (trans) {
- if (!ret)
- ret = btrfs_end_transaction(trans, left_root);
- else
- btrfs_end_transaction(trans, left_root);
- }
-
return ret;
}
@@ -5529,6 +5718,24 @@ again:
ret = 0;
goto done;
}
+ /*
+ * So the above check misses one case:
+ * - after releasing the path above, someone has removed the item that
+ * used to be at the very end of the block, and balance between leafs
+ * gets another one with bigger key.offset to replace it.
+ *
+ * This one should be returned as well, or we can get leaf corruption
+ * later(esp. in __btrfs_drop_extents()).
+ *
+ * And a bit more explanation about this check,
+ * with ret > 0, the key isn't found, the path points to the slot
+ * where it should be inserted, so the path->slots[0] item must be the
+ * bigger one.
+ */
+ if (nritems > 0 && ret > 0 && path->slots[0] == nritems - 1) {
+ ret = 0;
+ goto done;
+ }
while (level < BTRFS_MAX_LEVEL) {
if (!path->nodes[level]) {
@@ -5677,3 +5884,46 @@ int btrfs_previous_item(struct btrfs_root *root,
}
return 1;
}
+
+/*
+ * search in extent tree to find a previous Metadata/Data extent item with
+ * min objecitd.
+ *
+ * returns 0 if something is found, 1 if nothing was found and < 0 on error
+ */
+int btrfs_previous_extent_item(struct btrfs_root *root,
+ struct btrfs_path *path, u64 min_objectid)
+{
+ struct btrfs_key found_key;
+ struct extent_buffer *leaf;
+ u32 nritems;
+ int ret;
+
+ while (1) {
+ if (path->slots[0] == 0) {
+ btrfs_set_path_blocking(path);
+ ret = btrfs_prev_leaf(root, path);
+ if (ret != 0)
+ return ret;
+ } else {
+ path->slots[0]--;
+ }
+ leaf = path->nodes[0];
+ nritems = btrfs_header_nritems(leaf);
+ if (nritems == 0)
+ return 1;
+ if (path->slots[0] == nritems)
+ path->slots[0]--;
+
+ btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+ if (found_key.objectid < min_objectid)
+ break;
+ if (found_key.type == BTRFS_EXTENT_ITEM_KEY ||
+ found_key.type == BTRFS_METADATA_ITEM_KEY)
+ return 0;
+ if (found_key.objectid == min_objectid &&
+ found_key.type < BTRFS_EXTENT_ITEM_KEY)
+ break;
+ }
+ return 1;
+}