aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYan, Zheng <zheng.yan@oracle.com>2010-05-16 10:49:59 -0400
committerChris Mason <chris.mason@oracle.com>2010-05-25 10:34:54 -0400
commit3fd0a5585eb98e074fb9934549c8d85c49756c0d (patch)
tree3e7ff9bd9678a5eea62818a2f4a50e19dda91a81
parentefa56464562991b8c24f965199888806bd8c4b38 (diff)
Btrfs: Metadata ENOSPC handling for balance
This patch adds metadata ENOSPC handling for the balance code. It is consisted by following major changes: 1. Avoid COW tree leave in the phrase of merging tree. 2. Handle interaction with snapshot creation. 3. make the backref cache can live across transactions. Signed-off-by: Yan Zheng <zheng.yan@oracle.com> Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r--fs/btrfs/ctree.c3
-rw-r--r--fs/btrfs/ctree.h11
-rw-r--r--fs/btrfs/extent-tree.c23
-rw-r--r--fs/btrfs/relocation.c1867
-rw-r--r--fs/btrfs/transaction.c6
5 files changed, 1163 insertions, 747 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 6bee8e5204f..acd532af8e5 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -447,6 +447,9 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
update_ref_for_cow(trans, root, buf, cow, &last_ref);
+ if (root->ref_cows)
+ btrfs_reloc_cow_block(trans, root, buf, cow);
+
if (buf == root->node) {
WARN_ON(parent && parent != buf);
if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID ||
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 65530837d04..5ed0223d1cb 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2205,7 +2205,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans,
int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path);
int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf);
-int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref);
+int btrfs_drop_snapshot(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv, int update_ref);
int btrfs_drop_subtree(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct extent_buffer *node,
@@ -2479,4 +2480,12 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_recover_relocation(struct btrfs_root *root);
int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len);
+void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct extent_buffer *buf,
+ struct extent_buffer *cow);
+void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,
+ struct btrfs_pending_snapshot *pending,
+ u64 *bytes_to_reserve);
+void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,
+ struct btrfs_pending_snapshot *pending);
#endif
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a713f69f0c7..d61a799fe32 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -5875,7 +5875,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
* also make sure backrefs for the shared block and all lower level
* blocks are properly updated.
*/
-int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
+int btrfs_drop_snapshot(struct btrfs_root *root,
+ struct btrfs_block_rsv *block_rsv, int update_ref)
{
struct btrfs_path *path;
struct btrfs_trans_handle *trans;
@@ -5894,6 +5895,8 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
BUG_ON(!wc);
trans = btrfs_start_transaction(tree_root, 0);
+ if (block_rsv)
+ trans->block_rsv = block_rsv;
if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
level = btrfs_header_level(root->node);
@@ -5981,24 +5984,16 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
}
BUG_ON(wc->level == 0);
- if (trans->transaction->in_commit ||
- trans->transaction->delayed_refs.flushing) {
+ if (btrfs_should_end_transaction(trans, tree_root)) {
ret = btrfs_update_root(trans, tree_root,
&root->root_key,
root_item);
BUG_ON(ret);
- btrfs_end_transaction(trans, tree_root);
+ btrfs_end_transaction_throttle(trans, tree_root);
trans = btrfs_start_transaction(tree_root, 0);
- if (IS_ERR(trans))
- return PTR_ERR(trans);
- } else {
- unsigned long update;
- update = trans->delayed_ref_updates;
- trans->delayed_ref_updates = 0;
- if (update)
- btrfs_run_delayed_refs(trans, tree_root,
- update);
+ if (block_rsv)
+ trans->block_rsv = block_rsv;
}
}
btrfs_release_path(root, path);
@@ -6026,7 +6021,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref)
kfree(root);
}
out:
- btrfs_end_transaction(trans, tree_root);
+ btrfs_end_transaction_throttle(trans, tree_root);
kfree(wc);
btrfs_free_path(path);
return err;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 3943526b734..05d41e56923 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -44,8 +44,12 @@ struct tree_entry {
struct backref_node {
struct rb_node rb_node;
u64 bytenr;
- /* objectid tree block owner */
+
+ u64 new_bytenr;
+ /* objectid of tree block owner, can be not uptodate */
u64 owner;
+ /* link to pending, changed or detached list */
+ struct list_head list;
/* list of upper level blocks reference this block */
struct list_head upper;
/* list of child blocks in the cache */
@@ -56,9 +60,9 @@ struct backref_node {
struct extent_buffer *eb;
/* level of tree block */
unsigned int level:8;
- /* 1 if the block is root of old snapshot */
- unsigned int old_root:1;
- /* 1 if no child blocks in the cache */
+ /* is the block in non-reference counted tree */
+ unsigned int cowonly:1;
+ /* 1 if no child node in the cache */
unsigned int lowest:1;
/* is the extent buffer locked */
unsigned int locked:1;
@@ -66,6 +70,16 @@ struct backref_node {
unsigned int processed:1;
/* have backrefs of this block been checked */
unsigned int checked:1;
+ /*
+ * 1 if corresponding block has been cowed but some upper
+ * level block pointers may not point to the new location
+ */
+ unsigned int pending:1;
+ /*
+ * 1 if the backref node isn't connected to any other
+ * backref node.
+ */
+ unsigned int detached:1;
};
/*
@@ -74,7 +88,6 @@ struct backref_node {
struct backref_edge {
struct list_head list[2];
struct backref_node *node[2];
- u64 blockptr;
};
#define LOWER 0
@@ -83,9 +96,25 @@ struct backref_edge {
struct backref_cache {
/* red black tree of all backref nodes in the cache */
struct rb_root rb_root;
- /* list of backref nodes with no child block in the cache */
+ /* for passing backref nodes to btrfs_reloc_cow_block */
+ struct backref_node *path[BTRFS_MAX_LEVEL];
+ /*
+ * list of blocks that have been cowed but some block
+ * pointers in upper level blocks may not reflect the
+ * new location
+ */
struct list_head pending[BTRFS_MAX_LEVEL];
- spinlock_t lock;
+ /* list of backref nodes with no child node */
+ struct list_head leaves;
+ /* list of blocks that have been cowed in current transaction */
+ struct list_head changed;
+ /* list of detached backref node. */
+ struct list_head detached;
+
+ u64 last_trans;
+
+ int nr_nodes;
+ int nr_edges;
};
/*
@@ -113,15 +142,6 @@ struct tree_block {
unsigned int key_ready:1;
};
-/* inode vector */
-#define INODEVEC_SIZE 16
-
-struct inodevec {
- struct list_head list;
- struct inode *inode[INODEVEC_SIZE];
- int nr;
-};
-
#define MAX_EXTENTS 128
struct file_extent_cluster {
@@ -138,36 +158,43 @@ struct reloc_control {
struct btrfs_root *extent_root;
/* inode for moving data */
struct inode *data_inode;
- struct btrfs_workers workers;
+
+ struct btrfs_block_rsv *block_rsv;
+
+ struct backref_cache backref_cache;
+
+ struct file_extent_cluster cluster;
/* tree blocks have been processed */
struct extent_io_tree processed_blocks;
/* map start of tree root to corresponding reloc tree */
struct mapping_tree reloc_root_tree;
/* list of reloc trees */
struct list_head reloc_roots;
+ /* size of metadata reservation for merging reloc trees */
+ u64 merging_rsv_size;
+ /* size of relocated tree nodes */
+ u64 nodes_relocated;
+
u64 search_start;
u64 extents_found;
- u64 extents_skipped;
- int stage;
- int create_reloc_root;
+
+ int block_rsv_retries;
+
+ unsigned int stage:8;
+ unsigned int create_reloc_tree:1;
+ unsigned int merge_reloc_tree:1;
unsigned int found_file_extent:1;
- unsigned int found_old_snapshot:1;
+ unsigned int commit_transaction:1;
};
/* stages of data relocation */
#define MOVE_DATA_EXTENTS 0
#define UPDATE_DATA_PTRS 1
-/*
- * merge reloc tree to corresponding fs tree in worker threads
- */
-struct async_merge {
- struct btrfs_work work;
- struct reloc_control *rc;
- struct btrfs_root *root;
- struct completion *done;
- atomic_t *num_pending;
-};
+static void remove_backref_node(struct backref_cache *cache,
+ struct backref_node *node);
+static void __mark_block_processed(struct reloc_control *rc,
+ struct backref_node *node);
static void mapping_tree_init(struct mapping_tree *tree)
{
@@ -181,15 +208,80 @@ static void backref_cache_init(struct backref_cache *cache)
cache->rb_root = RB_ROOT;
for (i = 0; i < BTRFS_MAX_LEVEL; i++)
INIT_LIST_HEAD(&cache->pending[i]);
- spin_lock_init(&cache->lock);
+ INIT_LIST_HEAD(&cache->changed);
+ INIT_LIST_HEAD(&cache->detached);
+ INIT_LIST_HEAD(&cache->leaves);
}
-static void backref_node_init(struct backref_node *node)
+static void backref_cache_cleanup(struct backref_cache *cache)
{
- memset(node, 0, sizeof(*node));
- INIT_LIST_HEAD(&node->upper);
- INIT_LIST_HEAD(&node->lower);
- RB_CLEAR_NODE(&node->rb_node);
+ struct backref_node *node;
+ int i;
+
+ while (!list_empty(&cache->detached)) {
+ node = list_entry(cache->detached.next,
+ struct backref_node, list);
+ remove_backref_node(cache, node);
+ }
+
+ while (!list_empty(&cache->leaves)) {
+ node = list_entry(cache->leaves.next,
+ struct backref_node, lower);
+ remove_backref_node(cache, node);
+ }
+
+ cache->last_trans = 0;
+
+ for (i = 0; i < BTRFS_MAX_LEVEL; i++)
+ BUG_ON(!list_empty(&cache->pending[i]));
+ BUG_ON(!list_empty(&cache->changed));
+ BUG_ON(!list_empty(&cache->detached));
+ BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root));
+ BUG_ON(cache->nr_nodes);
+ BUG_ON(cache->nr_edges);
+}
+
+static struct backref_node *alloc_backref_node(struct backref_cache *cache)
+{
+ struct backref_node *node;
+
+ node = kzalloc(sizeof(*node), GFP_NOFS);
+ if (node) {
+ INIT_LIST_HEAD(&node->list);
+ INIT_LIST_HEAD(&node->upper);
+ INIT_LIST_HEAD(&node->lower);
+ RB_CLEAR_NODE(&node->rb_node);
+ cache->nr_nodes++;
+ }
+ return node;
+}
+
+static void free_backref_node(struct backref_cache *cache,
+ struct backref_node *node)
+{
+ if (node) {
+ cache->nr_nodes--;
+ kfree(node);
+ }
+}
+
+static struct backref_edge *alloc_backref_edge(struct backref_cache *cache)
+{
+ struct backref_edge *edge;
+
+ edge = kzalloc(sizeof(*edge), GFP_NOFS);
+ if (edge)
+ cache->nr_edges++;
+ return edge;
+}
+
+static void free_backref_edge(struct backref_cache *cache,
+ struct backref_edge *edge)
+{
+ if (edge) {
+ cache->nr_edges--;
+ kfree(edge);
+ }
}
static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
@@ -250,6 +342,7 @@ static struct backref_node *walk_up_backref(struct backref_node *node,
edges[idx++] = edge;
node = edge->node[UPPER];
}
+ BUG_ON(node->detached);
*index = idx;
return node;
}
@@ -281,13 +374,18 @@ static struct backref_node *walk_down_backref(struct backref_edge *edges[],
return NULL;
}
+static void unlock_node_buffer(struct backref_node *node)
+{
+ if (node->locked) {
+ btrfs_tree_unlock(node->eb);
+ node->locked = 0;
+ }
+}
+
static void drop_node_buffer(struct backref_node *node)
{
if (node->eb) {
- if (node->locked) {
- btrfs_tree_unlock(node->eb);
- node->locked = 0;
- }
+ unlock_node_buffer(node);
free_extent_buffer(node->eb);
node->eb = NULL;
}
@@ -296,14 +394,14 @@ static void drop_node_buffer(struct backref_node *node)
static void drop_backref_node(struct backref_cache *tree,
struct backref_node *node)
{
- BUG_ON(!node->lowest);
BUG_ON(!list_empty(&node->upper));
drop_node_buffer(node);
+ list_del(&node->list);
list_del(&node->lower);
-
- rb_erase(&node->rb_node, &tree->rb_root);
- kfree(node);
+ if (!RB_EMPTY_NODE(&node->rb_node))
+ rb_erase(&node->rb_node, &tree->rb_root);
+ free_backref_node(tree, node);
}
/*
@@ -318,27 +416,121 @@ static void remove_backref_node(struct backref_cache *cache,
if (!node)
return;
- BUG_ON(!node->lowest);
+ BUG_ON(!node->lowest && !node->detached);
while (!list_empty(&node->upper)) {
edge = list_entry(node->upper.next, struct backref_edge,
list[LOWER]);
upper = edge->node[UPPER];
list_del(&edge->list[LOWER]);
list_del(&edge->list[UPPER]);
- kfree(edge);
+ free_backref_edge(cache, edge);
+
+ if (RB_EMPTY_NODE(&upper->rb_node)) {
+ BUG_ON(!list_empty(&node->upper));
+ drop_backref_node(cache, node);
+ node = upper;
+ node->lowest = 1;
+ continue;
+ }
/*
- * add the node to pending list if no other
+ * add the node to leaf node list if no other
* child block cached.
*/
if (list_empty(&upper->lower)) {
- list_add_tail(&upper->lower,
- &cache->pending[upper->level]);
+ list_add_tail(&upper->lower, &cache->leaves);
upper->lowest = 1;
}
}
+
drop_backref_node(cache, node);
}
+static void update_backref_node(struct backref_cache *cache,
+ struct backref_node *node, u64 bytenr)
+{
+ struct rb_node *rb_node;
+ rb_erase(&node->rb_node, &cache->rb_root);
+ node->bytenr = bytenr;
+ rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node);
+ BUG_ON(rb_node);
+}
+
+/*
+ * update backref cache after a transaction commit
+ */
+static int update_backref_cache(struct btrfs_trans_handle *trans,
+ struct backref_cache *cache)
+{
+ struct backref_node *node;
+ int level = 0;
+
+ if (cache->last_trans == 0) {
+ cache->last_trans = trans->transid;
+ return 0;
+ }
+
+ if (cache->last_trans == trans->transid)
+ return 0;
+
+ /*
+ * detached nodes are used to avoid unnecessary backref
+ * lookup. transaction commit changes the extent tree.
+ * so the detached nodes are no longer useful.
+ */
+ while (!list_empty(&cache->detached)) {
+ node = list_entry(cache->detached.next,
+ struct backref_node, list);
+ remove_backref_node(cache, node);
+ }
+
+ while (!list_empty(&cache->changed)) {
+ node = list_entry(cache->changed.next,
+ struct backref_node, list);
+ list_del_init(&node->list);
+ BUG_ON(node->pending);
+ update_backref_node(cache, node, node->new_bytenr);
+ }
+
+ /*
+ * some nodes can be left in the pending list if there were
+ * errors during processing the pending nodes.
+ */
+ for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
+ list_for_each_entry(node, &cache->pending[level], list) {
+ BUG_ON(!node->pending);
+ if (node->bytenr == node->new_bytenr)
+ continue;
+ update_backref_node(cache, node, node->new_bytenr);
+ }
+ }
+
+ cache->last_trans = 0;
+ return 1;
+}
+
+static int should_ignore_root(struct btrfs_root *root)
+{
+ struct btrfs_root *reloc_root;
+
+ if (!root->ref_cows)
+ return 0;
+
+ reloc_root = root->reloc_root;
+ if (!reloc_root)
+ return 0;
+
+ if (btrfs_root_last_snapshot(&reloc_root->root_item) ==
+ root->fs_info->running_transaction->transid - 1)
+ return 0;
+ /*
+ * if there is reloc tree and it was created in previous
+ * transaction backref lookup can find the reloc tree,
+ * so backref node for the fs tree root is useless for
+ * relocation.
+ */
+ return 1;
+}
+
/*
* find reloc tree by address of tree root
*/
@@ -453,11 +645,12 @@ int find_inline_backref(struct extent_buffer *leaf, int slot,
* for all upper level blocks that directly/indirectly reference the
* block are also cached.
*/
-static struct backref_node *build_backref_tree(struct reloc_control *rc,
- struct backref_cache *cache,
- struct btrfs_key *node_key,
- int level, u64 bytenr)
+static noinline_for_stack
+struct backref_node *build_backref_tree(struct reloc_control *rc,
+ struct btrfs_key *node_key,
+ int level, u64 bytenr)
{
+ struct backref_cache *cache = &rc->backref_cache;
struct btrfs_path *path1;
struct btrfs_path *path2;
struct extent_buffer *eb;
@@ -473,6 +666,8 @@ static struct backref_node *build_backref_tree(struct reloc_control *rc,
unsigned long end;
unsigned long ptr;
LIST_HEAD(list);
+ LIST_HEAD(useless);
+ int cowonly;
int ret;
int err = 0;
@@ -483,15 +678,13 @@ static struct backref_node *build_backref_tree(struct reloc_control *rc,
goto out;
}
- node = kmalloc(sizeof(*node), GFP_NOFS);
+ node = alloc_backref_node(cache);
if (!node) {
err = -ENOMEM;
goto out;
}
- backref_node_init(node);
node->bytenr = bytenr;
- node->owner = 0;
node->level = level;
node->lowest = 1;
cur = node;
@@ -587,17 +780,20 @@ again:
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
if (key.type == BTRFS_SHARED_BLOCK_REF_KEY ||
key.type == BTRFS_EXTENT_REF_V0_KEY) {
- if (key.objectid == key.offset &&
- key.type == BTRFS_EXTENT_REF_V0_KEY) {
+ if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
struct btrfs_extent_ref_v0 *ref0;
ref0 = btrfs_item_ptr(eb, path1->slots[0],
struct btrfs_extent_ref_v0);
root = find_tree_root(rc, eb, ref0);
- if (root)
- cur->root = root;
- else
- cur->old_root = 1;
- break;
+ if (!root->ref_cows)
+ cur->cowonly = 1;
+ if (key.objectid == key.offset) {
+ if (root && !should_ignore_root(root))
+ cur->root = root;
+ else
+ list_add(&cur->list, &useless);
+ break;
+ }
}
#else
BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
@@ -614,22 +810,20 @@ again:
break;
}
- edge = kzalloc(sizeof(*edge), GFP_NOFS);
+ edge = alloc_backref_edge(cache);
if (!edge) {
err = -ENOMEM;
goto out;
}
rb_node = tree_search(&cache->rb_root, key.offset);
if (!rb_node) {
- upper = kmalloc(sizeof(*upper), GFP_NOFS);
+ upper = alloc_backref_node(cache);
if (!upper) {
- kfree(edge);
+ free_backref_edge(cache, edge);
err = -ENOMEM;
goto out;
}
- backref_node_init(upper);
upper->bytenr = key.offset;
- upper->owner = 0;
upper->level = cur->level + 1;
/*
* backrefs for the upper level block isn't
@@ -639,11 +833,12 @@ again:
} else {
upper = rb_entry(rb_node, struct backref_node,
rb_node);
+ BUG_ON(!upper->checked);
INIT_LIST_HEAD(&edge->list[UPPER]);
}
- list_add(&edge->list[LOWER], &cur->upper);
- edge->node[UPPER] = upper;
+ list_add_tail(&edge->list[LOWER], &cur->upper);
edge->node[LOWER] = cur;
+ edge->node[UPPER] = upper;
goto next;
} else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
@@ -657,11 +852,17 @@ again:
goto out;
}
+ if (!root->ref_cows)
+ cur->cowonly = 1;
+
if (btrfs_root_level(&root->root_item) == cur->level) {
/* tree root */
BUG_ON(btrfs_root_bytenr(&root->root_item) !=
cur->bytenr);
- cur->root = root;
+ if (should_ignore_root(root))
+ list_add(&cur->list, &useless);
+ else
+ cur->root = root;
break;
}
@@ -692,11 +893,14 @@ again:
if (!path2->nodes[level]) {
BUG_ON(btrfs_root_bytenr(&root->root_item) !=
lower->bytenr);
- lower->root = root;
+ if (should_ignore_root(root))
+ list_add(&lower->list, &useless);
+ else
+ lower->root = root;
break;
}
- edge = kzalloc(sizeof(*edge), GFP_NOFS);
+ edge = alloc_backref_edge(cache);
if (!edge) {
err = -ENOMEM;
goto out;
@@ -705,16 +909,17 @@ again:
eb = path2->nodes[level];
rb_node = tree_search(&cache->rb_root, eb->start);
if (!rb_node) {
- upper = kmalloc(sizeof(*upper), GFP_NOFS);
+ upper = alloc_backref_node(cache);
if (!upper) {
- kfree(edge);
+ free_backref_edge(cache, edge);
err = -ENOMEM;
goto out;
}
- backref_node_init(upper);
upper->bytenr = eb->start;
upper->owner = btrfs_header_owner(eb);
upper->level = lower->level + 1;
+ if (!root->ref_cows)
+ upper->cowonly = 1;
/*
* if we know the block isn't shared
@@ -744,10 +949,12 @@ again:
rb_node);
BUG_ON(!upper->checked);
INIT_LIST_HEAD(&edge->list[UPPER]);
+ if (!upper->owner)
+ upper->owner = btrfs_header_owner(eb);
}
list_add_tail(&edge->list[LOWER], &lower->upper);
- edge->node[UPPER] = upper;
edge->node[LOWER] = lower;
+ edge->node[UPPER] = upper;
if (rb_node)
break;
@@ -785,8 +992,13 @@ next:
* into the cache.
*/
BUG_ON(!node->checked);
- rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node);
- BUG_ON(rb_node);
+ cowonly = node->cowonly;
+ if (!cowonly) {
+ rb_node = tree_insert(&cache->rb_root, node->bytenr,
+ &node->rb_node);
+ BUG_ON(rb_node);
+ list_add_tail(&node->lower, &cache->leaves);
+ }
list_for_each_entry(edge, &node->upper, list[LOWER])
list_add_tail(&edge->list[UPPER], &list);
@@ -795,6 +1007,14 @@ next:
edge = list_entry(list.next, struct backref_edge, list[UPPER]);
list_del_init(&edge->list[UPPER]);
upper = edge->node[UPPER];
+ if (upper->detached) {
+ list_del(&edge->list[LOWER]);
+ lower = edge->node[LOWER];
+ free_backref_edge(cache, edge);
+ if (list_empty(&lower->upper))
+ list_add(&lower->list, &useless);
+ continue;
+ }
if (!RB_EMPTY_NODE(&upper->rb_node)) {
if (upper->lowest) {
@@ -807,25 +1027,69 @@ next:
}
BUG_ON(!upper->checked);
- rb_node = tree_insert(&cache->rb_root, upper->bytenr,
- &upper->rb_node);
- BUG_ON(rb_node);
+ BUG_ON(cowonly != upper->cowonly);
+ if (!cowonly) {
+ rb_node = tree_insert(&cache->rb_root, upper->bytenr,
+ &upper->rb_node);
+ BUG_ON(rb_node);
+ }
list_add_tail(&edge->list[UPPER], &upper->lower);
list_for_each_entry(edge, &upper->upper, list[LOWER])
list_add_tail(&edge->list[UPPER], &list);
}
+ /*
+ * process useless backref nodes. backref nodes for tree leaves
+ * are deleted from the cache. backref nodes for upper level
+ * tree blocks are left in the cache to avoid unnecessary backref
+ * lookup.
+ */
+ while (!list_empty(&useless)) {
+ upper = list_entry(useless.next, struct backref_node, list);
+ list_del_init(&upper->list);
+ BUG_ON(!list_empty(&upper->upper));
+ if (upper == node)
+ node = NULL;
+ if (upper->lowest) {
+ list_del_init(&upper->lower);
+ upper->lowest = 0;
+ }
+ while (!list_empty(&upper->lower)) {
+ edge = list_entry(upper->lower.next,
+ struct backref_edge, list[UPPER]);
+ list_del(&edge->list[UPPER]);
+ list_del(&edge->list[LOWER]);
+ lower = edge->node[LOWER];
+ free_backref_edge(cache, edge);
+
+ if (list_empty(&lower->upper))
+ list_add(&lower->list, &useless);
+ }
+ __mark_block_processed(rc, upper);
+ if (upper->level > 0) {
+ list_add(&upper->list, &cache->detached);
+ upper->detached = 1;
+ } else {
+ rb_erase(&upper->rb_node, &cache->rb_root);
+ free_backref_node(cache, upper);
+ }
+ }
out:
btrfs_free_path(path1);
btrfs_free_path(path2);
if (err) {
- INIT_LIST_HEAD(&list);
+ while (!list_empty(&useless)) {
+ lower = list_entry(useless.next,
+ struct backref_node, upper);
+ list_del_init(&lower->upper);
+ }
upper = node;
+ INIT_LIST_HEAD(&list);
while (upper) {
if (RB_EMPTY_NODE(&upper->rb_node)) {
list_splice_tail(&upper->upper, &list);
- kfree(upper);
+ free_backref_node(cache, upper);
}
if (list_empty(&list))
@@ -833,15 +1097,104 @@ out:
edge = list_entry(list.next, struct backref_edge,
list[LOWER]);
+ list_del(&edge->list[LOWER]);
upper = edge->node[UPPER];
- kfree(edge);
+ free_backref_edge(cache, edge);
}
return ERR_PTR(err);
}
+ BUG_ON(node && node->detached);
return node;
}
/*
+ * helper to add backref node for the newly created snapshot.
+ * the backref node is created by cloning backref node that
+ * corresponds to root of source tree
+ */
+static int clone_backref_node(struct btrfs_trans_handle *trans,
+ struct reloc_control *rc,
+ struct btrfs_root *src,
+ struct btrfs_root *dest)
+{
+ struct btrfs_root *reloc_root = src->reloc_root;
+ struct backref_cache *cache = &rc->backref_cache;
+ struct backref_node *node = NULL;
+ struct backref_node *new_node;
+ struct backref_edge *edge;
+ struct backref_edge *new_edge;
+ struct rb_node *rb_node;
+
+ if (cache->last_trans > 0)
+ update_backref_cache(trans, cache);
+
+ rb_node = tree_search(&cache->rb_root, src->commit_root->start);
+ if (rb_node) {
+ node = rb_entry(rb_node, struct backref_node, rb_node);
+ if (node->detached)
+ node = NULL;
+ else
+ BUG_ON(node->new_bytenr != reloc_root->node->start);
+ }
+
+ if (!node) {
+ rb_node = tree_search(&cache->rb_root,
+ reloc_root->commit_root->start);
+ if (rb_node) {
+ node = rb_entry(rb_node, struct backref_node,
+ rb_node);
+ BUG_ON(node->detached);
+ }
+ }
+
+ if (!node)
+ return 0;
+
+ new_node = alloc_backref_node(cache);
+ if (!new_node)
+ return -ENOMEM;
+
+ new_node->bytenr = dest->node->start;
+ new_node->level = node->level;
+ new_node->lowest = node->lowest;
+ new_node->root = dest;
+
+ if (!node->lowest) {
+ list_for_each_entry(edge, &node->lower, list[UPPER]) {
+ new_edge = alloc_backref_edge(cache);
+ if (!new_edge)
+ goto fail;
+
+ new_edge->node[UPPER] = new_node;
+ new_edge->node[LOWER] = edge->node[LOWER];
+ list_add_tail(&new_edge->list[UPPER],
+ &new_node->lower);
+ }
+ }
+
+ rb_node = tree_insert(&cache->rb_root, new_node->bytenr,
+ &new_node->rb_node);
+ BUG_ON(rb_node);
+
+ if (!new_node->lowest) {
+ list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) {
+ list_add_tail(&new_edge->list[LOWER],
+ &new_edge->node[LOWER]->upper);
+ }
+ }
+ return 0;
+fail:
+ while (!list_empty(&new_node->lower)) {
+ new_edge = list_entry(new_node->lower.next,
+ struct backref_edge, list[UPPER]);
+ list_del(&new_edge->list[UPPER]);
+ free_backref_edge(cache, new_edge);
+ }
+ free_backref_node(cache, new_node);
+ return -ENOMEM;
+}
+
+/*
* helper to add 'address of tree root -> reloc tree' mapping
*/
static int __add_reloc_root(struct btrfs_root *root)
@@ -901,12 +1254,8 @@ static int __update_reloc_root(struct btrfs_root *root, int del)
return 0;
}
-/*
- * create reloc tree for a given fs tree. reloc tree is just a
- * snapshot of the fs tree with special root objectid.
- */
-int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
- struct btrfs_root *root)
+static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 objectid)
{
struct btrfs_root *reloc_root;
struct extent_buffer *eb;
@@ -914,36 +1263,45 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
struct btrfs_key root_key;
int ret;
- if (root->reloc_root) {
- reloc_root = root->reloc_root;
- reloc_root->last_trans = trans->transid;
- return 0;
- }
-
- if (!root->fs_info->reloc_ctl ||
- !root->fs_info->reloc_ctl->create_reloc_root ||
- root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
- return 0;
-
root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
BUG_ON(!root_item);
root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
root_key.type = BTRFS_ROOT_ITEM_KEY;
- root_key.offset = root->root_key.objectid;
+ root_key.offset = objectid;
- ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
- BTRFS_TREE_RELOC_OBJECTID);
- BUG_ON(ret);
+ if (root->root_key.objectid == objectid) {
+ /* called by btrfs_init_reloc_root */
+ ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
+ BTRFS_TREE_RELOC_OBJECTID);
+ BUG_ON(ret);
+
+ btrfs_set_root_last_snapshot(&root->root_item,
+ trans->transid - 1);
+ } else {
+ /*
+ * called by btrfs_reloc_post_snapshot_hook.
+ * the source tree is a reloc tree, all tree blocks
+ * modified after it was created have RELOC flag
+ * set in their headers. so it's OK to not update
+ * the 'last_snapshot'.
+ */
+ ret = btrfs_copy_root(trans, root, root->node, &eb,
+ BTRFS_TREE_RELOC_OBJECTID);
+ BUG_ON(ret);
+ }
- btrfs_set_root_last_snapshot(&root->root_item, trans->transid - 1);
memcpy(root_item, &root->root_item, sizeof(*root_item));
- btrfs_set_root_refs(root_item, 1);
btrfs_set_root_bytenr(root_item, eb->start);
btrfs_set_root_level(root_item, btrfs_header_level(eb));
btrfs_set_root_generation(root_item, trans->transid);
- memset(&root_item->drop_progress, 0, sizeof(struct btrfs_disk_key));
- root_item->drop_level = 0;
+
+ if (root->root_key.objectid == objectid) {
+ btrfs_set_root_refs(root_item, 0);
+ memset(&root_item->drop_progress, 0,
+ sizeof(struct btrfs_disk_key));
+ root_item->drop_level = 0;
+ }
btrfs_tree_unlock(eb);
free_extent_buffer(eb);
@@ -957,6 +1315,37 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
&root_key);
BUG_ON(IS_ERR(reloc_root));
reloc_root->last_trans = trans->transid;
+ return reloc_root;
+}
+
+/*
+ * create reloc tree for a given fs tree. reloc tree is just a
+ * snapshot of the fs tree with special root objectid.
+ */
+int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root)
+{
+ struct btrfs_root *reloc_root;
+ struct reloc_control *rc = root->fs_info->reloc_ctl;
+ int clear_rsv = 0;
+
+ if (root->reloc_root) {
+ reloc_root = root->reloc_root;
+ reloc_root->last_trans = trans->transid;
+ return 0;
+ }
+
+ if (!rc || !rc->create_reloc_tree ||
+ root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
+ return 0;
+
+ if (!trans->block_rsv) {
+ trans->block_rsv = rc->block_rsv;
+ clear_rsv = 1;
+ }
+ reloc_root = create_reloc_root(trans, root, root->root_key.objectid);
+ if (clear_rsv)
+ trans->block_rsv = NULL;
__add_reloc_root(reloc_root);
root->reloc_root = reloc_root;
@@ -980,7 +1369,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
reloc_root = root->reloc_root;
root_item = &reloc_root->root_item;
- if (btrfs_root_refs(root_item) == 0) {
+ if (root->fs_info->reloc_ctl->merge_reloc_tree &&
+ btrfs_root_refs(root_item) == 0) {
root->reloc_root = NULL;
del = 1;
}
@@ -1102,8 +1492,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
goto out;
}
- if (new_bytenr)
- *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+ *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
ret = 0;
out:
btrfs_free_path(path);
@@ -1114,19 +1503,18 @@ out:
* update file extent items in the tree leaf to point to
* the new locations.
*/
-static int replace_file_extents(struct btrfs_trans_handle *trans,
- struct reloc_control *rc,
- struct btrfs_root *root,
- struct extent_buffer *leaf,
- struct list_head *inode_list)
+static noinline_for_stack
+int replace_file_extents(struct btrfs_trans_handle *trans,
+ struct reloc_control *rc,
+ struct btrfs_root *root,
+ struct extent_buffer *leaf)
{
struct btrfs_key key;
struct btrfs_file_extent_item *fi;
struct inode *inode = NULL;
- struct inodevec *ivec = NULL;
u64 parent;
u64 bytenr;
- u64 new_bytenr;
+ u64 new_bytenr = 0;
u64 num_bytes;
u64 end;
u32 nritems;
@@ -1166,21 +1554,12 @@ static int replace_file_extents(struct btrfs_trans_handle *trans,
* to complete and drop the extent cache
*/
if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
- if (!ivec || ivec->nr == INODEVEC_SIZE) {
- ivec = kmalloc(sizeof(*ivec), GFP_NOFS);
- BUG_ON(!ivec);
- ivec->nr = 0;
- list_add_tail(&ivec->list, inode_list);
- }
if (first) {
inode = find_next_inode(root, key.objectid);
- if (inode)
- ivec->inode[ivec->nr++] = inode;
first = 0;
} else if (inode && inode->i_ino < key.objectid) {
+ btrfs_add_delayed_iput(inode);