diff options
Diffstat (limited to 'fs/btrfs/relocation.c')
| -rw-r--r-- | fs/btrfs/relocation.c | 2737 |
1 files changed, 1783 insertions, 954 deletions
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index a9728680eca..65245a07275 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -21,6 +21,7 @@ #include <linux/writeback.h> #include <linux/blkdev.h> #include <linux/rbtree.h> +#include <linux/slab.h> #include "ctree.h" #include "disk-io.h" #include "transaction.h" @@ -28,6 +29,8 @@ #include "locking.h" #include "btrfs_inode.h" #include "async-thread.h" +#include "free-space-cache.h" +#include "inode-map.h" /* * backref_node, mapping_node and tree_block start with this @@ -43,8 +46,12 @@ struct tree_entry { struct backref_node { struct rb_node rb_node; u64 bytenr; - /* objectid tree block owner */ + + u64 new_bytenr; + /* objectid of tree block owner, can be not uptodate */ u64 owner; + /* link to pending, changed or detached list */ + struct list_head list; /* list of upper level blocks reference this block */ struct list_head upper; /* list of child blocks in the cache */ @@ -55,9 +62,9 @@ struct backref_node { struct extent_buffer *eb; /* level of tree block */ unsigned int level:8; - /* 1 if the block is root of old snapshot */ - unsigned int old_root:1; - /* 1 if no child blocks in the cache */ + /* is the block in non-reference counted tree */ + unsigned int cowonly:1; + /* 1 if no child node in the cache */ unsigned int lowest:1; /* is the extent buffer locked */ unsigned int locked:1; @@ -65,6 +72,16 @@ struct backref_node { unsigned int processed:1; /* have backrefs of this block been checked */ unsigned int checked:1; + /* + * 1 if corresponding block has been cowed but some upper + * level block pointers may not point to the new location + */ + unsigned int pending:1; + /* + * 1 if the backref node isn't connected to any other + * backref node. + */ + unsigned int detached:1; }; /* @@ -73,18 +90,34 @@ struct backref_node { struct backref_edge { struct list_head list[2]; struct backref_node *node[2]; - u64 blockptr; }; #define LOWER 0 #define UPPER 1 +#define RELOCATION_RESERVED_NODES 256 struct backref_cache { /* red black tree of all backref nodes in the cache */ struct rb_root rb_root; - /* list of backref nodes with no child block in the cache */ + /* for passing backref nodes to btrfs_reloc_cow_block */ + struct backref_node *path[BTRFS_MAX_LEVEL]; + /* + * list of blocks that have been cowed but some block + * pointers in upper level blocks may not reflect the + * new location + */ struct list_head pending[BTRFS_MAX_LEVEL]; - spinlock_t lock; + /* list of backref nodes with no child node */ + struct list_head leaves; + /* list of blocks that have been cowed in current transaction */ + struct list_head changed; + /* list of detached backref node. */ + struct list_head detached; + + u64 last_trans; + + int nr_nodes; + int nr_edges; }; /* @@ -112,15 +145,6 @@ struct tree_block { unsigned int key_ready:1; }; -/* inode vector */ -#define INODEVEC_SIZE 16 - -struct inodevec { - struct list_head list; - struct inode *inode[INODEVEC_SIZE]; - int nr; -}; - #define MAX_EXTENTS 128 struct file_extent_cluster { @@ -137,58 +161,129 @@ struct reloc_control { struct btrfs_root *extent_root; /* inode for moving data */ struct inode *data_inode; - struct btrfs_workers workers; + + struct btrfs_block_rsv *block_rsv; + + struct backref_cache backref_cache; + + struct file_extent_cluster cluster; /* tree blocks have been processed */ struct extent_io_tree processed_blocks; /* map start of tree root to corresponding reloc tree */ struct mapping_tree reloc_root_tree; /* list of reloc trees */ struct list_head reloc_roots; + /* size of metadata reservation for merging reloc trees */ + u64 merging_rsv_size; + /* size of relocated tree nodes */ + u64 nodes_relocated; + /* reserved size for block group relocation*/ + u64 reserved_bytes; + u64 search_start; u64 extents_found; - u64 extents_skipped; - int stage; - int create_reloc_root; + + unsigned int stage:8; + unsigned int create_reloc_tree:1; + unsigned int merge_reloc_tree:1; unsigned int found_file_extent:1; - unsigned int found_old_snapshot:1; }; /* stages of data relocation */ #define MOVE_DATA_EXTENTS 0 #define UPDATE_DATA_PTRS 1 -/* - * merge reloc tree to corresponding fs tree in worker threads - */ -struct async_merge { - struct btrfs_work work; - struct reloc_control *rc; - struct btrfs_root *root; - struct completion *done; - atomic_t *num_pending; -}; +static void remove_backref_node(struct backref_cache *cache, + struct backref_node *node); +static void __mark_block_processed(struct reloc_control *rc, + struct backref_node *node); static void mapping_tree_init(struct mapping_tree *tree) { - tree->rb_root.rb_node = NULL; + tree->rb_root = RB_ROOT; spin_lock_init(&tree->lock); } static void backref_cache_init(struct backref_cache *cache) { int i; - cache->rb_root.rb_node = NULL; + cache->rb_root = RB_ROOT; for (i = 0; i < BTRFS_MAX_LEVEL; i++) INIT_LIST_HEAD(&cache->pending[i]); - spin_lock_init(&cache->lock); + INIT_LIST_HEAD(&cache->changed); + INIT_LIST_HEAD(&cache->detached); + INIT_LIST_HEAD(&cache->leaves); +} + +static void backref_cache_cleanup(struct backref_cache *cache) +{ + struct backref_node *node; + int i; + + while (!list_empty(&cache->detached)) { + node = list_entry(cache->detached.next, + struct backref_node, list); + remove_backref_node(cache, node); + } + + while (!list_empty(&cache->leaves)) { + node = list_entry(cache->leaves.next, + struct backref_node, lower); + remove_backref_node(cache, node); + } + + cache->last_trans = 0; + + for (i = 0; i < BTRFS_MAX_LEVEL; i++) + BUG_ON(!list_empty(&cache->pending[i])); + BUG_ON(!list_empty(&cache->changed)); + BUG_ON(!list_empty(&cache->detached)); + BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root)); + BUG_ON(cache->nr_nodes); + BUG_ON(cache->nr_edges); +} + +static struct backref_node *alloc_backref_node(struct backref_cache *cache) +{ + struct backref_node *node; + + node = kzalloc(sizeof(*node), GFP_NOFS); + if (node) { + INIT_LIST_HEAD(&node->list); + INIT_LIST_HEAD(&node->upper); + INIT_LIST_HEAD(&node->lower); + RB_CLEAR_NODE(&node->rb_node); + cache->nr_nodes++; + } + return node; +} + +static void free_backref_node(struct backref_cache *cache, + struct backref_node *node) +{ + if (node) { + cache->nr_nodes--; + kfree(node); + } } -static void backref_node_init(struct backref_node *node) +static struct backref_edge *alloc_backref_edge(struct backref_cache *cache) { - memset(node, 0, sizeof(*node)); - INIT_LIST_HEAD(&node->upper); - INIT_LIST_HEAD(&node->lower); - RB_CLEAR_NODE(&node->rb_node); + struct backref_edge *edge; + + edge = kzalloc(sizeof(*edge), GFP_NOFS); + if (edge) + cache->nr_edges++; + return edge; +} + +static void free_backref_edge(struct backref_cache *cache, + struct backref_edge *edge) +{ + if (edge) { + cache->nr_edges--; + kfree(edge); + } } static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, @@ -233,6 +328,18 @@ static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) return NULL; } +static void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr) +{ + + struct btrfs_fs_info *fs_info = NULL; + struct backref_node *bnode = rb_entry(rb_node, struct backref_node, + rb_node); + if (bnode->root) + fs_info = bnode->root->fs_info; + btrfs_panic(fs_info, errno, "Inconsistency in backref cache " + "found at offset %llu", bytenr); +} + /* * walk up backref nodes until reach node presents tree root */ @@ -249,6 +356,7 @@ static struct backref_node *walk_up_backref(struct backref_node *node, edges[idx++] = edge; node = edge->node[UPPER]; } + BUG_ON(node->detached); *index = idx; return node; } @@ -280,13 +388,18 @@ static struct backref_node *walk_down_backref(struct backref_edge *edges[], return NULL; } +static void unlock_node_buffer(struct backref_node *node) +{ + if (node->locked) { + btrfs_tree_unlock(node->eb); + node->locked = 0; + } +} + static void drop_node_buffer(struct backref_node *node) { if (node->eb) { - if (node->locked) { - btrfs_tree_unlock(node->eb); - node->locked = 0; - } + unlock_node_buffer(node); free_extent_buffer(node->eb); node->eb = NULL; } @@ -295,14 +408,14 @@ static void drop_node_buffer(struct backref_node *node) static void drop_backref_node(struct backref_cache *tree, struct backref_node *node) { - BUG_ON(!node->lowest); BUG_ON(!list_empty(&node->upper)); drop_node_buffer(node); + list_del(&node->list); list_del(&node->lower); - - rb_erase(&node->rb_node, &tree->rb_root); - kfree(node); + if (!RB_EMPTY_NODE(&node->rb_node)) + rb_erase(&node->rb_node, &tree->rb_root); + free_backref_node(tree, node); } /* @@ -317,27 +430,122 @@ static void remove_backref_node(struct backref_cache *cache, if (!node) return; - BUG_ON(!node->lowest); + BUG_ON(!node->lowest && !node->detached); while (!list_empty(&node->upper)) { edge = list_entry(node->upper.next, struct backref_edge, list[LOWER]); upper = edge->node[UPPER]; list_del(&edge->list[LOWER]); list_del(&edge->list[UPPER]); - kfree(edge); + free_backref_edge(cache, edge); + + if (RB_EMPTY_NODE(&upper->rb_node)) { + BUG_ON(!list_empty(&node->upper)); + drop_backref_node(cache, node); + node = upper; + node->lowest = 1; + continue; + } /* - * add the node to pending list if no other + * add the node to leaf node list if no other * child block cached. */ if (list_empty(&upper->lower)) { - list_add_tail(&upper->lower, - &cache->pending[upper->level]); + list_add_tail(&upper->lower, &cache->leaves); upper->lowest = 1; } } + drop_backref_node(cache, node); } +static void update_backref_node(struct backref_cache *cache, + struct backref_node *node, u64 bytenr) +{ + struct rb_node *rb_node; + rb_erase(&node->rb_node, &cache->rb_root); + node->bytenr = bytenr; + rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); + if (rb_node) + backref_tree_panic(rb_node, -EEXIST, bytenr); +} + +/* + * update backref cache after a transaction commit + */ +static int update_backref_cache(struct btrfs_trans_handle *trans, + struct backref_cache *cache) +{ + struct backref_node *node; + int level = 0; + + if (cache->last_trans == 0) { + cache->last_trans = trans->transid; + return 0; + } + + if (cache->last_trans == trans->transid) + return 0; + + /* + * detached nodes are used to avoid unnecessary backref + * lookup. transaction commit changes the extent tree. + * so the detached nodes are no longer useful. + */ + while (!list_empty(&cache->detached)) { + node = list_entry(cache->detached.next, + struct backref_node, list); + remove_backref_node(cache, node); + } + + while (!list_empty(&cache->changed)) { + node = list_entry(cache->changed.next, + struct backref_node, list); + list_del_init(&node->list); + BUG_ON(node->pending); + update_backref_node(cache, node, node->new_bytenr); + } + + /* + * some nodes can be left in the pending list if there were + * errors during processing the pending nodes. + */ + for (level = 0; level < BTRFS_MAX_LEVEL; level++) { + list_for_each_entry(node, &cache->pending[level], list) { + BUG_ON(!node->pending); + if (node->bytenr == node->new_bytenr) + continue; + update_backref_node(cache, node, node->new_bytenr); + } + } + + cache->last_trans = 0; + return 1; +} + + +static int should_ignore_root(struct btrfs_root *root) +{ + struct btrfs_root *reloc_root; + + if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) + return 0; + + reloc_root = root->reloc_root; + if (!reloc_root) + return 0; + + if (btrfs_root_last_snapshot(&reloc_root->root_item) == + root->fs_info->running_transaction->transid - 1) + return 0; + /* + * if there is reloc tree and it was created in previous + * transaction backref lookup can find the reloc tree, + * so backref node for the fs tree root is useless for + * relocation. + */ + return 1; +} /* * find reloc tree by address of tree root */ @@ -365,7 +573,9 @@ static int is_cowonly_root(u64 root_objectid) root_objectid == BTRFS_CHUNK_TREE_OBJECTID || root_objectid == BTRFS_DEV_TREE_OBJECTID || root_objectid == BTRFS_TREE_LOG_OBJECTID || - root_objectid == BTRFS_CSUM_TREE_OBJECTID) + root_objectid == BTRFS_CSUM_TREE_OBJECTID || + root_objectid == BTRFS_UUID_TREE_OBJECTID || + root_objectid == BTRFS_QUOTA_TREE_OBJECTID) return 1; return 0; } @@ -382,7 +592,7 @@ static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info, else key.offset = (u64)-1; - return btrfs_read_fs_root_no_name(fs_info, &key); + return btrfs_get_fs_root(fs_info, &key, false); } #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 @@ -400,7 +610,7 @@ struct btrfs_root *find_tree_root(struct reloc_control *rc, root = read_fs_root(rc->extent_root->fs_info, root_objectid); BUG_ON(IS_ERR(root)); - if (root->ref_cows && + if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) && generation != btrfs_root_generation(&root->root_item)) return NULL; @@ -412,10 +622,13 @@ static noinline_for_stack int find_inline_backref(struct extent_buffer *leaf, int slot, unsigned long *ptr, unsigned long *end) { + struct btrfs_key key; struct btrfs_extent_item *ei; struct btrfs_tree_block_info *bi; u32 item_size; + btrfs_item_key_to_cpu(leaf, &key, slot); + item_size = btrfs_item_size_nr(leaf, slot); #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 if (item_size < sizeof(*ei)) { @@ -427,13 +640,23 @@ int find_inline_backref(struct extent_buffer *leaf, int slot, WARN_ON(!(btrfs_extent_flags(leaf, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)); - if (item_size <= sizeof(*ei) + sizeof(*bi)) { + if (key.type == BTRFS_EXTENT_ITEM_KEY && + item_size <= sizeof(*ei) + sizeof(*bi)) { WARN_ON(item_size < sizeof(*ei) + sizeof(*bi)); return 1; } + if (key.type == BTRFS_METADATA_ITEM_KEY && + item_size <= sizeof(*ei)) { + WARN_ON(item_size < sizeof(*ei)); + return 1; + } - bi = (struct btrfs_tree_block_info *)(ei + 1); - *ptr = (unsigned long)(bi + 1); + if (key.type == BTRFS_EXTENT_ITEM_KEY) { + bi = (struct btrfs_tree_block_info *)(ei + 1); + *ptr = (unsigned long)(bi + 1); + } else { + *ptr = (unsigned long)(ei + 1); + } *end = (unsigned long)ei + item_size; return 0; } @@ -452,11 +675,12 @@ int find_inline_backref(struct extent_buffer *leaf, int slot, * for all upper level blocks that directly/indirectly reference the * block are also cached. */ -static struct backref_node *build_backref_tree(struct reloc_control *rc, - struct backref_cache *cache, - struct btrfs_key *node_key, - int level, u64 bytenr) +static noinline_for_stack +struct backref_node *build_backref_tree(struct reloc_control *rc, + struct btrfs_key *node_key, + int level, u64 bytenr) { + struct backref_cache *cache = &rc->backref_cache; struct btrfs_path *path1; struct btrfs_path *path2; struct extent_buffer *eb; @@ -472,8 +696,11 @@ static struct backref_node *build_backref_tree(struct reloc_control *rc, unsigned long end; unsigned long ptr; LIST_HEAD(list); + LIST_HEAD(useless); + int cowonly; int ret; int err = 0; + bool need_check = true; path1 = btrfs_alloc_path(); path2 = btrfs_alloc_path(); @@ -481,16 +708,16 @@ static struct backref_node *build_backref_tree(struct reloc_control *rc, err = -ENOMEM; goto out; } + path1->reada = 1; + path2->reada = 2; - node = kmalloc(sizeof(*node), GFP_NOFS); + node = alloc_backref_node(cache); if (!node) { err = -ENOMEM; goto out; } - backref_node_init(node); node->bytenr = bytenr; - node->owner = 0; node->level = level; node->lowest = 1; cur = node; @@ -498,7 +725,7 @@ again: end = 0; ptr = 0; key.objectid = cur->bytenr; - key.type = BTRFS_EXTENT_ITEM_KEY; + key.type = BTRFS_METADATA_ITEM_KEY; key.offset = (u64)-1; path1->search_commit_root = 1; @@ -516,7 +743,7 @@ again: WARN_ON(cur->checked); if (!list_empty(&cur->upper)) { /* - * the backref was added previously when processsing + * the backref was added previously when processing * backref of type BTRFS_TREE_BLOCK_REF_KEY */ BUG_ON(!list_is_singular(&cur->upper)); @@ -556,7 +783,8 @@ again: break; } - if (key.type == BTRFS_EXTENT_ITEM_KEY) { + if (key.type == BTRFS_EXTENT_ITEM_KEY || + key.type == BTRFS_METADATA_ITEM_KEY) { ret = find_inline_backref(eb, path1->slots[0], &ptr, &end); if (ret) @@ -586,17 +814,21 @@ again: #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY || key.type == BTRFS_EXTENT_REF_V0_KEY) { - if (key.objectid == key.offset && - key.type == BTRFS_EXTENT_REF_V0_KEY) { + if (key.type == BTRFS_EXTENT_REF_V0_KEY) { struct btrfs_extent_ref_v0 *ref0; ref0 = btrfs_item_ptr(eb, path1->slots[0], struct btrfs_extent_ref_v0); - root = find_tree_root(rc, eb, ref0); - if (root) - cur->root = root; - else - cur->old_root = 1; - break; + if (key.objectid == key.offset) { + root = find_tree_root(rc, eb, ref0); + if (root && !should_ignore_root(root)) + cur->root = root; + else + list_add(&cur->list, &useless); + break; + } + if (is_cowonly_root(btrfs_ref_root_v0(eb, + ref0))) + cur->cowonly = 1; } #else BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); @@ -613,22 +845,20 @@ again: break; } - edge = kzalloc(sizeof(*edge), GFP_NOFS); + edge = alloc_backref_edge(cache); if (!edge) { err = -ENOMEM; goto out; } rb_node = tree_search(&cache->rb_root, key.offset); if (!rb_node) { - upper = kmalloc(sizeof(*upper), GFP_NOFS); + upper = alloc_backref_node(cache); if (!upper) { - kfree(edge); + free_backref_edge(cache, edge); err = -ENOMEM; goto out; } - backref_node_init(upper); upper->bytenr = key.offset; - upper->owner = 0; upper->level = cur->level + 1; /* * backrefs for the upper level block isn't @@ -638,11 +868,12 @@ again: } else { upper = rb_entry(rb_node, struct backref_node, rb_node); + BUG_ON(!upper->checked); INIT_LIST_HEAD(&edge->list[UPPER]); } - list_add(&edge->list[LOWER], &cur->upper); - edge->node[UPPER] = upper; + list_add_tail(&edge->list[LOWER], &cur->upper); edge->node[LOWER] = cur; + edge->node[UPPER] = upper; goto next; } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { @@ -656,11 +887,17 @@ again: goto out; } + if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) + cur->cowonly = 1; + if (btrfs_root_level(&root->root_item) == cur->level) { /* tree root */ BUG_ON(btrfs_root_bytenr(&root->root_item) != cur->bytenr); - cur->root = root; + if (should_ignore_root(root)) + list_add(&cur->list, &useless); + else + cur->root = root; break; } @@ -687,15 +924,19 @@ again: cur->bytenr); lower = cur; + need_check = true; for (; level < BTRFS_MAX_LEVEL; level++) { if (!path2->nodes[level]) { BUG_ON(btrfs_root_bytenr(&root->root_item) != lower->bytenr); - lower->root = root; + if (should_ignore_root(root)) + list_add(&lower->list, &useless); + else + lower->root = root; break; } - edge = kzalloc(sizeof(*edge), GFP_NOFS); + edge = alloc_backref_edge(cache); if (!edge) { err = -ENOMEM; goto out; @@ -704,16 +945,18 @@ again: eb = path2->nodes[level]; rb_node = tree_search(&cache->rb_root, eb->start); if (!rb_node) { - upper = kmalloc(sizeof(*upper), GFP_NOFS); + upper = alloc_backref_node(cache); if (!upper) { - kfree(edge); + free_backref_edge(cache, edge); err = -ENOMEM; goto out; } - backref_node_init(upper); upper->bytenr = eb->start; upper->owner = btrfs_header_owner(eb); upper->level = lower->level + 1; + if (!test_bit(BTRFS_ROOT_REF_COWS, + &root->state)) + upper->cowonly = 1; /* * if we know the block isn't shared @@ -726,14 +969,12 @@ again: /* * add the block to pending list if we - * need check its backrefs. only block - * at 'cur->level + 1' is added to the - * tail of pending list. this guarantees - * we check backrefs from lower level - * blocks to upper level blocks. + * need check its backrefs, we only do this once + * while walking up a tree as we will catch + * anything else later on. */ - if (!upper->checked && - level == cur->level + 1) { + if (!upper->checked && need_check) { + need_check = false; list_add_tail(&edge->list[UPPER], &list); } else @@ -743,17 +984,19 @@ again: rb_node); BUG_ON(!upper->checked); INIT_LIST_HEAD(&edge->list[UPPER]); + if (!upper->owner) + upper->owner = btrfs_header_owner(eb); } list_add_tail(&edge->list[LOWER], &lower->upper); - edge->node[UPPER] = upper; edge->node[LOWER] = lower; + edge->node[UPPER] = upper; if (rb_node) break; lower = upper; upper = NULL; } - btrfs_release_path(root, path2); + btrfs_release_path(path2); next: if (ptr < end) { ptr += btrfs_extent_inline_ref_size(key.type); @@ -766,7 +1009,7 @@ next: if (ptr >= end) path1->slots[0]++; } - btrfs_release_path(rc->extent_root, path1); + btrfs_release_path(path1); cur->checked = 1; WARN_ON(exist); @@ -784,8 +1027,14 @@ next: * into the cache. */ BUG_ON(!node->checked); - rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); - BUG_ON(rb_node); + cowonly = node->cowonly; + if (!cowonly) { + rb_node = tree_insert(&cache->rb_root, node->bytenr, + &node->rb_node); + if (rb_node) + backref_tree_panic(rb_node, -EEXIST, node->bytenr); + list_add_tail(&node->lower, &cache->leaves); + } list_for_each_entry(edge, &node->upper, list[LOWER]) list_add_tail(&edge->list[UPPER], &list); @@ -794,6 +1043,14 @@ next: edge = list_entry(list.next, struct backref_edge, list[UPPER]); list_del_init(&edge->list[UPPER]); upper = edge->node[UPPER]; + if (upper->detached) { + list_del(&edge->list[LOWER]); + lower = edge->node[LOWER]; + free_backref_edge(cache, edge); + if (list_empty(&lower->upper)) + list_add(&lower->list, &useless); + continue; + } if (!RB_EMPTY_NODE(&upper->rb_node)) { if (upper->lowest) { @@ -806,25 +1063,71 @@ next: } BUG_ON(!upper->checked); - rb_node = tree_insert(&cache->rb_root, upper->bytenr, - &upper->rb_node); - BUG_ON(rb_node); + BUG_ON(cowonly != upper->cowonly); + if (!cowonly) { + rb_node = tree_insert(&cache->rb_root, upper->bytenr, + &upper->rb_node); + if (rb_node) + backref_tree_panic(rb_node, -EEXIST, + upper->bytenr); + } list_add_tail(&edge->list[UPPER], &upper->lower); list_for_each_entry(edge, &upper->upper, list[LOWER]) list_add_tail(&edge->list[UPPER], &list); } + /* + * process useless backref nodes. backref nodes for tree leaves + * are deleted from the cache. backref nodes for upper level + * tree blocks are left in the cache to avoid unnecessary backref + * lookup. + */ + while (!list_empty(&useless)) { + upper = list_entry(useless.next, struct backref_node, list); + list_del_init(&upper->list); + BUG_ON(!list_empty(&upper->upper)); + if (upper == node) + node = NULL; + if (upper->lowest) { + list_del_init(&upper->lower); + upper->lowest = 0; + } + while (!list_empty(&upper->lower)) { + edge = list_entry(upper->lower.next, + struct backref_edge, list[UPPER]); + list_del(&edge->list[UPPER]); + list_del(&edge->list[LOWER]); + lower = edge->node[LOWER]; + free_backref_edge(cache, edge); + + if (list_empty(&lower->upper)) + list_add(&lower->list, &useless); + } + __mark_block_processed(rc, upper); + if (upper->level > 0) { + list_add(&upper->list, &cache->detached); + upper->detached = 1; + } else { + rb_erase(&upper->rb_node, &cache->rb_root); + free_backref_node(cache, upper); + } + } out: btrfs_free_path(path1); btrfs_free_path(path2); if (err) { - INIT_LIST_HEAD(&list); + while (!list_empty(&useless)) { + lower = list_entry(useless.next, + struct backref_node, upper); + list_del_init(&lower->upper); + } upper = node; + INIT_LIST_HEAD(&list); while (upper) { if (RB_EMPTY_NODE(&upper->rb_node)) { list_splice_tail(&upper->upper, &list); - kfree(upper); + free_backref_node(cache, upper); } if (list_empty(&list)) @@ -832,25 +1135,119 @@ out: edge = list_entry(list.next, struct backref_edge, list[LOWER]); + list_del(&edge->list[LOWER]); upper = edge->node[UPPER]; - kfree(edge); + free_backref_edge(cache, edge); } return ERR_PTR(err); } + BUG_ON(node && node->detached); return node; } /* + * helper to add backref node for the newly created snapshot. + * the backref node is created by cloning backref node that + * corresponds to root of source tree + */ +static int clone_backref_node(struct btrfs_trans_handle *trans, + struct reloc_control *rc, + struct btrfs_root *src, + struct btrfs_root *dest) +{ + struct btrfs_root *reloc_root = src->reloc_root; + struct backref_cache *cache = &rc->backref_cache; + struct backref_node *node = NULL; + struct backref_node *new_node; + struct backref_edge *edge; + struct backref_edge *new_edge; + struct rb_node *rb_node; + + if (cache->last_trans > 0) + update_backref_cache(trans, cache); + + rb_node = tree_search(&cache->rb_root, src->commit_root->start); + if (rb_node) { + node = rb_entry(rb_node, struct backref_node, rb_node); + if (node->detached) + node = NULL; + else + BUG_ON(node->new_bytenr != reloc_root->node->start); + } + + if (!node) { + rb_node = tree_search(&cache->rb_root, + reloc_root->commit_root->start); + if (rb_node) { + node = rb_entry(rb_node, struct backref_node, + rb_node); + BUG_ON(node->detached); + } + } + + if (!node) + return 0; + + new_node = alloc_backref_node(cache); + if (!new_node) + return -ENOMEM; + + new_node->bytenr = dest->node->start; + new_node->level = node->level; + new_node->lowest = node->lowest; + new_node->checked = 1; + new_node->root = dest; + + if (!node->lowest) { + list_for_each_entry(edge, &node->lower, list[UPPER]) { + new_edge = alloc_backref_edge(cache); + if (!new_edge) + goto fail; + + new_edge->node[UPPER] = new_node; + new_edge->node[LOWER] = edge->node[LOWER]; + list_add_tail(&new_edge->list[UPPER], + &new_node->lower); + } + } else { + list_add_tail(&new_node->lower, &cache->leaves); + } + + rb_node = tree_insert(&cache->rb_root, new_node->bytenr, + &new_node->rb_node); + if (rb_node) + backref_tree_panic(rb_node, -EEXIST, new_node->bytenr); + + if (!new_node->lowest) { + list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) { + list_add_tail(&new_edge->list[LOWER], + &new_edge->node[LOWER]->upper); + } + } + return 0; +fail: + while (!list_empty(&new_node->lower)) { + new_edge = list_entry(new_node->lower.next, + struct backref_edge, list[UPPER]); + list_del(&new_edge->list[UPPER]); + free_backref_edge(cache, new_edge); + } + free_backref_node(cache, new_node); + return -ENOMEM; +} + +/* * helper to add 'address of tree root -> reloc tree' mapping */ -static int __add_reloc_root(struct btrfs_root *root) +static int __must_check __add_reloc_root(struct btrfs_root *root) { struct rb_node *rb_node; struct mapping_node *node; struct reloc_control *rc = root->fs_info->reloc_ctl; node = kmalloc(sizeof(*node), GFP_NOFS); - BUG_ON(!node); + if (!node) + return -ENOMEM; node->bytenr = root->node->start; node->data = root; @@ -859,17 +1256,23 @@ static int __add_reloc_root(struct btrfs_root *root) rb_node = tree_insert(&rc->reloc_root_tree.rb_root, node->bytenr, &node->rb_node); spin_unlock(&rc->reloc_root_tree.lock); - BUG_ON(rb_node); + if (rb_node) { + btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found " + "for start=%llu while inserting into relocation " + "tree", node->bytenr); + kfree(node); + return -EEXIST; + } list_add_tail(&root->root_list, &rc->reloc_roots); return 0; } /* - * helper to update/delete the 'address of tree root -> reloc tree' + * helper to delete the 'address of tree root -> reloc tree' * mapping */ -static int __update_reloc_root(struct btrfs_root *root, int del) +static void __del_reloc_root(struct btrfs_root *root) { struct rb_node *rb_node; struct mapping_node *node = NULL; @@ -877,72 +1280,112 @@ static int __update_reloc_root(struct btrfs_root *root, int del) spin_lock(&rc->reloc_root_tree.lock); rb_node = tree_search(&rc->reloc_root_tree.rb_root, - root->commit_root->start); + root->node->start); if (rb_node) { node = rb_entry(rb_node, struct mapping_node, rb_node); rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); } spin_unlock(&rc->reloc_root_tree.lock); + if (!node) + return; BUG_ON((struct btrfs_root *)node->data != root); - if (!del) { - spin_lock(&rc->reloc_root_tree.lock); - node->bytenr = root->node->start; - rb_node = tree_insert(&rc->reloc_root_tree.rb_root, - node->bytenr, &node->rb_node); - spin_unlock(&rc->reloc_root_tree.lock); - BUG_ON(rb_node); - } else { - list_del_init(&root->root_list); - kfree(node); - } - return 0; + spin_lock(&root->fs_info->trans_lock); + list_del_init(&root->root_list); + spin_unlock(&root->fs_info->trans_lock); + kfree(node); } /* - * create reloc tree for a given fs tree. reloc tree is just a - * snapshot of the fs tree with special root objectid. + * helper to update the 'address of tree root -> reloc tree' + * mapping */ -int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, - struct btrfs_root *root) +static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr) +{ + struct rb_node *rb_node; + struct mapping_node *node = NULL; + struct reloc_control *rc = root->fs_info->reloc_ctl; + + spin_lock(&rc->reloc_root_tree.lock); + rb_node = tree_search(&rc->reloc_root_tree.rb_root, + root->node->start); + if (rb_node) { + node = rb_entry(rb_node, struct mapping_node, rb_node); + rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); + } + spin_unlock(&rc->reloc_root_tree.lock); + + if (!node) + return 0; + BUG_ON((struct btrfs_root *)node->data != root); + + spin_lock(&rc->reloc_root_tree.lock); + node->bytenr = new_bytenr; + rb_node = tree_insert(&rc->reloc_root_tree.rb_root, + node->bytenr, &node->rb_node); + spin_unlock(&rc->reloc_root_tree.lock); + if (rb_node) + backref_tree_panic(rb_node, -EEXIST, node->bytenr); + return 0; +} + +static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 objectid) { struct btrfs_root *reloc_root; struct extent_buffer *eb; struct btrfs_root_item *root_item; struct btrfs_key root_key; + u64 last_snap = 0; int ret; - if (root->reloc_root) { - reloc_root = root->reloc_root; - reloc_root->last_trans = trans->transid; - return 0; - } - - if (!root->fs_info->reloc_ctl || - !root->fs_info->reloc_ctl->create_reloc_root || - root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) - return 0; - root_item = kmalloc(sizeof(*root_item), GFP_NOFS); BUG_ON(!root_item); root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; root_key.type = BTRFS_ROOT_ITEM_KEY; - root_key.offset = root->root_key.objectid; + root_key.offset = objectid; - ret = btrfs_copy_root(trans, root, root->commit_root, &eb, - BTRFS_TREE_RELOC_OBJECTID); - BUG_ON(ret); + if (root->root_key.objectid == objectid) { + /* called by btrfs_init_reloc_root */ + ret = btrfs_copy_root(trans, root, root->commit_root, &eb, + BTRFS_TREE_RELOC_OBJECTID); + BUG_ON(ret); + + last_snap = btrfs_root_last_snapshot(&root->root_item); + btrfs_set_root_last_snapshot(&root->root_item, + trans->transid - 1); + } else { + /* + * called by btrfs_reloc_post_snapshot_hook. + * the source tree is a reloc tree, all tree blocks + * modified after it was created have RELOC flag + * set in their headers. so it's OK to not update + * the 'last_snapshot'. + */ + ret = btrfs_copy_root(trans, root, root->node, &eb, + BTRFS_TREE_RELOC_OBJECTID); + BUG_ON(ret); + } - btrfs_set_root_last_snapshot(&root->root_item, trans->transid - 1); memcpy(root_item, &root->root_item, sizeof(*root_item)); - btrfs_set_root_refs(root_item, 1); btrfs_set_root_bytenr(root_item, eb->start); btrfs_set_root_level(root_item, btrfs_header_level(eb)); btrfs_set_root_generation(root_item, trans->transid); - memset(&root_item->drop_progress, 0, sizeof(struct btrfs_disk_key)); - root_item->drop_level = 0; + + if (root->root_key.objectid == objectid) { + btrfs_set_root_refs(root_item, 0); + memset(&root_item->drop_progress, 0, + sizeof(struct btrfs_disk_key)); + root_item->drop_level = 0; + /* + * abuse rtransid, it is safe because it is impossible to + * receive data into a relocation tree. + */ + btrfs_set_root_rtransid(root_item, last_snap); + btrfs_set_root_otransid(root_item, trans->transid); + } btrfs_tree_unlock(eb); free_extent_buffer(eb); @@ -952,12 +1395,46 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, BUG_ON(ret); kfree(root_item); - reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, - &root_key); + reloc_root = btrfs_read_fs_root(root->fs_info->tree_root, &root_key); BUG_ON(IS_ERR(reloc_root)); reloc_root->last_trans = trans->transid; + return reloc_root; +} + +/* + * create reloc tree for a given fs tree. reloc tree is just a + * snapshot of the fs tree with special root objectid. + */ +int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, + struct btrfs_root *root) +{ + struct btrfs_root *reloc_root; + struct reloc_control *rc = root->fs_info->reloc_ctl; + struct btrfs_block_rsv *rsv; + int clear_rsv = 0; + int ret; - __add_reloc_root(reloc_root); + if (root->reloc_root) { + reloc_root = root->reloc_root; + reloc_root->last_trans = trans->transid; + return 0; + } + + if (!rc || !rc->create_reloc_tree || + root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) + return 0; + + if (!trans->reloc_reserved) { + rsv = trans->block_rsv; + trans->block_rsv = rc->block_rsv; + clear_rsv = 1; + } + reloc_root = create_reloc_root(trans, root, root->root_key.objectid); + if (clear_rsv) + trans->block_rsv = rsv; + + ret = __add_reloc_root(reloc_root); + BUG_ON(ret < 0); root->reloc_root = reloc_root; return 0; } @@ -970,22 +1447,20 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, { struct btrfs_root *reloc_root; struct btrfs_root_item *root_item; - int del = 0; int ret; if (!root->reloc_root) - return 0; + goto out; reloc_root = root->reloc_root; root_item = &reloc_root->root_item; - if (btrfs_root_refs(root_item) == 0) { + if (root->fs_info->reloc_ctl->merge_reloc_tree && + btrfs_root_refs(root_item) == 0) { root->reloc_root = NULL; - del = 1; + __del_reloc_root(reloc_root); } - __update_reloc_root(reloc_root, del); - if (reloc_root->commit_root != reloc_root->node) { btrfs_set_root_node(root_item, reloc_root->node); free_extent_buffer(reloc_root->commit_root); @@ -995,6 +1470,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, ret = btrfs_update_root(trans, root->fs_info->tree_root, &reloc_root->root_key, root_item); BUG_ON(ret); + +out: return 0; } @@ -1017,9 +1494,9 @@ again: prev = node; entry = rb_entry(node, struct btrfs_inode, rb_node); - if (objectid < entry->vfs_inode.i_ino) + if (objectid < btrfs_ino(&entry->vfs_inode)) node = node->rb_left; - else if (objectid > entry->vfs_inode.i_ino) + else if (objectid > btrfs_ino(&entry->vfs_inode)) node = node->rb_right; else break; @@ -1027,7 +1504,7 @@ again: if (!node) { while (prev) { entry = rb_entry(prev, struct btrfs_inode, rb_node); - if (objectid <= entry->vfs_inode.i_ino) { + if (objectid <= btrfs_ino(&entry->vfs_inode)) { node = prev; break; } @@ -1042,7 +1519,7 @@ again: return inode; } - objectid = entry->vfs_inode.i_ino + 1; + objectid = btrfs_ino(&entry->vfs_inode) + 1; if (cond_resched_lock(&root->inode_lock)) goto again; @@ -1078,7 +1555,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr, return -ENOMEM; bytenr -= BTRFS_I(reloc_inode)->index_cnt; - ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, + ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(reloc_inode), bytenr, 0); if (ret < 0) goto out; @@ -1097,12 +1574,11 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr, btrfs_file_extent_other_encoding(leaf, fi)); if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) { - ret = 1; + ret = -EINVAL; goto out; } - if (new_bytenr) - *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); + *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); ret = 0; out: btrfs_free_path(path); @@ -1113,24 +1589,23 @@ out: * update file extent items in the tree leaf to point to * the new locations. */ -static int replace_file_extents(struct btrfs_trans_handle *trans, - struct reloc_control *rc, - struct btrfs_root *root, - struct extent_buffer *leaf, - struct list_head *inode_list) +static noinline_for_stack +int replace_file_extents(struct btrfs_trans_handle *trans, + struct reloc_control *rc, + struct btrfs_root *root, + struct extent_buffer *leaf) { struct btrfs_key key; struct btrfs_file_extent_item *fi; struct inode *inode = NULL; - struct inodevec *ivec = NULL; u64 parent; u64 bytenr; - u64 new_bytenr; + u64 new_bytenr = 0; u64 num_bytes; u64 end; u32 nritems; u32 i; - int ret; + int ret = 0; int first = 1; int dirty = 0; @@ -1165,23 +1640,14 @@ static int replace_file_extents(struct btrfs_trans_handle *trans, * to complete and drop the extent cache */ if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { - if (!ivec || ivec->nr == INODEVEC_SIZE) { - ivec = kmalloc(sizeof(*ivec), GFP_NOFS); - BUG_ON(!ivec); - ivec->nr = 0; - list_add_tail(&ivec->list, inode_list); - } if (first) { inode = find_next_inode(root, key.objectid); - if (inode) - ivec->inode[ivec->nr++] = inode; first = 0; - } else if (inode && inode->i_ino < key.objectid) { + } else if (inode && btrfs_ino(inode) < key.objectid) { + btrfs_add_delayed_iput(inode); inode = find_next_inode(root, key.objectid); - if (inode) - ivec->inode[ivec->nr++] = inode; } - if (inode && inode->i_ino == key.objectid) { + if (inode && btrfs_ino(inode) == key.objectid) { end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); WARN_ON(!IS_ALIGNED(key.offset, @@ -1189,23 +1655,26 @@ static int replace_file_extents(struct btrfs_trans_handle *trans, WARN_ON(!IS_ALIGNED(end, root->sectorsize)); end--; ret = try_lock_extent(&BTRFS_I(inode)->io_tree, - key.offset, end, - GFP_NOFS); + key.offset, end); if (!ret) continue; btrfs_drop_extent_cache(inode, key.offset, end, 1); unlock_extent(&BTRFS_I(inode)->io_tree, - key.offset, end, GFP_NOFS); + key.offset, end); } } ret = get_new_location(rc->data_inode, &new_bytenr, bytenr, num_bytes); - if (ret > 0) - continue; - BUG_ON(ret < 0); + if (ret) { + /* + * Don't have to abort since we've not changed anything + * in the file extent yet. + */ + break; + } btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr); dirty = 1; @@ -1214,17 +1683,25 @@ static int replace_file_extents(struct btrfs_trans_handle *trans, ret = btrfs_inc_extent_ref(trans, root, new_bytenr, num_bytes, parent, btrfs_header_owner(leaf), - key.objectid, key.offset); - BUG_ON(ret); + key.objectid, key.offset, 1); + if (ret) { + btrfs_abort_transaction(trans, root, ret); + break; + } ret = btrfs_free_extent(trans, root, bytenr, num_bytes, parent, btrfs_header_owner(leaf), - key.objectid, key.offset); - BUG_ON(ret); + key.objectid, key.offset, 1); + if (ret) { + btrfs_abort_transaction(trans, root, ret); + break; + } } if (dirty) btrfs_mark_buffer_dirty(leaf); - return 0; + if (inode) + btrfs_add_delayed_iput(inode); + return ret; } static noinline_for_stack @@ -1247,11 +1724,11 @@ int memcmp_node_keys(struct extent_buffer *eb, int slot, * if no block got replaced, 0 is returned. if there are other * errors, a negative error number is returned. */ -static int replace_path(struct btrfs_trans_handle *trans, - struct btrfs_root *dest, struct btrfs_root *src, - struct btrfs_path *path, struct btrfs_key *next_key, - struct extent_buffer **leaf, - int lowest_level, int max_level) +static noinline_for_stack +int replace_path(struct btrfs_trans_handle *trans, + struct btrfs_root *dest, struct btrfs_root *src, + struct btrfs_path *path, struct btrfs_key *next_key, + int lowest_level, int max_level) { struct extent_buffer *eb; struct extent_buffer *parent; @@ -1262,16 +1739,16 @@ static int replace_path(struct btrfs_trans_handle *trans, u64 new_ptr_gen; u64 last_snapshot; u32 blocksize; + int cow = 0; int level; int ret; int slot; BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID); - BUG_ON(lowest_level > 1 && leaf); last_snapshot = btrfs_root_last_snapshot(&src->root_item); - +again: slot = path->slots[lowest_level]; btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot); @@ -1285,8 +1762,10 @@ static int replace_path(struct btrfs_trans_handle *trans, return 0; } - ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb); - BUG_ON(ret); + if (cow) { + ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb); + BUG_ON(ret); + } btrfs_set_lock_blocking(eb); if (next_key) { @@ -1322,32 +1801,32 @@ static int replace_path(struct btrfs_trans_handle *trans, new_ptr_gen = 0; } - if (new_bytenr > 0 && new_bytenr == old_bytenr) { - WARN_ON(1); + if (WARN_ON(new_bytenr > 0 && new_bytenr == old_bytenr)) { ret = level; break; } if (new_bytenr == 0 || old_ptr_gen > last_snapshot || memcmp_node_keys(parent, slot, path, level)) { - if (level <= lowest_level && !leaf) { + if (level <= lowest_level) { ret = 0; break; } eb = read_tree_block(dest, old_bytenr, blocksize, old_ptr_gen); - btrfs_tree_lock(eb); - ret = btrfs_cow_block(trans, dest, eb, parent, - slot, &eb); - BUG_ON(ret); - btrfs_set_lock_blocking(eb); - - if (level <= lowest_level) { - *leaf = eb; - ret = 0; + if (!eb || !extent_buffer_uptodate(eb)) { + ret = (!eb) ? -ENOMEM : -EIO; + free_extent_buffer(eb); break; } + btrfs_tree_lock(eb); + if (cow) { + ret = btrfs_cow_block(trans, dest, eb, parent, + slot, &eb); + BUG_ON(ret); + } + btrfs_set_lock_blocking(eb); btrfs_tree_unlock(parent); free_extent_buffer(parent); @@ -1356,9 +1835,16 @@ static int replace_path(struct btrfs_trans_handle *trans, continue; } + if (!cow) { + btrfs_tree_unlock(parent); + free_extent_buffer(parent); + cow = 1; + goto again; + } + btrfs_node_key_to_cpu(path->nodes[level], &key, path->slots[level]); - btrfs_release_path(src, path); + btrfs_release_path(path); path->lowest_level = level; ret = btrfs_search_slot(trans, src, &key, path, 0, 1); @@ -1380,21 +1866,23 @@ static int replace_path(struct btrfs_trans_handle *trans, ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize, path->nodes[level]->start, - src->root_key.objectid, level - 1, 0); + src->root_key.objectid, level - 1, 0, + 1); BUG_ON(ret); ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize, 0, dest->root_key.objectid, level - 1, - 0); + 0, 1); BUG_ON(ret); ret = btrfs_free_extent(trans, src, new_bytenr, blocksize, path->nodes[level]->start, - src->root_key.objectid, level - 1, 0); + src->root_key.objectid, level - 1, 0, + 1); BUG_ON(ret); ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize, 0, dest->root_key.objectid, level - 1, - 0); + 0, 1); BUG_ON(ret); btrfs_unlock_up_safe(path, 0); @@ -1484,6 +1972,10 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, bytenr = btrfs_node_blockptr(eb, path->slots[i]); blocksize = btrfs_level_size(root, i - 1); eb = read_tree_block(root, bytenr, blocksize, ptr_gen); + if (!eb || !extent_buffer_uptodate(eb)) { + free_extent_buffer(eb); + return -EIO; + } BUG_ON(btrfs_header_level(eb) != i - 1); path->nodes[i - 1] = eb; path->slots[i - 1] = 0; @@ -1502,6 +1994,7 @@ static int invalidate_extent_cache(struct btrfs_root *root, struct inode *inode = NULL; u64 objectid; u64 start, end; + u64 ino; objectid = min_key->objectid; while (1) { @@ -1514,17 +2007,18 @@ static int invalidate_extent_cache(struct btrfs_root *root, inode = find_next_inode(root, objectid); if (!inode) break; + ino = btrfs_ino(inode); - if (inode->i_ino > max_key->objectid) { + if (ino > max_key->objectid) { iput(inode); break; } - objectid = inode->i_ino + 1; + objectid = ino + 1; if (!S_ISREG(inode->i_mode)) continue; - if (unlikely(min_key->objectid == inode->i_ino)) { + if (unlikely(min_key->objectid == ino)) { if (min_key->type > BTRFS_EXTENT_DATA_KEY) continue; if (min_key->type < BTRFS_EXTENT_DATA_KEY) @@ -1537,7 +2031,7 @@ static int invalidate_extent_cache(struct btrfs_root *root, start = 0; } - if (unlikely(max_key->objectid == inode->i_ino)) { + if (unlikely(max_key->objectid == ino)) { if (max_key->type < BTRFS_EXTENT_DATA_KEY) continue; if (max_key->type > BTRFS_EXTENT_DATA_KEY) { @@ -1554,27 +2048,13 @@ static int invalidate_extent_cache(struct btrfs_root *root, } /* the lock_extent waits for readpage to complete */ - lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); + lock_extent(&BTRFS_I(inode)->io_tree, start, end); btrfs_drop_extent_cache(inode, start, end, 1); - unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); + unlock_extent(&BTRFS_I(inode)->io_tree, start, end); } return 0; } -static void put_inodes(struct list_head *list) -{ - struct inodevec *ivec; - while (!list_empty(list)) { - ivec = list_entry(list->next, struct inodevec, list); - list_del(&ivec->list); - while (ivec->nr > 0) { - ivec->nr--; - iput(ivec->inode[ivec->nr]); - } - kfree(ivec); - } -} - static int find_next_key(struct btrfs_path *path, int level, struct btrfs_key *key) @@ -1603,21 +2083,22 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, LIST_HEAD(inode_list); struct btrfs_key key; struct btrfs_key next_key; - struct btrfs_trans_handle *trans; + struct btrfs_trans_handle *trans = NULL; struct btrfs_root *reloc_root; struct btrfs_root_item *root_item; struct btrfs_path *path; - struct extent_buffer *leaf = NULL; - unsigned long nr; + struct extent_buffer *leaf; int level; int max_level; int replaced = 0; int ret; int err = 0; + u32 min_reserved; path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = 1; reloc_root = root->reloc_root; root_item = &reloc_root->root_item; @@ -1647,34 +2128,25 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, btrfs_unlock_up_safe(path, 0); } - if (level == 0 && rc->stage == UPDATE_DATA_PTRS) { - trans = btrfs_start_transaction(root, 1); - - leaf = path->nodes[0]; - btrfs_item_key_to_cpu(leaf, &key, 0); - btrfs_release_path(reloc_root, path); + min_reserved = root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; + memset(&next_key, 0, sizeof(next_key)); - ret = btrfs_search_slot(trans, root, &key, path, 0, 1); - if (ret < 0) { + while (1) { + ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved, + BTRFS_RESERVE_FLUSH_ALL); + if (ret) { err = ret; goto out; } + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + trans = NULL; + goto out; + } + trans->block_rsv = rc->block_rsv; - leaf = path->nodes[0]; - btrfs_unlock_up_safe(path, 1); - ret = replace_file_extents(trans, rc, root, leaf, - &inode_list); - if (ret < 0) - err = ret; - goto out; - } - - memset(&next_key, 0, sizeof(next_key)); - - while (1) { - leaf = NULL; replaced = 0; - trans = btrfs_start_transaction(root, 1); max_level = level; ret = walk_down_reloc_tree(reloc_root, path, &level); @@ -1688,14 +2160,9 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, if (!find_next_key(path, level, &key) && btrfs_comp_cpu_keys(&next_key, &key) >= 0) { ret = 0; - } else if (level == 1 && rc->stage == UPDATE_DATA_PTRS) { - ret = replace_path(trans, root, reloc_root, - path, &next_key, &leaf, - level, max_level); } else { - ret = replace_path(trans, root, reloc_root, - path, &next_key, NULL, - level, max_level); + ret = replace_path(trans, root, reloc_root, path, + &next_key, level, max_level); } if (ret < 0) { err = ret; @@ -1707,16 +2174,6 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, btrfs_node_key_to_cpu(path->nodes[level], &key, path->slots[level]); replaced = 1; - } else if (leaf) { - /* - * no block got replaced, try replacing file extents - */ - btrfs_item_key_to_cpu(leaf, &key, 0); - ret = replace_file_extents(trans, rc, root, leaf, - &inode_list); - btrfs_tree_unlock(leaf); - free_extent_buffer(leaf); - BUG_ON(ret < 0); } ret = walk_up_reloc_tree(reloc_root, path, &level); @@ -1732,15 +2189,10 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, path->slots[level]); root_item->drop_level = level; - nr = trans->blocks_used; - btrfs_end_transaction(trans, root); - - btrfs_btree_balance_dirty(root, nr); + btrfs_end_transaction_throttle(trans, root); + trans = NULL; - /* - * put inodes outside transaction, otherwise we may deadlock. - */ - put_inodes(&inode_list); + btrfs_btree_balance_dirty(root); if (replaced && rc->stage == UPDATE_DATA_PTRS) invalidate_extent_cache(root, &key, &next_key); @@ -1764,14 +2216,13 @@ out: sizeof(root_item->drop_progress)); root_item->drop_level = 0; btrfs_set_root_refs(root_item, 0); + btrfs_update_reloc_root(trans, root); } - nr = trans->blocks_used; - btrfs_end_transaction(trans, root); - - btrfs_btree_balance_dirty(root, nr); + if (trans) + btrfs_end_transaction_throttle(trans, root); - put_inodes(&inode_list); + btrfs_btree_balance_dirty(root); if (replaced && rc->stage == UPDATE_DATA_PTRS) invalidate_extent_cache(root, &key, &next_key); @@ -1779,74 +2230,174 @@ out: return err; } -/* - * callback for the work threads. - * this function merges reloc tree with corresponding fs tree, - * and then drops the reloc tree. - */ -static void merge_func(struct btrfs_work *work) +static noinline_for_stack +int prepare_to_merge(struct reloc_control *rc, int err) { - struct btrfs_trans_handle *trans; - struct btrfs_root *root; + struct btrfs_root *root = rc->extent_root; struct btrfs_root *reloc_root; - struct async_merge *async; + struct btrfs_trans_handle *trans; + LIST_HEAD(reloc_roots); + u64 num_bytes = 0; + int ret; - async = container_of(work, struct async_merge, work); - reloc_root = async->root; + mutex_lock(&root->fs_info->reloc_mutex); + rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; + rc->merging_rsv_size += rc->nodes_relocated * 2; + mutex_unlock(&root->fs_info->reloc_mutex); + +again: + if (!err) { + num_bytes = rc->merging_rsv_size; + ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes, + BTRFS_RESERVE_FLUSH_ALL); + if (ret) + err = ret; + } + + trans = btrfs_join_transaction(rc->extent_root); + if (IS_ERR(trans)) { + if (!err) + btrfs_block_rsv_release(rc->extent_root, + rc->block_rsv, num_bytes); + return PTR_ERR(trans); + } + + if (!err) { + if (num_bytes != rc->merging_rsv_size) { + btrfs_end_transaction(trans, rc->extent_root); + btrfs_block_rsv_release(rc->extent_root, + rc->block_rsv, num_bytes); + goto again; + } + } + + rc->merge_reloc_tree = 1; + + while (!list_empty(&rc->reloc_roots)) { + reloc_root = list_entry(rc->reloc_roots.next, + struct btrfs_root, root_list); + list_del_init(&reloc_root->root_list); - if (btrfs_root_refs(&reloc_root->root_item) > 0) { root = read_fs_root(reloc_root->fs_info, reloc_root->root_key.offset); BUG_ON(IS_ERR(root)); BUG_ON(root->reloc_root != reloc_root); - merge_reloc_root(async->rc, root); - - trans = btrfs_start_transaction(root, 1); + /* + * set reference count to 1, so btrfs_recover_relocation + * knows it should resumes merging + */ + if (!err) + btrfs_set_root_refs(&reloc_root->root_item, 1); btrfs_update_reloc_root(trans, root); - btrfs_end_transaction(trans, root); + + list_add(&reloc_root->root_list, &reloc_roots); } - btrfs_drop_snapshot(reloc_root, 0); + list_splice(&reloc_roots, &rc->reloc_roots); + + if (!err) + btrfs_commit_transaction(trans, rc->extent_root); + else + btrfs_end_transaction(trans, rc->extent_root); + return err; +} - if (atomic_dec_and_test(async->num_pending)) - complete(async->done); +static noinline_for_stack +void free_reloc_roots(struct list_head *list) +{ + struct btrfs_root *reloc_root; - kfree(async); + while (!list_empty(list)) { + reloc_root = list_entry(list->next, struct btrfs_root, + root_list); + __del_reloc_root(reloc_root); + } } -static int merge_reloc_roots(struct reloc_control *rc) +static noinline_for_stack +int merge_reloc_roots(struct reloc_control *rc) { - struct async_merge *async; struct btrfs_root *root; - struct completion done; - atomic_t num_pending; + struct btrfs_root *reloc_root; + u64 last_snap; + u64 otransid; + u64 objectid; + LIST_HEAD(reloc_roots); + int found = 0; + int ret = 0; +again: + root = rc->extent_root; - init_completion(&done); - atomic_set(&num_pending, 1); + /* + * this serializes us with btrfs_record_root_in_transaction, + * we have to make sure nobody is in the middle of + * adding their roots to the list while we are + * doing this splice + */ + mutex_lock(&root->fs_info->reloc_mutex); + list_splice_init(&rc->reloc_roots, &reloc_roots); + mutex_unlock(&root->fs_info->reloc_mutex); - while (!list_empty(&rc->reloc_roots)) { - root = list_entry(rc->reloc_roots.next, - struct btrfs_root, root_list); - list_del_init(&root->root_list); + while (!list_empty(&reloc_roots)) { + found = 1; + reloc_root = list_entry(reloc_roots.next, + struct btrfs_root, root_list); + + if (btrfs_root_refs(&reloc_root->root_item) > 0) { + root = read_fs_root(reloc_root->fs_info, + reloc_root->root_key.offset); + BUG_ON(IS_ERR(root)); + BUG_ON(root->reloc_root != reloc_root); + + ret = merge_reloc_root(rc, root); + if (ret) { + if (list_empty(&reloc_root->root_list)) + list_add_tail(&reloc_root->root_list, + &reloc_roots); + goto out; + } + } else { + list_del_init(&reloc_root->root_list); + } - async = kmalloc(sizeof(*async), GFP_NOFS); - BUG_ON(!async); - async->work.func = merge_func; - async->work.flags = 0; - async->rc = rc; - async->root = root; - async->done = &done; - async->num_pending = &num_pending; - atomic_inc(&num_pending); - btrfs_queue_worker(&rc->workers, &async->work); + /* + * we keep the old last snapshod transid in rtranid when we + * created the relocation tree. + */ + last_snap = btrfs_root_rtransid(&reloc_root->root_item); + otransid = btrfs_root_otransid(&reloc_root->root_item); + objectid = reloc_root->root_key.offset; + + ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1); + if (ret < 0) { + if (list_empty(&reloc_root->root_list)) + list_add_tail(&reloc_root->root_list, + &reloc_roots); + goto out; + } } - if (!atomic_dec_and_test(&num_pending)) - wait_for_completion(&done); + if (found) { + found = 0; + goto again; + } +out: + if (ret) { + btrfs_std_error(root->fs_info, ret); + if (!list_empty(&reloc_roots)) + free_reloc_roots(&reloc_roots); + + /* new reloc root may be added */ + mutex_lock(&root->fs_info->reloc_mutex); + list_splice_init(&rc->reloc_roots, &reloc_roots); + mutex_unlock(&root->fs_info->reloc_mutex); + if (!list_empty(&reloc_roots)) + free_reloc_roots(&reloc_roots); + } BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); - return 0; + return ret; } static void free_block_list(struct rb_root *blocks) @@ -1875,119 +2426,175 @@ static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans, return btrfs_record_root_in_trans(trans, root); } -/* - * select one tree from trees that references the block. - * for blocks in refernce counted trees, we preper reloc tree. - * if no reloc tree found and reloc_only is true, NULL is returned. - */ -static struct btrfs_root *__select_one_root(struct btrfs_trans_handle *trans, - struct backref_node *node, - struct backref_edge *edges[], - int *nr, int reloc_only) +static noinline_for_stack +struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, + struct reloc_control *rc, + struct backref_node *node, + struct backref_edge *edges[]) { struct backref_node *next; struct btrfs_root *root; - int index; - int loop = 0; -again: - index = 0; + int index = 0; + next = node; while (1) { cond_resched(); next = walk_up_backref(next, edges, &index); root = next->root; - if (!root) { - BUG_ON(!node->old_root); - goto skip; - } - - /* no other choice for non-refernce counted tree */ - if (!root->ref_cows) { - BUG_ON(reloc_only); - break; - } + BUG_ON(!root); + BUG_ON(!test_bit(BTRFS_ROOT_REF_COWS, &root->state)); if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { record_reloc_root_in_trans(trans, root); break; } - if (loop) { - btrfs_record_root_in_trans(trans, root); + btrfs_record_root_in_trans(trans, root); + root = root->reloc_root; + + if (next->new_bytenr != root->node->start) { + BUG_ON(next->new_bytenr); + BUG_ON(!list_empty(&next->list)); + next->new_bytenr = root->node->start; + next->root = root; + list_add_tail(&next->list, + &rc->backref_cache.changed); + __mark_block_processed(rc, next); break; } - if (reloc_only || next != node) { - if (!root->reloc_root) - btrfs_record_root_in_trans(trans, root); - root = root->reloc_root; - /* - * if the reloc tree was created in current - * transation, there is no node in backref tree - * corresponds to the root of the reloc tree. - */ - if (btrfs_root_last_snapshot(&root->root_item) == - trans->transid - 1) - break; - } -skip: + WARN_ON(1); root = NULL; next = walk_down_backref(edges, &index); if (!next || next->level <= node->level) break; } + if (!root) + return NULL; - if (!root && !loop && !reloc_only) { - loop = 1; - goto again; + next = node; + /* setup backref node path for btrfs_reloc_cow_block */ + while (1) { + rc->backref_cache.path[next->level] = next; + if (--index < 0) + break; + next = edges[index]->node[UPPER]; } - - if (root) - *nr = index; - else - *nr = 0; - return root; } +/* + * select a tree root for relocation. return NULL if the block + * is reference counted. we should use do_relocation() in this + * case. return a tree root pointer if the block isn't reference + * counted. return -ENOENT if the block is root of reloc tree. + */ static noinline_for_stack struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, struct backref_node *node) { + struct backref_node *next; + struct btrfs_root *root; + struct btrfs_root *fs_root = NULL; struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; - int nr; - return __select_one_root(trans, node, edges, &nr, 0); + int index = 0; + + next = node; + while (1) { + cond_resched(); + next = walk_up_backref(next, edges, &index); + root = next->root; + BUG_ON(!root); + + /* no other choice for non-references counted tree */ + if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) + return root; + + if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) + fs_root = root; + + if (next != node) + return NULL; + + next = walk_down_backref(edges, &index); + if (!next || next->level <= node->level) + break; + } + + if (!fs_root) + return ERR_PTR(-ENOENT); + return fs_root; } static noinline_for_stack -struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, - struct backref_node *node, - struct backref_edge *edges[], int *nr) +u64 calcu_metadata_size(struct reloc_control *rc, + struct backref_node *node, int reserve) { - return __select_one_root(trans, node, edges, nr, 1); + struct backref_node *next = node; + struct backref_edge *edge; + struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; + u64 num_bytes = 0; + int index = 0; + + BUG_ON(reserve && node->processed); + + while (next) { + cond_resched(); + while (1) { + if (next->processed && (reserve || next != node)) + break; + + num_bytes += btrfs_level_size(rc->extent_root, + next->level); + + if (list_empty(&next->upper)) + break; + + edge = list_entry(next->upper.next, + struct backref_edge, list[LOWER]); + edges[index++] = edge; + next = edge->node[UPPER]; + } + next = walk_down_backref(edges, &index); + } + return num_bytes; } -static void grab_path_buffers(struct btrfs_path *path, - struct backref_node *node, - struct backref_edge *edges[], int nr) +static int reserve_metadata_space(struct btrfs_trans_handle *trans, + struct reloc_control *rc, + struct backref_node *node) { - int i = 0; - while (1) { - drop_node_buffer(node); - node->eb = path->nodes[node->level]; - BUG_ON(!node->eb); - if (path->locks[node->level]) - node->locked = 1; - path->nodes[node->level] = NULL; - path->locks[node->level] = 0; - - if (i >= nr) - break; - - edges[i]->blockptr = node->eb->start; - node = edges[i]->node[UPPER]; - i++; + struct btrfs_root *root = rc->extent_root; + u64 num_bytes; + int ret; + u64 tmp; + + num_bytes = calcu_metadata_size(rc, node, 1) * 2; + + trans->block_rsv = rc->block_rsv; + rc->reserved_bytes += num_bytes; + ret = btrfs_block_rsv_refill(root, rc->block_rsv, num_bytes, + BTRFS_RESERVE_FLUSH_ALL); + if (ret) { + if (ret == -EAGAIN) { + tmp = rc->extent_root->nodesize * + RELOCATION_RESERVED_NODES; + while (tmp <= rc->reserved_bytes) + tmp <<= 1; + /* + * only one thread can access block_rsv at this point, + * so we don't need hold lock to protect block_rsv. + * we expand more reservation size here to allow enough + * space for relocation and we will return eailer in + * enospc case. + */ + rc->block_rsv->size = tmp + rc->extent_root->nodesize * + RELOCATION_RESERVED_NODES; + } + return ret; } + + return 0; } /* @@ -1998,6 +2605,7 @@ static void grab_path_buffers(struct btrfs_path *path, * in that case this function just updates pointers. */ static int do_relocation(struct btrfs_trans_handle *trans, + struct reloc_control *rc, struct backref_node *node, struct btrfs_key *key, struct btrfs_path *path, int lowest) @@ -2010,7 +2618,6 @@ static int do_relocation(struct btrfs_trans_handle *trans, u32 blocksize; u64 bytenr; u64 generation; - int nr; int slot; int ret; int err = 0; @@ -2018,18 +2625,25 @@ static int do_relocation(struct btrfs_trans_handle *trans, BUG_ON(lowest && node->eb); path->lowest_level = node->level + 1; + rc->backref_cache.path[node->level] = node; list_for_each_entry(edge, &node->upper, list[LOWER]) { cond_resched(); - if (node->eb && node->eb->start == edge->blockptr) - continue; upper = edge->node[UPPER]; - root = select_reloc_root(trans, upper, edges, &nr); - if (!root) - continue; - - if (upper->eb && !upper->locked) + root = select_reloc_root(trans, rc, upper, edges); + BUG_ON(!root); + + if (upper->eb && !upper->locked) { + if (!lowest) { + ret = btrfs_bin_search(upper->eb, key, + upper->level, &slot); + BUG_ON(ret); + bytenr = btrfs_node_blockptr(upper->eb, slot); + if (node->eb->start == bytenr) + goto next; + } drop_node_buffer(upper); + } if (!upper->eb) { ret = btrfs_search_slot(trans, root, key, path, 0, 1); @@ -2039,12 +2653,18 @@ static int do_relocation(struct btrfs_trans_handle *trans, } BUG_ON(ret > 0); - slot = path->slots[upper->level]; + if (!upper->eb) { + upper->eb = path->nodes[upper->level]; + path->nodes[upper->level] = NULL; + } else { + BUG_ON(upper->eb != path->nodes[upper->level]); + } - btrfs_unlock_up_safe(path, upper->level + 1); - grab_path_buffers(path, upper, edges, nr); + upper->locked = 1; + path->locks[upper->level] = 0; - btrfs_release_path(NULL, path); + slot = path->slots[upper->level]; + btrfs_release_path(path); } else { ret = btrfs_bin_search(upper->eb, key, upper->level, &slot); @@ -2052,32 +2672,34 @@ static int do_relocation(struct btrfs_trans_handle *trans, } bytenr = btrfs_node_blockptr(upper->eb, slot); - if (!lowest) { - if (node->eb->start == bytenr) { - btrfs_tree_unlock(upper->eb); - upper->locked = 0; - continue; - } + if (lowest) { + BUG_ON(bytenr != node->bytenr); } else { - BUG_ON(node->bytenr != bytenr); + if (node->eb->start == bytenr) + goto next; } blocksize = btrfs_level_size(root, node->level); generation = btrfs_node_ptr_generation(upper->eb, slot); eb = read_tree_block(root, bytenr, blocksize, generation); + if (!eb || !extent_buffer_uptodate(eb)) { + free_extent_buffer(eb); + err = -EIO; + goto next; + } btrfs_tree_lock(eb); btrfs_set_lock_blocking(eb); if (!node->eb) { ret = btrfs_cow_block(trans, root, eb, upper->eb, slot, &eb); + btrfs_tree_unlock(eb); + free_extent_buffer(eb); if (ret < 0) { err = ret; - break; + goto next; } - btrfs_set_lock_blocking(eb); - node->eb = eb; - node->locked = 1; + BUG_ON(node->eb != eb); } else { btrfs_set_node_blockptr(upper->eb, slot, node->eb->start); @@ -2089,73 +2711,86 @@ static int do_relocation(struct btrfs_trans_handle *trans, node->eb->start, blocksize, upper->eb->start, btrfs_header_owner(upper->eb), - node->level, 0); + node->level, 0, 1); BUG_ON(ret); ret = btrfs_drop_subtree(trans, root, eb, upper->eb); BUG_ON(ret); } - if (!lowest) { - btrfs_tree_unlock(upper->eb); - upper->locked = 0; - } +next: + if (!upper->pending) + drop_node_buffer(upper); + else + unlock_node_buffer(upper); + if (err) + break; + } + + if (!err && node->pending) { + drop_node_buffer(node); + list_move_tail(&node->list, &rc->backref_cache.changed); + node->pending = 0; } + path->lowest_level = 0; + BUG_ON(err == -ENOSPC); return err; } static int link_to_upper(struct btrfs_trans_handle *trans, + struct reloc_control *rc, struct backref_node *node, struct btrfs_path *path) { struct btrfs_key key; - if (!node->eb || list_empty(&node->upper)) - return 0; btrfs_node_key_to_cpu(node->eb, &key, 0); - return do_relocation(trans, node, &key, path, 0); + return do_relocation(trans, rc, node, &key, path, 0); } static int finish_pending_nodes(struct btrfs_trans_handle *trans, - struct backref_cache *cache, - struct btrfs_path *path) + struct reloc_control *rc, + struct btrfs_path *path, int err) { + LIST_HEAD(list); + struct backref_cache *cache = &rc->backref_cache; struct backref_node *node; int level; int ret; - int err = 0; for (level = 0; level < BTRFS_MAX_LEVEL; level++) { while (!list_empty(&cache->pending[level])) { node = list_entry(cache->pending[level].next, - struct backref_node, lower); - BUG_ON(node->level != level); + struct backref_node, list); + list_move_tail(&node->list, &list); + BUG_ON(!node->pending); - ret = link_to_upper(trans, node, path); - if (ret < 0) - err = ret; - /* - * this remove the node from the pending list and - * may add some other nodes to the level + 1 - * pending list - */ - remove_backref_node(cache, node); + if (!err) { + ret = link_to_upper(trans, rc, node, path); + if (ret < 0) + err = ret; + } } + list_splice_init(&list, &cache->pending[level]); } - BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root)); return err; } static void mark_block_processed(struct reloc_control *rc, - struct backref_node *node) + u64 bytenr, u32 blocksize) +{ + set_extent_bits(&rc->processed_blocks, bytenr, bytenr + blocksize - 1, + EXTENT_DIRTY, GFP_NOFS); +} + +static void __mark_block_processed(struct reloc_control *rc, + struct backref_node *node) { u32 blocksize; if (node->level == 0 || in_block_group(node->bytenr, rc->block_group)) { blocksize = btrfs_level_size(rc->extent_root, node->level); - set_extent_bits(&rc->processed_blocks, node->bytenr, - node->bytenr + blocksize - 1, EXTENT_DIRTY, - GFP_NOFS); + mark_block_processed(rc, node->bytenr, blocksize); } node->processed = 1; } @@ -2178,7 +2813,7 @@ static void update_processed_blocks(struct reloc_control *rc, if (next->processed) break; - mark_block_processed(rc, next); + __mark_block_processed(rc, next); if (list_empty(&next->upper)) break; @@ -2201,138 +2836,6 @@ static int tree_block_processed(u64 bytenr, u32 blocksize, return 0; } -/* - * check if there are any file extent pointers in the leaf point to - * data require processing - */ -static int check_file_extents(struct reloc_control *rc, - u64 bytenr, u32 blocksize, u64 ptr_gen) -{ - struct btrfs_key found_key; - struct btrfs_file_extent_item *fi; - struct extent_buffer *leaf; - u32 nritems; - int i; - int ret = 0; - - leaf = read_tree_block(rc->extent_root, bytenr, blocksize, ptr_gen); - - nritems = btrfs_header_nritems(leaf); - for (i = 0; i < nritems; i++) { - cond_resched(); - btrfs_item_key_to_cpu(leaf, &found_key, i); - if (found_key.type != BTRFS_EXTENT_DATA_KEY) - continue; - fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); - if (btrfs_file_extent_type(leaf, fi) == - BTRFS_FILE_EXTENT_INLINE) - continue; - bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); - if (bytenr == 0) - continue; - if (in_block_group(bytenr, rc->block_group)) { - ret = 1; - break; - } - } - free_extent_buffer(leaf); - return ret; -} - -/* - * scan child blocks of a given block to find blocks require processing - */ -static int add_child_blocks(struct btrfs_trans_handle *trans, - struct reloc_control *rc, - struct backref_node *node, - struct rb_root *blocks) -{ - struct tree_block *block; - struct rb_node *rb_node; - u64 bytenr; - u64 ptr_gen; - u32 blocksize; - u32 nritems; - int i; - int err = 0; - - nritems = btrfs_header_nritems(node->eb); - blocksize = btrfs_level_size(rc->extent_root, node->level - 1); - for (i = 0; i < nritems; i++) { - cond_resched(); - bytenr = btrfs_node_blockptr(node->eb, i); - ptr_gen = btrfs_node_ptr_generation(node->eb, i); - if (ptr_gen == trans->transid) - continue; - if (!in_block_group(bytenr, rc->block_group) && - (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS)) - continue; - if (tree_block_processed(bytenr, blocksize, rc)) - continue; - - readahead_tree_block(rc->extent_root, - bytenr, blocksize, ptr_gen); - } - - for (i = 0; i < nritems; i++) { - cond_resched(); - bytenr = btrfs_node_blockptr(node->eb, i); - ptr_gen = btrfs_node_ptr_generation(node->eb, i); - if (ptr_gen == trans->transid) - continue; - if (!in_block_group(bytenr, rc->block_group) && - (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS)) - continue; - if (tree_block_processed(bytenr, blocksize, rc)) - continue; - if (!in_block_group(bytenr, rc->block_group) && - !check_file_extents(rc, bytenr, blocksize, ptr_gen)) - continue; - - block = kmalloc(sizeof(*block), GFP_NOFS); - if (!block) { - err = -ENOMEM; - break; - } - block->bytenr = bytenr; - btrfs_node_key_to_cpu(node->eb, &block->key, i); - block->level = node->level - 1; - block->key_ready = 1; - rb_node = tree_insert(blocks, block->bytenr, &block->rb_node); - BUG_ON(rb_node); - } - if (err) - free_block_list(blocks); - return err; -} - -/* - * find adjacent blocks require processing - */ -static noinline_for_stack -int add_adjacent_blocks(struct btrfs_trans_handle *trans, - struct reloc_control *rc, - struct backref_cache *cache, - struct rb_root *blocks, int level, - struct backref_node **upper) -{ - struct backref_node *node; - int ret = 0; - - WARN_ON(!list_empty(&cache->pending[level])); - - if (list_empty(&cache->pending[level + 1])) - return 1; - - node = list_entry(cache->pending[level + 1].next, - struct backref_node, lower); - if (node->eb) - ret = add_child_blocks(trans, rc, node, blocks); - - *upper = node; - return ret; -} - static int get_tree_block_key(struct reloc_control *rc, struct tree_block *block) { @@ -2341,6 +2844,10 @@ static int get_tree_block_key(struct reloc_control *rc, BUG_ON(block->key_ready); eb = read_tree_block(rc->extent_root, block->bytenr, block->key.objectid, block->key.offset); + if (!eb || !extent_buffer_uptodate(eb)) { + free_extent_buffer(eb); + return -EIO; + } WARN_ON(btrfs_header_level(eb) != block->level); if (block->level == 0) btrfs_item_key_to_cpu(eb, &block->key, 0); @@ -2355,8 +2862,13 @@ static int reada_tree_block(struct reloc_control *rc, struct tree_block *block) { BUG_ON(block->key_ready); - readahead_tree_block(rc->extent_root, block->bytenr, - block->key.objectid, block->key.offset); + if (block->key.type == BTRFS_METADATA_ITEM_KEY) + readahead_tree_block(rc->extent_root, block->bytenr, + block->key.objectid, + rc->extent_root->leafsize); + else + readahead_tree_block(rc->extent_root, block->bytenr, + block->key.objectid, block->key.offset); return 0; } @@ -2370,40 +2882,48 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans, struct btrfs_path *path) { struct btrfs_root *root; - int ret; + int ret = 0; + if (!node) + return 0; + + BUG_ON(node->processed); root = select_one_root(trans, node); - if (unlikely(!root)) { - rc->found_old_snapshot = 1; + if (root == ERR_PTR(-ENOENT)) { update_processed_blocks(rc, node); - return 0; + goto out; } - if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { - ret = do_relocation(trans, node, key, path, 1); - if (ret < 0) - goto out; - if (node->level == 0 && rc->stage == UPDATE_DATA_PTRS) { - ret = replace_file_extents(trans, rc, root, - node->eb, NULL); - if (ret < 0) - goto out; - } - drop_node_buffer(node); - } else if (!root->ref_cows) { - path->lowest_level = node->level; - ret = btrfs_search_slot(trans, root, key, path, 0, 1); - btrfs_release_path(root, path); - if (ret < 0) + if (!root || test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { + ret = reserve_metadata_space(trans, rc, node); + if (ret) goto out; - } else if (root != node->root) { - WARN_ON(node->level > 0 || rc->stage != UPDATE_DATA_PTRS); } - update_processed_blocks(rc, node); - ret = 0; + if (root) { + if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { + BUG_ON(node->new_bytenr); + BUG_ON(!list_empty(&node->list)); + btrfs_record_root_in_trans(trans, root); + root = root->reloc_root; + node->new_bytenr = root->node->start; + node->root = root; + list_add_tail(&node->list, &rc->backref_cache.changed); + } else { + path->lowest_level = node->level; + ret = btrfs_search_slot(trans, root, key, path, 0, 1); + btrfs_release_path(path); + if (ret > 0) + ret = 0; + } + if (!ret) + update_processed_blocks(rc, node); + } else { + ret = do_relocation(trans, rc, node, key, path, 1); + } out: - drop_node_buffer(node); + if (ret || node->level == 0 || node->cowonly) + remove_backref_node(&rc->backref_cache, node); return ret; } @@ -2414,34 +2934,22 @@ static noinline_for_stack int relocate_tree_blocks(struct btrfs_trans_handle *trans, struct reloc_control *rc, struct rb_root *blocks) { - struct backref_cache *cache; struct backref_node *node; struct btrfs_path *path; struct tree_block *block; struct rb_node *rb_node; - int level = -1; int ret; int err = 0; path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - - cache = kmalloc(sizeof(*cache), GFP_NOFS); - if (!cache) { - btrfs_free_path(path); - return -ENOMEM; + if (!path) { + err = -ENOMEM; + goto out_free_blocks; } - backref_cache_init(cache); - rb_node = rb_first(blocks); while (rb_node) { block = rb_entry(rb_node, struct tree_block, rb_node); - if (level == -1) - level = block->level; - else - BUG_ON(level != block->level); if (!block->key_ready) reada_tree_block(rc, block); rb_node = rb_next(rb_node); @@ -2450,8 +2958,11 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, rb_node = rb_first(blocks); while (rb_node) { block = rb_entry(rb_node, struct tree_block, rb_node); - if (!block->key_ready) - get_tree_block_key(rc, block); + if (!block->key_ready) { + err = get_tree_block_key(rc, block); + if (err) + goto out_free_path; + } rb_node = rb_next(rb_node); } @@ -2459,7 +2970,7 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, while (rb_node) { block = rb_entry(rb_node, struct tree_block, rb_node); - node = build_backref_tree(rc, cache, &block->key, + node = build_backref_tree(rc, &block->key, block->level, block->bytenr); if (IS_ERR(node)) { err = PTR_ERR(node); @@ -2469,79 +2980,64 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, ret = relocate_tree_block(trans, rc, node, &block->key, path); if (ret < 0) { - err = ret; + if (ret != -EAGAIN || rb_node == rb_first(blocks)) + err = ret; goto out; } - remove_backref_node(cache, node); rb_node = rb_next(rb_node); } +out: + err = finish_pending_nodes(trans, rc, path, err); - if (level > 0) - goto out; - +out_free_path: + btrfs_free_path(path); +out_free_blocks: free_block_list(blocks); + return err; +} - /* - * now backrefs of some upper level tree blocks have been cached, - * try relocating blocks referenced by these upper level blocks. - */ - while (1) { - struct backref_node *upper = NULL; - if (trans->transaction->in_commit || - trans->transaction->delayed_refs.flushing) - break; - - ret = add_adjacent_blocks(trans, rc, cache, blocks, level, - &upper); - if (ret < 0) - err = ret; - if (ret != 0) - break; +static noinline_for_stack +int prealloc_file_extent_cluster(struct inode *inode, + struct file_extent_cluster *cluster) +{ + u64 alloc_hint = 0; + u64 start; + u64 end; + u64 offset = BTRFS_I(inode)->index_cnt; + u64 num_bytes; + int nr = 0; + int ret = 0; - rb_node = rb_first(blocks); - while (rb_node) { - block = rb_entry(rb_node, struct tree_block, rb_node); - if (trans->transaction->in_commit || - trans->transaction->delayed_refs.flushing) - goto out; - BUG_ON(!block->key_ready); - node = build_backref_tree(rc, cache, &block->key, - level, block->bytenr); - if (IS_ERR(node)) { - err = PTR_ERR(node); - goto out; - } + BUG_ON(cluster->start != cluster->boundary[0]); + mutex_lock(&inode->i_mutex); - ret = relocate_tree_block(trans, rc, node, - &block->key, path); - if (ret < 0) { - err = ret; - goto out; - } - remove_backref_node(cache, node); - rb_node = rb_next(rb_node); - } - free_block_list(blocks); + ret = btrfs_check_data_free_space(inode, cluster->end + + 1 - cluster->start); + if (ret) + goto out; - if (upper) { - ret = link_to_upper(trans, upper, path); - if (ret < 0) { - err = ret; - break; - } - remove_backref_node(cache, upper); - } + while (nr < cluster->nr) { + start = cluster->boundary[nr] - offset; + if (nr + 1 < cluster->nr) + end = cluster->boundary[nr + 1] - 1 - offset; + else + end = cluster->end - offset; + + lock_extent(&BTRFS_I(inode)->io_tree, start, end); + num_bytes = end + 1 - start; + ret = btrfs_prealloc_file_range(inode, 0, start, + num_bytes, num_bytes, + end + 1, &alloc_hint); + unlock_extent(&BTRFS_I(inode)->io_tree, start, end); + if (ret) + break; + nr++; } + btrfs_free_reserved_data_space(inode, cluster->end + + 1 - cluster->start); out: - free_block_list(blocks); - - ret = finish_pending_nodes(trans, cache, path); - if (ret < 0) - err = ret; - - kfree(cache); - btrfs_free_path(path); - return err; + mutex_unlock(&inode->i_mutex); + return ret; } static noinline_for_stack @@ -2553,7 +3049,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end, struct extent_map *em; int ret = 0; - em = alloc_extent_map(GFP_NOFS); + em = alloc_extent_map(); if (!em) return -ENOMEM; @@ -2564,10 +3060,10 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end, em->bdev = root->fs_info->fs_devices->latest_bdev; set_bit(EXTENT_FLAG_PINNED, &em->flags); - lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); + lock_extent(&BTRFS_I(inode)->io_tree, start, end); while (1) { write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em); + ret = add_extent_mapping(em_tree, em, 0); write_unlock(&em_tree->lock); if (ret != -EEXIST) { free_extent_map(em); @@ -2575,7 +3071,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end, } btrfs_drop_extent_cache(inode, start, end, 0); } - unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); + unlock_extent(&BTRFS_I(inode)->io_tree, start, end); return ret; } @@ -2587,9 +3083,9 @@ static int relocate_file_extent_cluster(struct inode *inode, u64 offset = BTRFS_I(inode)->index_cnt; unsigned long index; unsigned long last_index; - unsigned int dirty_page = 0; struct page *page; struct file_ra_state *ra; + gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); int nr = 0; int ret = 0; @@ -2600,30 +3096,36 @@ static int relocate_file_extent_cluster(struct inode *inode, if (!ra) return -ENOMEM; - index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; - last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; + ret = prealloc_file_extent_cluster(inode, cluster); + if (ret) + goto out; - mutex_lock(&inode->i_mutex); + file_ra_state_init(ra, inode->i_mapping); - i_size_write(inode, cluster->end + 1 - offset); ret = setup_extent_mapping(inode, cluster->start - offset, cluster->end - offset, cluster->start); if (ret) - goto out_unlock; - - file_ra_state_init(ra, inode->i_mapping); + goto out; - WARN_ON(cluster->start != cluster->boundary[0]); + index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; + last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; while (index <= last_index) { + ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE); + if (ret) + goto out; + page = find_lock_page(inode->i_mapping, index); if (!page) { page_cache_sync_readahead(inode->i_mapping, ra, NULL, index, last_index + 1 - index); - page = grab_cache_page(inode->i_mapping, index); + page = find_or_create_page(inode->i_mapping, index, + mask); if (!page) { + btrfs_delalloc_release_metadata(inode, + PAGE_CACHE_SIZE); ret = -ENOMEM; - goto out_unlock; + goto out; } } @@ -2639,16 +3141,17 @@ static int relocate_file_extent_cluster(struct inode *inode, if (!PageUptodate(page)) { unlock_page(page); page_cache_release(page); + btrfs_delalloc_release_metadata(inode, + PAGE_CACHE_SIZE); ret = -EIO; - goto out_unlock; + goto out; } } - page_start = (u64)page->index << PAGE_CACHE_SHIFT; + page_start = page_offset(page); page_end = page_start + PAGE_CACHE_SIZE - 1; - lock_extent(&BTRFS_I(inode)->io_tree, - page_start, page_end, GFP_NOFS); + lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end); set_page_extent_mapped(page); @@ -2659,31 +3162,21 @@ static int relocate_file_extent_cluster(struct inode *inode, EXTENT_BOUNDARY, GFP_NOFS); nr++; } - btrfs_set_extent_delalloc(inode, page_start, page_end); + btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); set_page_dirty(page); - dirty_page++; unlock_extent(&BTRFS_I(inode)->io_tree, - page_start, page_end, GFP_NOFS); + page_start, page_end); unlock_page(page); page_cache_release(page); index++; - if (nr < cluster->nr && - page_end + 1 + offset == cluster->boundary[nr]) { - balance_dirty_pages_ratelimited_nr(inode->i_mapping, - dirty_page); - dirty_page = 0; - } - } - if (dirty_page) { - balance_dirty_pages_ratelimited_nr(inode->i_mapping, - dirty_page); + balance_dirty_pages_ratelimited(inode->i_mapping); + btrfs_throttle(BTRFS_I(inode)->root); } WARN_ON(nr != cluster->nr); -out_unlock: - mutex_unlock(&inode->i_mutex); +out: kfree(ra); return ret; } @@ -2776,17 +3269,22 @@ static int add_tree_block(struct reloc_control *rc, struct rb_node *rb_node; u32 item_size; int level = -1; - int generation; + u64 generation; eb = path->nodes[0]; item_size = btrfs_item_size_nr(eb, path->slots[0]); - if (item_size >= sizeof(*ei) + sizeof(*bi)) { + if (extent_key->type == BTRFS_METADATA_ITEM_KEY || + item_size >= sizeof(*ei) + sizeof(*bi)) { ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); - bi = (struct btrfs_tree_block_info *)(ei + 1); + if (extent_key->type == BTRFS_EXTENT_ITEM_KEY) { + bi = (struct btrfs_tree_block_info *)(ei + 1); + level = btrfs_tree_block_level(eb, bi); + } else { + level = (int)extent_key->offset; + } generation = btrfs_extent_generation(eb, ei); - level = btrfs_tree_block_level(eb, bi); } else { #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 u64 ref_owner; @@ -2795,6 +3293,8 @@ static int add_tree_block(struct reloc_control *rc, BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0)); ret = get_ref_objectid_v0(rc, path, extent_key, &ref_owner, NULL); + if (ret < 0) + return ret; BUG_ON(ref_owner >= BTRFS_MAX_LEVEL); level = (int)ref_owner; /* FIXME: get real generation */ @@ -2804,7 +3304,7 @@ static int add_tree_block(struct reloc_control *rc, #endif } - btrfs_release_path(rc->extent_root, path); + btrfs_release_path(path); BUG_ON(level == -1); @@ -2813,13 +3313,14 @@ static int add_tree_block(struct reloc_control *rc, return -ENOMEM; block->bytenr = extent_key->objectid; - block->key.objectid = extent_key->offset; + block->key.objectid = rc->extent_root->leafsize; block->key.offset = generation; block->level = level; block->key_ready = 0; rb_node = tree_insert(blocks, block->bytenr, &block->rb_node); - BUG_ON(rb_node); + if (rb_node) + backref_tree_panic(rb_node, -EEXIST, block->bytenr); return 0; } @@ -2834,6 +3335,8 @@ static int __add_tree_block(struct reloc_control *rc, struct btrfs_path *path; struct btrfs_key key; int ret; + bool skinny = btrfs_fs_incompat(rc->extent_root->fs_info, + SKINNY_METADATA); if (tree_block_processed(bytenr, blocksize, rc)) return 0; @@ -2844,19 +3347,42 @@ static int __add_tree_block(struct reloc_control *rc, path = btrfs_alloc_path(); if (!path) return -ENOMEM; - +again: key.objectid = bytenr; - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = blocksize; + if (skinny) { + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = (u64)-1; + } else { + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = blocksize; + } path->search_commit_root = 1; path->skip_locking = 1; ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0); if (ret < 0) goto out; + + if (ret > 0 && skinny) { + if (path->slots[0]) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == bytenr && + (key.type == BTRFS_METADATA_ITEM_KEY || + (key.type == BTRFS_EXTENT_ITEM_KEY && + key.offset == blocksize))) + ret = 0; + } + + if (ret) { + skinny = false; + btrfs_release_path(path); + goto again; + } + } BUG_ON(ret); - btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); ret = add_tree_block(rc, &key, path, blocks); out: btrfs_free_path(path); @@ -2869,9 +3395,6 @@ out: static int block_use_full_backref(struct reloc_control *rc, struct extent_buffer *eb) { - struct btrfs_path *path; - struct btrfs_extent_item *ei; - struct btrfs_key key; u64 flags; int ret; @@ -2879,28 +3402,58 @@ static int block_use_full_backref(struct reloc_control *rc, btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV) return 1; - path = btrfs_alloc_path(); - BUG_ON(!path); - - key.objectid = eb->start; - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = eb->len; - - path->search_commit_root = 1; - path->skip_locking = 1; - ret = btrfs_search_slot(NULL, rc->extent_root, - &key, path, 0, 0); + ret = btrfs_lookup_extent_info(NULL, rc->extent_root, + eb->start, btrfs_header_level(eb), 1, + NULL, &flags); BUG_ON(ret); - ei = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_extent_item); - flags = btrfs_extent_flags(path->nodes[0], ei); - BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)); if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) ret = 1; else ret = 0; - btrfs_free_path(path); + return ret; +} + +static int delete_block_group_cache(struct btrfs_fs_info *fs_info, + struct inode *inode, u64 ino) +{ + struct btrfs_key key; + struct btrfs_root *root = fs_info->tree_root; + struct btrfs_trans_handle *trans; + int ret = 0; + + if (inode) + goto truncate; + + key.objectid = ino; + key.type = BTRFS_INODE_ITEM_KEY; + key.offset = 0; + + inode = btrfs_iget(fs_info->sb, &key, root, NULL); + if (IS_ERR(inode) || is_bad_inode(inode)) { + if (!IS_ERR(inode)) + iput(inode); + return -ENOENT; + } + +truncate: + ret = btrfs_check_trunc_cache_free_space(root, + &fs_info->global_block_rsv); + if (ret) + goto out; + + trans = btrfs_join_transaction(root); + if (IS_ERR(trans)) { + ret = PTR_ERR(trans); + goto out; + } + + ret = btrfs_truncate_free_space_cache(root, trans, inode); + + btrfs_end_transaction(trans, root); + btrfs_btree_balance_dirty(root); +out: + iput(inode); return ret; } @@ -2930,15 +3483,28 @@ static int find_data_references(struct reloc_control *rc, int counted; int ret; - path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; - ref_root = btrfs_extent_data_ref_root(leaf, ref); ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref); ref_offset = btrfs_extent_data_ref_offset(leaf, ref); ref_count = btrfs_extent_data_ref_count(leaf, ref); + /* + * This is an extent belonging to the free space cache, lets just delete + * it and redo the search. + */ + if (ref_root == BTRFS_ROOT_TREE_OBJECTID) { + ret = delete_block_group_cache(rc->extent_root->fs_info, + NULL, ref_objectid); + if (ret != -ENOENT) + return ret; + ret = 0; + } + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + path->reada = 1; + root = read_fs_root(rc->extent_root->fs_info, ref_root); if (IS_ERR(root)) { err = PTR_ERR(root); @@ -2946,8 +3512,11 @@ static int find_data_references(struct reloc_control *rc, } key.objectid = ref_objectid; - key.offset = ref_offset; key.type = BTRFS_EXTENT_DATA_KEY; + if (ref_offset > ((u64)-1 << 32)) + key.offset = 0; + else + key.offset = ref_offset; path->search_commit_root = 1; path->skip_locking = 1; @@ -2982,10 +3551,8 @@ static int find_data_references(struct reloc_control *rc, err = ret; goto out; } - if (ret > 0) { - WARN_ON(1); + if (WARN_ON(ret > 0)) goto out; - } leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); @@ -3005,11 +3572,9 @@ static int find_data_references(struct reloc_control *rc, } btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - if (key.objectid != ref_objectid || - key.type != BTRFS_EXTENT_DATA_KEY) { - WARN_ON(1); + if (WARN_ON(key.objectid != ref_objectid || + key.type != BTRFS_EXTENT_DATA_KEY)) break; - } fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); @@ -3043,7 +3608,9 @@ static int find_data_references(struct reloc_control *rc, block->key_ready = 1; rb_node = tree_insert(blocks, block->bytenr, &block->rb_node); - BUG_ON(rb_node); + if (rb_node) + backref_tree_panic(rb_node, -EEXIST, + block->bytenr); } if (counted) added = 1; @@ -3059,7 +3626,7 @@ out: } /* - * hepler to find all tree blocks that reference a given data extent + * helper to find all tree blocks that reference a given data extent */ static noinline_for_stack int add_data_references(struct reloc_control *rc, @@ -3073,22 +3640,10 @@ int add_data_references(struct reloc_control *rc, struct btrfs_extent_inline_ref *iref; unsigned long ptr; unsigned long end; - u32 blocksize; - int ret; + u32 blocksize = btrfs_level_size(rc->extent_root, 0); + int ret = 0; int err = 0; - ret = get_new_location(rc->data_inode, NULL, extent_key->objectid, - extent_key->offset); - BUG_ON(ret < 0); - if (ret > 0) { - /* the relocated data is fragmented */ - rc->extents_skipped++; - btrfs_release_path(rc->extent_root, path); - return 0; - } - - blocksize = btrfs_level_size(rc->extent_root, 0); - eb = path->nodes[0]; ptr = btrfs_item_ptr_offset(eb, path->slots[0]); end = ptr + btrfs_item_size_nr(eb, path->slots[0]); @@ -3113,6 +3668,10 @@ int add_data_references(struct reloc_control *rc, } else { BUG(); } + if (ret) { + err = ret; + goto out; + } ptr += btrfs_extent_inline_ref_size(key.type); } WARN_ON(ptr > end); @@ -3158,18 +3717,20 @@ int add_data_references(struct reloc_control *rc, } path->slots[0]++; } - btrfs_release_path(rc->extent_root, path); +out: + btrfs_release_path(path); if (err) free_block_list(blocks); return err; } /* - * hepler to find next unprocessed extent + * helper to find next unprocessed extent */ static noinline_for_stack int find_next_extent(struct btrfs_trans_handle *trans, - struct reloc_control *rc, struct btrfs_path *path) + struct reloc_control *rc, struct btrfs_path *path, + struct btrfs_key *extent_key) { struct btrfs_key key; struct extent_buffer *leaf; @@ -3209,42 +3770,62 @@ next: break; } - if (key.type != BTRFS_EXTENT_ITEM_KEY || + if (key.type != BTRFS_EXTENT_ITEM_KEY && + key.type != BTRFS_METADATA_ITEM_KEY) { + path->slots[0]++; + goto next; + } + + if (key.type == BTRFS_EXTENT_ITEM_KEY && key.objectid + key.offset <= rc->search_start) { path->slots[0]++; goto next; } + if (key.type == BTRFS_METADATA_ITEM_KEY && + key.objectid + rc->extent_root->leafsize <= + rc->search_start) { + path->slots[0]++; + goto next; + } + ret = find_first_extent_bit(&rc->processed_blocks, key.objectid, &start, &end, - EXTENT_DIRTY); + EXTENT_DIRTY, NULL); if (ret == 0 && start <= key.objectid) { - btrfs_release_path(rc->extent_root, path); + btrfs_release_path(path); rc->search_start = end + 1; } else { - rc->search_start = key.objectid + key.offset; + if (key.type == BTRFS_EXTENT_ITEM_KEY) + rc->search_start = key.objectid + key.offset; + else + rc->search_start = key.objectid + + rc->extent_root->leafsize; + memcpy(extent_key, &key, sizeof(key)); return 0; } } - btrfs_release_path(rc->extent_root, path); + btrfs_release_path(path); return ret; } static void set_reloc_control(struct reloc_control *rc) { struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; - mutex_lock(&fs_info->trans_mutex); + + mutex_lock(&fs_info->reloc_mutex); fs_info->reloc_ctl = rc; - mutex_unlock(&fs_info->trans_mutex); + mutex_unlock(&fs_info->reloc_mutex); } static void unset_reloc_control(struct reloc_control *rc) { struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; - mutex_lock(&fs_info->trans_mutex); + + mutex_lock(&fs_info->reloc_mutex); fs_info->reloc_ctl = NULL; - mutex_unlock(&fs_info->trans_mutex); + mutex_unlock(&fs_info->reloc_mutex); } static int check_extent_flags(u64 flags) @@ -3261,46 +3842,89 @@ static int check_extent_flags(u64 flags) return 0; } +static noinline_for_stack +int prepare_to_relocate(struct reloc_control *rc) +{ + struct btrfs_trans_handle *trans; + + rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root, + BTRFS_BLOCK_RSV_TEMP); + if (!rc->block_rsv) + return -ENOMEM; + + memset(&rc->cluster, 0, sizeof(rc->cluster)); + rc->search_start = rc->block_group->key.objectid; + rc->extents_found = 0; + rc->nodes_relocated = 0; + rc->merging_rsv_size = 0; + rc->reserved_bytes = 0; + rc->block_rsv->size = rc->extent_root->nodesize * + RELOCATION_RESERVED_NODES; + + rc->create_reloc_tree = 1; + set_reloc_control(rc); + + trans = btrfs_join_transaction(rc->extent_root); + if (IS_ERR(trans)) { + unset_reloc_control(rc); + /* + * extent tree is not a ref_cow tree and has no reloc_root to + * cleanup. And callers are responsible to free the above + * block rsv. + */ + return PTR_ERR(trans); + } + btrfs_commit_transaction(trans, rc->extent_root); + return 0; +} static noinline_for_stack int relocate_block_group(struct reloc_control *rc) { struct rb_root blocks = RB_ROOT; struct btrfs_key key; - struct file_extent_cluster *cluster; struct btrfs_trans_handle *trans = NULL; struct btrfs_path *path; struct btrfs_extent_item *ei; - unsigned long nr; u64 flags; u32 item_size; int ret; int err = 0; - - cluster = kzalloc(sizeof(*cluster), GFP_NOFS); - if (!cluster) - return -ENOMEM; + int progress = 0; path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = 1; - rc->extents_found = 0; - rc->extents_skipped = 0; - - rc->search_start = rc->block_group->key.objectid; - clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, - GFP_NOFS); - - rc->create_reloc_root = 1; - set_reloc_control(rc); - - trans = btrfs_start_transaction(rc->extent_root, 1); - btrfs_commit_transaction(trans, rc->extent_root); + ret = prepare_to_relocate(rc); + if (ret) { + err = ret; + goto out_free; + } while (1) { - trans = btrfs_start_transaction(rc->extent_root, 1); + rc->reserved_bytes = 0; + ret = btrfs_block_rsv_refill(rc->extent_root, + rc->block_rsv, rc->block_rsv->size, + BTRFS_RESERVE_FLUSH_ALL); + if (ret) { + err = ret; + break; + } + progress++; + trans = btrfs_start_transaction(rc->extent_root, 0); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + trans = NULL; + break; + } +restart: + if (update_backref_cache(trans, &rc->backref_cache)) { + btrfs_end_transaction(trans, rc->extent_root); + continue; + } - ret = find_next_extent(trans, rc, path); + ret = find_next_extent(trans, rc, path, &key); if (ret < 0) err = ret; if (ret != 0) @@ -3310,9 +3934,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) ei = btrfs_item_ptr(path->nodes[0], path->slots[0], struct btrfs_extent_item); - btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); - item_size = btrfs_item_size_nr(path->nodes[0], - path->slots[0]); + item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); if (item_size >= sizeof(*ei)) { flags = btrfs_extent_flags(path->nodes[0], ei); ret = check_extent_flags(flags); @@ -3333,7 +3955,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) flags = BTRFS_EXTENT_FLAG_DATA; if (path_change) { - btrfs_release_path(rc->extent_root, path); + btrfs_release_path(path); path->search_commit_root = 1; path->skip_locking = 1; @@ -3353,73 +3975,99 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { ret = add_tree_block(rc, &key, path, &blocks); } else if (rc->stage == UPDATE_DATA_PTRS && - (flags & BTRFS_EXTENT_FLAG_DATA)) { + (flags & BTRFS_EXTENT_FLAG_DATA)) { ret = add_data_references(rc, &key, path, &blocks); } else { - btrfs_release_path(rc->extent_root, path); + btrfs_release_path(path); ret = 0; } if (ret < 0) { - err = 0; + err = ret; break; } if (!RB_EMPTY_ROOT(&blocks)) { ret = relocate_tree_blocks(trans, rc, &blocks); if (ret < 0) { - err = ret; - break; + /* + * if we fail to relocate tree blocks, force to update + * backref cache when committing transaction. + */ + rc->backref_cache.last_trans = trans->transid - 1; + + if (ret != -EAGAIN) { + err = ret; + break; + } + rc->extents_found--; + rc->search_start = key.objectid; } } - nr = trans->blocks_used; - btrfs_end_transaction(trans, rc->extent_root); + btrfs_end_transaction_throttle(trans, rc->extent_root); + btrfs_btree_balance_dirty(rc->extent_root); trans = NULL; - btrfs_btree_balance_dirty(rc->extent_root, nr); if (rc->stage == MOVE_DATA_EXTENTS && (flags & BTRFS_EXTENT_FLAG_DATA)) { rc->found_file_extent = 1; ret = relocate_data_extent(rc->data_inode, - &key, cluster); + &key, &rc->cluster); if (ret < 0) { err = ret; break; } } } - btrfs_free_path(path); + if (trans && progress && err == -ENOSPC) { + ret = btrfs_force_chunk_alloc(trans, rc->extent_root, + rc->block_group->flags); + if (ret == 0) { + err = 0; + progress = 0; + goto restart; + } + } + + btrfs_release_path(path); + clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, + GFP_NOFS); if (trans) { - nr = trans->blocks_used; - btrfs_end_transaction(trans, rc->extent_root); - btrfs_btree_balance_dirty(rc->extent_root, nr); + btrfs_end_transaction_throttle(trans, rc->extent_root); + btrfs_btree_balance_dirty(rc->extent_root); } if (!err) { - ret = relocate_file_extent_cluster(rc->data_inode, cluster); + ret = relocate_file_extent_cluster(rc->data_inode, + &rc->cluster); if (ret < 0) err = ret; } - kfree(cluster); + rc->create_reloc_tree = 0; + set_reloc_control(rc); - rc->create_reloc_root = 0; - smp_mb(); + backref_cache_cleanup(&rc->backref_cache); + btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); - if (rc->extents_found > 0) { - trans = btrfs_start_transaction(rc->extent_root, 1); - btrfs_commit_transaction(trans, rc->extent_root); - } + err = prepare_to_merge(rc, err); merge_reloc_roots(rc); + rc->merge_reloc_tree = 0; unset_reloc_control(rc); + btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); /* get rid of pinned extents */ - trans = btrfs_start_transaction(rc->extent_root, 1); - btrfs_commit_transaction(trans, rc->extent_root); - + trans = btrfs_join_transaction(rc->extent_root); + if (IS_ERR(trans)) + err = PTR_ERR(trans); + else + btrfs_commit_transaction(trans, rc->extent_root); +out_free: + btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); + btrfs_free_path(path); return err; } @@ -3445,9 +4093,10 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, btrfs_set_inode_generation(leaf, item, 1); btrfs_set_inode_size(leaf, item, 0); btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); - btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); + btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | + BTRFS_INODE_PREALLOC); btrfs_mark_buffer_dirty(leaf); - btrfs_release_path(root, path); + btrfs_release_path(path); out: btrfs_free_path(path); return ret; @@ -3457,14 +4106,14 @@ out: * helper to create inode for data relocation. * the inode is in data relocation tree and its link count is 0 */ -static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, - struct btrfs_block_group_cache *group) +static noinline_for_stack +struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, + struct btrfs_block_group_cache *group) { struct inode *inode = NULL; struct btrfs_trans_handle *trans; struct btrfs_root *root; struct btrfs_key key; - unsigned long nr; u64 objectid = BTRFS_FIRST_FREE_OBJECTID; int err = 0; @@ -3472,10 +4121,11 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, if (IS_ERR(root)) return ERR_CAST(root); - trans = btrfs_start_transaction(root, 1); - BUG_ON(!trans); + trans = btrfs_start_transaction(root, 6); + if (IS_ERR(trans)) + return ERR_CAST(trans); - err = btrfs_find_free_objectid(trans, root, objectid, &objectid); + err = btrfs_find_free_objectid(root, &objectid); if (err) goto out; @@ -3485,16 +4135,14 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, key.objectid = objectid; key.type = BTRFS_INODE_ITEM_KEY; key.offset = 0; - inode = btrfs_iget(root->fs_info->sb, &key, root); + inode = btrfs_iget(root->fs_info->sb, &key, root, NULL); BUG_ON(IS_ERR(inode) || is_bad_inode(inode)); BTRFS_I(inode)->index_cnt = group->key.objectid; err = btrfs_orphan_add(trans, inode); out: - nr = trans->blocks_used; btrfs_end_transaction(trans, root); - - btrfs_btree_balance_dirty(root, nr); + btrfs_btree_balance_dirty(root); if (err) { if (inode) iput(inode); @@ -3503,6 +4151,22 @@ out: return inode; } +static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info) +{ + struct reloc_control *rc; + + rc = kzalloc(sizeof(*rc), GFP_NOFS); + if (!rc) + return NULL; + + INIT_LIST_HEAD(&rc->reloc_roots); + backref_cache_init(&rc->backref_cache); + mapping_tree_init(&rc->reloc_root_tree); + extent_io_tree_init(&rc->processed_blocks, + fs_info->btree_inode->i_mapping); + return rc; +} + /* * function to relocate all extents in a block group. */ @@ -3510,25 +4174,49 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) { struct btrfs_fs_info *fs_info = extent_root->fs_info; struct reloc_control *rc; + struct inode *inode; + struct btrfs_path *path; int ret; + int rw = 0; int err = 0; - rc = kzalloc(sizeof(*rc), GFP_NOFS); + rc = alloc_reloc_control(fs_info); if (!rc) return -ENOMEM; - mapping_tree_init(&rc->reloc_root_tree); - extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); - INIT_LIST_HEAD(&rc->reloc_roots); + rc->extent_root = extent_root; rc->block_group = btrfs_lookup_block_group(fs_info, group_start); BUG_ON(!rc->block_group); - btrfs_init_workers(&rc->workers, "relocate", - fs_info->thread_pool_size, NULL); + if (!rc->block_group->ro) { + ret = btrfs_set_block_group_ro(extent_root, rc->block_group); + if (ret) { + err = ret; + goto out; + } + rw = 1; + } + + path = btrfs_alloc_path(); + if (!path) { + err = -ENOMEM; + goto out; + } - rc->extent_root = extent_root; - btrfs_prepare_block_group_relocation(extent_root, rc->block_group); + inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group, + path); + btrfs_free_path(path); + + if (!IS_ERR(inode)) + ret = delete_block_group_cache(fs_info, inode, 0); + else + ret = PTR_ERR(inode); + + if (ret && ret != -ENOENT) { + err = ret; + goto out; + } rc->data_inode = create_reloc_inode(fs_info, rc->block_group); if (IS_ERR(rc->data_inode)) { @@ -3537,65 +4225,51 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) goto out; } - printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n", - (unsigned long long)rc->block_group->key.objectid, - (unsigned long long)rc->block_group->flags); + btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu", + rc->block_group->key.objectid, rc->block_group->flags); - btrfs_start_delalloc_inodes(fs_info->tree_root, 0); - btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0); + ret = btrfs_start_delalloc_roots(fs_info, 0, -1); + if (ret < 0) { + err = ret; + goto out; + } + btrfs_wait_ordered_roots(fs_info, -1); while (1) { - rc->extents_found = 0; - rc->extents_skipped = 0; - mutex_lock(&fs_info->cleaner_mutex); - - btrfs_clean_old_snapshots(fs_info->tree_root); ret = relocate_block_group(rc); - mutex_unlock(&fs_info->cleaner_mutex); if (ret < 0) { err = ret; - break; + goto out; } if (rc->extents_found == 0) break; - printk(KERN_INFO "btrfs: found %llu extents\n", - (unsigned long long)rc->extents_found); + btrfs_info(extent_root->fs_info, "found %llu extents", + rc->extents_found); if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) { - btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1); + ret = btrfs_wait_ordered_range(rc->data_inode, 0, + (u64)-1); + if (ret) { + err = ret; + goto out; + } invalidate_mapping_pages(rc->data_inode->i_mapping, 0, -1); rc->stage = UPDATE_DATA_PTRS; - } else if (rc->stage == UPDATE_DATA_PTRS && - rc->extents_skipped >= rc->extents_found) { - iput(rc->data_inode); - rc->data_inode = create_reloc_inode(fs_info, - rc->block_group); - if (IS_ERR(rc->data_inode)) { - err = PTR_ERR(rc->data_inode); - rc->data_inode = NULL; - break; - } - rc->stage = MOVE_DATA_EXTENTS; - rc->found_file_extent = 0; } } - filemap_write_and_wait_range(fs_info->btree_inode->i_mapping, - rc->block_group->key.objectid, - rc->block_group->key.objectid + - rc->block_group->key.offset - 1); - WARN_ON(rc->block_group->pinned > 0); WARN_ON(rc->block_group->reserved > 0); WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); out: + if (err && rw) + btrfs_set_block_group_rw(extent_root, rc->block_group); iput(rc->data_inode); - btrfs_stop_workers(&rc->workers); btrfs_put_block_group(rc->block_group); kfree(rc); return err; @@ -3604,9 +4278,11 @@ out: static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) { struct btrfs_trans_handle *trans; - int ret; + int ret, err; - trans = btrfs_start_transaction(root->fs_info->tree_root, 1); + trans = btrfs_start_transaction(root->fs_info->tree_root, 0); + if (IS_ERR(trans)) + return PTR_ERR(trans); memset(&root->root_item.drop_progress, 0, sizeof(root->root_item.drop_progress)); @@ -3614,11 +4290,11 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) btrfs_set_root_refs(&root->root_item, 0); ret = btrfs_update_root(trans, root->fs_info->tree_root, &root->root_key, &root->root_item); - BUG_ON(ret); - ret = btrfs_end_transaction(trans, root->fs_info->tree_root); - BUG_ON(ret); - return 0; + err = btrfs_end_transaction(trans, root->fs_info->tree_root); + if (err) + return err; + return ret; } /* @@ -3643,6 +4319,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) path = btrfs_alloc_path(); if (!path) return -ENOMEM; + path->reada = -1; key.objectid = BTRFS_TREE_RELOC_OBJECTID; key.type = BTRFS_ROOT_ITEM_KEY; @@ -3662,13 +4339,13 @@ int btrfs_recover_relocation(struct btrfs_root *root) } leaf = path->nodes[0]; btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - btrfs_release_path(root->fs_info->tree_root, path); + btrfs_release_path(path); if (key.objectid != BTRFS_TREE_RELOC_OBJECTID || key.type != BTRFS_ROOT_ITEM_KEY) break; - reloc_root = btrfs_read_fs_root_no_radix(root, &key); + reloc_root = btrfs_read_fs_root(root, &key); if (IS_ERR(reloc_root)) { err = PTR_ERR(reloc_root); goto out; @@ -3685,7 +4362,11 @@ int btrfs_recover_relocation(struct btrfs_root *root) err = ret; goto out; } - mark_garbage_root(reloc_root); + ret = mark_garbage_root(reloc_root); + if (ret < 0) { + err = ret; + goto out; + } } } @@ -3694,25 +4375,30 @@ int btrfs_recover_relocation(struct btrfs_root *root) key.offset--; } - btrfs_release_path(root->fs_info->tree_root, path); + btrfs_release_path(path); if (list_empty(&reloc_roots)) goto out; - rc = kzalloc(sizeof(*rc), GFP_NOFS); + rc = alloc_reloc_control(root->fs_info); if (!rc) { err = -ENOMEM; goto out; } - mapping_tree_init(&rc->reloc_root_tree); - INIT_LIST_HEAD(&rc->reloc_roots); - btrfs_init_workers(&rc->workers, "relocate", - root->fs_info->thread_pool_size, NULL); rc->extent_root = root->fs_info->extent_root; set_reloc_control(rc); + trans = btrfs_join_transaction(rc->extent_root); + if (IS_ERR(trans)) { + unset_reloc_control(rc); + err = PTR_ERR(trans); + goto out_free; + } + + rc->merge_reloc_tree = 1; + while (!list_empty(&reloc_roots)) { reloc_root = list_entry(reloc_roots.next, struct btrfs_root, root_list); @@ -3726,34 +4412,35 @@ int btrfs_recover_relocation(struct btrfs_root *root) fs_root = read_fs_root(root->fs_info, reloc_root->root_key.offset); - BUG_ON(IS_ERR(fs_root)); + if (IS_ERR(fs_root)) { + err = PTR_ERR(fs_root); + goto out_free; + } - __add_reloc_root(reloc_root); + err = __add_reloc_root(reloc_root); + BUG_ON(err < 0); /* -ENOMEM or logic error */ fs_root->reloc_root = reloc_root; } - trans = btrfs_start_transaction(rc->extent_root, 1); - btrfs_commit_transaction(trans, rc->extent_root); + err = btrfs_commit_transaction(trans, rc->extent_root); + if (err) + goto out_free; merge_reloc_roots(rc); unset_reloc_control(rc); - trans = btrfs_start_transaction(rc->extent_root, 1); - btrfs_commit_transaction(trans, rc->extent_root); + trans = btrfs_join_transaction(rc->extent_root); + if (IS_ERR(trans)) + err = PTR_ERR(trans); + else + err = btrfs_commit_transaction(trans, rc->extent_root); +out_free: + kfree(rc); out: - if (rc) { - btrfs_stop_workers(&rc->workers); - kfree(rc); - } - while (!list_empty(&reloc_roots)) { - reloc_root = list_entry(reloc_roots.next, - struct btrfs_root, root_list); - list_del(&reloc_root->root_list); - free_extent_buffer(reloc_root->node); - free_extent_buffer(reloc_root->commit_root); - kfree(reloc_root); - } + if (!list_empty(&reloc_roots)) + free_reloc_roots(&reloc_roots); + btrfs_free_path(path); if (err == 0) { @@ -3762,7 +4449,8 @@ out: BTRFS_DATA_RELOC_TREE_OBJECTID); if (IS_ERR(fs_root)) err = PTR_ERR(fs_root); - btrfs_orphan_cleanup(fs_root); + else + err = btrfs_orphan_cleanup(fs_root); } return err; } @@ -3776,12 +4464,11 @@ out: int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) { struct btrfs_ordered_sum *sums; - struct btrfs_sector_sum *sector_sum; struct btrfs_ordered_extent *ordered; struct btrfs_root *root = BTRFS_I(inode)->root; - size_t offset; int ret; u64 disk_bytenr; + u64 new_bytenr; LIST_HEAD(list); ordered = btrfs_lookup_ordered_extent(inode, file_pos); @@ -3789,24 +4476,166 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, - disk_bytenr + len - 1, &list); + disk_bytenr + len - 1, &list, 0); + if (ret) + goto out; while (!list_empty(&list)) { sums = list_entry(list.next, struct btrfs_ordered_sum, list); list_del_init(&sums->list); - sector_sum = sums->sums; - sums->bytenr = ordered->start; - - offset = 0; - while (offset < sums->len) { - sector_sum->bytenr += ordered->start - disk_bytenr; - sector_sum++; - offset += root->sectorsize; - } + /* + * We need to offset the new_bytenr based on where the csum is. + * We need to do this because we will read in entire prealloc + * extents but we may have written to say the middle of the + * prealloc extent, so we need to make sure the csum goes with + * the right disk offset. + * + * We can do this because the data reloc inode refers strictly + * to the on disk bytes, so we don't have to worry about + * disk_len vs real len like with real inodes since it's all + * disk length. + */ + new_bytenr = ordered->start + (sums->bytenr - disk_bytenr); + sums->bytenr = new_bytenr; btrfs_add_ordered_sum(inode, ordered, sums); } +out: btrfs_put_ordered_extent(ordered); - return 0; + return ret; +} + +int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *buf, + struct extent_buffer *cow) +{ + struct reloc_control *rc; + struct backref_node *node; + int first_cow = 0; + int level; + int ret = 0; + + rc = root->fs_info->reloc_ctl; + if (!rc) + return 0; + + BUG_ON(rc->stage == UPDATE_DATA_PTRS && + root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID); + + if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { + if (buf == root->node) + __update_reloc_root(root, cow->start); + } + + level = btrfs_header_level(buf); + if (btrfs_header_generation(buf) <= + btrfs_root_last_snapshot(&root->root_item)) + first_cow = 1; + + if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID && + rc->create_reloc_tree) { + WARN_ON(!first_cow && level == 0); + + node = rc->backref_cache.path[level]; + BUG_ON(node->bytenr != buf->start && + node->new_bytenr != buf->start); + + drop_node_buffer(node); + extent_buffer_get(cow); + node->eb = cow; + node->new_bytenr = cow->start; + + if (!node->pending) { + list_move_tail(&node->list, + &rc->backref_cache.pending[level]); + node->pending = 1; + } + + if (first_cow) + __mark_block_processed(rc, node); + + if (first_cow && level > 0) + rc->nodes_relocated += buf->len; + } + + if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) + ret = replace_file_extents(trans, rc, root, cow); + return ret; +} + +/* + * called before creating snapshot. it calculates metadata reservation + * requried for relocating tree blocks in the snapshot + */ +void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans, + struct btrfs_pending_snapshot *pending, + u64 *bytes_to_reserve) +{ + struct btrfs_root *root; + struct reloc_control *rc; + + root = pending->root; + if (!root->reloc_root) + return; + + rc = root->fs_info->reloc_ctl; + if (!rc->merge_reloc_tree) + return; + + root = root->reloc_root; + BUG_ON(btrfs_root_refs(&root->root_item) == 0); + /* + * relocation is in the stage of merging trees. the space + * used by merging a reloc tree is twice the size of + * relocated tree nodes in the worst case. half for cowing + * the reloc tree, half for cowing the fs tree. the space + * used by cowing the reloc tree will be freed after the + * tree is dropped. if we create snapshot, cowing the fs + * tree may use more space than it frees. so we need + * reserve extra space. + */ + *bytes_to_reserve += rc->nodes_relocated; +} + +/* + * called after snapshot is created. migrate block reservation + * and create reloc root for the newly created snapshot + */ +int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, + struct btrfs_pending_snapshot *pending) +{ + struct btrfs_root *root = pending->root; + struct btrfs_root *reloc_root; + struct btrfs_root *new_root; + struct reloc_control *rc; + int ret; + + if (!root->reloc_root) + return 0; + + rc = root->fs_info->reloc_ctl; + rc->merging_rsv_size += rc->nodes_relocated; + + if (rc->merge_reloc_tree) { + ret = btrfs_block_rsv_migrate(&pending->block_rsv, + rc->block_rsv, + rc->nodes_relocated); + if (ret) + return ret; + } + + new_root = pending->snap; + reloc_root = create_reloc_root(trans, root->reloc_root, + new_root->root_key.objectid); + if (IS_ERR(reloc_root)) + return PTR_ERR(reloc_root); + + ret = __add_reloc_root(reloc_root); + BUG_ON(ret < 0); + new_root->reloc_root = reloc_root; + + if (rc->create_reloc_tree) + ret = clone_backref_node(trans, rc, root, reloc_root); + return ret; } |
