diff options
Diffstat (limited to 'fs/btrfs/relocation.c')
| -rw-r--r-- | fs/btrfs/relocation.c | 615 |
1 files changed, 396 insertions, 219 deletions
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 300e09ac365..65245a07275 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -94,6 +94,7 @@ struct backref_edge { #define LOWER 0 #define UPPER 1 +#define RELOCATION_RESERVED_NODES 256 struct backref_cache { /* red black tree of all backref nodes in the cache */ @@ -176,6 +177,8 @@ struct reloc_control { u64 merging_rsv_size; /* size of relocated tree nodes */ u64 nodes_relocated; + /* reserved size for block group relocation*/ + u64 reserved_bytes; u64 search_start; u64 extents_found; @@ -184,7 +187,6 @@ struct reloc_control { unsigned int create_reloc_tree:1; unsigned int merge_reloc_tree:1; unsigned int found_file_extent:1; - unsigned int commit_transaction:1; }; /* stages of data relocation */ @@ -326,8 +328,7 @@ static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) return NULL; } -void backref_tree_panic(struct rb_node *rb_node, int errno, - u64 bytenr) +static void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr) { struct btrfs_fs_info *fs_info = NULL; @@ -336,7 +337,7 @@ void backref_tree_panic(struct rb_node *rb_node, int errno, if (bnode->root) fs_info = bnode->root->fs_info; btrfs_panic(fs_info, errno, "Inconsistency in backref cache " - "found at offset %llu\n", (unsigned long long)bytenr); + "found at offset %llu", bytenr); } /* @@ -527,7 +528,7 @@ static int should_ignore_root(struct btrfs_root *root) { struct btrfs_root *reloc_root; - if (!root->ref_cows) + if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) return 0; reloc_root = root->reloc_root; @@ -572,7 +573,9 @@ static int is_cowonly_root(u64 root_objectid) root_objectid == BTRFS_CHUNK_TREE_OBJECTID || root_objectid == BTRFS_DEV_TREE_OBJECTID || root_objectid == BTRFS_TREE_LOG_OBJECTID || - root_objectid == BTRFS_CSUM_TREE_OBJECTID) + root_objectid == BTRFS_CSUM_TREE_OBJECTID || + root_objectid == BTRFS_UUID_TREE_OBJECTID || + root_objectid == BTRFS_QUOTA_TREE_OBJECTID) return 1; return 0; } @@ -589,7 +592,7 @@ static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info, else key.offset = (u64)-1; - return btrfs_read_fs_root_no_name(fs_info, &key); + return btrfs_get_fs_root(fs_info, &key, false); } #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 @@ -607,7 +610,7 @@ struct btrfs_root *find_tree_root(struct reloc_control *rc, root = read_fs_root(rc->extent_root->fs_info, root_objectid); BUG_ON(IS_ERR(root)); - if (root->ref_cows && + if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) && generation != btrfs_root_generation(&root->root_item)) return NULL; @@ -619,10 +622,13 @@ static noinline_for_stack int find_inline_backref(struct extent_buffer *leaf, int slot, unsigned long *ptr, unsigned long *end) { + struct btrfs_key key; struct btrfs_extent_item *ei; struct btrfs_tree_block_info *bi; u32 item_size; + btrfs_item_key_to_cpu(leaf, &key, slot); + item_size = btrfs_item_size_nr(leaf, slot); #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 if (item_size < sizeof(*ei)) { @@ -634,13 +640,23 @@ int find_inline_backref(struct extent_buffer *leaf, int slot, WARN_ON(!(btrfs_extent_flags(leaf, ei) & BTRFS_EXTENT_FLAG_TREE_BLOCK)); - if (item_size <= sizeof(*ei) + sizeof(*bi)) { + if (key.type == BTRFS_EXTENT_ITEM_KEY && + item_size <= sizeof(*ei) + sizeof(*bi)) { WARN_ON(item_size < sizeof(*ei) + sizeof(*bi)); return 1; } + if (key.type == BTRFS_METADATA_ITEM_KEY && + item_size <= sizeof(*ei)) { + WARN_ON(item_size < sizeof(*ei)); + return 1; + } - bi = (struct btrfs_tree_block_info *)(ei + 1); - *ptr = (unsigned long)(bi + 1); + if (key.type == BTRFS_EXTENT_ITEM_KEY) { + bi = (struct btrfs_tree_block_info *)(ei + 1); + *ptr = (unsigned long)(bi + 1); + } else { + *ptr = (unsigned long)(ei + 1); + } *end = (unsigned long)ei + item_size; return 0; } @@ -684,6 +700,7 @@ struct backref_node *build_backref_tree(struct reloc_control *rc, int cowonly; int ret; int err = 0; + bool need_check = true; path1 = btrfs_alloc_path(); path2 = btrfs_alloc_path(); @@ -708,7 +725,7 @@ again: end = 0; ptr = 0; key.objectid = cur->bytenr; - key.type = BTRFS_EXTENT_ITEM_KEY; + key.type = BTRFS_METADATA_ITEM_KEY; key.offset = (u64)-1; path1->search_commit_root = 1; @@ -766,7 +783,8 @@ again: break; } - if (key.type == BTRFS_EXTENT_ITEM_KEY) { + if (key.type == BTRFS_EXTENT_ITEM_KEY || + key.type == BTRFS_METADATA_ITEM_KEY) { ret = find_inline_backref(eb, path1->slots[0], &ptr, &end); if (ret) @@ -869,7 +887,7 @@ again: goto out; } - if (!root->ref_cows) + if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) cur->cowonly = 1; if (btrfs_root_level(&root->root_item) == cur->level) { @@ -906,6 +924,7 @@ again: cur->bytenr); lower = cur; + need_check = true; for (; level < BTRFS_MAX_LEVEL; level++) { if (!path2->nodes[level]) { BUG_ON(btrfs_root_bytenr(&root->root_item) != @@ -935,7 +954,8 @@ again: upper->bytenr = eb->start; upper->owner = btrfs_header_owner(eb); upper->level = lower->level + 1; - if (!root->ref_cows) + if (!test_bit(BTRFS_ROOT_REF_COWS, + &root->state)) upper->cowonly = 1; /* @@ -949,14 +969,12 @@ again: /* * add the block to pending list if we - * need check its backrefs. only block - * at 'cur->level + 1' is added to the - * tail of pending list. this guarantees - * we check backrefs from lower level - * blocks to upper level blocks. + * need check its backrefs, we only do this once + * while walking up a tree as we will catch + * anything else later on. */ - if (!upper->checked && - level == cur->level + 1) { + if (!upper->checked && need_check) { + need_check = false; list_add_tail(&edge->list[UPPER], &list); } else @@ -1241,7 +1259,7 @@ static int __must_check __add_reloc_root(struct btrfs_root *root) if (rb_node) { btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found " "for start=%llu while inserting into relocation " - "tree\n", node->bytenr); + "tree", node->bytenr); kfree(node); return -EEXIST; } @@ -1251,10 +1269,10 @@ static int __must_check __add_reloc_root(struct btrfs_root *root) } /* - * helper to update/delete the 'address of tree root -> reloc tree' + * helper to delete the 'address of tree root -> reloc tree' * mapping */ -static int __update_reloc_root(struct btrfs_root *root, int del) +static void __del_reloc_root(struct btrfs_root *root) { struct rb_node *rb_node; struct mapping_node *node = NULL; @@ -1262,29 +1280,53 @@ static int __update_reloc_root(struct btrfs_root *root, int del) spin_lock(&rc->reloc_root_tree.lock); rb_node = tree_search(&rc->reloc_root_tree.rb_root, - root->commit_root->start); + root->node->start); if (rb_node) { node = rb_entry(rb_node, struct mapping_node, rb_node); rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); } spin_unlock(&rc->reloc_root_tree.lock); + if (!node) + return; BUG_ON((struct btrfs_root *)node->data != root); - if (!del) { - spin_lock(&rc->reloc_root_tree.lock); - node->bytenr = root->node->start; - rb_node = tree_insert(&rc->reloc_root_tree.rb_root, - node->bytenr, &node->rb_node); - spin_unlock(&rc->reloc_root_tree.lock); - if (rb_node) - backref_tree_panic(rb_node, -EEXIST, node->bytenr); - } else { - spin_lock(&root->fs_info->trans_lock); - list_del_init(&root->root_list); - spin_unlock(&root->fs_info->trans_lock); - kfree(node); + spin_lock(&root->fs_info->trans_lock); + list_del_init(&root->root_list); + spin_unlock(&root->fs_info->trans_lock); + kfree(node); +} + +/* + * helper to update the 'address of tree root -> reloc tree' + * mapping + */ +static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr) +{ + struct rb_node *rb_node; + struct mapping_node *node = NULL; + struct reloc_control *rc = root->fs_info->reloc_ctl; + + spin_lock(&rc->reloc_root_tree.lock); + rb_node = tree_search(&rc->reloc_root_tree.rb_root, + root->node->start); + if (rb_node) { + node = rb_entry(rb_node, struct mapping_node, rb_node); + rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); } + spin_unlock(&rc->reloc_root_tree.lock); + + if (!node) + return 0; + BUG_ON((struct btrfs_root *)node->data != root); + + spin_lock(&rc->reloc_root_tree.lock); + node->bytenr = new_bytenr; + rb_node = tree_insert(&rc->reloc_root_tree.rb_root, + node->bytenr, &node->rb_node); + spin_unlock(&rc->reloc_root_tree.lock); + if (rb_node) + backref_tree_panic(rb_node, -EEXIST, node->bytenr); return 0; } @@ -1295,6 +1337,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, struct extent_buffer *eb; struct btrfs_root_item *root_item; struct btrfs_key root_key; + u64 last_snap = 0; int ret; root_item = kmalloc(sizeof(*root_item), GFP_NOFS); @@ -1310,6 +1353,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, BTRFS_TREE_RELOC_OBJECTID); BUG_ON(ret); + last_snap = btrfs_root_last_snapshot(&root->root_item); btrfs_set_root_last_snapshot(&root->root_item, trans->transid - 1); } else { @@ -1335,6 +1379,12 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, memset(&root_item->drop_progress, 0, sizeof(struct btrfs_disk_key)); root_item->drop_level = 0; + /* + * abuse rtransid, it is safe because it is impossible to + * receive data into a relocation tree. + */ + btrfs_set_root_rtransid(root_item, last_snap); + btrfs_set_root_otransid(root_item, trans->transid); } btrfs_tree_unlock(eb); @@ -1345,8 +1395,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, BUG_ON(ret); kfree(root_item); - reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, - &root_key); + reloc_root = btrfs_read_fs_root(root->fs_info->tree_root, &root_key); BUG_ON(IS_ERR(reloc_root)); reloc_root->last_trans = trans->transid; return reloc_root; @@ -1361,6 +1410,7 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, { struct btrfs_root *reloc_root; struct reloc_control *rc = root->fs_info->reloc_ctl; + struct btrfs_block_rsv *rsv; int clear_rsv = 0; int ret; @@ -1374,13 +1424,14 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) return 0; - if (!trans->block_rsv) { + if (!trans->reloc_reserved) { + rsv = trans->block_rsv; trans->block_rsv = rc->block_rsv; clear_rsv = 1; } reloc_root = create_reloc_root(trans, root, root->root_key.objectid); if (clear_rsv) - trans->block_rsv = NULL; + trans->block_rsv = rsv; ret = __add_reloc_root(reloc_root); BUG_ON(ret < 0); @@ -1396,7 +1447,6 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, { struct btrfs_root *reloc_root; struct btrfs_root_item *root_item; - int del = 0; int ret; if (!root->reloc_root) @@ -1408,11 +1458,9 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, if (root->fs_info->reloc_ctl->merge_reloc_tree && btrfs_root_refs(root_item) == 0) { root->reloc_root = NULL; - del = 1; + __del_reloc_root(reloc_root); } - __update_reloc_root(reloc_root, del); - if (reloc_root->commit_root != reloc_root->node) { btrfs_set_root_node(root_item, reloc_root->node); free_extent_buffer(reloc_root->commit_root); @@ -1526,7 +1574,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr, btrfs_file_extent_other_encoding(leaf, fi)); if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) { - ret = 1; + ret = -EINVAL; goto out; } @@ -1557,7 +1605,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans, u64 end; u32 nritems; u32 i; - int ret; + int ret = 0; int first = 1; int dirty = 0; @@ -1620,11 +1668,13 @@ int replace_file_extents(struct btrfs_trans_handle *trans, ret = get_new_location(rc->data_inode, &new_bytenr, bytenr, num_bytes); - if (ret > 0) { - WARN_ON(1); - continue; + if (ret) { + /* + * Don't have to abort since we've not changed anything + * in the file extent yet. + */ + break; } - BUG_ON(ret < 0); btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr); dirty = 1; @@ -1634,18 +1684,24 @@ int replace_file_extents(struct btrfs_trans_handle *trans, num_bytes, parent, btrfs_header_owner(leaf), key.objectid, key.offset, 1); - BUG_ON(ret); + if (ret) { + btrfs_abort_transaction(trans, root, ret); + break; + } ret = btrfs_free_extent(trans, root, bytenr, num_bytes, parent, btrfs_header_owner(leaf), key.objectid, key.offset, 1); - BUG_ON(ret); + if (ret) { + btrfs_abort_transaction(trans, root, ret); + break; + } } if (dirty) btrfs_mark_buffer_dirty(leaf); if (inode) btrfs_add_delayed_iput(inode); - return 0; + return ret; } static noinline_for_stack @@ -1745,8 +1801,7 @@ again: new_ptr_gen = 0; } - if (new_bytenr > 0 && new_bytenr == old_bytenr) { - WARN_ON(1); + if (WARN_ON(new_bytenr > 0 && new_bytenr == old_bytenr)) { ret = level; break; } @@ -1760,7 +1815,11 @@ again: eb = read_tree_block(dest, old_bytenr, blocksize, old_ptr_gen); - BUG_ON(!eb); + if (!eb || !extent_buffer_uptodate(eb)) { + ret = (!eb) ? -ENOMEM : -EIO; + free_extent_buffer(eb); + break; + } btrfs_tree_lock(eb); if (cow) { ret = btrfs_cow_block(trans, dest, eb, parent, @@ -1913,6 +1972,10 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, bytenr = btrfs_node_blockptr(eb, path->slots[i]); blocksize = btrfs_level_size(root, i - 1); eb = read_tree_block(root, bytenr, blocksize, ptr_gen); + if (!eb || !extent_buffer_uptodate(eb)) { + free_extent_buffer(eb); + return -EIO; + } BUG_ON(btrfs_header_level(eb) != i - 1); path->nodes[i - 1] = eb; path->slots[i - 1] = 0; @@ -2020,7 +2083,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, LIST_HEAD(inode_list); struct btrfs_key key; struct btrfs_key next_key; - struct btrfs_trans_handle *trans; + struct btrfs_trans_handle *trans = NULL; struct btrfs_root *reloc_root; struct btrfs_root_item *root_item; struct btrfs_path *path; @@ -2069,18 +2132,19 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, memset(&next_key, 0, sizeof(next_key)); while (1) { - trans = btrfs_start_transaction(root, 0); - BUG_ON(IS_ERR(trans)); - trans->block_rsv = rc->block_rsv; - ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved, BTRFS_RESERVE_FLUSH_ALL); if (ret) { - BUG_ON(ret != -EAGAIN); - ret = btrfs_commit_transaction(trans, root); - BUG_ON(ret); - continue; + err = ret; + goto out; + } + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + trans = NULL; + goto out; } + trans->block_rsv = rc->block_rsv; replaced = 0; max_level = level; @@ -2126,6 +2190,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, root_item->drop_level = level; btrfs_end_transaction_throttle(trans, root); + trans = NULL; btrfs_btree_balance_dirty(root); @@ -2154,7 +2219,8 @@ out: btrfs_update_reloc_root(trans, root); } - btrfs_end_transaction_throttle(trans, root); + if (trans) + btrfs_end_transaction_throttle(trans, root); btrfs_btree_balance_dirty(root); @@ -2238,13 +2304,28 @@ again: } static noinline_for_stack +void free_reloc_roots(struct list_head *list) +{ + struct btrfs_root *reloc_root; + + while (!list_empty(list)) { + reloc_root = list_entry(list->next, struct btrfs_root, + root_list); + __del_reloc_root(reloc_root); + } +} + +static noinline_for_stack int merge_reloc_roots(struct reloc_control *rc) { struct btrfs_root *root; struct btrfs_root *reloc_root; + u64 last_snap; + u64 otransid; + u64 objectid; LIST_HEAD(reloc_roots); int found = 0; - int ret; + int ret = 0; again: root = rc->extent_root; @@ -2270,20 +2351,53 @@ again: BUG_ON(root->reloc_root != reloc_root); ret = merge_reloc_root(rc, root); - BUG_ON(ret); + if (ret) { + if (list_empty(&reloc_root->root_list)) + list_add_tail(&reloc_root->root_list, + &reloc_roots); + goto out; + } } else { list_del_init(&reloc_root->root_list); } + + /* + * we keep the old last snapshod transid in rtranid when we + * created the relocation tree. + */ + last_snap = btrfs_root_rtransid(&reloc_root->root_item); + otransid = btrfs_root_otransid(&reloc_root->root_item); + objectid = reloc_root->root_key.offset; + ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1); - BUG_ON(ret < 0); + if (ret < 0) { + if (list_empty(&reloc_root->root_list)) + list_add_tail(&reloc_root->root_list, + &reloc_roots); + goto out; + } } if (found) { found = 0; goto again; } +out: + if (ret) { + btrfs_std_error(root->fs_info, ret); + if (!list_empty(&reloc_roots)) + free_reloc_roots(&reloc_roots); + + /* new reloc root may be added */ + mutex_lock(&root->fs_info->reloc_mutex); + list_splice_init(&rc->reloc_roots, &reloc_roots); + mutex_unlock(&root->fs_info->reloc_mutex); + if (!list_empty(&reloc_roots)) + free_reloc_roots(&reloc_roots); + } + BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); - return 0; + return ret; } static void free_block_list(struct rb_root *blocks) @@ -2316,7 +2430,7 @@ static noinline_for_stack struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, struct reloc_control *rc, struct backref_node *node, - struct backref_edge *edges[], int *nr) + struct backref_edge *edges[]) { struct backref_node *next; struct btrfs_root *root; @@ -2328,7 +2442,7 @@ struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, next = walk_up_backref(next, edges, &index); root = next->root; BUG_ON(!root); - BUG_ON(!root->ref_cows); + BUG_ON(!test_bit(BTRFS_ROOT_REF_COWS, &root->state)); if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { record_reloc_root_in_trans(trans, root); @@ -2358,7 +2472,6 @@ struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, if (!root) return NULL; - *nr = index; next = node; /* setup backref node path for btrfs_reloc_cow_block */ while (1) { @@ -2394,7 +2507,7 @@ struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, BUG_ON(!root); /* no other choice for non-references counted tree */ - if (!root->ref_cows) + if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) return root; if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) @@ -2454,28 +2567,36 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans, struct btrfs_root *root = rc->extent_root; u64 num_bytes; int ret; + u64 tmp; num_bytes = calcu_metadata_size(rc, node, 1) * 2; trans->block_rsv = rc->block_rsv; - ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes, - BTRFS_RESERVE_FLUSH_ALL); + rc->reserved_bytes += num_bytes; + ret = btrfs_block_rsv_refill(root, rc->block_rsv, num_bytes, + BTRFS_RESERVE_FLUSH_ALL); if (ret) { - if (ret == -EAGAIN) - rc->commit_transaction = 1; + if (ret == -EAGAIN) { + tmp = rc->extent_root->nodesize * + RELOCATION_RESERVED_NODES; + while (tmp <= rc->reserved_bytes) + tmp <<= 1; + /* + * only one thread can access block_rsv at this point, + * so we don't need hold lock to protect block_rsv. + * we expand more reservation size here to allow enough + * space for relocation and we will return eailer in + * enospc case. + */ + rc->block_rsv->size = tmp + rc->extent_root->nodesize * + RELOCATION_RESERVED_NODES; + } return ret; } return 0; } -static void release_metadata_space(struct reloc_control *rc, - struct backref_node *node) -{ - u64 num_bytes = calcu_metadata_size(rc, node, 0) * 2; - btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, num_bytes); -} - /* * relocate a block tree, and then update pointers in upper level * blocks that reference the block to point to the new location. @@ -2497,7 +2618,6 @@ static int do_relocation(struct btrfs_trans_handle *trans, u32 blocksize; u64 bytenr; u64 generation; - int nr; int slot; int ret; int err = 0; @@ -2510,7 +2630,7 @@ static int do_relocation(struct btrfs_trans_handle *trans, cond_resched(); upper = edge->node[UPPER]; - root = select_reloc_root(trans, rc, upper, edges, &nr); + root = select_reloc_root(trans, rc, upper, edges); BUG_ON(!root); if (upper->eb && !upper->locked) { @@ -2562,7 +2682,8 @@ static int do_relocation(struct btrfs_trans_handle *trans, blocksize = btrfs_level_size(root, node->level); generation = btrfs_node_ptr_generation(upper->eb, slot); eb = read_tree_block(root, bytenr, blocksize, generation); - if (!eb) { + if (!eb || !extent_buffer_uptodate(eb)) { + free_extent_buffer(eb); err = -EIO; goto next; } @@ -2723,7 +2844,10 @@ static int get_tree_block_key(struct reloc_control *rc, BUG_ON(block->key_ready); eb = read_tree_block(rc->extent_root, block->bytenr, block->key.objectid, block->key.offset); - BUG_ON(!eb); + if (!eb || !extent_buffer_uptodate(eb)) { + free_extent_buffer(eb); + return -EIO; + } WARN_ON(btrfs_header_level(eb) != block->level); if (block->level == 0) btrfs_item_key_to_cpu(eb, &block->key, 0); @@ -2738,8 +2862,13 @@ static int reada_tree_block(struct reloc_control *rc, struct tree_block *block) { BUG_ON(block->key_ready); - readahead_tree_block(rc->extent_root, block->bytenr, - block->key.objectid, block->key.offset); + if (block->key.type == BTRFS_METADATA_ITEM_KEY) + readahead_tree_block(rc->extent_root, block->bytenr, + block->key.objectid, + rc->extent_root->leafsize); + else + readahead_tree_block(rc->extent_root, block->bytenr, + block->key.objectid, block->key.offset); return 0; } @@ -2753,7 +2882,6 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans, struct btrfs_path *path) { struct btrfs_root *root; - int release = 0; int ret = 0; if (!node) @@ -2766,15 +2894,14 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans, goto out; } - if (!root || root->ref_cows) { + if (!root || test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { ret = reserve_metadata_space(trans, rc, node); if (ret) goto out; - release = 1; } if (root) { - if (root->ref_cows) { + if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) { BUG_ON(node->new_bytenr); BUG_ON(!list_empty(&node->list)); btrfs_record_root_in_trans(trans, root); @@ -2795,11 +2922,8 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans, ret = do_relocation(trans, rc, node, key, path, 1); } out: - if (ret || node->level == 0 || node->cowonly) { - if (release) - release_metadata_space(rc, node); + if (ret || node->level == 0 || node->cowonly) remove_backref_node(&rc->backref_cache, node); - } return ret; } @@ -2818,8 +2942,10 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, int err = 0; path = btrfs_alloc_path(); - if (!path) - return -ENOMEM; + if (!path) { + err = -ENOMEM; + goto out_free_blocks; + } rb_node = rb_first(blocks); while (rb_node) { @@ -2832,8 +2958,11 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, rb_node = rb_first(blocks); while (rb_node) { block = rb_entry(rb_node, struct tree_block, rb_node); - if (!block->key_ready) - get_tree_block_key(rc, block); + if (!block->key_ready) { + err = get_tree_block_key(rc, block); + if (err) + goto out_free_path; + } rb_node = rb_next(rb_node); } @@ -2858,10 +2987,12 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, rb_node = rb_next(rb_node); } out: - free_block_list(blocks); err = finish_pending_nodes(trans, rc, path, err); +out_free_path: btrfs_free_path(path); +out_free_blocks: + free_block_list(blocks); return err; } @@ -2932,7 +3063,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end, lock_extent(&BTRFS_I(inode)->io_tree, start, end); while (1) { write_lock(&em_tree->lock); - ret = add_extent_mapping(em_tree, em); + ret = add_extent_mapping(em_tree, em, 0); write_unlock(&em_tree->lock); if (ret != -EEXIST) { free_extent_map(em); @@ -3017,7 +3148,7 @@ static int relocate_file_extent_cluster(struct inode *inode, } } - page_start = (u64)page->index << PAGE_CACHE_SHIFT; + page_start = page_offset(page); page_end = page_start + PAGE_CACHE_SIZE - 1; lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end); @@ -3138,17 +3269,22 @@ static int add_tree_block(struct reloc_control *rc, struct rb_node *rb_node; u32 item_size; int level = -1; - int generation; + u64 generation; eb = path->nodes[0]; item_size = btrfs_item_size_nr(eb, path->slots[0]); - if (item_size >= sizeof(*ei) + sizeof(*bi)) { + if (extent_key->type == BTRFS_METADATA_ITEM_KEY || + item_size >= sizeof(*ei) + sizeof(*bi)) { ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item); - bi = (struct btrfs_tree_block_info *)(ei + 1); + if (extent_key->type == BTRFS_EXTENT_ITEM_KEY) { + bi = (struct btrfs_tree_block_info *)(ei + 1); + level = btrfs_tree_block_level(eb, bi); + } else { + level = (int)extent_key->offset; + } generation = btrfs_extent_generation(eb, ei); - level = btrfs_tree_block_level(eb, bi); } else { #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 u64 ref_owner; @@ -3177,7 +3313,7 @@ static int add_tree_block(struct reloc_control *rc, return -ENOMEM; block->bytenr = extent_key->objectid; - block->key.objectid = extent_key->offset; + block->key.objectid = rc->extent_root->leafsize; block->key.offset = generation; block->level = level; block->key_ready = 0; @@ -3199,6 +3335,8 @@ static int __add_tree_block(struct reloc_control *rc, struct btrfs_path *path; struct btrfs_key key; int ret; + bool skinny = btrfs_fs_incompat(rc->extent_root->fs_info, + SKINNY_METADATA); if (tree_block_processed(bytenr, blocksize, rc)) return 0; @@ -3209,19 +3347,42 @@ static int __add_tree_block(struct reloc_control *rc, path = btrfs_alloc_path(); if (!path) return -ENOMEM; - +again: key.objectid = bytenr; - key.type = BTRFS_EXTENT_ITEM_KEY; - key.offset = blocksize; + if (skinny) { + key.type = BTRFS_METADATA_ITEM_KEY; + key.offset = (u64)-1; + } else { + key.type = BTRFS_EXTENT_ITEM_KEY; + key.offset = blocksize; + } path->search_commit_root = 1; path->skip_locking = 1; ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0); if (ret < 0) goto out; + + if (ret > 0 && skinny) { + if (path->slots[0]) { + path->slots[0]--; + btrfs_item_key_to_cpu(path->nodes[0], &key, + path->slots[0]); + if (key.objectid == bytenr && + (key.type == BTRFS_METADATA_ITEM_KEY || + (key.type == BTRFS_EXTENT_ITEM_KEY && + key.offset == blocksize))) + ret = 0; + } + + if (ret) { + skinny = false; + btrfs_release_path(path); + goto again; + } + } BUG_ON(ret); - btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); ret = add_tree_block(rc, &key, path, blocks); out: btrfs_free_path(path); @@ -3242,7 +3403,8 @@ static int block_use_full_backref(struct reloc_control *rc, return 1; ret = btrfs_lookup_extent_info(NULL, rc->extent_root, - eb->start, eb->len, NULL, &flags); + eb->start, btrfs_header_level(eb), 1, + NULL, &flags); BUG_ON(ret); if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) @@ -3256,7 +3418,6 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info, struct inode *inode, u64 ino) { struct btrfs_key key; - struct btrfs_path *path; struct btrfs_root *root = fs_info->tree_root; struct btrfs_trans_handle *trans; int ret = 0; @@ -3276,22 +3437,19 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info, } truncate: - path = btrfs_alloc_path(); - if (!path) { - ret = -ENOMEM; + ret = btrfs_check_trunc_cache_free_space(root, + &fs_info->global_block_rsv); + if (ret) goto out; - } trans = btrfs_join_transaction(root); if (IS_ERR(trans)) { - btrfs_free_path(path); ret = PTR_ERR(trans); goto out; } - ret = btrfs_truncate_free_space_cache(root, trans, path, inode); + ret = btrfs_truncate_free_space_cache(root, trans, inode); - btrfs_free_path(path); btrfs_end_transaction(trans, root); btrfs_btree_balance_dirty(root); out: @@ -3393,10 +3551,8 @@ static int find_data_references(struct reloc_control *rc, err = ret; goto out; } - if (ret > 0) { - WARN_ON(1); + if (WARN_ON(ret > 0)) goto out; - } leaf = path->nodes[0]; nritems = btrfs_header_nritems(leaf); @@ -3416,11 +3572,9 @@ static int find_data_references(struct reloc_control *rc, } btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); - if (key.objectid != ref_objectid || - key.type != BTRFS_EXTENT_DATA_KEY) { - WARN_ON(1); + if (WARN_ON(key.objectid != ref_objectid || + key.type != BTRFS_EXTENT_DATA_KEY)) break; - } fi = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_file_extent_item); @@ -3472,7 +3626,7 @@ out: } /* - * hepler to find all tree blocks that reference a given data extent + * helper to find all tree blocks that reference a given data extent */ static noinline_for_stack int add_data_references(struct reloc_control *rc, @@ -3487,7 +3641,7 @@ int add_data_references(struct reloc_control *rc, unsigned long ptr; unsigned long end; u32 blocksize = btrfs_level_size(rc->extent_root, 0); - int ret; + int ret = 0; int err = 0; eb = path->nodes[0]; @@ -3514,6 +3668,10 @@ int add_data_references(struct reloc_control *rc, } else { BUG(); } + if (ret) { + err = ret; + goto out; + } ptr += btrfs_extent_inline_ref_size(key.type); } WARN_ON(ptr > end); @@ -3559,6 +3717,7 @@ int add_data_references(struct reloc_control *rc, } path->slots[0]++; } +out: btrfs_release_path(path); if (err) free_block_list(blocks); @@ -3566,7 +3725,7 @@ int add_data_references(struct reloc_control *rc, } /* - * hepler to find next unprocessed extent + * helper to find next unprocessed extent */ static noinline_for_stack int find_next_extent(struct btrfs_trans_handle *trans, @@ -3611,12 +3770,25 @@ next: break; } - if (key.type != BTRFS_EXTENT_ITEM_KEY || + if (key.type != BTRFS_EXTENT_ITEM_KEY && + key.type != BTRFS_METADATA_ITEM_KEY) { + path->slots[0]++; + goto next; + } + + if (key.type == BTRFS_EXTENT_ITEM_KEY && key.objectid + key.offset <= rc->search_start) { path->slots[0]++; goto next; } + if (key.type == BTRFS_METADATA_ITEM_KEY && + key.objectid + rc->extent_root->leafsize <= + rc->search_start) { + path->slots[0]++; + goto next; + } + ret = find_first_extent_bit(&rc->processed_blocks, key.objectid, &start, &end, EXTENT_DIRTY, NULL); @@ -3625,7 +3797,11 @@ next: btrfs_release_path(path); rc->search_start = end + 1; } else { - rc->search_start = key.objectid + key.offset; + if (key.type == BTRFS_EXTENT_ITEM_KEY) + rc->search_start = key.objectid + key.offset; + else + rc->search_start = key.objectid + + rc->extent_root->leafsize; memcpy(extent_key, &key, sizeof(key)); return 0; } @@ -3670,35 +3846,34 @@ static noinline_for_stack int prepare_to_relocate(struct reloc_control *rc) { struct btrfs_trans_handle *trans; - int ret; rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root, BTRFS_BLOCK_RSV_TEMP); if (!rc->block_rsv) return -ENOMEM; - /* - * reserve some space for creating reloc trees. - * btrfs_init_reloc_root will use them when there - * is no reservation in transaction handle. - */ - ret = btrfs_block_rsv_add(rc->extent_root, rc->block_rsv, - rc->extent_root->nodesize * 256, - BTRFS_RESERVE_FLUSH_ALL); - if (ret) - return ret; - memset(&rc->cluster, 0, sizeof(rc->cluster)); rc->search_start = rc->block_group->key.objectid; rc->extents_found = 0; rc->nodes_relocated = 0; rc->merging_rsv_size = 0; + rc->reserved_bytes = 0; + rc->block_rsv->size = rc->extent_root->nodesize * + RELOCATION_RESERVED_NODES; rc->create_reloc_tree = 1; set_reloc_control(rc); trans = btrfs_join_transaction(rc->extent_root); - BUG_ON(IS_ERR(trans)); + if (IS_ERR(trans)) { + unset_reloc_control(rc); + /* + * extent tree is not a ref_cow tree and has no reloc_root to + * cleanup. And callers are responsible to free the above + * block rsv. + */ + return PTR_ERR(trans); + } btrfs_commit_transaction(trans, rc->extent_root); return 0; } @@ -3728,9 +3903,21 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) } while (1) { + rc->reserved_bytes = 0; + ret = btrfs_block_rsv_refill(rc->extent_root, + rc->block_rsv, rc->block_rsv->size, + BTRFS_RESERVE_FLUSH_ALL); + if (ret) { + err = ret; + break; + } progress++; trans = btrfs_start_transaction(rc->extent_root, 0); - BUG_ON(IS_ERR(trans)); + if (IS_ERR(trans)) { + err = PTR_ERR(trans); + trans = NULL; + break; + } restart: if (update_backref_cache(trans, &rc->backref_cache)) { btrfs_end_transaction(trans, rc->extent_root); @@ -3802,6 +3989,12 @@ restart: if (!RB_EMPTY_ROOT(&blocks)) { ret = relocate_tree_blocks(trans, rc, &blocks); if (ret < 0) { + /* + * if we fail to relocate tree blocks, force to update + * backref cache when committing transaction. + */ + rc->backref_cache.last_trans = trans->transid - 1; + if (ret != -EAGAIN) { err = ret; break; @@ -3811,24 +4004,8 @@ restart: } } - ret = btrfs_block_rsv_check(rc->extent_root, rc->block_rsv, 5); - if (ret < 0) { - if (ret != -ENOSPC) { - err = ret; - WARN_ON(1); - break; - } - rc->commit_transaction = 1; - } - - if (rc->commit_transaction) { - rc->commit_transaction = 0; - ret = btrfs_commit_transaction(trans, rc->extent_root); - BUG_ON(ret); - } else { - btrfs_end_transaction_throttle(trans, rc->extent_root); - btrfs_btree_balance_dirty(rc->extent_root); - } + btrfs_end_transaction_throttle(trans, rc->extent_root); + btrfs_btree_balance_dirty(rc->extent_root); trans = NULL; if (rc->stage == MOVE_DATA_EXTENTS && @@ -3974,7 +4151,7 @@ out: return inode; } -static struct reloc_control *alloc_reloc_control(void) +static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info) { struct reloc_control *rc; @@ -3985,7 +4162,8 @@ static struct reloc_control *alloc_reloc_control(void) INIT_LIST_HEAD(&rc->reloc_roots); backref_cache_init(&rc->backref_cache); mapping_tree_init(&rc->reloc_root_tree); - extent_io_tree_init(&rc->processed_blocks, NULL); + extent_io_tree_init(&rc->processed_blocks, + fs_info->btree_inode->i_mapping); return rc; } @@ -4002,7 +4180,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) int rw = 0; int err = 0; - rc = alloc_reloc_control(); + rc = alloc_reloc_control(fs_info); if (!rc) return -ENOMEM; @@ -4047,23 +4225,19 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) goto out; } - printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n", - (unsigned long long)rc->block_group->key.objectid, - (unsigned long long)rc->block_group->flags); + btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu", + rc->block_group->key.objectid, rc->block_group->flags); - ret = btrfs_start_delalloc_inodes(fs_info->tree_root, 0); + ret = btrfs_start_delalloc_roots(fs_info, 0, -1); if (ret < 0) { err = ret; goto out; } - btrfs_wait_ordered_extents(fs_info->tree_root, 0); + btrfs_wait_ordered_roots(fs_info, -1); while (1) { mutex_lock(&fs_info->cleaner_mutex); - - btrfs_clean_old_snapshots(fs_info->tree_root); ret = relocate_block_group(rc); - mutex_unlock(&fs_info->cleaner_mutex); if (ret < 0) { err = ret; @@ -4073,22 +4247,22 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) if (rc->extents_found == 0) break; - printk(KERN_INFO "btrfs: found %llu extents\n", - (unsigned long long)rc->extents_found); + btrfs_info(extent_root->fs_info, "found %llu extents", + rc->extents_found); if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) { - btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1); + ret = btrfs_wait_ordered_range(rc->data_inode, 0, + (u64)-1); + if (ret) { + err = ret; + goto out; + } invalidate_mapping_pages(rc->data_inode->i_mapping, 0, -1); rc->stage = UPDATE_DATA_PTRS; } } - filemap_write_and_wait_range(fs_info->btree_inode->i_mapping, - rc->block_group->key.objectid, - rc->block_group->key.objectid + - rc->block_group->key.offset - 1); - WARN_ON(rc->block_group->pinned > 0); WARN_ON(rc->block_group->reserved > 0); WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); @@ -4171,7 +4345,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) key.type != BTRFS_ROOT_ITEM_KEY) break; - reloc_root = btrfs_read_fs_root_no_radix(root, &key); + reloc_root = btrfs_read_fs_root(root, &key); if (IS_ERR(reloc_root)) { err = PTR_ERR(reloc_root); goto out; @@ -4206,7 +4380,7 @@ int btrfs_recover_relocation(struct btrfs_root *root) if (list_empty(&reloc_roots)) goto out; - rc = alloc_reloc_control(); + rc = alloc_reloc_control(root->fs_info); if (!rc) { err = -ENOMEM; goto out; @@ -4264,14 +4438,9 @@ int btrfs_recover_relocation(struct btrfs_root *root) out_free: kfree(rc); out: - while (!list_empty(&reloc_roots)) { - reloc_root = list_entry(reloc_roots.next, - struct btrfs_root, root_list); - list_del(&reloc_root->root_list); - free_extent_buffer(reloc_root->node); - free_extent_buffer(reloc_root->commit_root); - kfree(reloc_root); - } + if (!list_empty(&reloc_roots)) + free_reloc_roots(&reloc_roots); + btrfs_free_path(path); if (err == 0) { @@ -4295,12 +4464,11 @@ out: int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) { struct btrfs_ordered_sum *sums; - struct btrfs_sector_sum *sector_sum; struct btrfs_ordered_extent *ordered; struct btrfs_root *root = BTRFS_I(inode)->root; - size_t offset; int ret; u64 disk_bytenr; + u64 new_bytenr; LIST_HEAD(list); ordered = btrfs_lookup_ordered_extent(inode, file_pos); @@ -4316,15 +4484,20 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) sums = list_entry(list.next, struct btrfs_ordered_sum, list); list_del_init(&sums->list); - sector_sum = sums->sums; - sums->bytenr = ordered->start; - - offset = 0; - while (offset < sums->len) { - sector_sum->bytenr += ordered->start - disk_bytenr; - sector_sum++; - offset += root->sectorsize; - } + /* + * We need to offset the new_bytenr based on where the csum is. + * We need to do this because we will read in entire prealloc + * extents but we may have written to say the middle of the + * prealloc extent, so we need to make sure the csum goes with + * the right disk offset. + * + * We can do this because the data reloc inode refers strictly + * to the on disk bytes, so we don't have to worry about + * disk_len vs real len like with real inodes since it's all + * disk length. + */ + new_bytenr = ordered->start + (sums->bytenr - disk_bytenr); + sums->bytenr = new_bytenr; btrfs_add_ordered_sum(inode, ordered, sums); } @@ -4333,23 +4506,28 @@ out: return ret; } -void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, - struct btrfs_root *root, struct extent_buffer *buf, - struct extent_buffer *cow) +int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, + struct btrfs_root *root, struct extent_buffer *buf, + struct extent_buffer *cow) { struct reloc_control *rc; struct backref_node *node; int first_cow = 0; int level; - int ret; + int ret = 0; rc = root->fs_info->reloc_ctl; if (!rc) - return; + return 0; BUG_ON(rc->stage == UPDATE_DATA_PTRS && root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID); + if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { + if (buf == root->node) + __update_reloc_root(root, cow->start); + } + level = btrfs_header_level(buf); if (btrfs_header_generation(buf) <= btrfs_root_last_snapshot(&root->root_item)) @@ -4381,10 +4559,9 @@ void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, rc->nodes_relocated += buf->len; } - if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) { + if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) ret = replace_file_extents(trans, rc, root, cow); - BUG_ON(ret); - } + return ret; } /* |
