diff options
-rw-r--r-- | fs/btrfs/ctree.c | 155 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 26 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 9 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 2034 | ||||
-rw-r--r-- | fs/btrfs/root-tree.c | 5 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 15 | ||||
-rw-r--r-- | fs/btrfs/volumes.c | 9 |
7 files changed, 1828 insertions, 425 deletions
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index f9cd40967d0..50e81f43e6d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -179,7 +179,6 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, struct extent_buffer *cow; u32 nritems; int ret = 0; - int different_trans = 0; int level; int unlock_orig = 0; @@ -233,13 +232,33 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, WARN_ON(btrfs_header_generation(buf) > trans->transid); if (btrfs_header_generation(buf) != trans->transid) { u32 nr_extents; - different_trans = 1; ret = btrfs_inc_ref(trans, root, buf, cow, &nr_extents); if (ret) return ret; ret = btrfs_cache_ref(trans, root, buf, nr_extents); WARN_ON(ret); + } else if (btrfs_header_owner(buf) == BTRFS_TREE_RELOC_OBJECTID) { + /* + * There are only two places that can drop reference to + * tree blocks owned by living reloc trees, one is here, + * the other place is btrfs_merge_path. In both places, + * we check reference count while tree block is locked. + * Furthermore, if reference count is one, it won't get + * increased by someone else. + */ + u32 refs; + ret = btrfs_lookup_extent_ref(trans, root, buf->start, + buf->len, &refs); + BUG_ON(ret); + if (refs == 1) { + ret = btrfs_update_ref(trans, root, buf, cow, + 0, nritems); + clean_tree_block(trans, root, buf); + } else { + ret = btrfs_inc_ref(trans, root, buf, cow, NULL); + } + BUG_ON(ret); } else { ret = btrfs_update_ref(trans, root, buf, cow, 0, nritems); if (ret) @@ -247,6 +266,14 @@ int noinline __btrfs_cow_block(struct btrfs_trans_handle *trans, clean_tree_block(trans, root, buf); } + if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { + ret = btrfs_add_reloc_mapping(root, buf->start, + buf->len, cow->start); + BUG_ON(ret); + ret = btrfs_reloc_tree_cache_ref(trans, root, cow, buf->start); + WARN_ON(ret); + } + if (buf == root->node) { WARN_ON(parent && parent != buf); @@ -1466,6 +1493,130 @@ done: return ret; } +int btrfs_merge_path(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_key *node_keys, + u64 *nodes, int lowest_level) +{ + struct extent_buffer *eb; + struct extent_buffer *parent; + struct btrfs_key key; + u64 bytenr; + u64 generation; + u32 blocksize; + int level; + int slot; + int key_match; + int ret; + + eb = btrfs_lock_root_node(root); + ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0); + BUG_ON(ret); + + parent = eb; + while (1) { + level = btrfs_header_level(parent); + if (level == 0 || level <= lowest_level) + break; + + ret = bin_search(parent, &node_keys[lowest_level], level, + &slot); + if (ret && slot > 0) + slot--; + + bytenr = btrfs_node_blockptr(parent, slot); + if (nodes[level - 1] == bytenr) + break; + + blocksize = btrfs_level_size(root, level - 1); + generation = btrfs_node_ptr_generation(parent, slot); + btrfs_node_key_to_cpu(eb, &key, slot); + key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key)); + + /* + * if node keys match and node pointer hasn't been modified + * in the running transaction, we can merge the path. for + * reloc trees, the node pointer check is skipped, this is + * because the reloc trees are fully controlled by the space + * balance code, no one else can modify them. + */ + if (!nodes[level - 1] || !key_match || + (generation == trans->transid && + root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID)) { +next_level: + if (level == 1 || level == lowest_level + 1) + break; + + eb = read_tree_block(root, bytenr, blocksize, + generation); + btrfs_tree_lock(eb); + + ret = btrfs_cow_block(trans, root, eb, parent, slot, + &eb, 0); + BUG_ON(ret); + + btrfs_tree_unlock(parent); + free_extent_buffer(parent); + parent = eb; + continue; + } + + if (generation == trans->transid) { + u32 refs; + BUG_ON(btrfs_header_owner(eb) != + BTRFS_TREE_RELOC_OBJECTID); + /* + * lock the block to keep __btrfs_cow_block from + * changing the reference count. + */ + eb = read_tree_block(root, bytenr, blocksize, + generation); + btrfs_tree_lock(eb); + + ret = btrfs_lookup_extent_ref(trans, root, bytenr, + blocksize, &refs); + BUG_ON(ret); + /* + * if replace block whose reference count is one, + * we have to "drop the subtree". so skip it for + * simplicity + */ + if (refs == 1) { + btrfs_tree_unlock(eb); + free_extent_buffer(eb); + goto next_level; + } + } + + btrfs_set_node_blockptr(parent, slot, nodes[level - 1]); + btrfs_set_node_ptr_generation(parent, slot, trans->transid); + btrfs_mark_buffer_dirty(parent); + + ret = btrfs_inc_extent_ref(trans, root, + nodes[level - 1], + blocksize, parent->start, + btrfs_header_owner(parent), + btrfs_header_generation(parent), + level - 1, 0); + BUG_ON(ret); + ret = btrfs_free_extent(trans, root, bytenr, + blocksize, parent->start, + btrfs_header_owner(parent), + btrfs_header_generation(parent), + level - 1, 0, 1); + BUG_ON(ret); + + if (generation == trans->transid) { + btrfs_tree_unlock(eb); + free_extent_buffer(eb); + } + break; + } + btrfs_tree_unlock(parent); + free_extent_buffer(parent); + return 0; +} + /* * adjust the pointers going up the tree, starting at level * making sure the right key of each node is points to 'key'. diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 3e62a1b0a1f..2775e270881 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -604,6 +604,7 @@ struct btrfs_fs_info { struct mutex chunk_mutex; struct mutex drop_mutex; struct mutex volume_mutex; + struct mutex tree_reloc_mutex; struct list_head trans_list; struct list_head hashers; struct list_head dead_roots; @@ -647,6 +648,10 @@ struct btrfs_fs_info { struct task_struct *cleaner_kthread; int thread_pool_size; + /* tree relocation relocated fields */ + struct extent_io_tree reloc_mapping_tree; + struct list_head dead_reloc_roots; + struct btrfs_leaf_ref_tree reloc_ref_tree; struct btrfs_leaf_ref_tree shared_ref_tree; struct kobject super_kobj; @@ -698,6 +703,7 @@ struct btrfs_root { struct btrfs_leaf_ref_tree ref_tree_struct; struct btrfs_dirty_root *dirty_root; struct btrfs_root *log_root; + struct btrfs_root *reloc_root; struct btrfs_root_item root_item; struct btrfs_key root_key; @@ -1517,7 +1523,6 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytenr, u32 blocksize); -int btrfs_shrink_extent_tree(struct btrfs_root *root, u64 new_size); int btrfs_insert_extent_backref(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, @@ -1582,10 +1587,29 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytes_used, u64 type, u64 chunk_objectid, u64 chunk_offset, u64 size); +int btrfs_remove_block_group(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 group_start); +int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start); +int btrfs_free_reloc_root(struct btrfs_root *root); +int btrfs_drop_dead_reloc_roots(struct btrfs_root *root); +int btrfs_add_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, + u64 num_bytes, u64 new_bytenr); +int btrfs_get_reloc_mapping(struct btrfs_root *root, u64 orig_bytenr, + u64 num_bytes, u64 *new_bytenr); +void btrfs_free_reloc_mappings(struct btrfs_root *root); +int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct extent_buffer *buf, u64 orig_start); +int btrfs_add_dead_reloc_root(struct btrfs_root *root); +int btrfs_cleanup_reloc_trees(struct btrfs_root *root); /* ctree.c */ int btrfs_previous_item(struct btrfs_root *root, struct btrfs_path *path, u64 min_objectid, int type); +int btrfs_merge_path(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_key *node_keys, + u64 *nodes, int lowest_level); int btrfs_set_item_key_safe(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *new_key); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 8969fee2331..45bc3132b05 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1406,6 +1406,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->btree_inode->i_mapping, GFP_NOFS); fs_info->do_barriers = 1; + extent_io_tree_init(&fs_info->reloc_mapping_tree, + fs_info->btree_inode->i_mapping, GFP_NOFS); + INIT_LIST_HEAD(&fs_info->dead_reloc_roots); + btrfs_leaf_ref_tree_init(&fs_info->reloc_ref_tree); btrfs_leaf_ref_tree_init(&fs_info->shared_ref_tree); BTRFS_I(fs_info->btree_inode)->root = tree_root; @@ -1421,6 +1425,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, mutex_init(&fs_info->transaction_kthread_mutex); mutex_init(&fs_info->cleaner_mutex); mutex_init(&fs_info->volume_mutex); + mutex_init(&fs_info->tree_reloc_mutex); init_waitqueue_head(&fs_info->transaction_throttle); init_waitqueue_head(&fs_info->transaction_wait); init_waitqueue_head(&fs_info->async_submit_wait); @@ -1627,6 +1632,10 @@ struct btrfs_root *open_ctree(struct super_block *sb, ret = btrfs_recover_log_trees(log_tree_root); BUG_ON(ret); } + + ret = btrfs_cleanup_reloc_trees(tree_root); + BUG_ON(ret); + fs_info->last_trans_committed = btrfs_super_generation(disk_super); return tree_root; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 9ab099bc01a..8043b9d584a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -1834,6 +1834,7 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, u64 header_owner = btrfs_header_owner(buf); u64 header_transid = btrfs_header_generation(buf); if (header_owner != BTRFS_TREE_LOG_OBJECTID && + header_owner != BTRFS_TREE_RELOC_OBJECTID && header_transid == trans->transid && !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { clean_tree_block(NULL, root, buf); @@ -2487,6 +2488,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) return -ENOSPC; } btrfs_add_free_space(cache, start, len); + update_reserved_extents(root, start, len, 0); maybe_unlock_mutex(root); return 0; } @@ -2947,6 +2949,10 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, */ if (*level == 1) { ref = btrfs_lookup_leaf_ref(root, bytenr); + if (ref && ref->generation != ptr_gen) { + btrfs_free_leaf_ref(root, ref); + ref = NULL; + } if (ref) { ret = cache_drop_leaf_ref(trans, root, ref); BUG_ON(ret); @@ -3153,34 +3159,6 @@ out: return ret; } -int btrfs_free_block_groups(struct btrfs_fs_info *info) -{ - struct btrfs_block_group_cache *block_group; - struct rb_node *n; - - mutex_lock(&info->alloc_mutex); - spin_lock(&info->block_group_cache_lock); - while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { - block_group = rb_entry(n, struct btrfs_block_group_cache, - cache_node); - - spin_unlock(&info->block_group_cache_lock); - btrfs_remove_free_space_cache(block_group); - spin_lock(&info->block_group_cache_lock); - - rb_erase(&block_group->cache_node, - &info->block_group_cache_tree); - - spin_lock(&block_group->space_info->lock); - list_del(&block_group->list); - spin_unlock(&block_group->space_info->lock); - kfree(block_group); - } - spin_unlock(&info->block_group_cache_lock); - mutex_unlock(&info->alloc_mutex); - return 0; -} - static unsigned long calc_ra(unsigned long start, unsigned long last, unsigned long nr) { @@ -3192,37 +3170,43 @@ static int noinline relocate_inode_pages(struct inode *inode, u64 start, { u64 page_start; u64 page_end; + unsigned long first_index; unsigned long last_index; unsigned long i; struct page *page; struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct file_ra_state *ra; - unsigned long total_read = 0; - unsigned long ra_pages; struct btrfs_ordered_extent *ordered; - struct btrfs_trans_handle *trans; + unsigned int total_read = 0; + unsigned int total_dirty = 0; + int ret = 0; ra = kzalloc(sizeof(*ra), GFP_NOFS); mutex_lock(&inode->i_mutex); - i = start >> PAGE_CACHE_SHIFT; + first_index = start >> PAGE_CACHE_SHIFT; last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; - ra_pages = BTRFS_I(inode)->root->fs_info->bdi.ra_pages; + /* make sure the dirty trick played by the caller work */ + ret = invalidate_inode_pages2_range(inode->i_mapping, + first_index, last_index); + if (ret) + goto out_unlock; file_ra_state_init(ra, inode->i_mapping); - for (; i <= last_index; i++) { - if (total_read % ra_pages == 0) { + for (i = first_index ; i <= last_index; i++) { + if (total_read % ra->ra_pages == 0) { btrfs_force_ra(inode->i_mapping, ra, NULL, i, - calc_ra(i, last_index, ra_pages)); + calc_ra(i, last_index, ra->ra_pages)); } total_read++; again: if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) - goto truncate_racing; + BUG_ON(1); page = grab_cache_page(inode->i_mapping, i); if (!page) { + ret = -ENOMEM; goto out_unlock; } if (!PageUptodate(page)) { @@ -3231,6 +3215,7 @@ again: if (!PageUptodate(page)) { unlock_page(page); page_cache_release(page); + ret = -EIO; goto out_unlock; } } @@ -3251,14 +3236,13 @@ again: } set_page_extent_mapped(page); - /* - * make sure page_mkwrite is called for this page if userland - * wants to change it from mmap - */ - clear_page_dirty_for_io(page); - btrfs_set_extent_delalloc(inode, page_start, page_end); + if (i == first_index) + set_extent_bits(io_tree, page_start, page_end, + EXTENT_BOUNDARY, GFP_NOFS); + set_page_dirty(page); + total_dirty++; unlock_extent(io_tree, page_start, page_end, GFP_NOFS); unlock_page(page); @@ -3266,347 +3250,1457 @@ again: } out_unlock: - /* we have to start the IO in order to get the ordered extents - * instantiated. This allows the relocation to code to wait - * for all the ordered extents to hit the disk. - * - * Otherwise, it would constantly loop over the same extents - * because the old ones don't get deleted until the IO is - * started - */ - btrfs_fdatawrite_range(inode->i_mapping, start, start + len - 1, - WB_SYNC_NONE); kfree(ra); - trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); - if (trans) { - btrfs_end_transaction(trans, BTRFS_I(inode)->root); - mark_inode_dirty(inode); - } mutex_unlock(&inode->i_mutex); - return 0; + balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); + return ret; +} + +static int noinline relocate_data_extent(struct inode *reloc_inode, + struct btrfs_key *extent_key, + u64 offset) +{ + struct btrfs_root *root = BTRFS_I(reloc_inode)->root; + struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree; + struct extent_map *em; + + em = alloc_extent_map(GFP_NOFS); + BUG_ON(!em || IS_ERR(em)); + + em->start = extent_key->objectid - offset; + em->len = extent_key->offset; + em->block_start = extent_key->objectid; + em->bdev = root->fs_info->fs_devices->latest_bdev; + set_bit(EXTENT_FLAG_PINNED, &em->flags); + + /* setup extent map to cheat btrfs_readpage */ + mutex_lock(&BTRFS_I(reloc_inode)->extent_mutex); + while (1) { + int ret; + spin_lock(&em_tree->lock); + ret = add_extent_mapping(em_tree, em); + spin_unlock(&em_tree->lock); + if (ret != -EEXIST) { + free_extent_map(em); + break; + } + btrfs_drop_extent_cache(reloc_inode, em->start, + em->start + em->len - 1, 0); + } + mutex_unlock(&BTRFS_I(reloc_inode)->extent_mutex); -truncate_racing: - vmtruncate(inode, inode->i_size); - balance_dirty_pages_ratelimited_nr(inode->i_mapping, - total_read); - goto out_unlock; + return relocate_inode_pages(reloc_inode, extent_key->objectid - offset, + extent_key->offset); } -/* - * The back references tell us which tree holds a ref on a block, - * but it is possible for the tree root field in the reference to - * reflect the original root before a snapshot was made. In this - * case we should search through all the children of a given root - * to find potential holders of references on a block. - * - * Instead, we do something a little less fancy and just search - * all the roots for a given key/block combination. - */ -static int find_root_for_ref(struct btrfs_root *root, - struct btrfs_path *path, - struct btrfs_key *key0, - int level, - int file_key, - struct btrfs_root **found_root, - u64 bytenr) -{ - struct btrfs_key root_location; - struct btrfs_root *cur_root = *found_root; - struct btrfs_file_extent_item *file_extent; - u64 root_search_start = BTRFS_FS_TREE_OBJECTID; - u64 found_bytenr; - int ret; +struct btrfs_ref_path { + u64 extent_start; + u64 nodes[BTRFS_MAX_LEVEL]; + u64 root_objectid; + u64 root_generation; + u64 owner_objectid; + u64 owner_offset; + u32 num_refs; + int lowest_level; + int current_level; +}; - root_location.offset = (u64)-1; - root_location.type = BTRFS_ROOT_ITEM_KEY; - path->lowest_level = level; - path->reada = 0; - while(1) { - ret = btrfs_search_slot(NULL, cur_root, key0, path, 0, 0); - found_bytenr = 0; - if (ret == 0 && file_key) { - struct extent_buffer *leaf = path->nodes[0]; - file_extent = btrfs_item_ptr(leaf, path->slots[0], - struct btrfs_file_extent_item); - if (btrfs_file_extent_type(leaf, file_extent) == - BTRFS_FILE_EXTENT_REG) { - found_bytenr = - btrfs_file_extent_disk_bytenr(leaf, - file_extent); - } - } else if (!file_key) { - if (path->nodes[level]) - found_bytenr = path->nodes[level]->start; - } - - btrfs_release_path(cur_root, path); - - if (found_bytenr == bytenr) { - *found_root = cur_root; +struct disk_extent { + u64 disk_bytenr; + u64 disk_num_bytes; + u64 offset; + u64 num_bytes; +}; + +static int is_cowonly_root(u64 root_objectid) +{ + if (root_objectid == BTRFS_ROOT_TREE_OBJECTID || + root_objectid == BTRFS_EXTENT_TREE_OBJECTID || + root_objectid == BTRFS_CHUNK_TREE_OBJECTID || + root_objectid == BTRFS_DEV_TREE_OBJECTID || + root_objectid == BTRFS_TREE_LOG_OBJECTID) + return 1; + return 0; +} + +static int noinline __next_ref_path(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, + struct btrfs_ref_path *ref_path, + int first_time) +{ + struct extent_buffer *leaf; + struct btrfs_path *path; + struct btrfs_extent_ref *ref; + struct btrfs_key key; + struct btrfs_key found_key; + u64 bytenr; + u32 nritems; + int level; + int ret = 1; + + path = btrfs_alloc_path(); + if (!path) + return -ENOMEM; + + mutex_lock(&extent_root->fs_info->alloc_mutex); + + if (first_time) { + ref_path->lowest_level = -1; + ref_path->current_level = -1; + goto walk_up; + } +walk_down: + level = ref_path->current_level - 1; + while (level >= -1) { + u64 parent; + if (level < ref_path->lowest_level) + break; + + if (level >= 0) { + bytenr = ref_path->nodes[level]; + } else { + bytenr = ref_path->extent_start; + } + BUG_ON(bytenr == 0); + + parent = ref_path->nodes[level + 1]; + ref_path->nodes[level + 1] = 0; + ref_path->current_level = level; + BUG_ON(parent == 0); + + key.objectid = bytenr; + key.offset = parent + 1; + key.type = BTRFS_EXTENT_REF_KEY; + + ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); + if (ret < 0) + goto out; + BUG_ON(ret == 0); + + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(extent_root, path); + if (ret < 0) + goto out; + if (ret > 0) + goto next; + leaf = path->nodes[0]; + } + + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.objectid == bytenr && + found_key.type == BTRFS_EXTENT_REF_KEY) + goto found; +next: + level--; + btrfs_release_path(extent_root, path); + if (need_resched()) { + mutex_unlock(&extent_root->fs_info->alloc_mutex); + cond_resched(); + mutex_lock(&extent_root->fs_info->alloc_mutex); + } + } + /* reached lowest level */ + ret = 1; + goto out; +walk_up: + level = ref_path->current_level; + while (level < BTRFS_MAX_LEVEL - 1) { + u64 ref_objectid; + if (level >= 0) { + bytenr = ref_path->nodes[level]; + } else { + bytenr = ref_path->extent_start; + } + BUG_ON(bytenr == 0); + + key.objectid = bytenr; + key.offset = 0; + key.type = BTRFS_EXTENT_REF_KEY; + + ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); + if (ret < 0) + goto out; + + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(extent_root, path); + if (ret < 0) + goto out; + if (ret > 0) { + /* the extent was freed by someone */ + if (ref_path->lowest_level == level) + goto out; + btrfs_release_path(extent_root, path); + goto walk_down; + } + leaf = path->nodes[0]; + } + + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.objectid != bytenr || + found_key.type != BTRFS_EXTENT_REF_KEY) { + /* the extent was freed by someone */ + if (ref_path->lowest_level == level) { + ret = 1; + goto out; + } + btrfs_release_path(extent_root, path); + goto walk_down; + } +found: + ref = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_extent_ref); + ref_objectid = btrfs_ref_objectid(leaf, ref); + if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) { + if (first_time) { + level = (int)ref_objectid; + BUG_ON(level >= BTRFS_MAX_LEVEL); + ref_path->lowest_level = level; + ref_path->current_level = level; + ref_path->nodes[level] = bytenr; + } else { + WARN_ON(ref_objectid != level); + } + } else { + WARN_ON(level != -1); + } + first_time = 0; + + if (ref_path->lowest_level == level) { + ref_path->owner_objectid = ref_objectid; + ref_path->owner_offset = btrfs_ref_offset(leaf, ref); + ref_path->num_refs = btrfs_ref_num_refs(leaf, ref); + } + + /* + * the block is tree root or the block isn't in reference + * counted tree. + */ + if (found_key.objectid == found_key.offset || + is_cowonly_root(btrfs_ref_root(leaf, ref))) { + ref_path->root_objectid = btrfs_ref_root(leaf, ref); + ref_path->root_generation = + btrfs_ref_generation(leaf, ref); + if (level < 0) { + /* special reference from the tree log */ + ref_path->nodes[0] = found_key.offset; + ref_path->current_level = 0; + } ret = 0; goto out; } - ret = btrfs_search_root(root->fs_info->tree_root, - root_search_start, &root_search_start); - if (ret) - break; - root_location.objectid = root_search_start; - cur_root = btrfs_read_fs_root_no_name(root->fs_info, - &root_location); - if (!cur_root) { - ret = 1; - break; + level++; + BUG_ON(ref_path->nodes[level] != 0); + ref_path->nodes[level] = found_key.offset; + ref_path->current_level = level; + + /* + * the reference was created in the running transaction, + * no need to continue walking up. + */ + if (btrfs_ref_generation(leaf, ref) == trans->transid) { + ref_path->root_objectid = btrfs_ref_root(leaf, ref); + ref_path->root_generation = + btrfs_ref_generation(leaf, ref); + ret = 0; + goto out; + } + + btrfs_release_path(extent_root, path); + if (need_resched()) { + mutex_unlock(&extent_root->fs_info->alloc_mutex); + cond_resched(); + mutex_lock(&extent_root->fs_info->alloc_mutex); } } + /* reached max tree level, but no tree root found. */ + BUG(); out: - path->lowest_level = 0; + mutex_unlock(&extent_root->fs_info->alloc_mutex); + btrfs_free_path(path); return ret; } -/* - * note, this releases the path - */ -static int noinline relocate_one_reference(struct btrfs_root *extent_root, - struct btrfs_path *path, - struct btrfs_key *extent_key, - u64 *last_file_objectid, - u64 *last_file_offset, - u64 *last_file_root, - u64 last_extent) -{ - struct inode *inode; - struct btrfs_root *found_root; - struct btrfs_key root_location; +static int btrfs_first_ref_path(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, + struct btrfs_ref_path *ref_path, + u64 extent_start) +{ + memset(ref_path, 0, sizeof(*ref_path)); + ref_path->extent_start = extent_start; + + return __next_ref_path(trans, extent_root, ref_path, 1); +} + +static int btrfs_next_ref_path(struct btrfs_trans_handle *trans, + struct btrfs_root *extent_root, + struct btrfs_ref_path *ref_path) +{ + return __next_ref_path(trans, extent_root, ref_path, 0); +} + +static int noinline get_new_locations(struct inode *reloc_inode, + struct btrfs_key *extent_key, + u64 offset, int no_fragment, + struct disk_extent **extents, + int *nr_extents) +{ + struct btrfs_root *root = BTRFS_I(reloc_inode)->root; + struct btrfs_path *path; + struct btrfs_file_extent_item *fi; + struct extent_buffer *leaf; + struct disk_extent *exts = *extents; struct btrfs_key found_key; - struct btrfs_extent_ref *ref; - u64 ref_root; - u64 ref_gen; - u64 ref_objectid; - u64 ref_offset; + u64 cur_pos; + u64 last_byte; + u32 nritems; + int nr = 0; + int max = *nr_extents; int ret; - int level; - WARN_ON(!mutex_is_locked(&extent_root->fs_info->alloc_mutex)); + WARN_ON(!no_fragment && *extents); + if (!exts) { + max = 1; + exts = kmalloc(sizeof(*exts) * max, GFP_NOFS); + if (!exts) + return -ENOMEM; + } - ref = btrfs_item_ptr(path->nodes[0], path->slots[0], - struct btrfs_extent_ref); - ref_root = btrfs_ref_root(path->nodes[0], ref); - ref_gen = btrfs_ref_generation(path->nodes[0], ref); - ref_objectid = btrfs_ref_objectid(path->nodes[0], ref); - ref_offset = btrfs_ref_offset(path->nodes[0], ref); - btrfs_release_path(extent_root, path); + path = btrfs_alloc_path(); + BUG_ON(!path); - root_location.objectid = ref_root; - if (ref_gen == 0) - root_location.offset = 0; - else - root_location.offset = (u64)-1; - root_location.type = BTRFS_ROOT_ITEM_KEY; + cur_pos = extent_key->objectid - offset; + last_byte = extent_key->objectid + extent_key->offset; + ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, + cur_pos, 0); + if (ret < 0) + goto out; + if (ret > 0) { + ret = -ENOENT; + goto out; + } - found_root = btrfs_read_fs_root_no_name(extent_root->fs_info, - &root_location); - BUG_ON(!found_root); - mutex_unlock(&extent_root->fs_info->alloc_mutex); + while (1) { + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + if (path->slots[0] >= nritems) { + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto out; + if (ret > 0) + break; + leaf = path->nodes[0]; + } - if (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID) { - found_key.objectid = ref_objectid; - found_key.type = BTRFS_EXTENT_DATA_KEY; - found_key.offset = ref_offset; - level = 0; + btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); + if (found_key.offset != cur_pos || + found_key.type != BTRFS_EXTENT_DATA_KEY || + found_key.objectid != reloc_inode->i_ino) + break; - if (last_extent == extent_key->objectid && - *last_file_objectid == ref_objectid && - *last_file_offset == ref_offset && - *last_file_root == ref_root) - goto out; + fi = btrfs_item_ptr(leaf, path->slots[0], + struct btrfs_file_extent_item); + if (btrfs_file_extent_type(leaf, fi) != + BTRFS_FILE_EXTENT_REG || + btrfs_file_extent_disk_bytenr(leaf, fi) == 0) + break; - ret = find_root_for_ref(extent_root, path, &found_key, - level, 1, &found_root, - extent_key->objectid); + if (nr == max) { + struct disk_extent *old = exts; + max *= 2; + exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); + memcpy(exts, old, sizeof(*exts) * nr); + if (old != *extents) + kfree(old); + } - if (ret) + exts[nr].disk_bytenr = + btrfs_file_extent_disk_bytenr(leaf, fi); + exts[nr].disk_num_bytes = + btrfs_file_extent_disk_num_bytes(leaf, fi); + exts[nr].offset = btrfs_file_extent_offset(leaf, fi); + exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi); + WARN_ON(exts[nr].offset > 0); + WARN_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes); + + cur_pos += exts[nr].num_bytes; + nr++; + + if (cur_pos + offset >= last_byte) + break; + + if (no_fragment) { + ret = 1; goto out; + } + path->slots[0]++; + } - if (last_extent == extent_key->objectid && - *last_file_objectid == ref_objectid && - *last_file_offset == ref_offset && - *last_file_root == ref_root) + WARN_ON(cur_pos + offset > last_byte); + if (cur_pos + offset < last_byte) { + ret = -ENOENT; + goto out; + } + ret = 0; +out: + btrfs_free_path(path); + if (ret) { + if (exts != *extents) + kfree(exts); + } else { + *extents = exts; + *nr_extents = nr; + } + return ret; +} + +static int noinline replace_one_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, + struct btrfs_path *path, + struct btrfs_key *extent_key, + struct btrfs_key *leaf_key, + struct btrfs_ref_path *ref_path, + struct disk_extent *new_extents, + int nr_extents) +{ + struct extent_buffer *leaf; + struct btrfs_file_extent_item *fi; + struct inode *inode = NULL; + struct btrfs_key key; + u64 lock_start = 0; + u64 lock_end = 0; + u64 num_bytes; + u64 ext_offset; + u64 first_pos; + u32 nritems; + int extent_locked = 0; + int ret; + + first_pos = ref_path->owner_offset; + if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { + key.objectid = ref_path->owner_objectid; + key.offset = ref_path->owner_offset; + key.type = BTRFS_EXTENT_DATA_KEY; + } else { + memcpy(&key, leaf_key, sizeof(key)); + } + + while (1) { + ret = btrfs_search_slot(trans, root, &key, path, 0, 1); + if (ret < 0) goto out; - inode = btrfs_iget_locked(extent_root->fs_info->sb, - ref_objectid, found_root); - if (inode->i_state & I_NEW) { - /* the inode and parent dir are two different roots */ - BTRFS_I(inode)->root = found_root; - BTRFS_I(inode)->location.objectid = ref_objectid; - BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; - BTRFS_I(inode)->location.offset = 0; - btrfs_read_locked_inode(inode); - unlock_new_inode(inode); + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); +next: + if (extent_locked && ret > 0) { + /* + * the file extent item was modified by someone + * before the extent got locked. + */ + mutex_unlock(&BTRFS_I(inode)->extent_mutex); + unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, + lock_end, GFP_NOFS); + extent_locked = 0; + } + + if (path->slots[0] >= nritems) { + if (ref_path->owner_objectid == + BTRFS_MULTIPLE_OBJECTIDS) + break; + + BUG_ON(extent_locked); + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto out; + if (ret > 0) + break; + leaf = path->nodes[0]; + nritems = btrfs_header_nritems(leaf); + } + + btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); + if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { + if ((key.objectid > ref_path->owner_objectid) || + (key.objectid == ref_path->owner_objectid && + key.type > BTRFS_EXTENT_DATA_KEY) || + (key.offset >= first_pos + extent_key->offset)) + break; } - /* this can happen if the reference is not against - * the latest version of the tree root - */ - if (is_bad_inode(inode)) - goto out; - *last_file_objectid = inode->i_ino; - *last_file_root = found_root->root_key.objectid; - *last_file_offset = ref_offset; + if (inode && key.objectid != inode->i_ino) { + BUG_ON(extent_locked); + btrfs_release_path(root, path); + mutex_unlock(&inode->i_mutex); + iput(inode); + inode = NULL; + continue; + } - relocate_inode_pages(inode, ref_offset, extent_key->offset); - iput(inode); - } else { - struct btrfs_trans_handle *trans; - struct extent_buffer *eb; - int needs_lock = 0; + if (key.type != BTRFS_EXTENT_DATA_KEY) { + path->slots[0]++; |