diff options
Diffstat (limited to 'fs/btrfs/relocation.c')
| -rw-r--r-- | fs/btrfs/relocation.c | 950 | 
1 files changed, 620 insertions, 330 deletions
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 045c9c2b2d7..65245a07275 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -30,6 +30,7 @@  #include "btrfs_inode.h"  #include "async-thread.h"  #include "free-space-cache.h" +#include "inode-map.h"  /*   * backref_node, mapping_node and tree_block start with this @@ -93,6 +94,7 @@ struct backref_edge {  #define LOWER	0  #define UPPER	1 +#define RELOCATION_RESERVED_NODES	256  struct backref_cache {  	/* red black tree of all backref nodes in the cache */ @@ -175,6 +177,8 @@ struct reloc_control {  	u64 merging_rsv_size;  	/* size of relocated tree nodes */  	u64 nodes_relocated; +	/* reserved size for block group relocation*/ +	u64 reserved_bytes;  	u64 search_start;  	u64 extents_found; @@ -183,7 +187,6 @@ struct reloc_control {  	unsigned int create_reloc_tree:1;  	unsigned int merge_reloc_tree:1;  	unsigned int found_file_extent:1; -	unsigned int commit_transaction:1;  };  /* stages of data relocation */ @@ -325,6 +328,18 @@ static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)  	return NULL;  } +static void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr) +{ + +	struct btrfs_fs_info *fs_info = NULL; +	struct backref_node *bnode = rb_entry(rb_node, struct backref_node, +					      rb_node); +	if (bnode->root) +		fs_info = bnode->root->fs_info; +	btrfs_panic(fs_info, errno, "Inconsistency in backref cache " +		    "found at offset %llu", bytenr); +} +  /*   * walk up backref nodes until reach node presents tree root   */ @@ -451,7 +466,8 @@ static void update_backref_node(struct backref_cache *cache,  	rb_erase(&node->rb_node, &cache->rb_root);  	node->bytenr = bytenr;  	rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); -	BUG_ON(rb_node); +	if (rb_node) +		backref_tree_panic(rb_node, -EEXIST, bytenr);  }  /* @@ -507,11 +523,12 @@ static int update_backref_cache(struct btrfs_trans_handle *trans,  	return 1;  } +  static int 
should_ignore_root(struct btrfs_root *root)  {  	struct btrfs_root *reloc_root; -	if (!root->ref_cows) +	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))  		return 0;  	reloc_root = root->reloc_root; @@ -529,7 +546,6 @@ static int should_ignore_root(struct btrfs_root *root)  	 */  	return 1;  } -  /*   * find reloc tree by address of tree root   */ @@ -557,7 +573,9 @@ static int is_cowonly_root(u64 root_objectid)  	    root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||  	    root_objectid == BTRFS_DEV_TREE_OBJECTID ||  	    root_objectid == BTRFS_TREE_LOG_OBJECTID || -	    root_objectid == BTRFS_CSUM_TREE_OBJECTID) +	    root_objectid == BTRFS_CSUM_TREE_OBJECTID || +	    root_objectid == BTRFS_UUID_TREE_OBJECTID || +	    root_objectid == BTRFS_QUOTA_TREE_OBJECTID)  		return 1;  	return 0;  } @@ -574,7 +592,7 @@ static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,  	else  		key.offset = (u64)-1; -	return btrfs_read_fs_root_no_name(fs_info, &key); +	return btrfs_get_fs_root(fs_info, &key, false);  }  #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 @@ -592,7 +610,7 @@ struct btrfs_root *find_tree_root(struct reloc_control *rc,  	root = read_fs_root(rc->extent_root->fs_info, root_objectid);  	BUG_ON(IS_ERR(root)); -	if (root->ref_cows && +	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) &&  	    generation != btrfs_root_generation(&root->root_item))  		return NULL; @@ -604,10 +622,13 @@ static noinline_for_stack  int find_inline_backref(struct extent_buffer *leaf, int slot,  			unsigned long *ptr, unsigned long *end)  { +	struct btrfs_key key;  	struct btrfs_extent_item *ei;  	struct btrfs_tree_block_info *bi;  	u32 item_size; +	btrfs_item_key_to_cpu(leaf, &key, slot); +  	item_size = btrfs_item_size_nr(leaf, slot);  #ifdef BTRFS_COMPAT_EXTENT_TREE_V0  	if (item_size < sizeof(*ei)) { @@ -619,13 +640,23 @@ int find_inline_backref(struct extent_buffer *leaf, int slot,  	WARN_ON(!(btrfs_extent_flags(leaf, ei) &  		  BTRFS_EXTENT_FLAG_TREE_BLOCK)); -	if (item_size <= 
sizeof(*ei) + sizeof(*bi)) { +	if (key.type == BTRFS_EXTENT_ITEM_KEY && +	    item_size <= sizeof(*ei) + sizeof(*bi)) {  		WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));  		return 1;  	} +	if (key.type == BTRFS_METADATA_ITEM_KEY && +	    item_size <= sizeof(*ei)) { +		WARN_ON(item_size < sizeof(*ei)); +		return 1; +	} -	bi = (struct btrfs_tree_block_info *)(ei + 1); -	*ptr = (unsigned long)(bi + 1); +	if (key.type == BTRFS_EXTENT_ITEM_KEY) { +		bi = (struct btrfs_tree_block_info *)(ei + 1); +		*ptr = (unsigned long)(bi + 1); +	} else { +		*ptr = (unsigned long)(ei + 1); +	}  	*end = (unsigned long)ei + item_size;  	return 0;  } @@ -669,6 +700,7 @@ struct backref_node *build_backref_tree(struct reloc_control *rc,  	int cowonly;  	int ret;  	int err = 0; +	bool need_check = true;  	path1 = btrfs_alloc_path();  	path2 = btrfs_alloc_path(); @@ -676,6 +708,8 @@ struct backref_node *build_backref_tree(struct reloc_control *rc,  		err = -ENOMEM;  		goto out;  	} +	path1->reada = 1; +	path2->reada = 2;  	node = alloc_backref_node(cache);  	if (!node) { @@ -691,7 +725,7 @@ again:  	end = 0;  	ptr = 0;  	key.objectid = cur->bytenr; -	key.type = BTRFS_EXTENT_ITEM_KEY; +	key.type = BTRFS_METADATA_ITEM_KEY;  	key.offset = (u64)-1;  	path1->search_commit_root = 1; @@ -709,7 +743,7 @@ again:  	WARN_ON(cur->checked);  	if (!list_empty(&cur->upper)) {  		/* -		 * the backref was added previously when processsing +		 * the backref was added previously when processing  		 * backref of type BTRFS_TREE_BLOCK_REF_KEY  		 */  		BUG_ON(!list_is_singular(&cur->upper)); @@ -749,7 +783,8 @@ again:  				break;  			} -			if (key.type == BTRFS_EXTENT_ITEM_KEY) { +			if (key.type == BTRFS_EXTENT_ITEM_KEY || +			    key.type == BTRFS_METADATA_ITEM_KEY) {  				ret = find_inline_backref(eb, path1->slots[0],  							  &ptr, &end);  				if (ret) @@ -852,7 +887,7 @@ again:  			goto out;  		} -		if (!root->ref_cows) +		if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))  			cur->cowonly = 1;  		if 
(btrfs_root_level(&root->root_item) == cur->level) { @@ -889,6 +924,7 @@ again:  			cur->bytenr);  		lower = cur; +		need_check = true;  		for (; level < BTRFS_MAX_LEVEL; level++) {  			if (!path2->nodes[level]) {  				BUG_ON(btrfs_root_bytenr(&root->root_item) != @@ -918,7 +954,8 @@ again:  				upper->bytenr = eb->start;  				upper->owner = btrfs_header_owner(eb);  				upper->level = lower->level + 1; -				if (!root->ref_cows) +				if (!test_bit(BTRFS_ROOT_REF_COWS, +					      &root->state))  					upper->cowonly = 1;  				/* @@ -932,14 +969,12 @@ again:  				/*  				 * add the block to pending list if we -				 * need check its backrefs. only block -				 * at 'cur->level + 1' is added to the -				 * tail of pending list. this guarantees -				 * we check backrefs from lower level -				 * blocks to upper level blocks. +				 * need check its backrefs, we only do this once +				 * while walking up a tree as we will catch +				 * anything else later on.  				 */ -				if (!upper->checked && -				    level == cur->level + 1) { +				if (!upper->checked && need_check) { +					need_check = false;  					list_add_tail(&edge->list[UPPER],  						      &list);  				} else @@ -961,7 +996,7 @@ again:  			lower = upper;  			upper = NULL;  		} -		btrfs_release_path(root, path2); +		btrfs_release_path(path2);  next:  		if (ptr < end) {  			ptr += btrfs_extent_inline_ref_size(key.type); @@ -974,7 +1009,7 @@ next:  		if (ptr >= end)  			path1->slots[0]++;  	} -	btrfs_release_path(rc->extent_root, path1); +	btrfs_release_path(path1);  	cur->checked = 1;  	WARN_ON(exist); @@ -996,7 +1031,8 @@ next:  	if (!cowonly) {  		rb_node = tree_insert(&cache->rb_root, node->bytenr,  				      &node->rb_node); -		BUG_ON(rb_node); +		if (rb_node) +			backref_tree_panic(rb_node, -EEXIST, node->bytenr);  		list_add_tail(&node->lower, &cache->leaves);  	} @@ -1031,7 +1067,9 @@ next:  		if (!cowonly) {  			rb_node = tree_insert(&cache->rb_root, upper->bytenr,  					      &upper->rb_node); -			
BUG_ON(rb_node); +			if (rb_node) +				backref_tree_panic(rb_node, -EEXIST, +						   upper->bytenr);  		}  		list_add_tail(&edge->list[UPPER], &upper->lower); @@ -1157,6 +1195,7 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,  	new_node->bytenr = dest->node->start;  	new_node->level = node->level;  	new_node->lowest = node->lowest; +	new_node->checked = 1;  	new_node->root = dest;  	if (!node->lowest) { @@ -1170,11 +1209,14 @@ static int clone_backref_node(struct btrfs_trans_handle *trans,  			list_add_tail(&new_edge->list[UPPER],  				      &new_node->lower);  		} +	} else { +		list_add_tail(&new_node->lower, &cache->leaves);  	}  	rb_node = tree_insert(&cache->rb_root, new_node->bytenr,  			      &new_node->rb_node); -	BUG_ON(rb_node); +	if (rb_node) +		backref_tree_panic(rb_node, -EEXIST, new_node->bytenr);  	if (!new_node->lowest) {  		list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) { @@ -1197,14 +1239,15 @@ fail:  /*   * helper to add 'address of tree root -> reloc tree' mapping   */ -static int __add_reloc_root(struct btrfs_root *root) +static int __must_check __add_reloc_root(struct btrfs_root *root)  {  	struct rb_node *rb_node;  	struct mapping_node *node;  	struct reloc_control *rc = root->fs_info->reloc_ctl;  	node = kmalloc(sizeof(*node), GFP_NOFS); -	BUG_ON(!node); +	if (!node) +		return -ENOMEM;  	node->bytenr = root->node->start;  	node->data = root; @@ -1213,17 +1256,23 @@ static int __add_reloc_root(struct btrfs_root *root)  	rb_node = tree_insert(&rc->reloc_root_tree.rb_root,  			      node->bytenr, &node->rb_node);  	spin_unlock(&rc->reloc_root_tree.lock); -	BUG_ON(rb_node); +	if (rb_node) { +		btrfs_panic(root->fs_info, -EEXIST, "Duplicate root found " +			    "for start=%llu while inserting into relocation " +			    "tree", node->bytenr); +		kfree(node); +		return -EEXIST; +	}  	list_add_tail(&root->root_list, &rc->reloc_roots);  	return 0;  }  /* - * helper to update/delete the 'address of tree root -> reloc 
tree' + * helper to delete the 'address of tree root -> reloc tree'   * mapping   */ -static int __update_reloc_root(struct btrfs_root *root, int del) +static void __del_reloc_root(struct btrfs_root *root)  {  	struct rb_node *rb_node;  	struct mapping_node *node = NULL; @@ -1231,26 +1280,53 @@ static int __update_reloc_root(struct btrfs_root *root, int del)  	spin_lock(&rc->reloc_root_tree.lock);  	rb_node = tree_search(&rc->reloc_root_tree.rb_root, -			      root->commit_root->start); +			      root->node->start);  	if (rb_node) {  		node = rb_entry(rb_node, struct mapping_node, rb_node);  		rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);  	}  	spin_unlock(&rc->reloc_root_tree.lock); +	if (!node) +		return;  	BUG_ON((struct btrfs_root *)node->data != root); -	if (!del) { -		spin_lock(&rc->reloc_root_tree.lock); -		node->bytenr = root->node->start; -		rb_node = tree_insert(&rc->reloc_root_tree.rb_root, -				      node->bytenr, &node->rb_node); -		spin_unlock(&rc->reloc_root_tree.lock); -		BUG_ON(rb_node); -	} else { -		list_del_init(&root->root_list); -		kfree(node); +	spin_lock(&root->fs_info->trans_lock); +	list_del_init(&root->root_list); +	spin_unlock(&root->fs_info->trans_lock); +	kfree(node); +} + +/* + * helper to update the 'address of tree root -> reloc tree' + * mapping + */ +static int __update_reloc_root(struct btrfs_root *root, u64 new_bytenr) +{ +	struct rb_node *rb_node; +	struct mapping_node *node = NULL; +	struct reloc_control *rc = root->fs_info->reloc_ctl; + +	spin_lock(&rc->reloc_root_tree.lock); +	rb_node = tree_search(&rc->reloc_root_tree.rb_root, +			      root->node->start); +	if (rb_node) { +		node = rb_entry(rb_node, struct mapping_node, rb_node); +		rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);  	} +	spin_unlock(&rc->reloc_root_tree.lock); + +	if (!node) +		return 0; +	BUG_ON((struct btrfs_root *)node->data != root); + +	spin_lock(&rc->reloc_root_tree.lock); +	node->bytenr = new_bytenr; +	rb_node = 
tree_insert(&rc->reloc_root_tree.rb_root, +			      node->bytenr, &node->rb_node); +	spin_unlock(&rc->reloc_root_tree.lock); +	if (rb_node) +		backref_tree_panic(rb_node, -EEXIST, node->bytenr);  	return 0;  } @@ -1261,6 +1337,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,  	struct extent_buffer *eb;  	struct btrfs_root_item *root_item;  	struct btrfs_key root_key; +	u64 last_snap = 0;  	int ret;  	root_item = kmalloc(sizeof(*root_item), GFP_NOFS); @@ -1276,6 +1353,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,  				      BTRFS_TREE_RELOC_OBJECTID);  		BUG_ON(ret); +		last_snap = btrfs_root_last_snapshot(&root->root_item);  		btrfs_set_root_last_snapshot(&root->root_item,  					     trans->transid - 1);  	} else { @@ -1301,6 +1379,12 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,  		memset(&root_item->drop_progress, 0,  		       sizeof(struct btrfs_disk_key));  		root_item->drop_level = 0; +		/* +		 * abuse rtransid, it is safe because it is impossible to +		 * receive data into a relocation tree. 
+		 */ +		btrfs_set_root_rtransid(root_item, last_snap); +		btrfs_set_root_otransid(root_item, trans->transid);  	}  	btrfs_tree_unlock(eb); @@ -1311,8 +1395,7 @@ static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans,  	BUG_ON(ret);  	kfree(root_item); -	reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, -						 &root_key); +	reloc_root = btrfs_read_fs_root(root->fs_info->tree_root, &root_key);  	BUG_ON(IS_ERR(reloc_root));  	reloc_root->last_trans = trans->transid;  	return reloc_root; @@ -1327,7 +1410,9 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,  {  	struct btrfs_root *reloc_root;  	struct reloc_control *rc = root->fs_info->reloc_ctl; +	struct btrfs_block_rsv *rsv;  	int clear_rsv = 0; +	int ret;  	if (root->reloc_root) {  		reloc_root = root->reloc_root; @@ -1339,15 +1424,17 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,  	    root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)  		return 0; -	if (!trans->block_rsv) { +	if (!trans->reloc_reserved) { +		rsv = trans->block_rsv;  		trans->block_rsv = rc->block_rsv;  		clear_rsv = 1;  	}  	reloc_root = create_reloc_root(trans, root, root->root_key.objectid);  	if (clear_rsv) -		trans->block_rsv = NULL; +		trans->block_rsv = rsv; -	__add_reloc_root(reloc_root); +	ret = __add_reloc_root(reloc_root); +	BUG_ON(ret < 0);  	root->reloc_root = reloc_root;  	return 0;  } @@ -1360,11 +1447,10 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,  {  	struct btrfs_root *reloc_root;  	struct btrfs_root_item *root_item; -	int del = 0;  	int ret;  	if (!root->reloc_root) -		return 0; +		goto out;  	reloc_root = root->reloc_root;  	root_item = &reloc_root->root_item; @@ -1372,11 +1458,9 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,  	if (root->fs_info->reloc_ctl->merge_reloc_tree &&  	    btrfs_root_refs(root_item) == 0) {  		root->reloc_root = NULL; -		del = 1; +		__del_reloc_root(reloc_root);  	} -	
__update_reloc_root(reloc_root, del); -  	if (reloc_root->commit_root != reloc_root->node) {  		btrfs_set_root_node(root_item, reloc_root->node);  		free_extent_buffer(reloc_root->commit_root); @@ -1386,6 +1470,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,  	ret = btrfs_update_root(trans, root->fs_info->tree_root,  				&reloc_root->root_key, root_item);  	BUG_ON(ret); + +out:  	return 0;  } @@ -1408,9 +1494,9 @@ again:  		prev = node;  		entry = rb_entry(node, struct btrfs_inode, rb_node); -		if (objectid < entry->vfs_inode.i_ino) +		if (objectid < btrfs_ino(&entry->vfs_inode))  			node = node->rb_left; -		else if (objectid > entry->vfs_inode.i_ino) +		else if (objectid > btrfs_ino(&entry->vfs_inode))  			node = node->rb_right;  		else  			break; @@ -1418,7 +1504,7 @@ again:  	if (!node) {  		while (prev) {  			entry = rb_entry(prev, struct btrfs_inode, rb_node); -			if (objectid <= entry->vfs_inode.i_ino) { +			if (objectid <= btrfs_ino(&entry->vfs_inode)) {  				node = prev;  				break;  			} @@ -1433,7 +1519,7 @@ again:  			return inode;  		} -		objectid = entry->vfs_inode.i_ino + 1; +		objectid = btrfs_ino(&entry->vfs_inode) + 1;  		if (cond_resched_lock(&root->inode_lock))  			goto again; @@ -1469,7 +1555,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,  		return -ENOMEM;  	bytenr -= BTRFS_I(reloc_inode)->index_cnt; -	ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, +	ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(reloc_inode),  				       bytenr, 0);  	if (ret < 0)  		goto out; @@ -1488,7 +1574,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,  	       btrfs_file_extent_other_encoding(leaf, fi));  	if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) { -		ret = 1; +		ret = -EINVAL;  		goto out;  	} @@ -1519,7 +1605,7 @@ int replace_file_extents(struct btrfs_trans_handle *trans,  	u64 end;  	u32 nritems;  	u32 i; -	int ret; +	int ret = 0;  	int 
first = 1;  	int dirty = 0; @@ -1557,11 +1643,11 @@ int replace_file_extents(struct btrfs_trans_handle *trans,  			if (first) {  				inode = find_next_inode(root, key.objectid);  				first = 0; -			} else if (inode && inode->i_ino < key.objectid) { +			} else if (inode && btrfs_ino(inode) < key.objectid) {  				btrfs_add_delayed_iput(inode);  				inode = find_next_inode(root, key.objectid);  			} -			if (inode && inode->i_ino == key.objectid) { +			if (inode && btrfs_ino(inode) == key.objectid) {  				end = key.offset +  				      btrfs_file_extent_num_bytes(leaf, fi);  				WARN_ON(!IS_ALIGNED(key.offset, @@ -1569,25 +1655,26 @@ int replace_file_extents(struct btrfs_trans_handle *trans,  				WARN_ON(!IS_ALIGNED(end, root->sectorsize));  				end--;  				ret = try_lock_extent(&BTRFS_I(inode)->io_tree, -						      key.offset, end, -						      GFP_NOFS); +						      key.offset, end);  				if (!ret)  					continue;  				btrfs_drop_extent_cache(inode, key.offset, end,  							1);  				unlock_extent(&BTRFS_I(inode)->io_tree, -					      key.offset, end, GFP_NOFS); +					      key.offset, end);  			}  		}  		ret = get_new_location(rc->data_inode, &new_bytenr,  				       bytenr, num_bytes); -		if (ret > 0) { -			WARN_ON(1); -			continue; +		if (ret) { +			/* +			 * Don't have to abort since we've not changed anything +			 * in the file extent yet. 
+			 */ +			break;  		} -		BUG_ON(ret < 0);  		btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);  		dirty = 1; @@ -1596,19 +1683,25 @@ int replace_file_extents(struct btrfs_trans_handle *trans,  		ret = btrfs_inc_extent_ref(trans, root, new_bytenr,  					   num_bytes, parent,  					   btrfs_header_owner(leaf), -					   key.objectid, key.offset); -		BUG_ON(ret); +					   key.objectid, key.offset, 1); +		if (ret) { +			btrfs_abort_transaction(trans, root, ret); +			break; +		}  		ret = btrfs_free_extent(trans, root, bytenr, num_bytes,  					parent, btrfs_header_owner(leaf), -					key.objectid, key.offset); -		BUG_ON(ret); +					key.objectid, key.offset, 1); +		if (ret) { +			btrfs_abort_transaction(trans, root, ret); +			break; +		}  	}  	if (dirty)  		btrfs_mark_buffer_dirty(leaf);  	if (inode)  		btrfs_add_delayed_iput(inode); -	return 0; +	return ret;  }  static noinline_for_stack @@ -1708,8 +1801,7 @@ again:  			new_ptr_gen = 0;  		} -		if (new_bytenr > 0 && new_bytenr == old_bytenr) { -			WARN_ON(1); +		if (WARN_ON(new_bytenr > 0 && new_bytenr == old_bytenr)) {  			ret = level;  			break;  		} @@ -1723,6 +1815,11 @@ again:  			eb = read_tree_block(dest, old_bytenr, blocksize,  					     old_ptr_gen); +			if (!eb || !extent_buffer_uptodate(eb)) { +				ret = (!eb) ? 
-ENOMEM : -EIO; +				free_extent_buffer(eb); +				break; +			}  			btrfs_tree_lock(eb);  			if (cow) {  				ret = btrfs_cow_block(trans, dest, eb, parent, @@ -1747,7 +1844,7 @@ again:  		btrfs_node_key_to_cpu(path->nodes[level], &key,  				      path->slots[level]); -		btrfs_release_path(src, path); +		btrfs_release_path(path);  		path->lowest_level = level;  		ret = btrfs_search_slot(trans, src, &key, path, 0, 1); @@ -1769,21 +1866,23 @@ again:  		ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize,  					path->nodes[level]->start, -					src->root_key.objectid, level - 1, 0); +					src->root_key.objectid, level - 1, 0, +					1);  		BUG_ON(ret);  		ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize,  					0, dest->root_key.objectid, level - 1, -					0); +					0, 1);  		BUG_ON(ret);  		ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,  					path->nodes[level]->start, -					src->root_key.objectid, level - 1, 0); +					src->root_key.objectid, level - 1, 0, +					1);  		BUG_ON(ret);  		ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,  					0, dest->root_key.objectid, level - 1, -					0); +					0, 1);  		BUG_ON(ret);  		btrfs_unlock_up_safe(path, 0); @@ -1873,6 +1972,10 @@ int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,  		bytenr = btrfs_node_blockptr(eb, path->slots[i]);  		blocksize = btrfs_level_size(root, i - 1);  		eb = read_tree_block(root, bytenr, blocksize, ptr_gen); +		if (!eb || !extent_buffer_uptodate(eb)) { +			free_extent_buffer(eb); +			return -EIO; +		}  		BUG_ON(btrfs_header_level(eb) != i - 1);  		path->nodes[i - 1] = eb;  		path->slots[i - 1] = 0; @@ -1891,6 +1994,7 @@ static int invalidate_extent_cache(struct btrfs_root *root,  	struct inode *inode = NULL;  	u64 objectid;  	u64 start, end; +	u64 ino;  	objectid = min_key->objectid;  	while (1) { @@ -1903,17 +2007,18 @@ static int invalidate_extent_cache(struct btrfs_root *root,  		inode = find_next_inode(root, objectid);  		if (!inode) 
 			break; +		ino = btrfs_ino(inode); -		if (inode->i_ino > max_key->objectid) { +		if (ino > max_key->objectid) {  			iput(inode);  			break;  		} -		objectid = inode->i_ino + 1; +		objectid = ino + 1;  		if (!S_ISREG(inode->i_mode))  			continue; -		if (unlikely(min_key->objectid == inode->i_ino)) { +		if (unlikely(min_key->objectid == ino)) {  			if (min_key->type > BTRFS_EXTENT_DATA_KEY)  				continue;  			if (min_key->type < BTRFS_EXTENT_DATA_KEY) @@ -1926,7 +2031,7 @@ static int invalidate_extent_cache(struct btrfs_root *root,  			start = 0;  		} -		if (unlikely(max_key->objectid == inode->i_ino)) { +		if (unlikely(max_key->objectid == ino)) {  			if (max_key->type < BTRFS_EXTENT_DATA_KEY)  				continue;  			if (max_key->type > BTRFS_EXTENT_DATA_KEY) { @@ -1943,9 +2048,9 @@ static int invalidate_extent_cache(struct btrfs_root *root,  		}  		/* the lock_extent waits for readpage to complete */ -		lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); +		lock_extent(&BTRFS_I(inode)->io_tree, start, end);  		btrfs_drop_extent_cache(inode, start, end, 1); -		unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); +		unlock_extent(&BTRFS_I(inode)->io_tree, start, end);  	}  	return 0;  } @@ -1978,12 +2083,11 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,  	LIST_HEAD(inode_list);  	struct btrfs_key key;  	struct btrfs_key next_key; -	struct btrfs_trans_handle *trans; +	struct btrfs_trans_handle *trans = NULL;  	struct btrfs_root *reloc_root;  	struct btrfs_root_item *root_item;  	struct btrfs_path *path;  	struct extent_buffer *leaf; -	unsigned long nr;  	int level;  	int max_level;  	int replaced = 0; @@ -1994,6 +2098,7 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,  	path = btrfs_alloc_path();  	if (!path)  		return -ENOMEM; +	path->reada = 1;  	reloc_root = root->reloc_root;  	root_item = &reloc_root->root_item; @@ -2027,17 +2132,19 @@ static noinline_for_stack int merge_reloc_root(struct 
reloc_control *rc,  	memset(&next_key, 0, sizeof(next_key));  	while (1) { -		trans = btrfs_start_transaction(root, 0); -		trans->block_rsv = rc->block_rsv; - -		ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, -					    min_reserved, 0); +		ret = btrfs_block_rsv_refill(root, rc->block_rsv, min_reserved, +					     BTRFS_RESERVE_FLUSH_ALL);  		if (ret) { -			BUG_ON(ret != -EAGAIN); -			ret = btrfs_commit_transaction(trans, root); -			BUG_ON(ret); -			continue; +			err = ret; +			goto out;  		} +		trans = btrfs_start_transaction(root, 0); +		if (IS_ERR(trans)) { +			err = PTR_ERR(trans); +			trans = NULL; +			goto out; +		} +		trans->block_rsv = rc->block_rsv;  		replaced = 0;  		max_level = level; @@ -2082,10 +2189,10 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,  			       path->slots[level]);  		root_item->drop_level = level; -		nr = trans->blocks_used;  		btrfs_end_transaction_throttle(trans, root); +		trans = NULL; -		btrfs_btree_balance_dirty(root, nr); +		btrfs_btree_balance_dirty(root);  		if (replaced && rc->stage == UPDATE_DATA_PTRS)  			invalidate_extent_cache(root, &key, &next_key); @@ -2112,10 +2219,10 @@ out:  		btrfs_update_reloc_root(trans, root);  	} -	nr = trans->blocks_used; -	btrfs_end_transaction_throttle(trans, root); +	if (trans) +		btrfs_end_transaction_throttle(trans, root); -	btrfs_btree_balance_dirty(root, nr); +	btrfs_btree_balance_dirty(root);  	if (replaced && rc->stage == UPDATE_DATA_PTRS)  		invalidate_extent_cache(root, &key, &next_key); @@ -2133,20 +2240,27 @@ int prepare_to_merge(struct reloc_control *rc, int err)  	u64 num_bytes = 0;  	int ret; -	mutex_lock(&root->fs_info->trans_mutex); +	mutex_lock(&root->fs_info->reloc_mutex);  	rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;  	rc->merging_rsv_size += rc->nodes_relocated * 2; -	mutex_unlock(&root->fs_info->trans_mutex); +	mutex_unlock(&root->fs_info->reloc_mutex); +  again:  	if (!err) {  		num_bytes = 
rc->merging_rsv_size; -		ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv, -					  num_bytes); +		ret = btrfs_block_rsv_add(root, rc->block_rsv, num_bytes, +					  BTRFS_RESERVE_FLUSH_ALL);  		if (ret)  			err = ret;  	} -	trans = btrfs_join_transaction(rc->extent_root, 1); +	trans = btrfs_join_transaction(rc->extent_root); +	if (IS_ERR(trans)) { +		if (!err) +			btrfs_block_rsv_release(rc->extent_root, +						rc->block_rsv, num_bytes); +		return PTR_ERR(trans); +	}  	if (!err) {  		if (num_bytes != rc->merging_rsv_size) { @@ -2190,18 +2304,40 @@ again:  }  static noinline_for_stack +void free_reloc_roots(struct list_head *list) +{ +	struct btrfs_root *reloc_root; + +	while (!list_empty(list)) { +		reloc_root = list_entry(list->next, struct btrfs_root, +					root_list); +		__del_reloc_root(reloc_root); +	} +} + +static noinline_for_stack  int merge_reloc_roots(struct reloc_control *rc)  {  	struct btrfs_root *root;  	struct btrfs_root *reloc_root; +	u64 last_snap; +	u64 otransid; +	u64 objectid;  	LIST_HEAD(reloc_roots);  	int found = 0; -	int ret; +	int ret = 0;  again:  	root = rc->extent_root; -	mutex_lock(&root->fs_info->trans_mutex); + +	/* +	 * this serializes us with btrfs_record_root_in_transaction, +	 * we have to make sure nobody is in the middle of +	 * adding their roots to the list while we are +	 * doing this splice +	 */ +	mutex_lock(&root->fs_info->reloc_mutex);  	list_splice_init(&rc->reloc_roots, &reloc_roots); -	mutex_unlock(&root->fs_info->trans_mutex); +	mutex_unlock(&root->fs_info->reloc_mutex);  	while (!list_empty(&reloc_roots)) {  		found = 1; @@ -2215,19 +2351,53 @@ again:  			BUG_ON(root->reloc_root != reloc_root);  			ret = merge_reloc_root(rc, root); -			BUG_ON(ret); +			if (ret) { +				if (list_empty(&reloc_root->root_list)) +					list_add_tail(&reloc_root->root_list, +						      &reloc_roots); +				goto out; +			}  		} else {  			list_del_init(&reloc_root->root_list);  		} -		btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0); + +		
/* +		 * we keep the old last snapshot transid in rtransid when we +		 * created the relocation tree. +		 */ +		last_snap = btrfs_root_rtransid(&reloc_root->root_item); +		otransid = btrfs_root_otransid(&reloc_root->root_item); +		objectid = reloc_root->root_key.offset; + +		ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1); +		if (ret < 0) { +			if (list_empty(&reloc_root->root_list)) +				list_add_tail(&reloc_root->root_list, +					      &reloc_roots); +			goto out; +		}  	}  	if (found) {  		found = 0;  		goto again;  	} +out: +	if (ret) { +		btrfs_std_error(root->fs_info, ret); +		if (!list_empty(&reloc_roots)) +			free_reloc_roots(&reloc_roots); + +		/* new reloc root may be added */ +		mutex_lock(&root->fs_info->reloc_mutex); +		list_splice_init(&rc->reloc_roots, &reloc_roots); +		mutex_unlock(&root->fs_info->reloc_mutex); +		if (!list_empty(&reloc_roots)) +			free_reloc_roots(&reloc_roots); +	} +  	BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); -	return 0; +	return ret;  }  static void free_block_list(struct rb_root *blocks) @@ -2260,7 +2430,7 @@ static noinline_for_stack  struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,  				     struct reloc_control *rc,  				     struct backref_node *node, -				     struct backref_edge *edges[], int *nr) +				     struct backref_edge *edges[])  {  	struct backref_node *next;  	struct btrfs_root *root; @@ -2272,7 +2442,7 @@ struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,  		next = walk_up_backref(next, edges, &index);  		root = next->root;  		BUG_ON(!root); -		BUG_ON(!root->ref_cows); +		BUG_ON(!test_bit(BTRFS_ROOT_REF_COWS, &root->state));  		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {  			record_reloc_root_in_trans(trans, root); @@ -2302,7 +2472,6 @@ struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,  	if (!root)  		return NULL; -	*nr = index;  	next = node;  	/* setup backref node path for btrfs_reloc_cow_block */  	while (1) 
{ @@ -2337,8 +2506,8 @@ struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans,  		root = next->root;  		BUG_ON(!root); -		/* no other choice for non-refernce counted tree */ -		if (!root->ref_cows) +		/* no other choice for non-reference counted tree */ +		if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))  			return root;  		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) @@ -2398,27 +2567,36 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,  	struct btrfs_root *root = rc->extent_root;  	u64 num_bytes;  	int ret; +	u64 tmp;  	num_bytes = calcu_metadata_size(rc, node, 1) * 2;  	trans->block_rsv = rc->block_rsv; -	ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes); +	rc->reserved_bytes += num_bytes; +	ret = btrfs_block_rsv_refill(root, rc->block_rsv, num_bytes, +				BTRFS_RESERVE_FLUSH_ALL);  	if (ret) { -		if (ret == -EAGAIN) -			rc->commit_transaction = 1; +		if (ret == -EAGAIN) { +			tmp = rc->extent_root->nodesize * +				RELOCATION_RESERVED_NODES; +			while (tmp <= rc->reserved_bytes) +				tmp <<= 1; +			/* +			 * only one thread can access block_rsv at this point, +			 * so we don't need to hold the lock to protect block_rsv. +			 * we expand more reservation size here to allow enough +			 * space for relocation and we will return earlier in +			 * enospc case. +			 */ +			rc->block_rsv->size = tmp + rc->extent_root->nodesize * +					      RELOCATION_RESERVED_NODES; +		}  		return ret;  	}  	return 0;  } -static void release_metadata_space(struct reloc_control *rc, -				   struct backref_node *node) -{ -	u64 num_bytes = calcu_metadata_size(rc, node, 0) * 2; -	btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, num_bytes); -} -  /*   * relocate a block tree, and then update pointers in upper level   * blocks that reference the block to point to the new location. 
@@ -2440,7 +2618,6 @@ static int do_relocation(struct btrfs_trans_handle *trans,  	u32 blocksize;  	u64 bytenr;  	u64 generation; -	int nr;  	int slot;  	int ret;  	int err = 0; @@ -2453,7 +2630,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,  		cond_resched();  		upper = edge->node[UPPER]; -		root = select_reloc_root(trans, rc, upper, edges, &nr); +		root = select_reloc_root(trans, rc, upper, edges);  		BUG_ON(!root);  		if (upper->eb && !upper->locked) { @@ -2487,7 +2664,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,  			path->locks[upper->level] = 0;  			slot = path->slots[upper->level]; -			btrfs_release_path(NULL, path); +			btrfs_release_path(path);  		} else {  			ret = btrfs_bin_search(upper->eb, key, upper->level,  					       &slot); @@ -2505,6 +2682,11 @@ static int do_relocation(struct btrfs_trans_handle *trans,  		blocksize = btrfs_level_size(root, node->level);  		generation = btrfs_node_ptr_generation(upper->eb, slot);  		eb = read_tree_block(root, bytenr, blocksize, generation); +		if (!eb || !extent_buffer_uptodate(eb)) { +			free_extent_buffer(eb); +			err = -EIO; +			goto next; +		}  		btrfs_tree_lock(eb);  		btrfs_set_lock_blocking(eb); @@ -2529,7 +2711,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,  						node->eb->start, blocksize,  						upper->eb->start,  						btrfs_header_owner(upper->eb), -						node->level, 0); +						node->level, 0, 1);  			BUG_ON(ret);  			ret = btrfs_drop_subtree(trans, root, eb, upper->eb); @@ -2662,6 +2844,10 @@ static int get_tree_block_key(struct reloc_control *rc,  	BUG_ON(block->key_ready);  	eb = read_tree_block(rc->extent_root, block->bytenr,  			     block->key.objectid, block->key.offset); +	if (!eb || !extent_buffer_uptodate(eb)) { +		free_extent_buffer(eb); +		return -EIO; +	}  	WARN_ON(btrfs_header_level(eb) != block->level);  	if (block->level == 0)  		btrfs_item_key_to_cpu(eb, &block->key, 0); @@ -2676,8 +2862,13 @@ static int reada_tree_block(struct 
reloc_control *rc,  			    struct tree_block *block)  {  	BUG_ON(block->key_ready); -	readahead_tree_block(rc->extent_root, block->bytenr, -			     block->key.objectid, block->key.offset); +	if (block->key.type == BTRFS_METADATA_ITEM_KEY) +		readahead_tree_block(rc->extent_root, block->bytenr, +				     block->key.objectid, +				     rc->extent_root->leafsize); +	else +		readahead_tree_block(rc->extent_root, block->bytenr, +				     block->key.objectid, block->key.offset);  	return 0;  } @@ -2691,7 +2882,6 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,  				struct btrfs_path *path)  {  	struct btrfs_root *root; -	int release = 0;  	int ret = 0;  	if (!node) @@ -2704,15 +2894,14 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,  		goto out;  	} -	if (!root || root->ref_cows) { +	if (!root || test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {  		ret = reserve_metadata_space(trans, rc, node);  		if (ret)  			goto out; -		release = 1;  	}  	if (root) { -		if (root->ref_cows) { +		if (test_bit(BTRFS_ROOT_REF_COWS, &root->state)) {  			BUG_ON(node->new_bytenr);  			BUG_ON(!list_empty(&node->list));  			btrfs_record_root_in_trans(trans, root); @@ -2723,7 +2912,7 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,  		} else {  			path->lowest_level = node->level;  			ret = btrfs_search_slot(trans, root, key, path, 0, 1); -			btrfs_release_path(root, path); +			btrfs_release_path(path);  			if (ret > 0)  				ret = 0;  		} @@ -2733,11 +2922,8 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans,  		ret = do_relocation(trans, rc, node, key, path, 1);  	}  out: -	if (ret || node->level == 0 || node->cowonly) { -		if (release) -			release_metadata_space(rc, node); +	if (ret || node->level == 0 || node->cowonly)  		remove_backref_node(&rc->backref_cache, node); -	}  	return ret;  } @@ -2756,8 +2942,10 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,  	int err = 0;  	path = btrfs_alloc_path(); -	if 
(!path) -		return -ENOMEM; +	if (!path) { +		err = -ENOMEM; +		goto out_free_blocks; +	}  	rb_node = rb_first(blocks);  	while (rb_node) { @@ -2770,8 +2958,11 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,  	rb_node = rb_first(blocks);  	while (rb_node) {  		block = rb_entry(rb_node, struct tree_block, rb_node); -		if (!block->key_ready) -			get_tree_block_key(rc, block); +		if (!block->key_ready) { +			err = get_tree_block_key(rc, block); +			if (err) +				goto out_free_path; +		}  		rb_node = rb_next(rb_node);  	} @@ -2796,10 +2987,12 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans,  		rb_node = rb_next(rb_node);  	}  out: -	free_block_list(blocks);  	err = finish_pending_nodes(trans, rc, path, err); +out_free_path:  	btrfs_free_path(path); +out_free_blocks: +	free_block_list(blocks);  	return err;  } @@ -2830,12 +3023,12 @@ int prealloc_file_extent_cluster(struct inode *inode,  		else  			end = cluster->end - offset; -		lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); +		lock_extent(&BTRFS_I(inode)->io_tree, start, end);  		num_bytes = end + 1 - start;  		ret = btrfs_prealloc_file_range(inode, 0, start,  						num_bytes, num_bytes,  						end + 1, &alloc_hint); -		unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); +		unlock_extent(&BTRFS_I(inode)->io_tree, start, end);  		if (ret)  			break;  		nr++; @@ -2856,7 +3049,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,  	struct extent_map *em;  	int ret = 0; -	em = alloc_extent_map(GFP_NOFS); +	em = alloc_extent_map();  	if (!em)  		return -ENOMEM; @@ -2867,10 +3060,10 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,  	em->bdev = root->fs_info->fs_devices->latest_bdev;  	set_bit(EXTENT_FLAG_PINNED, &em->flags); -	lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); +	lock_extent(&BTRFS_I(inode)->io_tree, start, end);  	while (1) {  		write_lock(&em_tree->lock); -		ret = add_extent_mapping(em_tree, em); +		ret = 
add_extent_mapping(em_tree, em, 0);  		write_unlock(&em_tree->lock);  		if (ret != -EEXIST) {  			free_extent_map(em); @@ -2878,7 +3071,7 @@ int setup_extent_mapping(struct inode *inode, u64 start, u64 end,  		}  		btrfs_drop_extent_cache(inode, start, end, 0);  	} -	unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); +	unlock_extent(&BTRFS_I(inode)->io_tree, start, end);  	return ret;  } @@ -2892,6 +3085,7 @@ static int relocate_file_extent_cluster(struct inode *inode,  	unsigned long last_index;  	struct page *page;  	struct file_ra_state *ra; +	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);  	int nr = 0;  	int ret = 0; @@ -2925,7 +3119,8 @@ static int relocate_file_extent_cluster(struct inode *inode,  			page_cache_sync_readahead(inode->i_mapping,  						  ra, NULL, index,  						  last_index + 1 - index); -			page = grab_cache_page(inode->i_mapping, index); +			page = find_or_create_page(inode->i_mapping, index, +						   mask);  			if (!page) {  				btrfs_delalloc_release_metadata(inode,  							PAGE_CACHE_SIZE); @@ -2953,11 +3148,10 @@ static int relocate_file_extent_cluster(struct inode *inode,  			}  		} -		page_start = (u64)page->index << PAGE_CACHE_SHIFT; +		page_start = page_offset(page);  		page_end = page_start + PAGE_CACHE_SIZE - 1; -		lock_extent(&BTRFS_I(inode)->io_tree, -			    page_start, page_end, GFP_NOFS); +		lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end);  		set_page_extent_mapped(page); @@ -2973,7 +3167,7 @@ static int relocate_file_extent_cluster(struct inode *inode,  		set_page_dirty(page);  		unlock_extent(&BTRFS_I(inode)->io_tree, -			      page_start, page_end, GFP_NOFS); +			      page_start, page_end);  		unlock_page(page);  		page_cache_release(page); @@ -3075,17 +3269,22 @@ static int add_tree_block(struct reloc_control *rc,  	struct rb_node *rb_node;  	u32 item_size;  	int level = -1; -	int generation; +	u64 generation;  	eb =  path->nodes[0];  	item_size = btrfs_item_size_nr(eb, path->slots[0]); -	if 
(item_size >= sizeof(*ei) + sizeof(*bi)) { +	if (extent_key->type == BTRFS_METADATA_ITEM_KEY || +	    item_size >= sizeof(*ei) + sizeof(*bi)) {  		ei = btrfs_item_ptr(eb, path->slots[0],  				struct btrfs_extent_item); -		bi = (struct btrfs_tree_block_info *)(ei + 1); +		if (extent_key->type == BTRFS_EXTENT_ITEM_KEY) { +			bi = (struct btrfs_tree_block_info *)(ei + 1); +			level = btrfs_tree_block_level(eb, bi); +		} else { +			level = (int)extent_key->offset; +		}  		generation = btrfs_extent_generation(eb, ei); -		level = btrfs_tree_block_level(eb, bi);  	} else {  #ifdef BTRFS_COMPAT_EXTENT_TREE_V0  		u64 ref_owner; @@ -3105,7 +3304,7 @@ static int add_tree_block(struct reloc_control *rc,  #endif  	} -	btrfs_release_path(rc->extent_root, path); +	btrfs_release_path(path);  	BUG_ON(level == -1); @@ -3114,13 +3313,14 @@ static int add_tree_block(struct reloc_control *rc,  		return -ENOMEM;  	block->bytenr = extent_key->objectid; -	block->key.objectid = extent_key->offset; +	block->key.objectid = rc->extent_root->leafsize;  	block->key.offset = generation;  	block->level = level;  	block->key_ready = 0;  	rb_node = tree_insert(blocks, block->bytenr, &block->rb_node); -	BUG_ON(rb_node); +	if (rb_node) +		backref_tree_panic(rb_node, -EEXIST, block->bytenr);  	return 0;  } @@ -3135,6 +3335,8 @@ static int __add_tree_block(struct reloc_control *rc,  	struct btrfs_path *path;  	struct btrfs_key key;  	int ret; +	bool skinny = btrfs_fs_incompat(rc->extent_root->fs_info, +					SKINNY_METADATA);  	if (tree_block_processed(bytenr, blocksize, rc))  		return 0; @@ -3145,19 +3347,42 @@ static int __add_tree_block(struct reloc_control *rc,  	path = btrfs_alloc_path();  	if (!path)  		return -ENOMEM; - +again:  	key.objectid = bytenr; -	key.type = BTRFS_EXTENT_ITEM_KEY; -	key.offset = blocksize; +	if (skinny) { +		key.type = BTRFS_METADATA_ITEM_KEY; +		key.offset = (u64)-1; +	} else { +		key.type = BTRFS_EXTENT_ITEM_KEY; +		key.offset = blocksize; +	}  	path->search_commit_root 
= 1;  	path->skip_locking = 1;  	ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);  	if (ret < 0)  		goto out; + +	if (ret > 0 && skinny) { +		if (path->slots[0]) { +			path->slots[0]--; +			btrfs_item_key_to_cpu(path->nodes[0], &key, +					      path->slots[0]); +			if (key.objectid == bytenr && +			    (key.type == BTRFS_METADATA_ITEM_KEY || +			     (key.type == BTRFS_EXTENT_ITEM_KEY && +			      key.offset == blocksize))) +				ret = 0; +		} + +		if (ret) { +			skinny = false; +			btrfs_release_path(path); +			goto again; +		} +	}  	BUG_ON(ret); -	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);  	ret = add_tree_block(rc, &key, path, blocks);  out:  	btrfs_free_path(path); @@ -3178,7 +3403,8 @@ static int block_use_full_backref(struct reloc_control *rc,  		return 1;  	ret = btrfs_lookup_extent_info(NULL, rc->extent_root, -				       eb->start, eb->len, NULL, &flags); +				       eb->start, btrfs_header_level(eb), 1, +				       NULL, &flags);  	BUG_ON(ret);  	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) @@ -3192,10 +3418,8 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,  				    struct inode *inode, u64 ino)  {  	struct btrfs_key key; -	struct btrfs_path *path;  	struct btrfs_root *root = fs_info->tree_root;  	struct btrfs_trans_handle *trans; -	unsigned long nr;  	int ret = 0;  	if (inode) @@ -3206,31 +3430,28 @@ static int delete_block_group_cache(struct btrfs_fs_info *fs_info,  	key.offset = 0;  	inode = btrfs_iget(fs_info->sb, &key, root, NULL); -	if (!inode || IS_ERR(inode) || is_bad_inode(inode)) { -		if (inode && !IS_ERR(inode)) +	if (IS_ERR(inode) || is_bad_inode(inode)) { +		if (!IS_ERR(inode))  			iput(inode);  		return -ENOENT;  	}  truncate: -	path = btrfs_alloc_path(); -	if (!path) { -		ret = -ENOMEM; +	ret = btrfs_check_trunc_cache_free_space(root, +						 &fs_info->global_block_rsv); +	if (ret)  		goto out; -	} -	trans = btrfs_join_transaction(root, 0); +	trans = btrfs_join_transaction(root);  	if 
(IS_ERR(trans)) { -		btrfs_free_path(path); +		ret = PTR_ERR(trans);  		goto out;  	} -	ret = btrfs_truncate_free_space_cache(root, trans, path, inode); +	ret = btrfs_truncate_free_space_cache(root, trans, inode); -	btrfs_free_path(path); -	nr = trans->blocks_used;  	btrfs_end_transaction(trans, root); -	btrfs_btree_balance_dirty(root, nr); +	btrfs_btree_balance_dirty(root);  out:  	iput(inode);  	return ret; @@ -3282,6 +3503,7 @@ static int find_data_references(struct reloc_control *rc,  	path = btrfs_alloc_path();  	if (!path)  		return -ENOMEM; +	path->reada = 1;  	root = read_fs_root(rc->extent_root->fs_info, ref_root);  	if (IS_ERR(root)) { @@ -3290,8 +3512,11 @@ static int find_data_references(struct reloc_control *rc,  	}  	key.objectid = ref_objectid; -	key.offset = ref_offset;  	key.type = BTRFS_EXTENT_DATA_KEY; +	if (ref_offset > ((u64)-1 << 32)) +		key.offset = 0; +	else +		key.offset = ref_offset;  	path->search_commit_root = 1;  	path->skip_locking = 1; @@ -3326,10 +3551,8 @@ static int find_data_references(struct reloc_control *rc,  				err = ret;  				goto out;  			} -			if (ret > 0) { -				WARN_ON(1); +			if (WARN_ON(ret > 0))  				goto out; -			}  			leaf = path->nodes[0];  			nritems = btrfs_header_nritems(leaf); @@ -3349,11 +3572,9 @@ static int find_data_references(struct reloc_control *rc,  		}  		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); -		if (key.objectid != ref_objectid || -		    key.type != BTRFS_EXTENT_DATA_KEY) { -			WARN_ON(1); +		if (WARN_ON(key.objectid != ref_objectid || +		    key.type != BTRFS_EXTENT_DATA_KEY))  			break; -		}  		fi = btrfs_item_ptr(leaf, path->slots[0],  				    struct btrfs_file_extent_item); @@ -3387,7 +3608,9 @@ static int find_data_references(struct reloc_control *rc,  			block->key_ready = 1;  			rb_node = tree_insert(blocks, block->bytenr,  					      &block->rb_node); -			BUG_ON(rb_node); +			if (rb_node) +				backref_tree_panic(rb_node, -EEXIST, +						   block->bytenr);  		}  		if (counted)  			
added = 1; @@ -3403,7 +3626,7 @@ out:  }  /* - * hepler to find all tree blocks that reference a given data extent + * helper to find all tree blocks that reference a given data extent   */  static noinline_for_stack  int add_data_references(struct reloc_control *rc, @@ -3418,7 +3641,7 @@ int add_data_references(struct reloc_control *rc,  	unsigned long ptr;  	unsigned long end;  	u32 blocksize = btrfs_level_size(rc->extent_root, 0); -	int ret; +	int ret = 0;  	int err = 0;  	eb = path->nodes[0]; @@ -3445,6 +3668,10 @@ int add_data_references(struct reloc_control *rc,  		} else {  			BUG();  		} +		if (ret) { +			err = ret; +			goto out; +		}  		ptr += btrfs_extent_inline_ref_size(key.type);  	}  	WARN_ON(ptr > end); @@ -3490,14 +3717,15 @@ int add_data_references(struct reloc_control *rc,  		}  		path->slots[0]++;  	} -	btrfs_release_path(rc->extent_root, path); +out: +	btrfs_release_path(path);  	if (err)  		free_block_list(blocks);  	return err;  }  /* - * hepler to find next unprocessed extent + * helper to find next unprocessed extent   */  static noinline_for_stack  int find_next_extent(struct btrfs_trans_handle *trans, @@ -3542,43 +3770,62 @@ next:  			break;  		} -		if (key.type != BTRFS_EXTENT_ITEM_KEY || +		if (key.type != BTRFS_EXTENT_ITEM_KEY && +		    key.type != BTRFS_METADATA_ITEM_KEY) { +			path->slots[0]++; +			goto next; +		} + +		if (key.type == BTRFS_EXTENT_ITEM_KEY &&  		    key.objectid + key.offset <= rc->search_start) {  			path->slots[0]++;  			goto next;  		} +		if (key.type == BTRFS_METADATA_ITEM_KEY && +		    key.objectid + rc->extent_root->leafsize <= +		    rc->search_start) { +			path->slots[0]++; +			goto next; +		} +  		ret = find_first_extent_bit(&rc->processed_blocks,  					    key.objectid, &start, &end, -					    EXTENT_DIRTY); +					    EXTENT_DIRTY, NULL);  		if (ret == 0 && start <= key.objectid) { -			btrfs_release_path(rc->extent_root, path); +			btrfs_release_path(path);  			rc->search_start = end + 1;  		} else { -			
rc->search_start = key.objectid + key.offset; +			if (key.type == BTRFS_EXTENT_ITEM_KEY) +				rc->search_start = key.objectid + key.offset; +			else +				rc->search_start = key.objectid + +					rc->extent_root->leafsize;  			memcpy(extent_key, &key, sizeof(key));  			return 0;  		}  	} -	btrfs_release_path(rc->extent_root, path); +	btrfs_release_path(path);  	return ret;  }  static void set_reloc_control(struct reloc_control *rc)  {  	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; -	mutex_lock(&fs_info->trans_mutex); + +	mutex_lock(&fs_info->reloc_mutex);  	fs_info->reloc_ctl = rc; -	mutex_unlock(&fs_info->trans_mutex); +	mutex_unlock(&fs_info->reloc_mutex);  }  static void unset_reloc_control(struct reloc_control *rc)  {  	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; -	mutex_lock(&fs_info->trans_mutex); + +	mutex_lock(&fs_info->reloc_mutex);  	fs_info->reloc_ctl = NULL; -	mutex_unlock(&fs_info->trans_mutex); +	mutex_unlock(&fs_info->reloc_mutex);  }  static int check_extent_flags(u64 flags) @@ -3599,35 +3846,34 @@ static noinline_for_stack  int prepare_to_relocate(struct reloc_control *rc)  {  	struct btrfs_trans_handle *trans; -	int ret; -	rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root); +	rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root, +					      BTRFS_BLOCK_RSV_TEMP);  	if (!rc->block_rsv)  		return -ENOMEM; -	/* -	 * reserve some space for creating reloc trees. -	 * btrfs_init_reloc_root will use them when there -	 * is no reservation in transaction handle. 
-	 */ -	ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv, -				  rc->extent_root->nodesize * 256); -	if (ret) -		return ret; - -	rc->block_rsv->refill_used = 1; -	btrfs_add_durable_block_rsv(rc->extent_root->fs_info, rc->block_rsv); -  	memset(&rc->cluster, 0, sizeof(rc->cluster));  	rc->search_start = rc->block_group->key.objectid;  	rc->extents_found = 0;  	rc->nodes_relocated = 0;  	rc->merging_rsv_size = 0; +	rc->reserved_bytes = 0; +	rc->block_rsv->size = rc->extent_root->nodesize * +			      RELOCATION_RESERVED_NODES;  	rc->create_reloc_tree = 1;  	set_reloc_control(rc); -	trans = btrfs_join_transaction(rc->extent_root, 1); +	trans = btrfs_join_transaction(rc->extent_root); +	if (IS_ERR(trans)) { +		unset_reloc_control(rc); +		/* +		 * extent tree is not a ref_cow tree and has no reloc_root to +		 * cleanup.  And callers are responsible to free the above +		 * block rsv. +		 */ +		return PTR_ERR(trans); +	}  	btrfs_commit_transaction(trans, rc->extent_root);  	return 0;  } @@ -3639,15 +3885,16 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)  	struct btrfs_trans_handle *trans = NULL;  	struct btrfs_path *path;  	struct btrfs_extent_item *ei; -	unsigned long nr;  	u64 flags;  	u32 item_size;  	int ret;  	int err = 0; +	int progress = 0;  	path = btrfs_alloc_path();  	if (!path)  		return -ENOMEM; +	path->reada = 1;  	ret = prepare_to_relocate(rc);  	if (ret) { @@ -3656,8 +3903,22 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)  	}  	while (1) { +		rc->reserved_bytes = 0; +		ret = btrfs_block_rsv_refill(rc->extent_root, +					rc->block_rsv, rc->block_rsv->size, +					BTRFS_RESERVE_FLUSH_ALL); +		if (ret) { +			err = ret; +			break; +		} +		progress++;  		trans = btrfs_start_transaction(rc->extent_root, 0); - +		if (IS_ERR(trans)) { +			err = PTR_ERR(trans); +			trans = NULL; +			break; +		} +restart:  		if (update_backref_cache(trans, &rc->backref_cache)) {  			
btrfs_end_transaction(trans, rc->extent_root);  			continue; @@ -3694,7 +3955,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)  				flags = BTRFS_EXTENT_FLAG_DATA;  			if (path_change) { -				btrfs_release_path(rc->extent_root, path); +				btrfs_release_path(path);  				path->search_commit_root = 1;  				path->skip_locking = 1; @@ -3717,7 +3978,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)  			   (flags & BTRFS_EXTENT_FLAG_DATA)) {  			ret = add_data_references(rc, &key, path, &blocks);  		} else { -			btrfs_release_path(rc->extent_root, path); +			btrfs_release_path(path);  			ret = 0;  		}  		if (ret < 0) { @@ -3728,6 +3989,12 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)  		if (!RB_EMPTY_ROOT(&blocks)) {  			ret = relocate_tree_blocks(trans, rc, &blocks);  			if (ret < 0) { +				/* +				 * if we fail to relocate tree blocks, force to update +				 * backref cache when committing transaction. 
+				 */ +				rc->backref_cache.last_trans = trans->transid - 1; +  				if (ret != -EAGAIN) {  					err = ret;  					break; @@ -3737,26 +4004,8 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)  			}  		} -		ret = btrfs_block_rsv_check(trans, rc->extent_root, -					    rc->block_rsv, 0, 5); -		if (ret < 0) { -			if (ret != -EAGAIN) { -				err = ret; -				WARN_ON(1); -				break; -			} -			rc->commit_transaction = 1; -		} - -		if (rc->commit_transaction) { -			rc->commit_transaction = 0; -			ret = btrfs_commit_transaction(trans, rc->extent_root); -			BUG_ON(ret); -		} else { -			nr = trans->blocks_used; -			btrfs_end_transaction_throttle(trans, rc->extent_root); -			btrfs_btree_balance_dirty(rc->extent_root, nr); -		} +		btrfs_end_transaction_throttle(trans, rc->extent_root); +		btrfs_btree_balance_dirty(rc->extent_root);  		trans = NULL;  		if (rc->stage == MOVE_DATA_EXTENTS && @@ -3770,15 +4019,23 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)  			}  		}  	} +	if (trans && progress && err == -ENOSPC) { +		ret = btrfs_force_chunk_alloc(trans, rc->extent_root, +					      rc->block_group->flags); +		if (ret == 0) { +			err = 0; +			progress = 0; +			goto restart; +		} +	} -	btrfs_release_path(rc->extent_root, path); +	btrfs_release_path(path);  	clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,  			  GFP_NOFS);  	if (trans) { -		nr = trans->blocks_used;  		btrfs_end_transaction_throttle(trans, rc->extent_root); -		btrfs_btree_balance_dirty(rc->extent_root, nr); +		btrfs_btree_balance_dirty(rc->extent_root);  	}  	if (!err) { @@ -3803,8 +4060,11 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)  	btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1);  	/* get rid of pinned extents */ -	trans = btrfs_join_transaction(rc->extent_root, 1); -	btrfs_commit_transaction(trans, rc->extent_root); +	trans = btrfs_join_transaction(rc->extent_root); +	if 
(IS_ERR(trans)) +		err = PTR_ERR(trans); +	else +		btrfs_commit_transaction(trans, rc->extent_root);  out_free:  	btrfs_free_block_rsv(rc->extent_root, rc->block_rsv);  	btrfs_free_path(path); @@ -3836,7 +4096,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans,  	btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS |  					  BTRFS_INODE_PREALLOC);  	btrfs_mark_buffer_dirty(leaf); -	btrfs_release_path(root, path); +	btrfs_release_path(path);  out:  	btrfs_free_path(path);  	return ret; @@ -3854,7 +4114,6 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,  	struct btrfs_trans_handle *trans;  	struct btrfs_root *root;  	struct btrfs_key key; -	unsigned long nr;  	u64 objectid = BTRFS_FIRST_FREE_OBJECTID;  	int err = 0; @@ -3866,7 +4125,7 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,  	if (IS_ERR(trans))  		return ERR_CAST(trans); -	err = btrfs_find_free_objectid(trans, root, objectid, &objectid); +	err = btrfs_find_free_objectid(root, &objectid);  	if (err)  		goto out; @@ -3882,9 +4141,8 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,  	err = btrfs_orphan_add(trans, inode);  out: -	nr = trans->blocks_used;  	btrfs_end_transaction(trans, root); -	btrfs_btree_balance_dirty(root, nr); +	btrfs_btree_balance_dirty(root);  	if (err) {  		if (inode)  			iput(inode); @@ -3893,7 +4151,7 @@ out:  	return inode;  } -static struct reloc_control *alloc_reloc_control(void) +static struct reloc_control *alloc_reloc_control(struct btrfs_fs_info *fs_info)  {  	struct reloc_control *rc; @@ -3904,7 +4162,8 @@ static struct reloc_control *alloc_reloc_control(void)  	INIT_LIST_HEAD(&rc->reloc_roots);  	backref_cache_init(&rc->backref_cache);  	mapping_tree_init(&rc->reloc_root_tree); -	extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); +	extent_io_tree_init(&rc->processed_blocks, +			    fs_info->btree_inode->i_mapping);  	return rc;  } @@ -3921,7 +4180,7 @@ int btrfs_relocate_block_group(struct 
btrfs_root *extent_root, u64 group_start)  	int rw = 0;  	int err = 0; -	rc = alloc_reloc_control(); +	rc = alloc_reloc_control(fs_info);  	if (!rc)  		return -ENOMEM; @@ -3966,19 +4225,19 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)  		goto out;  	} -	printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n", -	       (unsigned long long)rc->block_group->key.objectid, -	       (unsigned long long)rc->block_group->flags); +	btrfs_info(extent_root->fs_info, "relocating block group %llu flags %llu", +	       rc->block_group->key.objectid, rc->block_group->flags); -	btrfs_start_delalloc_inodes(fs_info->tree_root, 0); -	btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0); +	ret = btrfs_start_delalloc_roots(fs_info, 0, -1); +	if (ret < 0) { +		err = ret; +		goto out; +	} +	btrfs_wait_ordered_roots(fs_info, -1);  	while (1) {  		mutex_lock(&fs_info->cleaner_mutex); - -		btrfs_clean_old_snapshots(fs_info->tree_root);  		ret = relocate_block_group(rc); -  		mutex_unlock(&fs_info->cleaner_mutex);  		if (ret < 0) {  			err = ret; @@ -3988,22 +4247,22 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)  		if (rc->extents_found == 0)  			break; -		printk(KERN_INFO "btrfs: found %llu extents\n", -			(unsigned long long)rc->extents_found); +		btrfs_info(extent_root->fs_info, "found %llu extents", +			rc->extents_found);  		if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) { -			btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1); +			ret = btrfs_wait_ordered_range(rc->data_inode, 0, +						       (u64)-1); +			if (ret) { +				err = ret; +				goto out; +			}  			invalidate_mapping_pages(rc->data_inode->i_mapping,  						 0, -1);  			rc->stage = UPDATE_DATA_PTRS;  		}  	} -	filemap_write_and_wait_range(fs_info->btree_inode->i_mapping, -				     rc->block_group->key.objectid, -				     rc->block_group->key.objectid + -				     rc->block_group->key.offset - 1); -  	
WARN_ON(rc->block_group->pinned > 0);  	WARN_ON(rc->block_group->reserved > 0);  	WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); @@ -4019,9 +4278,11 @@ out:  static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)  {  	struct btrfs_trans_handle *trans; -	int ret; +	int ret, err;  	trans = btrfs_start_transaction(root->fs_info->tree_root, 0); +	if (IS_ERR(trans)) +		return PTR_ERR(trans);  	memset(&root->root_item.drop_progress, 0,  		sizeof(root->root_item.drop_progress)); @@ -4029,11 +4290,11 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root)  	btrfs_set_root_refs(&root->root_item, 0);  	ret = btrfs_update_root(trans, root->fs_info->tree_root,  				&root->root_key, &root->root_item); -	BUG_ON(ret); -	ret = btrfs_end_transaction(trans, root->fs_info->tree_root); -	BUG_ON(ret); -	return 0; +	err = btrfs_end_transaction(trans, root->fs_info->tree_root); +	if (err) +		return err; +	return ret;  }  /* @@ -4058,6 +4319,7 @@ int btrfs_recover_relocation(struct btrfs_root *root)  	path = btrfs_alloc_path();  	if (!path)  		return -ENOMEM; +	path->reada = -1;  	key.objectid = BTRFS_TREE_RELOC_OBJECTID;  	key.type = BTRFS_ROOT_ITEM_KEY; @@ -4077,13 +4339,13 @@ int btrfs_recover_relocation(struct btrfs_root *root)  		}  		leaf = path->nodes[0];  		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); -		btrfs_release_path(root->fs_info->tree_root, path); +		btrfs_release_path(path);  		if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||  		    key.type != BTRFS_ROOT_ITEM_KEY)  			break; -		reloc_root = btrfs_read_fs_root_no_radix(root, &key); +		reloc_root = btrfs_read_fs_root(root, &key);  		if (IS_ERR(reloc_root)) {  			err = PTR_ERR(reloc_root);  			goto out; @@ -4100,7 +4362,11 @@ int btrfs_recover_relocation(struct btrfs_root *root)  					err = ret;  					goto out;  				} -				mark_garbage_root(reloc_root); +				ret = mark_garbage_root(reloc_root); +				if (ret < 0) { +					err = ret; +					goto out; +				}  			}  		} @@ 
-4109,12 +4375,12 @@ int btrfs_recover_relocation(struct btrfs_root *root)  		key.offset--;  	} -	btrfs_release_path(root->fs_info->tree_root, path); +	btrfs_release_path(path);  	if (list_empty(&reloc_roots))  		goto out; -	rc = alloc_reloc_control(); +	rc = alloc_reloc_control(root->fs_info);  	if (!rc) {  		err = -ENOMEM;  		goto out; @@ -4124,7 +4390,12 @@ int btrfs_recover_relocation(struct btrfs_root *root)  	set_reloc_control(rc); -	trans = btrfs_join_transaction(rc->extent_root, 1); +	trans = btrfs_join_transaction(rc->extent_root); +	if (IS_ERR(trans)) { +		unset_reloc_control(rc); +		err = PTR_ERR(trans); +		goto out_free; +	}  	rc->merge_reloc_tree = 1; @@ -4141,30 +4412,35 @@ int btrfs_recover_relocation(struct btrfs_root *root)  		fs_root = read_fs_root(root->fs_info,  				       reloc_root->root_key.offset); -		BUG_ON(IS_ERR(fs_root)); +		if (IS_ERR(fs_root)) { +			err = PTR_ERR(fs_root); +			goto out_free; +		} -		__add_reloc_root(reloc_root); +		err = __add_reloc_root(reloc_root); +		BUG_ON(err < 0); /* -ENOMEM or logic error */  		fs_root->reloc_root = reloc_root;  	} -	btrfs_commit_transaction(trans, rc->extent_root); +	err = btrfs_commit_transaction(trans, rc->extent_root); +	if (err) +		goto out_free;  	merge_reloc_roots(rc);  	unset_reloc_control(rc); -	trans = btrfs_join_transaction(rc->extent_root, 1); -	btrfs_commit_transaction(trans, rc->extent_root); -out: +	trans = btrfs_join_transaction(rc->extent_root); +	if (IS_ERR(trans)) +		err = PTR_ERR(trans); +	else +		err = btrfs_commit_transaction(trans, rc->extent_root); +out_free:  	kfree(rc); -	while (!list_empty(&reloc_roots)) { -		reloc_root = list_entry(reloc_roots.next, -					struct btrfs_root, root_list); -		list_del(&reloc_root->root_list); -		free_extent_buffer(reloc_root->node); -		free_extent_buffer(reloc_root->commit_root); -		kfree(reloc_root); -	} +out: +	if (!list_empty(&reloc_roots)) +		free_reloc_roots(&reloc_roots); +  	btrfs_free_path(path);  	if (err == 0) { @@ -4174,7 
+4450,7 @@ out:  		if (IS_ERR(fs_root))  			err = PTR_ERR(fs_root);  		else -			btrfs_orphan_cleanup(fs_root); +			err = btrfs_orphan_cleanup(fs_root);  	}  	return err;  } @@ -4188,12 +4464,11 @@ out:  int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)  {  	struct btrfs_ordered_sum *sums; -	struct btrfs_sector_sum *sector_sum;  	struct btrfs_ordered_extent *ordered;  	struct btrfs_root *root = BTRFS_I(inode)->root; -	size_t offset;  	int ret;  	u64 disk_bytenr; +	u64 new_bytenr;  	LIST_HEAD(list);  	ordered = btrfs_lookup_ordered_extent(inode, file_pos); @@ -4201,45 +4476,58 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)  	disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;  	ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, -				       disk_bytenr + len - 1, &list); +				       disk_bytenr + len - 1, &list, 0); +	if (ret) +		goto out;  	while (!list_empty(&list)) {  		sums = list_entry(list.next, struct btrfs_ordered_sum, list);  		list_del_init(&sums->list); -		sector_sum = sums->sums; -		sums->bytenr = ordered->start; - -		offset = 0; -		while (offset < sums->len) { -			sector_sum->bytenr += ordered->start - disk_bytenr; -			sector_sum++; -			offset += root->sectorsize; -		} +		/* +		 * We need to offset the new_bytenr based on where the csum is. +		 * We need to do this because we will read in entire prealloc +		 * extents but we may have written to say the middle of the +		 * prealloc extent, so we need to make sure the csum goes with +		 * the right disk offset. +		 * +		 * We can do this because the data reloc inode refers strictly +		 * to the on disk bytes, so we don't have to worry about +		 * disk_len vs real len like with real inodes since it's all +		 * disk length. 
+		 */ +		new_bytenr = ordered->start + (sums->bytenr - disk_bytenr); +		sums->bytenr = new_bytenr;  		btrfs_add_ordered_sum(inode, ordered, sums);  	} +out:  	btrfs_put_ordered_extent(ordered);  	return ret;  } -void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, -			   struct btrfs_root *root, struct extent_buffer *buf, -			   struct extent_buffer *cow) +int btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, +			  struct btrfs_root *root, struct extent_buffer *buf, +			  struct extent_buffer *cow)  {  	struct reloc_control *rc;  	struct backref_node *node;  	int first_cow = 0;  	int level; -	int ret; +	int ret = 0;  	rc = root->fs_info->reloc_ctl;  	if (!rc) -		return; +		return 0;  	BUG_ON(rc->stage == UPDATE_DATA_PTRS &&  	       root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID); +	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { +		if (buf == root->node) +			__update_reloc_root(root, cow->start); +	} +  	level = btrfs_header_level(buf);  	if (btrfs_header_generation(buf) <=  	    btrfs_root_last_snapshot(&root->root_item)) @@ -4271,10 +4559,9 @@ void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,  			rc->nodes_relocated += buf->len;  	} -	if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) { +	if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS)  		ret = replace_file_extents(trans, rc, root, cow); -		BUG_ON(ret); -	} +	return ret;  }  /* @@ -4315,7 +4602,7 @@ void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans,   * called after snapshot is created. 
migrate block reservation   * and create reloc root for the newly created snapshot   */ -void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, +int btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,  			       struct btrfs_pending_snapshot *pending)  {  	struct btrfs_root *root = pending->root; @@ -4325,7 +4612,7 @@ void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,  	int ret;  	if (!root->reloc_root) -		return; +		return 0;  	rc = root->fs_info->reloc_ctl;  	rc->merging_rsv_size += rc->nodes_relocated; @@ -4334,18 +4621,21 @@ void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans,  		ret = btrfs_block_rsv_migrate(&pending->block_rsv,  					      rc->block_rsv,  					      rc->nodes_relocated); -		BUG_ON(ret); +		if (ret) +			return ret;  	}  	new_root = pending->snap;  	reloc_root = create_reloc_root(trans, root->reloc_root,  				       new_root->root_key.objectid); +	if (IS_ERR(reloc_root)) +		return PTR_ERR(reloc_root); -	__add_reloc_root(reloc_root); +	ret = __add_reloc_root(reloc_root); +	BUG_ON(ret < 0);  	new_root->reloc_root = reloc_root; -	if (rc->create_reloc_tree) { +	if (rc->create_reloc_tree)  		ret = clone_backref_node(trans, rc, root, reloc_root); -		BUG_ON(ret); -	} +	return ret;  }  | 
