diff options
Diffstat (limited to 'fs/btrfs/inode.c')
| -rw-r--r-- | fs/btrfs/inode.c | 1222 | 
1 files changed, 819 insertions, 403 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 22ebc13b6c9..3668048e16f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -43,7 +43,6 @@  #include <linux/btrfs.h>  #include <linux/blkdev.h>  #include <linux/posix_acl_xattr.h> -#include "compat.h"  #include "ctree.h"  #include "disk-io.h"  #include "transaction.h" @@ -59,9 +58,10 @@  #include "inode-map.h"  #include "backref.h"  #include "hash.h" +#include "props.h"  struct btrfs_iget_args { -	u64 ino; +	struct btrfs_key *location;  	struct btrfs_root *root;  }; @@ -125,14 +125,13 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,   * the btree.  The caller should have done a btrfs_drop_extents so that   * no overlapping inline items exist in the btree   */ -static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, +static int insert_inline_extent(struct btrfs_trans_handle *trans, +				struct btrfs_path *path, int extent_inserted,  				struct btrfs_root *root, struct inode *inode,  				u64 start, size_t size, size_t compressed_size,  				int compress_type,  				struct page **compressed_pages)  { -	struct btrfs_key key; -	struct btrfs_path *path;  	struct extent_buffer *leaf;  	struct page *page = NULL;  	char *kaddr; @@ -141,29 +140,29 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,  	int err = 0;  	int ret;  	size_t cur_size = size; -	size_t datasize;  	unsigned long offset;  	if (compressed_size && compressed_pages)  		cur_size = compressed_size; -	path = btrfs_alloc_path(); -	if (!path) -		return -ENOMEM; +	inode_add_bytes(inode, size); -	path->leave_spinning = 1; +	if (!extent_inserted) { +		struct btrfs_key key; +		size_t datasize; -	key.objectid = btrfs_ino(inode); -	key.offset = start; -	btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); -	datasize = btrfs_file_extent_calc_inline_size(cur_size); +		key.objectid = btrfs_ino(inode); +		key.offset = start; +		btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); -	inode_add_bytes(inode, 
size); -	ret = btrfs_insert_empty_item(trans, root, path, &key, -				      datasize); -	if (ret) { -		err = ret; -		goto fail; +		datasize = btrfs_file_extent_calc_inline_size(cur_size); +		path->leave_spinning = 1; +		ret = btrfs_insert_empty_item(trans, root, path, &key, +					      datasize); +		if (ret) { +			err = ret; +			goto fail; +		}  	}  	leaf = path->nodes[0];  	ei = btrfs_item_ptr(leaf, path->slots[0], @@ -204,7 +203,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,  		page_cache_release(page);  	}  	btrfs_mark_buffer_dirty(leaf); -	btrfs_free_path(path); +	btrfs_release_path(path);  	/*  	 * we're an inline extent, so nobody can @@ -220,7 +219,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,  	return ret;  fail: -	btrfs_free_path(path);  	return err;  } @@ -243,6 +241,9 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,  	u64 aligned_end = ALIGN(end, root->sectorsize);  	u64 data_len = inline_len;  	int ret; +	struct btrfs_path *path; +	int extent_inserted = 0; +	u32 extent_item_size;  	if (compressed_size)  		data_len = compressed_size; @@ -257,12 +258,27 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,  		return 1;  	} +	path = btrfs_alloc_path(); +	if (!path) +		return -ENOMEM; +  	trans = btrfs_join_transaction(root); -	if (IS_ERR(trans)) +	if (IS_ERR(trans)) { +		btrfs_free_path(path);  		return PTR_ERR(trans); +	}  	trans->block_rsv = &root->fs_info->delalloc_block_rsv; -	ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1); +	if (compressed_size && compressed_pages) +		extent_item_size = btrfs_file_extent_calc_inline_size( +		   compressed_size); +	else +		extent_item_size = btrfs_file_extent_calc_inline_size( +		    inline_len); + +	ret = __btrfs_drop_extents(trans, root, inode, path, +				   start, aligned_end, NULL, +				   1, 1, extent_item_size, &extent_inserted);  	if (ret) {  		btrfs_abort_transaction(trans, root, ret);  		
goto out; @@ -270,7 +286,8 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,  	if (isize > actual_end)  		inline_len = min_t(u64, isize, actual_end); -	ret = insert_inline_extent(trans, root, inode, start, +	ret = insert_inline_extent(trans, path, extent_inserted, +				   root, inode, start,  				   inline_len, compressed_size,  				   compress_type, compressed_pages);  	if (ret && ret != -ENOSPC) { @@ -285,6 +302,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,  	btrfs_delalloc_release_metadata(inode, end + 1 - start);  	btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);  out: +	btrfs_free_path(path);  	btrfs_end_transaction(trans, root);  	return ret;  } @@ -376,6 +394,14 @@ static noinline int compress_file_range(struct inode *inode,  	    (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))  		btrfs_add_inode_defrag(NULL, inode); +	/* +	 * skip compression for a small file range(<=blocksize) that +	 * isn't an inline extent, since it doesn't save disk space at all. 
+	 */ +	if ((end - start + 1) <= blocksize && +	    (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size)) +		goto cleanup_and_bail_uncompressed; +  	actual_end = min_t(u64, isize, end + 1);  again:  	will_compress = 0; @@ -667,7 +693,7 @@ retry:  		ret = btrfs_reserve_extent(root,  					   async_extent->compressed_size,  					   async_extent->compressed_size, -					   0, alloc_hint, &ins, 1); +					   0, alloc_hint, &ins, 1, 1);  		if (ret) {  			int i; @@ -768,7 +794,7 @@ retry:  out:  	return ret;  out_free_reserve: -	btrfs_free_reserved_extent(root, ins.objectid, ins.offset); +	btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);  out_free:  	extent_clear_unlock_delalloc(inode, async_extent->start,  				     async_extent->start + @@ -844,7 +870,11 @@ static noinline int cow_file_range(struct inode *inode,  	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;  	int ret = 0; -	BUG_ON(btrfs_is_free_space_inode(inode)); +	if (btrfs_is_free_space_inode(inode)) { +		WARN_ON_ONCE(1); +		ret = -EINVAL; +		goto out_unlock; +	}  	num_bytes = ALIGN(end - start + 1, blocksize);  	num_bytes = max(blocksize,  num_bytes); @@ -887,7 +917,7 @@ static noinline int cow_file_range(struct inode *inode,  		cur_alloc_size = disk_num_bytes;  		ret = btrfs_reserve_extent(root, cur_alloc_size,  					   root->sectorsize, 0, alloc_hint, -					   &ins, 1); +					   &ins, 1, 1);  		if (ret < 0)  			goto out_unlock; @@ -965,7 +995,7 @@ out:  	return ret;  out_reserve: -	btrfs_free_reserved_extent(root, ins.objectid, ins.offset); +	btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);  out_unlock:  	extent_clear_unlock_delalloc(inode, start, end, locked_page,  				     EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | @@ -1054,17 +1084,15 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,  		async_cow->end = cur_end;  		INIT_LIST_HEAD(&async_cow->extents); -		async_cow->work.func = async_cow_start; -		async_cow->work.ordered_func = 
async_cow_submit; -		async_cow->work.ordered_free = async_cow_free; -		async_cow->work.flags = 0; +		btrfs_init_work(&async_cow->work, async_cow_start, +				async_cow_submit, async_cow_free);  		nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >>  			PAGE_CACHE_SHIFT;  		atomic_add(nr_pages, &root->fs_info->async_delalloc_pages); -		btrfs_queue_worker(&root->fs_info->delalloc_workers, -				   &async_cow->work); +		btrfs_queue_work(root->fs_info->delalloc_workers, +				 &async_cow->work);  		if (atomic_read(&root->fs_info->async_delalloc_pages) > limit) {  			wait_event(root->fs_info->async_submit_wait, @@ -1178,10 +1206,8 @@ static noinline int run_delalloc_nocow(struct inode *inode,  	while (1) {  		ret = btrfs_lookup_file_extent(trans, root, path, ino,  					       cur_offset, 0); -		if (ret < 0) { -			btrfs_abort_transaction(trans, root, ret); +		if (ret < 0)  			goto error; -		}  		if (ret > 0 && path->slots[0] > 0 && check_prev) {  			leaf = path->nodes[0];  			btrfs_item_key_to_cpu(leaf, &found_key, @@ -1195,10 +1221,8 @@ next_slot:  		leaf = path->nodes[0];  		if (path->slots[0] >= btrfs_header_nritems(leaf)) {  			ret = btrfs_next_leaf(root, path); -			if (ret < 0) { -				btrfs_abort_transaction(trans, root, ret); +			if (ret < 0)  				goto error; -			}  			if (ret > 0)  				break;  			leaf = path->nodes[0]; @@ -1255,6 +1279,15 @@ next_slot:  			disk_bytenr += cur_offset - found_key.offset;  			num_bytes = min(end + 1, extent_end) - cur_offset;  			/* +			 * if there are pending snapshots for this root, +			 * we fall into common COW way. +			 */ +			if (!nolock) { +				err = btrfs_start_nocow_write(root); +				if (!err) +					goto out_check; +			} +			/*  			 * force cow if csum exists in the range.  			 * this ensure that csum for a given extent are  			 * either valid or do not exist. 
@@ -1264,7 +1297,8 @@ next_slot:  			nocow = 1;  		} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {  			extent_end = found_key.offset + -				btrfs_file_extent_inline_len(leaf, fi); +				btrfs_file_extent_inline_len(leaf, +						     path->slots[0], fi);  			extent_end = ALIGN(extent_end, root->sectorsize);  		} else {  			BUG_ON(1); @@ -1272,6 +1306,8 @@ next_slot:  out_check:  		if (extent_end <= start) {  			path->slots[0]++; +			if (!nolock && nocow) +				btrfs_end_nocow_write(root);  			goto next_slot;  		}  		if (!nocow) { @@ -1290,7 +1326,8 @@ out_check:  					     cow_start, found_key.offset - 1,  					     page_started, nr_written, 1);  			if (ret) { -				btrfs_abort_transaction(trans, root, ret); +				if (!nolock && nocow) +					btrfs_end_nocow_write(root);  				goto error;  			}  			cow_start = (u64)-1; @@ -1340,7 +1377,8 @@ out_check:  			ret = btrfs_reloc_clone_csums(inode, cur_offset,  						      num_bytes);  			if (ret) { -				btrfs_abort_transaction(trans, root, ret); +				if (!nolock && nocow) +					btrfs_end_nocow_write(root);  				goto error;  			}  		} @@ -1350,6 +1388,8 @@ out_check:  					     locked_page, EXTENT_LOCKED |  					     EXTENT_DELALLOC, PAGE_UNLOCK |  					     PAGE_SET_PRIVATE2); +		if (!nolock && nocow) +			btrfs_end_nocow_write(root);  		cur_offset = extent_end;  		if (cur_offset > end)  			break; @@ -1364,10 +1404,8 @@ out_check:  	if (cow_start != (u64)-1) {  		ret = cow_file_range(inode, locked_page, cow_start, end,  				     page_started, nr_written, 1); -		if (ret) { -			btrfs_abort_transaction(trans, root, ret); +		if (ret)  			goto error; -		}  	}  error: @@ -1551,7 +1589,13 @@ static void btrfs_clear_bit_hook(struct inode *inode,  			spin_unlock(&BTRFS_I(inode)->lock);  		} -		if (*bits & EXTENT_DO_ACCOUNTING) +		/* +		 * We don't reserve metadata space for space cache inodes so we +		 * don't need to call delalloc_release_metadata if there is an +		 * error. 
+		 */ +		if (*bits & EXTENT_DO_ACCOUNTING && +		    root != root->fs_info->tree_root)  			btrfs_delalloc_release_metadata(inode, len);  		if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID @@ -1579,7 +1623,7 @@ int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,  			 unsigned long bio_flags)  {  	struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; -	u64 logical = (u64)bio->bi_sector << 9; +	u64 logical = (u64)bio->bi_iter.bi_sector << 9;  	u64 length = 0;  	u64 map_length;  	int ret; @@ -1587,7 +1631,7 @@ int btrfs_merge_bio_hook(int rw, struct page *page, unsigned long offset,  	if (bio_flags & EXTENT_BIO_COMPRESSED)  		return 0; -	length = bio->bi_size; +	length = bio->bi_iter.bi_size;  	map_length = length;  	ret = btrfs_map_block(root->fs_info, rw, logical,  			      &map_length, NULL, 0); @@ -1825,9 +1869,9 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)  	SetPageChecked(page);  	page_cache_get(page); -	fixup->work.func = btrfs_writepage_fixup_worker; +	btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL);  	fixup->page = page; -	btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work); +	btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work);  	return -EBUSY;  } @@ -1843,14 +1887,13 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,  	struct btrfs_path *path;  	struct extent_buffer *leaf;  	struct btrfs_key ins; +	int extent_inserted = 0;  	int ret;  	path = btrfs_alloc_path();  	if (!path)  		return -ENOMEM; -	path->leave_spinning = 1; -  	/*  	 * we may be replacing one extent in the tree with another.  	 * The new extent is pinned in the extent map, and we don't want @@ -1860,17 +1903,23 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,  	 * the caller is expected to unpin it and allow it to be merged  	 * with the others.  	 
*/ -	ret = btrfs_drop_extents(trans, root, inode, file_pos, -				 file_pos + num_bytes, 0); +	ret = __btrfs_drop_extents(trans, root, inode, path, file_pos, +				   file_pos + num_bytes, NULL, 0, +				   1, sizeof(*fi), &extent_inserted);  	if (ret)  		goto out; -	ins.objectid = btrfs_ino(inode); -	ins.offset = file_pos; -	ins.type = BTRFS_EXTENT_DATA_KEY; -	ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi)); -	if (ret) -		goto out; +	if (!extent_inserted) { +		ins.objectid = btrfs_ino(inode); +		ins.offset = file_pos; +		ins.type = BTRFS_EXTENT_DATA_KEY; + +		path->leave_spinning = 1; +		ret = btrfs_insert_empty_item(trans, root, path, &ins, +					      sizeof(*fi)); +		if (ret) +			goto out; +	}  	leaf = path->nodes[0];  	fi = btrfs_item_ptr(leaf, path->slots[0],  			    struct btrfs_file_extent_item); @@ -2041,10 +2090,8 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,  		key.offset = offset;  	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); -	if (ret < 0) { -		WARN_ON(1); +	if (WARN_ON(ret < 0))  		return ret; -	}  	ret = 0;  	while (1) { @@ -2133,7 +2180,8 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path,  						  old->extent_offset, fs_info,  						  path, record_one_backref,  						  old); -		BUG_ON(ret < 0 && ret != -ENOENT); +		if (ret < 0 && ret != -ENOENT) +			return false;  		/* no backref to be processed for this extent */  		if (!old->count) { @@ -2217,6 +2265,11 @@ static noinline int relink_extent_backref(struct btrfs_path *path,  		return PTR_ERR(root);  	} +	if (btrfs_root_readonly(root)) { +		srcu_read_unlock(&fs_info->subvol_srcu, index); +		return 0; +	} +  	/* step 2: get inode */  	key.objectid = backref->inum;  	key.type = BTRFS_INODE_ITEM_KEY; @@ -2293,7 +2346,7 @@ again:  		u64 extent_len;  		struct btrfs_key found_key; -		ret = btrfs_search_slot(trans, root, &key, path, 1, 1); +		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);  		if (ret < 0)  			goto 
out_free_path; @@ -2367,10 +2420,23 @@ out_unlock:  	return ret;  } +static void free_sa_defrag_extent(struct new_sa_defrag_extent *new) +{ +	struct old_sa_defrag_extent *old, *tmp; + +	if (!new) +		return; + +	list_for_each_entry_safe(old, tmp, &new->head, list) { +		list_del(&old->list); +		kfree(old); +	} +	kfree(new); +} +  static void relink_file_extents(struct new_sa_defrag_extent *new)  {  	struct btrfs_path *path; -	struct old_sa_defrag_extent *old, *tmp;  	struct sa_defrag_extent_backref *backref;  	struct sa_defrag_extent_backref *prev = NULL;  	struct inode *inode; @@ -2413,16 +2479,11 @@ static void relink_file_extents(struct new_sa_defrag_extent *new)  	kfree(prev);  	btrfs_free_path(path); - -	list_for_each_entry_safe(old, tmp, &new->head, list) { -		list_del(&old->list); -		kfree(old); -	}  out: +	free_sa_defrag_extent(new); +  	atomic_dec(&root->fs_info->defrag_running);  	wake_up(&root->fs_info->transaction_wait); - -	kfree(new);  }  static struct new_sa_defrag_extent * @@ -2432,7 +2493,7 @@ record_old_file_extents(struct inode *inode,  	struct btrfs_root *root = BTRFS_I(inode)->root;  	struct btrfs_path *path;  	struct btrfs_key key; -	struct old_sa_defrag_extent *old, *tmp; +	struct old_sa_defrag_extent *old;  	struct new_sa_defrag_extent *new;  	int ret; @@ -2480,7 +2541,7 @@ record_old_file_extents(struct inode *inode,  		if (slot >= btrfs_header_nritems(l)) {  			ret = btrfs_next_leaf(root, path);  			if (ret < 0) -				goto out_free_list; +				goto out_free_path;  			else if (ret > 0)  				break;  			continue; @@ -2509,7 +2570,7 @@ record_old_file_extents(struct inode *inode,  		old = kmalloc(sizeof(*old), GFP_NOFS);  		if (!old) -			goto out_free_list; +			goto out_free_path;  		offset = max(new->file_pos, key.offset);  		end = min(new->file_pos + new->len, key.offset + num_bytes); @@ -2531,24 +2592,28 @@ next:  	return new; -out_free_list: -	list_for_each_entry_safe(old, tmp, &new->head, list) { -		list_del(&old->list); -		kfree(old); -	}  
out_free_path:  	btrfs_free_path(path);  out_kfree: -	kfree(new); +	free_sa_defrag_extent(new);  	return NULL;  } -/* - * helper function for btrfs_finish_ordered_io, this - * just reads in some of the csum leaves to prime them into ram - * before we start the transaction.  It limits the amount of btree - * reads required while inside the transaction. - */ +static void btrfs_release_delalloc_bytes(struct btrfs_root *root, +					 u64 start, u64 len) +{ +	struct btrfs_block_group_cache *cache; + +	cache = btrfs_lookup_block_group(root->fs_info, start); +	ASSERT(cache); + +	spin_lock(&cache->lock); +	cache->delalloc_bytes -= len; +	spin_unlock(&cache->lock); + +	btrfs_put_block_group(cache); +} +  /* as ordered data IO finishes, this gets called so we can finish   * an ordered extent if the range of bytes in the file it covers are   * fully written. @@ -2610,7 +2675,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)  			EXTENT_DEFRAG, 1, cached_state);  	if (ret) {  		u64 last_snapshot = btrfs_root_last_snapshot(&root->root_item); -		if (last_snapshot >= BTRFS_I(inode)->generation) +		if (0 && last_snapshot >= BTRFS_I(inode)->generation)  			/* the inode is shared */  			new = record_old_file_extents(inode, ordered_extent); @@ -2628,6 +2693,7 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)  		trans = NULL;  		goto out_unlock;  	} +  	trans->block_rsv = &root->fs_info->delalloc_block_rsv;  	if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) @@ -2647,6 +2713,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)  						logical_len, logical_len,  						compress_type, 0, 0,  						BTRFS_FILE_EXTENT_REG); +		if (!ret) +			btrfs_release_delalloc_bytes(root, +						     ordered_extent->start, +						     ordered_extent->disk_len);  	}  	unpin_extent_cache(&BTRFS_I(inode)->extent_tree,  			   ordered_extent->file_offset, ordered_extent->len, @@ -2699,7 +2769,7 @@ 
out:  		    !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&  		    !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))  			btrfs_free_reserved_extent(root, ordered_extent->start, -						   ordered_extent->disk_len); +						   ordered_extent->disk_len, 1);  	} @@ -2710,8 +2780,14 @@ out:  	btrfs_remove_ordered_extent(inode, ordered_extent);  	/* for snapshot-aware defrag */ -	if (new) -		relink_file_extents(new); +	if (new) { +		if (ret) { +			free_sa_defrag_extent(new); +			atomic_dec(&root->fs_info->defrag_running); +		} else { +			relink_file_extents(new); +		} +	}  	/* once for us */  	btrfs_put_ordered_extent(ordered_extent); @@ -2734,7 +2810,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,  	struct inode *inode = page->mapping->host;  	struct btrfs_root *root = BTRFS_I(inode)->root;  	struct btrfs_ordered_extent *ordered_extent = NULL; -	struct btrfs_workers *workers; +	struct btrfs_workqueue *workers;  	trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); @@ -2743,14 +2819,13 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,  					    end - start + 1, uptodate))  		return 0; -	ordered_extent->work.func = finish_ordered_fn; -	ordered_extent->work.flags = 0; +	btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL);  	if (btrfs_is_free_space_inode(inode)) -		workers = &root->fs_info->endio_freespace_worker; +		workers = root->fs_info->endio_freespace_worker;  	else -		workers = &root->fs_info->endio_write_workers; -	btrfs_queue_worker(workers, &ordered_extent->work); +		workers = root->fs_info->endio_write_workers; +	btrfs_queue_work(workers, &ordered_extent->work);  	return 0;  } @@ -2892,14 +2967,15 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,  	root->orphan_block_rsv = NULL;  	spin_unlock(&root->orphan_lock); -	if (root->orphan_item_inserted && +	if (test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state) &&  	    
btrfs_root_refs(&root->root_item) > 0) {  		ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,  					    root->root_key.objectid);  		if (ret)  			btrfs_abort_transaction(trans, root, ret);  		else -			root->orphan_item_inserted = 0; +			clear_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, +				  &root->state);  	}  	if (block_rsv) { @@ -2969,6 +3045,7 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)  	if (insert >= 1) {  		ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));  		if (ret) { +			atomic_dec(&root->orphan_inodes);  			if (reserve) {  				clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,  					  &BTRFS_I(inode)->runtime_flags); @@ -3018,14 +3095,16 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans,  		release_rsv = 1;  	spin_unlock(&root->orphan_lock); -	if (trans && delete_item) -		ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode)); - -	if (release_rsv) { -		btrfs_orphan_release_metadata(inode); +	if (delete_item) {  		atomic_dec(&root->orphan_inodes); +		if (trans) +			ret = btrfs_del_orphan_item(trans, root, +						    btrfs_ino(inode));  	} +	if (release_rsv) +		btrfs_orphan_release_metadata(inode); +  	return ret;  } @@ -3172,8 +3251,7 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)  		/* if we have links, this was a truncate, lets do that */  		if (inode->i_nlink) { -			if (!S_ISREG(inode->i_mode)) { -				WARN_ON(1); +			if (WARN_ON(!S_ISREG(inode->i_mode))) {  				iput(inode);  				continue;  			} @@ -3214,7 +3292,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)  		btrfs_block_rsv_release(root, root->orphan_block_rsv,  					(u64)-1); -	if (root->orphan_block_rsv || root->orphan_item_inserted) { +	if (root->orphan_block_rsv || +	    test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state)) {  		trans = btrfs_join_transaction(root);  		if (!IS_ERR(trans))  			btrfs_end_transaction(trans, root); @@ -3240,7 +3319,8 @@ out:   * slot is the slot the inode is in, objectid is the objectid 
of the inode   */  static noinline int acls_after_inode_item(struct extent_buffer *leaf, -					  int slot, u64 objectid) +					  int slot, u64 objectid, +					  int *first_xattr_slot)  {  	u32 nritems = btrfs_header_nritems(leaf);  	struct btrfs_key found_key; @@ -3256,6 +3336,7 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,  	}  	slot++; +	*first_xattr_slot = -1;  	while (slot < nritems) {  		btrfs_item_key_to_cpu(leaf, &found_key, slot); @@ -3265,6 +3346,8 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,  		/* we found an xattr, assume we've got an acl */  		if (found_key.type == BTRFS_XATTR_ITEM_KEY) { +			if (*first_xattr_slot == -1) +				*first_xattr_slot = slot;  			if (found_key.offset == xattr_access ||  			    found_key.offset == xattr_default)  				return 1; @@ -3293,6 +3376,8 @@ static noinline int acls_after_inode_item(struct extent_buffer *leaf,  	 * something larger than an xattr.  We have to assume the inode  	 * has acls  	 */ +	if (*first_xattr_slot == -1) +		*first_xattr_slot = slot;  	return 1;  } @@ -3307,10 +3392,12 @@ static void btrfs_read_locked_inode(struct inode *inode)  	struct btrfs_timespec *tspec;  	struct btrfs_root *root = BTRFS_I(inode)->root;  	struct btrfs_key location; +	unsigned long ptr;  	int maybe_acls;  	u32 rdev;  	int ret;  	bool filled = false; +	int first_xattr_slot;  	ret = btrfs_fill_inode(inode, &rdev);  	if (!ret) @@ -3320,7 +3407,6 @@ static void btrfs_read_locked_inode(struct inode *inode)  	if (!path)  		goto make_bad; -	path->leave_spinning = 1;  	memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));  	ret = btrfs_lookup_inode(NULL, root, path, &location, 0); @@ -3330,7 +3416,7 @@ static void btrfs_read_locked_inode(struct inode *inode)  	leaf = path->nodes[0];  	if (filled) -		goto cache_acl; +		goto cache_index;  	inode_item = btrfs_item_ptr(leaf, path->slots[0],  				    struct btrfs_inode_item); @@ -3373,18 +3459,51 @@ static void 
btrfs_read_locked_inode(struct inode *inode)  	BTRFS_I(inode)->index_cnt = (u64)-1;  	BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item); + +cache_index: +	path->slots[0]++; +	if (inode->i_nlink != 1 || +	    path->slots[0] >= btrfs_header_nritems(leaf)) +		goto cache_acl; + +	btrfs_item_key_to_cpu(leaf, &location, path->slots[0]); +	if (location.objectid != btrfs_ino(inode)) +		goto cache_acl; + +	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); +	if (location.type == BTRFS_INODE_REF_KEY) { +		struct btrfs_inode_ref *ref; + +		ref = (struct btrfs_inode_ref *)ptr; +		BTRFS_I(inode)->dir_index = btrfs_inode_ref_index(leaf, ref); +	} else if (location.type == BTRFS_INODE_EXTREF_KEY) { +		struct btrfs_inode_extref *extref; + +		extref = (struct btrfs_inode_extref *)ptr; +		BTRFS_I(inode)->dir_index = btrfs_inode_extref_index(leaf, +								     extref); +	}  cache_acl:  	/*  	 * try to precache a NULL acl entry for files that don't have  	 * any xattrs or acls  	 */  	maybe_acls = acls_after_inode_item(leaf, path->slots[0], -					   btrfs_ino(inode)); +					   btrfs_ino(inode), &first_xattr_slot); +	if (first_xattr_slot != -1) { +		path->slots[0] = first_xattr_slot; +		ret = btrfs_load_inode_props(inode, path); +		if (ret) +			btrfs_err(root->fs_info, +				  "error loading props for ino %llu (root %llu): %d", +				  btrfs_ino(inode), +				  root->root_key.objectid, ret); +	} +	btrfs_free_path(path); +  	if (!maybe_acls)  		cache_no_acl(inode); -	btrfs_free_path(path); -  	switch (inode->i_mode & S_IFMT) {  	case S_IFREG:  		inode->i_mapping->a_ops = &btrfs_aops; @@ -3488,7 +3607,6 @@ static noinline int btrfs_update_inode_item(struct btrfs_trans_handle *trans,  		goto failed;  	} -	btrfs_unlock_up_safe(path, 1);  	leaf = path->nodes[0];  	inode_item = btrfs_item_ptr(leaf, path->slots[0],  				    struct btrfs_inode_item); @@ -3585,6 +3703,24 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,  		goto err;  	btrfs_release_path(path); +	/* +	 
* If we don't have dir index, we have to get it by looking up +	 * the inode ref, since we get the inode ref, remove it directly, +	 * it is unnecessary to do delayed deletion. +	 * +	 * But if we have dir index, needn't search inode ref to get it. +	 * Since the inode ref is close to the inode item, it is better +	 * that we delay to delete it, and just do this deletion when +	 * we update the inode item. +	 */ +	if (BTRFS_I(inode)->dir_index) { +		ret = btrfs_delayed_delete_inode_ref(inode); +		if (!ret) { +			index = BTRFS_I(inode)->dir_index; +			goto skip_backref; +		} +	} +  	ret = btrfs_del_inode_ref(trans, root, name, name_len, ino,  				  dir_ino, &index);  	if (ret) { @@ -3594,7 +3730,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,  		btrfs_abort_transaction(trans, root, ret);  		goto err;  	} - +skip_backref:  	ret = btrfs_delete_delayed_dir_index(trans, root, dir, index);  	if (ret) {  		btrfs_abort_transaction(trans, root, ret); @@ -3636,7 +3772,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,  	int ret;  	ret = __btrfs_unlink_inode(trans, root, dir, inode, name, name_len);  	if (!ret) { -		btrfs_drop_nlink(inode); +		drop_nlink(inode);  		ret = btrfs_update_inode(trans, root, inode);  	}  	return ret; @@ -3884,7 +4020,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,  	 * not block aligned since we will be keeping the last block of the  	 * extent just the way it is.  	 
*/ -	if (root->ref_cows || root == root->fs_info->tree_root) +	if (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || +	    root == root->fs_info->tree_root)  		btrfs_drop_extent_cache(inode, ALIGN(new_size,  					root->sectorsize), (u64)-1, 0); @@ -3940,7 +4077,7 @@ search_again:  				    btrfs_file_extent_num_bytes(leaf, fi);  			} else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {  				item_end += btrfs_file_extent_inline_len(leaf, -									 fi); +							 path->slots[0], fi);  			}  			item_end--;  		} @@ -3977,7 +4114,9 @@ search_again:  							 extent_num_bytes);  				num_dec = (orig_num_bytes -  					   extent_num_bytes); -				if (root->ref_cows && extent_start != 0) +				if (test_bit(BTRFS_ROOT_REF_COWS, +					     &root->state) && +				    extent_start != 0)  					inode_sub_bytes(inode, num_dec);  				btrfs_mark_buffer_dirty(leaf);  			} else { @@ -3991,7 +4130,8 @@ search_again:  				num_dec = btrfs_file_extent_num_bytes(leaf, fi);  				if (extent_start != 0) {  					found_extent = 1; -					if (root->ref_cows) +					if (test_bit(BTRFS_ROOT_REF_COWS, +						     &root->state))  						inode_sub_bytes(inode, num_dec);  				}  			} @@ -4006,14 +4146,20 @@ search_again:  			    btrfs_file_extent_other_encoding(leaf, fi) == 0) {  				u32 size = new_size - found_key.offset; -				if (root->ref_cows) { +				if (test_bit(BTRFS_ROOT_REF_COWS, &root->state))  					inode_sub_bytes(inode, item_end + 1 -  							new_size); -				} + +				/* +				 * update the ram bytes to properly reflect +				 * the new size of our item +				 */ +				btrfs_set_file_extent_ram_bytes(leaf, fi, size);  				size =  				    btrfs_file_extent_calc_inline_size(size);  				btrfs_truncate_item(root, path, size, 1); -			} else if (root->ref_cows) { +			} else if (test_bit(BTRFS_ROOT_REF_COWS, +					    &root->state)) {  				inode_sub_bytes(inode, item_end + 1 -  						found_key.offset);  			} @@ -4035,8 +4181,9 @@ delete:  		} else {  			break;  		} -		if (found_extent && (root->ref_cows || -				    
 root == root->fs_info->tree_root)) { +		if (found_extent && +		    (test_bit(BTRFS_ROOT_REF_COWS, &root->state) || +		     root == root->fs_info->tree_root)) {  			btrfs_set_path_blocking(path);  			ret = btrfs_free_extent(trans, root, extent_start,  						extent_num_bytes, 0, @@ -4195,6 +4342,49 @@ out:  	return ret;  } +static int maybe_insert_hole(struct btrfs_root *root, struct inode *inode, +			     u64 offset, u64 len) +{ +	struct btrfs_trans_handle *trans; +	int ret; + +	/* +	 * Still need to make sure the inode looks like it's been updated so +	 * that any holes get logged if we fsync. +	 */ +	if (btrfs_fs_incompat(root->fs_info, NO_HOLES)) { +		BTRFS_I(inode)->last_trans = root->fs_info->generation; +		BTRFS_I(inode)->last_sub_trans = root->log_transid; +		BTRFS_I(inode)->last_log_commit = root->last_log_commit; +		return 0; +	} + +	/* +	 * 1 - for the one we're dropping +	 * 1 - for the one we're adding +	 * 1 - for updating the inode. +	 */ +	trans = btrfs_start_transaction(root, 3); +	if (IS_ERR(trans)) +		return PTR_ERR(trans); + +	ret = btrfs_drop_extents(trans, root, inode, offset, offset + len, 1); +	if (ret) { +		btrfs_abort_transaction(trans, root, ret); +		btrfs_end_transaction(trans, root); +		return ret; +	} + +	ret = btrfs_insert_file_extent(trans, root, btrfs_ino(inode), offset, +				       0, 0, len, 0, len, 0, 0, 0); +	if (ret) +		btrfs_abort_transaction(trans, root, ret); +	else +		btrfs_update_inode(trans, root, inode); +	btrfs_end_transaction(trans, root); +	return ret; +} +  /*   * This function puts in dummy file extents for the area we're creating a hole   * for.  
So if we are truncating this file to a larger size we need to insert @@ -4203,7 +4393,6 @@ out:   */  int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)  { -	struct btrfs_trans_handle *trans;  	struct btrfs_root *root = BTRFS_I(inode)->root;  	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;  	struct extent_map *em = NULL; @@ -4230,15 +4419,16 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)  	while (1) {  		struct btrfs_ordered_extent *ordered; -		btrfs_wait_ordered_range(inode, hole_start, -					 block_end - hole_start); +  		lock_extent_bits(io_tree, hole_start, block_end - 1, 0,  				 &cached_state); -		ordered = btrfs_lookup_ordered_extent(inode, hole_start); +		ordered = btrfs_lookup_ordered_range(inode, hole_start, +						     block_end - hole_start);  		if (!ordered)  			break;  		unlock_extent_cached(io_tree, hole_start, block_end - 1,  				     &cached_state, GFP_NOFS); +		btrfs_start_ordered_extent(inode, ordered, 1);  		btrfs_put_ordered_extent(ordered);  	} @@ -4257,31 +4447,10 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)  			struct extent_map *hole_em;  			hole_size = last_byte - cur_offset; -			trans = btrfs_start_transaction(root, 3); -			if (IS_ERR(trans)) { -				err = PTR_ERR(trans); -				break; -			} - -			err = btrfs_drop_extents(trans, root, inode, -						 cur_offset, -						 cur_offset + hole_size, 1); -			if (err) { -				btrfs_abort_transaction(trans, root, err); -				btrfs_end_transaction(trans, root); -				break; -			} - -			err = btrfs_insert_file_extent(trans, root, -					btrfs_ino(inode), cur_offset, 0, -					0, hole_size, 0, hole_size, -					0, 0, 0); -			if (err) { -				btrfs_abort_transaction(trans, root, err); -				btrfs_end_transaction(trans, root); +			err = maybe_insert_hole(root, inode, cur_offset, +						hole_size); +			if (err)  				break; -			} -  			btrfs_drop_extent_cache(inode, cur_offset,  						cur_offset + hole_size - 1, 0);  			
hole_em = alloc_extent_map(); @@ -4300,7 +4469,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)  			hole_em->ram_bytes = hole_size;  			hole_em->bdev = root->fs_info->fs_devices->latest_bdev;  			hole_em->compress_type = BTRFS_COMPRESS_NONE; -			hole_em->generation = trans->transid; +			hole_em->generation = root->fs_info->generation;  			while (1) {  				write_lock(&em_tree->lock); @@ -4313,17 +4482,14 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)  							hole_size - 1, 0);  			}  			free_extent_map(hole_em); -next: -			btrfs_update_inode(trans, root, inode); -			btrfs_end_transaction(trans, root);  		} +next:  		free_extent_map(em);  		em = NULL;  		cur_offset = last_byte;  		if (cur_offset >= block_end)  			break;  	} -  	free_extent_map(em);  	unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state,  			     GFP_NOFS); @@ -4345,8 +4511,12 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)  	 * these flags set.  For all other operations the VFS set these flags  	 * explicitly if it wants a timestamp update.  	 */ -	if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) -		inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb); +	if (newsize != oldsize) { +		inode_inc_iversion(inode); +		if (!(mask & (ATTR_CTIME | ATTR_MTIME))) +			inode->i_ctime = inode->i_mtime = +				current_fs_time(inode->i_sb); +	}  	if (newsize > oldsize) {  		truncate_pagecache(inode, newsize); @@ -4455,12 +4625,70 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)  		err = btrfs_dirty_inode(inode);  		if (!err && attr->ia_valid & ATTR_MODE) -			err = btrfs_acl_chmod(inode); +			err = posix_acl_chmod(inode, inode->i_mode);  	}  	return err;  } +/* + * While truncating the inode pages during eviction, we get the VFS calling + * btrfs_invalidatepage() against each page of the inode. 
This is slow because + * the calls to btrfs_invalidatepage() result in a huge amount of calls to + * lock_extent_bits() and clear_extent_bit(), which keep merging and splitting + * extent_state structures over and over, wasting lots of time. + * + * Therefore if the inode is being evicted, let btrfs_invalidatepage() skip all + * those expensive operations on a per page basis and do only the ordered io + * finishing, while we release here the extent_map and extent_state structures, + * without the excessive merging and splitting. + */ +static void evict_inode_truncate_pages(struct inode *inode) +{ +	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; +	struct extent_map_tree *map_tree = &BTRFS_I(inode)->extent_tree; +	struct rb_node *node; + +	ASSERT(inode->i_state & I_FREEING); +	truncate_inode_pages_final(&inode->i_data); + +	write_lock(&map_tree->lock); +	while (!RB_EMPTY_ROOT(&map_tree->map)) { +		struct extent_map *em; + +		node = rb_first(&map_tree->map); +		em = rb_entry(node, struct extent_map, rb_node); +		clear_bit(EXTENT_FLAG_PINNED, &em->flags); +		clear_bit(EXTENT_FLAG_LOGGING, &em->flags); +		remove_extent_mapping(map_tree, em); +		free_extent_map(em); +	} +	write_unlock(&map_tree->lock); + +	spin_lock(&io_tree->lock); +	while (!RB_EMPTY_ROOT(&io_tree->state)) { +		struct extent_state *state; +		struct extent_state *cached_state = NULL; + +		node = rb_first(&io_tree->state); +		state = rb_entry(node, struct extent_state, rb_node); +		atomic_inc(&state->refs); +		spin_unlock(&io_tree->lock); + +		lock_extent_bits(io_tree, state->start, state->end, +				 0, &cached_state); +		clear_extent_bit(io_tree, state->start, state->end, +				 EXTENT_LOCKED | EXTENT_DIRTY | +				 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | +				 EXTENT_DEFRAG, 1, 1, +				 &cached_state, GFP_NOFS); +		free_extent_state(state); + +		spin_lock(&io_tree->lock); +	} +	spin_unlock(&io_tree->lock); +} +  void btrfs_evict_inode(struct inode *inode)  {  	struct btrfs_trans_handle 
*trans; @@ -4471,9 +4699,12 @@ void btrfs_evict_inode(struct inode *inode)  	trace_btrfs_inode_evict(inode); -	truncate_inode_pages(&inode->i_data, 0); -	if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 || -			       btrfs_is_free_space_inode(inode))) +	evict_inode_truncate_pages(inode); + +	if (inode->i_nlink && +	    ((btrfs_root_refs(&root->root_item) != 0 && +	      root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID) || +	     btrfs_is_free_space_inode(inode)))  		goto no_delete;  	if (is_bad_inode(inode)) { @@ -4490,7 +4721,8 @@ void btrfs_evict_inode(struct inode *inode)  	}  	if (inode->i_nlink > 0) { -		BUG_ON(btrfs_root_refs(&root->root_item) != 0); +		BUG_ON(btrfs_root_refs(&root->root_item) != 0 && +		       root->root_key.objectid != BTRFS_ROOT_TREE_OBJECTID);  		goto no_delete;  	} @@ -4643,9 +4875,9 @@ static int fixup_tree_root_location(struct btrfs_root *root,  	}  	err = -ENOENT; -	ret = btrfs_find_root_ref(root->fs_info->tree_root, path, -				  BTRFS_I(dir)->root->root_key.objectid, -				  location->objectid); +	ret = btrfs_find_item(root->fs_info->tree_root, path, +				BTRFS_I(dir)->root->root_key.objectid, +				location->objectid, BTRFS_ROOT_REF_KEY, NULL);  	if (ret) {  		if (ret < 0)  			err = ret; @@ -4731,14 +4963,7 @@ static void inode_tree_del(struct inode *inode)  	}  	spin_unlock(&root->inode_lock); -	/* -	 * Free space cache has inodes in the tree root, but the tree root has a -	 * root_refs of 0, so this could end up dropping the tree root as a -	 * snapshot, so we need the extra !root->fs_info->tree_root check to -	 * make sure we don't drop it. 
-	 */ -	if (empty && btrfs_root_refs(&root->root_item) == 0 && -	    root != root->fs_info->tree_root) { +	if (empty && btrfs_root_refs(&root->root_item) == 0) {  		synchronize_srcu(&root->fs_info->subvol_srcu);  		spin_lock(&root->inode_lock);  		empty = RB_EMPTY_ROOT(&root->inode_tree); @@ -4756,7 +4981,8 @@ void btrfs_invalidate_inodes(struct btrfs_root *root)  	struct inode *inode;  	u64 objectid = 0; -	WARN_ON(btrfs_root_refs(&root->root_item) != 0); +	if (!test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) +		WARN_ON(btrfs_root_refs(&root->root_item) != 0);  	spin_lock(&root->inode_lock);  again: @@ -4813,7 +5039,9 @@ again:  static int btrfs_init_locked_inode(struct inode *inode, void *p)  {  	struct btrfs_iget_args *args = p; -	inode->i_ino = args->ino; +	inode->i_ino = args->location->objectid; +	memcpy(&BTRFS_I(inode)->location, args->location, +	       sizeof(*args->location));  	BTRFS_I(inode)->root = args->root;  	return 0;  } @@ -4821,20 +5049,22 @@ static int btrfs_init_locked_inode(struct inode *inode, void *p)  static int btrfs_find_actor(struct inode *inode, void *opaque)  {  	struct btrfs_iget_args *args = opaque; -	return args->ino == btrfs_ino(inode) && +	return args->location->objectid == BTRFS_I(inode)->location.objectid &&  		args->root == BTRFS_I(inode)->root;  }  static struct inode *btrfs_iget_locked(struct super_block *s, -				       u64 objectid, +				       struct btrfs_key *location,  				       struct btrfs_root *root)  {  	struct inode *inode;  	struct btrfs_iget_args args; -	args.ino = objectid; +	unsigned long hashval = btrfs_inode_hash(location->objectid, root); + +	args.location = location;  	args.root = root; -	inode = iget5_locked(s, objectid, btrfs_find_actor, +	inode = iget5_locked(s, hashval, btrfs_find_actor,  			     btrfs_init_locked_inode,  			     (void *)&args);  	return inode; @@ -4848,13 +5078,11 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,  {  	struct inode *inode; -	inode = 
btrfs_iget_locked(s, location->objectid, root); +	inode = btrfs_iget_locked(s, location, root);  	if (!inode)  		return ERR_PTR(-ENOMEM);  	if (inode->i_state & I_NEW) { -		BTRFS_I(inode)->root = root; -		memcpy(&BTRFS_I(inode)->location, location, sizeof(*location));  		btrfs_read_locked_inode(inode);  		if (!is_bad_inode(inode)) {  			inode_tree_add(inode); @@ -4910,7 +5138,7 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)  		return ERR_PTR(ret);  	if (location.objectid == 0) -		return NULL; +		return ERR_PTR(-ENOENT);  	if (location.type == BTRFS_INODE_ITEM_KEY) {  		inode = btrfs_iget(dir->i_sb, &location, root, NULL); @@ -4967,17 +5195,23 @@ static int btrfs_dentry_delete(const struct dentry *dentry)  static void btrfs_dentry_release(struct dentry *dentry)  { -	if (dentry->d_fsdata) -		kfree(dentry->d_fsdata); +	kfree(dentry->d_fsdata);  }  static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,  				   unsigned int flags)  { -	struct dentry *ret; +	struct inode *inode; -	ret = d_splice_alias(btrfs_lookup_dentry(dir, dentry), dentry); -	return ret; +	inode = btrfs_lookup_dentry(dir, dentry); +	if (IS_ERR(inode)) { +		if (PTR_ERR(inode) == -ENOENT) +			inode = NULL; +		else +			return ERR_CAST(inode); +	} + +	return d_materialise_unique(dentry, inode);  }  unsigned char btrfs_filetype_table[] = { @@ -5048,7 +5282,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)  			continue;  		} -		item = btrfs_item_nr(leaf, slot); +		item = btrfs_item_nr(slot);  		btrfs_item_key_to_cpu(leaf, &found_key, slot);  		if (found_key.objectid != key.objectid) @@ -5345,9 +5579,9 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,  	struct btrfs_inode_ref *ref;  	struct btrfs_key key[2];  	u32 sizes[2]; +	int nitems = name ? 
2 : 1;  	unsigned long ptr;  	int ret; -	int owner;  	path = btrfs_alloc_path();  	if (!path) @@ -5365,7 +5599,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,  	 */  	inode->i_ino = objectid; -	if (dir) { +	if (dir && name) {  		trace_btrfs_inode_request(dir);  		ret = btrfs_set_inode_index(dir, index); @@ -5374,6 +5608,8 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,  			iput(inode);  			return ERR_PTR(ret);  		} +	} else if (dir) { +		*index = 0;  	}  	/*  	 * index_cnt is ignored for everything but a dir, @@ -5381,6 +5617,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,  	 * number  	 */  	BTRFS_I(inode)->index_cnt = 2; +	BTRFS_I(inode)->dir_index = *index;  	BTRFS_I(inode)->root = root;  	BTRFS_I(inode)->generation = trans->transid;  	inode->i_generation = BTRFS_I(inode)->generation; @@ -5393,30 +5630,28 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,  	 */  	set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags); -	if (S_ISDIR(mode)) -		owner = 0; -	else -		owner = 1; -  	key[0].objectid = objectid;  	btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);  	key[0].offset = 0; -	/* -	 * Start new inodes with an inode_ref. This is slightly more -	 * efficient for small numbers of hard links since they will -	 * be packed into one item. Extended refs will kick in if we -	 * add more hard links than can fit in the ref item. -	 */ -	key[1].objectid = objectid; -	btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY); -	key[1].offset = ref_objectid; -  	sizes[0] = sizeof(struct btrfs_inode_item); -	sizes[1] = name_len + sizeof(*ref); + +	if (name) { +		/* +		 * Start new inodes with an inode_ref. This is slightly more +		 * efficient for small numbers of hard links since they will +		 * be packed into one item. Extended refs will kick in if we +		 * add more hard links than can fit in the ref item. 
+		 */ +		key[1].objectid = objectid; +		btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY); +		key[1].offset = ref_objectid; + +		sizes[1] = name_len + sizeof(*ref); +	}  	path->leave_spinning = 1; -	ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2); +	ret = btrfs_insert_empty_items(trans, root, path, key, sizes, nitems);  	if (ret != 0)  		goto fail; @@ -5429,12 +5664,14 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,  			     sizeof(*inode_item));  	fill_inode_item(trans, path->nodes[0], inode_item, inode); -	ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, -			     struct btrfs_inode_ref); -	btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); -	btrfs_set_inode_ref_index(path->nodes[0], ref, *index); -	ptr = (unsigned long)(ref + 1); -	write_extent_buffer(path->nodes[0], name, ptr, name_len); +	if (name) { +		ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, +				     struct btrfs_inode_ref); +		btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len); +		btrfs_set_inode_ref_index(path->nodes[0], ref, *index); +		ptr = (unsigned long)(ref + 1); +		write_extent_buffer(path->nodes[0], name, ptr, name_len); +	}  	btrfs_mark_buffer_dirty(path->nodes[0]);  	btrfs_free_path(path); @@ -5454,7 +5691,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,  				BTRFS_INODE_NODATASUM;  	} -	insert_inode_hash(inode); +	btrfs_insert_inode_hash(inode);  	inode_tree_add(inode);  	trace_btrfs_inode_new(inode); @@ -5462,9 +5699,15 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,  	btrfs_update_root_times(trans, root); +	ret = btrfs_inode_inherit_props(trans, inode, dir); +	if (ret) +		btrfs_err(root->fs_info, +			  "error inheriting props for ino %llu (root %llu): %d", +			  btrfs_ino(inode), root->root_key.objectid, ret); +  	return inode;  fail: -	if (dir) +	if (dir && name)  		BTRFS_I(dir)->index_cnt--;  	btrfs_free_path(path);  	iput(inode); @@ -5621,6 +5864,7 @@ 
static int btrfs_mknod(struct inode *dir, struct dentry *dentry,  	}  out_unlock:  	btrfs_end_transaction(trans, root); +	btrfs_balance_delayed_items(root);  	btrfs_btree_balance_dirty(root);  	if (drop_inode) {  		inode_dec_link_count(inode); @@ -5694,6 +5938,7 @@ out_unlock:  		inode_dec_link_count(inode);  		iput(inode);  	} +	btrfs_balance_delayed_items(root);  	btrfs_btree_balance_dirty(root);  	return err;  } @@ -5730,7 +5975,9 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,  		goto fail;  	} -	btrfs_inc_nlink(inode); +	/* There are several dir indexes for this inode, clear the cache. */ +	BTRFS_I(inode)->dir_index = 0ULL; +	inc_nlink(inode);  	inode_inc_iversion(inode);  	inode->i_ctime = CURRENT_TIME;  	ihold(inode); @@ -5745,11 +5992,21 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,  		err = btrfs_update_inode(trans, root, inode);  		if (err)  			goto fail; +		if (inode->i_nlink == 1) { +			/* +			 * If new hard link count is 1, it's a file created +			 * with open(2) O_TMPFILE flag. 
+			 */ +			err = btrfs_orphan_del(trans, inode); +			if (err) +				goto fail; +		}  		d_instantiate(dentry, inode);  		btrfs_log_new_name(trans, inode, NULL, parent);  	}  	btrfs_end_transaction(trans, root); +	btrfs_balance_delayed_items(root);  fail:  	if (drop_inode) {  		inode_dec_link_count(inode); @@ -5816,6 +6073,7 @@ out_fail:  	btrfs_end_transaction(trans, root);  	if (drop_on_err)  		iput(inode); +	btrfs_balance_delayed_items(root);  	btrfs_btree_balance_dirty(root);  	return err;  } @@ -5860,7 +6118,7 @@ static noinline int uncompress_inline(struct btrfs_path *path,  	compress_type = btrfs_file_extent_compression(leaf, item);  	max_size = btrfs_file_extent_ram_bytes(leaf, item);  	inline_size = btrfs_file_extent_inline_item_len(leaf, -					btrfs_item_nr(leaf, path->slots[0])); +					btrfs_item_nr(path->slots[0]));  	tmp = kmalloc(inline_size, GFP_NOFS);  	if (!tmp)  		return -ENOMEM; @@ -5871,16 +6129,8 @@ static noinline int uncompress_inline(struct btrfs_path *path,  	max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size);  	ret = btrfs_decompress(compress_type, tmp, page,  			       extent_offset, inline_size, max_size); -	if (ret) { -		char *kaddr = kmap_atomic(page); -		unsigned long copy_size = min_t(u64, -				  PAGE_CACHE_SIZE - pg_offset, -				  max_size - extent_offset); -		memset(kaddr + pg_offset, 0, copy_size); -		kunmap_atomic(kaddr); -	}  	kfree(tmp); -	return 0; +	return ret;  }  /* @@ -5898,7 +6148,6 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,  {  	int ret;  	int err = 0; -	u64 bytenr;  	u64 extent_start = 0;  	u64 extent_end = 0;  	u64 objectid = btrfs_ino(inode); @@ -5912,7 +6161,7 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,  	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;  	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;  	struct btrfs_trans_handle *trans = NULL; -	int compress_type; +	const bool new_inline = !page || create;  again:  
	read_lock(&em_tree->lock); @@ -5974,22 +6223,28 @@ again:  	found_type = btrfs_key_type(&found_key);  	if (found_key.objectid != objectid ||  	    found_type != BTRFS_EXTENT_DATA_KEY) { -		goto not_found; +		/* +		 * If we backup past the first extent we want to move forward +		 * and see if there is an extent in front of us, otherwise we'll +		 * say there is a hole for our whole search range which can +		 * cause problems. +		 */ +		extent_end = start; +		goto next;  	}  	found_type = btrfs_file_extent_type(leaf, item);  	extent_start = found_key.offset; -	compress_type = btrfs_file_extent_compression(leaf, item);  	if (found_type == BTRFS_FILE_EXTENT_REG ||  	    found_type == BTRFS_FILE_EXTENT_PREALLOC) {  		extent_end = extent_start +  		       btrfs_file_extent_num_bytes(leaf, item);  	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {  		size_t size; -		size = btrfs_file_extent_inline_len(leaf, item); +		size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);  		extent_end = ALIGN(extent_start + size, root->sectorsize);  	} - +next:  	if (start >= extent_end) {  		path->slots[0]++;  		if (path->slots[0] >= btrfs_header_nritems(leaf)) { @@ -6014,32 +6269,10 @@ again:  		goto not_found_em;  	} -	em->ram_bytes = btrfs_file_extent_ram_bytes(leaf, item); +	btrfs_extent_item_to_extent_map(inode, path, item, new_inline, em); +  	if (found_type == BTRFS_FILE_EXTENT_REG ||  	    found_type == BTRFS_FILE_EXTENT_PREALLOC) { -		em->start = extent_start; -		em->len = extent_end - extent_start; -		em->orig_start = extent_start - -				 btrfs_file_extent_offset(leaf, item); -		em->orig_block_len = btrfs_file_extent_disk_num_bytes(leaf, -								      item); -		bytenr = btrfs_file_extent_disk_bytenr(leaf, item); -		if (bytenr == 0) { -			em->block_start = EXTENT_MAP_HOLE; -			goto insert; -		} -		if (compress_type != BTRFS_COMPRESS_NONE) { -			set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); -			em->compress_type = compress_type; -			em->block_start = bytenr; -			
em->block_len = em->orig_block_len; -		} else { -			bytenr += btrfs_file_extent_offset(leaf, item); -			em->block_start = bytenr; -			em->block_len = em->len; -			if (found_type == BTRFS_FILE_EXTENT_PREALLOC) -				set_bit(EXTENT_FLAG_PREALLOC, &em->flags); -		}  		goto insert;  	} else if (found_type == BTRFS_FILE_EXTENT_INLINE) {  		unsigned long ptr; @@ -6048,14 +6281,10 @@ again:  		size_t extent_offset;  		size_t copy_size; -		em->block_start = EXTENT_MAP_INLINE; -		if (!page || create) { -			em->start = extent_start; -			em->len = extent_end - extent_start; +		if (new_inline)  			goto out; -		} -		size = btrfs_file_extent_inline_len(leaf, item); +		size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);  		extent_offset = page_offset(page) + pg_offset - extent_start;  		copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,  				size - extent_offset); @@ -6063,10 +6292,6 @@ again:  		em->len = ALIGN(copy_size, root->sectorsize);  		em->orig_block_len = em->len;  		em->orig_start = em->start; -		if (compress_type) { -			set_bit(EXTENT_FLAG_COMPRESSED, &em->flags); -			em->compress_type = compress_type; -		}  		ptr = btrfs_file_extent_inline_start(item) + extent_offset;  		if (create == 0 && !PageUptodate(page)) {  			if (btrfs_file_extent_compression(leaf, item) != @@ -6074,7 +6299,10 @@ again:  				ret = uncompress_inline(path, inode, page,  							pg_offset,  							extent_offset, item); -				BUG_ON(ret); /* -ENOMEM */ +				if (ret) { +					err = ret; +					goto out; +				}  			} else {  				map = kmap(page);  				read_extent_buffer(leaf, map + pg_offset, ptr, @@ -6110,8 +6338,6 @@ again:  		set_extent_uptodate(io_tree, em->start,  				    extent_map_end(em) - 1, NULL, GFP_NOFS);  		goto insert; -	} else { -		WARN(1, KERN_ERR "btrfs unknown found_type %d\n", found_type);  	}  not_found:  	em->start = start; @@ -6173,8 +6399,7 @@ insert:  	write_unlock(&em_tree->lock);  out: -	if (em) -		trace_btrfs_get_extent(root, em); +	trace_btrfs_get_extent(root, 
em);  	if (path)  		btrfs_free_path(path); @@ -6249,7 +6474,7 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag  	/* adjust the range_start to make sure it doesn't  	 * go backwards from the start they passed in  	 */ -	range_start = max(start,range_start); +	range_start = max(start, range_start);  	found = found_end - range_start;  	if (found > 0) { @@ -6329,21 +6554,21 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,  	alloc_hint = get_extent_allocation_hint(inode, start, len);  	ret = btrfs_reserve_extent(root, len, root->sectorsize, 0, -				   alloc_hint, &ins, 1); +				   alloc_hint, &ins, 1, 1);  	if (ret)  		return ERR_PTR(ret);  	em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,  			      ins.offset, ins.offset, ins.offset, 0);  	if (IS_ERR(em)) { -		btrfs_free_reserved_extent(root, ins.objectid, ins.offset); +		btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);  		return em;  	}  	ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,  					   ins.offset, ins.offset, 0);  	if (ret) { -		btrfs_free_reserved_extent(root, ins.objectid, ins.offset); +		btrfs_free_reserved_extent(root, ins.objectid, ins.offset, 1);  		free_extent_map(em);  		return ERR_PTR(ret);  	} @@ -6364,6 +6589,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,  	int ret;  	struct extent_buffer *leaf;  	struct btrfs_root *root = BTRFS_I(inode)->root; +	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;  	struct btrfs_file_extent_item *fi;  	struct btrfs_key key;  	u64 disk_bytenr; @@ -6373,6 +6599,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,  	int slot;  	int found_type;  	bool nocow = (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW); +  	path = btrfs_alloc_path();  	if (!path)  		return -ENOMEM; @@ -6416,6 +6643,10 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,  	if (!nocow && found_type == 
BTRFS_FILE_EXTENT_REG)  		goto out; +	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); +	if (extent_end <= offset) +		goto out; +  	disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);  	if (disk_bytenr == 0)  		goto out; @@ -6433,11 +6664,24 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,  		*ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);  	} -	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); -  	if (btrfs_extent_readonly(root, disk_bytenr))  		goto out; +	num_bytes = min(offset + *len, extent_end) - offset; +	if (!nocow && found_type == BTRFS_FILE_EXTENT_PREALLOC) { +		u64 range_end; + +		range_end = round_up(offset + num_bytes, root->sectorsize) - 1; +		ret = test_range_bit(io_tree, offset, range_end, +				     EXTENT_DELALLOC, 0, NULL); +		if (ret) { +			ret = -EAGAIN; +			goto out; +		} +	} + +	btrfs_release_path(path); +  	/*  	 * look for other files referencing this extent, if we  	 * find any we must cow @@ -6464,7 +6708,6 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,  	 */  	disk_bytenr += backref_offset;  	disk_bytenr += offset - key.offset; -	num_bytes = min(offset + *len, extent_end) - offset;  	if (csum_exist_in_range(root, disk_bytenr, num_bytes))  				goto out;  	/* @@ -6478,6 +6721,76 @@ out:  	return ret;  } +bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end) +{ +	struct radix_tree_root *root = &inode->i_mapping->page_tree; +	int found = false; +	void **pagep = NULL; +	struct page *page = NULL; +	int start_idx; +	int end_idx; + +	start_idx = start >> PAGE_CACHE_SHIFT; + +	/* +	 * end is the last byte in the last page.  end == start is legal +	 */ +	end_idx = end >> PAGE_CACHE_SHIFT; + +	rcu_read_lock(); + +	/* Most of the code in this while loop is lifted from +	 * find_get_page.  It's been modified to begin searching from a +	 * page and return just the first page found in that range.  
If the +	 * found idx is less than or equal to the end idx then we know that +	 * a page exists.  If no pages are found or if those pages are +	 * outside of the range then we're fine (yay!) */ +	while (page == NULL && +	       radix_tree_gang_lookup_slot(root, &pagep, NULL, start_idx, 1)) { +		page = radix_tree_deref_slot(pagep); +		if (unlikely(!page)) +			break; + +		if (radix_tree_exception(page)) { +			if (radix_tree_deref_retry(page)) { +				page = NULL; +				continue; +			} +			/* +			 * Otherwise, shmem/tmpfs must be storing a swap entry +			 * here as an exceptional entry: so return it without +			 * attempting to raise page count. +			 */ +			page = NULL; +			break; /* TODO: Is this relevant for this use case? */ +		} + +		if (!page_cache_get_speculative(page)) { +			page = NULL; +			continue; +		} + +		/* +		 * Has the page moved? +		 * This is part of the lockless pagecache protocol. See +		 * include/linux/pagemap.h for details. +		 */ +		if (unlikely(page != *pagep)) { +			page_cache_release(page); +			page = NULL; +		} +	} + +	if (page) { +		if (page->index <= end_idx) +			found = true; +		page_cache_release(page); +	} + +	rcu_read_unlock(); +	return found; +} +  static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,  			      struct extent_state **cached_state, int writing)  { @@ -6502,10 +6815,9 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,  		 * invalidate needs to happen so that reads after a write do not  		 * get stale data.  		 
*/ -		if (!ordered && (!writing || -		    !test_range_bit(&BTRFS_I(inode)->io_tree, -				    lockstart, lockend, EXTENT_UPTODATE, 0, -				    *cached_state))) +		if (!ordered && +		    (!writing || +		     !btrfs_page_exists_in_range(inode, lockstart, lockend)))  			break;  		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, @@ -6765,17 +7077,16 @@ unlock_err:  static void btrfs_endio_direct_read(struct bio *bio, int err)  {  	struct btrfs_dio_private *dip = bio->bi_private; -	struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; -	struct bio_vec *bvec = bio->bi_io_vec; +	struct bio_vec *bvec;  	struct inode *inode = dip->inode;  	struct btrfs_root *root = BTRFS_I(inode)->root;  	struct bio *dio_bio;  	u32 *csums = (u32 *)dip->csum; -	int index = 0;  	u64 start; +	int i;  	start = dip->logical_offset; -	do { +	bio_for_each_segment_all(bvec, bio, i) {  		if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {  			struct page *page = bvec->bv_page;  			char *kaddr; @@ -6791,18 +7102,16 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)  			local_irq_restore(flags);  			flush_dcache_page(bvec->bv_page); -			if (csum != csums[index]) { +			if (csum != csums[i]) {  				btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",  					  btrfs_ino(inode), start, csum, -					  csums[index]); +					  csums[i]);  				err = -EIO;  			}  		}  		start += bvec->bv_len; -		bvec++; -		index++; -	} while (bvec <= bvec_end); +	}  	unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,  		      dip->logical_offset + dip->bytes - 1); @@ -6837,10 +7146,9 @@ again:  	if (!ret)  		goto out_test; -	ordered->work.func = finish_ordered_fn; -	ordered->work.flags = 0; -	btrfs_queue_worker(&root->fs_info->endio_write_workers, -			   &ordered->work); +	btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL); +	btrfs_queue_work(root->fs_info->endio_write_workers, +			 &ordered->work);  out_test:  	/*  	 * our bio might 
span multiple ordered extents.  If we haven't @@ -6880,17 +7188,18 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)  	struct btrfs_dio_private *dip = bio->bi_private;  	if (err) { -		printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu " -		      "sector %#Lx len %u err no %d\n", +		btrfs_err(BTRFS_I(dip->inode)->root->fs_info, +			  "direct IO failed ino %llu rw %lu sector %#Lx len %u err no %d",  		      btrfs_ino(dip->inode), bio->bi_rw, -		      (unsigned long long)bio->bi_sector, bio->bi_size, err); +		      (unsigned long long)bio->bi_iter.bi_sector, +		      bio->bi_iter.bi_size, err);  		dip->errors = 1;  		/*  		 * before atomic variable goto zero, we must make sure  		 * dip->errors is perceived to be set.  		 */ -		smp_mb__before_atomic_dec(); +		smp_mb__before_atomic();  	}  	/* if there are more bios still pending for this dio, just exit */ @@ -6974,7 +7283,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,  	struct bio *bio;  	struct bio *orig_bio = dip->orig_bio;  	struct bio_vec *bvec = orig_bio->bi_io_vec; -	u64 start_sector = orig_bio->bi_sector; +	u64 start_sector = orig_bio->bi_iter.bi_sector;  	u64 file_offset = dip->logical_offset;  	u64 submit_len = 0;  	u64 map_length; @@ -6982,7 +7291,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,  	int ret = 0;  	int async_submit = 0; -	map_length = orig_bio->bi_size; +	map_length = orig_bio->bi_iter.bi_size;  	ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,  			      &map_length, NULL, 0);  	if (ret) { @@ -6990,7 +7299,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,  		return -EIO;  	} -	if (map_length >= orig_bio->bi_size) { +	if (map_length >= orig_bio->bi_iter.bi_size) {  		bio = orig_bio;  		goto submit;  	} @@ -7042,7 +7351,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,  			bio->bi_private = dip;  			bio->bi_end_io = btrfs_end_dio_bio; -			map_length 
= orig_bio->bi_size; +			map_length = orig_bio->bi_iter.bi_size;  			ret = btrfs_map_block(root->fs_info, rw,  					      start_sector << 9,  					      &map_length, NULL, 0); @@ -7052,7 +7361,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,  			}  		} else {  			submit_len += bvec->bv_len; -			nr_pages ++; +			nr_pages++;  			bvec++;  		}  	} @@ -7070,7 +7379,7 @@ out_err:  	 * before atomic variable goto zero, we must  	 * make sure dip->errors is perceived to be set.  	 */ -	smp_mb__before_atomic_dec(); +	smp_mb__before_atomic();  	if (atomic_dec_and_test(&dip->pending_bios))  		bio_io_error(dip->orig_bio); @@ -7100,7 +7409,8 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,  	if (!skip_sum && !write) {  		csum_size = btrfs_super_csum_size(root->fs_info->super_copy); -		sum_len = dio_bio->bi_size >> inode->i_sb->s_blocksize_bits; +		sum_len = dio_bio->bi_iter.bi_size >> +			inode->i_sb->s_blocksize_bits;  		sum_len *= csum_size;  	} else {  		sum_len = 0; @@ -7115,8 +7425,8 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,  	dip->private = dio_bio->bi_private;  	dip->inode = inode;  	dip->logical_offset = file_offset; -	dip->bytes = dio_bio->bi_size; -	dip->disk_bytenr = (u64)dio_bio->bi_sector << 9; +	dip->bytes = dio_bio->bi_iter.bi_size; +	dip->disk_bytenr = (u64)dio_bio->bi_iter.bi_sector << 9;  	io_bio->bi_private = dip;  	dip->errors = 0;  	dip->orig_bio = io_bio; @@ -7146,7 +7456,7 @@ free_ordered:  		if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&  		    !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))  			btrfs_free_reserved_extent(root, ordered->start, -						   ordered->disk_len); +						   ordered->disk_len, 1);  		btrfs_put_ordered_extent(ordered);  		btrfs_put_ordered_extent(ordered);  	} @@ -7154,39 +7464,30 @@ free_ordered:  }  static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb, -			const struct iovec *iov, loff_t offset, -			unsigned long 
nr_segs) +			const struct iov_iter *iter, loff_t offset)  {  	int seg;  	int i; -	size_t size; -	unsigned long addr;  	unsigned blocksize_mask = root->sectorsize - 1;  	ssize_t retval = -EINVAL; -	loff_t end = offset;  	if (offset & blocksize_mask)  		goto out; -	/* Check the memory alignment.  Blocks cannot straddle pages */ -	for (seg = 0; seg < nr_segs; seg++) { -		addr = (unsigned long)iov[seg].iov_base; -		size = iov[seg].iov_len; -		end += size; -		if ((addr & blocksize_mask) || (size & blocksize_mask)) -			goto out; - -		/* If this is a write we don't need to check anymore */ -		if (rw & WRITE) -			continue; +	if (iov_iter_alignment(iter) & blocksize_mask) +		goto out; -		/* -		 * Check to make sure we don't have duplicate iov_base's in this -		 * iovec, if so return EINVAL, otherwise we'll get csum errors -		 * when reading back. -		 */ -		for (i = seg + 1; i < nr_segs; i++) { -			if (iov[seg].iov_base == iov[i].iov_base) +	/* If this is a write we don't need to check anymore */ +	if (rw & WRITE) +		return 0; +	/* +	 * Check to make sure we don't have duplicate iov_base's in this +	 * iovec, if so return EINVAL, otherwise we'll get csum errors +	 * when reading back. 
+	 */ +	for (seg = 0; seg < iter->nr_segs; seg++) { +		for (i = seg + 1; i < iter->nr_segs; i++) { +			if (iter->iov[seg].iov_base == iter->iov[i].iov_base)  				goto out;  		}  	} @@ -7196,8 +7497,7 @@ out:  }  static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, -			const struct iovec *iov, loff_t offset, -			unsigned long nr_segs) +			struct iov_iter *iter, loff_t offset)  {  	struct file *file = iocb->ki_filp;  	struct inode *inode = file->f_mapping->host; @@ -7207,21 +7507,22 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,  	bool relock = false;  	ssize_t ret; -	if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, -			    offset, nr_segs)) +	if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iter, offset))  		return 0;  	atomic_inc(&inode->i_dio_count); -	smp_mb__after_atomic_inc(); +	smp_mb__after_atomic();  	/* -	 * The generic stuff only does filemap_write_and_wait_range, which isn't -	 * enough if we've written compressed pages to this area, so we need to -	 * call btrfs_wait_ordered_range to make absolutely sure that any -	 * outstanding dirty pages are on disk. +	 * The generic stuff only does filemap_write_and_wait_range, which +	 * isn't enough if we've written compressed pages to this area, so +	 * we need to flush the dirty pages again to make absolutely sure +	 * that any outstanding dirty pages are on disk.  	 
*/ -	count = iov_length(iov, nr_segs); -	btrfs_wait_ordered_range(inode, offset, count); +	count = iov_iter_count(iter); +	if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, +		     &BTRFS_I(inode)->runtime_flags)) +		filemap_fdatawrite_range(inode->i_mapping, offset, count);  	if (rw & WRITE) {  		/* @@ -7245,7 +7546,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,  	ret = __blockdev_direct_IO(rw, iocb, inode,  			BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, -			iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, +			iter, offset, btrfs_get_blocks_direct, NULL,  			btrfs_submit_direct, flags);  	if (rw & WRITE) {  		if (ret < 0 && ret != -EIOCBQUEUED) @@ -7351,6 +7652,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,  	struct extent_state *cached_state = NULL;  	u64 page_start = page_offset(page);  	u64 page_end = page_start + PAGE_CACHE_SIZE - 1; +	int inode_evicting = inode->i_state & I_FREEING;  	/*  	 * we have the page locked, so new writeback can't start, @@ -7366,17 +7668,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,  		btrfs_releasepage(page, GFP_NOFS);  		return;  	} -	lock_extent_bits(tree, page_start, page_end, 0, &cached_state); -	ordered = btrfs_lookup_ordered_extent(inode, page_offset(page)); + +	if (!inode_evicting) +		lock_extent_bits(tree, page_start, page_end, 0, &cached_state); +	ordered = btrfs_lookup_ordered_extent(inode, page_start);  	if (ordered) {  		/*  		 * IO on this page will never be started, so we need  		 * to account for any ordered extents now  		 */ -		clear_extent_bit(tree, page_start, page_end, -				 EXTENT_DIRTY | EXTENT_DELALLOC | -				 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | -				 EXTENT_DEFRAG, 1, 0, &cached_state, GFP_NOFS); +		if (!inode_evicting) +			clear_extent_bit(tree, page_start, page_end, +					 EXTENT_DIRTY | EXTENT_DELALLOC | +					 EXTENT_LOCKED | EXTENT_DO_ACCOUNTING | +					 EXTENT_DEFRAG, 1, 0, &cached_state, +					 GFP_NOFS);  		
/*  		 * whoever cleared the private bit is responsible  		 * for the finish_ordered_io @@ -7400,14 +7706,22 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,  				btrfs_finish_ordered_io(ordered);  		}  		btrfs_put_ordered_extent(ordered); -		cached_state = NULL; -		lock_extent_bits(tree, page_start, page_end, 0, &cached_state); +		if (!inode_evicting) { +			cached_state = NULL; +			lock_extent_bits(tree, page_start, page_end, 0, +					 &cached_state); +		} +	} + +	if (!inode_evicting) { +		clear_extent_bit(tree, page_start, page_end, +				 EXTENT_LOCKED | EXTENT_DIRTY | +				 EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | +				 EXTENT_DEFRAG, 1, 1, +				 &cached_state, GFP_NOFS); + +		__btrfs_releasepage(page, GFP_NOFS);  	} -	clear_extent_bit(tree, page_start, page_end, -		 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | -		 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 1, 1, -		 &cached_state, GFP_NOFS); -	__btrfs_releasepage(page, GFP_NOFS);  	ClearPageChecked(page);  	if (PagePrivate(page)) { @@ -7562,7 +7876,10 @@ static int btrfs_truncate(struct inode *inode)  	u64 mask = root->sectorsize - 1;  	u64 min_size = btrfs_calc_trunc_metadata_size(root, 1); -	btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); +	ret = btrfs_wait_ordered_range(inode, inode->i_size & (~mask), +				       (u64)-1); +	if (ret) +		return ret;  	/*  	 * Yes ladies and gentelment, this is indeed ugly.  The fact is we have @@ -7714,7 +8031,9 @@ out:   * create a new subvolume directory/inode (helper for the ioctl).   
*/  int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, -			     struct btrfs_root *new_root, u64 new_dirid) +			     struct btrfs_root *new_root, +			     struct btrfs_root *parent_root, +			     u64 new_dirid)  {  	struct inode *inode;  	int err; @@ -7732,6 +8051,12 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,  	set_nlink(inode, 1);  	btrfs_i_size_write(inode, 0); +	err = btrfs_subvol_inherit_props(trans, new_root, parent_root); +	if (err) +		btrfs_err(new_root->fs_info, +			  "error inheriting subvolume %llu properties: %d", +			  new_root->root_key.objectid, err); +  	err = btrfs_update_inode(trans, new_root, inode);  	iput(inode); @@ -7757,6 +8082,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)  	ei->flags = 0;  	ei->csum_bytes = 0;  	ei->index_cnt = (u64)-1; +	ei->dir_index = 0;  	ei->last_unlink_trans = 0;  	ei->last_log_commit = 0; @@ -7786,6 +8112,14 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)  	return inode;  } +#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS +void btrfs_test_destroy_inode(struct inode *inode) +{ +	btrfs_drop_extent_cache(inode, 0, (u64)-1, 0); +	kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode)); +} +#endif +  static void btrfs_i_callback(struct rcu_head *head)  {  	struct inode *inode = container_of(head, struct inode, i_rcu); @@ -7856,8 +8190,7 @@ int btrfs_drop_inode(struct inode *inode)  		return 1;  	/* the snap/subvol tree is on deleting */ -	if (btrfs_root_refs(&root->root_item) == 0 && -	    root != root->fs_info->tree_root) +	if (btrfs_root_refs(&root->root_item) == 0)  		return 1;  	else  		return generic_drop_inode(inode); @@ -7986,7 +8319,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,  	/* check for collisions, even if the  name isn't there */ -	ret = btrfs_check_dir_item_collision(root, new_dir->i_ino, +	ret = btrfs_check_dir_item_collision(dest, new_dir->i_ino,  			     new_dentry->d_name.name,  			     new_dentry->d_name.len); @@ -7994,8 
+8327,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,  		if (ret == -EEXIST) {  			/* we shouldn't get  			 * eexist without a new_inode */ -			if (!new_inode) { -				WARN_ON(1); +			if (WARN_ON(!new_inode)) {  				return ret;  			}  		} else { @@ -8038,9 +8370,10 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,  	if (ret)  		goto out_fail; +	BTRFS_I(old_inode)->dir_index = 0ULL;  	if (unlikely(old_ino == BTRFS_FIRST_FREE_OBJECTID)) {  		/* force full log commit if subvolume involved. */ -		root->fs_info->last_trans_log_full_commit = trans->transid; +		btrfs_set_log_full_commit(root->fs_info, trans);  	} else {  		ret = btrfs_insert_inode_ref(trans, dest,  					     new_dentry->d_name.name, @@ -8126,6 +8459,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,  		goto out_fail;  	} +	if (old_inode->i_nlink == 1) +		BTRFS_I(old_inode)->dir_index = index; +  	if (old_ino != BTRFS_FIRST_FREE_OBJECTID) {  		struct dentry *parent = new_dentry->d_parent;  		btrfs_log_new_name(trans, old_inode, old_dir, parent); @@ -8143,18 +8479,24 @@ out_notrans:  static void btrfs_run_delalloc_work(struct btrfs_work *work)  {  	struct btrfs_delalloc_work *delalloc_work; +	struct inode *inode;  	delalloc_work = container_of(work, struct btrfs_delalloc_work,  				     work); -	if (delalloc_work->wait) -		btrfs_wait_ordered_range(delalloc_work->inode, 0, (u64)-1); -	else -		filemap_flush(delalloc_work->inode->i_mapping); +	inode = delalloc_work->inode; +	if (delalloc_work->wait) { +		btrfs_wait_ordered_range(inode, 0, (u64)-1); +	} else { +		filemap_flush(inode->i_mapping); +		if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, +			     &BTRFS_I(inode)->runtime_flags)) +			filemap_flush(inode->i_mapping); +	}  	if (delalloc_work->delay_iput) -		btrfs_add_delayed_iput(delalloc_work->inode); +		btrfs_add_delayed_iput(inode);  	else -		iput(delalloc_work->inode); +		iput(inode);  	complete(&delalloc_work->completion); 
 } @@ -8172,7 +8514,7 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode,  	work->inode = inode;  	work->wait = wait;  	work->delay_iput = delay_iput; -	work->work.func = btrfs_run_delalloc_work; +	btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL);  	return work;  } @@ -8187,7 +8529,8 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)   * some fairly slow code that needs optimization. This walks the list   * of all the inodes with pending delalloc and forces them to disk.   */ -static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) +static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput, +				   int nr)  {  	struct btrfs_inode *binode;  	struct inode *inode; @@ -8199,6 +8542,7 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput)  	INIT_LIST_HEAD(&works);  	INIT_LIST_HEAD(&splice); +	mutex_lock(&root->delalloc_mutex);  	spin_lock(&root->delalloc_lock);  	list_splice_init(&root->delalloc_inodes, &splice);  	while (!list_empty(&splice)) { @@ -8224,19 +8568,16 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput)  			goto out;  		}  		list_add_tail(&work->list, &works); -		btrfs_queue_worker(&root->fs_info->flush_workers, -				   &work->work); - +		btrfs_queue_work(root->fs_info->flush_workers, +				 &work->work); +		ret++; +		if (nr != -1 && ret >= nr) +			goto out;  		cond_resched();  		spin_lock(&root->delalloc_lock);  	}  	spin_unlock(&root->delalloc_lock); -	list_for_each_entry_safe(work, next, &works, list) { -		list_del_init(&work->list); -		btrfs_wait_and_free_delalloc_work(work); -	} -	return 0;  out:  	list_for_each_entry_safe(work, next, &works, list) {  		list_del_init(&work->list); @@ -8248,6 +8589,7 @@ out:  		list_splice_tail(&splice, &root->delalloc_inodes);  		spin_unlock(&root->delalloc_lock);  	} +	mutex_unlock(&root->delalloc_mutex);  	return ret;  } @@ -8255,10 +8597,12 @@ int 
btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)  {  	int ret; -	if (root->fs_info->sb->s_flags & MS_RDONLY) +	if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state))  		return -EROFS; -	ret = __start_delalloc_inodes(root, delay_iput); +	ret = __start_delalloc_inodes(root, delay_iput, -1); +	if (ret > 0) +		ret = 0;  	/*  	 * the filemap_flush will queue IO into the worker threads, but  	 * we have to make sure the IO is actually started and that @@ -8275,21 +8619,22 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)  	return ret;  } -int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info, -				    int delay_iput) +int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput, +			       int nr)  {  	struct btrfs_root *root;  	struct list_head splice;  	int ret; -	if (fs_info->sb->s_flags & MS_RDONLY) +	if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))  		return -EROFS;  	INIT_LIST_HEAD(&splice); +	mutex_lock(&fs_info->delalloc_root_mutex);  	spin_lock(&fs_info->delalloc_root_lock);  	list_splice_init(&fs_info->delalloc_roots, &splice); -	while (!list_empty(&splice)) { +	while (!list_empty(&splice) && nr) {  		root = list_first_entry(&splice, struct btrfs_root,  					delalloc_root);  		root = btrfs_grab_fs_root(root); @@ -8298,15 +8643,20 @@ int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info,  			       &fs_info->delalloc_roots);  		spin_unlock(&fs_info->delalloc_root_lock); -		ret = __start_delalloc_inodes(root, delay_iput); +		ret = __start_delalloc_inodes(root, delay_iput, nr);  		btrfs_put_fs_root(root); -		if (ret) +		if (ret < 0)  			goto out; +		if (nr != -1) { +			nr -= ret; +			WARN_ON(nr < 0); +		}  		spin_lock(&fs_info->delalloc_root_lock);  	}  	spin_unlock(&fs_info->delalloc_root_lock); +	ret = 0;  	atomic_inc(&fs_info->async_submit_draining);  	while (atomic_read(&fs_info->nr_async_submits) ||  	      atomic_read(&fs_info->async_delalloc_pages)) { @@ 
-8315,13 +8665,13 @@ int btrfs_start_all_delalloc_inodes(struct btrfs_fs_info *fs_info,  		    atomic_read(&fs_info->async_delalloc_pages) == 0));  	}  	atomic_dec(&fs_info->async_submit_draining); -	return 0;  out:  	if (!list_empty_careful(&splice)) {  		spin_lock(&fs_info->delalloc_root_lock);  		list_splice_tail(&splice, &fs_info->delalloc_roots);  		spin_unlock(&fs_info->delalloc_root_lock);  	} +	mutex_unlock(&fs_info->delalloc_root_mutex);  	return ret;  } @@ -8336,14 +8686,14 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,  	int err;  	int drop_inode = 0;  	u64 objectid; -	u64 index = 0 ; +	u64 index = 0;  	int name_len;  	int datasize;  	unsigned long ptr;  	struct btrfs_file_extent_item *ei;  	struct extent_buffer *leaf; -	name_len = strlen(symname) + 1; +	name_len = strlen(symname);  	if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))  		return -ENAMETOOLONG; @@ -8431,7 +8781,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,  	inode->i_mapping->a_ops = &btrfs_symlink_aops;  	inode->i_mapping->backing_dev_info = &root->fs_info->bdi;  	inode_set_bytes(inode, name_len); -	btrfs_i_size_write(inode, name_len - 1); +	btrfs_i_size_write(inode, name_len);  	err = btrfs_update_inode(trans, root, inode);  	if (err)  		drop_inode = 1; @@ -8477,7 +8827,7 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,  		cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);  		cur_bytes = max(cur_bytes, min_size);  		ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0, -					   *alloc_hint, &ins, 1); +					   *alloc_hint, &ins, 1, 0);  		if (ret) {  			if (own_trans)  				btrfs_end_transaction(trans, root); @@ -8490,6 +8840,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,  						  ins.offset, 0, 0, 0,  						  BTRFS_FILE_EXTENT_PREALLOC);  		if (ret) { +			btrfs_free_reserved_extent(root, ins.objectid, +						   ins.offset, 0);  			btrfs_abort_transaction(trans, root, ret);  			if 
(own_trans)  				btrfs_end_transaction(trans, root); @@ -8599,6 +8951,66 @@ static int btrfs_permission(struct inode *inode, int mask)  	return generic_permission(inode, mask);  } +static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode) +{ +	struct btrfs_trans_handle *trans; +	struct btrfs_root *root = BTRFS_I(dir)->root; +	struct inode *inode = NULL; +	u64 objectid; +	u64 index; +	int ret = 0; + +	/* +	 * 5 units required for adding orphan entry +	 */ +	trans = btrfs_start_transaction(root, 5); +	if (IS_ERR(trans)) +		return PTR_ERR(trans); + +	ret = btrfs_find_free_ino(root, &objectid); +	if (ret) +		goto out; + +	inode = btrfs_new_inode(trans, root, dir, NULL, 0, +				btrfs_ino(dir), objectid, mode, &index); +	if (IS_ERR(inode)) { +		ret = PTR_ERR(inode); +		inode = NULL; +		goto out; +	} + +	ret = btrfs_init_inode_security(trans, inode, dir, NULL); +	if (ret) +		goto out; + +	ret = btrfs_update_inode(trans, root, inode); +	if (ret) +		goto out; + +	inode->i_fop = &btrfs_file_operations; +	inode->i_op = &btrfs_file_inode_operations; + +	inode->i_mapping->a_ops = &btrfs_aops; +	inode->i_mapping->backing_dev_info = &root->fs_info->bdi; +	BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops; + +	ret = btrfs_orphan_add(trans, inode); +	if (ret) +		goto out; + +	d_tmpfile(dentry, inode); +	mark_inode_dirty(inode); + +out: +	btrfs_end_transaction(trans, root); +	if (ret) +		iput(inode); +	btrfs_balance_delayed_items(root); +	btrfs_btree_balance_dirty(root); + +	return ret; +} +  static const struct inode_operations btrfs_dir_inode_operations = {  	.getattr	= btrfs_getattr,  	.lookup		= btrfs_lookup, @@ -8617,12 +9029,15 @@ static const struct inode_operations btrfs_dir_inode_operations = {  	.removexattr	= btrfs_removexattr,  	.permission	= btrfs_permission,  	.get_acl	= btrfs_get_acl, +	.set_acl	= btrfs_set_acl,  	.update_time	= btrfs_update_time, +	.tmpfile        = btrfs_tmpfile,  };  static const struct inode_operations 
btrfs_dir_ro_inode_operations = {  	.lookup		= btrfs_lookup,  	.permission	= btrfs_permission,  	.get_acl	= btrfs_get_acl, +	.set_acl	= btrfs_set_acl,  	.update_time	= btrfs_update_time,  }; @@ -8692,6 +9107,7 @@ static const struct inode_operations btrfs_file_inode_operations = {  	.permission	= btrfs_permission,  	.fiemap		= btrfs_fiemap,  	.get_acl	= btrfs_get_acl, +	.set_acl	= btrfs_set_acl,  	.update_time	= btrfs_update_time,  };  static const struct inode_operations btrfs_special_inode_operations = { @@ -8703,6 +9119,7 @@ static const struct inode_operations btrfs_special_inode_operations = {  	.listxattr	= btrfs_listxattr,  	.removexattr	= btrfs_removexattr,  	.get_acl	= btrfs_get_acl, +	.set_acl	= btrfs_set_acl,  	.update_time	= btrfs_update_time,  };  static const struct inode_operations btrfs_symlink_inode_operations = { @@ -8716,7 +9133,6 @@ static const struct inode_operations btrfs_symlink_inode_operations = {  	.getxattr	= btrfs_getxattr,  	.listxattr	= btrfs_listxattr,  	.removexattr	= btrfs_removexattr, -	.get_acl	= btrfs_get_acl,  	.update_time	= btrfs_update_time,  };  | 
