diff options
Diffstat (limited to 'fs/btrfs/ioctl.c')
| -rw-r--r-- | fs/btrfs/ioctl.c | 1309 | 
1 files changed, 1042 insertions, 267 deletions
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 1a5b9462dd9..47aceb494d1 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -44,7 +44,6 @@  #include <linux/uuid.h>  #include <linux/btrfs.h>  #include <linux/uaccess.h> -#include "compat.h"  #include "ctree.h"  #include "disk-io.h"  #include "transaction.h" @@ -57,6 +56,35 @@  #include "rcu-string.h"  #include "send.h"  #include "dev-replace.h" +#include "props.h" +#include "sysfs.h" +#include "qgroup.h" + +#ifdef CONFIG_64BIT +/* If we have a 32-bit userspace and 64-bit kernel, then the UAPI + * structures are incorrect, as the timespec structure from userspace + * is 4 bytes too small. We define these alternatives here to teach + * the kernel about the 32-bit struct packing. + */ +struct btrfs_ioctl_timespec_32 { +	__u64 sec; +	__u32 nsec; +} __attribute__ ((__packed__)); + +struct btrfs_ioctl_received_subvol_args_32 { +	char	uuid[BTRFS_UUID_SIZE];	/* in */ +	__u64	stransid;		/* in */ +	__u64	rtransid;		/* out */ +	struct btrfs_ioctl_timespec_32 stime; /* in */ +	struct btrfs_ioctl_timespec_32 rtime; /* out */ +	__u64	flags;			/* in */ +	__u64	reserved[16];		/* in */ +} __attribute__ ((__packed__)); + +#define BTRFS_IOC_SET_RECEIVED_SUBVOL_32 _IOWR(BTRFS_IOCTL_MAGIC, 37, \ +				struct btrfs_ioctl_received_subvol_args_32) +#endif +  static int btrfs_clone(struct inode *src, struct inode *inode,  		       u64 off, u64 olen, u64 olen_aligned, u64 destoff); @@ -108,19 +136,22 @@ static unsigned int btrfs_flags_to_ioctl(unsigned int flags)  void btrfs_update_iflags(struct inode *inode)  {  	struct btrfs_inode *ip = BTRFS_I(inode); - -	inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); +	unsigned int new_fl = 0;  	if (ip->flags & BTRFS_INODE_SYNC) -		inode->i_flags |= S_SYNC; +		new_fl |= S_SYNC;  	if (ip->flags & BTRFS_INODE_IMMUTABLE) -		inode->i_flags |= S_IMMUTABLE; +		new_fl |= S_IMMUTABLE;  	if (ip->flags & BTRFS_INODE_APPEND) -		inode->i_flags |= S_APPEND; +		new_fl |= S_APPEND;  	if (ip->flags & BTRFS_INODE_NOATIME) -		inode->i_flags |= S_NOATIME; +		new_fl |= S_NOATIME;  	if (ip->flags & BTRFS_INODE_DIRSYNC) -		inode->i_flags |= S_DIRSYNC; +		new_fl |= S_DIRSYNC; + +	set_mask_bits(&inode->i_flags, +		      S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC, +		      new_fl);  }  /* @@ -191,6 +222,9 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)  	unsigned int i_oldflags;  	umode_t mode; +	if (!inode_owner_or_capable(inode)) +		return -EPERM; +  	if (btrfs_root_readonly(root))  		return -EROFS; @@ -201,9 +235,6 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)  	if (ret)  		return ret; -	if (!inode_owner_or_capable(inode)) -		return -EACCES; -  	ret = mnt_want_write_file(file);  	if (ret)  		return ret; @@ -281,9 +312,25 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)  	if (flags & FS_NOCOMP_FL) {  		ip->flags &= ~BTRFS_INODE_COMPRESS;  		ip->flags |= BTRFS_INODE_NOCOMPRESS; + +		ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0); +		if (ret && ret != -ENODATA) +			goto out_drop;  	} else if (flags & FS_COMPR_FL) { +		const char *comp; +  		ip->flags |= BTRFS_INODE_COMPRESS;  		ip->flags &= ~BTRFS_INODE_NOCOMPRESS; + +		if (root->fs_info->compress_type == BTRFS_COMPRESS_LZO) +			comp = "lzo"; +		else +			comp = "zlib"; +		ret = btrfs_set_prop(inode, "btrfs.compression", +				     comp, strlen(comp), 0); +		if (ret) +			goto out_drop; +  	} else {  		ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);  	} @@ -321,7 +368,7 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)  static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)  { -	struct btrfs_fs_info *fs_info = btrfs_sb(fdentry(file)->d_sb); +	struct btrfs_fs_info *fs_info = btrfs_sb(file_inode(file)->i_sb);  	struct btrfs_device *device;  	struct request_queue *q;  	struct fstrim_range range; @@ -369,9 +416,13 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)  int btrfs_is_empty_uuid(u8 *uuid)  { -	static char empty_uuid[BTRFS_UUID_SIZE] = {0}; +	int i; -	return !memcmp(uuid, empty_uuid, BTRFS_UUID_SIZE); +	for (i = 0; i < BTRFS_UUID_SIZE; i++) { +		if (uuid[i]) +			return 0; +	} +	return 1;  }  static noinline int create_subvol(struct inode *dir, @@ -389,6 +440,7 @@ static noinline int create_subvol(struct inode *dir,  	struct btrfs_root *new_root;  	struct btrfs_block_rsv block_rsv;  	struct timespec cur_time = CURRENT_TIME; +	struct inode *inode;  	int ret;  	int err;  	u64 objectid; @@ -414,7 +466,9 @@ static noinline int create_subvol(struct inode *dir,  	trans = btrfs_start_transaction(root, 0);  	if (IS_ERR(trans)) {  		ret = PTR_ERR(trans); -		goto out; +		btrfs_subvolume_release_metadata(root, &block_rsv, +						 qgroup_reserved); +		return ret;  	}  	trans->block_rsv = &block_rsv;  	trans->bytes_reserved = block_rsv.size; @@ -436,7 +490,7 @@ static noinline int create_subvol(struct inode *dir,  	btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);  	btrfs_set_header_owner(leaf, objectid); -	write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(leaf), +	write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(),  			    BTRFS_FSID_SIZE);  	write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,  			    btrfs_header_chunk_tree_uuid(leaf), @@ -497,7 +551,7 @@ static noinline int create_subvol(struct inode *dir,  	btrfs_record_root_in_trans(trans, new_root); -	ret = btrfs_create_subvol_root(trans, new_root, new_dirid); +	ret = btrfs_create_subvol_root(trans, new_root, root, new_dirid);  	if (ret) {  		/* We potentially lose an unused inode item here */  		btrfs_abort_transaction(trans, root, ret); @@ -539,6 +593,8 @@ static noinline int create_subvol(struct inode *dir,  fail:  	trans->block_rsv = NULL;  	trans->bytes_reserved = 0; +	btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved); +  	if (async_transid) {  		*async_transid = trans->transid;  		err = btrfs_commit_transaction_async(trans, root, 1); @@ -550,13 +606,32 @@ fail:  	if (err && !ret)  		ret = err; -	if (!ret) -		d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); -out: -	btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved); +	if (!ret) { +		inode = btrfs_lookup_dentry(dir, dentry); +		if (IS_ERR(inode)) +			return PTR_ERR(inode); +		d_instantiate(dentry, inode); +	}  	return ret;  } +static void btrfs_wait_nocow_write(struct btrfs_root *root) +{ +	s64 writers; +	DEFINE_WAIT(wait); + +	do { +		prepare_to_wait(&root->subv_writers->wait, &wait, +				TASK_UNINTERRUPTIBLE); + +		writers = percpu_counter_sum(&root->subv_writers->counter); +		if (writers) +			schedule(); + +		finish_wait(&root->subv_writers->wait, &wait); +	} while (writers); +} +  static int create_snapshot(struct btrfs_root *root, struct inode *dir,  			   struct dentry *dentry, char *name, int namelen,  			   u64 *async_transid, bool readonly, @@ -567,18 +642,24 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,  	struct btrfs_trans_handle *trans;  	int ret; -	if (!root->ref_cows) +	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))  		return -EINVAL; +	atomic_inc(&root->will_be_snapshoted); +	smp_mb__after_atomic(); +	btrfs_wait_nocow_write(root); +  	ret = btrfs_start_delalloc_inodes(root, 0);  	if (ret) -		return ret; +		goto out; -	btrfs_wait_ordered_extents(root, 0); +	btrfs_wait_ordered_extents(root, -1);  	pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); -	if (!pending_snapshot) -		return -ENOMEM; +	if (!pending_snapshot) { +		ret = -ENOMEM; +		goto out; +	}  	btrfs_init_block_rsv(&pending_snapshot->block_rsv,  			     BTRFS_BLOCK_RSV_TEMP); @@ -595,7 +676,7 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,  					&pending_snapshot->qgroup_reserved,  					false);  	if (ret) -		goto out; +		goto free;  	pending_snapshot->dentry = dentry;  	pending_snapshot->root = root; @@ -634,20 +715,51 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,  	if (ret)  		goto fail; +	/* +	 * If orphan cleanup did remove any orphans, it means the tree was +	 * modified and therefore the commit root is not the same as the +	 * current root anymore. This is a problem, because send uses the +	 * commit root and therefore can see inode items that don't exist +	 * in the current root anymore, and for example make calls to +	 * btrfs_iget, which will do tree lookups based on the current root +	 * and not on the commit root. Those lookups will fail, returning a +	 * -ESTALE error, and making send fail with that error. So make sure +	 * a send does not see any orphans we have just removed, and that it +	 * will see the same inodes regardless of whether a transaction +	 * commit happened before it started (meaning that the commit root +	 * will be the same as the current root) or not. +	 */ +	if (readonly && pending_snapshot->snap->node != +	    pending_snapshot->snap->commit_root) { +		trans = btrfs_join_transaction(pending_snapshot->snap); +		if (IS_ERR(trans) && PTR_ERR(trans) != -ENOENT) { +			ret = PTR_ERR(trans); +			goto fail; +		} +		if (!IS_ERR(trans)) { +			ret = btrfs_commit_transaction(trans, +						       pending_snapshot->snap); +			if (ret) +				goto fail; +		} +	} +  	inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);  	if (IS_ERR(inode)) {  		ret = PTR_ERR(inode);  		goto fail;  	} -	BUG_ON(!inode); +  	d_instantiate(dentry, inode);  	ret = 0;  fail:  	btrfs_subvolume_release_metadata(BTRFS_I(dir)->root,  					 &pending_snapshot->block_rsv,  					 pending_snapshot->qgroup_reserved); -out: +free:  	kfree(pending_snapshot); +out: +	atomic_dec(&root->will_be_snapshoted);  	return ret;  } @@ -688,7 +800,7 @@ static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)   *     nfs_async_unlink().   */ -static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir) +static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)  {  	int error; @@ -842,7 +954,6 @@ static int find_new_extents(struct btrfs_root *root,  {  	struct btrfs_path *path;  	struct btrfs_key min_key; -	struct btrfs_key max_key;  	struct extent_buffer *leaf;  	struct btrfs_file_extent_item *extent;  	int type; @@ -857,17 +968,14 @@ static int find_new_extents(struct btrfs_root *root,  	min_key.type = BTRFS_EXTENT_DATA_KEY;  	min_key.offset = *off; -	max_key.objectid = ino; -	max_key.type = (u8)-1; -	max_key.offset = (u64)-1; - -	path->keep_locks = 1; - -	while(1) { -		ret = btrfs_search_forward(root, &min_key, &max_key, -					   path, newer_than); +	while (1) { +		path->keep_locks = 1; +		ret = btrfs_search_forward(root, &min_key, path, newer_than);  		if (ret != 0)  			goto none; +		path->keep_locks = 0; +		btrfs_unlock_up_safe(path, 1); +process_slot:  		if (min_key.objectid != ino)  			goto none;  		if (min_key.type != BTRFS_EXTENT_DATA_KEY) @@ -886,6 +994,12 @@ static int find_new_extents(struct btrfs_root *root,  			return 0;  		} +		path->slots[0]++; +		if (path->slots[0] < btrfs_header_nritems(leaf)) { +			btrfs_item_key_to_cpu(leaf, &min_key, path->slots[0]); +			goto process_slot; +		} +  		if (min_key.offset == (u64)-1)  			goto none; @@ -913,10 +1027,13 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)  	read_unlock(&em_tree->lock);  	if (!em) { +		struct extent_state *cached = NULL; +		u64 end = start + len - 1; +  		/* get the big lock and read metadata off disk */ -		lock_extent(io_tree, start, start + len - 1); +		lock_extent_bits(io_tree, start, end, 0, &cached);  		em = btrfs_get_extent(inode, NULL, 0, start, len, 0); -		unlock_extent(io_tree, start, start + len - 1); +		unlock_extent_cached(io_tree, start, end, &cached, GFP_NOFS);  		if (IS_ERR(em))  			return NULL; @@ -935,7 +1052,8 @@ static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)  		return false;  	next = defrag_lookup_extent(inode, em->start + em->len); -	if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) +	if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE || +	    (em->block_start + em->block_len == next->block_start))  		ret = false;  	free_extent_map(next); @@ -1014,7 +1132,7 @@ out:  static int cluster_pages_for_defrag(struct inode *inode,  				    struct page **pages,  				    unsigned long start_index, -				    int num_pages) +				    unsigned long num_pages)  {  	unsigned long file_end;  	u64 isize = i_size_read(inode); @@ -1054,10 +1172,12 @@ again:  		page_start = page_offset(page);  		page_end = page_start + PAGE_CACHE_SIZE - 1;  		while (1) { -			lock_extent(tree, page_start, page_end); +			lock_extent_bits(tree, page_start, page_end, +					 0, &cached_state);  			ordered = btrfs_lookup_ordered_extent(inode,  							      page_start); -			unlock_extent(tree, page_start, page_end); +			unlock_extent_cached(tree, page_start, page_end, +					     &cached_state, GFP_NOFS);  			if (!ordered)  				break; @@ -1172,8 +1292,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,  	int defrag_count = 0;  	int compress_type = BTRFS_COMPRESS_ZLIB;  	int extent_thresh = range->extent_thresh; -	int max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; -	int cluster = max_cluster; +	unsigned long max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; +	unsigned long cluster = max_cluster;  	u64 new_align = ~((u64)128 * 1024 - 1);  	struct page **pages = NULL; @@ -1206,7 +1326,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,  		ra = &file->f_ra;  	} -	pages = kmalloc(sizeof(struct page *) * max_cluster, +	pages = kmalloc_array(max_cluster, sizeof(struct page *),  			GFP_NOFS);  	if (!pages) {  		ret = -ENOMEM; @@ -1257,7 +1377,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,  			break;  		if (btrfs_defrag_cancelled(root->fs_info)) { -			printk(KERN_DEBUG "btrfs: defrag_file cancelled\n"); +			printk(KERN_DEBUG "BTRFS: defrag_file cancelled\n");  			ret = -EAGAIN;  			break;  		} @@ -1334,8 +1454,12 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,  		}  	} -	if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) +	if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) {  		filemap_flush(inode->i_mapping); +		if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, +			     &BTRFS_I(inode)->runtime_flags)) +			filemap_flush(inode->i_mapping); +	}  	if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {  		/* the filemap_flush will queue IO into the worker threads, but @@ -1381,6 +1505,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,  	struct btrfs_trans_handle *trans;  	struct btrfs_device *device = NULL;  	char *sizestr; +	char *retptr;  	char *devstr = NULL;  	int ret = 0;  	int mod = 0; @@ -1410,29 +1535,30 @@ static noinline int btrfs_ioctl_resize(struct file *file,  	sizestr = vol_args->name;  	devstr = strchr(sizestr, ':');  	if (devstr) { -		char *end;  		sizestr = devstr + 1;  		*devstr = '\0';  		devstr = vol_args->name; -		devid = simple_strtoull(devstr, &end, 10); +		ret = kstrtoull(devstr, 10, &devid); +		if (ret) +			goto out_free;  		if (!devid) {  			ret = -EINVAL;  			goto out_free;  		} -		printk(KERN_INFO "btrfs: resizing devid %llu\n", devid); +		btrfs_info(root->fs_info, "resizing devid %llu", devid);  	}  	device = btrfs_find_device(root->fs_info, devid, NULL, NULL);  	if (!device) { -		printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", +		btrfs_info(root->fs_info, "resizer unable to find device %llu",  		       devid);  		ret = -ENODEV;  		goto out_free;  	}  	if (!device->writeable) { -		printk(KERN_INFO "btrfs: resizer unable to apply on " -		       "readonly device %llu\n", +		btrfs_info(root->fs_info, +			   "resizer unable to apply on readonly device %llu",  		       devid);  		ret = -EPERM;  		goto out_free; @@ -1448,8 +1574,8 @@ static noinline int btrfs_ioctl_resize(struct file *file,  			mod = 1;  			sizestr++;  		} -		new_size = memparse(sizestr, NULL); -		if (new_size == 0) { +		new_size = memparse(sizestr, &retptr); +		if (*retptr != '\0' || new_size == 0) {  			ret = -EINVAL;  			goto out_free;  		} @@ -1469,6 +1595,10 @@ static noinline int btrfs_ioctl_resize(struct file *file,  		}  		new_size = old_size - new_size;  	} else if (mod > 0) { +		if (new_size > ULLONG_MAX - old_size) { +			ret = -ERANGE; +			goto out_free; +		}  		new_size = old_size + new_size;  	} @@ -1484,7 +1614,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,  	do_div(new_size, root->sectorsize);  	new_size *= root->sectorsize; -	printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n", +	printk_in_rcu(KERN_INFO "BTRFS: new size for %s is %llu\n",  		      rcu_str_deref(device->name), new_size);  	if (new_size > old_size) { @@ -1545,9 +1675,15 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,  		src_inode = file_inode(src.file);  		if (src_inode->i_sb != file_inode(file)->i_sb) { -			printk(KERN_INFO "btrfs: Snapshot src from " -			       "another FS\n"); -			ret = -EINVAL; +			btrfs_info(BTRFS_I(src_inode)->root->fs_info, +				   "Snapshot src from another FS"); +			ret = -EXDEV; +		} else if (!inode_owner_or_capable(src_inode)) { +			/* +			 * Subvolume creation is not restricted, but snapshots +			 * are limited to own subvolumes only +			 */ +			ret = -EPERM;  		} else {  			ret = btrfs_mksubvol(&file->f_path, name, namelen,  					     BTRFS_I(src_inode)->root, @@ -1665,6 +1801,9 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,  	u64 flags;  	int ret = 0; +	if (!inode_owner_or_capable(inode)) +		return -EPERM; +  	ret = mnt_want_write_file(file);  	if (ret)  		goto out; @@ -1689,11 +1828,6 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,  		goto out_drop_write;  	} -	if (!inode_owner_or_capable(inode)) { -		ret = -EACCES; -		goto out_drop_write; -	} -  	down_write(&root->fs_info->subvol_sem);  	/* nothing to do */ @@ -1701,12 +1835,28 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,  		goto out_drop_sem;  	root_flags = btrfs_root_flags(&root->root_item); -	if (flags & BTRFS_SUBVOL_RDONLY) +	if (flags & BTRFS_SUBVOL_RDONLY) {  		btrfs_set_root_flags(&root->root_item,  				     root_flags | BTRFS_ROOT_SUBVOL_RDONLY); -	else -		btrfs_set_root_flags(&root->root_item, +	} else { +		/* +		 * Block RO -> RW transition if this subvolume is involved in +		 * send +		 */ +		spin_lock(&root->root_item_lock); +		if (root->send_in_progress == 0) { +			btrfs_set_root_flags(&root->root_item,  				     root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY); +			spin_unlock(&root->root_item_lock); +		} else { +			spin_unlock(&root->root_item_lock); +			btrfs_warn(root->fs_info, +			"Attempt to set subvolume %llu read-write during send", +					root->root_key.objectid); +			ret = -EPERM; +			goto out_drop_sem; +		} +	}  	trans = btrfs_start_transaction(root, 1);  	if (IS_ERR(trans)) { @@ -1751,7 +1901,9 @@ static noinline int may_destroy_subvol(struct btrfs_root *root)  	if (di && !IS_ERR(di)) {  		btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);  		if (key.objectid == root->root_key.objectid) { -			ret = -ENOTEMPTY; +			ret = -EPERM; +			btrfs_err(root->fs_info, "deleting default subvolume " +				  "%llu is not allowed", key.objectid);  			goto out;  		}  		btrfs_release_path(path); @@ -1808,7 +1960,8 @@ static noinline int copy_to_sk(struct btrfs_root *root,  			       struct btrfs_path *path,  			       struct btrfs_key *key,  			       struct btrfs_ioctl_search_key *sk, -			       char *buf, +			       size_t *buf_size, +			       char __user *ubuf,  			       unsigned long *sk_offset,  			       int *num_found)  { @@ -1840,13 +1993,25 @@ static noinline int copy_to_sk(struct btrfs_root *root,  		if (!key_in_sk(key, sk))  			continue; -		if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE) +		if (sizeof(sh) + item_len > *buf_size) { +			if (*num_found) { +				ret = 1; +				goto out; +			} + +			/* +			 * return one empty item back for v1, which does not +			 * handle -EOVERFLOW +			 */ + +			*buf_size = sizeof(sh) + item_len;  			item_len = 0; +			ret = -EOVERFLOW; +		} -		if (sizeof(sh) + item_len + *sk_offset > -		    BTRFS_SEARCH_ARGS_BUFSIZE) { +		if (sizeof(sh) + item_len + *sk_offset > *buf_size) {  			ret = 1; -			goto overflow; +			goto out;  		}  		sh.objectid = key->objectid; @@ -1856,20 +2021,33 @@ static noinline int copy_to_sk(struct btrfs_root *root,  		sh.transid = found_transid;  		/* copy search result header */ -		memcpy(buf + *sk_offset, &sh, sizeof(sh)); +		if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) { +			ret = -EFAULT; +			goto out; +		} +  		*sk_offset += sizeof(sh);  		if (item_len) { -			char *p = buf + *sk_offset; +			char __user *up = ubuf + *sk_offset;  			/* copy the item */ -			read_extent_buffer(leaf, p, -					   item_off, item_len); +			if (read_extent_buffer_to_user(leaf, up, +						       item_off, item_len)) { +				ret = -EFAULT; +				goto out; +			} +  			*sk_offset += item_len;  		}  		(*num_found)++; -		if (*num_found >= sk->nr_items) -			break; +		if (ret) /* -EOVERFLOW from above */ +			goto out; + +		if (*num_found >= sk->nr_items) { +			ret = 1; +			goto out; +		}  	}  advance_key:  	ret = 0; @@ -1884,23 +2062,37 @@ advance_key:  		key->objectid++;  	} else  		ret = 1; -overflow: +out: +	/* +	 *  0: all items from this leaf copied, continue with next +	 *  1: * more items can be copied, but unused buffer is too small +	 *     * all items were found +	 *     Either way, it will stops the loop which iterates to the next +	 *     leaf +	 *  -EOVERFLOW: item was to large for buffer +	 *  -EFAULT: could not copy extent buffer back to userspace +	 */  	return ret;  }  static noinline int search_ioctl(struct inode *inode, -				 struct btrfs_ioctl_search_args *args) +				 struct btrfs_ioctl_search_key *sk, +				 size_t *buf_size, +				 char __user *ubuf)  {  	struct btrfs_root *root;  	struct btrfs_key key; -	struct btrfs_key max_key;  	struct btrfs_path *path; -	struct btrfs_ioctl_search_key *sk = &args->key;  	struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info;  	int ret;  	int num_found = 0;  	unsigned long sk_offset = 0; +	if (*buf_size < sizeof(struct btrfs_ioctl_search_header)) { +		*buf_size = sizeof(struct btrfs_ioctl_search_header); +		return -EOVERFLOW; +	} +  	path = btrfs_alloc_path();  	if (!path)  		return -ENOMEM; @@ -1914,7 +2106,7 @@ static noinline int search_ioctl(struct inode *inode,  		key.offset = (u64)-1;  		root = btrfs_read_fs_root_no_name(info, &key);  		if (IS_ERR(root)) { -			printk(KERN_ERR "could not find root %llu\n", +			printk(KERN_ERR "BTRFS: could not find root %llu\n",  			       sk->tree_id);  			btrfs_free_path(path);  			return -ENOENT; @@ -1925,28 +2117,24 @@ static noinline int search_ioctl(struct inode *inode,  	key.type = sk->min_type;  	key.offset = sk->min_offset; -	max_key.objectid = sk->max_objectid; -	max_key.type = sk->max_type; -	max_key.offset = sk->max_offset; -  	path->keep_locks = 1; -	while(1) { -		ret = btrfs_search_forward(root, &key, &max_key, path, -					   sk->min_transid); +	while (1) { +		ret = btrfs_search_forward(root, &key, path, sk->min_transid);  		if (ret != 0) {  			if (ret > 0)  				ret = 0;  			goto err;  		} -		ret = copy_to_sk(root, path, &key, sk, args->buf, +		ret = copy_to_sk(root, path, &key, sk, buf_size, ubuf,  				 &sk_offset, &num_found);  		btrfs_release_path(path); -		if (ret || num_found >= sk->nr_items) +		if (ret)  			break;  	} -	ret = 0; +	if (ret > 0) +		ret = 0;  err:  	sk->nr_items = num_found;  	btrfs_free_path(path); @@ -1956,22 +2144,73 @@ err:  static noinline int btrfs_ioctl_tree_search(struct file *file,  					   void __user *argp)  { -	 struct btrfs_ioctl_search_args *args; -	 struct inode *inode; -	 int ret; +	struct btrfs_ioctl_search_args __user *uargs; +	struct btrfs_ioctl_search_key sk; +	struct inode *inode; +	int ret; +	size_t buf_size;  	if (!capable(CAP_SYS_ADMIN))  		return -EPERM; -	args = memdup_user(argp, sizeof(*args)); -	if (IS_ERR(args)) -		return PTR_ERR(args); +	uargs = (struct btrfs_ioctl_search_args __user *)argp; + +	if (copy_from_user(&sk, &uargs->key, sizeof(sk))) +		return -EFAULT; + +	buf_size = sizeof(uargs->buf);  	inode = file_inode(file); -	ret = search_ioctl(inode, args); -	if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) +	ret = search_ioctl(inode, &sk, &buf_size, uargs->buf); + +	/* +	 * In the origin implementation an overflow is handled by returning a +	 * search header with a len of zero, so reset ret. +	 */ +	if (ret == -EOVERFLOW) +		ret = 0; + +	if (ret == 0 && copy_to_user(&uargs->key, &sk, sizeof(sk)))  		ret = -EFAULT; -	kfree(args); +	return ret; +} + +static noinline int btrfs_ioctl_tree_search_v2(struct file *file, +					       void __user *argp) +{ +	struct btrfs_ioctl_search_args_v2 __user *uarg; +	struct btrfs_ioctl_search_args_v2 args; +	struct inode *inode; +	int ret; +	size_t buf_size; +	const size_t buf_limit = 16 * 1024 * 1024; + +	if (!capable(CAP_SYS_ADMIN)) +		return -EPERM; + +	/* copy search header and buffer size */ +	uarg = (struct btrfs_ioctl_search_args_v2 __user *)argp; +	if (copy_from_user(&args, uarg, sizeof(args))) +		return -EFAULT; + +	buf_size = args.buf_size; + +	if (buf_size < sizeof(struct btrfs_ioctl_search_header)) +		return -EOVERFLOW; + +	/* limit result size to 16MB */ +	if (buf_size > buf_limit) +		buf_size = buf_limit; + +	inode = file_inode(file); +	ret = search_ioctl(inode, &args.key, &buf_size, +			   (char *)(&uarg->buf[0])); +	if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key))) +		ret = -EFAULT; +	else if (ret == -EOVERFLOW && +		copy_to_user(&uarg->buf_size, &buf_size, sizeof(buf_size))) +		ret = -EFAULT; +  	return ret;  } @@ -2009,7 +2248,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,  	key.offset = (u64)-1;  	root = btrfs_read_fs_root_no_name(info, &key);  	if (IS_ERR(root)) { -		printk(KERN_ERR "could not find root %llu\n", tree_id); +		printk(KERN_ERR "BTRFS: could not find root %llu\n", tree_id);  		ret = -ENOENT;  		goto out;  	} @@ -2018,7 +2257,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,  	key.type = BTRFS_INODE_REF_KEY;  	key.offset = (u64)-1; -	while(1) { +	while (1) {  		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);  		if (ret < 0)  			goto out; @@ -2047,7 +2286,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,  		}  		*(ptr + len) = '/'; -		read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len); +		read_extent_buffer(l, ptr, (unsigned long)(iref + 1), len);  		if (key.offset == BTRFS_FIRST_FREE_OBJECTID)  			break; @@ -2058,7 +2297,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,  		dirid = key.objectid;  	}  	memmove(name, ptr, total_len); -	name[total_len]='\0'; +	name[total_len] = '\0';  	ret = 0;  out:  	btrfs_free_path(path); @@ -2098,7 +2337,7 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,  static noinline int btrfs_ioctl_snap_destroy(struct file *file,  					     void __user *arg)  { -	struct dentry *parent = fdentry(file); +	struct dentry *parent = file->f_path.dentry;  	struct dentry *dentry;  	struct inode *dir = parent->d_inode;  	struct inode *inode; @@ -2107,6 +2346,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,  	struct btrfs_ioctl_vol_args *vol_args;  	struct btrfs_trans_handle *trans;  	struct btrfs_block_rsv block_rsv; +	u64 root_flags;  	u64 qgroup_reserved;  	int namelen;  	int ret; @@ -2128,9 +2368,10 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,  	if (err)  		goto out; +  	err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT);  	if (err == -EINTR) -		goto out; +		goto out_drop_write;  	dentry = lookup_one_len(vol_args->name, parent, namelen);  	if (IS_ERR(dentry)) {  		err = PTR_ERR(dentry); @@ -2144,7 +2385,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,  	inode = dentry->d_inode;  	dest = BTRFS_I(inode)->root; -	if (!capable(CAP_SYS_ADMIN)){ +	if (!capable(CAP_SYS_ADMIN)) {  		/*  		 * Regular user.  Only allow this with a special mount  		 * option, when the user has write+exec access to the @@ -2189,6 +2430,27 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,  	}  	mutex_lock(&inode->i_mutex); + +	/* +	 * Don't allow to delete a subvolume with send in progress. This is +	 * inside the i_mutex so the error handling that has to drop the bit +	 * again is not run concurrently. +	 */ +	spin_lock(&dest->root_item_lock); +	root_flags = btrfs_root_flags(&dest->root_item); +	if (dest->send_in_progress == 0) { +		btrfs_set_root_flags(&dest->root_item, +				root_flags | BTRFS_ROOT_SUBVOL_DEAD); +		spin_unlock(&dest->root_item_lock); +	} else { +		spin_unlock(&dest->root_item_lock); +		btrfs_warn(root->fs_info, +			"Attempt to delete subvolume %llu during send", +			dest->root_key.objectid); +		err = -EPERM; +		goto out_dput; +	} +  	err = d_invalidate(dentry);  	if (err)  		goto out_unlock; @@ -2234,7 +2496,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,  	dest->root_item.drop_level = 0;  	btrfs_set_root_refs(&dest->root_item, 0); -	if (!xchg(&dest->orphan_item_inserted, 1)) { +	if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) {  		ret = btrfs_insert_orphan_item(trans,  					root->fs_info->tree_root,  					dest->root_key.objectid); @@ -2277,11 +2539,19 @@ out_release:  out_up_write:  	up_write(&root->fs_info->subvol_sem);  out_unlock: +	if (err) { +		spin_lock(&dest->root_item_lock); +		root_flags = btrfs_root_flags(&dest->root_item); +		btrfs_set_root_flags(&dest->root_item, +				root_flags & ~BTRFS_ROOT_SUBVOL_DEAD); +		spin_unlock(&dest->root_item_lock); +	}  	mutex_unlock(&inode->i_mutex);  	if (!err) {  		shrink_dcache_sb(root->fs_info->sb);  		btrfs_invalidate_inodes(dest);  		d_delete(dentry); +		ASSERT(dest->send_in_progress == 0);  		/* the last ref */  		if (dest->cache_inode) { @@ -2293,6 +2563,7 @@ out_dput:  	dput(dentry);  out_unlock_dir:  	mutex_unlock(&dir->i_mutex); +out_drop_write:  	mnt_drop_write_file(file);  out:  	kfree(vol_args); @@ -2444,9 +2715,6 @@ static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)  	struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;  	int ret = 0; -	if (!capable(CAP_SYS_ADMIN)) -		return -EPERM; -  	fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);  	if (!fi_args)  		return -ENOMEM; @@ -2461,6 +2729,10 @@ static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)  	}  	mutex_unlock(&fs_devices->device_list_mutex); +	fi_args->nodesize = root->fs_info->super_copy->nodesize; +	fi_args->sectorsize = root->fs_info->super_copy->sectorsize; +	fi_args->clone_alignment = root->fs_info->super_copy->sectorsize; +  	if (copy_to_user(arg, fi_args, sizeof(*fi_args)))  		ret = -EFAULT; @@ -2476,9 +2748,6 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)  	int ret = 0;  	char *s_uuid = NULL; -	if (!capable(CAP_SYS_ADMIN)) -		return -EPERM; -  	di_args = memdup_user(arg, sizeof(*di_args));  	if (IS_ERR(di_args))  		return PTR_ERR(di_args); @@ -2556,10 +2825,15 @@ static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)  		lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);  		ordered = btrfs_lookup_first_ordered_extent(inode,  							    off + len - 1); -		if (!ordered && +		if ((!ordered || +		     ordered->file_offset + ordered->len <= off || +		     ordered->file_offset >= off + len) &&  		    !test_range_bit(&BTRFS_I(inode)->io_tree, off, -				    off + len - 1, EXTENT_DELALLOC, 0, NULL)) +				    off + len - 1, EXTENT_DELALLOC, 0, NULL)) { +			if (ordered) +				btrfs_put_ordered_extent(ordered);  			break; +		}  		unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);  		if (ordered)  			btrfs_put_ordered_extent(ordered); @@ -2694,20 +2968,19 @@ out_unlock:  #define BTRFS_MAX_DEDUPE_LEN	(16 * 1024 * 1024)  static long btrfs_ioctl_file_extent_same(struct file *file, -					 void __user *argp) +			struct btrfs_ioctl_same_args __user *argp)  { -	struct btrfs_ioctl_same_args *args = argp; -	struct btrfs_ioctl_same_args same; -	struct btrfs_ioctl_same_extent_info info; -	struct inode *src = file->f_dentry->d_inode; -	struct file *dst_file = NULL; -	struct inode *dst; +	struct btrfs_ioctl_same_args *same; +	struct btrfs_ioctl_same_extent_info *info; +	struct inode *src = file_inode(file);  	u64 off;  	u64 len;  	int i;  	int ret; +	unsigned long size;  	u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;  	bool is_admin = capable(CAP_SYS_ADMIN); +	u16 count;  	if (!(file->f_mode & FMODE_READ))  		return -EINVAL; @@ -2716,15 +2989,22 @@ static long btrfs_ioctl_file_extent_same(struct file *file,  	if (ret)  		return ret; -	if (copy_from_user(&same, -			   (struct btrfs_ioctl_same_args __user *)argp, -			   sizeof(same))) { +	if (get_user(count, &argp->dest_count)) {  		ret = -EFAULT;  		goto out;  	} -	off = same.logical_offset; -	len = same.length; +	size = offsetof(struct btrfs_ioctl_same_args __user, info[count]); + +	same = memdup_user(argp, size); + +	if (IS_ERR(same)) { +		ret = PTR_ERR(same); +		goto out; +	} + +	off = same->logical_offset; +	len = same->length;  	/*  	 * Limit the total length we will dedupe for each operation. @@ -2752,66 +3032,170 @@ static long btrfs_ioctl_file_extent_same(struct file *file,  	if (!S_ISREG(src->i_mode))  		goto out; -	ret = 0; -	for (i = 0; i < same.dest_count; i++) { -		if (copy_from_user(&info, &args->info[i], sizeof(info))) { -			ret = -EFAULT; -			goto out; -		} - -		info.bytes_deduped = 0; +	/* pre-format output fields to sane values */ +	for (i = 0; i < count; i++) { +		same->info[i].bytes_deduped = 0ULL; +		same->info[i].status = 0; +	} -		dst_file = fget(info.fd); -		if (!dst_file) { -			info.status = -EBADF; -			goto next; +	for (i = 0, info = same->info; i < count; i++, info++) { +		struct inode *dst; +		struct fd dst_file = fdget(info->fd); +		if (!dst_file.file) { +			info->status = -EBADF; +			continue;  		} - -		if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) { -			info.status = -EINVAL; -			goto next; +		dst = file_inode(dst_file.file); + +		if (!(is_admin || (dst_file.file->f_mode & FMODE_WRITE))) { +			info->status = -EINVAL; +		} else if (file->f_path.mnt != dst_file.file->f_path.mnt) { +			info->status = -EXDEV; +		} else if (S_ISDIR(dst->i_mode)) { +			info->status = -EISDIR; +		} else if (!S_ISREG(dst->i_mode)) { +			info->status = -EACCES; +		} else { +			info->status = btrfs_extent_same(src, off, len, dst, +							info->logical_offset); +			if (info->status == 0) +				info->bytes_deduped += len;  		} +		fdput(dst_file); +	} -		info.status = -EXDEV; -		if (file->f_path.mnt != dst_file->f_path.mnt) -			goto next; +	ret = copy_to_user(argp, same, size); +	if (ret) +		ret = -EFAULT; + +out: +	mnt_drop_write_file(file); +	return ret; +} -		dst = dst_file->f_dentry->d_inode; -		if (src->i_sb != dst->i_sb) -			goto next; +/* Helper to check and see if this root currently has a ref on the given disk + * bytenr.  If it does then we need to update the quota for this root.  This + * doesn't do anything if quotas aren't enabled. + */ +static int check_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, +		     u64 disko) +{ +	struct seq_list tree_mod_seq_elem = {}; +	struct ulist *roots; +	struct ulist_iterator uiter; +	struct ulist_node *root_node = NULL; +	int ret; -		if (S_ISDIR(dst->i_mode)) { -			info.status = -EISDIR; -			goto next; -		} +	if (!root->fs_info->quota_enabled) +		return 1; -		if (!S_ISREG(dst->i_mode)) { -			info.status = -EACCES; -			goto next; +	btrfs_get_tree_mod_seq(root->fs_info, &tree_mod_seq_elem); +	ret = btrfs_find_all_roots(trans, root->fs_info, disko, +				   tree_mod_seq_elem.seq, &roots); +	if (ret < 0) +		goto out; +	ret = 0; +	ULIST_ITER_INIT(&uiter); +	while ((root_node = ulist_next(roots, &uiter))) { +		if (root_node->val == root->objectid) { +			ret = 1; +			break;  		} +	} +	ulist_free(roots); +out: +	btrfs_put_tree_mod_seq(root->fs_info, &tree_mod_seq_elem); +	return ret; +} -		info.status = btrfs_extent_same(src, off, len, dst, -						info.logical_offset); -		if (info.status == 0) -			info.bytes_deduped += len; +static int clone_finish_inode_update(struct btrfs_trans_handle *trans, +				     struct inode *inode, +				     u64 endoff, +				     const u64 destoff, +				     const u64 olen) +{ +	struct btrfs_root *root = BTRFS_I(inode)->root; +	int ret; -next: -		if (dst_file) -			fput(dst_file); +	inode_inc_iversion(inode); +	inode->i_mtime = inode->i_ctime = CURRENT_TIME; +	/* +	 * We round up to the block size at eof when determining which +	 * extents to clone above, but shouldn't round up the file size. +	 */ +	if (endoff > destoff + olen) +		endoff = destoff + olen; +	if (endoff > inode->i_size) +		btrfs_i_size_write(inode, endoff); -		if (__put_user_unaligned(info.status, &args->info[i].status) || -		    __put_user_unaligned(info.bytes_deduped, -					 &args->info[i].bytes_deduped)) { -			ret = -EFAULT; -			goto out; -		}                                                                +	ret = btrfs_update_inode(trans, root, inode); +	if (ret) { +		btrfs_abort_transaction(trans, root, ret); +		btrfs_end_transaction(trans, root); +		goto out;  	} - +	ret = btrfs_end_transaction(trans, root);  out: -	mnt_drop_write_file(file);  	return ret;  } +static void clone_update_extent_map(struct inode *inode, +				    const struct btrfs_trans_handle *trans, +				    const struct btrfs_path *path, +				    const u64 hole_offset, +				    const u64 hole_len) +{ +	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; +	struct extent_map *em; +	int ret; + +	em = alloc_extent_map(); +	if (!em) { +		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, +			&BTRFS_I(inode)->runtime_flags); +		return; +	} + +	if (path) { +		struct btrfs_file_extent_item *fi; + +		fi = btrfs_item_ptr(path->nodes[0], path->slots[0], +				    struct btrfs_file_extent_item); +		btrfs_extent_item_to_extent_map(inode, path, fi, false, em); +		em->generation = -1; +		if (btrfs_file_extent_type(path->nodes[0], fi) == +		    BTRFS_FILE_EXTENT_INLINE) +			set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, +				&BTRFS_I(inode)->runtime_flags); +	} else { +		em->start = hole_offset; +		em->len = hole_len; +		em->ram_bytes = em->len; +		em->orig_start = hole_offset; +		em->block_start = EXTENT_MAP_HOLE; +		em->block_len = 0; +		em->orig_block_len = 0; +		em->compress_type = BTRFS_COMPRESS_NONE; +		em->generation = trans->transid; +	} + +	while (1) { +		write_lock(&em_tree->lock); +		ret = add_extent_mapping(em_tree, em, 1); +		write_unlock(&em_tree->lock); +		if (ret != -EEXIST) { +			free_extent_map(em); +			break; +		} +		btrfs_drop_extent_cache(inode, em->start, +					em->start + em->len - 1, 0); +	} + +	if (unlikely(ret)) +		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, +			&BTRFS_I(inode)->runtime_flags); +} +  /**   * btrfs_clone() - clone a range from inode file to another   * @@ -2824,7 +3208,8 @@ out:   * @destoff: Offset within @inode to start clone   */  static int btrfs_clone(struct inode *src, struct inode *inode, -		       u64 off, u64 olen, u64 olen_aligned, u64 destoff) +		       const u64 off, const u64 olen, const u64 olen_aligned, +		       const u64 destoff)  {  	struct btrfs_root *root = BTRFS_I(inode)->root;  	struct btrfs_path *path = NULL; @@ -2835,7 +3220,10 @@ static int btrfs_clone(struct inode *src, struct inode *inode,  	u32 nritems;  	int slot;  	int ret; -	u64 len = olen_aligned; +	int no_quota; +	const u64 len = olen_aligned; +	u64 last_disko = 0; +	u64 last_dest_end = destoff;  	ret = -ENOMEM;  	buf = vmalloc(btrfs_level_size(root, 0)); @@ -2852,19 +3240,33 @@ static int btrfs_clone(struct inode *src, struct inode *inode,  	/* clone data */  	key.objectid = btrfs_ino(src);  	key.type = BTRFS_EXTENT_DATA_KEY; -	key.offset = 0; +	key.offset = off;  	while (1) {  		/*  		 * note the key will change type as we walk through the  		 * tree.  		 */ +		path->leave_spinning = 1;  		ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path,  				0, 0);  		if (ret < 0)  			goto out; +		/* +		 * First search, if no extent item that starts at offset off was +		 * found but the previous item is an extent item, it's possible +		 * it might overlap our target range, therefore process it. +		 */ +		if (key.offset == off && ret > 0 && path->slots[0] > 0) { +			btrfs_item_key_to_cpu(path->nodes[0], &key, +					      path->slots[0] - 1); +			if (key.type == BTRFS_EXTENT_DATA_KEY) +				path->slots[0]--; +		}  		nritems = btrfs_header_nritems(path->nodes[0]); +process_slot: +		no_quota = 1;  		if (path->slots[0] >= nritems) {  			ret = btrfs_next_leaf(BTRFS_I(src)->root, path);  			if (ret < 0) @@ -2889,12 +3291,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,  			u64 disko = 0, diskl = 0;  			u64 datao = 0, datal = 0;  			u8 comp; -			u64 endoff; - -			size = btrfs_item_size_nr(leaf, slot); -			read_extent_buffer(leaf, buf, -					   btrfs_item_ptr_offset(leaf, slot), -					   size); +			u64 drop_start;  			extent = btrfs_item_ptr(leaf, slot,  						struct btrfs_file_extent_item); @@ -2914,11 +3311,26 @@ static int btrfs_clone(struct inode *src, struct inode *inode,  				datal = btrfs_file_extent_ram_bytes(leaf,  								    extent);  			} -			btrfs_release_path(path); -			if (key.offset + datal <= off || -			    key.offset >= off + len - 1) -				goto next; +			/* +			 * The first search might have left us at an extent +			 * item that ends before our target range's start, can +			 * happen if we have holes and NO_HOLES feature enabled. +			 */ +			if (key.offset + datal <= off) { +				path->slots[0]++; +				goto process_slot; +			} else if (key.offset >= off + len) { +				break; +			} + +			size = btrfs_item_size_nr(leaf, slot); +			read_extent_buffer(leaf, buf, +					   btrfs_item_ptr_offset(leaf, slot), +					   size); + +			btrfs_release_path(path); +			path->leave_spinning = 0;  			memcpy(&new_key, &key, sizeof(new_key));  			new_key.objectid = btrfs_ino(inode); @@ -2928,6 +3340,18 @@ static int btrfs_clone(struct inode *src, struct inode *inode,  				new_key.offset = destoff;  			/* +			 * Deal with a hole that doesn't have an extent item +			 * that represents it (NO_HOLES feature enabled). +			 * This hole is either in the middle of the cloning +			 * range or at the beginning (fully overlaps it or +			 * partially overlaps it). +			 */ +			if (new_key.offset != last_dest_end) +				drop_start = last_dest_end; +			else +				drop_start = new_key.offset; + +			/*  			 * 1 - adjusting old extent (we may have to split it)  			 * 1 - add new extent  			 * 1 - inode update @@ -2945,23 +3369,24 @@ static int btrfs_clone(struct inode *src, struct inode *inode,  				 * | ------------- extent ------------- |  				 */ -				/* substract range b */ +				/* subtract range b */  				if (key.offset + datal > off + len)  					datal = off + len - key.offset; -				/* substract range a */ +				/* subtract range a */  				if (off > key.offset) {  					datao += off - key.offset;  					datal -= off - key.offset;  				}  				ret = btrfs_drop_extents(trans, root, inode, -							 new_key.offset, +							 drop_start,  							 new_key.offset + datal,  							 1);  				if (ret) { -					btrfs_abort_transaction(trans, root, -								ret); +					if (ret != -EOPNOTSUPP) +						btrfs_abort_transaction(trans, +								root, ret);  					btrfs_end_transaction(trans, root);  					goto out;  				} @@ -2992,6 +3417,28 @@ static int btrfs_clone(struct inode *src, struct inode *inode,  							     datao);  				btrfs_set_file_extent_num_bytes(leaf, extent,  								datal); + +				/* +				 * We need to look up the roots that point at +				 * this bytenr and see if the new root does.  If +				 * it does not we need to make sure we update +				 * quotas appropriately. +				 */ +				if (disko && root != BTRFS_I(src)->root && +				    disko != last_disko) { +					no_quota = check_ref(trans, root, +							     disko); +					if (no_quota < 0) { +						btrfs_abort_transaction(trans, +									root, +									ret); +						btrfs_end_transaction(trans, +								      root); +						ret = no_quota; +						goto out; +					} +				} +  				if (disko) {  					inode_add_bytes(inode, datal);  					ret = btrfs_inc_extent_ref(trans, root, @@ -2999,7 +3446,7 @@ static int btrfs_clone(struct inode *src, struct inode *inode,  							root->root_key.objectid,  							btrfs_ino(inode),  							new_key.offset - datao, -							0); +							no_quota);  					if (ret) {  						btrfs_abort_transaction(trans,  									root, @@ -3013,6 +3460,8 @@ static int btrfs_clone(struct inode *src, struct inode *inode,  			} else if (type == BTRFS_FILE_EXTENT_INLINE) {  				u64 skip = 0;  				u64 trim = 0; +				u64 aligned_end = 0; +  				if (off > key.offset) {  					skip = off - key.offset;  					new_key.offset += skip; @@ -3029,13 +3478,16 @@ static int btrfs_clone(struct inode *src, struct inode *inode,  				size -= skip + trim;  				datal -= skip + trim; +				aligned_end = ALIGN(new_key.offset + datal, +						    root->sectorsize);  				ret = btrfs_drop_extents(trans, root, inode, -							 new_key.offset, -							 new_key.offset + datal, +							 drop_start, +							 aligned_end,  							 1);  				if (ret) { -					btrfs_abort_transaction(trans, root, -								ret); +					if (ret != -EOPNOTSUPP) +						btrfs_abort_transaction(trans, +							root, ret);  					btrfs_end_transaction(trans, root);  					goto out;  				} @@ -3064,39 +3516,62 @@ static int btrfs_clone(struct inode *src, struct inode *inode,  				inode_add_bytes(inode, datal);  			} +			/* If we have an implicit hole (NO_HOLES feature). */ +			if (drop_start < new_key.offset) +				clone_update_extent_map(inode, trans, +						NULL, drop_start, +						new_key.offset - drop_start); + +			clone_update_extent_map(inode, trans, path, 0, 0); +  			btrfs_mark_buffer_dirty(leaf);  			btrfs_release_path(path); -			inode_inc_iversion(inode); -			inode->i_mtime = inode->i_ctime = CURRENT_TIME; - -			/* -			 * we round up to the block size at eof when -			 * determining which extents to clone above, -			 * but shouldn't round up the file size -			 */ -			endoff = new_key.offset + datal; -			if (endoff > destoff+olen) -				endoff = destoff+olen; -			if (endoff > inode->i_size) -				btrfs_i_size_write(inode, endoff); - -			ret = btrfs_update_inode(trans, root, inode); -			if (ret) { -				btrfs_abort_transaction(trans, root, ret); -				btrfs_end_transaction(trans, root); +			last_dest_end = new_key.offset + datal; +			ret = clone_finish_inode_update(trans, inode, +							last_dest_end, +							destoff, olen); +			if (ret)  				goto out; -			} -			ret = btrfs_end_transaction(trans, root); +			if (new_key.offset + datal >= destoff + len) +				break;  		} -next:  		btrfs_release_path(path);  		key.offset++;  	}  	ret = 0; +	if (last_dest_end < destoff + len) { +		/* +		 * We have an implicit hole (NO_HOLES feature is enabled) that +		 * fully or partially overlaps our cloning range at its end. +		 */ +		btrfs_release_path(path); + +		/* +		 * 1 - remove extent(s) +		 * 1 - inode update +		 */ +		trans = btrfs_start_transaction(root, 2); +		if (IS_ERR(trans)) { +			ret = PTR_ERR(trans); +			goto out; +		} +		ret = btrfs_drop_extents(trans, root, inode, +					 last_dest_end, destoff + len, 1); +		if (ret) { +			if (ret != -EOPNOTSUPP) +				btrfs_abort_transaction(trans, root, ret); +			btrfs_end_transaction(trans, root); +			goto out; +		} +		clone_update_extent_map(inode, trans, NULL, last_dest_end, +					destoff + len - last_dest_end); +		ret = clone_finish_inode_update(trans, inode, destoff + len, +						destoff, olen); +	} +  out: -	btrfs_release_path(path);  	btrfs_free_path(path);  	vfree(buf);  	return ret; @@ -3105,7 +3580,7 @@ out:  static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,  				       u64 off, u64 olen, u64 destoff)  { -	struct inode *inode = fdentry(file)->d_inode; +	struct inode *inode = file_inode(file);  	struct btrfs_root *root = BTRFS_I(inode)->root;  	struct fd src_file;  	struct inode *src; @@ -3120,8 +3595,9 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,  	 *   decompress into destination's address_space (the file offset  	 *   may change, so source mapping won't do), then recompress (or  	 *   otherwise reinsert) a subrange. -	 * - allow ranges within the same file to be cloned (provided -	 *   they don't overlap)? +	 * +	 * - split destination inode's inline extents.  The inline extents can +	 *   be either compressed or non-compressed.  	 */  	/* the destination must be opened for writing */ @@ -3207,19 +3683,53 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,  			goto out_unlock;  	} -	/* truncate page cache pages from target inode range */ -	truncate_inode_pages_range(&inode->i_data, destoff, -				   PAGE_CACHE_ALIGN(destoff + len) - 1); +	/* +	 * Lock the target range too. Right after we replace the file extent +	 * items in the fs tree (which now point to the cloned data), we might +	 * have a worker replace them with extent items relative to a write +	 * operation that was issued before this clone operation (i.e. confront +	 * with inode.c:btrfs_finish_ordered_io). +	 */ +	if (same_inode) { +		u64 lock_start = min_t(u64, off, destoff); +		u64 lock_len = max_t(u64, off, destoff) + len - lock_start; -	lock_extent_range(src, off, len); +		lock_extent_range(src, lock_start, lock_len); +	} else { +		lock_extent_range(src, off, len); +		lock_extent_range(inode, destoff, len); +	}  	ret = btrfs_clone(src, inode, off, olen, len, destoff); -	unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); +	if (same_inode) { +		u64 lock_start = min_t(u64, off, destoff); +		u64 lock_end = max_t(u64, off, destoff) + len - 1; + +		unlock_extent(&BTRFS_I(src)->io_tree, lock_start, lock_end); +	} else { +		unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); +		unlock_extent(&BTRFS_I(inode)->io_tree, destoff, +			      destoff + len - 1); +	} +	/* +	 * Truncate page cache pages so that future reads will see the cloned +	 * data immediately and not the previous data. +	 */ +	truncate_inode_pages_range(&inode->i_data, destoff, +				   PAGE_CACHE_ALIGN(destoff + len) - 1);  out_unlock: -	mutex_unlock(&src->i_mutex); -	if (!same_inode) -		mutex_unlock(&inode->i_mutex); +	if (!same_inode) { +		if (inode < src) { +			mutex_unlock(&src->i_mutex); +			mutex_unlock(&inode->i_mutex); +		} else { +			mutex_unlock(&inode->i_mutex); +			mutex_unlock(&src->i_mutex); +		} +	} else { +		mutex_unlock(&src->i_mutex); +	}  out_fput:  	fdput(src_file);  out_drop_write: @@ -3310,7 +3820,7 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)  	}  	if (!objectid) -		objectid = root->root_key.objectid; +		objectid = BTRFS_FS_TREE_OBJECTID;  	location.objectid = objectid;  	location.type = BTRFS_ROOT_ITEM_KEY; @@ -3342,8 +3852,8 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)  	if (IS_ERR_OR_NULL(di)) {  		btrfs_free_path(path);  		btrfs_end_transaction(trans, root); -		printk(KERN_ERR "Umm, you don't have the default dir item, " -		       "this isn't going to work\n"); +		btrfs_err(new_root->fs_info, "Umm, you don't have the default dir" +			   "item, this isn't going to work");  		ret = -ENOENT;  		goto out;  	} @@ -3424,6 +3934,11 @@ static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)  		up_read(&info->groups_sem);  	} +	/* +	 * Global block reserve, exported as a space_info +	 */ +	slot_count++; +  	/* space_slots == 0 means they are asking for a count */  	if (space_args.space_slots == 0) {  		space_args.total_spaces = slot_count; @@ -3482,6 +3997,21 @@ static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)  		up_read(&info->groups_sem);  	} +	/* +	 * Add global block reserve +	 */ +	if (slot_count) { +		struct btrfs_block_rsv *block_rsv = &root->fs_info->global_block_rsv; + +		spin_lock(&block_rsv->lock); +		space.total_bytes = block_rsv->size; +		space.used_bytes = block_rsv->size - block_rsv->reserved; +		spin_unlock(&block_rsv->lock); +		space.flags = BTRFS_SPACE_INFO_GLOBAL_RSV; +		memcpy(dest, &space, sizeof(space)); +		space_args.total_spaces++; +	} +  	user_dest = (struct btrfs_ioctl_space_info __user *)  		(arg + sizeof(struct btrfs_ioctl_space_args)); @@ -3665,9 +4195,10 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg)  	switch (p->cmd) {  	case BTRFS_IOCTL_DEV_REPLACE_CMD_START: -		if (root->fs_info->sb->s_flags & MS_RDONLY) -			return -EROFS; - +		if (root->fs_info->sb->s_flags & MS_RDONLY) { +			ret = -EROFS; +			goto out; +		}  		if (atomic_xchg(  			&root->fs_info->mutually_exclusive_operation_running,  			1)) { @@ -3693,7 +4224,7 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg)  	if (copy_to_user(arg, p, sizeof(*p)))  		ret = -EFAULT; - +out:  	kfree(p);  	return ret;  } @@ -4303,7 +4834,7 @@ static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg)  static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg)  { -	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; +	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;  	if (!capable(CAP_SYS_ADMIN))  		return -EPERM; @@ -4311,10 +4842,9 @@ static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg)  	return btrfs_qgroup_wait_for_completion(root->fs_info);  } -static long btrfs_ioctl_set_received_subvol(struct file *file, -					    void __user *arg) +static long _btrfs_ioctl_set_received_subvol(struct file *file, +					    struct btrfs_ioctl_received_subvol_args *sa)  { -	struct btrfs_ioctl_received_subvol_args *sa = NULL;  	struct inode *inode = file_inode(file);  	struct btrfs_root *root = BTRFS_I(inode)->root;  	struct btrfs_root_item *root_item = &root->root_item; @@ -4323,6 +4853,9 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,  	int ret = 0;  	int received_uuid_changed; +	if (!inode_owner_or_capable(inode)) +		return -EPERM; +  	ret = mnt_want_write_file(file);  	if (ret < 0)  		return ret; @@ -4339,18 +4872,6 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,  		goto out;  	} -	if (!inode_owner_or_capable(inode)) { -		ret = -EACCES; -		goto out; -	} - -	sa = memdup_user(arg, sizeof(*sa)); -	if (IS_ERR(sa)) { -		ret = PTR_ERR(sa); -		sa = NULL; -		goto out; -	} -  	/*  	 * 1 - root item  	 * 2 - uuid items (received uuid + subvol uuid) @@ -4404,14 +4925,90 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,  		goto out;  	} +out: +	up_write(&root->fs_info->subvol_sem); +	mnt_drop_write_file(file); +	return ret; +} + +#ifdef CONFIG_64BIT +static long btrfs_ioctl_set_received_subvol_32(struct file *file, +						void __user *arg) +{ +	struct btrfs_ioctl_received_subvol_args_32 *args32 = NULL; +	struct btrfs_ioctl_received_subvol_args *args64 = NULL; +	int ret = 0; + +	args32 = memdup_user(arg, sizeof(*args32)); +	if (IS_ERR(args32)) { +		ret = PTR_ERR(args32); +		args32 = NULL; +		goto out; +	} + +	args64 = kmalloc(sizeof(*args64), GFP_NOFS); +	if (!args64) { +		ret = -ENOMEM; +		goto out; +	} + +	memcpy(args64->uuid, args32->uuid, BTRFS_UUID_SIZE); +	args64->stransid = args32->stransid; +	args64->rtransid = args32->rtransid; +	args64->stime.sec = args32->stime.sec; +	args64->stime.nsec = args32->stime.nsec; +	args64->rtime.sec = args32->rtime.sec; +	args64->rtime.nsec = args32->rtime.nsec; +	args64->flags = args32->flags; + +	ret = _btrfs_ioctl_set_received_subvol(file, args64); +	if (ret) +		goto out; + +	memcpy(args32->uuid, args64->uuid, BTRFS_UUID_SIZE); +	args32->stransid = args64->stransid; +	args32->rtransid = args64->rtransid; +	args32->stime.sec = args64->stime.sec; +	args32->stime.nsec = args64->stime.nsec; +	args32->rtime.sec = args64->rtime.sec; +	args32->rtime.nsec = args64->rtime.nsec; +	args32->flags = args64->flags; + +	ret = copy_to_user(arg, args32, sizeof(*args32)); +	if (ret) +		ret = -EFAULT; + +out: +	kfree(args32); +	kfree(args64); +	return ret; +} +#endif + +static long btrfs_ioctl_set_received_subvol(struct file *file, +					    void __user *arg) +{ +	struct btrfs_ioctl_received_subvol_args *sa = NULL; +	int ret = 0; + +	sa = memdup_user(arg, sizeof(*sa)); +	if (IS_ERR(sa)) { +		ret = PTR_ERR(sa); +		sa = NULL; +		goto out; +	} + +	ret = _btrfs_ioctl_set_received_subvol(file, sa); + +	if (ret) +		goto out; +  	ret = copy_to_user(arg, sa, sizeof(*sa));  	if (ret)  		ret = -EFAULT;  out:  	kfree(sa); -	up_write(&root->fs_info->subvol_sem); -	mnt_drop_write_file(file);  	return ret;  } @@ -4429,8 +5026,8 @@ static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg)  	len = strnlen(label, BTRFS_LABEL_SIZE);  	if (len == BTRFS_LABEL_SIZE) { -		pr_warn("btrfs: label is too long, return the first %zu bytes\n", -			--len); +		btrfs_warn(root->fs_info, +			"label is too long, return the first %zu bytes", --len);  	}  	ret = copy_to_user(arg, label, len); @@ -4453,7 +5050,7 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)  		return -EFAULT;  	if (strnlen(label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) { -		pr_err("btrfs: unable to set label with more than %d bytes\n", +		btrfs_err(root->fs_info, "unable to set label with more than %d bytes",  		       BTRFS_LABEL_SIZE - 1);  		return -EINVAL;  	} @@ -4471,13 +5068,173 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)  	spin_lock(&root->fs_info->super_lock);  	strcpy(super_block->label, label);  	spin_unlock(&root->fs_info->super_lock); -	ret = btrfs_end_transaction(trans, root); +	ret = btrfs_commit_transaction(trans, root);  out_unlock:  	mnt_drop_write_file(file);  	return ret;  } +#define INIT_FEATURE_FLAGS(suffix) \ +	{ .compat_flags = BTRFS_FEATURE_COMPAT_##suffix, \ +	  .compat_ro_flags = BTRFS_FEATURE_COMPAT_RO_##suffix, \ +	  .incompat_flags = BTRFS_FEATURE_INCOMPAT_##suffix } + +static int btrfs_ioctl_get_supported_features(struct file *file, +					      void __user *arg) +{ +	static struct btrfs_ioctl_feature_flags features[3] = { +		INIT_FEATURE_FLAGS(SUPP), +		INIT_FEATURE_FLAGS(SAFE_SET), +		INIT_FEATURE_FLAGS(SAFE_CLEAR) +	}; + +	if (copy_to_user(arg, &features, sizeof(features))) +		return -EFAULT; + +	return 0; +} + +static int btrfs_ioctl_get_features(struct file *file, void __user *arg) +{ +	struct btrfs_root *root = BTRFS_I(file_inode(file))->root; +	struct btrfs_super_block *super_block = root->fs_info->super_copy; +	struct btrfs_ioctl_feature_flags features; + +	features.compat_flags = btrfs_super_compat_flags(super_block); +	features.compat_ro_flags = btrfs_super_compat_ro_flags(super_block); +	features.incompat_flags = btrfs_super_incompat_flags(super_block); + +	if (copy_to_user(arg, &features, sizeof(features))) +		return -EFAULT; + +	return 0; +} + +static int check_feature_bits(struct btrfs_root *root, +			      enum btrfs_feature_set set, +			      u64 change_mask, u64 flags, u64 supported_flags, +			      u64 safe_set, u64 safe_clear) +{ +	const char *type = btrfs_feature_set_names[set]; +	char *names; +	u64 disallowed, unsupported; +	u64 set_mask = flags & change_mask; +	u64 clear_mask = ~flags & change_mask; + +	unsupported = set_mask & ~supported_flags; +	if (unsupported) { +		names = btrfs_printable_features(set, unsupported); +		if (names) { +			btrfs_warn(root->fs_info, +			   "this kernel does not support the %s feature bit%s", +			   names, strchr(names, ',') ? "s" : ""); +			kfree(names); +		} else +			btrfs_warn(root->fs_info, +			   "this kernel does not support %s bits 0x%llx", +			   type, unsupported); +		return -EOPNOTSUPP; +	} + +	disallowed = set_mask & ~safe_set; +	if (disallowed) { +		names = btrfs_printable_features(set, disallowed); +		if (names) { +			btrfs_warn(root->fs_info, +			   "can't set the %s feature bit%s while mounted", +			   names, strchr(names, ',') ? "s" : ""); +			kfree(names); +		} else +			btrfs_warn(root->fs_info, +			   "can't set %s bits 0x%llx while mounted", +			   type, disallowed); +		return -EPERM; +	} + +	disallowed = clear_mask & ~safe_clear; +	if (disallowed) { +		names = btrfs_printable_features(set, disallowed); +		if (names) { +			btrfs_warn(root->fs_info, +			   "can't clear the %s feature bit%s while mounted", +			   names, strchr(names, ',') ? "s" : ""); +			kfree(names); +		} else +			btrfs_warn(root->fs_info, +			   "can't clear %s bits 0x%llx while mounted", +			   type, disallowed); +		return -EPERM; +	} + +	return 0; +} + +#define check_feature(root, change_mask, flags, mask_base)	\ +check_feature_bits(root, FEAT_##mask_base, change_mask, flags,	\ +		   BTRFS_FEATURE_ ## mask_base ## _SUPP,	\ +		   BTRFS_FEATURE_ ## mask_base ## _SAFE_SET,	\ +		   BTRFS_FEATURE_ ## mask_base ## _SAFE_CLEAR) + +static int btrfs_ioctl_set_features(struct file *file, void __user *arg) +{ +	struct btrfs_root *root = BTRFS_I(file_inode(file))->root; +	struct btrfs_super_block *super_block = root->fs_info->super_copy; +	struct btrfs_ioctl_feature_flags flags[2]; +	struct btrfs_trans_handle *trans; +	u64 newflags; +	int ret; + +	if (!capable(CAP_SYS_ADMIN)) +		return -EPERM; + +	if (copy_from_user(flags, arg, sizeof(flags))) +		return -EFAULT; + +	/* Nothing to do */ +	if (!flags[0].compat_flags && !flags[0].compat_ro_flags && +	    !flags[0].incompat_flags) +		return 0; + +	ret = check_feature(root, flags[0].compat_flags, +			    flags[1].compat_flags, COMPAT); +	if (ret) +		return ret; + +	ret = check_feature(root, flags[0].compat_ro_flags, +			    flags[1].compat_ro_flags, COMPAT_RO); +	if (ret) +		return ret; + +	ret = check_feature(root, flags[0].incompat_flags, +			    flags[1].incompat_flags, INCOMPAT); +	if (ret) +		return ret; + +	trans = btrfs_start_transaction(root, 0); +	if (IS_ERR(trans)) +		return PTR_ERR(trans); + +	spin_lock(&root->fs_info->super_lock); +	newflags = btrfs_super_compat_flags(super_block); +	newflags |= flags[0].compat_flags & flags[1].compat_flags; +	newflags &= ~(flags[0].compat_flags & ~flags[1].compat_flags); +	btrfs_set_super_compat_flags(super_block, newflags); + +	newflags = btrfs_super_compat_ro_flags(super_block); +	newflags |= flags[0].compat_ro_flags & flags[1].compat_ro_flags; +	newflags &= ~(flags[0].compat_ro_flags & ~flags[1].compat_ro_flags); +	btrfs_set_super_compat_ro_flags(super_block, newflags); + +	newflags = btrfs_super_incompat_flags(super_block); +	newflags |= flags[0].incompat_flags & flags[1].incompat_flags; +	newflags &= ~(flags[0].incompat_flags & ~flags[1].incompat_flags); +	btrfs_set_super_incompat_flags(super_block, newflags); +	spin_unlock(&root->fs_info->super_lock); + +	return btrfs_commit_transaction(trans, root); +} +  long btrfs_ioctl(struct file *file, unsigned int  		cmd, unsigned long arg)  { @@ -4535,6 +5292,8 @@ long btrfs_ioctl(struct file *file, unsigned int  		return btrfs_ioctl_trans_end(file);  	case BTRFS_IOC_TREE_SEARCH:  		return btrfs_ioctl_tree_search(file, argp); +	case BTRFS_IOC_TREE_SEARCH_V2: +		return btrfs_ioctl_tree_search_v2(file, argp);  	case BTRFS_IOC_INO_LOOKUP:  		return btrfs_ioctl_ino_lookup(file, argp);  	case BTRFS_IOC_INO_PATHS: @@ -4543,9 +5302,15 @@ long btrfs_ioctl(struct file *file, unsigned int  		return btrfs_ioctl_logical_to_ino(root, argp);  	case BTRFS_IOC_SPACE_INFO:  		return btrfs_ioctl_space_info(root, argp); -	case BTRFS_IOC_SYNC: -		btrfs_sync_fs(file->f_dentry->d_sb, 1); -		return 0; +	case BTRFS_IOC_SYNC: { +		int ret; + +		ret = btrfs_start_delalloc_roots(root->fs_info, 0, -1); +		if (ret) +			return ret; +		ret = btrfs_sync_fs(file->f_dentry->d_sb, 1); +		return ret; +	}  	case BTRFS_IOC_START_SYNC:  		return btrfs_ioctl_start_sync(root, argp);  	case BTRFS_IOC_WAIT_SYNC: @@ -4564,6 +5329,10 @@ long btrfs_ioctl(struct file *file, unsigned int  		return btrfs_ioctl_balance_progress(root, argp);  	case BTRFS_IOC_SET_RECEIVED_SUBVOL:  		return btrfs_ioctl_set_received_subvol(file, argp); +#ifdef CONFIG_64BIT +	case BTRFS_IOC_SET_RECEIVED_SUBVOL_32: +		return btrfs_ioctl_set_received_subvol_32(file, argp); +#endif  	case BTRFS_IOC_SEND:  		return btrfs_ioctl_send(file, argp);  	case BTRFS_IOC_GET_DEV_STATS: @@ -4590,6 +5359,12 @@ long btrfs_ioctl(struct file *file, unsigned int  		return btrfs_ioctl_set_fslabel(file, argp);  	case BTRFS_IOC_FILE_EXTENT_SAME:  		return btrfs_ioctl_file_extent_same(file, argp); +	case BTRFS_IOC_GET_SUPPORTED_FEATURES: +		return btrfs_ioctl_get_supported_features(file, argp); +	case BTRFS_IOC_GET_FEATURES: +		return btrfs_ioctl_get_features(file, argp); +	case BTRFS_IOC_SET_FEATURES: +		return btrfs_ioctl_set_features(file, argp);  	}  	return -ENOTTY;  | 
