diff options
Diffstat (limited to 'fs/btrfs/inode.c')
| -rw-r--r-- | fs/btrfs/inode.c | 692 | 
1 files changed, 509 insertions, 183 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 59cba180fe8..e9b76bcd1c1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -55,13 +55,13 @@ struct btrfs_iget_args {  	struct btrfs_root *root;  }; -static struct inode_operations btrfs_dir_inode_operations; -static struct inode_operations btrfs_symlink_inode_operations; -static struct inode_operations btrfs_dir_ro_inode_operations; -static struct inode_operations btrfs_special_inode_operations; -static struct inode_operations btrfs_file_inode_operations; -static struct address_space_operations btrfs_aops; -static struct address_space_operations btrfs_symlink_aops; +static const struct inode_operations btrfs_dir_inode_operations; +static const struct inode_operations btrfs_symlink_inode_operations; +static const struct inode_operations btrfs_dir_ro_inode_operations; +static const struct inode_operations btrfs_special_inode_operations; +static const struct inode_operations btrfs_file_inode_operations; +static const struct address_space_operations btrfs_aops; +static const struct address_space_operations btrfs_symlink_aops;  static struct file_operations btrfs_dir_file_operations;  static struct extent_io_ops btrfs_extent_io_ops; @@ -231,7 +231,8 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,  	}  	ret = btrfs_drop_extents(trans, root, inode, start, -				 aligned_end, aligned_end, start, &hint_byte); +				 aligned_end, aligned_end, start, +				 &hint_byte, 1);  	BUG_ON(ret);  	if (isize > actual_end) @@ -240,7 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,  				   inline_len, compressed_size,  				   compressed_pages);  	BUG_ON(ret); -	btrfs_drop_extent_cache(inode, start, aligned_end, 0); +	btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);  	return 0;  } @@ -425,7 +426,7 @@ again:  			extent_clear_unlock_delalloc(inode,  						     &BTRFS_I(inode)->io_tree,  						     start, end, NULL, 1, 0, -						     0, 1, 1, 1); +						     0, 1, 1, 1, 0);  			ret = 0;  			goto free_pages_out;  		} @@ -611,9 +612,9 @@ static noinline int submit_compressed_extents(struct inode *inode,  		set_bit(EXTENT_FLAG_COMPRESSED, &em->flags);  		while (1) { -			spin_lock(&em_tree->lock); +			write_lock(&em_tree->lock);  			ret = add_extent_mapping(em_tree, em); -			spin_unlock(&em_tree->lock); +			write_unlock(&em_tree->lock);  			if (ret != -EEXIST) {  				free_extent_map(em);  				break; @@ -640,7 +641,7 @@ static noinline int submit_compressed_extents(struct inode *inode,  					     async_extent->start,  					     async_extent->start +  					     async_extent->ram_size - 1, -					     NULL, 1, 1, 0, 1, 1, 0); +					     NULL, 1, 1, 0, 1, 1, 0, 0);  		ret = btrfs_submit_compressed_write(inode,  				    async_extent->start, @@ -713,7 +714,7 @@ static noinline int cow_file_range(struct inode *inode,  			extent_clear_unlock_delalloc(inode,  						     &BTRFS_I(inode)->io_tree,  						     start, end, NULL, 1, 1, -						     1, 1, 1, 1); +						     1, 1, 1, 1, 0);  			*nr_written = *nr_written +  			     (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;  			*page_started = 1; @@ -725,6 +726,15 @@ static noinline int cow_file_range(struct inode *inode,  	BUG_ON(disk_num_bytes >  	       btrfs_super_total_bytes(&root->fs_info->super_copy)); + +	read_lock(&BTRFS_I(inode)->extent_tree.lock); +	em = search_extent_mapping(&BTRFS_I(inode)->extent_tree, +				   start, num_bytes); +	if (em) { +		alloc_hint = em->block_start; +		free_extent_map(em); +	} +	read_unlock(&BTRFS_I(inode)->extent_tree.lock);  	btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0);  	while (disk_num_bytes > 0) { @@ -737,7 +747,6 @@ static noinline int cow_file_range(struct inode *inode,  		em = alloc_extent_map(GFP_NOFS);  		em->start = start;  		em->orig_start = em->start; -  		ram_size = ins.offset;  		em->len = ins.offset; @@ -747,9 +756,9 @@ static noinline int cow_file_range(struct inode *inode,  		set_bit(EXTENT_FLAG_PINNED, &em->flags);  		while (1) { -			spin_lock(&em_tree->lock); +			write_lock(&em_tree->lock);  			ret = add_extent_mapping(em_tree, em); -			spin_unlock(&em_tree->lock); +			write_unlock(&em_tree->lock);  			if (ret != -EEXIST) {  				free_extent_map(em);  				break; @@ -776,11 +785,14 @@ static noinline int cow_file_range(struct inode *inode,  		/* we're not doing compressed IO, don't unlock the first  		 * page (which the caller expects to stay locked), don't  		 * clear any dirty bits and don't set any writeback bits +		 * +		 * Do set the Private2 bit so we know this page was properly +		 * setup for writepage  		 */  		extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,  					     start, start + ram_size - 1,  					     locked_page, unlock, 1, -					     1, 0, 0, 0); +					     1, 0, 0, 0, 1);  		disk_num_bytes -= cur_alloc_size;  		num_bytes -= cur_alloc_size;  		alloc_hint = ins.objectid + ins.offset; @@ -853,7 +865,7 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,  	int limit = 10 * 1024 * 1042;  	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | -			 EXTENT_DELALLOC, 1, 0, GFP_NOFS); +			 EXTENT_DELALLOC, 1, 0, NULL, GFP_NOFS);  	while (start < end) {  		async_cow = kmalloc(sizeof(*async_cow), GFP_NOFS);  		async_cow->inode = inode; @@ -1080,9 +1092,9 @@ out_check:  			em->bdev = root->fs_info->fs_devices->latest_bdev;  			set_bit(EXTENT_FLAG_PINNED, &em->flags);  			while (1) { -				spin_lock(&em_tree->lock); +				write_lock(&em_tree->lock);  				ret = add_extent_mapping(em_tree, em); -				spin_unlock(&em_tree->lock); +				write_unlock(&em_tree->lock);  				if (ret != -EEXIST) {  					free_extent_map(em);  					break; @@ -1101,7 +1113,7 @@ out_check:  		extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,  					cur_offset, cur_offset + num_bytes - 1, -					locked_page, 1, 1, 1, 0, 0, 0); +					locked_page, 1, 1, 1, 0, 0, 0, 1);  		cur_offset = extent_end;  		if (cur_offset > end)  			break; @@ -1374,10 +1386,8 @@ again:  	lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end, GFP_NOFS);  	/* already ordered? We're done */ -	if (test_range_bit(&BTRFS_I(inode)->io_tree, page_start, page_end, -			     EXTENT_ORDERED, 0)) { +	if (PagePrivate2(page))  		goto out; -	}  	ordered = btrfs_lookup_ordered_extent(inode, page_start);  	if (ordered) { @@ -1413,11 +1423,9 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)  	struct inode *inode = page->mapping->host;  	struct btrfs_writepage_fixup *fixup;  	struct btrfs_root *root = BTRFS_I(inode)->root; -	int ret; -	ret = test_range_bit(&BTRFS_I(inode)->io_tree, start, end, -			     EXTENT_ORDERED, 0); -	if (ret) +	/* this page is properly in the ordered list */ +	if (TestClearPagePrivate2(page))  		return 0;  	if (PageChecked(page)) @@ -1455,9 +1463,19 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,  	BUG_ON(!path);  	path->leave_spinning = 1; + +	/* +	 * we may be replacing one extent in the tree with another. +	 * The new extent is pinned in the extent map, and we don't want +	 * to drop it from the cache until it is completely in the btree. +	 * +	 * So, tell btrfs_drop_extents to leave this extent in the cache. +	 * the caller is expected to unpin it and allow it to be merged +	 * with the others. +	 */  	ret = btrfs_drop_extents(trans, root, inode, file_pos,  				 file_pos + num_bytes, locked_end, -				 file_pos, &hint); +				 file_pos, &hint, 0);  	BUG_ON(ret);  	ins.objectid = inode->i_ino; @@ -1485,7 +1503,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,  	btrfs_mark_buffer_dirty(leaf);  	inode_add_bytes(inode, num_bytes); -	btrfs_drop_extent_cache(inode, file_pos, file_pos + num_bytes - 1, 0);  	ins.objectid = disk_bytenr;  	ins.offset = disk_num_bytes; @@ -1596,6 +1613,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)  						ordered_extent->len,  						compressed, 0, 0,  						BTRFS_FILE_EXTENT_REG); +		unpin_extent_cache(&BTRFS_I(inode)->extent_tree, +				   ordered_extent->file_offset, +				   ordered_extent->len);  		BUG_ON(ret);  	}  	unlock_extent(io_tree, ordered_extent->file_offset, @@ -1623,6 +1643,7 @@ nocow:  static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,  				struct extent_state *state, int uptodate)  { +	ClearPagePrivate2(page);  	return btrfs_finish_ordered_io(page->mapping->host, start, end);  } @@ -1669,13 +1690,13 @@ static int btrfs_io_failed_hook(struct bio *failed_bio,  		failrec->last_mirror = 0;  		failrec->bio_flags = 0; -		spin_lock(&em_tree->lock); +		read_lock(&em_tree->lock);  		em = lookup_extent_mapping(em_tree, start, failrec->len);  		if (em->start > start || em->start + em->len < start) {  			free_extent_map(em);  			em = NULL;  		} -		spin_unlock(&em_tree->lock); +		read_unlock(&em_tree->lock);  		if (!em || IS_ERR(em)) {  			kfree(failrec); @@ -1794,7 +1815,7 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,  		return 0;  	if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID && -	    test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1)) { +	    test_range_bit(io_tree, start, end, EXTENT_NODATASUM, 1, NULL)) {  		clear_extent_bits(io_tree, start, end, EXTENT_NODATASUM,  				  GFP_NOFS);  		return 0; @@ -2352,6 +2373,69 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry)  	return ret;  } +int btrfs_unlink_subvol(struct btrfs_trans_handle *trans, +			struct btrfs_root *root, +			struct inode *dir, u64 objectid, +			const char *name, int name_len) +{ +	struct btrfs_path *path; +	struct extent_buffer *leaf; +	struct btrfs_dir_item *di; +	struct btrfs_key key; +	u64 index; +	int ret; + +	path = btrfs_alloc_path(); +	if (!path) +		return -ENOMEM; + +	di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, +				   name, name_len, -1); +	BUG_ON(!di || IS_ERR(di)); + +	leaf = path->nodes[0]; +	btrfs_dir_item_key_to_cpu(leaf, di, &key); +	WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); +	ret = btrfs_delete_one_dir_name(trans, root, path, di); +	BUG_ON(ret); +	btrfs_release_path(root, path); + +	ret = btrfs_del_root_ref(trans, root->fs_info->tree_root, +				 objectid, root->root_key.objectid, +				 dir->i_ino, &index, name, name_len); +	if (ret < 0) { +		BUG_ON(ret != -ENOENT); +		di = btrfs_search_dir_index_item(root, path, dir->i_ino, +						 name, name_len); +		BUG_ON(!di || IS_ERR(di)); + +		leaf = path->nodes[0]; +		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); +		btrfs_release_path(root, path); +		index = key.offset; +	} + +	di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, +					 index, name, name_len, -1); +	BUG_ON(!di || IS_ERR(di)); + +	leaf = path->nodes[0]; +	btrfs_dir_item_key_to_cpu(leaf, di, &key); +	WARN_ON(key.type != BTRFS_ROOT_ITEM_KEY || key.objectid != objectid); +	ret = btrfs_delete_one_dir_name(trans, root, path, di); +	BUG_ON(ret); +	btrfs_release_path(root, path); + +	btrfs_i_size_write(dir, dir->i_size - name_len * 2); +	dir->i_mtime = dir->i_ctime = CURRENT_TIME; +	ret = btrfs_update_inode(trans, root, dir); +	BUG_ON(ret); +	dir->i_sb->s_dirt = 1; + +	btrfs_free_path(path); +	return 0; +} +  static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)  {  	struct inode *inode = dentry->d_inode; @@ -2361,29 +2445,31 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)  	struct btrfs_trans_handle *trans;  	unsigned long nr = 0; -	/* -	 * the FIRST_FREE_OBJECTID check makes sure we don't try to rmdir -	 * the root of a subvolume or snapshot -	 */  	if (inode->i_size > BTRFS_EMPTY_DIR_SIZE || -	    inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) { +	    inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)  		return -ENOTEMPTY; -	}  	trans = btrfs_start_transaction(root, 1);  	btrfs_set_trans_block_group(trans, dir); +	if (unlikely(inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { +		err = btrfs_unlink_subvol(trans, root, dir, +					  BTRFS_I(inode)->location.objectid, +					  dentry->d_name.name, +					  dentry->d_name.len); +		goto out; +	} +  	err = btrfs_orphan_add(trans, inode);  	if (err) -		goto fail_trans; +		goto out;  	/* now the directory is empty */  	err = btrfs_unlink_inode(trans, root, dir, dentry->d_inode,  				 dentry->d_name.name, dentry->d_name.len);  	if (!err)  		btrfs_i_size_write(inode, 0); - -fail_trans: +out:  	nr = trans->blocks_used;  	ret = btrfs_end_transaction_throttle(trans, root);  	btrfs_btree_balance_dirty(root, nr); @@ -2935,7 +3021,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)  						 cur_offset,  						 cur_offset + hole_size,  						 block_end, -						 cur_offset, &hint_byte); +						 cur_offset, &hint_byte, 1);  			if (err)  				break;  			err = btrfs_insert_file_extent(trans, root, @@ -3003,6 +3089,11 @@ void btrfs_delete_inode(struct inode *inode)  	}  	btrfs_wait_ordered_range(inode, 0, (u64)-1); +	if (inode->i_nlink > 0) { +		BUG_ON(btrfs_root_refs(&root->root_item) != 0); +		goto no_delete; +	} +  	btrfs_i_size_write(inode, 0);  	trans = btrfs_join_transaction(root, 1); @@ -3070,29 +3161,67 @@ out_err:   * is kind of like crossing a mount point.   */  static int fixup_tree_root_location(struct btrfs_root *root, -			     struct btrfs_key *location, -			     struct btrfs_root **sub_root, -			     struct dentry *dentry) +				    struct inode *dir, +				    struct dentry *dentry, +				    struct btrfs_key *location, +				    struct btrfs_root **sub_root)  { -	struct btrfs_root_item *ri; +	struct btrfs_path *path; +	struct btrfs_root *new_root; +	struct btrfs_root_ref *ref; +	struct extent_buffer *leaf; +	int ret; +	int err = 0; -	if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY) -		return 0; -	if (location->objectid == BTRFS_ROOT_TREE_OBJECTID) -		return 0; +	path = btrfs_alloc_path(); +	if (!path) { +		err = -ENOMEM; +		goto out; +	} -	*sub_root = btrfs_read_fs_root(root->fs_info, location, -					dentry->d_name.name, -					dentry->d_name.len); -	if (IS_ERR(*sub_root)) -		return PTR_ERR(*sub_root); +	err = -ENOENT; +	ret = btrfs_find_root_ref(root->fs_info->tree_root, path, +				  BTRFS_I(dir)->root->root_key.objectid, +				  location->objectid); +	if (ret) { +		if (ret < 0) +			err = ret; +		goto out; +	} -	ri = &(*sub_root)->root_item; -	location->objectid = btrfs_root_dirid(ri); -	btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY); -	location->offset = 0; +	leaf = path->nodes[0]; +	ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_root_ref); +	if (btrfs_root_ref_dirid(leaf, ref) != dir->i_ino || +	    btrfs_root_ref_name_len(leaf, ref) != dentry->d_name.len) +		goto out; -	return 0; +	ret = memcmp_extent_buffer(leaf, dentry->d_name.name, +				   (unsigned long)(ref + 1), +				   dentry->d_name.len); +	if (ret) +		goto out; + +	btrfs_release_path(root->fs_info->tree_root, path); + +	new_root = btrfs_read_fs_root_no_name(root->fs_info, location); +	if (IS_ERR(new_root)) { +		err = PTR_ERR(new_root); +		goto out; +	} + +	if (btrfs_root_refs(&new_root->root_item) == 0) { +		err = -ENOENT; +		goto out; +	} + +	*sub_root = new_root; +	location->objectid = btrfs_root_dirid(&new_root->root_item); +	location->type = BTRFS_INODE_ITEM_KEY; +	location->offset = 0; +	err = 0; +out: +	btrfs_free_path(path); +	return err;  }  static void inode_tree_add(struct inode *inode) @@ -3101,11 +3230,13 @@ static void inode_tree_add(struct inode *inode)  	struct btrfs_inode *entry;  	struct rb_node **p;  	struct rb_node *parent; -  again:  	p = &root->inode_tree.rb_node;  	parent = NULL; +	if (hlist_unhashed(&inode->i_hash)) +		return; +  	spin_lock(&root->inode_lock);  	while (*p) {  		parent = *p; @@ -3132,13 +3263,87 @@ again:  static void inode_tree_del(struct inode *inode)  {  	struct btrfs_root *root = BTRFS_I(inode)->root; +	int empty = 0;  	spin_lock(&root->inode_lock);  	if (!RB_EMPTY_NODE(&BTRFS_I(inode)->rb_node)) {  		rb_erase(&BTRFS_I(inode)->rb_node, &root->inode_tree);  		RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); +		empty = RB_EMPTY_ROOT(&root->inode_tree);  	}  	spin_unlock(&root->inode_lock); + +	if (empty && btrfs_root_refs(&root->root_item) == 0) { +		synchronize_srcu(&root->fs_info->subvol_srcu); +		spin_lock(&root->inode_lock); +		empty = RB_EMPTY_ROOT(&root->inode_tree); +		spin_unlock(&root->inode_lock); +		if (empty) +			btrfs_add_dead_root(root); +	} +} + +int btrfs_invalidate_inodes(struct btrfs_root *root) +{ +	struct rb_node *node; +	struct rb_node *prev; +	struct btrfs_inode *entry; +	struct inode *inode; +	u64 objectid = 0; + +	WARN_ON(btrfs_root_refs(&root->root_item) != 0); + +	spin_lock(&root->inode_lock); +again: +	node = root->inode_tree.rb_node; +	prev = NULL; +	while (node) { +		prev = node; +		entry = rb_entry(node, struct btrfs_inode, rb_node); + +		if (objectid < entry->vfs_inode.i_ino) +			node = node->rb_left; +		else if (objectid > entry->vfs_inode.i_ino) +			node = node->rb_right; +		else +			break; +	} +	if (!node) { +		while (prev) { +			entry = rb_entry(prev, struct btrfs_inode, rb_node); +			if (objectid <= entry->vfs_inode.i_ino) { +				node = prev; +				break; +			} +			prev = rb_next(prev); +		} +	} +	while (node) { +		entry = rb_entry(node, struct btrfs_inode, rb_node); +		objectid = entry->vfs_inode.i_ino + 1; +		inode = igrab(&entry->vfs_inode); +		if (inode) { +			spin_unlock(&root->inode_lock); +			if (atomic_read(&inode->i_count) > 1) +				d_prune_aliases(inode); +			/* +			 * btrfs_drop_inode will remove it from +			 * the inode cache when its usage count +			 * hits zero. +			 */ +			iput(inode); +			cond_resched(); +			spin_lock(&root->inode_lock); +			goto again; +		} + +		if (cond_resched_lock(&root->inode_lock)) +			goto again; + +		node = rb_next(node); +	} +	spin_unlock(&root->inode_lock); +	return 0;  }  static noinline void init_btrfs_i(struct inode *inode) @@ -3225,15 +3430,41 @@ struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,  	return inode;  } +static struct inode *new_simple_dir(struct super_block *s, +				    struct btrfs_key *key, +				    struct btrfs_root *root) +{ +	struct inode *inode = new_inode(s); + +	if (!inode) +		return ERR_PTR(-ENOMEM); + +	init_btrfs_i(inode); + +	BTRFS_I(inode)->root = root; +	memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); +	BTRFS_I(inode)->dummy_inode = 1; + +	inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; +	inode->i_op = &simple_dir_inode_operations; +	inode->i_fop = &simple_dir_operations; +	inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; +	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + +	return inode; +} +  struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)  {  	struct inode *inode; -	struct btrfs_inode *bi = BTRFS_I(dir); -	struct btrfs_root *root = bi->root; +	struct btrfs_root *root = BTRFS_I(dir)->root;  	struct btrfs_root *sub_root = root;  	struct btrfs_key location; +	int index;  	int ret; +	dentry->d_op = &btrfs_dentry_operations; +  	if (dentry->d_name.len > BTRFS_NAME_LEN)  		return ERR_PTR(-ENAMETOOLONG); @@ -3242,29 +3473,50 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)  	if (ret < 0)  		return ERR_PTR(ret); -	inode = NULL; -	if (location.objectid) { -		ret = fixup_tree_root_location(root, &location, &sub_root, -						dentry); -		if (ret < 0) -			return ERR_PTR(ret); -		if (ret > 0) -			return ERR_PTR(-ENOENT); +	if (location.objectid == 0) +		return NULL; + +	if (location.type == BTRFS_INODE_ITEM_KEY) { +		inode = btrfs_iget(dir->i_sb, &location, root); +		return inode; +	} + +	BUG_ON(location.type != BTRFS_ROOT_ITEM_KEY); + +	index = srcu_read_lock(&root->fs_info->subvol_srcu); +	ret = fixup_tree_root_location(root, dir, dentry, +				       &location, &sub_root); +	if (ret < 0) { +		if (ret != -ENOENT) +			inode = ERR_PTR(ret); +		else +			inode = new_simple_dir(dir->i_sb, &location, sub_root); +	} else {  		inode = btrfs_iget(dir->i_sb, &location, sub_root); -		if (IS_ERR(inode)) -			return ERR_CAST(inode);  	} +	srcu_read_unlock(&root->fs_info->subvol_srcu, index); +  	return inode;  } +static int btrfs_dentry_delete(struct dentry *dentry) +{ +	struct btrfs_root *root; + +	if (!dentry->d_inode) +		return 0; + +	root = BTRFS_I(dentry->d_inode)->root; +	if (btrfs_root_refs(&root->root_item) == 0) +		return 1; +	return 0; +} +  static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,  				   struct nameidata *nd)  {  	struct inode *inode; -	if (dentry->d_name.len > BTRFS_NAME_LEN) -		return ERR_PTR(-ENAMETOOLONG); -  	inode = btrfs_lookup_dentry(dir, dentry);  	if (IS_ERR(inode))  		return ERR_CAST(inode); @@ -3603,9 +3855,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,  	if (ret != 0)  		goto fail; -	if (objectid > root->highest_inode) -		root->highest_inode = objectid; -  	inode->i_uid = current_fsuid();  	if (dir && (dir->i_mode & S_ISGID)) { @@ -3673,26 +3922,35 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,  		   struct inode *parent_inode, struct inode *inode,  		   const char *name, int name_len, int add_backref, u64 index)  { -	int ret; +	int ret = 0;  	struct btrfs_key key;  	struct btrfs_root *root = BTRFS_I(parent_inode)->root; -	key.objectid = inode->i_ino; -	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); -	key.offset = 0; +	if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { +		memcpy(&key, &BTRFS_I(inode)->root->root_key, sizeof(key)); +	} else { +		key.objectid = inode->i_ino; +		btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY); +		key.offset = 0; +	} + +	if (unlikely(inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { +		ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, +					 key.objectid, root->root_key.objectid, +					 parent_inode->i_ino, +					 index, name, name_len); +	} else if (add_backref) { +		ret = btrfs_insert_inode_ref(trans, root, +					     name, name_len, inode->i_ino, +					     parent_inode->i_ino, index); +	} -	ret = btrfs_insert_dir_item(trans, root, name, name_len, -				    parent_inode->i_ino, -				    &key, btrfs_inode_type(inode), -				    index);  	if (ret == 0) { -		if (add_backref) { -			ret = btrfs_insert_inode_ref(trans, root, -						     name, name_len, -						     inode->i_ino, -						     parent_inode->i_ino, -						     index); -		} +		ret = btrfs_insert_dir_item(trans, root, name, name_len, +					    parent_inode->i_ino, &key, +					    btrfs_inode_type(inode), index); +		BUG_ON(ret); +  		btrfs_i_size_write(parent_inode, parent_inode->i_size +  				   name_len * 2);  		parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME; @@ -3875,18 +4133,16 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,  	err = btrfs_add_nondir(trans, dentry, inode, 1, index); -	if (err) -		drop_inode = 1; - -	btrfs_update_inode_block_group(trans, dir); -	err = btrfs_update_inode(trans, root, inode); - -	if (err) +	if (err) {  		drop_inode = 1; +	} else { +		btrfs_update_inode_block_group(trans, dir); +		err = btrfs_update_inode(trans, root, inode); +		BUG_ON(err); +		btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); +	}  	nr = trans->blocks_used; - -	btrfs_log_new_name(trans, inode, NULL, dentry->d_parent);  	btrfs_end_transaction_throttle(trans, root);  fail:  	if (drop_inode) { @@ -4064,11 +4320,11 @@ struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,  	int compressed;  again: -	spin_lock(&em_tree->lock); +	read_lock(&em_tree->lock);  	em = lookup_extent_mapping(em_tree, start, len);  	if (em)  		em->bdev = root->fs_info->fs_devices->latest_bdev; -	spin_unlock(&em_tree->lock); +	read_unlock(&em_tree->lock);  	if (em) {  		if (em->start > start || em->start + em->len <= start) @@ -4215,6 +4471,11 @@ again:  				map = kmap(page);  				read_extent_buffer(leaf, map + pg_offset, ptr,  						   copy_size); +				if (pg_offset + copy_size < PAGE_CACHE_SIZE) { +					memset(map + pg_offset + copy_size, 0, +					       PAGE_CACHE_SIZE - pg_offset - +					       copy_size); +				}  				kunmap(page);  			}  			flush_dcache_page(page); @@ -4259,7 +4520,7 @@ insert:  	}  	err = 0; -	spin_lock(&em_tree->lock); +	write_lock(&em_tree->lock);  	ret = add_extent_mapping(em_tree, em);  	/* it is possible that someone inserted the extent into the tree  	 * while we had the lock dropped.  It is also possible that @@ -4299,7 +4560,7 @@ insert:  			err = 0;  		}  	} -	spin_unlock(&em_tree->lock); +	write_unlock(&em_tree->lock);  out:  	if (path)  		btrfs_free_path(path); @@ -4398,13 +4659,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)  	u64 page_start = page_offset(page);  	u64 page_end = page_start + PAGE_CACHE_SIZE - 1; + +	/* +	 * we have the page locked, so new writeback can't start, +	 * and the dirty bit won't be cleared while we are here. +	 * +	 * Wait for IO on this page so that we can safely clear +	 * the PagePrivate2 bit and do ordered accounting +	 */  	wait_on_page_writeback(page); +  	tree = &BTRFS_I(page->mapping->host)->io_tree;  	if (offset) {  		btrfs_releasepage(page, GFP_NOFS);  		return;  	} -  	lock_extent(tree, page_start, page_end, GFP_NOFS);  	ordered = btrfs_lookup_ordered_extent(page->mapping->host,  					   page_offset(page)); @@ -4415,16 +4684,21 @@ static void btrfs_invalidatepage(struct page *page, unsigned long offset)  		 */  		clear_extent_bit(tree, page_start, page_end,  				 EXTENT_DIRTY | EXTENT_DELALLOC | -				 EXTENT_LOCKED, 1, 0, GFP_NOFS); -		btrfs_finish_ordered_io(page->mapping->host, -					page_start, page_end); +				 EXTENT_LOCKED, 1, 0, NULL, GFP_NOFS); +		/* +		 * whoever cleared the private bit is responsible +		 * for the finish_ordered_io +		 */ +		if (TestClearPagePrivate2(page)) { +			btrfs_finish_ordered_io(page->mapping->host, +						page_start, page_end); +		}  		btrfs_put_ordered_extent(ordered);  		lock_extent(tree, page_start, page_end, GFP_NOFS);  	}  	clear_extent_bit(tree, page_start, page_end, -		 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC | -		 EXTENT_ORDERED, -		 1, 1, GFP_NOFS); +		 EXTENT_LOCKED | EXTENT_DIRTY | EXTENT_DELALLOC, +		 1, 1, NULL, GFP_NOFS);  	__btrfs_releasepage(page, GFP_NOFS);  	ClearPageChecked(page); @@ -4521,11 +4795,14 @@ again:  	}  	ClearPageChecked(page);  	set_page_dirty(page); +	SetPageUptodate(page);  	BTRFS_I(inode)->last_trans = root->fs_info->generation + 1;  	unlock_extent(io_tree, page_start, page_end, GFP_NOFS);  out_unlock: +	if (!ret) +		return VM_FAULT_LOCKED;  	unlock_page(page);  out:  	return ret; @@ -4594,11 +4871,11 @@ out:   * create a new subvolume directory/inode (helper for the ioctl).   */  int btrfs_create_subvol_root(struct btrfs_trans_handle *trans, -			     struct btrfs_root *new_root, struct dentry *dentry, +			     struct btrfs_root *new_root,  			     u64 new_dirid, u64 alloc_hint)  {  	struct inode *inode; -	int error; +	int err;  	u64 index = 0;  	inode = btrfs_new_inode(trans, new_root, NULL, "..", 2, new_dirid, @@ -4611,11 +4888,10 @@ int btrfs_create_subvol_root(struct btrfs_trans_handle *trans,  	inode->i_nlink = 1;  	btrfs_i_size_write(inode, 0); -	error = btrfs_update_inode(trans, new_root, inode); -	if (error) -		return error; +	err = btrfs_update_inode(trans, new_root, inode); +	BUG_ON(err); -	d_instantiate(dentry, inode); +	iput(inode);  	return 0;  } @@ -4693,6 +4969,16 @@ void btrfs_destroy_inode(struct inode *inode)  	kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));  } +void btrfs_drop_inode(struct inode *inode) +{ +	struct btrfs_root *root = BTRFS_I(inode)->root; + +	if (inode->i_nlink > 0 && btrfs_root_refs(&root->root_item) == 0) +		generic_delete_inode(inode); +	else +		generic_drop_inode(inode); +} +  static void init_once(void *foo)  {  	struct btrfs_inode *ei = (struct btrfs_inode *) foo; @@ -4761,31 +5047,32 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,  {  	struct btrfs_trans_handle *trans;  	struct btrfs_root *root = BTRFS_I(old_dir)->root; +	struct btrfs_root *dest = BTRFS_I(new_dir)->root;  	struct inode *new_inode = new_dentry->d_inode;  	struct inode *old_inode = old_dentry->d_inode;  	struct timespec ctime = CURRENT_TIME;  	u64 index = 0; +	u64 root_objectid;  	int ret; -	/* we're not allowed to rename between subvolumes */ -	if (BTRFS_I(old_inode)->root->root_key.objectid != -	    BTRFS_I(new_dir)->root->root_key.objectid) +	if (new_dir->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) +		return -EPERM; + +	/* we only allow rename subvolume link between subvolumes */ +	if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID && root != dest)  		return -EXDEV; -	if (S_ISDIR(old_inode->i_mode) && new_inode && -	    new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) { +	if (old_inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID || +	    (new_inode && new_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID))  		return -ENOTEMPTY; -	} -	/* to rename a snapshot or subvolume, we need to juggle the -	 * backrefs.  This isn't coded yet -	 */ -	if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) -		return -EXDEV; +	if (S_ISDIR(old_inode->i_mode) && new_inode && +	    new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) +		return -ENOTEMPTY;  	ret = btrfs_check_metadata_free_space(root);  	if (ret) -		goto out_unlock; +		return ret;  	/*  	 * we're using rename to replace one file with another. @@ -4796,8 +5083,40 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,  	    old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)  		filemap_flush(old_inode->i_mapping); +	/* close the racy window with snapshot create/destroy ioctl */ +	if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) +		down_read(&root->fs_info->subvol_sem); +  	trans = btrfs_start_transaction(root, 1); +	btrfs_set_trans_block_group(trans, new_dir); + +	if (dest != root) +		btrfs_record_root_in_trans(trans, dest); +	ret = btrfs_set_inode_index(new_dir, &index); +	if (ret) +		goto out_fail; + +	if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { +		/* force full log commit if subvolume involved. */ +		root->fs_info->last_trans_log_full_commit = trans->transid; +	} else { +		ret = btrfs_insert_inode_ref(trans, dest, +					     new_dentry->d_name.name, +					     new_dentry->d_name.len, +					     old_inode->i_ino, +					     new_dir->i_ino, index); +		if (ret) +			goto out_fail; +		/* +		 * this is an ugly little race, but the rename is required +		 * to make sure that if we crash, the inode is either at the +		 * old name or the new one.  pinning the log transaction lets +		 * us make sure we don't allow a log commit to come in after +		 * we unlink the name but before we add the new name back in. +		 */ +		btrfs_pin_log_trans(root); +	}  	/*  	 * make sure the inode gets flushed if it is replacing  	 * something. @@ -4807,18 +5126,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,  		btrfs_add_ordered_operation(trans, root, old_inode);  	} -	/* -	 * this is an ugly little race, but the rename is required to make -	 * sure that if we crash, the inode is either at the old name -	 * or the new one.  pinning the log transaction lets us make sure -	 * we don't allow a log commit to come in after we unlink the -	 * name but before we add the new name back in. -	 */ -	btrfs_pin_log_trans(root); - -	btrfs_set_trans_block_group(trans, new_dir); - -	btrfs_inc_nlink(old_dentry->d_inode);  	old_dir->i_ctime = old_dir->i_mtime = ctime;  	new_dir->i_ctime = new_dir->i_mtime = ctime;  	old_inode->i_ctime = ctime; @@ -4826,47 +5133,58 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,  	if (old_dentry->d_parent != new_dentry->d_parent)  		btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); -	ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode, -				 old_dentry->d_name.name, -				 old_dentry->d_name.len); -	if (ret) -		goto out_fail; +	if (unlikely(old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID)) { +		root_objectid = BTRFS_I(old_inode)->root->root_key.objectid; +		ret = btrfs_unlink_subvol(trans, root, old_dir, root_objectid, +					old_dentry->d_name.name, +					old_dentry->d_name.len); +	} else { +		btrfs_inc_nlink(old_dentry->d_inode); +		ret = btrfs_unlink_inode(trans, root, old_dir, +					 old_dentry->d_inode, +					 old_dentry->d_name.name, +					 old_dentry->d_name.len); +	} +	BUG_ON(ret);  	if (new_inode) {  		new_inode->i_ctime = CURRENT_TIME; -		ret = btrfs_unlink_inode(trans, root, new_dir, -					 new_dentry->d_inode, -					 new_dentry->d_name.name, -					 new_dentry->d_name.len); -		if (ret) -			goto out_fail; +		if (unlikely(new_inode->i_ino == +			     BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) { +			root_objectid = BTRFS_I(new_inode)->location.objectid; +			ret = btrfs_unlink_subvol(trans, dest, new_dir, +						root_objectid, +						new_dentry->d_name.name, +						new_dentry->d_name.len); +			BUG_ON(new_inode->i_nlink == 0); +		} else { +			ret = btrfs_unlink_inode(trans, dest, new_dir, +						 new_dentry->d_inode, +						 new_dentry->d_name.name, +						 new_dentry->d_name.len); +		} +		BUG_ON(ret);  		if (new_inode->i_nlink == 0) {  			ret = btrfs_orphan_add(trans, new_dentry->d_inode); -			if (ret) -				goto out_fail; +			BUG_ON(ret);  		} -  	} -	ret = btrfs_set_inode_index(new_dir, &index); -	if (ret) -		goto out_fail; -	ret = btrfs_add_link(trans, new_dentry->d_parent->d_inode, -			     old_inode, new_dentry->d_name.name, -			     new_dentry->d_name.len, 1, index); -	if (ret) -		goto out_fail; +	ret = btrfs_add_link(trans, new_dir, old_inode, +			     new_dentry->d_name.name, +			     new_dentry->d_name.len, 0, index); +	BUG_ON(ret); -	btrfs_log_new_name(trans, old_inode, old_dir, -				       new_dentry->d_parent); +	if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) { +		btrfs_log_new_name(trans, old_inode, old_dir, +				   new_dentry->d_parent); +		btrfs_end_log_trans(root); +	}  out_fail: - -	/* this btrfs_end_log_trans just allows the current -	 * log-sub transaction to complete -	 */ -	btrfs_end_log_trans(root);  	btrfs_end_transaction_throttle(trans, root); -out_unlock: + +	if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) +		up_read(&root->fs_info->subvol_sem);  	return ret;  } @@ -5058,6 +5376,8 @@ static int prealloc_file_range(struct btrfs_trans_handle *trans,  						  0, 0, 0,  						  BTRFS_FILE_EXTENT_PREALLOC);  		BUG_ON(ret); +		btrfs_drop_extent_cache(inode, cur_offset, +					cur_offset + ins.offset -1, 0);  		num_bytes -= ins.offset;  		cur_offset += ins.offset;  		alloc_hint = ins.objectid + ins.offset; @@ -5201,7 +5521,7 @@ static int btrfs_permission(struct inode *inode, int mask)  	return generic_permission(inode, mask, btrfs_check_acl);  } -static struct inode_operations btrfs_dir_inode_operations = { +static const struct inode_operations btrfs_dir_inode_operations = {  	.getattr	= btrfs_getattr,  	.lookup		= btrfs_lookup,  	.create		= btrfs_create, @@ -5219,10 +5539,11 @@ static struct inode_operations btrfs_dir_inode_operations = {  	.removexattr	= btrfs_removexattr,  	.permission	= btrfs_permission,  }; -static struct inode_operations btrfs_dir_ro_inode_operations = { +static const struct inode_operations btrfs_dir_ro_inode_operations = {  	.lookup		= btrfs_lookup,  	.permission	= btrfs_permission,  }; +  static struct file_operations btrfs_dir_file_operations = {  	.llseek		= generic_file_llseek,  	.read		= generic_read_dir, @@ -5259,7 +5580,7 @@ static struct extent_io_ops btrfs_extent_io_ops = {   *   * For now we're avoiding this by dropping bmap.   */ -static struct address_space_operations btrfs_aops = { +static const struct address_space_operations btrfs_aops = {  	.readpage	= btrfs_readpage,  	.writepage	= btrfs_writepage,  	.writepages	= btrfs_writepages, @@ -5269,16 +5590,17 @@ static struct address_space_operations btrfs_aops = {  	.invalidatepage = btrfs_invalidatepage,  	.releasepage	= btrfs_releasepage,  	.set_page_dirty	= btrfs_set_page_dirty, +	.error_remove_page = generic_error_remove_page,  }; -static struct address_space_operations btrfs_symlink_aops = { +static const struct address_space_operations btrfs_symlink_aops = {  	.readpage	= btrfs_readpage,  	.writepage	= btrfs_writepage,  	.invalidatepage = btrfs_invalidatepage,  	.releasepage	= btrfs_releasepage,  }; -static struct inode_operations btrfs_file_inode_operations = { +static const struct inode_operations btrfs_file_inode_operations = {  	.truncate	= btrfs_truncate,  	.getattr	= btrfs_getattr,  	.setattr	= btrfs_setattr, @@ -5290,7 +5612,7 @@ static struct inode_operations btrfs_file_inode_operations = {  	.fallocate	= btrfs_fallocate,  	.fiemap		= btrfs_fiemap,  }; -static struct inode_operations btrfs_special_inode_operations = { +static const struct inode_operations btrfs_special_inode_operations = {  	.getattr	= btrfs_getattr,  	.setattr	= btrfs_setattr,  	.permission	= btrfs_permission, @@ -5299,7 +5621,7 @@ static struct inode_operations btrfs_special_inode_operations = {  	.listxattr	= btrfs_listxattr,  	.removexattr	= btrfs_removexattr,  }; -static struct inode_operations btrfs_symlink_inode_operations = { +static const struct inode_operations btrfs_symlink_inode_operations = {  	.readlink	= generic_readlink,  	.follow_link	= page_follow_link_light,  	.put_link	= page_put_link, @@ -5309,3 +5631,7 @@ static struct inode_operations btrfs_symlink_inode_operations = {  	.listxattr	= btrfs_listxattr,  	.removexattr	= btrfs_removexattr,  }; + +struct dentry_operations btrfs_dentry_operations = { +	.d_delete	= btrfs_dentry_delete, +};  | 
