diff options
Diffstat (limited to 'fs/ext4/move_extent.c')
| -rw-r--r-- | fs/ext4/move_extent.c | 648 | 
1 files changed, 364 insertions, 284 deletions
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index b9f3e7862f1..2484c7ec6a7 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -17,8 +17,8 @@  #include <linux/quotaops.h>  #include <linux/slab.h>  #include "ext4_jbd2.h" -#include "ext4_extents.h"  #include "ext4.h" +#include "ext4_extents.h"  /**   * get_ext_path - Find an extent path for designated logical block number. @@ -32,16 +32,18 @@   */  static inline int  get_ext_path(struct inode *inode, ext4_lblk_t lblock, -		struct ext4_ext_path **path) +		struct ext4_ext_path **orig_path)  {  	int ret = 0; +	struct ext4_ext_path *path; -	*path = ext4_ext_find_extent(inode, lblock, *path); -	if (IS_ERR(*path)) { -		ret = PTR_ERR(*path); -		*path = NULL; -	} else if ((*path)[ext_depth(inode)].p_ext == NULL) +	path = ext4_ext_find_extent(inode, lblock, *orig_path, EXT4_EX_NOCACHE); +	if (IS_ERR(path)) +		ret = PTR_ERR(path); +	else if (path[ext_depth(inode)].p_ext == NULL)  		ret = -ENODATA; +	else +		*orig_path = path;  	return ret;  } @@ -55,8 +57,8 @@ get_ext_path(struct inode *inode, ext4_lblk_t lblock,  static void  copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest)  { -	if (ext4_ext_is_uninitialized(src)) -		ext4_ext_mark_uninitialized(dest); +	if (ext4_ext_is_unwritten(src)) +		ext4_ext_mark_unwritten(dest);  	else  		dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest));  } @@ -74,7 +76,7 @@ copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest)   * ext4_ext_path structure refers to the last extent, or a negative error   * value on failure.   */ -static int +int  mext_next_extent(struct inode *inode, struct ext4_ext_path *path,  		      struct ext4_extent **extent)  { @@ -142,66 +144,34 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,  }  /** - * mext_check_null_inode - NULL check for two inodes + * ext4_double_down_write_data_sem - Acquire two inodes' write lock + *                                   of i_data_sem   * - * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. + * Acquire write lock of i_data_sem of the two inodes   */ -static int -mext_check_null_inode(struct inode *inode1, struct inode *inode2, -		      const char *function, unsigned int line) +void +ext4_double_down_write_data_sem(struct inode *first, struct inode *second)  { -	int ret = 0; - -	if (inode1 == NULL) { -		__ext4_error(inode2->i_sb, function, line, -			"Both inodes should not be NULL: " -			"inode1 NULL inode2 %lu", inode2->i_ino); -		ret = -EIO; -	} else if (inode2 == NULL) { -		__ext4_error(inode1->i_sb, function, line, -			"Both inodes should not be NULL: " -			"inode1 %lu inode2 NULL", inode1->i_ino); -		ret = -EIO; -	} -	return ret; -} - -/** - * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem - * - * @orig_inode:		original inode structure - * @donor_inode:	donor inode structure - * Acquire write lock of i_data_sem of the two inodes (orig and donor) by - * i_ino order. - */ -static void -double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) -{ -	struct inode *first = orig_inode, *second = donor_inode; +	if (first < second) { +		down_write(&EXT4_I(first)->i_data_sem); +		down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING); +	} else { +		down_write(&EXT4_I(second)->i_data_sem); +		down_write_nested(&EXT4_I(first)->i_data_sem, SINGLE_DEPTH_NESTING); -	/* -	 * Use the inode number to provide the stable locking order instead -	 * of its address, because the C language doesn't guarantee you can -	 * compare pointers that don't come from the same array. -	 */ -	if (donor_inode->i_ino < orig_inode->i_ino) { -		first = donor_inode; -		second = orig_inode;  	} - -	down_write(&EXT4_I(first)->i_data_sem); -	down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);  }  /** - * double_up_write_data_sem - Release two inodes' write lock of i_data_sem + * ext4_double_up_write_data_sem - Release two inodes' write lock of i_data_sem   *   * @orig_inode:		original inode structure to be released its lock first   * @donor_inode:	donor inode structure to be released its lock second   * Release write lock of i_data_sem of two inodes (orig and donor).   */ -static void -double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) +void +ext4_double_up_write_data_sem(struct inode *orig_inode, +			      struct inode *donor_inode)  {  	up_write(&EXT4_I(orig_inode)->i_data_sem);  	up_write(&EXT4_I(donor_inode)->i_data_sem); @@ -421,6 +391,7 @@ mext_insert_extents(handle_t *handle, struct inode *orig_inode,  	if (depth) {  		/* Register to journal */ +		BUFFER_TRACE(orig_path->p_bh, "get_write_access");  		ret = ext4_journal_get_write_access(handle, orig_path->p_bh);  		if (ret)  			return ret; @@ -439,18 +410,7 @@ mext_insert_extents(handle_t *handle, struct inode *orig_inode,  		mext_insert_inside_block(o_start, o_end, start_ext, new_ext,  						end_ext, eh, range_to_move); -	if (depth) { -		ret = ext4_handle_dirty_metadata(handle, orig_inode, -						 orig_path->p_bh); -		if (ret) -			return ret; -	} else { -		ret = ext4_mark_inode_dirty(handle, orig_inode); -		if (ret < 0) -			return ret; -	} - -	return 0; +	return ext4_ext_dirty(handle, orig_inode, orig_path);  }  /** @@ -605,9 +565,8 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,  	diff = donor_off - le32_to_cpu(tmp_dext->ee_block);  	ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff); -	tmp_dext->ee_block = -			cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff); -	tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff); +	le32_add_cpu(&tmp_dext->ee_block, diff); +	le16_add_cpu(&tmp_dext->ee_len, -diff);  	if (max_count < ext4_ext_get_actual_len(tmp_dext))  		tmp_dext->ee_len = cpu_to_le16(max_count); @@ -630,6 +589,44 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,  }  /** + * mext_check_coverage - Check that all extents in range has the same type + * + * @inode:		inode in question + * @from:		block offset of inode + * @count:		block count to be checked + * @unwritten:		extents expected to be unwritten + * @err:		pointer to save error value + * + * Return 1 if all extents in range has expected type, and zero otherwise. + */ +static int +mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count, +		    int unwritten, int *err) +{ +	struct ext4_ext_path *path = NULL; +	struct ext4_extent *ext; +	int ret = 0; +	ext4_lblk_t last = from + count; +	while (from < last) { +		*err = get_ext_path(inode, from, &path); +		if (*err) +			goto out; +		ext = path[ext_depth(inode)].p_ext; +		if (unwritten != ext4_ext_is_unwritten(ext)) +			goto out; +		from += ext4_ext_get_actual_len(ext); +		ext4_ext_drop_refs(path); +	} +	ret = 1; +out: +	if (path) { +		ext4_ext_drop_refs(path); +		kfree(path); +	} +	return ret; +} + +/**   * mext_replace_branches - Replace original extents with new extents   *   * @handle:		journal handle @@ -664,8 +661,13 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,  	int replaced_count = 0;  	int dext_alen; -	/* Protect extent trees against block allocations via delalloc */ -	double_down_write_data_sem(orig_inode, donor_inode); +	*err = ext4_es_remove_extent(orig_inode, from, count); +	if (*err) +		goto out; + +	*err = ext4_es_remove_extent(donor_inode, from, count); +	if (*err) +		goto out;  	/* Get the original extent for the block "orig_off" */  	*err = get_ext_path(orig_inode, orig_off, &orig_path); @@ -682,6 +684,8 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,  	depth = ext_depth(donor_inode);  	dext = donor_path[depth].p_ext; +	if (unlikely(!dext)) +		goto missing_donor_extent;  	tmp_dext = *dext;  	*err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, @@ -692,7 +696,8 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,  	/* Loop for the donor extents */  	while (1) {  		/* The extent for donor must be found. */ -		if (!dext) { +		if (unlikely(!dext)) { +		missing_donor_extent:  			EXT4_ERROR_INODE(donor_inode,  				   "The extent for donor must be found");  			*err = -EIO; @@ -724,6 +729,7 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,  		donor_off += dext_alen;  		orig_off += dext_alen; +		BUG_ON(replaced_count > count);  		/* Already moved the expected blocks */  		if (replaced_count >= count)  			break; @@ -762,12 +768,123 @@ out:  		kfree(donor_path);  	} -	ext4_ext_invalidate_cache(orig_inode); -	ext4_ext_invalidate_cache(donor_inode); +	return replaced_count; +} -	double_up_write_data_sem(orig_inode, donor_inode); +/** + * mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2 + * + * @inode1:	the inode structure + * @inode2:	the inode structure + * @index:	page index + * @page:	result page vector + * + * Grab two locked pages for inode's by inode order + */ +static int +mext_page_double_lock(struct inode *inode1, struct inode *inode2, +		      pgoff_t index, struct page *page[2]) +{ +	struct address_space *mapping[2]; +	unsigned fl = AOP_FLAG_NOFS; -	return replaced_count; +	BUG_ON(!inode1 || !inode2); +	if (inode1 < inode2) { +		mapping[0] = inode1->i_mapping; +		mapping[1] = inode2->i_mapping; +	} else { +		mapping[0] = inode2->i_mapping; +		mapping[1] = inode1->i_mapping; +	} + +	page[0] = grab_cache_page_write_begin(mapping[0], index, fl); +	if (!page[0]) +		return -ENOMEM; + +	page[1] = grab_cache_page_write_begin(mapping[1], index, fl); +	if (!page[1]) { +		unlock_page(page[0]); +		page_cache_release(page[0]); +		return -ENOMEM; +	} +	/* +	 * grab_cache_page_write_begin() may not wait on page's writeback if +	 * BDI not demand that. But it is reasonable to be very conservative +	 * here and explicitly wait on page's writeback +	 */ +	wait_on_page_writeback(page[0]); +	wait_on_page_writeback(page[1]); +	if (inode1 > inode2) { +		struct page *tmp; +		tmp = page[0]; +		page[0] = page[1]; +		page[1] = tmp; +	} +	return 0; +} + +/* Force page buffers uptodate w/o dropping page's lock */ +static int +mext_page_mkuptodate(struct page *page, unsigned from, unsigned to) +{ +	struct inode *inode = page->mapping->host; +	sector_t block; +	struct buffer_head *bh, *head, *arr[MAX_BUF_PER_PAGE]; +	unsigned int blocksize, block_start, block_end; +	int i, err,  nr = 0, partial = 0; +	BUG_ON(!PageLocked(page)); +	BUG_ON(PageWriteback(page)); + +	if (PageUptodate(page)) +		return 0; + +	blocksize = 1 << inode->i_blkbits; +	if (!page_has_buffers(page)) +		create_empty_buffers(page, blocksize, 0); + +	head = page_buffers(page); +	block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits); +	for (bh = head, block_start = 0; bh != head || !block_start; +	     block++, block_start = block_end, bh = bh->b_this_page) { +		block_end = block_start + blocksize; +		if (block_end <= from || block_start >= to) { +			if (!buffer_uptodate(bh)) +				partial = 1; +			continue; +		} +		if (buffer_uptodate(bh)) +			continue; +		if (!buffer_mapped(bh)) { +			err = ext4_get_block(inode, block, bh, 0); +			if (err) { +				SetPageError(page); +				return err; +			} +			if (!buffer_mapped(bh)) { +				zero_user(page, block_start, blocksize); +				set_buffer_uptodate(bh); +				continue; +			} +		} +		BUG_ON(nr >= MAX_BUF_PER_PAGE); +		arr[nr++] = bh; +	} +	/* No io required */ +	if (!nr) +		goto out; + +	for (i = 0; i < nr; i++) { +		bh = arr[i]; +		if (!bh_uptodate_or_lock(bh)) { +			err = bh_submit_read(bh); +			if (err) +				return err; +		} +	} +out: +	if (!partial) +		SetPageUptodate(page); +	return 0;  }  /** @@ -778,7 +895,7 @@ out:   * @orig_page_offset:		page index on original file   * @data_offset_in_page:	block index where data swapping starts   * @block_len_in_page:		the number of blocks to be swapped - * @uninit:			orig extent is uninitialized or not + * @unwritten:			orig extent is unwritten or not   * @err:			pointer to save return value   *   * Save the data in original inode blocks and replace original inode extents @@ -789,31 +906,28 @@ out:  static int  move_extent_per_page(struct file *o_filp, struct inode *donor_inode,  		  pgoff_t orig_page_offset, int data_offset_in_page, -		  int block_len_in_page, int uninit, int *err) +		  int block_len_in_page, int unwritten, int *err)  { -	struct inode *orig_inode = o_filp->f_dentry->d_inode; -	struct address_space *mapping = orig_inode->i_mapping; -	struct buffer_head *bh; -	struct page *page = NULL; -	const struct address_space_operations *a_ops = mapping->a_ops; +	struct inode *orig_inode = file_inode(o_filp); +	struct page *pagep[2] = {NULL, NULL};  	handle_t *handle;  	ext4_lblk_t orig_blk_offset; -	long long offs = orig_page_offset << PAGE_CACHE_SHIFT;  	unsigned long blocksize = orig_inode->i_sb->s_blocksize;  	unsigned int w_flags = 0;  	unsigned int tmp_data_size, data_size, replaced_size; -	void *fsdata; -	int i, jblocks; -	int err2 = 0; +	int err2, jblocks, retries = 0;  	int replaced_count = 0; +	int from = data_offset_in_page << orig_inode->i_blkbits;  	int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;  	/*  	 * It needs twice the amount of ordinary journal buffers because  	 * inode and donor_inode may change each different metadata blocks.  	 */ +again: +	*err = 0;  	jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; -	handle = ext4_journal_start(orig_inode, jblocks); +	handle = ext4_journal_start(orig_inode, EXT4_HT_MOVE_EXTENTS, jblocks);  	if (IS_ERR(handle)) {  		*err = PTR_ERR(handle);  		return 0; @@ -825,21 +939,6 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,  	orig_blk_offset = orig_page_offset * blocks_per_page +  		data_offset_in_page; -	/* -	 * If orig extent is uninitialized one, -	 * it's not necessary force the page into memory -	 * and then force it to be written out again. -	 * Just swap data blocks between orig and donor. -	 */ -	if (uninit) { -		replaced_count = mext_replace_branches(handle, orig_inode, -						donor_inode, orig_blk_offset, -						block_len_in_page, err); -		goto out2; -	} - -	offs = (long long)orig_blk_offset << orig_inode->i_blkbits; -  	/* Calculate data_size */  	if ((orig_blk_offset + block_len_in_page - 1) ==  	    ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { @@ -859,76 +958,120 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,  	replaced_size = data_size; -	*err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags, -				 &page, &fsdata); +	*err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset, +				     pagep);  	if (unlikely(*err < 0)) -		goto out; - -	if (!PageUptodate(page)) { -		mapping->a_ops->readpage(o_filp, page); -		lock_page(page); -	} - +		goto stop_journal;  	/* -	 * try_to_release_page() doesn't call releasepage in writeback mode. -	 * We should care about the order of writing to the same file -	 * by multiple move extent processes. -	 * It needs to call wait_on_page_writeback() to wait for the -	 * writeback of the page. +	 * If orig extent was unwritten it can become initialized +	 * at any time after i_data_sem was dropped, in order to +	 * serialize with delalloc we have recheck extent while we +	 * hold page's lock, if it is still the case data copy is not +	 * necessary, just swap data blocks between orig and donor.  	 */ -	if (PageWriteback(page)) -		wait_on_page_writeback(page); +	if (unwritten) { +		ext4_double_down_write_data_sem(orig_inode, donor_inode); +		/* If any of extents in range became initialized we have to +		 * fallback to data copying */ +		unwritten = mext_check_coverage(orig_inode, orig_blk_offset, +						block_len_in_page, 1, err); +		if (*err) +			goto drop_data_sem; -	/* Release old bh and drop refs */ -	try_to_release_page(page, 0); +		unwritten &= mext_check_coverage(donor_inode, orig_blk_offset, +						 block_len_in_page, 1, err); +		if (*err) +			goto drop_data_sem; + +		if (!unwritten) { +			ext4_double_up_write_data_sem(orig_inode, donor_inode); +			goto data_copy; +		} +		if ((page_has_private(pagep[0]) && +		     !try_to_release_page(pagep[0], 0)) || +		    (page_has_private(pagep[1]) && +		     !try_to_release_page(pagep[1], 0))) { +			*err = -EBUSY; +			goto drop_data_sem; +		} +		replaced_count = mext_replace_branches(handle, orig_inode, +						donor_inode, orig_blk_offset, +						block_len_in_page, err); +	drop_data_sem: +		ext4_double_up_write_data_sem(orig_inode, donor_inode); +		goto unlock_pages; +	} +data_copy: +	*err = mext_page_mkuptodate(pagep[0], from, from + replaced_size); +	if (*err) +		goto unlock_pages; + +	/* At this point all buffers in range are uptodate, old mapping layout +	 * is no longer required, try to drop it now. */ +	if ((page_has_private(pagep[0]) && !try_to_release_page(pagep[0], 0)) || +	    (page_has_private(pagep[1]) && !try_to_release_page(pagep[1], 0))) { +		*err = -EBUSY; +		goto unlock_pages; +	}  	replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, -					orig_blk_offset, block_len_in_page, -					&err2); -	if (err2) { +					       orig_blk_offset, +					       block_len_in_page, err); +	if (*err) {  		if (replaced_count) {  			block_len_in_page = replaced_count;  			replaced_size =  				block_len_in_page << orig_inode->i_blkbits;  		} else -			goto out; -	} - -	if (!page_has_buffers(page)) -		create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); - -	bh = page_buffers(page); -	for (i = 0; i < data_offset_in_page; i++) -		bh = bh->b_this_page; - -	for (i = 0; i < block_len_in_page; i++) { -		*err = ext4_get_block(orig_inode, -				(sector_t)(orig_blk_offset + i), bh, 0); -		if (*err < 0) -			goto out; - -		if (bh->b_this_page != NULL) -			bh = bh->b_this_page; +			goto unlock_pages;  	} +	/* Perform all necessary steps similar write_begin()/write_end() +	 * but keeping in mind that i_size will not change */ +	*err = __block_write_begin(pagep[0], from, replaced_size, +				   ext4_get_block); +	if (!*err) +		*err = block_commit_write(pagep[0], from, from + replaced_size); -	*err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size, -			       page, fsdata); -	page = NULL; - -out: -	if (unlikely(page)) { -		if (PageLocked(page)) -			unlock_page(page); -		page_cache_release(page); -		ext4_journal_stop(handle); -	} -out2: +	if (unlikely(*err < 0)) +		goto repair_branches; + +	/* Even in case of data=writeback it is reasonable to pin +	 * inode to transaction, to prevent unexpected data loss */ +	*err = ext4_jbd2_file_inode(handle, orig_inode); + +unlock_pages: +	unlock_page(pagep[0]); +	page_cache_release(pagep[0]); +	unlock_page(pagep[1]); +	page_cache_release(pagep[1]); +stop_journal:  	ext4_journal_stop(handle); - -	if (err2) -		*err = err2; - +	/* Buffer was busy because probably is pinned to journal transaction, +	 * force transaction commit may help to free it. */ +	if (*err == -EBUSY && ext4_should_retry_alloc(orig_inode->i_sb, +						      &retries)) +		goto again;  	return replaced_count; + +repair_branches: +	/* +	 * This should never ever happen! +	 * Extents are swapped already, but we are not able to copy data. +	 * Try to swap extents to it's original places +	 */ +	ext4_double_down_write_data_sem(orig_inode, donor_inode); +	replaced_count = mext_replace_branches(handle, donor_inode, orig_inode, +					       orig_blk_offset, +					       block_len_in_page, &err2); +	ext4_double_up_write_data_sem(orig_inode, donor_inode); +	if (replaced_count != block_len_in_page) { +		EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset), +				       "Unable to copy data block," +				       " data will be lost."); +		*err = -EIO; +	} +	replaced_count = 0; +	goto unlock_pages;  }  /** @@ -971,14 +1114,6 @@ mext_check_arguments(struct inode *orig_inode,  		return -EINVAL;  	} -	/* Files should be in the same ext4 FS */ -	if (orig_inode->i_sb != donor_inode->i_sb) { -		ext4_debug("ext4 move extent: The argument files " -			"should be in same FS [ino:orig %lu, donor %lu]\n", -			orig_inode->i_ino, donor_inode->i_ino); -		return -EINVAL; -	} -  	/* Ext4 move extent supports only extent based file */  	if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) {  		ext4_debug("ext4 move extent: orig file is not extents " @@ -1003,12 +1138,11 @@ mext_check_arguments(struct inode *orig_inode,  		return -EINVAL;  	} -	if ((orig_start > EXT_MAX_BLOCK) || -	    (donor_start > EXT_MAX_BLOCK) || -	    (*len > EXT_MAX_BLOCK) || -	    (orig_start + *len > EXT_MAX_BLOCK))  { +	if ((orig_start >= EXT_MAX_BLOCKS) || +	    (*len > EXT_MAX_BLOCKS) || +	    (orig_start + *len >= EXT_MAX_BLOCKS))  {  		ext4_debug("ext4 move extent: Can't handle over [%u] blocks " -			"[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCK, +			"[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS,  			orig_inode->i_ino, donor_inode->i_ino);  		return -EINVAL;  	} @@ -1069,73 +1203,6 @@ mext_check_arguments(struct inode *orig_inode,  }  /** - * mext_inode_double_lock - Lock i_mutex on both @inode1 and @inode2 - * - * @inode1:	the inode structure - * @inode2:	the inode structure - * - * Lock two inodes' i_mutex by i_ino order. - * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. - */ -static int -mext_inode_double_lock(struct inode *inode1, struct inode *inode2) -{ -	int ret = 0; - -	BUG_ON(inode1 == NULL && inode2 == NULL); - -	ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__); -	if (ret < 0) -		goto out; - -	if (inode1 == inode2) { -		mutex_lock(&inode1->i_mutex); -		goto out; -	} - -	if (inode1->i_ino < inode2->i_ino) { -		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); -		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); -	} else { -		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); -		mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); -	} - -out: -	return ret; -} - -/** - * mext_inode_double_unlock - Release i_mutex on both @inode1 and @inode2 - * - * @inode1:     the inode that is released first - * @inode2:     the inode that is released second - * - * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0. - */ - -static int -mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) -{ -	int ret = 0; - -	BUG_ON(inode1 == NULL && inode2 == NULL); - -	ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__); -	if (ret < 0) -		goto out; - -	if (inode1) -		mutex_unlock(&inode1->i_mutex); - -	if (inode2 && inode2 != inode1) -		mutex_unlock(&inode2->i_mutex); - -out: -	return ret; -} - -/**   * ext4_move_extents - Exchange the specified range of a file   *   * @o_filp:		file structure of the original file @@ -1181,24 +1248,31 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,  		 __u64 orig_start, __u64 donor_start, __u64 len,  		 __u64 *moved_len)  { -	struct inode *orig_inode = o_filp->f_dentry->d_inode; -	struct inode *donor_inode = d_filp->f_dentry->d_inode; +	struct inode *orig_inode = file_inode(o_filp); +	struct inode *donor_inode = file_inode(d_filp);  	struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL;  	struct ext4_extent *ext_prev, *ext_cur, *ext_dummy;  	ext4_lblk_t block_start = orig_start;  	ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0;  	ext4_lblk_t rest_blocks;  	pgoff_t orig_page_offset = 0, seq_end_page; -	int ret1, ret2, depth, last_extent = 0; +	int ret, depth, last_extent = 0;  	int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;  	int data_offset_in_page;  	int block_len_in_page; -	int uninit; +	int unwritten; -	/* orig and donor should be different file */ -	if (orig_inode->i_ino == donor_inode->i_ino) { +	if (orig_inode->i_sb != donor_inode->i_sb) { +		ext4_debug("ext4 move extent: The argument files " +			"should be in same FS [ino:orig %lu, donor %lu]\n", +			orig_inode->i_ino, donor_inode->i_ino); +		return -EINVAL; +	} + +	/* orig and donor should be different inodes */ +	if (orig_inode == donor_inode) {  		ext4_debug("ext4 move extent: The argument files should not " -			"be same file [ino:orig %lu, donor %lu]\n", +			"be same inode [ino:orig %lu, donor %lu]\n",  			orig_inode->i_ino, donor_inode->i_ino);  		return -EINVAL;  	} @@ -1210,18 +1284,27 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,  			orig_inode->i_ino, donor_inode->i_ino);  		return -EINVAL;  	} - +	/* TODO: This is non obvious task to swap blocks for inodes with full +	   jornaling enabled */ +	if (ext4_should_journal_data(orig_inode) || +	    ext4_should_journal_data(donor_inode)) { +		return -EINVAL; +	}  	/* Protect orig and donor inodes against a truncate */ -	ret1 = mext_inode_double_lock(orig_inode, donor_inode); -	if (ret1 < 0) -		return ret1; +	lock_two_nondirectories(orig_inode, donor_inode); + +	/* Wait for all existing dio workers */ +	ext4_inode_block_unlocked_dio(orig_inode); +	ext4_inode_block_unlocked_dio(donor_inode); +	inode_dio_wait(orig_inode); +	inode_dio_wait(donor_inode);  	/* Protect extent tree against block allocations via delalloc */ -	double_down_write_data_sem(orig_inode, donor_inode); +	ext4_double_down_write_data_sem(orig_inode, donor_inode);  	/* Check the filesystem environment whether move_extent can be done */ -	ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, +	ret = mext_check_arguments(orig_inode, donor_inode, orig_start,  				    donor_start, &len); -	if (ret1) +	if (ret)  		goto out;  	file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; @@ -1229,13 +1312,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,  	if (file_end < block_end)  		len -= block_end - file_end; -	ret1 = get_ext_path(orig_inode, block_start, &orig_path); -	if (ret1) +	ret = get_ext_path(orig_inode, block_start, &orig_path); +	if (ret)  		goto out;  	/* Get path structure to check the hole */ -	ret1 = get_ext_path(orig_inode, block_start, &holecheck_path); -	if (ret1) +	ret = get_ext_path(orig_inode, block_start, &holecheck_path); +	if (ret)  		goto out;  	depth = ext_depth(orig_inode); @@ -1254,13 +1337,13 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,  		last_extent = mext_next_extent(orig_inode,  					holecheck_path, &ext_cur);  		if (last_extent < 0) { -			ret1 = last_extent; +			ret = last_extent;  			goto out;  		}  		last_extent = mext_next_extent(orig_inode, orig_path,  							&ext_dummy);  		if (last_extent < 0) { -			ret1 = last_extent; +			ret = last_extent;  			goto out;  		}  		seq_start = le32_to_cpu(ext_cur->ee_block); @@ -1274,7 +1357,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,  	if (le32_to_cpu(ext_cur->ee_block) > block_end) {  		ext4_debug("ext4 move extent: The specified range of file "  							"may be the hole\n"); -		ret1 = -EINVAL; +		ret = -EINVAL;  		goto out;  	} @@ -1294,7 +1377,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,  		last_extent = mext_next_extent(orig_inode, holecheck_path,  						&ext_cur);  		if (last_extent < 0) { -			ret1 = last_extent; +			ret = last_extent;  			break;  		}  		add_blocks = ext4_ext_get_actual_len(ext_cur); @@ -1309,8 +1392,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,  		    !last_extent)  			continue; -		/* Is original extent is uninitialized */ -		uninit = ext4_ext_is_uninitialized(ext_prev); +		/* Is original extent is unwritten */ +		unwritten = ext4_ext_is_unwritten(ext_prev);  		data_offset_in_page = seq_start % blocks_per_page; @@ -1341,7 +1424,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,  		 * b. racing with ->readpage, ->write_begin, and ext4_get_block  		 *    in move_extent_per_page  		 */ -		double_up_write_data_sem(orig_inode, donor_inode); +		ext4_double_up_write_data_sem(orig_inode, donor_inode);  		while (orig_page_offset <= seq_end_page) { @@ -1350,19 +1433,19 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,  						o_filp, donor_inode,  						orig_page_offset,  						data_offset_in_page, -						block_len_in_page, uninit, -						&ret1); +						block_len_in_page, +						unwritten, &ret);  			/* Count how many blocks we have exchanged */  			*moved_len += block_len_in_page; -			if (ret1 < 0) +			if (ret < 0)  				break;  			if (*moved_len > len) {  				EXT4_ERROR_INODE(orig_inode,  					"We replaced blocks too much! "  					"sum of replaced: %llu requested: %llu",  					*moved_len, len); -				ret1 = -EIO; +				ret = -EIO;  				break;  			} @@ -1375,23 +1458,23 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,  				block_len_in_page = rest_blocks;  		} -		double_down_write_data_sem(orig_inode, donor_inode); -		if (ret1 < 0) +		ext4_double_down_write_data_sem(orig_inode, donor_inode); +		if (ret < 0)  			break;  		/* Decrease buffer counter */  		if (holecheck_path)  			ext4_ext_drop_refs(holecheck_path); -		ret1 = get_ext_path(orig_inode, seq_start, &holecheck_path); -		if (ret1) +		ret = get_ext_path(orig_inode, seq_start, &holecheck_path); +		if (ret)  			break;  		depth = holecheck_path->p_depth;  		/* Decrease buffer counter */  		if (orig_path)  			ext4_ext_drop_refs(orig_path); -		ret1 = get_ext_path(orig_inode, seq_start, &orig_path); -		if (ret1) +		ret = get_ext_path(orig_inode, seq_start, &orig_path); +		if (ret)  			break;  		ext_cur = holecheck_path[depth].p_ext; @@ -1413,13 +1496,10 @@ out:  		ext4_ext_drop_refs(holecheck_path);  		kfree(holecheck_path);  	} -	double_up_write_data_sem(orig_inode, donor_inode); -	ret2 = mext_inode_double_unlock(orig_inode, donor_inode); +	ext4_double_up_write_data_sem(orig_inode, donor_inode); +	ext4_inode_resume_unlocked_dio(orig_inode); +	ext4_inode_resume_unlocked_dio(donor_inode); +	unlock_two_nondirectories(orig_inode, donor_inode); -	if (ret1) -		return ret1; -	else if (ret2) -		return ret2; - -	return 0; +	return ret;  }  | 
