Diffstat (limited to 'fs/xfs/xfs_bmap_util.c')
-rw-r--r--	fs/xfs/xfs_bmap_util.c	440
1 file changed, 146 insertions, 294 deletions
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 97f952caea7..64731ef3324 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -18,31 +18,31 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
+#include "xfs_shared.h"
 #include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
 #include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
 #include "xfs_mount.h"
-#include "xfs_da_btree.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dinode.h"
+#include "xfs_da_format.h"
 #include "xfs_inode.h"
 #include "xfs_btree.h"
+#include "xfs_trans.h"
 #include "xfs_extfree_item.h"
 #include "xfs_alloc.h"
 #include "xfs_bmap.h"
 #include "xfs_bmap_util.h"
+#include "xfs_bmap_btree.h"
 #include "xfs_rtalloc.h"
 #include "xfs_error.h"
 #include "xfs_quota.h"
 #include "xfs_trans_space.h"
 #include "xfs_trace.h"
 #include "xfs_icache.h"
+#include "xfs_log.h"
+#include "xfs_dinode.h"
 
 /* Kernel only BMAP related definitions and functions */
 
@@ -249,48 +249,6 @@ xfs_bmap_rtalloc(
 }
 
 /*
- * Stack switching interfaces for allocation
- */
-static void
-xfs_bmapi_allocate_worker(
-	struct work_struct	*work)
-{
-	struct xfs_bmalloca	*args = container_of(work,
-						struct xfs_bmalloca, work);
-	unsigned long		pflags;
-
-	/* we are in a transaction context here */
-	current_set_flags_nested(&pflags, PF_FSTRANS);
-
-	args->result = __xfs_bmapi_allocate(args);
-	complete(args->done);
-
-	current_restore_flags_nested(&pflags, PF_FSTRANS);
-}
-
-/*
- * Some allocation requests often come in with little stack to work on. Push
- * them off to a worker thread so there is lots of stack to use. Otherwise just
- * call directly to avoid the context switch overhead here.
- */
-int
-xfs_bmapi_allocate(
-	struct xfs_bmalloca	*args)
-{
-	DECLARE_COMPLETION_ONSTACK(done);
-
-	if (!args->stack_switch)
-		return __xfs_bmapi_allocate(args);
-
-
-	args->done = &done;
-	INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker);
-	queue_work(xfs_alloc_wq, &args->work);
-	wait_for_completion(&done);
-	return args->result;
-}
-
-/*
  * Check if the endoff is outside the last extent. If so the caller will grow
  * the allocation to a stripe unit boundary.  All offsets are considered outside
  * the end of file for an empty fork, so 1 is returned in *eof in that case.
@@ -617,22 +575,27 @@ xfs_getbmap(
 		return XFS_ERROR(ENOMEM);
 
 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
-	if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
-		if (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size) {
+	if (whichfork == XFS_DATA_FORK) {
+		if (!(iflags & BMV_IF_DELALLOC) &&
+		    (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
 			error = -filemap_write_and_wait(VFS_I(ip)->i_mapping);
 			if (error)
 				goto out_unlock_iolock;
+
+			/*
+			 * Even after flushing the inode, there can still be
+			 * delalloc blocks on the inode beyond EOF due to
+			 * speculative preallocation.  These are not removed
+			 * until the release function is called or the inode
+			 * is inactivated.  Hence we cannot assert here that
+			 * ip->i_delayed_blks == 0.
+			 */
 		}
-		/*
-		 * even after flushing the inode, there can still be delalloc
-		 * blocks on the inode beyond EOF due to speculative
-		 * preallocation. These are not removed until the release
-		 * function is called or the inode is inactivated. Hence we
-		 * cannot assert here that ip->i_delayed_blks == 0.
-		 */
-	}
 
-	lock = xfs_ilock_map_shared(ip);
+		lock = xfs_ilock_data_map_shared(ip);
+	} else {
+		lock = xfs_ilock_attr_map_shared(ip);
+	}
 
 	/*
 	 * Don't let nex be bigger than the number of extents
@@ -737,7 +700,7 @@ xfs_getbmap(
  out_free_map:
 	kmem_free(map);
  out_unlock_ilock:
-	xfs_iunlock_map_shared(ip, lock);
+	xfs_iunlock(ip, lock);
  out_unlock_iolock:
 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
 
@@ -965,32 +928,12 @@ xfs_free_eofblocks(
 	return error;
 }
 
-/*
- * xfs_alloc_file_space()
- *      This routine allocates disk space for the given file.
- *
- *	If alloc_type == 0, this request is for an ALLOCSP type
- *	request which will change the file size.  In this case, no
- *	DMAPI event will be generated by the call.  A TRUNCATE event
- *	will be generated later by xfs_setattr.
- *
- *	If alloc_type != 0, this request is for a RESVSP type
- *	request, and a DMAPI DM_EVENT_WRITE will be generated if the
- *	lower block boundary byte address is less than the file's
- *	length.
- *
- * RETURNS:
- *       0 on success
- *      errno on error
- *
- */
-STATIC int
+int
 xfs_alloc_file_space(
-	xfs_inode_t		*ip,
+	struct xfs_inode	*ip,
 	xfs_off_t		offset,
 	xfs_off_t		len,
-	int			alloc_type,
-	int			attr_flags)
+	int			alloc_type)
 {
 	xfs_mount_t		*mp = ip->i_mount;
 	xfs_off_t		count;
@@ -1188,9 +1131,15 @@ xfs_zero_remaining_bytes(
 	xfs_buf_unlock(bp);
 
 	for (offset = startoff; offset <= endoff; offset = lastoffset + 1) {
+		uint lock_mode;
+
 		offset_fsb = XFS_B_TO_FSBT(mp, offset);
 		nimap = 1;
+
+		lock_mode = xfs_ilock_data_map_shared(ip);
 		error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0);
+		xfs_iunlock(ip, lock_mode);
+
 		if (error || nimap < 1)
 			break;
 		ASSERT(imap.br_blockcount >= 1);
@@ -1207,7 +1156,12 @@ xfs_zero_remaining_bytes(
 		XFS_BUF_UNWRITE(bp);
 		XFS_BUF_READ(bp);
 		XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock));
-		xfsbdstrat(mp, bp);
+
+		if (XFS_FORCED_SHUTDOWN(mp)) {
+			error = XFS_ERROR(EIO);
+			break;
+		}
+		xfs_buf_iorequest(bp);
 		error = xfs_buf_iowait(bp);
 		if (error) {
 			xfs_buf_ioerror_alert(bp,
@@ -1220,7 +1174,12 @@ xfs_zero_remaining_bytes(
 		XFS_BUF_UNDONE(bp);
 		XFS_BUF_UNREAD(bp);
 		XFS_BUF_WRITE(bp);
-		xfsbdstrat(mp, bp);
+
+		if (XFS_FORCED_SHUTDOWN(mp)) {
+			error = XFS_ERROR(EIO);
+			break;
+		}
+		xfs_buf_iorequest(bp);
 		error = xfs_buf_iowait(bp);
 		if (error) {
 			xfs_buf_ioerror_alert(bp,
@@ -1232,24 +1191,11 @@ xfs_zero_remaining_bytes(
 	return error;
 }
 
-/*
- * xfs_free_file_space()
- *      This routine frees disk space for the given file.
- *
- *	This routine is only called by xfs_change_file_space
- *	for an UNRESVSP type call.
- *
- * RETURNS:
- *       0 on success
- *      errno on error
- *
- */
-STATIC int
+int
 xfs_free_file_space(
-	xfs_inode_t		*ip,
+	struct xfs_inode	*ip,
 	xfs_off_t		offset,
-	xfs_off_t		len,
-	int			attr_flags)
+	xfs_off_t		len)
 {
 	int			committed;
 	int			done;
@@ -1267,7 +1213,6 @@ xfs_free_file_space(
 	int			rt;
 	xfs_fileoff_t		startoffset_fsb;
 	xfs_trans_t		*tp;
-	int			need_iolock = 1;
 
 	mp = ip->i_mount;
 
@@ -1284,20 +1229,15 @@ xfs_free_file_space(
 	startoffset_fsb	= XFS_B_TO_FSB(mp, offset);
 	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
 
-	if (attr_flags & XFS_ATTR_NOLOCK)
-		need_iolock = 0;
-	if (need_iolock) {
-		xfs_ilock(ip, XFS_IOLOCK_EXCL);
-		/* wait for the completion of any pending DIOs */
-		inode_dio_wait(VFS_I(ip));
-	}
+	/* wait for the completion of any pending DIOs */
+	inode_dio_wait(VFS_I(ip));
 
 	rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
 	ioffset = offset & ~(rounding - 1);
 	error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
 					      ioffset, -1);
 	if (error)
-		goto out_unlock_iolock;
+		goto out;
 	truncate_pagecache_range(VFS_I(ip), ioffset, -1);
 
 	/*
@@ -1311,7 +1251,7 @@ xfs_free_file_space(
 		error = xfs_bmapi_read(ip, startoffset_fsb, 1,
 					&imap, &nimap, 0);
 		if (error)
-			goto out_unlock_iolock;
+			goto out;
 		ASSERT(nimap == 0 || nimap == 1);
 		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
 			xfs_daddr_t	block;
@@ -1326,7 +1266,7 @@ xfs_free_file_space(
 		error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1,
 					&imap, &nimap, 0);
 		if (error)
-			goto out_unlock_iolock;
+			goto out;
 		ASSERT(nimap == 0 || nimap == 1);
 		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
 			ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
@@ -1366,7 +1306,6 @@ xfs_free_file_space(
 		 * the freeing of the space succeeds at ENOSPC.
 		 */
 		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
-		tp->t_flags |= XFS_TRANS_RESERVE;
 		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0);
 
 		/*
@@ -1412,27 +1351,23 @@ xfs_free_file_space(
 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	}
 
- out_unlock_iolock:
-	if (need_iolock)
-		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+ out:
 	return error;
 
  error0:
 	xfs_bmap_cancel(&free_list);
  error1:
 	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
-	xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) :
-		    XFS_ILOCK_EXCL);
-	return error;
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	goto out;
 }
 
 
-STATIC int
+int
 xfs_zero_file_space(
 	struct xfs_inode	*ip,
 	xfs_off_t		offset,
-	xfs_off_t		len,
-	int			attr_flags)
+	xfs_off_t		len)
 {
 	struct xfs_mount	*mp = ip->i_mount;
 	uint			granularity;
@@ -1440,6 +1375,8 @@ xfs_zero_file_space(
 	xfs_off_t		end_boundary;
 	int			error;
 
+	trace_xfs_zero_file_space(ip);
+
 	granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
 
 	/*
@@ -1453,26 +1390,32 @@ xfs_zero_file_space(
 	ASSERT(start_boundary >= offset);
 	ASSERT(end_boundary <= offset + len);
 
-	if (!(attr_flags & XFS_ATTR_NOLOCK))
-		xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
 	if (start_boundary < end_boundary - 1) {
-		/* punch out the page cache over the conversion range */
+		/*
+		 * punch out delayed allocation blocks and the page cache over
+		 * the conversion range
+		 */
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		error = xfs_bmap_punch_delalloc_range(ip,
+				XFS_B_TO_FSBT(mp, start_boundary),
+				XFS_B_TO_FSB(mp, end_boundary - start_boundary));
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 		truncate_pagecache_range(VFS_I(ip), start_boundary,
 					 end_boundary - 1);
+
 		/* convert the blocks */
 		error = xfs_alloc_file_space(ip, start_boundary,
 					end_boundary - start_boundary - 1,
-					XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT,
-					attr_flags);
+					XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT);
 		if (error)
-			goto out_unlock;
+			goto out;
 
 		/* We've handled the interior of the range, now for the edges */
-		if (start_boundary != offset)
+		if (start_boundary != offset) {
 			error = xfs_iozero(ip, offset, start_boundary - offset);
-		if (error)
-			goto out_unlock;
+			if (error)
+				goto out;
+		}
 
 		if (end_boundary != offset + len)
 			error = xfs_iozero(ip, end_boundary,
@@ -1486,194 +1429,103 @@ xfs_zero_file_space(
 		error = xfs_iozero(ip, offset, len);
 	}
 
-out_unlock:
-	if (!(attr_flags & XFS_ATTR_NOLOCK))
-		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+out:
 	return error;
 
 }
 
 /*
- * xfs_change_file_space()
- *      This routine allocates or frees disk space for the given file.
- *      The user specified parameters are checked for alignment and size
- *      limitations.
- *
+ * xfs_collapse_file_space()
+ *	This routine frees disk space and shift extent for the given file.
+ *	The first thing we do is to free data blocks in the specified range
+ *	by calling xfs_free_file_space(). It would also sync dirty data
+ *	and invalidate page cache over the region on which collapse range
+ *	is working. And Shift extent records to the left to cover a hole.
  * RETURNS:
- *       0 on success
- *      errno on error
+ *	0 on success
+ *	errno on error
 *
 */
 int
-xfs_change_file_space(
-	xfs_inode_t	*ip,
-	int		cmd,
-	xfs_flock64_t	*bf,
-	xfs_off_t	offset,
-	int		attr_flags)
+xfs_collapse_file_space(
+	struct xfs_inode	*ip,
+	xfs_off_t		offset,
+	xfs_off_t		len)
 {
-	xfs_mount_t	*mp = ip->i_mount;
-	int		clrprealloc;
-	int		error;
-	xfs_fsize_t	fsize;
-	int		setprealloc;
-	xfs_off_t	startoffset;
-	xfs_trans_t	*tp;
-	struct iattr	iattr;
-
-	if (!S_ISREG(ip->i_d.di_mode))
-		return XFS_ERROR(EINVAL);
-
-	switch (bf->l_whence) {
-	case 0: /*SEEK_SET*/
-		break;
-	case 1: /*SEEK_CUR*/
-		bf->l_start += offset;
-		break;
-	case 2: /*SEEK_END*/
-		bf->l_start += XFS_ISIZE(ip);
-		break;
-	default:
-		return XFS_ERROR(EINVAL);
-	}
-
-	/*
-	 * length of <= 0 for resv/unresv/zero is invalid.  length for
-	 * alloc/free is ignored completely and we have no idea what userspace
-	 * might have set it to, so set it to zero to allow range
-	 * checks to pass.
-	 */
-	switch (cmd) {
-	case XFS_IOC_ZERO_RANGE:
-	case XFS_IOC_RESVSP:
-	case XFS_IOC_RESVSP64:
-	case XFS_IOC_UNRESVSP:
-	case XFS_IOC_UNRESVSP64:
-		if (bf->l_len <= 0)
-			return XFS_ERROR(EINVAL);
-		break;
-	default:
-		bf->l_len = 0;
-		break;
-	}
-
-	if (bf->l_start < 0 ||
-	    bf->l_start > mp->m_super->s_maxbytes ||
-	    bf->l_start + bf->l_len < 0 ||
-	    bf->l_start + bf->l_len >= mp->m_super->s_maxbytes)
-		return XFS_ERROR(EINVAL);
-
-	bf->l_whence = 0;
+	int			done = 0;
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	int			error;
+	xfs_extnum_t		current_ext = 0;
+	struct xfs_bmap_free	free_list;
+	xfs_fsblock_t		first_block;
+	int			committed;
+	xfs_fileoff_t		start_fsb;
+	xfs_fileoff_t		shift_fsb;
 
-	startoffset = bf->l_start;
-	fsize = XFS_ISIZE(ip);
+	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
 
-	setprealloc = clrprealloc = 0;
-	switch (cmd) {
-	case XFS_IOC_ZERO_RANGE:
-		error = xfs_zero_file_space(ip, startoffset, bf->l_len,
-						attr_flags);
-		if (error)
-			return error;
-		setprealloc = 1;
-		break;
+	trace_xfs_collapse_file_space(ip);
 
-	case XFS_IOC_RESVSP:
-	case XFS_IOC_RESVSP64:
-		error = xfs_alloc_file_space(ip, startoffset, bf->l_len,
-						XFS_BMAPI_PREALLOC, attr_flags);
-		if (error)
-			return error;
-		setprealloc = 1;
-		break;
+	start_fsb = XFS_B_TO_FSB(mp, offset + len);
+	shift_fsb = XFS_B_TO_FSB(mp, len);
 
-	case XFS_IOC_UNRESVSP:
-	case XFS_IOC_UNRESVSP64:
-		if ((error = xfs_free_file_space(ip, startoffset, bf->l_len,
-								attr_flags)))
-			return error;
-		break;
+	error = xfs_free_file_space(ip, offset, len);
+	if (error)
+		return error;
 
-	case XFS_IOC_ALLOCSP:
-	case XFS_IOC_ALLOCSP64:
-	case XFS_IOC_FREESP:
-	case XFS_IOC_FREESP64:
+	while (!error && !done) {
+		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
 		/*
-		 * These operations actually do IO when extending the file, but
-		 * the allocation is done seperately to the zeroing that is
-		 * done. This set of operations need to be serialised against
-		 * other IO operations, such as truncate and buffered IO. We
-		 * need to take the IOLOCK here to serialise the allocation and
-		 * zeroing IO to prevent other IOLOCK holders (e.g. getbmap,
-		 * truncate, direct IO) from racing against the transient
-		 * allocated but not written state we can have here.
+		 * We would need to reserve permanent block for transaction.
+		 * This will come into picture when after shifting extent into
+		 * hole we found that adjacent extents can be merged which
+		 * may lead to freeing of a block during record update.
 		 */
-		xfs_ilock(ip, XFS_IOLOCK_EXCL);
-		if (startoffset > fsize) {
-			error = xfs_alloc_file_space(ip, fsize,
-					startoffset - fsize, 0,
-					attr_flags | XFS_ATTR_NOLOCK);
-			if (error) {
-				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-				break;
-			}
+		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write,
+				XFS_DIOSTRAT_SPACE_RES(mp, 0), 0);
+		if (error) {
+			xfs_trans_cancel(tp, 0);
+			break;
 		}
 
-		iattr.ia_valid = ATTR_SIZE;
-		iattr.ia_size = startoffset;
-
-		error = xfs_setattr_size(ip, &iattr,
-					 attr_flags | XFS_ATTR_NOLOCK);
-		xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
+				ip->i_gdquot, ip->i_pdquot,
+				XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
+				XFS_QMOPT_RES_REGBLKS);
 		if (error)
-			return error;
-
-		clrprealloc = 1;
-		break;
-
-	default:
-		ASSERT(0);
-		return XFS_ERROR(EINVAL);
-	}
-
-	/*
-	 * update the inode timestamp, mode, and prealloc flag bits
-	 */
-	tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID);
-	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_writeid, 0, 0);
-	if (error) {
-		xfs_trans_cancel(tp, 0);
-		return error;
-	}
+			goto out;
 
-	xfs_ilock(ip, XFS_ILOCK_EXCL);
-	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+		xfs_trans_ijoin(tp, ip, 0);
 
-	if ((attr_flags & XFS_ATTR_DMI) == 0) {
-		ip->i_d.di_mode &= ~S_ISUID;
+		xfs_bmap_init(&free_list, &first_block);
 
 		/*
-		 * Note that we don't have to worry about mandatory
-		 * file locking being disabled here because we only
-		 * clear the S_ISGID bit if the Group execute bit is
-		 * on, but if it was on then mandatory locking wouldn't
-		 * have been enabled.
+		 * We are using the write transaction in which max 2 bmbt
+		 * updates are allowed
 		 */
-		if (ip->i_d.di_mode & S_IXGRP)
-			ip->i_d.di_mode &= ~S_ISGID;
+		error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb,
					       shift_fsb, &current_ext,
					       &first_block, &free_list,
					       XFS_BMAP_MAX_SHIFT_EXTENTS);
+		if (error)
+			goto out;
+
+		error = xfs_bmap_finish(&tp, &free_list, &committed);
+		if (error)
+			goto out;
 
-		xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	}
 
-	if (setprealloc)
-		ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
-	else if (clrprealloc)
-		ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;
-	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-	if (attr_flags & XFS_ATTR_SYNC)
-		xfs_trans_set_sync(tp);
-	return xfs_trans_commit(tp, 0);
+	return error;
+
+out:
+	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+	return error;
 }
 
 /*
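Illustrative sketch only, not part of the commit above: it shows how a hypothetical caller (for example an fallocate()-style handler) might drive the reworked helpers. The function signatures and the IOLOCK expectation come from the diff (xfs_collapse_file_space() asserts the IOLOCK is held); the caller name, its locking sequence and error handling are assumptions.

/*
 * Hypothetical caller sketch; xfs_collapse_file_space() now takes no
 * attr_flags and expects the caller to hold the IOLOCK exclusively
 * (see the ASSERT in the diff above).
 */
static int
example_collapse_range(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	int			error;

	/* Serialise against other I/O before shifting extents. */
	xfs_ilock(ip, XFS_IOLOCK_EXCL);
	error = xfs_collapse_file_space(ip, offset, len);
	xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	return error;
}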
