diff options
Diffstat (limited to 'fs/xfs/xfs_file.c')
| -rw-r--r-- | fs/xfs/xfs_file.c | 268 | 
1 files changed, 128 insertions, 140 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 4c749ab543d..1f66779d7a4 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -17,25 +17,27 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_log.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h" -#include "xfs_trans.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc.h" -#include "xfs_dinode.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h"  #include "xfs_inode.h" +#include "xfs_trans.h"  #include "xfs_inode_item.h"  #include "xfs_bmap.h"  #include "xfs_bmap_util.h"  #include "xfs_error.h" -#include "xfs_da_btree.h" -#include "xfs_dir2_format.h"  #include "xfs_dir2.h"  #include "xfs_dir2_priv.h"  #include "xfs_ioctl.h"  #include "xfs_trace.h" +#include "xfs_log.h" +#include "xfs_dinode.h"  #include <linux/aio.h>  #include <linux/dcache.h> @@ -153,7 +155,7 @@ xfs_dir_fsync(  	if (!lsn)  		return 0; -	return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); +	return -_xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);  }  STATIC int @@ -227,39 +229,33 @@ xfs_file_fsync(  }  STATIC ssize_t -xfs_file_aio_read( +xfs_file_read_iter(  	struct kiocb		*iocb, -	const struct iovec	*iovp, -	unsigned long		nr_segs, -	loff_t			pos) +	struct iov_iter		*to)  {  	struct file		*file = iocb->ki_filp;  	struct inode		*inode = file->f_mapping->host;  	struct xfs_inode	*ip = XFS_I(inode);  	struct xfs_mount	*mp = ip->i_mount; -	size_t			size = 0; +	size_t			size = iov_iter_count(to);  	ssize_t			ret = 0;  	int			ioflags = 0;  	xfs_fsize_t		n; +	loff_t			pos = iocb->ki_pos;  	XFS_STATS_INC(xs_read_calls); -	BUG_ON(iocb->ki_pos != pos); -  	if (unlikely(file->f_flags & O_DIRECT))  		ioflags |= IO_ISDIRECT;  	if (file->f_mode & FMODE_NOCMTIME)  		ioflags |= IO_INVIS; -	ret = generic_segment_checks(iovp, &nr_segs, &size, VERIFY_WRITE); -	if (ret < 0) -		return ret; -  	if (unlikely(ioflags & IO_ISDIRECT)) {  		xfs_buftarg_t	*target =  			XFS_IS_REALTIME_INODE(ip) ?  				mp->m_rtdev_targp : mp->m_ddev_targp; -		if ((pos & target->bt_smask) || (size & target->bt_smask)) { +		/* DIO must be aligned to device logical sector size */ +		if ((pos | size) & target->bt_logical_sectormask) {  			if (pos == i_size_read(inode))  				return 0;  			return -XFS_ERROR(EINVAL); @@ -292,7 +288,7 @@ xfs_file_aio_read(  		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);  		if (inode->i_mapping->nrpages) { -			ret = -filemap_write_and_wait_range( +			ret = filemap_write_and_wait_range(  							VFS_I(ip)->i_mapping,  							pos, -1);  			if (ret) { @@ -306,7 +302,7 @@ xfs_file_aio_read(  	trace_xfs_file_read(ip, size, pos, ioflags); -	ret = generic_file_aio_read(iocb, iovp, nr_segs, pos); +	ret = generic_file_read_iter(iocb, to);  	if (ret > 0)  		XFS_STATS_ADD(xs_read_bytes, ret); @@ -347,47 +343,6 @@ xfs_file_splice_read(  }  /* - * xfs_file_splice_write() does not use xfs_rw_ilock() because - * generic_file_splice_write() takes the i_mutex itself. This, in theory, - * couuld cause lock inversions between the aio_write path and the splice path - * if someone is doing concurrent splice(2) based writes and write(2) based - * writes to the same inode. The only real way to fix this is to re-implement - * the generic code here with correct locking orders. - */ -STATIC ssize_t -xfs_file_splice_write( -	struct pipe_inode_info	*pipe, -	struct file		*outfilp, -	loff_t			*ppos, -	size_t			count, -	unsigned int		flags) -{ -	struct inode		*inode = outfilp->f_mapping->host; -	struct xfs_inode	*ip = XFS_I(inode); -	int			ioflags = 0; -	ssize_t			ret; - -	XFS_STATS_INC(xs_write_calls); - -	if (outfilp->f_mode & FMODE_NOCMTIME) -		ioflags |= IO_INVIS; - -	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) -		return -EIO; - -	xfs_ilock(ip, XFS_IOLOCK_EXCL); - -	trace_xfs_file_splice_write(ip, count, *ppos, ioflags); - -	ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); -	if (ret > 0) -		XFS_STATS_ADD(xs_write_bytes, ret); - -	xfs_iunlock(ip, XFS_IOLOCK_EXCL); -	return ret; -} - -/*   * This routine is called to handle zeroing any space in the last block of the   * file that is beyond the EOF.  We do this since the size is being increased   * without writing anything to that block and we don't want to read the @@ -622,10 +577,7 @@ restart:  STATIC ssize_t  xfs_file_dio_aio_write(  	struct kiocb		*iocb, -	const struct iovec	*iovp, -	unsigned long		nr_segs, -	loff_t			pos, -	size_t			ocount) +	struct iov_iter		*from)  {  	struct file		*file = iocb->ki_filp;  	struct address_space	*mapping = file->f_mapping; @@ -633,15 +585,18 @@ xfs_file_dio_aio_write(  	struct xfs_inode	*ip = XFS_I(inode);  	struct xfs_mount	*mp = ip->i_mount;  	ssize_t			ret = 0; -	size_t			count = ocount;  	int			unaligned_io = 0;  	int			iolock; +	size_t			count = iov_iter_count(from); +	loff_t			pos = iocb->ki_pos;  	struct xfs_buftarg	*target = XFS_IS_REALTIME_INODE(ip) ?  					mp->m_rtdev_targp : mp->m_ddev_targp; -	if ((pos & target->bt_smask) || (count & target->bt_smask)) +	/* DIO must be aligned to device logical sector size */ +	if ((pos | count) & target->bt_logical_sectormask)  		return -XFS_ERROR(EINVAL); +	/* "unaligned" here means not aligned to a filesystem block */  	if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))  		unaligned_io = 1; @@ -672,9 +627,10 @@ xfs_file_dio_aio_write(  	ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);  	if (ret)  		goto out; +	iov_iter_truncate(from, count);  	if (mapping->nrpages) { -		ret = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, +		ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,  						    pos, -1);  		if (ret)  			goto out; @@ -693,8 +649,7 @@ xfs_file_dio_aio_write(  	}  	trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); -	ret = generic_file_direct_write(iocb, iovp, -			&nr_segs, pos, &iocb->ki_pos, count, ocount); +	ret = generic_file_direct_write(iocb, from, pos);  out:  	xfs_rw_iunlock(ip, iolock); @@ -707,10 +662,7 @@ out:  STATIC ssize_t  xfs_file_buffered_aio_write(  	struct kiocb		*iocb, -	const struct iovec	*iovp, -	unsigned long		nr_segs, -	loff_t			pos, -	size_t			ocount) +	struct iov_iter		*from)  {  	struct file		*file = iocb->ki_filp;  	struct address_space	*mapping = file->f_mapping; @@ -719,7 +671,8 @@ xfs_file_buffered_aio_write(  	ssize_t			ret;  	int			enospc = 0;  	int			iolock = XFS_IOLOCK_EXCL; -	size_t			count = ocount; +	loff_t			pos = iocb->ki_pos; +	size_t			count = iov_iter_count(from);  	xfs_rw_ilock(ip, iolock); @@ -727,14 +680,15 @@ xfs_file_buffered_aio_write(  	if (ret)  		goto out; +	iov_iter_truncate(from, count);  	/* We can write back this queue in page reclaim */  	current->backing_dev_info = mapping->backing_dev_info;  write_retry:  	trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); -	ret = generic_file_buffered_write(iocb, iovp, nr_segs, -			pos, &iocb->ki_pos, count, 0); - +	ret = generic_perform_write(file, from, pos); +	if (likely(ret >= 0)) +		iocb->ki_pos = pos + ret;  	/*  	 * If we just got an ENOSPC, try to write back all dirty inodes to  	 * convert delalloc space to free up some of the excess reserved @@ -753,40 +707,29 @@ out:  }  STATIC ssize_t -xfs_file_aio_write( +xfs_file_write_iter(  	struct kiocb		*iocb, -	const struct iovec	*iovp, -	unsigned long		nr_segs, -	loff_t			pos) +	struct iov_iter		*from)  {  	struct file		*file = iocb->ki_filp;  	struct address_space	*mapping = file->f_mapping;  	struct inode		*inode = mapping->host;  	struct xfs_inode	*ip = XFS_I(inode);  	ssize_t			ret; -	size_t			ocount = 0; +	size_t			ocount = iov_iter_count(from);  	XFS_STATS_INC(xs_write_calls); -	BUG_ON(iocb->ki_pos != pos); - -	ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); -	if (ret) -		return ret; -  	if (ocount == 0)  		return 0; -	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { -		ret = -EIO; -		goto out; -	} +	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) +		return -EIO;  	if (unlikely(file->f_flags & O_DIRECT)) -		ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount); +		ret = xfs_file_dio_aio_write(iocb, from);  	else -		ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, -						  ocount); +		ret = xfs_file_buffered_aio_write(iocb, from);  	if (ret > 0) {  		ssize_t err; @@ -794,55 +737,99 @@ xfs_file_aio_write(  		XFS_STATS_ADD(xs_write_bytes, ret);  		/* Handle various SYNC-type writes */ -		err = generic_write_sync(file, pos, ret); +		err = generic_write_sync(file, iocb->ki_pos - ret, ret);  		if (err < 0)  			ret = err;  	} - -out:  	return ret;  }  STATIC long  xfs_file_fallocate( -	struct file	*file, -	int		mode, -	loff_t		offset, -	loff_t		len) +	struct file		*file, +	int			mode, +	loff_t			offset, +	loff_t			len)  { -	struct inode	*inode = file_inode(file); -	long		error; -	loff_t		new_size = 0; -	xfs_flock64_t	bf; -	xfs_inode_t	*ip = XFS_I(inode); -	int		cmd = XFS_IOC_RESVSP; -	int		attr_flags = XFS_ATTR_NOLOCK; +	struct inode		*inode = file_inode(file); +	struct xfs_inode	*ip = XFS_I(inode); +	struct xfs_trans	*tp; +	long			error; +	loff_t			new_size = 0; -	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) +	if (!S_ISREG(inode->i_mode)) +		return -EINVAL; +	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | +		     FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))  		return -EOPNOTSUPP; -	bf.l_whence = 0; -	bf.l_start = offset; -	bf.l_len = len; -  	xfs_ilock(ip, XFS_IOLOCK_EXCL); +	if (mode & FALLOC_FL_PUNCH_HOLE) { +		error = xfs_free_file_space(ip, offset, len); +		if (error) +			goto out_unlock; +	} else if (mode & FALLOC_FL_COLLAPSE_RANGE) { +		unsigned blksize_mask = (1 << inode->i_blkbits) - 1; + +		if (offset & blksize_mask || len & blksize_mask) { +			error = EINVAL; +			goto out_unlock; +		} + +		/* +		 * There is no need to overlap collapse range with EOF, +		 * in which case it is effectively a truncate operation +		 */ +		if (offset + len >= i_size_read(inode)) { +			error = EINVAL; +			goto out_unlock; +		} + +		new_size = i_size_read(inode) - len; -	if (mode & FALLOC_FL_PUNCH_HOLE) -		cmd = XFS_IOC_UNRESVSP; +		error = xfs_collapse_file_space(ip, offset, len); +		if (error) +			goto out_unlock; +	} else { +		if (!(mode & FALLOC_FL_KEEP_SIZE) && +		    offset + len > i_size_read(inode)) { +			new_size = offset + len; +			error = -inode_newsize_ok(inode, new_size); +			if (error) +				goto out_unlock; +		} -	/* check the new inode size is valid before allocating */ -	if (!(mode & FALLOC_FL_KEEP_SIZE) && -	    offset + len > i_size_read(inode)) { -		new_size = offset + len; -		error = inode_newsize_ok(inode, new_size); +		if (mode & FALLOC_FL_ZERO_RANGE) +			error = xfs_zero_file_space(ip, offset, len); +		else +			error = xfs_alloc_file_space(ip, offset, len, +						     XFS_BMAPI_PREALLOC);  		if (error)  			goto out_unlock;  	} -	if (file->f_flags & O_DSYNC) -		attr_flags |= XFS_ATTR_SYNC; +	tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID); +	error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0); +	if (error) { +		xfs_trans_cancel(tp, 0); +		goto out_unlock; +	} + +	xfs_ilock(ip, XFS_ILOCK_EXCL); +	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); +	ip->i_d.di_mode &= ~S_ISUID; +	if (ip->i_d.di_mode & S_IXGRP) +		ip->i_d.di_mode &= ~S_ISGID; + +	if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE))) +		ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; + +	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); +	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); -	error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags); +	if (file->f_flags & O_DSYNC) +		xfs_trans_set_sync(tp); +	error = xfs_trans_commit(tp, 0);  	if (error)  		goto out_unlock; @@ -852,12 +839,12 @@ xfs_file_fallocate(  		iattr.ia_valid = ATTR_SIZE;  		iattr.ia_size = new_size; -		error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK); +		error = xfs_setattr_size(ip, &iattr);  	}  out_unlock:  	xfs_iunlock(ip, XFS_IOLOCK_EXCL); -	return error; +	return -error;  } @@ -890,9 +877,9 @@ xfs_dir_open(  	 * If there are any blocks, read-ahead block 0 as we're almost  	 * certain to have the next operation be a read there.  	 */ -	mode = xfs_ilock_map_shared(ip); +	mode = xfs_ilock_data_map_shared(ip);  	if (ip->i_d.di_nextents > 0) -		xfs_dir3_data_readahead(NULL, ip, 0, -1); +		xfs_dir3_data_readahead(ip, 0, -1);  	xfs_iunlock(ip, mode);  	return 0;  } @@ -1193,7 +1180,7 @@ xfs_seek_data(  	uint			lock;  	int			error; -	lock = xfs_ilock_map_shared(ip); +	lock = xfs_ilock_data_map_shared(ip);  	isize = i_size_read(inode);  	if (start >= isize) { @@ -1272,7 +1259,7 @@ out:  	offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);  out_unlock: -	xfs_iunlock_map_shared(ip, lock); +	xfs_iunlock(ip, lock);  	if (error)  		return -error; @@ -1297,7 +1284,7 @@ xfs_seek_hole(  	if (XFS_FORCED_SHUTDOWN(mp))  		return -XFS_ERROR(EIO); -	lock = xfs_ilock_map_shared(ip); +	lock = xfs_ilock_data_map_shared(ip);  	isize = i_size_read(inode);  	if (start >= isize) { @@ -1380,7 +1367,7 @@ out:  	offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);  out_unlock: -	xfs_iunlock_map_shared(ip, lock); +	xfs_iunlock(ip, lock);  	if (error)  		return -error; @@ -1409,12 +1396,12 @@ xfs_file_llseek(  const struct file_operations xfs_file_operations = {  	.llseek		= xfs_file_llseek, -	.read		= do_sync_read, -	.write		= do_sync_write, -	.aio_read	= xfs_file_aio_read, -	.aio_write	= xfs_file_aio_write, +	.read		= new_sync_read, +	.write		= new_sync_write, +	.read_iter	= xfs_file_read_iter, +	.write_iter	= xfs_file_write_iter,  	.splice_read	= xfs_file_splice_read, -	.splice_write	= xfs_file_splice_write, +	.splice_write	= iter_file_splice_write,  	.unlocked_ioctl	= xfs_file_ioctl,  #ifdef CONFIG_COMPAT  	.compat_ioctl	= xfs_file_compat_ioctl, @@ -1440,6 +1427,7 @@ const struct file_operations xfs_dir_file_operations = {  static const struct vm_operations_struct xfs_file_vm_ops = {  	.fault		= filemap_fault, +	.map_pages	= filemap_map_pages,  	.page_mkwrite	= xfs_vm_page_mkwrite,  	.remap_pages	= generic_file_remap_pages,  };  | 
