diff options
Diffstat (limited to 'fs/xfs/xfs_file.c')
| -rw-r--r-- | fs/xfs/xfs_file.c | 268 |
1 files changed, 128 insertions, 140 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 4c749ab543d..1f66779d7a4 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -17,25 +17,27 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_log.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_trans.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc.h" -#include "xfs_dinode.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" #include "xfs_inode.h" +#include "xfs_trans.h" #include "xfs_inode_item.h" #include "xfs_bmap.h" #include "xfs_bmap_util.h" #include "xfs_error.h" -#include "xfs_da_btree.h" -#include "xfs_dir2_format.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_ioctl.h" #include "xfs_trace.h" +#include "xfs_log.h" +#include "xfs_dinode.h" #include <linux/aio.h> #include <linux/dcache.h> @@ -153,7 +155,7 @@ xfs_dir_fsync( if (!lsn) return 0; - return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); + return -_xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); } STATIC int @@ -227,39 +229,33 @@ xfs_file_fsync( } STATIC ssize_t -xfs_file_aio_read( +xfs_file_read_iter( struct kiocb *iocb, - const struct iovec *iovp, - unsigned long nr_segs, - loff_t pos) + struct iov_iter *to) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - size_t size = 0; + size_t size = iov_iter_count(to); ssize_t ret = 0; int ioflags = 0; xfs_fsize_t n; + loff_t pos = iocb->ki_pos; XFS_STATS_INC(xs_read_calls); - BUG_ON(iocb->ki_pos != pos); - if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; if (file->f_mode & FMODE_NOCMTIME) ioflags |= IO_INVIS; - ret = generic_segment_checks(iovp, &nr_segs, &size, VERIFY_WRITE); - if (ret < 0) - return ret; - if (unlikely(ioflags & IO_ISDIRECT)) { xfs_buftarg_t *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; - if ((pos & target->bt_smask) || (size & target->bt_smask)) { + /* DIO must be aligned to device logical sector size */ + if ((pos | size) & target->bt_logical_sectormask) { if (pos == i_size_read(inode)) return 0; return -XFS_ERROR(EINVAL); @@ -292,7 +288,7 @@ xfs_file_aio_read( xfs_rw_ilock(ip, XFS_IOLOCK_EXCL); if (inode->i_mapping->nrpages) { - ret = -filemap_write_and_wait_range( + ret = filemap_write_and_wait_range( VFS_I(ip)->i_mapping, pos, -1); if (ret) { @@ -306,7 +302,7 @@ xfs_file_aio_read( trace_xfs_file_read(ip, size, pos, ioflags); - ret = generic_file_aio_read(iocb, iovp, nr_segs, pos); + ret = generic_file_read_iter(iocb, to); if (ret > 0) XFS_STATS_ADD(xs_read_bytes, ret); @@ -347,47 +343,6 @@ xfs_file_splice_read( } /* - * xfs_file_splice_write() does not use xfs_rw_ilock() because - * generic_file_splice_write() takes the i_mutex itself. This, in theory, - * couuld cause lock inversions between the aio_write path and the splice path - * if someone is doing concurrent splice(2) based writes and write(2) based - * writes to the same inode. The only real way to fix this is to re-implement - * the generic code here with correct locking orders. - */ -STATIC ssize_t -xfs_file_splice_write( - struct pipe_inode_info *pipe, - struct file *outfilp, - loff_t *ppos, - size_t count, - unsigned int flags) -{ - struct inode *inode = outfilp->f_mapping->host; - struct xfs_inode *ip = XFS_I(inode); - int ioflags = 0; - ssize_t ret; - - XFS_STATS_INC(xs_write_calls); - - if (outfilp->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; - - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return -EIO; - - xfs_ilock(ip, XFS_IOLOCK_EXCL); - - trace_xfs_file_splice_write(ip, count, *ppos, ioflags); - - ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); - if (ret > 0) - XFS_STATS_ADD(xs_write_bytes, ret); - - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return ret; -} - -/* * This routine is called to handle zeroing any space in the last block of the * file that is beyond the EOF. We do this since the size is being increased * without writing anything to that block and we don't want to read the @@ -622,10 +577,7 @@ restart: STATIC ssize_t xfs_file_dio_aio_write( struct kiocb *iocb, - const struct iovec *iovp, - unsigned long nr_segs, - loff_t pos, - size_t ocount) + struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -633,15 +585,18 @@ xfs_file_dio_aio_write( struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; ssize_t ret = 0; - size_t count = ocount; int unaligned_io = 0; int iolock; + size_t count = iov_iter_count(from); + loff_t pos = iocb->ki_pos; struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; - if ((pos & target->bt_smask) || (count & target->bt_smask)) + /* DIO must be aligned to device logical sector size */ + if ((pos | count) & target->bt_logical_sectormask) return -XFS_ERROR(EINVAL); + /* "unaligned" here means not aligned to a filesystem block */ if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask)) unaligned_io = 1; @@ -672,9 +627,10 @@ xfs_file_dio_aio_write( ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock); if (ret) goto out; + iov_iter_truncate(from, count); if (mapping->nrpages) { - ret = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, + ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, pos, -1); if (ret) goto out; @@ -693,8 +649,7 @@ xfs_file_dio_aio_write( } trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); - ret = generic_file_direct_write(iocb, iovp, - &nr_segs, pos, &iocb->ki_pos, count, ocount); + ret = generic_file_direct_write(iocb, from, pos); out: xfs_rw_iunlock(ip, iolock); @@ -707,10 +662,7 @@ out: STATIC ssize_t xfs_file_buffered_aio_write( struct kiocb *iocb, - const struct iovec *iovp, - unsigned long nr_segs, - loff_t pos, - size_t ocount) + struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -719,7 +671,8 @@ xfs_file_buffered_aio_write( ssize_t ret; int enospc = 0; int iolock = XFS_IOLOCK_EXCL; - size_t count = ocount; + loff_t pos = iocb->ki_pos; + size_t count = iov_iter_count(from); xfs_rw_ilock(ip, iolock); @@ -727,14 +680,15 @@ xfs_file_buffered_aio_write( if (ret) goto out; + iov_iter_truncate(from, count); /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; write_retry: trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); - ret = generic_file_buffered_write(iocb, iovp, nr_segs, - pos, &iocb->ki_pos, count, 0); - + ret = generic_perform_write(file, from, pos); + if (likely(ret >= 0)) + iocb->ki_pos = pos + ret; /* * If we just got an ENOSPC, try to write back all dirty inodes to * convert delalloc space to free up some of the excess reserved @@ -753,40 +707,29 @@ out: } STATIC ssize_t -xfs_file_aio_write( +xfs_file_write_iter( struct kiocb *iocb, - const struct iovec *iovp, - unsigned long nr_segs, - loff_t pos) + struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; struct xfs_inode *ip = XFS_I(inode); ssize_t ret; - size_t ocount = 0; + size_t ocount = iov_iter_count(from); XFS_STATS_INC(xs_write_calls); - BUG_ON(iocb->ki_pos != pos); - - ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); - if (ret) - return ret; - if (ocount == 0) return 0; - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { - ret = -EIO; - goto out; - } + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return -EIO; if (unlikely(file->f_flags & O_DIRECT)) - ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount); + ret = xfs_file_dio_aio_write(iocb, from); else - ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, - ocount); + ret = xfs_file_buffered_aio_write(iocb, from); if (ret > 0) { ssize_t err; @@ -794,55 +737,99 @@ xfs_file_aio_write( XFS_STATS_ADD(xs_write_bytes, ret); /* Handle various SYNC-type writes */ - err = generic_write_sync(file, pos, ret); + err = generic_write_sync(file, iocb->ki_pos - ret, ret); if (err < 0) ret = err; } - -out: return ret; } STATIC long xfs_file_fallocate( - struct file *file, - int mode, - loff_t offset, - loff_t len) + struct file *file, + int mode, + loff_t offset, + loff_t len) { - struct inode *inode = file_inode(file); - long error; - loff_t new_size = 0; - xfs_flock64_t bf; - xfs_inode_t *ip = XFS_I(inode); - int cmd = XFS_IOC_RESVSP; - int attr_flags = XFS_ATTR_NOLOCK; + struct inode *inode = file_inode(file); + struct xfs_inode *ip = XFS_I(inode); + struct xfs_trans *tp; + long error; + loff_t new_size = 0; - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + if (!S_ISREG(inode->i_mode)) + return -EINVAL; + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | + FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)) return -EOPNOTSUPP; - bf.l_whence = 0; - bf.l_start = offset; - bf.l_len = len; - xfs_ilock(ip, XFS_IOLOCK_EXCL); + if (mode & FALLOC_FL_PUNCH_HOLE) { + error = xfs_free_file_space(ip, offset, len); + if (error) + goto out_unlock; + } else if (mode & FALLOC_FL_COLLAPSE_RANGE) { + unsigned blksize_mask = (1 << inode->i_blkbits) - 1; + + if (offset & blksize_mask || len & blksize_mask) { + error = EINVAL; + goto out_unlock; + } + + /* + * There is no need to overlap collapse range with EOF, + * in which case it is effectively a truncate operation + */ + if (offset + len >= i_size_read(inode)) { + error = EINVAL; + goto out_unlock; + } + + new_size = i_size_read(inode) - len; - if (mode & FALLOC_FL_PUNCH_HOLE) - cmd = XFS_IOC_UNRESVSP; + error = xfs_collapse_file_space(ip, offset, len); + if (error) + goto out_unlock; + } else { + if (!(mode & FALLOC_FL_KEEP_SIZE) && + offset + len > i_size_read(inode)) { + new_size = offset + len; + error = -inode_newsize_ok(inode, new_size); + if (error) + goto out_unlock; + } - /* check the new inode size is valid before allocating */ - if (!(mode & FALLOC_FL_KEEP_SIZE) && - offset + len > i_size_read(inode)) { - new_size = offset + len; - error = inode_newsize_ok(inode, new_size); + if (mode & FALLOC_FL_ZERO_RANGE) + error = xfs_zero_file_space(ip, offset, len); + else + error = xfs_alloc_file_space(ip, offset, len, + XFS_BMAPI_PREALLOC); if (error) goto out_unlock; } - if (file->f_flags & O_DSYNC) - attr_flags |= XFS_ATTR_SYNC; + tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID); + error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0); + if (error) { + xfs_trans_cancel(tp, 0); + goto out_unlock; + } + + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + ip->i_d.di_mode &= ~S_ISUID; + if (ip->i_d.di_mode & S_IXGRP) + ip->i_d.di_mode &= ~S_ISGID; + + if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE))) + ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; + + xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags); + if (file->f_flags & O_DSYNC) + xfs_trans_set_sync(tp); + error = xfs_trans_commit(tp, 0); if (error) goto out_unlock; @@ -852,12 +839,12 @@ xfs_file_fallocate( iattr.ia_valid = ATTR_SIZE; iattr.ia_size = new_size; - error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK); + error = xfs_setattr_size(ip, &iattr); } out_unlock: xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return error; + return -error; } @@ -890,9 +877,9 @@ xfs_dir_open( * If there are any blocks, read-ahead block 0 as we're almost * certain to have the next operation be a read there. */ - mode = xfs_ilock_map_shared(ip); + mode = xfs_ilock_data_map_shared(ip); if (ip->i_d.di_nextents > 0) - xfs_dir3_data_readahead(NULL, ip, 0, -1); + xfs_dir3_data_readahead(ip, 0, -1); xfs_iunlock(ip, mode); return 0; } @@ -1193,7 +1180,7 @@ xfs_seek_data( uint lock; int error; - lock = xfs_ilock_map_shared(ip); + lock = xfs_ilock_data_map_shared(ip); isize = i_size_read(inode); if (start >= isize) { @@ -1272,7 +1259,7 @@ out: offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); out_unlock: - xfs_iunlock_map_shared(ip, lock); + xfs_iunlock(ip, lock); if (error) return -error; @@ -1297,7 +1284,7 @@ xfs_seek_hole( if (XFS_FORCED_SHUTDOWN(mp)) return -XFS_ERROR(EIO); - lock = xfs_ilock_map_shared(ip); + lock = xfs_ilock_data_map_shared(ip); isize = i_size_read(inode); if (start >= isize) { @@ -1380,7 +1367,7 @@ out: offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); out_unlock: - xfs_iunlock_map_shared(ip, lock); + xfs_iunlock(ip, lock); if (error) return -error; @@ -1409,12 +1396,12 @@ xfs_file_llseek( const struct file_operations xfs_file_operations = { .llseek = xfs_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = xfs_file_aio_read, - .aio_write = xfs_file_aio_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = xfs_file_read_iter, + .write_iter = xfs_file_write_iter, .splice_read = xfs_file_splice_read, - .splice_write = xfs_file_splice_write, + .splice_write = iter_file_splice_write, .unlocked_ioctl = xfs_file_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = xfs_file_compat_ioctl, @@ -1440,6 +1427,7 @@ const struct file_operations xfs_dir_file_operations = { static const struct vm_operations_struct xfs_file_vm_ops = { .fault = filemap_fault, + .map_pages = filemap_map_pages, .page_mkwrite = xfs_vm_page_mkwrite, .remap_pages = generic_file_remap_pages, }; |
