aboutsummaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_file.c')
-rw-r--r--fs/xfs/xfs_file.c268
1 files changed, 128 insertions, 140 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 4c749ab543d..1f66779d7a4 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -17,25 +17,27 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_log.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
-#include "xfs_trans.h"
#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc.h"
-#include "xfs_dinode.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
#include "xfs_inode.h"
+#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_error.h"
-#include "xfs_da_btree.h"
-#include "xfs_dir2_format.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_ioctl.h"
#include "xfs_trace.h"
+#include "xfs_log.h"
+#include "xfs_dinode.h"
#include <linux/aio.h>
#include <linux/dcache.h>
@@ -153,7 +155,7 @@ xfs_dir_fsync(
if (!lsn)
return 0;
- return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
+ return -_xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);
}
STATIC int
@@ -227,39 +229,33 @@ xfs_file_fsync(
}
STATIC ssize_t
-xfs_file_aio_read(
+xfs_file_read_iter(
struct kiocb *iocb,
- const struct iovec *iovp,
- unsigned long nr_segs,
- loff_t pos)
+ struct iov_iter *to)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
- size_t size = 0;
+ size_t size = iov_iter_count(to);
ssize_t ret = 0;
int ioflags = 0;
xfs_fsize_t n;
+ loff_t pos = iocb->ki_pos;
XFS_STATS_INC(xs_read_calls);
- BUG_ON(iocb->ki_pos != pos);
-
if (unlikely(file->f_flags & O_DIRECT))
ioflags |= IO_ISDIRECT;
if (file->f_mode & FMODE_NOCMTIME)
ioflags |= IO_INVIS;
- ret = generic_segment_checks(iovp, &nr_segs, &size, VERIFY_WRITE);
- if (ret < 0)
- return ret;
-
if (unlikely(ioflags & IO_ISDIRECT)) {
xfs_buftarg_t *target =
XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
- if ((pos & target->bt_smask) || (size & target->bt_smask)) {
+ /* DIO must be aligned to device logical sector size */
+ if ((pos | size) & target->bt_logical_sectormask) {
if (pos == i_size_read(inode))
return 0;
return -XFS_ERROR(EINVAL);
@@ -292,7 +288,7 @@ xfs_file_aio_read(
xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);
if (inode->i_mapping->nrpages) {
- ret = -filemap_write_and_wait_range(
+ ret = filemap_write_and_wait_range(
VFS_I(ip)->i_mapping,
pos, -1);
if (ret) {
@@ -306,7 +302,7 @@ xfs_file_aio_read(
trace_xfs_file_read(ip, size, pos, ioflags);
- ret = generic_file_aio_read(iocb, iovp, nr_segs, pos);
+ ret = generic_file_read_iter(iocb, to);
if (ret > 0)
XFS_STATS_ADD(xs_read_bytes, ret);
@@ -347,47 +343,6 @@ xfs_file_splice_read(
}
/*
- * xfs_file_splice_write() does not use xfs_rw_ilock() because
- * generic_file_splice_write() takes the i_mutex itself. This, in theory,
- * couuld cause lock inversions between the aio_write path and the splice path
- * if someone is doing concurrent splice(2) based writes and write(2) based
- * writes to the same inode. The only real way to fix this is to re-implement
- * the generic code here with correct locking orders.
- */
-STATIC ssize_t
-xfs_file_splice_write(
- struct pipe_inode_info *pipe,
- struct file *outfilp,
- loff_t *ppos,
- size_t count,
- unsigned int flags)
-{
- struct inode *inode = outfilp->f_mapping->host;
- struct xfs_inode *ip = XFS_I(inode);
- int ioflags = 0;
- ssize_t ret;
-
- XFS_STATS_INC(xs_write_calls);
-
- if (outfilp->f_mode & FMODE_NOCMTIME)
- ioflags |= IO_INVIS;
-
- if (XFS_FORCED_SHUTDOWN(ip->i_mount))
- return -EIO;
-
- xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
- trace_xfs_file_splice_write(ip, count, *ppos, ioflags);
-
- ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags);
- if (ret > 0)
- XFS_STATS_ADD(xs_write_bytes, ret);
-
- xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- return ret;
-}
-
-/*
* This routine is called to handle zeroing any space in the last block of the
* file that is beyond the EOF. We do this since the size is being increased
* without writing anything to that block and we don't want to read the
@@ -622,10 +577,7 @@ restart:
STATIC ssize_t
xfs_file_dio_aio_write(
struct kiocb *iocb,
- const struct iovec *iovp,
- unsigned long nr_segs,
- loff_t pos,
- size_t ocount)
+ struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
@@ -633,15 +585,18 @@ xfs_file_dio_aio_write(
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
ssize_t ret = 0;
- size_t count = ocount;
int unaligned_io = 0;
int iolock;
+ size_t count = iov_iter_count(from);
+ loff_t pos = iocb->ki_pos;
struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ?
mp->m_rtdev_targp : mp->m_ddev_targp;
- if ((pos & target->bt_smask) || (count & target->bt_smask))
+ /* DIO must be aligned to device logical sector size */
+ if ((pos | count) & target->bt_logical_sectormask)
return -XFS_ERROR(EINVAL);
+ /* "unaligned" here means not aligned to a filesystem block */
if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))
unaligned_io = 1;
@@ -672,9 +627,10 @@ xfs_file_dio_aio_write(
ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);
if (ret)
goto out;
+ iov_iter_truncate(from, count);
if (mapping->nrpages) {
- ret = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
+ ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
pos, -1);
if (ret)
goto out;
@@ -693,8 +649,7 @@ xfs_file_dio_aio_write(
}
trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0);
- ret = generic_file_direct_write(iocb, iovp,
- &nr_segs, pos, &iocb->ki_pos, count, ocount);
+ ret = generic_file_direct_write(iocb, from, pos);
out:
xfs_rw_iunlock(ip, iolock);
@@ -707,10 +662,7 @@ out:
STATIC ssize_t
xfs_file_buffered_aio_write(
struct kiocb *iocb,
- const struct iovec *iovp,
- unsigned long nr_segs,
- loff_t pos,
- size_t ocount)
+ struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
@@ -719,7 +671,8 @@ xfs_file_buffered_aio_write(
ssize_t ret;
int enospc = 0;
int iolock = XFS_IOLOCK_EXCL;
- size_t count = ocount;
+ loff_t pos = iocb->ki_pos;
+ size_t count = iov_iter_count(from);
xfs_rw_ilock(ip, iolock);
@@ -727,14 +680,15 @@ xfs_file_buffered_aio_write(
if (ret)
goto out;
+ iov_iter_truncate(from, count);
/* We can write back this queue in page reclaim */
current->backing_dev_info = mapping->backing_dev_info;
write_retry:
trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0);
- ret = generic_file_buffered_write(iocb, iovp, nr_segs,
- pos, &iocb->ki_pos, count, 0);
-
+ ret = generic_perform_write(file, from, pos);
+ if (likely(ret >= 0))
+ iocb->ki_pos = pos + ret;
/*
* If we just got an ENOSPC, try to write back all dirty inodes to
* convert delalloc space to free up some of the excess reserved
@@ -753,40 +707,29 @@ out:
}
STATIC ssize_t
-xfs_file_aio_write(
+xfs_file_write_iter(
struct kiocb *iocb,
- const struct iovec *iovp,
- unsigned long nr_segs,
- loff_t pos)
+ struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
struct xfs_inode *ip = XFS_I(inode);
ssize_t ret;
- size_t ocount = 0;
+ size_t ocount = iov_iter_count(from);
XFS_STATS_INC(xs_write_calls);
- BUG_ON(iocb->ki_pos != pos);
-
- ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ);
- if (ret)
- return ret;
-
if (ocount == 0)
return 0;
- if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
- ret = -EIO;
- goto out;
- }
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ return -EIO;
if (unlikely(file->f_flags & O_DIRECT))
- ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount);
+ ret = xfs_file_dio_aio_write(iocb, from);
else
- ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos,
- ocount);
+ ret = xfs_file_buffered_aio_write(iocb, from);
if (ret > 0) {
ssize_t err;
@@ -794,55 +737,99 @@ xfs_file_aio_write(
XFS_STATS_ADD(xs_write_bytes, ret);
/* Handle various SYNC-type writes */
- err = generic_write_sync(file, pos, ret);
+ err = generic_write_sync(file, iocb->ki_pos - ret, ret);
if (err < 0)
ret = err;
}
-
-out:
return ret;
}
STATIC long
xfs_file_fallocate(
- struct file *file,
- int mode,
- loff_t offset,
- loff_t len)
+ struct file *file,
+ int mode,
+ loff_t offset,
+ loff_t len)
{
- struct inode *inode = file_inode(file);
- long error;
- loff_t new_size = 0;
- xfs_flock64_t bf;
- xfs_inode_t *ip = XFS_I(inode);
- int cmd = XFS_IOC_RESVSP;
- int attr_flags = XFS_ATTR_NOLOCK;
+ struct inode *inode = file_inode(file);
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_trans *tp;
+ long error;
+ loff_t new_size = 0;
- if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+ if (!S_ISREG(inode->i_mode))
+ return -EINVAL;
+ if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
+ FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
return -EOPNOTSUPP;
- bf.l_whence = 0;
- bf.l_start = offset;
- bf.l_len = len;
-
xfs_ilock(ip, XFS_IOLOCK_EXCL);
+ if (mode & FALLOC_FL_PUNCH_HOLE) {
+ error = xfs_free_file_space(ip, offset, len);
+ if (error)
+ goto out_unlock;
+ } else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
+ unsigned blksize_mask = (1 << inode->i_blkbits) - 1;
+
+ if (offset & blksize_mask || len & blksize_mask) {
+ error = EINVAL;
+ goto out_unlock;
+ }
+
+ /*
+ * There is no need to overlap collapse range with EOF,
+ * in which case it is effectively a truncate operation
+ */
+ if (offset + len >= i_size_read(inode)) {
+ error = EINVAL;
+ goto out_unlock;
+ }
+
+ new_size = i_size_read(inode) - len;
- if (mode & FALLOC_FL_PUNCH_HOLE)
- cmd = XFS_IOC_UNRESVSP;
+ error = xfs_collapse_file_space(ip, offset, len);
+ if (error)
+ goto out_unlock;
+ } else {
+ if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+ offset + len > i_size_read(inode)) {
+ new_size = offset + len;
+ error = -inode_newsize_ok(inode, new_size);
+ if (error)
+ goto out_unlock;
+ }
- /* check the new inode size is valid before allocating */
- if (!(mode & FALLOC_FL_KEEP_SIZE) &&
- offset + len > i_size_read(inode)) {
- new_size = offset + len;
- error = inode_newsize_ok(inode, new_size);
+ if (mode & FALLOC_FL_ZERO_RANGE)
+ error = xfs_zero_file_space(ip, offset, len);
+ else
+ error = xfs_alloc_file_space(ip, offset, len,
+ XFS_BMAPI_PREALLOC);
if (error)
goto out_unlock;
}
- if (file->f_flags & O_DSYNC)
- attr_flags |= XFS_ATTR_SYNC;
+ tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID);
+ error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0);
+ if (error) {
+ xfs_trans_cancel(tp, 0);
+ goto out_unlock;
+ }
+
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+ ip->i_d.di_mode &= ~S_ISUID;
+ if (ip->i_d.di_mode & S_IXGRP)
+ ip->i_d.di_mode &= ~S_ISGID;
+
+ if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE)))
+ ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
+
+ xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags);
+ if (file->f_flags & O_DSYNC)
+ xfs_trans_set_sync(tp);
+ error = xfs_trans_commit(tp, 0);
if (error)
goto out_unlock;
@@ -852,12 +839,12 @@ xfs_file_fallocate(
iattr.ia_valid = ATTR_SIZE;
iattr.ia_size = new_size;
- error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK);
+ error = xfs_setattr_size(ip, &iattr);
}
out_unlock:
xfs_iunlock(ip, XFS_IOLOCK_EXCL);
- return error;
+ return -error;
}
@@ -890,9 +877,9 @@ xfs_dir_open(
* If there are any blocks, read-ahead block 0 as we're almost
* certain to have the next operation be a read there.
*/
- mode = xfs_ilock_map_shared(ip);
+ mode = xfs_ilock_data_map_shared(ip);
if (ip->i_d.di_nextents > 0)
- xfs_dir3_data_readahead(NULL, ip, 0, -1);
+ xfs_dir3_data_readahead(ip, 0, -1);
xfs_iunlock(ip, mode);
return 0;
}
@@ -1193,7 +1180,7 @@ xfs_seek_data(
uint lock;
int error;
- lock = xfs_ilock_map_shared(ip);
+ lock = xfs_ilock_data_map_shared(ip);
isize = i_size_read(inode);
if (start >= isize) {
@@ -1272,7 +1259,7 @@ out:
offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out_unlock:
- xfs_iunlock_map_shared(ip, lock);
+ xfs_iunlock(ip, lock);
if (error)
return -error;
@@ -1297,7 +1284,7 @@ xfs_seek_hole(
if (XFS_FORCED_SHUTDOWN(mp))
return -XFS_ERROR(EIO);
- lock = xfs_ilock_map_shared(ip);
+ lock = xfs_ilock_data_map_shared(ip);
isize = i_size_read(inode);
if (start >= isize) {
@@ -1380,7 +1367,7 @@ out:
offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out_unlock:
- xfs_iunlock_map_shared(ip, lock);
+ xfs_iunlock(ip, lock);
if (error)
return -error;
@@ -1409,12 +1396,12 @@ xfs_file_llseek(
const struct file_operations xfs_file_operations = {
.llseek = xfs_file_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = xfs_file_aio_read,
- .aio_write = xfs_file_aio_write,
+ .read = new_sync_read,
+ .write = new_sync_write,
+ .read_iter = xfs_file_read_iter,
+ .write_iter = xfs_file_write_iter,
.splice_read = xfs_file_splice_read,
- .splice_write = xfs_file_splice_write,
+ .splice_write = iter_file_splice_write,
.unlocked_ioctl = xfs_file_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = xfs_file_compat_ioctl,
@@ -1440,6 +1427,7 @@ const struct file_operations xfs_dir_file_operations = {
static const struct vm_operations_struct xfs_file_vm_ops = {
.fault = filemap_fault,
+ .map_pages = filemap_map_pages,
.page_mkwrite = xfs_vm_page_mkwrite,
.remap_pages = generic_file_remap_pages,
};