diff options
author | Wolfram Sang <wsa@the-dreams.de> | 2014-06-17 14:36:41 +0200 |
---|---|---|
committer | Wolfram Sang <wsa@the-dreams.de> | 2014-06-17 14:37:31 +0200 |
commit | f0b1f6442b5090fed3529cb39f3acf8c91693d3d (patch) | |
tree | bc5f62b017a82161c9a7f892f464813f6efd5bf3 /fs | |
parent | 4632a93f015caf6d7db4352f37aab74a39e60d7a (diff) | |
parent | 7171511eaec5bf23fb06078f59784a3a0626b38f (diff) |
Merge tag 'v3.16-rc1' into i2c/for-next
Merge a stable base (Linux 3.16-rc1)
Signed-off-by: Wolfram Sang <wsa@the-dreams.de>
Diffstat (limited to 'fs')
94 files changed, 1541 insertions, 1820 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index c71e88602ff..cc1cfae726b 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -259,8 +259,7 @@ static int v9fs_launder_page(struct page *page) * */ static ssize_t -v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t pos, unsigned long nr_segs) +v9fs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos) { /* * FIXME @@ -269,7 +268,7 @@ v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, */ p9_debug(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) off/no(%lld/%lu) EINVAL\n", iocb->ki_filp->f_path.dentry->d_name.name, - (long long)pos, nr_segs); + (long long)pos, iter->nr_segs); return -EINVAL; } diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index 96e55076069..520c11c2dcc 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -692,7 +692,7 @@ v9fs_cached_file_read(struct file *filp, char __user *data, size_t count, { if (filp->f_flags & O_DIRECT) return v9fs_direct_read(filp, data, count, offset); - return do_sync_read(filp, data, count, offset); + return new_sync_read(filp, data, count, offset); } /** @@ -760,7 +760,7 @@ err_out: buff_write: mutex_unlock(&inode->i_mutex); - return do_sync_write(filp, data, count, offsetp); + return new_sync_write(filp, data, count, offsetp); } /** @@ -778,7 +778,7 @@ v9fs_cached_file_write(struct file *filp, const char __user * data, if (filp->f_flags & O_DIRECT) return v9fs_direct_write(filp, data, count, offset); - return do_sync_write(filp, data, count, offset); + return new_sync_write(filp, data, count, offset); } @@ -847,8 +847,8 @@ const struct file_operations v9fs_cached_file_operations = { .llseek = generic_file_llseek, .read = v9fs_cached_file_read, .write = v9fs_cached_file_write, - .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, .open = v9fs_file_open, .release = v9fs_dir_release, .lock = v9fs_file_lock, @@ -860,8 +860,8 @@ const struct file_operations v9fs_cached_file_operations_dotl = { .llseek = generic_file_llseek, .read = v9fs_cached_file_read, .write = v9fs_cached_file_write, - .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, .open = v9fs_file_open, .release = v9fs_dir_release, .lock = v9fs_file_lock_dotl, diff --git a/fs/adfs/file.c b/fs/adfs/file.c index a36da5382b4..07c9edce5aa 100644 --- a/fs/adfs/file.c +++ b/fs/adfs/file.c @@ -23,12 +23,12 @@ const struct file_operations adfs_file_operations = { .llseek = generic_file_llseek, - .read = do_sync_read, - .aio_read = generic_file_aio_read, + .read = new_sync_read, + .read_iter = generic_file_read_iter, .mmap = generic_file_mmap, .fsync = generic_file_fsync, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .splice_read = generic_file_splice_read, }; diff --git a/fs/affs/file.c b/fs/affs/file.c index 0270303388e..a7fe57d2cd9 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -27,10 +27,10 @@ static int affs_file_release(struct inode *inode, struct file *filp); const struct file_operations affs_file_operations = { .llseek = generic_file_llseek, - .read = do_sync_read, - .aio_read = generic_file_aio_read, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .read = new_sync_read, + .read_iter = generic_file_read_iter, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .open = affs_file_open, .release = affs_file_release, diff --git a/fs/afs/file.c b/fs/afs/file.c index 66d50fe2ee4..932ce07948b 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -31,10 +31,10 @@ const struct file_operations afs_file_operations = { .open = afs_open, .release = afs_release, .llseek = generic_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = generic_file_aio_read, - .aio_write = afs_file_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = generic_file_read_iter, + .write_iter = afs_file_write, .mmap = generic_file_readonly_mmap, .splice_read = generic_file_splice_read, .fsync = afs_fsync, diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 590b55f46d6..71d5982312f 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -747,8 +747,7 @@ extern int afs_write_end(struct file *file, struct address_space *mapping, extern int afs_writepage(struct page *, struct writeback_control *); extern int afs_writepages(struct address_space *, struct writeback_control *); extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); -extern ssize_t afs_file_write(struct kiocb *, const struct iovec *, - unsigned long, loff_t); +extern ssize_t afs_file_write(struct kiocb *, struct iov_iter *); extern int afs_writeback_all(struct afs_vnode *); extern int afs_fsync(struct file *, loff_t, loff_t, int); diff --git a/fs/afs/write.c b/fs/afs/write.c index a890db4b989..ab6adfd5251 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -625,15 +625,14 @@ void afs_pages_written_back(struct afs_vnode *vnode, struct afs_call *call) /* * write to an AFS file */ -ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +ssize_t afs_file_write(struct kiocb *iocb, struct iov_iter *from) { struct afs_vnode *vnode = AFS_FS_I(file_inode(iocb->ki_filp)); ssize_t result; - size_t count = iov_length(iov, nr_segs); + size_t count = iov_iter_count(from); - _enter("{%x.%u},{%zu},%lu,", - vnode->fid.vid, vnode->fid.vnode, count, nr_segs); + _enter("{%x.%u},{%zu},", + vnode->fid.vid, vnode->fid.vnode, count); if (IS_SWAPFILE(&vnode->vfs_inode)) { printk(KERN_INFO @@ -644,7 +643,7 @@ ssize_t afs_file_write(struct kiocb *iocb, const struct iovec *iov, if (!count) return 0; - result = generic_file_aio_write(iocb, iov, nr_segs, pos); + result = generic_file_write_iter(iocb, from); if (IS_ERR_VALUE(result)) { _leave(" = %zd", result); return result; @@ -477,7 +477,7 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel) } EXPORT_SYMBOL(kiocb_set_cancel_fn); -static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb) +static int kiocb_cancel(struct kiocb *kiocb) { kiocb_cancel_fn *old, *cancel; @@ -538,7 +538,7 @@ static void free_ioctx_users(struct percpu_ref *ref) struct kiocb, ki_list); list_del_init(&req->ki_list); - kiocb_cancel(ctx, req); + kiocb_cancel(req); } spin_unlock_irq(&ctx->ctx_lock); @@ -727,42 +727,42 @@ err: * when the processes owning a context have all exited to encourage * the rapid destruction of the kioctx. */ -static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, +static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, struct completion *requests_done) { - if (!atomic_xchg(&ctx->dead, 1)) { - struct kioctx_table *table; + struct kioctx_table *table; - spin_lock(&mm->ioctx_lock); - rcu_read_lock(); - table = rcu_dereference(mm->ioctx_table); + if (atomic_xchg(&ctx->dead, 1)) + return -EINVAL; - WARN_ON(ctx != table->table[ctx->id]); - table->table[ctx->id] = NULL; - rcu_read_unlock(); - spin_unlock(&mm->ioctx_lock); - /* percpu_ref_kill() will do the necessary call_rcu() */ - wake_up_all(&ctx->wait); + spin_lock(&mm->ioctx_lock); + rcu_read_lock(); + table = rcu_dereference(mm->ioctx_table); - /* - * It'd be more correct to do this in free_ioctx(), after all - * the outstanding kiocbs have finished - but by then io_destroy - * has already returned, so io_setup() could potentially return - * -EAGAIN with no ioctxs actually in use (as far as userspace - * could tell). - */ - aio_nr_sub(ctx->max_reqs); + WARN_ON(ctx != table->table[ctx->id]); + table->table[ctx->id] = NULL; + rcu_read_unlock(); + spin_unlock(&mm->ioctx_lock); - if (ctx->mmap_size) - vm_munmap(ctx->mmap_base, ctx->mmap_size); + /* percpu_ref_kill() will do the necessary call_rcu() */ + wake_up_all(&ctx->wait); - ctx->requests_done = requests_done; - percpu_ref_kill(&ctx->users); - } else { - if (requests_done) - complete(requests_done); - } + /* + * It'd be more correct to do this in free_ioctx(), after all + * the outstanding kiocbs have finished - but by then io_destroy + * has already returned, so io_setup() could potentially return + * -EAGAIN with no ioctxs actually in use (as far as userspace + * could tell). + */ + aio_nr_sub(ctx->max_reqs); + + if (ctx->mmap_size) + vm_munmap(ctx->mmap_base, ctx->mmap_size); + + ctx->requests_done = requests_done; + percpu_ref_kill(&ctx->users); + return 0; } /* wait_on_sync_kiocb: @@ -1219,21 +1219,23 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) if (likely(NULL != ioctx)) { struct completion requests_done = COMPLETION_INITIALIZER_ONSTACK(requests_done); + int ret; /* Pass requests_done to kill_ioctx() where it can be set * in a thread-safe way. If we try to set it here then we have * a race condition if two io_destroy() called simultaneously. */ - kill_ioctx(current->mm, ioctx, &requests_done); + ret = kill_ioctx(current->mm, ioctx, &requests_done); percpu_ref_put(&ioctx->users); /* Wait until all IO for the context are done. Otherwise kernel * keep using user-space buffers even if user thinks the context * is destroyed. */ - wait_for_completion(&requests_done); + if (!ret) + wait_for_completion(&requests_done); - return 0; + return ret; } pr_debug("EINVAL: io_destroy: invalid context id\n"); return -EINVAL; @@ -1241,6 +1243,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *, unsigned long, loff_t); +typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *); static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb, int rw, char __user *buf, @@ -1298,7 +1301,9 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, int rw; fmode_t mode; aio_rw_op *rw_op; + rw_iter_op *iter_op; struct iovec inline_vec, *iovec = &inline_vec; + struct iov_iter iter; switch (opcode) { case IOCB_CMD_PREAD: @@ -1306,6 +1311,7 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, mode = FMODE_READ; rw = READ; rw_op = file->f_op->aio_read; + iter_op = file->f_op->read_iter; goto rw_common; case IOCB_CMD_PWRITE: @@ -1313,12 +1319,13 @@ static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, mode = FMODE_WRITE; rw = WRITE; rw_op = file->f_op->aio_write; + iter_op = file->f_op->write_iter; goto rw_common; rw_common: if (unlikely(!(file->f_mode & mode))) return -EBADF; - if (!rw_op) + if (!rw_op && !iter_op) return -EINVAL; ret = (opcode == IOCB_CMD_PREADV || @@ -1347,7 +1354,12 @@ rw_common: if (rw == WRITE) file_start_write(file); - ret = rw_op(req, iovec, nr_segs, req->ki_pos); + if (iter_op) { + iov_iter_init(&iter, rw, iovec, nr_segs, req->ki_nbytes); + ret = iter_op(req, &iter); + } else { + ret = rw_op(req, iovec, nr_segs, req->ki_pos); + } if (rw == WRITE) file_end_write(file); @@ -1585,7 +1597,7 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, kiocb = lookup_kiocb(ctx, iocb, key); if (kiocb) - ret = kiocb_cancel(ctx, kiocb); + ret = kiocb_cancel(kiocb); else ret = -EINVAL; diff --git a/fs/bfs/file.c b/fs/bfs/file.c index ae289221833..e7f88ace1a2 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -23,10 +23,10 @@ const struct file_operations bfs_file_operations = { .llseek = generic_file_llseek, - .read = do_sync_read, - .aio_read = generic_file_aio_read, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .read = new_sync_read, + .read_iter = generic_file_read_iter, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .splice_read = generic_file_splice_read, }; diff --git a/fs/block_dev.c b/fs/block_dev.c index 83fba15cc39..6d7274619bf 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -165,14 +165,15 @@ blkdev_get_block(struct inode *inode, sector_t iblock, } static ssize_t -blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t offset, unsigned long nr_segs) +blkdev_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset, - nr_segs, blkdev_get_block, NULL, NULL, 0); + return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iter, + offset, blkdev_get_block, + NULL, NULL, 0); } int __sync_blockdev(struct block_device *bdev, int wait) @@ -1571,43 +1572,38 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) * Does not take i_mutex for the write and thus is not for general purpose * use. */ -ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct blk_plug plug; ssize_t ret; - BUG_ON(iocb->ki_pos != pos); - blk_start_plug(&plug); - ret = __generic_file_aio_write(iocb, iov, nr_segs); + ret = __generic_file_write_iter(iocb, from); if (ret > 0) { ssize_t err; - - err = generic_write_sync(file, pos, ret); + err = generic_write_sync(file, iocb->ki_pos - ret, ret); if (err < 0) ret = err; } blk_finish_plug(&plug); return ret; } -EXPORT_SYMBOL_GPL(blkdev_aio_write); +EXPORT_SYMBOL_GPL(blkdev_write_iter); -static ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; struct inode *bd_inode = file->f_mapping->host; loff_t size = i_size_read(bd_inode); + loff_t pos = iocb->ki_pos; if (pos >= size) return 0; size -= pos; - if (size < iocb->ki_nbytes) - nr_segs = iov_shorten((struct iovec *)iov, nr_segs, size); - return generic_file_aio_read(iocb, iov, nr_segs, pos); + iov_iter_truncate(to, size); + return generic_file_read_iter(iocb, to); } /* @@ -1639,10 +1635,10 @@ const struct file_operations def_blk_fops = { .open = blkdev_open, .release = blkdev_close, .llseek = block_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = blkdev_aio_read, - .aio_write = blkdev_aio_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = blkdev_read_iter, + .write_iter = blkdev_write_iter, .mmap = generic_file_mmap, .fsync = blkdev_fsync, .unlocked_ioctl = block_ioctl, @@ -1650,7 +1646,7 @@ const struct file_operations def_blk_fops = { .compat_ioctl = compat_blkdev_ioctl, #endif .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, }; int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index f25a9092b94..a389820d158 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2354,7 +2354,7 @@ int end_extent_writepage(struct page *page, int err, u64 start, u64 end) { int uptodate = (err == 0); struct extent_io_tree *tree; - int ret; + int ret = 0; tree = &BTRFS_I(page->mapping->host)->io_tree; @@ -5068,6 +5068,43 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, } } +int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv, + unsigned long start, + unsigned long len) +{ + size_t cur; + size_t offset; + struct page *page; + char *kaddr; + char __user *dst = (char __user *)dstv; + size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1); + unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT; + int ret = 0; + + WARN_ON(start > eb->len); + WARN_ON(start + len > eb->start + eb->len); + + offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1); + + while (len > 0) { + page = extent_buffer_page(eb, i); + + cur = min(len, (PAGE_CACHE_SIZE - offset)); + kaddr = page_address(page); + if (copy_to_user(dst, kaddr + offset, cur)) { + ret = -EFAULT; + break; + } + + dst += cur; + len -= cur; + offset = 0; + i++; + } + + return ret; +} + int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, unsigned long min_len, char **map, unsigned long *map_start, diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 8b63f2d4651..15ce5f2a2b6 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -304,6 +304,9 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, void read_extent_buffer(struct extent_buffer *eb, void *dst, unsigned long start, unsigned long len); +int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dst, + unsigned long start, + unsigned long len); void write_extent_buffer(struct extent_buffer *eb, const void *src, unsigned long start, unsigned long len); void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index e472441feb5..1f2b99cb55e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -448,7 +448,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, write_bytes -= copied; total_copied += copied; - /* Return to btrfs_file_aio_write to fault page */ + /* Return to btrfs_file_write_iter to fault page */ if (unlikely(copied == 0)) break; @@ -1675,27 +1675,22 @@ again: } static ssize_t __btrfs_direct_write(struct kiocb *iocb, - const struct iovec *iov, - unsigned long nr_segs, loff_t pos, - size_t count, size_t ocount) + struct iov_iter *from, + loff_t pos) { struct file *file = iocb->ki_filp; - struct iov_iter i; ssize_t written; ssize_t written_buffered; loff_t endbyte; int err; - written = generic_file_direct_write(iocb, iov, &nr_segs, pos, - count, ocount); + written = generic_file_direct_write(iocb, from, pos); - if (written < 0 || written == count) + if (written < 0 || !iov_iter_count(from)) return written; pos += written; - count -= written; - iov_iter_init(&i, iov, nr_segs, count, written); - written_buffered = __btrfs_buffered_write(file, &i, pos); + written_buffered = __btrfs_buffered_write(file, from, pos); if (written_buffered < 0) { err = written_buffered; goto out; @@ -1730,9 +1725,8 @@ static void update_time_for_write(struct inode *inode) inode_inc_iversion(inode); } -static ssize_t btrfs_file_aio_write(struct kiocb *iocb, - const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t btrfs_file_write_iter(struct kiocb *iocb, + struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); @@ -1741,18 +1735,12 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, u64 end_pos; ssize_t num_written = 0; ssize_t err = 0; - size_t count, ocount; + size_t count = iov_iter_count(from); bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host); + loff_t pos = iocb->ki_pos; mutex_lock(&inode->i_mutex); - err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); - if (err) { - mutex_unlock(&inode->i_mutex); - goto out; - } - count = ocount; - current->backing_dev_info = inode->i_mapping->backing_dev_info; err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); if (err) { @@ -1765,6 +1753,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, goto out; } + iov_iter_truncate(from, count); + err = file_remove_suid(file); if (err) { mutex_unlock(&inode->i_mutex); @@ -1806,14 +1796,9 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, atomic_inc(&BTRFS_I(inode)->sync_writers); if (unlikely(file->f_flags & O_DIRECT)) { - num_written = __btrfs_direct_write(iocb, iov, nr_segs, - pos, count, ocount); + num_written = __btrfs_direct_write(iocb, from, pos); } else { - struct iov_iter i; - - iov_iter_init(&i, iov, nr_segs, count, num_written); - - num_written = __btrfs_buffered_write(file, &i, pos); + num_written = __btrfs_buffered_write(file, from, pos); if (num_written > 0) iocb->ki_pos = pos + num_written; } @@ -2740,11 +2725,11 @@ out: const struct file_operations btrfs_file_operations = { .llseek = btrfs_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = generic_file_aio_read, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = generic_file_read_iter, .splice_read = generic_file_splice_read, - .aio_write = btrfs_file_aio_write, + .write_iter = btrfs_file_write_iter, .mmap = btrfs_file_mmap, .open = generic_file_open, .release = btrfs_release_file, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7fa5f7fd7bc..8925f66a141 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7445,39 +7445,30 @@ free_ordered: } static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) + const struct iov_iter *iter, loff_t offset) { int seg; int i; - size_t size; - unsigned long addr; unsigned blocksize_mask = root->sectorsize - 1; ssize_t retval = -EINVAL; - loff_t end = offset; if (offset & blocksize_mask) goto out; - /* Check the memory alignment. Blocks cannot straddle pages */ - for (seg = 0; seg < nr_segs; seg++) { - addr = (unsigned long)iov[seg].iov_base; - size = iov[seg].iov_len; - end += size; - if ((addr & blocksize_mask) || (size & blocksize_mask)) - goto out; - - /* If this is a write we don't need to check anymore */ - if (rw & WRITE) - continue; + if (iov_iter_alignment(iter) & blocksize_mask) + goto out; - /* - * Check to make sure we don't have duplicate iov_base's in this - * iovec, if so return EINVAL, otherwise we'll get csum errors - * when reading back. - */ - for (i = seg + 1; i < nr_segs; i++) { - if (iov[seg].iov_base == iov[i].iov_base) + /* If this is a write we don't need to check anymore */ + if (rw & WRITE) + return 0; + /* + * Check to make sure we don't have duplicate iov_base's in this + * iovec, if so return EINVAL, otherwise we'll get csum errors + * when reading back. + */ + for (seg = 0; seg < iter->nr_segs; seg++) { + for (i = seg + 1; i < iter->nr_segs; i++) { + if (iter->iov[seg].iov_base == iter->iov[i].iov_base) goto out; } } @@ -7487,8 +7478,7 @@ out: } static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; @@ -7498,8 +7488,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, bool relock = false; ssize_t ret; - if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, - offset, nr_segs)) + if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iter, offset)) return 0; atomic_inc(&inode->i_dio_count); @@ -7511,7 +7500,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, * we need to flush the dirty pages again to make absolutely sure * that any outstanding dirty pages are on disk. */ - count = iov_length(iov, nr_segs); + count = iov_iter_count(iter); if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &BTRFS_I(inode)->runtime_flags)) filemap_fdatawrite_range(inode->i_mapping, offset, count); @@ -7538,7 +7527,7 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, ret = __blockdev_direct_IO(rw, iocb, inode, BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, - iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, + iter, offset, btrfs_get_blocks_direct, NULL, btrfs_submit_direct, flags); if (rw & WRITE) { if (ret < 0 && ret != -EIOCBQUEUED) diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 82c18ba12e3..0d321c23069 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -1957,7 +1957,8 @@ static noinline int copy_to_sk(struct btrfs_root *root, struct btrfs_path *path, struct btrfs_key *key, struct btrfs_ioctl_search_key *sk, - char *buf, + size_t *buf_size, + char __user *ubuf, unsigned long *sk_offset, int *num_found) { @@ -1989,13 +1990,25 @@ static noinline int copy_to_sk(struct btrfs_root *root, if (!key_in_sk(key, sk)) continue; - if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE) + if (sizeof(sh) + item_len > *buf_size) { + if (*num_found) { + ret = 1; + goto out; + } + + /* + * return one empty item back for v1, which does not + * handle -EOVERFLOW + */ + + *buf_size = sizeof(sh) + item_len; item_len = 0; + ret = -EOVERFLOW; + } - if (sizeof(sh) + item_len + *sk_offset > - BTRFS_SEARCH_ARGS_BUFSIZE) { + if (sizeof(sh) + item_len + *sk_offset > *buf_size) { ret = 1; - goto overflow; + goto out; } sh.objectid = key->objectid; @@ -2005,20 +2018,33 @@ static noinline int copy_to_sk(struct btrfs_root *root, sh.transid = found_transid; /* copy search result header */ - memcpy(buf + *sk_offset, &sh, sizeof(sh)); + if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) { + ret = -EFAULT; + goto out; + } + *sk_offset += sizeof(sh); if (item_len) { - char *p = buf + *sk_offset; + char __user *up = ubuf + *sk_offset; /* copy the item */ - read_extent_buffer(leaf, p, - item_off, item_len); + if (read_extent_buffer_to_user(leaf, up, + item_off, item_len)) { + ret = -EFAULT; + goto out; + } + *sk_offset += item_len; } (*num_found)++; - if (*num_found >= sk->nr_items) - break; + if (ret) /* -EOVERFLOW from above */ + goto out; + + if (*num_found >= sk->nr_items) { + ret = 1; + goto out; + } } advance_key: ret = 0; @@ -2033,22 +2059,37 @@ advance_key: key->objectid++; } else ret = 1; -overflow: +out: + /* + * 0: all items from this leaf copied, continue with next + * 1: * more items can be copied, but unused buffer is too small + * * all items were found + * Either way, it will stops the loop which iterates to the next + * leaf + * -EOVERFLOW: item was to large for buffer + * -EFAULT: could not copy extent buffer back to userspace + */ return ret; } static noinline int search_ioctl(struct inode *inode, - struct btrfs_ioctl_search_args *args) + struct btrfs_ioctl_search_key *sk, + size_t *buf_size, + char __user *ubuf) { struct btrfs_root *root; struct btrfs_key key; struct btrfs_path *path; - struct btrfs_ioctl_search_key *sk = &args->key; struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info; int ret; int num_found = 0; unsigned long sk_offset = 0; + if (*buf_size < sizeof(struct btrfs_ioctl_search_header)) { + *buf_size = sizeof(struct btrfs_ioctl_search_header); + return -EOVERFLOW; + } + path = btrfs_alloc_path(); if (!path) return -ENOMEM; @@ -2082,14 +2123,15 @@ static noinline int search_ioctl(struct inode *inode, ret = 0; goto err; } - ret = copy_to_sk(root, path, &key, sk, args->buf, + ret = copy_to_sk(root, path, &key, sk, buf_size, ubuf, &sk_offset, &num_found); btrfs_release_path(path); - if (ret || num_found >= sk->nr_items) + if (ret) break; } - ret = 0; + if (ret > 0) + ret = 0; err: sk->nr_items = num_found; btrfs_free_path(path); @@ -2099,22 +2141,73 @@ err: static noinline int btrfs_ioctl_tree_search(struct file *file, void __user *argp) { - struct btrfs_ioctl_search_args *args; - struct inode *inode; - int ret; + struct btrfs_ioctl_search_args __user *uargs; + struct btrfs_ioctl_search_key sk; + struct inode *inode; + int ret; + size_t buf_size; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - args = memdup_user(argp, sizeof(*args)); - if (IS_ERR(args)) - return PTR_ERR(args); + uargs = (struct btrfs_ioctl_search_args __user *)argp; + + if (copy_from_user(&sk, &uargs->key, sizeof(sk))) + return -EFAULT; + + buf_size = sizeof(uargs->buf); inode = file_inode(file); - ret = search_ioctl(inode, args); - if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) + ret = search_ioctl(inode, &sk, &buf_size, uargs->buf); + + /* + * In the origin implementation an overflow is handled by returning a + * search header with a len of zero, so reset ret. + */ + if (ret == -EOVERFLOW) + ret = 0; + + if (ret == 0 && copy_to_user(&uargs->key, &sk, sizeof(sk))) ret = -EFAULT; - kfree(args); + return ret; +} + +static noinline int btrfs_ioctl_tree_search_v2(struct file *file, + void __user *argp) +{ + struct btrfs_ioctl_search_args_v2 __user *uarg; + struct btrfs_ioctl_search_args_v2 args; + struct inode *inode; + int ret; + size_t buf_size; + const size_t buf_limit = 16 * 1024 * 1024; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + /* copy search header and buffer size */ + uarg = (struct btrfs_ioctl_search_args_v2 __user *)argp; + if (copy_from_user(&args, uarg, sizeof(args))) + return -EFAULT; + + buf_size = args.buf_size; + + if (buf_size < sizeof(struct btrfs_ioctl_search_header)) + return -EOVERFLOW; + + /* limit result size to 16MB */ + if (buf_size > buf_limit) + buf_size = buf_limit; + + inode = file_inode(file); + ret = search_ioctl(inode, &args.key, &buf_size, + (char *)(&uarg->buf[0])); + if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key))) + ret = -EFAULT; + else if (ret == -EOVERFLOW && + copy_to_user(&uarg->buf_size, &buf_size, sizeof(buf_size))) + ret = -EFAULT; + return ret; } @@ -5198,6 +5291,8 @@ long btrfs_ioctl(struct file *file, unsigned int return btrfs_ioctl_trans_end(file); case BTRFS_IOC_TREE_SEARCH: return btrfs_ioctl_tree_search(file, argp); + case BTRFS_IOC_TREE_SEARCH_V2: + return btrfs_ioctl_tree_search_v2(file, argp); case BTRFS_IOC_INO_LOOKUP: return btrfs_ioctl_ino_lookup(file, argp); case BTRFS_IOC_INO_PATHS: diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index cf5aead95a7..98cb6b2630f 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1798,8 +1798,10 @@ static int qgroup_shared_accounting(struct btrfs_trans_handle *trans, return -ENOMEM; tmp = ulist_alloc(GFP_NOFS); - if (!tmp) + if (!tmp) { + ulist_free(qgroups); return -ENOMEM; + } btrfs_get_tree_mod_seq(fs_info, &elem); ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr, elem.seq, diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 30947f92362..09230cf3a24 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -428,8 +428,13 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, continue; } if (!dev->bdev) { - /* cannot read ahead on missing device */ - continue; + /* + * cannot read ahead on missing device, but for RAID5/6, + * REQ_GET_READ_MIRRORS return 1. So don't skip missing + * device for such case. + */ + if (nzones > 1) + continue; } if (dev_replace_is_ongoing && dev == fs_info->dev_replace.tgtdev) { diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c index a5dcacb5df9..9626252ee6b 100644 --- a/fs/btrfs/tests/btrfs-tests.c +++ b/fs/btrfs/tests/btrfs-tests.c @@ -135,7 +135,7 @@ restart: radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) { struct extent_buffer *eb; - eb = radix_tree_deref_slot(slot); + eb = radix_tree_deref_slot_protected(slot, &fs_info->buffer_lock); if (!eb) continue; /* Shouldn't happen but that kind of thinking creates CVE's */ diff --git a/fs/btrfs/tests/qgroup-tests.c b/fs/btrfs/tests/qgroup-tests.c index fa691b754aa..ec3dcb20235 100644 --- a/fs/btrfs/tests/qgroup-tests.c +++ b/fs/btrfs/tests/qgroup-tests.c @@ -415,6 +415,8 @@ int btrfs_test_qgroups(void) ret = -ENOMEM; goto out; } + btrfs_set_header_level(root->node, 0); + btrfs_set_header_nritems(root->node, 0); root->alloc_bytenr += 8192; tmp_root = btrfs_alloc_dummy_root(); diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 9630f10f8e1..511839c04f1 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1284,11 +1284,13 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, goto fail; } - pending->error = btrfs_qgroup_inherit(trans, fs_info, - root->root_key.objectid, - objectid, pending->inherit); - if (pending->error) - goto no_free_objectid; + ret = btrfs_qgroup_inherit(trans, fs_info, + root->root_key.objectid, + objectid, pending->inherit); + if (ret) { + btrfs_abort_transaction(trans, root, ret); + goto fail; + } /* see comments in should_cow_block() */ set_bit(BTRFS_ROOT_FORCE_COW, &root->state); diff --git a/fs/ceph/acl.c b/fs/ceph/acl.c index 21887d63dad..469f2e8657e 100644 --- a/fs/ceph/acl.c +++ b/fs/ceph/acl.c @@ -104,12 +104,6 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type) umode_t new_mode = inode->i_mode, old_mode = inode->i_mode; struct dentry *dentry; - if (acl) { - ret = posix_acl_valid(acl); - if (ret < 0) - goto out; - } - switch (type) { case ACL_TYPE_ACCESS: name = POSIX_ACL_XATTR_ACCESS; diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 65a30e817dd..90b3954d48e 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -211,18 +211,15 @@ static int readpage_nounlock(struct file *filp, struct page *page) SetPageError(page); ceph_fscache_readpage_cancel(inode, page); goto out; - } else { - if (err < PAGE_CACHE_SIZE) { - /* zero fill remainder of page */ - zero_user_segment(page, err, PAGE_CACHE_SIZE); - } else { - flush_dcache_page(page); - } } - SetPageUptodate(page); + if (err < PAGE_CACHE_SIZE) + /* zero fill remainder of page */ + zero_user_segment(page, err, PAGE_CACHE_SIZE); + else + flush_dcache_page(page); - if (err >= 0) - ceph_readpage_to_fscache(inode, page); + SetPageUptodate(page); + ceph_readpage_to_fscache(inode, page); out: return err < 0 ? err : 0; @@ -1187,8 +1184,8 @@ static int ceph_write_end(struct file *file, struct address_space *mapping, * never get called. */ static ssize_t ceph_direct_io(int rw, struct kiocb *iocb, - const struct iovec *iov, - loff_t pos, unsigned long nr_segs) + struct iov_iter *iter, + loff_t pos) { WARN_ON(1); return -EINVAL; diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index c561b628ebc..1fde164b74b 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -221,8 +221,8 @@ int ceph_unreserve_caps(struct ceph_mds_client *mdsc, return 0; } -static struct ceph_cap *get_cap(struct ceph_mds_client *mdsc, - struct ceph_cap_reservation *ctx) +struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, + struct ceph_cap_reservation *ctx) { struct ceph_cap *cap = NULL; @@ -508,15 +508,14 @@ static void __check_cap_issue(struct ceph_inode_info *ci, struct ceph_cap *cap, * it is < 0. (This is so we can atomically add the cap and add an * open file reference to it.) */ -int ceph_add_cap(struct inode *inode, - struct ceph_mds_session *session, u64 cap_id, - int fmode, unsigned issued, unsigned wanted, - unsigned seq, unsigned mseq, u64 realmino, int flags, - struct ceph_cap_reservation *caps_reservation) +void ceph_add_cap(struct inode *inode, + struct ceph_mds_session *session, u64 cap_id, + int fmode, unsigned issued, unsigned wanted, + unsigned seq, unsigned mseq, u64 realmino, int flags, + struct ceph_cap **new_cap) { struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_cap *new_cap = NULL; struct ceph_cap *cap; int mds = session->s_mds; int actual_wanted; @@ -531,20 +530,10 @@ int ceph_add_cap(struct inode *inode, if (fmode >= 0) wanted |= ceph_caps_for_mode(fmode); -retry: - spin_lock(&ci->i_ceph_lock); cap = __get_cap_for_mds(ci, mds); if (!cap) { - if (new_cap) { - cap = new_cap; - new_cap = NULL; - } else { - spin_unlock(&ci->i_ceph_lock); - new_cap = get_cap(mdsc, caps_reservation); - if (new_cap == NULL) - return -ENOMEM; - goto retry; - } + cap = *new_cap; + *new_cap = NULL; cap->issued = 0; cap->implemented = 0; @@ -562,9 +551,6 @@ retry: session->s_nr_caps++; spin_unlock(&session->s_cap_lock); } else { - if (new_cap) - ceph_put_cap(mdsc, new_cap); - /* * auth mds of the inode changed. we received the cap export * message, but still haven't received the cap import message. @@ -626,7 +612,6 @@ retry: ci->i_auth_cap = cap; cap->mds_wanted = wanted; } - ci->i_cap_exporting_issued = 0; } else { WARN_ON(ci->i_auth_cap == cap); } @@ -648,9 +633,6 @@ retry: if (fmode >= 0) __ceph_get_fmode(ci, fmode); - spin_unlock(&ci->i_ceph_lock); - wake_up_all(&ci->i_cap_wq); - return 0; } /* @@ -685,7 +667,7 @@ static int __cap_is_valid(struct ceph_cap *cap) */ int __ceph_caps_issued(struct ceph_inode_info *ci, int *implemented) { - int have = ci->i_snap_caps | ci->i_cap_exporting_issued; + int have = ci->i_snap_caps; struct ceph_cap *cap; struct rb_node *p; @@ -900,7 +882,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci) */ static int __ceph_is_any_caps(struct ceph_inode_info *ci) { - return !RB_EMPTY_ROOT(&ci->i_caps) || ci->i_cap_exporting_issued; + return !RB_EMPTY_ROOT(&ci->i_caps); } int ceph_is_any_caps(struct inode *inode) @@ -2397,32 +2379,30 @@ static void invalidate_aliases(struct inode *inode) * actually be a revocation if it specifies a smaller cap set.) * * caller holds s_mutex and i_ceph_lock, we drop both. - * - * return value: - * 0 - ok - * 1 - check_caps on auth cap only (writeback) - * 2 - check_caps (ack revoke) */ -static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, +static void handle_cap_grant(struct ceph_mds_client *mdsc, + struct inode *inode, struct ceph_mds_caps *grant, + void *snaptrace, int snaptrace_len, + struct ceph_buffer *xattr_buf, struct ceph_mds_session *session, - struct ceph_cap *cap, - struct ceph_buffer *xattr_buf) - __releases(ci->i_ceph_lock) + struct ceph_cap *cap, int issued) + __releases(ci->i_ceph_lock) { struct ceph_inode_info *ci = ceph_inode(inode); int mds = session->s_mds; int seq = le32_to_cpu(grant->seq); int newcaps = le32_to_cpu(grant->caps); - int issued, implemented, used, wanted, dirty; + int used, wanted, dirty; u64 size = le64_to_cpu(grant->size); u64 max_size = le64_to_cpu(grant->max_size); struct timespec mtime, atime, ctime; int check_caps = 0; - int wake = 0; - int writeback = 0; - int queue_invalidate = 0; - int deleted_inode = 0; - int queue_revalidate = 0; + bool wake = 0; + bool writeback = 0; + bool queue_trunc = 0; + bool queue_invalidate = 0; + bool queue_revalidate = 0; + bool deleted_inode = 0; dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n", inode, cap, mds, seq, ceph_cap_string(newcaps)); @@ -2466,16 +2446,13 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, } /* side effects now are allowed */ - - issued = __ceph_caps_issued(ci, &implemented); - issued |= implemented | __ceph_caps_dirty(ci); - cap->cap_gen = session->s_cap_gen; cap->seq = seq; __check_cap_issue(ci, cap, newcaps); - if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { + if ((newcaps & CEPH_CAP_AUTH_SHARED) && + (issued & CEPH_CAP_AUTH_EXCL) == 0) { inode->i_mode = le32_to_cpu(grant->mode); inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(grant->uid)); inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(grant->gid)); @@ -2484,7 +2461,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, from_kgid(&init_user_ns, inode->i_gid)); } - if ((issued & CEPH_CAP_LINK_EXCL) == 0) { + if ((newcaps & CEPH_CAP_AUTH_SHARED) && + (issued & CEPH_CAP_LINK_EXCL) == 0) { set_nlink(inode, le32_to_cpu(grant->nlink)); if (inode->i_nlink == 0 && (newcaps & (CEPH_CAP_LINK_SHARED | CEPH_CAP_LINK_EXCL))) @@ -2511,30 +2489,35 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, if ((issued & CEPH_CAP_FILE_CACHE) && ci->i_rdcache_gen > 1) queue_revalidate = 1; - /* size/ctime/mtime/atime? */ - ceph_fill_file_size(inode, issued, - le32_to_cpu(grant->truncate_seq), - le64_to_cpu(grant->truncate_size), size); - ceph_decode_timespec(&mtime, &grant->mtime); - ceph_decode_timespec(&atime, &grant->atime); - ceph_decode_timespec(&ctime, &grant->ctime); - ceph_fill_file_time(inode, issued, - le32_to_cpu(grant->time_warp_seq), &ctime, &mtime, - &atime); - - - /* file layout may have changed */ - ci->i_layout = grant->layout; - - /* max size increase? */ - if (ci->i_auth_cap == cap && max_size != ci->i_max_size) { - dout("max_size %lld -> %llu\n", ci->i_max_size, max_size); - ci->i_max_size = max_size; - if (max_size >= ci->i_wanted_max_size) { - ci->i_wanted_max_size = 0; /* reset */ - ci->i_requested_max_size = 0; + if (newcaps & CEPH_CAP_ANY_RD) { + /* ctime/mtime/atime? */ + ceph_decode_timespec(&mtime, &grant->mtime); + ceph_decode_timespec(&atime, &grant->atime); + ceph_decode_timespec(&ctime, &grant->ctime); + ceph_fill_file_time(inode, issued, + le32_to_cpu(grant->time_warp_seq), + &ctime, &mtime, &atime); + } + + if (newcaps & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR)) { + /* file layout may have changed */ + ci->i_layout = grant->layout; + /* size/truncate_seq? */ + queue_trunc = ceph_fill_file_size(inode, issued, + le32_to_cpu(grant->truncate_seq), + le64_to_cpu(grant->truncate_size), + size); + /* max size increase? */ + if (ci->i_auth_cap == cap && max_size != ci->i_max_size) { + dout("max_size %lld -> %llu\n", + ci->i_max_size, max_size); + ci->i_max_size = max_size; + if (max_size >= ci->i_wanted_max_size) { + ci->i_wanted_max_size = 0; /* reset */ + ci->i_requested_max_size = 0; + } + wake = 1; } - wake = 1; } /* check cap bits */ @@ -2595,6 +2578,23 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, spin_unlock(&ci->i_ceph_lock); + if (le32_to_cpu(grant->op) == CEPH_CAP_OP_IMPORT) { + down_write(&mdsc->snap_rwsem); + ceph_update_snap_trace(mdsc, snaptrace, + snaptrace + snaptrace_len, false); + downgrade_write(&mdsc->snap_rwsem); + kick_flushing_inode_caps(mdsc, session, inode); + up_read(&mdsc->snap_rwsem); + if (newcaps & ~issued) + wake = 1; + } + + if (queue_trunc) { + ceph_queue_vmtruncate(inode); + ceph_queue_revalidate(inode); + } else if (queue_revalidate) + ceph_queue_revalidate(inode); + if (writeback) /* * queue inode for writeback: we can't actually call @@ -2606,8 +2606,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant, ceph_queue_invalidate(inode); if (deleted_inode) invalidate_aliases(inode); - if (queue_revalidate) - ceph_queue_revalidate(inode); if (wake) wake_up_all(&ci->i_cap_wq); @@ -2784,7 +2782,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, { struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; struct ceph_mds_session *tsession = NULL; - struct ceph_cap *cap, *tcap; + struct ceph_cap *cap, *tcap, *new_cap = NULL; struct ceph_inode_info *ci = ceph_inode(inode); u64 t_cap_id; unsigned mseq = le32_to_cpu(ex->migrate_seq); @@ -2807,7 +2805,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex, retry: spin_lock(&ci->i_ceph_lock); cap = __get_cap_for_mds(ci, mds); - if (!cap) + if (!cap || cap->cap_id != le64_to_cpu(ex->cap_id)) goto out_unlock; if (target < 0) { @@ -2846,15 +2844,14 @@ retry: } __ceph_remove_cap(cap, false); goto out_unlock; - } - - if (tsession) { - int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0; - spin_unlock(&ci->i_ceph_lock); + } else if (tsession) { /* add placeholder for the export tagert */ + int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0; ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0, - t_seq - 1, t_mseq, (u64)-1, flag, NULL); - goto retry; + t_seq - 1, t_mseq, (u64)-1, flag, &new_cap); + + __ceph_remove_cap(cap, false); + goto out_unlock; } spin_unlock(&ci->i_ceph_lock); @@ -2873,6 +2870,7 @@ retry: SINGLE_DEPTH_NESTING); } ceph_add_cap_releases(mdsc, tsession); + new_cap = ceph_get_cap(mdsc, NULL); } else { WARN_ON(1); tsession = NULL; @@ -2887,24 +2885,27 @@ out_unlock: mutex_unlock(&tsession->s_mutex); ceph_put_mds_session(tsession); } + if (new_cap) + ceph_put_cap(mdsc, new_cap); } /* - * Handle cap IMPORT. If there are temp bits from an older EXPORT, - * clean them up. + * Handle cap IMPORT. * - * caller holds s_mutex. + * caller holds s_mutex. acquires i_ceph_lock */ static void handle_cap_import(struct ceph_mds_client *mdsc, struct inode *inode, struct ceph_mds_caps *im, struct ceph_mds_cap_peer *ph, struct ceph_mds_session *session, - void *snaptrace, int snaptrace_len) + struct ceph_cap **target_cap, int *old_issued) + __acquires(ci->i_ceph_lock) { struct ceph_inode_info *ci = ceph_inode(inode); - struct ceph_cap *cap; + struct ceph_cap *cap, *ocap, *new_cap = NULL; int mds = session->s_mds; - unsigned issued = le32_to_cpu(im->caps); + int issued; + unsigned caps = le32_to_cpu(im->caps); unsigned wanted = le32_to_cpu(im->wanted); unsigned seq = le32_to_cpu(im->seq); unsigned mseq = le32_to_cpu(im->migrate_seq); @@ -2924,40 +2925,52 @@ static void handle_cap_import(struct ceph_mds_client *mdsc, dout("handle_cap_import inode %p ci %p mds%d mseq %d peer %d\n", inode, ci, mds, mseq, peer); +retry: spin_lock(&ci->i_ceph_lock); - cap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL; - if (cap && cap->cap_id == p_cap_id) { + cap = __get_cap_for_mds(ci, mds); + if (!cap) { + if (!new_cap) { + spin_unlock(&ci->i_ceph_lock); + new_cap = ceph_get_cap(mdsc, NULL); + goto retry; + } + cap = new_cap; + } else { + if (new_cap) { + ceph_put_cap(mdsc, new_cap); + new_cap = NULL; + } + } + + __ceph_caps_issued(ci, &issued); + issued |= __ceph_caps_dirty(ci); + + ceph_add_cap(inode, session, cap_id, -1, caps, wanted, seq, mseq, + realmino, CEPH_CAP_FLAG_AUTH, &new_cap); + + ocap = peer >= 0 ? __get_cap_for_mds(ci, peer) : NULL; + if (ocap && ocap->cap_id == p_cap_id) { dout(" remove export cap %p mds%d flags %d\n", - cap, peer, ph->flags); + ocap, peer, ph->flags); if ((ph->flags & CEPH_CAP_FLAG_AUTH) && - (cap->seq != le32_to_cpu(ph->seq) || - cap->mseq != le32_to_cpu(ph->mseq))) { + (ocap->seq != le32_to_cpu(ph->seq) || + ocap->mseq != le32_to_cpu(ph->mseq))) { pr_err("handle_cap_import: mismatched seq/mseq: " "ino (%llx.%llx) mds%d seq %d mseq %d " "importer mds%d has peer seq %d mseq %d\n", - ceph_vinop(inode), peer, cap->seq, - cap->mseq, mds, le32_to_cpu(ph->seq), + ceph_vinop(inode), peer, ocap->seq, + ocap->mseq, mds, le32_to_cpu(ph->seq), le32_to_cpu(ph->mseq)); } - ci->i_cap_exporting_issued = cap->issued; - __ceph_remove_cap(cap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); + __ceph_remove_cap(ocap, (ph->flags & CEPH_CAP_FLAG_RELEASE)); } /* make sure we re-request max_size, if necessary */ ci->i_wanted_max_size = 0; ci->i_requested_max_size = 0; - spin_unlock(&ci->i_ceph_lock); - - down_write(&mdsc->snap_rwsem); - ceph_update_snap_trace(mdsc, snaptrace, snaptrace+snaptrace_len, - false); - downgrade_write(&mdsc->snap_rwsem); - ceph_add_cap(inode, session, cap_id, -1, - issued, wanted, seq, mseq, realmino, CEPH_CAP_FLAG_AUTH, - NULL /* no caps context */); - kick_flushing_inode_caps(mdsc, session, inode); - up_read(&mdsc->snap_rwsem); + *old_issued = issued; + *target_cap = cap; } /* @@ -2977,7 +2990,7 @@ void ceph_handle_caps(struct ceph_mds_session *session, struct ceph_mds_caps *h; struct ceph_mds_cap_peer *peer = NULL; int mds = session->s_mds; - int op; + int op, issued; u32 seq, mseq; struct ceph_vino vino; u64 cap_id; @@ -3069,7 +3082,10 @@ void ceph_handle_caps(struct ceph_mds_session *session, case CEPH_CAP_OP_IMPORT: handle_cap_import(mdsc, inode, h, peer, session, - snaptrace, snaptrace_len); + &cap, &issued); + handle_cap_grant(mdsc, inode, h, snaptrace, snaptrace_len, + msg->middle, session, cap, issued); + goto done_unlocked; } /* the rest require a cap */ @@ -3086,8 +3102,10 @@ void ceph_handle_caps(struct ceph_mds_session *session, switch (op) { case CEPH_CAP_OP_REVOKE: case CEPH_CAP_OP_GRANT: - case CEPH_CAP_OP_IMPORT: - handle_cap_grant(inode, h, session, cap, msg->middle); + __ceph_caps_issued(ci, &issued); + issued |= __ceph_caps_dirty(ci); + handle_cap_grant(mdsc, inode, h, NULL, 0, msg->middle, + session, cap, issued); goto done_unlocked; case CEPH_CAP_OP_FLUSH_ACK: diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 00d6af6a32e..8d7d782f438 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -169,7 +169,7 @@ static struct dentry *__get_parent(struct super_block *sb, return dentry; } -struct dentry *ceph_get_parent(struct dentry *child) +static struct dentry *ceph_get_parent(struct dentry *child) { /* don't re-export snaps */ if (ceph_snap(child->d_inode) != CEPH_NOSNAP) diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 88a6df4cbe6..302085100c2 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -418,7 +418,7 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, struct page **pages; u64 off = iocb->ki_pos; int num_pages, ret; - size_t len = i->count; + size_t len = iov_iter_count(i); dout("sync_read on file %p %llu~%u %s\n", file, off, (unsigned)len, @@ -436,25 +436,26 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, if (file->f_flags & O_DIRECT) { while (iov_iter_count(i)) { - void __user *data = i->iov[0].iov_base + i->iov_offset; - size_t len = i->iov[0].iov_len - i->iov_offset; + size_t start; + ssize_t n; - num_pages = calc_pages_for((unsigned long)data, len); - pages = ceph_get_direct_page_vector(data, - num_pages, true); - if (IS_ERR(pages)) - return PTR_ERR(pages); + n = iov_iter_get_pages_alloc(i, &pages, INT_MAX, &start); + if (n < 0) + return n; - ret = striped_read(inode, off, len, + num_pages = (n + start + PAGE_SIZE - 1) / PAGE_SIZE; + + ret = striped_read(inode, off, n, pages, num_pages, checkeof, - 1, (unsigned long)data & ~PAGE_MASK); + 1, start); + ceph_put_page_vector(pages, num_pages, true); if (ret <= 0) break; off += ret; iov_iter_advance(i, ret); - if (ret < len) + if (ret < n) break; } } else { @@ -466,25 +467,14 @@ static ssize_t ceph_sync_read(struct kiocb *iocb, struct iov_iter *i, num_pages, checkeof, 0, 0); if (ret > 0) { int l, k = 0; - size_t left = len = ret; + size_t left = ret; while (left) { - void __user *data = i->iov[0].iov_base - + i->iov_offset; - l = min(i->iov[0].iov_len - i->iov_offset, - left); - - ret = ceph_copy_page_vector_to_user(&pages[k], - data, off, - l); - if (ret > 0) { - iov_iter_advance(i, ret); - left -= ret; - off += ret; - k = calc_pages_for(iocb->ki_pos, - len - left + 1) - 1; - BUG_ON(k >= num_pages && left); - } else + int copy = min_t(size_t, PAGE_SIZE, left); + l = copy_page_to_iter(pages[k++], 0, copy, i); + off += l; + left -= l; + if (l < copy) break; } } @@ -541,8 +531,7 @@ static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe) * objects, rollback on failure, etc.) */ static ssize_t -ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, size_t count) +ceph_sync_direct_write(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); @@ -556,11 +545,10 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov, int written = 0; int flags; int check_caps = 0; - int page_align; int ret; struct timespec mtime = CURRENT_TIME; loff_t pos = iocb->ki_pos; - struct iov_iter i; + size_t count = iov_iter_count(from); if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) return -EROFS; @@ -582,13 +570,10 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov, CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE; - iov_iter_init(&i, iov, nr_segs, count, 0); - - while (iov_iter_count(&i) > 0) { - void __user *data = i.iov->iov_base + i.iov_offset; - u64 len = i.iov->iov_len - i.iov_offset; - - page_align = (unsigned long)data & ~PAGE_MASK; + while (iov_iter_count(from) > 0) { + u64 len = iov_iter_single_seg_count(from); + size_t start; + ssize_t n; snapc = ci->i_snap_realm->cached_context; vino = ceph_vino(inode); @@ -604,20 +589,21 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov, break; } - num_pages = calc_pages_for(page_align, len); - pages = ceph_get_direct_page_vector(data, num_pages, false); - if (IS_ERR(pages)) { - ret = PTR_ERR(pages); - goto out; + n = iov_iter_get_pages_alloc(from, &pages, len, &start); + if (unlikely(n < 0)) { + ret = n; + ceph_osdc_put_request(req); + break; } + num_pages = (n + start + PAGE_SIZE - 1) / PAGE_SIZE; /* * throw out any page cache pages in this range. this * may block. */ truncate_inode_pages_range(inode->i_mapping, pos, - (pos+len) | (PAGE_CACHE_SIZE-1)); - osd_req_op_extent_osd_data_pages(req, 0, pages, len, page_align, + (pos+n) | (PAGE_CACHE_SIZE-1)); + osd_req_op_extent_osd_data_pages(req, 0, pages, n, start, false, false); /* BUG_ON(vino.snap != CEPH_NOSNAP); */ @@ -629,22 +615,20 @@ ceph_sync_direct_write(struct kiocb *iocb, const struct iovec *iov, ceph_put_page_vector(pages, num_pages, false); -out: ceph_osdc_put_request(req); - if (ret == 0) { - pos += len; - written += len; - iov_iter_advance(&i, (size_t)len); - - if (pos > i_size_read(inode)) { - check_caps = ceph_inode_set_size(inode, pos); - if (check_caps) - ceph_check_caps(ceph_inode(inode), - CHECK_CAPS_AUTHONLY, - NULL); - } - } else + if (ret) break; + pos += n; + written += n; + iov_iter_advance(from, n); + + if (pos > i_size_read(inode)) { + check_caps = ceph_inode_set_size(inode, pos); + if (check_caps) + ceph_check_caps(ceph_inode(inode), + CHECK_CAPS_AUTHONLY, + NULL); + } } if (ret != -EOLDSNAPC && written > 0) { @@ -662,8 +646,7 @@ out: * correct atomic write, we should e.g. take write locks on all * objects, rollback on failure, etc.) */ -static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, size_t count) +static ssize_t ceph_sync_write(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); @@ -681,7 +664,7 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov, int ret; struct timespec mtime = CURRENT_TIME; loff_t pos = iocb->ki_pos; - struct iov_iter i; + size_t count = iov_iter_count(from); if (ceph_snap(file_inode(file)) != CEPH_NOSNAP) return -EROFS; @@ -703,9 +686,7 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov, CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ACK; - iov_iter_init(&i, iov, nr_segs, count, 0); - - while ((len = iov_iter_count(&i)) > 0) { + while ((len = iov_iter_count(from)) > 0) { size_t left; int n; @@ -737,13 +718,12 @@ static ssize_t ceph_sync_write(struct kiocb *iocb, const struct iovec *iov, left = len; for (n = 0; n < num_pages; n++) { size_t plen = min_t(size_t, left, PAGE_SIZE); - ret = iov_iter_copy_from_user(pages[n], &i, 0, plen); + ret = copy_page_from_iter(pages[n], 0, plen, from); if (ret != plen) { ret = -EFAULT; break; } left -= ret; - iov_iter_advance(&i, ret); } if (ret < 0) { @@ -796,8 +776,7 @@ out: * * Hmm, the sync read case isn't actually async... should it be? */ -static ssize_t ceph_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t ceph_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct file *filp = iocb->ki_filp; struct ceph_file_info *fi = filp->private_data; @@ -823,40 +802,20 @@ again: if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 || (iocb->ki_filp->f_flags & O_DIRECT) || (fi->flags & CEPH_F_SYNC)) { - struct iov_iter i; dout("aio_sync_read %p %llx.%llx %llu~%u got cap refs on %s\n", inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, ceph_cap_string(got)); - if (!read) { - ret = generic_segment_checks(iov, &nr_segs, - &len, VERIFY_WRITE); - if (ret) - goto out; - } - - iov_iter_init(&i, iov, nr_segs, len, read); - /* hmm, this isn't really async... */ - ret = ceph_sync_read(iocb, &i, &checkeof); + ret = ceph_sync_read(iocb, to, &checkeof); } else { - /* - * We can't modify the content of iov, - * so we only read from beginning. - */ - if (read) { - iocb->ki_pos = pos; - len = iocb->ki_nbytes; - read = 0; - } dout("aio_read %p %llx.%llx %llu~%u got cap refs on %s\n", - inode, ceph_vinop(inode), pos, (unsigned)len, + inode, ceph_vinop(inode), iocb->ki_pos, (unsigned)len, ceph_cap_string(got)); - ret = generic_file_aio_read(iocb, iov, nr_segs, pos); + ret = generic_file_read_iter(iocb, to); } -out: dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n", inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret); ceph_put_cap_refs(ci, got); @@ -872,6 +831,7 @@ out: ", reading more\n", iocb->ki_pos, inode->i_size); + iov_iter_advance(to, ret); read += ret; len -= ret; checkeof = 0; @@ -895,8 +855,7 @@ out: * * If we are near ENOSPC, write synchronously. */ -static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t ceph_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct ceph_file_info *fi = file->private_data; @@ -904,18 +863,15 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, struct ceph_inode_info *ci = ceph_inode(inode); struct ceph_osd_client *osdc = &ceph_sb_to_client(inode->i_sb)->client->osdc; - ssize_t count, written = 0; + ssize_t count = iov_iter_count(from), written = 0; int err, want, got; + loff_t pos = iocb->ki_pos; if (ceph_snap(inode) != CEPH_NOSNAP) return -EROFS; mutex_lock(&inode->i_mutex); - err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); - if (err) - goto out; - /* We can write back this queue in page reclaim */ current->backing_dev_info = file->f_mapping->backing_dev_info; @@ -925,6 +881,7 @@ static ssize_t ceph_aio_write(struct kiocb *iocb, const struct iovec *iov, if (count == 0) goto out; + iov_iter_truncate(from, count); err = file_remove_suid(file); if (err) @@ -956,23 +913,26 @@ retry_snap: if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || (file->f_flags & O_DIRECT) || (fi->flags & CEPH_F_SYNC)) { + struct iov_iter data; mutex_unlock(&inode->i_mutex); + /* we might need to revert back to that point */ + data = *from; if (file->f_flags & O_DIRECT) - written = ceph_sync_direct_write(iocb, iov, - nr_segs, count); + written = ceph_sync_direct_write(iocb, &data); else - written = ceph_sync_write(iocb, iov, nr_segs, count); + written = ceph_sync_write(iocb, &data); if (written == -EOLDSNAPC) { dout("aio_write %p %llx.%llx %llu~%u" "got EOLDSNAPC, retrying\n", inode, ceph_vinop(inode), - pos, (unsigned)iov->iov_len); + pos, (unsigned)count); mutex_lock(&inode->i_mutex); goto retry_snap; } + if (written > 0) + iov_iter_advance(from, written); } else { loff_t old_size = inode->i_size; - struct iov_iter from; /* * No need to acquire the i_truncate_mutex. Because * the MDS revokes Fwb caps before sending truncate @@ -980,8 +940,7 @@ retry_snap: * are pending vmtruncate. So write and vmtruncate * can not run at the same time */ - iov_iter_init(&from, iov, nr_segs, count, 0); - written = generic_perform_write(file, &from, pos); + written = generic_perform_write(file, from, pos); if (likely(written >= 0)) iocb->ki_pos = pos + written; if (inode->i_size > old_size) @@ -999,7 +958,7 @@ retry_snap: } dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", - inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, + inode, ceph_vinop(inode), pos, (unsigned)count, ceph_cap_string(got)); ceph_put_cap_refs(ci, got); @@ -1276,16 +1235,16 @@ const struct file_operations ceph_file_fops = { .open = ceph_open, .release = ceph_release, .llseek = ceph_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = ceph_aio_read, - .aio_write = ceph_aio_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = ceph_read_iter, + .write_iter = ceph_write_iter, .mmap = ceph_mmap, .fsync = ceph_fsync, .lock = ceph_lock, .flock = ceph_flock, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, .unlocked_ioctl = ceph_ioctl, .compat_ioctl = ceph_ioctl, .fallocate = ceph_fallocate, diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index e4fff9ff1c2..04c89c266ce 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -10,6 +10,7 @@ #include <linux/writeback.h> #include <linux/vmalloc.h> #include <linux/posix_acl.h> +#include <linux/random.h> #include "super.h" #include "mds_client.h" @@ -179,9 +180,8 @@ struct ceph_inode_frag *__ceph_find_frag(struct ceph_inode_info *ci, u32 f) * specified, copy the frag delegation info to the caller if * it is present. */ -u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, - struct ceph_inode_frag *pfrag, - int *found) +static u32 __ceph_choose_frag(struct ceph_inode_info *ci, u32 v, + struct ceph_inode_frag *pfrag, int *found) { u32 t = ceph_frag_make(0, 0); struct ceph_inode_frag *frag; @@ -191,7 +191,6 @@ u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, if (found) *found = 0; - mutex_lock(&ci->i_fragtree_mutex); while (1) { WARN_ON(!ceph_frag_contains_value(t, v)); frag = __ceph_find_frag(ci, t); @@ -220,10 +219,19 @@ u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, } dout("choose_frag(%x) = %x\n", v, t); - mutex_unlock(&ci->i_fragtree_mutex); return t; } +u32 ceph_choose_frag(struct ceph_inode_info *ci, u32 v, + struct ceph_inode_frag *pfrag, int *found) +{ + u32 ret; + mutex_lock(&ci->i_fragtree_mutex); + ret = __ceph_choose_frag(ci, v, pfrag, found); + mutex_unlock(&ci->i_fragtree_mutex); + return ret; +} + /* * Process dirfrag (delegation) info from the mds. Include leaf * fragment in tree ONLY if ndist > 0. Otherwise, only @@ -237,11 +245,17 @@ static int ceph_fill_dirfrag(struct inode *inode, u32 id = le32_to_cpu(dirinfo->frag); int mds = le32_to_cpu(dirinfo->auth); int ndist = le32_to_cpu(dirinfo->ndist); + int diri_auth = -1; int i; int err = 0; + spin_lock(&ci->i_ceph_lock); + if (ci->i_auth_cap) + diri_auth = ci->i_auth_cap->mds; + spin_unlock(&ci->i_ceph_lock); + mutex_lock(&ci->i_fragtree_mutex); - if (ndist == 0) { + if (ndist == 0 && mds == diri_auth) { /* no delegation info needed. */ frag = __ceph_find_frag(ci, id); if (!frag) @@ -286,6 +300,75 @@ out: return err; } +static int ceph_fill_fragtree(struct inode *inode, + struct ceph_frag_tree_head *fragtree, + struct ceph_mds_reply_dirfrag *dirinfo) +{ + struct ceph_inode_info *ci = ceph_inode(inode); + struct ceph_inode_frag *frag; + struct rb_node *rb_node; + int i; + u32 id, nsplits; + bool update = false; + + mutex_lock(&ci->i_fragtree_mutex); + nsplits = le32_to_cpu(fragtree->nsplits); + if (nsplits) { + i = prandom_u32() % nsplits; + id = le32_to_cpu(fragtree->splits[i].frag); + if (!__ceph_find_frag(ci, id)) + update = true; + } else if (!RB_EMPTY_ROOT(&ci->i_fragtree)) { + rb_node = rb_first(&ci->i_fragtree); + frag = rb_entry(rb_node, struct ceph_inode_frag, node); + if (frag->frag != ceph_frag_make(0, 0) || rb_next(rb_node)) + update = true; + } + if (!update && dirinfo) { + id = le32_to_cpu(dirinfo->frag); + if (id != __ceph_choose_frag(ci, id, NULL, NULL)) + update = true; + } + if (!update) + goto out_unlock; + + dout("fill_fragtree %llx.%llx\n", ceph_vinop(inode)); + rb_node = rb_first(&ci->i_fragtree); + for (i = 0; i < nsplits; i++) { + id = le32_to_cpu(fragtree->splits[i].frag); + frag = NULL; + while (rb_node) { + frag = rb_entry(rb_node, struct ceph_inode_frag, node); + if (ceph_frag_compare(frag->frag, id) >= 0) { + if (frag->frag != id) + frag = NULL; + else + rb_node = rb_next(rb_node); + break; + } + rb_node = rb_next(rb_node); + rb_erase(&frag->node, &ci->i_fragtree); + kfree(frag); + frag = NULL; + } + if (!frag) { + frag = __get_or_create_frag(ci, id); + if (IS_ERR(frag)) + continue; + } + frag->split_by = le32_to_cpu(fragtree->splits[i].by); + dout(" frag %x split by %d\n", frag->frag, frag->split_by); + } + while (rb_node) { + frag = rb_entry(rb_node, struct ceph_inode_frag, node); + rb_node = rb_next(rb_node); + rb_erase(&frag->node, &ci->i_fragtree); + kfree(frag); + } +out_unlock: + mutex_unlock(&ci->i_fragtree_mutex); + return 0; +} /* * initialize a newly allocated inode. @@ -341,7 +424,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb) INIT_LIST_HEAD(&ci->i_cap_snaps); ci->i_head_snapc = NULL; ci->i_snap_caps = 0; - ci->i_cap_exporting_issued = 0; for (i = 0; i < CEPH_FILE_MODE_NUM; i++) ci->i_nr_by_mode[i] = 0; @@ -407,7 +489,7 @@ void ceph_destroy_inode(struct inode *inode) /* * we may still have a snap_realm reference if there are stray - * caps in i_cap_exporting_issued or i_snap_caps. + * caps in i_snap_caps. */ if (ci->i_snap_realm) { struct ceph_mds_client *mdsc = @@ -582,22 +664,26 @@ static int fill_inode(struct inode *inode, unsigned long ttl_from, int cap_fmode, struct ceph_cap_reservation *caps_reservation) { + struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; struct ceph_mds_reply_inode *info = iinfo->in; struct ceph_inode_info *ci = ceph_inode(inode); - int i; - int issued = 0, implemented; + int issued = 0, implemented, new_issued; struct timespec mtime, atime, ctime; - u32 nsplits; - struct ceph_inode_frag *frag; - struct rb_node *rb_node; struct ceph_buffer *xattr_blob = NULL; + struct ceph_cap *new_cap = NULL; int err = 0; - int queue_trunc = 0; + bool wake = false; + bool queue_trunc = false; + bool new_version = false; dout("fill_inode %p ino %llx.%llx v %llu had %llu\n", inode, ceph_vinop(inode), le64_to_cpu(info->version), ci->i_version); + /* prealloc new cap struct */ + if (info->cap.caps && ceph_snap(inode) == CEPH_NOSNAP) + new_cap = ceph_get_cap(mdsc, caps_reservation); + /* * prealloc xattr data, if it looks like we'll need it. only * if len > 4 (meaning there are actually xattrs; the first 4 @@ -623,19 +709,23 @@ static int fill_inode(struct inode *inode, * 3 2 skip * 3 3 update */ - if (le64_to_cpu(info->version) > 0 && - (ci->i_version & ~1) >= le64_to_cpu(info->version)) - goto no_change; - + if (ci->i_version == 0 || + ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && + le64_to_cpu(info->version) > (ci->i_version & ~1))) + new_version = true; + issued = __ceph_caps_issued(ci, &implemented); issued |= implemented | __ceph_caps_dirty(ci); + new_issued = ~issued & le32_to_cpu(info->cap.caps); /* update inode */ ci->i_version = le64_to_cpu(info->version); inode->i_version++; inode->i_rdev = le32_to_cpu(info->rdev); + inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; - if ((issued & CEPH_CAP_AUTH_EXCL) == 0) { + if ((new_version || (new_issued & CEPH_CAP_AUTH_SHARED)) && + (issued & CEPH_CAP_AUTH_EXCL) == 0) { inode->i_mode = le32_to_cpu(info->mode); inode->i_uid = make_kuid(&init_user_ns, le32_to_cpu(info->uid)); inode->i_gid = make_kgid(&init_user_ns, le32_to_cpu(info->gid)); @@ -644,23 +734,35 @@ static int fill_inode(struct inode *inode, from_kgid(&init_user_ns, inode->i_gid)); } - if ((issued & CEPH_CAP_LINK_EXCL) == 0) + if ((new_version || (new_issued & CEPH_CAP_LINK_SHARED)) && + (issued & CEPH_CAP_LINK_EXCL) == 0) set_nlink(inode, le32_to_cpu(info->nlink)); - /* be careful with mtime, atime, size */ - ceph_decode_timespec(&atime, &info->atime); - ceph_decode_timespec(&mtime, &info->mtime); - ceph_decode_timespec(&ctime, &info->ctime); - queue_trunc = ceph_fill_file_size(inode, issued, - le32_to_cpu(info->truncate_seq), - le64_to_cpu(info->truncate_size), - le64_to_cpu(info->size)); - ceph_fill_file_time(inode, issued, - le32_to_cpu(info->time_warp_seq), - &ctime, &mtime, &atime); - - ci->i_layout = info->layout; - inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1; + if (new_version || (new_issued & CEPH_CAP_ANY_RD)) { + /* be careful with mtime, atime, size */ + ceph_decode_timespec(&atime, &info->atime); + ceph_decode_timespec(&mtime, &info->mtime); + ceph_decode_timespec(&ctime, &info->ctime); + ceph_fill_file_time(inode, issued, + le32_to_cpu(info->time_warp_seq), + &ctime, &mtime, &atime); + } + + if (new_version || + (new_issued & (CEPH_CAP_ANY_FILE_RD | CEPH_CAP_ANY_FILE_WR))) { + ci->i_layout = info->layout; + queue_trunc = ceph_fill_file_size(inode, issued, + le32_to_cpu(info->truncate_seq), + le64_to_cpu(info->truncate_size), + le64_to_cpu(info->size)); + /* only update max_size on auth cap */ + if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && + ci->i_max_size != le64_to_cpu(info->max_size)) { + dout("max_size %lld -> %llu\n", ci->i_max_size, + le64_to_cpu(info->max_size)); + ci->i_max_size = le64_to_cpu(info->max_size); + } + } /* xattrs */ /* note that if i_xattrs.len <= 4, i_xattrs.data will still be NULL. */ @@ -745,58 +847,6 @@ static int fill_inode(struct inode *inode, dout(" marking %p complete (empty)\n", inode); __ceph_dir_set_complete(ci, atomic_read(&ci->i_release_count)); } -no_change: - /* only update max_size on auth cap */ - if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) && - ci->i_max_size != le64_to_cpu(info->max_size)) { - dout("max_size %lld -> %llu\n", ci->i_max_size, - le64_to_cpu(info->max_size)); - ci->i_max_size = le64_to_cpu(info->max_size); - } - - spin_unlock(&ci->i_ceph_lock); - - /* queue truncate if we saw i_size decrease */ - if (queue_trunc) - ceph_queue_vmtruncate(inode); - - /* populate frag tree */ - /* FIXME: move me up, if/when version reflects fragtree changes */ - nsplits = le32_to_cpu(info->fragtree.nsplits); - mutex_lock(&ci->i_fragtree_mutex); - rb_node = rb_first(&ci->i_fragtree); - for (i = 0; i < nsplits; i++) { - u32 id = le32_to_cpu(info->fragtree.splits[i].frag); - frag = NULL; - while (rb_node) { - frag = rb_entry(rb_node, struct ceph_inode_frag, node); - if (ceph_frag_compare(frag->frag, id) >= 0) { - if (frag->frag != id) - frag = NULL; - else - rb_node = rb_next(rb_node); - break; - } - rb_node = rb_next(rb_node); - rb_erase(&frag->node, &ci->i_fragtree); - kfree(frag); - frag = NULL; - } - if (!frag) { - frag = __get_or_create_frag(ci, id); - if (IS_ERR(frag)) - continue; - } - frag->split_by = le32_to_cpu(info->fragtree.splits[i].by); - dout(" frag %x split by %d\n", frag->frag, frag->split_by); - } - while (rb_node) { - frag = rb_entry(rb_node, struct ceph_inode_frag, node); - rb_node = rb_next(rb_node); - rb_erase(&frag->node, &ci->i_fragtree); - kfree(frag); - } - mutex_unlock(&ci->i_fragtree_mutex); /* were we issued a capability? */ if (info->cap.caps) { @@ -809,30 +859,41 @@ no_change: le32_to_cpu(info->cap.seq), le32_to_cpu(info->cap.mseq), le64_to_cpu(info->cap.realm), - info->cap.flags, - caps_reservation); + info->cap.flags, &new_cap); + wake = true; } else { - spin_lock(&ci->i_ceph_lock); dout(" %p got snap_caps %s\n", inode, ceph_cap_string(le32_to_cpu(info->cap.caps))); ci->i_snap_caps |= le32_to_cpu(info->cap.caps); if (cap_fmode >= 0) __ceph_get_fmode(ci, cap_fmode); - spin_unlock(&ci->i_ceph_lock); } } else if (cap_fmode >= 0) { pr_warn("mds issued no caps on %llx.%llx\n", ceph_vinop(inode)); __ceph_get_fmode(ci, cap_fmode); } + spin_unlock(&ci->i_ceph_lock); + + if (wake) + wake_up_all(&ci->i_cap_wq); + + /* queue truncate if we saw i_size decrease */ + if (queue_trunc) + ceph_queue_vmtruncate(inode); + + /* populate frag tree */ + if (S_ISDIR(inode->i_mode)) + ceph_fill_fragtree(inode, &info->fragtree, dirinfo); /* update delegation info? */ if (dirinfo) ceph_fill_dirfrag(inode, dirinfo); err = 0; - out: + if (new_cap) + ceph_put_cap(mdsc, new_cap); if (xattr_blob) ceph_buffer_put(xattr_blob); return err; @@ -1485,7 +1546,7 @@ static void ceph_invalidate_work(struct work_struct *work) orig_gen = ci->i_rdcache_gen; spin_unlock(&ci->i_ceph_lock); - truncate_inode_pages(inode->i_mapping, 0); + truncate_pagecache(inode, 0); spin_lock(&ci->i_ceph_lock); if (orig_gen == ci->i_rdcache_gen && @@ -1588,7 +1649,7 @@ retry: ci->i_truncate_pending, to); spin_unlock(&ci->i_ceph_lock); - truncate_inode_pages(inode->i_mapping, to); + truncate_pagecache(inode, to); spin_lock(&ci->i_ceph_lock); if (to == ci->i_truncate_size) { diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 9a33b98cb00..92a2548278f 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1558,6 +1558,8 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode) init_completion(&req->r_safe_completion); INIT_LIST_HEAD(&req->r_unsafe_item); + req->r_stamp = CURRENT_TIME; + req->r_op = op; req->r_direct_mode = mode; return req; @@ -1783,7 +1785,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, } len = sizeof(*head) + - pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)); + pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) + + sizeof(struct timespec); /* calculate (max) length for cap releases */ len += sizeof(struct ceph_mds_request_release) * @@ -1800,6 +1803,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, goto out_free2; } + msg->hdr.version = 2; msg->hdr.tid = cpu_to_le64(req->r_tid); head = msg->front.iov_base; @@ -1836,6 +1840,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc, mds, req->r_old_inode_drop, req->r_old_inode_unless, 0); head->num_releases = cpu_to_le16(releases); + /* time stamp */ + ceph_encode_copy(&p, &req->r_stamp, sizeof(req->r_stamp)); + BUG_ON(p > end); msg->front.iov_len = p - msg->front.iov_base; msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index e90cfccf93b..e00737cf523 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -194,6 +194,7 @@ struct ceph_mds_request { int r_fmode; /* file mode, if expecting cap */ kuid_t r_uid; kgid_t r_gid; + struct timespec r_stamp; /* for choosing which mds to send this request to */ int r_direct_mode; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index ead05cc1f44..12b20744e38 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -292,7 +292,6 @@ struct ceph_inode_info { struct ceph_snap_context *i_head_snapc; /* set if wr_buffer_head > 0 or dirty|flushing caps */ unsigned i_snap_caps; /* cap bits for snapped files */ - unsigned i_cap_exporting_issued; int i_nr_by_mode[CEPH_FILE_MODE_NUM]; /* open file counts */ @@ -775,11 +774,13 @@ static inline void ceph_forget_all_cached_acls(struct inode *inode) extern const char *ceph_cap_string(int c); extern void ceph_handle_caps(struct ceph_mds_session *session, struct ceph_msg *msg); -extern int ceph_add_cap(struct inode *inode, - struct ceph_mds_session *session, u64 cap_id, - int fmode, unsigned issued, unsigned wanted, - unsigned cap, unsigned seq, u64 realmino, int flags, - struct ceph_cap_reservation *caps_reservation); +extern struct ceph_cap *ceph_get_cap(struct ceph_mds_client *mdsc, + struct ceph_cap_reservation *ctx); +extern void ceph_add_cap(struct inode *inode, + struct ceph_mds_session *session, u64 cap_id, + int fmode, unsigned issued, unsigned wanted, + unsigned cap, unsigned seq, u64 realmino, int flags, + struct ceph_cap **new_cap); extern void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release); extern void ceph_put_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap); diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 6aaa8112c53..2c90d07c0b3 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -725,8 +725,7 @@ out_nls: goto out; } -static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t cifs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); struct cifsInodeInfo *cinode = CIFS_I(inode); @@ -737,14 +736,14 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (written) return written; - written = generic_file_aio_write(iocb, iov, nr_segs, pos); + written = generic_file_write_iter(iocb, from); if (CIFS_CACHE_WRITE(CIFS_I(inode))) goto out; rc = filemap_fdatawrite(inode->i_mapping); if (rc) - cifs_dbg(FYI, "cifs_file_aio_write: %d rc on %p inode\n", + cifs_dbg(FYI, "cifs_file_write_iter: %d rc on %p inode\n", rc, inode); out: @@ -880,10 +879,10 @@ const struct inode_operations cifs_symlink_inode_ops = { }; const struct file_operations cifs_file_ops = { - .read = do_sync_read, - .write = do_sync_write, - .aio_read = generic_file_aio_read, - .aio_write = cifs_file_aio_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = generic_file_read_iter, + .write_iter = cifs_file_write_iter, .open = cifs_open, .release = cifs_close, .lock = cifs_lock, @@ -899,10 +898,10 @@ const struct file_operations cifs_file_ops = { }; const struct file_operations cifs_file_strict_ops = { - .read = do_sync_read, - .write = do_sync_write, - .aio_read = cifs_strict_readv, - .aio_write = cifs_strict_writev, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = cifs_strict_readv, + .write_iter = cifs_strict_writev, .open = cifs_open, .release = cifs_close, .lock = cifs_lock, @@ -919,10 +918,10 @@ const struct file_operations cifs_file_strict_ops = { const struct file_operations cifs_file_direct_ops = { /* BB reevaluate whether they can be done with directio, no cache */ - .read = do_sync_read, - .write = do_sync_write, - .aio_read = cifs_user_readv, - .aio_write = cifs_user_writev, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = cifs_user_readv, + .write_iter = cifs_user_writev, .open = cifs_open, .release = cifs_close, .lock = cifs_lock, @@ -938,10 +937,10 @@ const struct file_operations cifs_file_direct_ops = { }; const struct file_operations cifs_file_nobrl_ops = { - .read = do_sync_read, - .write = do_sync_write, - .aio_read = generic_file_aio_read, - .aio_write = cifs_file_aio_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = generic_file_read_iter, + .write_iter = cifs_file_write_iter, .open = cifs_open, .release = cifs_close, .fsync = cifs_fsync, @@ -956,10 +955,10 @@ const struct file_operations cifs_file_nobrl_ops = { }; const struct file_operations cifs_file_strict_nobrl_ops = { - .read = do_sync_read, - .write = do_sync_write, - .aio_read = cifs_strict_readv, - .aio_write = cifs_strict_writev, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = cifs_strict_readv, + .write_iter = cifs_strict_writev, .open = cifs_open, .release = cifs_close, .fsync = cifs_strict_fsync, @@ -975,10 +974,10 @@ const struct file_operations cifs_file_strict_nobrl_ops = { const struct file_operations cifs_file_direct_nobrl_ops = { /* BB reevaluate whether they can be done with directio, no cache */ - .read = do_sync_read, - .write = do_sync_write, - .aio_read = cifs_user_readv, - .aio_write = cifs_user_writev, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = cifs_user_readv, + .write_iter = cifs_user_writev, .open = cifs_open, .release = cifs_close, .fsync = cifs_fsync, diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 8fe51166d6e..70f178a7c75 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -95,14 +95,10 @@ extern const struct file_operations cifs_file_strict_nobrl_ops; extern int cifs_open(struct inode *inode, struct file *file); extern int cifs_close(struct inode *inode, struct file *file); extern int cifs_closedir(struct inode *inode, struct file *file); -extern ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos); -extern ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos); -extern ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos); -extern ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos); +extern ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to); +extern ssize_t cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to); +extern ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from); +extern ssize_t cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from); extern int cifs_lock(struct file *, int, struct file_lock *); extern int cifs_fsync(struct file *, loff_t, loff_t, int); extern int cifs_strict_fsync(struct file *, loff_t, loff_t, int); diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 208f56eca4b..e90a1e9aa62 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2385,14 +2385,12 @@ cifs_uncached_retry_writev(struct cifs_writedata *wdata) } static ssize_t -cifs_iovec_write(struct file *file, const struct iovec *iov, - unsigned long nr_segs, loff_t *poffset) +cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset) { unsigned long nr_pages, i; size_t bytes, copied, len, cur_len; ssize_t total_written = 0; loff_t offset; - struct iov_iter it; struct cifsFileInfo *open_file; struct cifs_tcon *tcon; struct cifs_sb_info *cifs_sb; @@ -2401,14 +2399,16 @@ cifs_iovec_write(struct file *file, const struct iovec *iov, int rc; pid_t pid; - len = iov_length(iov, nr_segs); - if (!len) - return 0; - + len = iov_iter_count(from); rc = generic_write_checks(file, poffset, &len, 0); if (rc) return rc; + if (!len) + return 0; + + iov_iter_truncate(from, len); + INIT_LIST_HEAD(&wdata_list); cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); open_file = file->private_data; @@ -2424,7 +2424,6 @@ cifs_iovec_write(struct file *file, const struct iovec *iov, else pid = current->tgid; - iov_iter_init(&it, iov, nr_segs, len, 0); do { size_t save_len; @@ -2444,11 +2443,10 @@ cifs_iovec_write(struct file *file, const struct iovec *iov, save_len = cur_len; for (i = 0; i < nr_pages; i++) { - bytes = min_t(const size_t, cur_len, PAGE_SIZE); - copied = iov_iter_copy_from_user(wdata->pages[i], &it, - 0, bytes); + bytes = min_t(size_t, cur_len, PAGE_SIZE); + copied = copy_page_from_iter(wdata->pages[i], 0, bytes, + from); cur_len -= copied; - iov_iter_advance(&it, copied); /* * If we didn't copy as much as we expected, then that * may mean we trod into an unmapped area. Stop copying @@ -2546,11 +2544,11 @@ restart_loop: return total_written ? total_written : (ssize_t)rc; } -ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from) { ssize_t written; struct inode *inode; + loff_t pos = iocb->ki_pos; inode = file_inode(iocb->ki_filp); @@ -2560,7 +2558,7 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov, * write request. */ - written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos); + written = cifs_iovec_write(iocb->ki_filp, from, &pos); if (written > 0) { set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags); iocb->ki_pos = pos; @@ -2570,8 +2568,7 @@ ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov, } static ssize_t -cifs_writev(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +cifs_writev(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data; @@ -2589,10 +2586,10 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov, mutex_lock(&inode->i_mutex); if (file->f_flags & O_APPEND) lock_pos = i_size_read(inode); - if (!cifs_find_lock_conflict(cfile, lock_pos, iov_length(iov, nr_segs), + if (!cifs_find_lock_conflict(cfile, lock_pos, iov_iter_count(from), server->vals->exclusive_lock_type, NULL, CIFS_WRITE_OP)) { - rc = __generic_file_aio_write(iocb, iov, nr_segs); + rc = __generic_file_write_iter(iocb, from); mutex_unlock(&inode->i_mutex); if (rc > 0) { @@ -2610,8 +2607,7 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov, } ssize_t -cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from) { struct inode *inode = file_inode(iocb->ki_filp); struct cifsInodeInfo *cinode = CIFS_I(inode); @@ -2629,11 +2625,10 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, if (cap_unix(tcon->ses) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) { - written = generic_file_aio_write( - iocb, iov, nr_segs, pos); + written = generic_file_write_iter(iocb, from); goto out; } - written = cifs_writev(iocb, iov, nr_segs, pos); + written = cifs_writev(iocb, from); goto out; } /* @@ -2642,7 +2637,7 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov, * affected pages because it may cause a error with mandatory locks on * these pages but not on the region from pos to ppos+len-1. */ - written = cifs_user_writev(iocb, iov, nr_segs, pos); + written = cifs_user_writev(iocb, from); if (written > 0 && CIFS_CACHE_READ(cinode)) { /* * Windows 7 server can delay breaking level2 oplock if a write @@ -2831,32 +2826,25 @@ cifs_uncached_read_into_pages(struct TCP_Server_Info *server, return total_read > 0 ? total_read : result; } -ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to) { struct file *file = iocb->ki_filp; ssize_t rc; size_t len, cur_len; ssize_t total_read = 0; - loff_t offset = pos; + loff_t offset = iocb->ki_pos; unsigned int npages; struct cifs_sb_info *cifs_sb; struct cifs_tcon *tcon; struct cifsFileInfo *open_file; struct cifs_readdata *rdata, *tmp; struct list_head rdata_list; - struct iov_iter to; pid_t pid; - if (!nr_segs) - return 0; - - len = iov_length(iov, nr_segs); + len = iov_iter_count(to); if (!len) return 0; - iov_iter_init(&to, iov, nr_segs, len, 0); - INIT_LIST_HEAD(&rdata_list); cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); open_file = file->private_data; @@ -2914,7 +2902,7 @@ error: if (!list_empty(&rdata_list)) rc = 0; - len = iov_iter_count(&to); + len = iov_iter_count(to); /* the loop below should proceed in the order of increasing offsets */ list_for_each_entry_safe(rdata, tmp, &rdata_list, list) { again: @@ -2931,7 +2919,7 @@ error: goto again; } } else { - rc = cifs_readdata_to_iov(rdata, &to); + rc = cifs_readdata_to_iov(rdata, to); } } @@ -2939,7 +2927,7 @@ error: kref_put(&rdata->refcount, cifs_uncached_readdata_release); } - total_read = len - iov_iter_count(&to); + total_read = len - iov_iter_count(to); cifs_stats_bytes_read(tcon, total_read); @@ -2948,15 +2936,14 @@ error: rc = 0; if (total_read) { - iocb->ki_pos = pos + total_read; + iocb->ki_pos += total_read; return total_read; } return rc; } ssize_t -cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to) { struct inode *inode = file_inode(iocb->ki_filp); struct cifsInodeInfo *cinode = CIFS_I(inode); @@ -2975,22 +2962,22 @@ cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov, * pos+len-1. */ if (!CIFS_CACHE_READ(cinode)) - return cifs_user_readv(iocb, iov, nr_segs, pos); + return cifs_user_readv(iocb, to); if (cap_unix(tcon->ses) && (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) - return generic_file_aio_read(iocb, iov, nr_segs, pos); + return generic_file_read_iter(iocb, to); /* * We need to hold the sem to be sure nobody modifies lock list * with a brlock that prevents reading. */ down_read(&cinode->lock_sem); - if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs), + if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to), tcon->ses->server->vals->shared_lock_type, NULL, CIFS_READ_OP)) - rc = generic_file_aio_read(iocb, iov, nr_segs, pos); + rc = generic_file_read_iter(iocb, to); up_read(&cinode->lock_sem); return rc; } @@ -3703,8 +3690,8 @@ void cifs_oplock_break(struct work_struct *work) * Direct IO is not yet supported in the cached mode. */ static ssize_t -cifs_direct_io(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t pos, unsigned long nr_segs) +cifs_direct_io(int rw, struct kiocb *iocb, struct iov_iter *iter, + loff_t pos) { /* * FIXME diff --git a/fs/dcache.c b/fs/dcache.c index 1792d6075b4..06f65857a85 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -532,10 +532,12 @@ static inline struct dentry *lock_parent(struct dentry *dentry) struct dentry *parent = dentry->d_parent; if (IS_ROOT(dentry)) return NULL; + if (unlikely((int)dentry->d_lockref.count < 0)) + return NULL; if (likely(spin_trylock(&parent->d_lock))) return parent; - spin_unlock(&dentry->d_lock); rcu_read_lock(); + spin_unlock(&dentry->d_lock); again: parent = ACCESS_ONCE(dentry->d_parent); spin_lock(&parent->d_lock); diff --git a/fs/direct-io.c b/fs/direct-io.c index 31ba0935e32..98040ba388a 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -77,7 +77,6 @@ struct dio_submit { unsigned blocks_available; /* At block_in_file. changes */ int reap_counter; /* rate limit reaping */ sector_t final_block_in_request;/* doesn't change */ - unsigned first_block_in_page; /* doesn't change, Used only once */ int boundary; /* prev block is at a boundary */ get_block_t *get_block; /* block mapping function */ dio_submit_t *submit_io; /* IO submition function */ @@ -98,19 +97,14 @@ struct dio_submit { sector_t cur_page_block; /* Where it starts */ loff_t cur_page_fs_offset; /* Offset in file */ - /* - * Page fetching state. These variables belong to dio_refill_pages(). - */ - int curr_page; /* changes */ - int total_pages; /* doesn't change */ - unsigned long curr_user_address;/* changes */ - + struct iov_iter *iter; /* * Page queue. These variables belong to dio_refill_pages() and * dio_get_page(). */ unsigned head; /* next page to process */ unsigned tail; /* last valid page + 1 */ + size_t from, to; }; /* dio_state communicated between submission path and end_io */ @@ -163,15 +157,10 @@ static inline unsigned dio_pages_present(struct dio_submit *sdio) */ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) { - int ret; - int nr_pages; + ssize_t ret; - nr_pages = min(sdio->total_pages - sdio->curr_page, DIO_PAGES); - ret = get_user_pages_fast( - sdio->curr_user_address, /* Where from? */ - nr_pages, /* How many pages? */ - dio->rw == READ, /* Write to memory? */ - &dio->pages[0]); /* Put results here */ + ret = iov_iter_get_pages(sdio->iter, dio->pages, DIO_PAGES * PAGE_SIZE, + &sdio->from); if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) { struct page *page = ZERO_PAGE(0); @@ -186,18 +175,19 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio) dio->pages[0] = page; sdio->head = 0; sdio->tail = 1; - ret = 0; - goto out; + sdio->from = 0; + sdio->to = PAGE_SIZE; + return 0; } if (ret >= 0) { - sdio->curr_user_address += ret * PAGE_SIZE; - sdio->curr_page += ret; + iov_iter_advance(sdio->iter, ret); + ret += sdio->from; sdio->head = 0; - sdio->tail = ret; - ret = 0; + sdio->tail = (ret + PAGE_SIZE - 1) / PAGE_SIZE; + sdio->to = ((ret - 1) & (PAGE_SIZE - 1)) + 1; + return 0; } -out: return ret; } @@ -208,8 +198,9 @@ out: * L1 cache. */ static inline struct page *dio_get_page(struct dio *dio, - struct dio_submit *sdio) + struct dio_submit *sdio, size_t *from, size_t *to) { + int n; if (dio_pages_present(sdio) == 0) { int ret; @@ -218,7 +209,10 @@ static inline struct page *dio_get_page(struct dio *dio, return ERR_PTR(ret); BUG_ON(dio_pages_present(sdio) == 0); } - return dio->pages[sdio->head++]; + n = sdio->head++; + *from = n ? 0 : sdio->from; + *to = (n == sdio->tail - 1) ? sdio->to : PAGE_SIZE; + return dio->pages[n]; } /** @@ -422,8 +416,8 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio) */ static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio) { - while (dio_pages_present(sdio)) - page_cache_release(dio_get_page(dio, sdio)); + while (sdio->head < sdio->tail) + page_cache_release(dio->pages[sdio->head++]); } /* @@ -912,23 +906,18 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio, struct buffer_head *map_bh) { const unsigned blkbits = sdio->blkbits; - const unsigned blocks_per_page = PAGE_SIZE >> blkbits; - struct page *page; - unsigned block_in_page; int ret = 0; - /* The I/O can start at any block offset within the first page */ - block_in_page = sdio->first_block_in_page; - while (sdio->block_in_file < sdio->final_block_in_request) { - page = dio_get_page(dio, sdio); + struct page *page; + size_t from, to; + page = dio_get_page(dio, sdio, &from, &to); if (IS_ERR(page)) { ret = PTR_ERR(page); goto out; } - while (block_in_page < blocks_per_page) { - unsigned offset_in_page = block_in_page << blkbits; + while (from < to) { unsigned this_chunk_bytes; /* # of bytes mapped */ unsigned this_chunk_blocks; /* # of blocks */ unsigned u; @@ -999,10 +988,10 @@ do_holes: page_cache_release(page); goto out; } - zero_user(page, block_in_page << blkbits, - 1 << blkbits); + zero_user(page, from, 1 << blkbits); sdio->block_in_file++; - block_in_page++; + from += 1 << blkbits; + dio->result += 1 << blkbits; goto next_block; } @@ -1019,7 +1008,7 @@ do_holes: * can add to this page */ this_chunk_blocks = sdio->blocks_available; - u = (PAGE_SIZE - offset_in_page) >> blkbits; + u = (to - from) >> blkbits; if (this_chunk_blocks > u) this_chunk_blocks = u; u = sdio->final_block_in_request - sdio->block_in_file; @@ -1031,7 +1020,7 @@ do_holes: if (this_chunk_blocks == sdio->blocks_available) sdio->boundary = buffer_boundary(map_bh); ret = submit_page_section(dio, sdio, page, - offset_in_page, + from, this_chunk_bytes, sdio->next_block_for_io, map_bh); @@ -1042,7 +1031,8 @@ do_holes: sdio->next_block_for_io += this_chunk_blocks; sdio->block_in_file += this_chunk_blocks; - block_in_page += this_chunk_blocks; + from += this_chunk_bytes; + dio->result += this_chunk_bytes; sdio->blocks_available -= this_chunk_blocks; next_block: BUG_ON(sdio->block_in_file > sdio->final_block_in_request); @@ -1052,7 +1042,6 @@ next_block: /* Drop the ref which was taken in get_user_pages() */ page_cache_release(page); - block_in_page = 0; } out: return ret; @@ -1107,24 +1096,20 @@ static inline int drop_refcount(struct dio *dio) */ static inline ssize_t do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, const struct iovec *iov, loff_t offset, - unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, + struct block_device *bdev, struct iov_iter *iter, loff_t offset, + get_block_t get_block, dio_iodone_t end_io, dio_submit_t submit_io, int flags) { - int seg; - size_t size; - unsigned long addr; unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits); unsigned blkbits = i_blkbits; unsigned blocksize_mask = (1 << blkbits) - 1; ssize_t retval = -EINVAL; - loff_t end = offset; + loff_t end = offset + iov_iter_count(iter); struct dio *dio; struct dio_submit sdio = { 0, }; - unsigned long user_addr; - size_t bytes; struct buffer_head map_bh = { 0, }; struct blk_plug plug; + unsigned long align = offset | iov_iter_alignment(iter); if (rw & WRITE) rw = WRITE_ODIRECT; @@ -1134,32 +1119,16 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, * the early prefetch in the caller enough time. */ - if (offset & blocksize_mask) { + if (align & blocksize_mask) { if (bdev) blkbits = blksize_bits(bdev_logical_block_size(bdev)); blocksize_mask = (1 << blkbits) - 1; - if (offset & blocksize_mask) + if (align & blocksize_mask) goto out; } - /* Check the memory alignment. Blocks cannot straddle pages */ - for (seg = 0; seg < nr_segs; seg++) { - addr = (unsigned long)iov[seg].iov_base; - size = iov[seg].iov_len; - end += size; - if (unlikely((addr & blocksize_mask) || - (size & blocksize_mask))) { - if (bdev) - blkbits = blksize_bits( - bdev_logical_block_size(bdev)); - blocksize_mask = (1 << blkbits) - 1; - if ((addr & blocksize_mask) || (size & blocksize_mask)) - goto out; - } - } - /* watch out for a 0 len io from a tricksy fs */ - if (rw == READ && end == offset) + if (rw == READ && !iov_iter_count(iter)) return 0; dio = kmem_cache_alloc(dio_cache, GFP_KERNEL); @@ -1249,6 +1218,10 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, spin_lock_init(&dio->bio_lock); dio->refcount = 1; + sdio.iter = iter; + sdio.final_block_in_request = + (offset + iov_iter_count(iter)) >> blkbits; + /* * In case of non-aligned buffers, we may need 2 more * pages since we need to zero out first and last block. @@ -1256,47 +1229,13 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, if (unlikely(sdio.blkfactor)) sdio.pages_in_io = 2; - for (seg = 0; seg < nr_segs; seg++) { - user_addr = (unsigned long)iov[seg].iov_base; - sdio.pages_in_io += - ((user_addr + iov[seg].iov_len + PAGE_SIZE-1) / - PAGE_SIZE - user_addr / PAGE_SIZE); - } + sdio.pages_in_io += iov_iter_npages(iter, INT_MAX); blk_start_plug(&plug); - for (seg = 0; seg < nr_segs; seg++) { - user_addr = (unsigned long)iov[seg].iov_base; - sdio.size += bytes = iov[seg].iov_len; - - /* Index into the first page of the first block */ - sdio.first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits; - sdio.final_block_in_request = sdio.block_in_file + - (bytes >> blkbits); - /* Page fetching state */ - sdio.head = 0; - sdio.tail = 0; - sdio.curr_page = 0; - - sdio.total_pages = 0; - if (user_addr & (PAGE_SIZE-1)) { - sdio.total_pages++; - bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1)); - } - sdio.total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE; - sdio.curr_user_address = user_addr; - - retval = do_direct_IO(dio, &sdio, &map_bh); - - dio->result += iov[seg].iov_len - - ((sdio.final_block_in_request - sdio.block_in_file) << - blkbits); - - if (retval) { - dio_cleanup(dio, &sdio); - break; - } - } /* end iovec loop */ + retval = do_direct_IO(dio, &sdio, &map_bh); + if (retval) + dio_cleanup(dio, &sdio); if (retval == -ENOTBLK) { /* @@ -1365,8 +1304,8 @@ out: ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, const struct iovec *iov, loff_t offset, - unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, + struct block_device *bdev, struct iov_iter *iter, loff_t offset, + get_block_t get_block, dio_iodone_t end_io, dio_submit_t submit_io, int flags) { /* @@ -1381,9 +1320,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, prefetch(bdev->bd_queue); prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES); - return do_blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_block, end_io, - submit_io, flags); + return do_blockdev_direct_IO(rw, iocb, inode, bdev, iter, offset, + get_block, end_io, submit_io, flags); } EXPORT_SYMBOL(__blockdev_direct_IO); diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 1e5b4535950..d08e079ea5d 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -617,6 +617,11 @@ static void retry_failed_sctp_send(struct connection *recv_con, int nodeid = sn_send_failed->ssf_info.sinfo_ppid; log_print("Retry sending %d bytes to node id %d", len, nodeid); + + if (!nodeid) { + log_print("Shouldn't resend data via listening connection."); + return; + } con = nodeid2con(nodeid, 0); if (!con) { diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c index b1eaa7a1f82..db0fad3269c 100644 --- a/fs/ecryptfs/file.c +++ b/fs/ecryptfs/file.c @@ -45,14 +45,13 @@ * The function to be used for directory reads is ecryptfs_read. */ static ssize_t ecryptfs_read_update_atime(struct kiocb *iocb, - const struct iovec *iov, - unsigned long nr_segs, loff_t pos) + struct iov_iter *to) { ssize_t rc; struct path *path; struct file *file = iocb->ki_filp; - rc = generic_file_aio_read(iocb, iov, nr_segs, pos); + rc = generic_file_read_iter(iocb, to); /* * Even though this is a async interface, we need to wait * for IO to finish to update atime @@ -352,10 +351,10 @@ const struct file_operations ecryptfs_dir_fops = { const struct file_operations ecryptfs_main_fops = { .llseek = generic_file_llseek, - .read = do_sync_read, - .aio_read = ecryptfs_read_update_atime, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .read = new_sync_read, + .read_iter = ecryptfs_read_update_atime, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .iterate = ecryptfs_readdir, .unlocked_ioctl = ecryptfs_unlocked_ioctl, #ifdef CONFIG_COMPAT diff --git a/fs/exec.c b/fs/exec.c index 238b7aa26f6..a3d33fe592d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1046,13 +1046,13 @@ EXPORT_SYMBOL_GPL(get_task_comm); * so that a new one can be started */ -void set_task_comm(struct task_struct *tsk, const char *buf) +void __set_task_comm(struct task_struct *tsk, const char *buf, bool exec) { task_lock(tsk); trace_task_rename(tsk, buf); strlcpy(tsk->comm, buf, sizeof(tsk->comm)); task_unlock(tsk); - perf_event_comm(tsk); + perf_event_comm(tsk, exec); } int flush_old_exec(struct linux_binprm * bprm) @@ -1110,7 +1110,8 @@ void setup_new_exec(struct linux_binprm * bprm) else set_dumpable(current->mm, suid_dumpable); - set_task_comm(current, kbasename(bprm->filename)); + perf_event_exec(); + __set_task_comm(current, kbasename(bprm->filename), true); /* Set the new mm task size. We have to do that late because it may * depend on TIF_32BIT which is only updated in flush_thread() on diff --git a/fs/exofs/file.c b/fs/exofs/file.c index 491c6c078e7..71bf8e4fb5d 100644 --- a/fs/exofs/file.c +++ b/fs/exofs/file.c @@ -67,17 +67,17 @@ static int exofs_flush(struct file *file, fl_owner_t id) const struct file_operations exofs_file_operations = { .llseek = generic_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .open = generic_file_open, .release = exofs_release_file, .fsync = exofs_file_fsync, .flush = exofs_flush, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, }; const struct inode_operations exofs_file_inode_operations = { diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index d1c244d6766..3f9cafd7393 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -964,7 +964,7 @@ static void exofs_invalidatepage(struct page *page, unsigned int offset, /* TODO: Should be easy enough to do proprly */ static ssize_t exofs_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { return 0; } diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 44c36e59076..7c87b22a722 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -62,10 +62,10 @@ int ext2_fsync(struct file *file, loff_t start, loff_t end, int datasync) */ const struct file_operations ext2_file_operations = { .llseek = generic_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, .unlocked_ioctl = ext2_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext2_compat_ioctl, @@ -75,7 +75,7 @@ const struct file_operations ext2_file_operations = { .release = ext2_release_file, .fsync = ext2_fsync, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, }; #ifdef CONFIG_EXT2_FS_XIP diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index b1d2a4675d4..36d35c36311 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -850,18 +850,18 @@ static sector_t ext2_bmap(struct address_space *mapping, sector_t block) } static ssize_t -ext2_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t offset, unsigned long nr_segs) +ext2_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; + size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, - ext2_get_block); + ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext2_get_block); if (ret < 0 && (rw & WRITE)) - ext2_write_failed(mapping, offset + iov_length(iov, nr_segs)); + ext2_write_failed(mapping, offset + count); return ret; } diff --git a/fs/ext3/file.c b/fs/ext3/file.c index aad05311392..a062fa1e1b1 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -50,10 +50,10 @@ static int ext3_release_file (struct inode * inode, struct file * filp) const struct file_operations ext3_file_operations = { .llseek = generic_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, .unlocked_ioctl = ext3_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext3_compat_ioctl, @@ -63,7 +63,7 @@ const struct file_operations ext3_file_operations = { .release = ext3_release_file, .fsync = ext3_sync_file, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, }; const struct inode_operations ext3_file_inode_operations = { diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 695abe738a2..2c6ccc49ba2 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -1821,8 +1821,7 @@ static int ext3_releasepage(struct page *page, gfp_t wait) * VFS code falls back into buffered path in that case so we are safe. */ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; @@ -1830,10 +1829,10 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, handle_t *handle; ssize_t ret; int orphan = 0; - size_t count = iov_length(iov, nr_segs); + size_t count = iov_iter_count(iter); int retries = 0; - trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); + trace_ext3_direct_IO_enter(inode, offset, count, rw); if (rw == WRITE) { loff_t final_size = offset + count; @@ -1857,15 +1856,14 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, } retry: - ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, - ext3_get_block); + ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, ext3_get_block); /* * In case of error extending write may have instantiated a few * blocks outside i_size. Trim these off again. */ if (unlikely((rw & WRITE) && ret < 0)) { loff_t isize = i_size_read(inode); - loff_t end = offset + iov_length(iov, nr_segs); + loff_t end = offset + count; if (end > isize) ext3_truncate_failed_direct_write(inode); @@ -1910,8 +1908,7 @@ retry: ret = err; } out: - trace_ext3_direct_IO_exit(inode, offset, - iov_length(iov, nr_segs), rw, ret); + trace_ext3_direct_IO_exit(inode, offset, count, rw, ret); return ret; } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 1479e2ae00d..7cc5a0e2368 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2140,8 +2140,7 @@ extern void ext4_da_update_reserve_space(struct inode *inode, extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode, struct ext4_map_blocks *map, int flags); extern ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs); + struct iov_iter *iter, loff_t offset); extern int ext4_ind_calc_metadata_amount(struct inode *inode, sector_t lblock); extern int ext4_ind_trans_blocks(struct inode *inode, int nrblocks); extern void ext4_ind_truncate(handle_t *, struct inode *inode); diff --git a/fs/ext4/file.c b/fs/ext4/file.c index 4e8bc284ec0..8695f70af1e 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -74,26 +74,22 @@ static void ext4_unwritten_wait(struct inode *inode) * or one thread will zero the other's data, causing corruption. */ static int -ext4_unaligned_aio(struct inode *inode, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos) { struct super_block *sb = inode->i_sb; int blockmask = sb->s_blocksize - 1; - size_t count = iov_length(iov, nr_segs); - loff_t final_size = pos + count; if (pos >= i_size_read(inode)) return 0; - if ((pos & blockmask) || (final_size & blockmask)) + if ((pos | iov_iter_alignment(from)) & blockmask) return 1; return 0; } static ssize_t -ext4_file_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(iocb->ki_filp); @@ -101,10 +97,9 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, struct blk_plug plug; int o_direct = file->f_flags & O_DIRECT; int overwrite = 0; - size_t length = iov_length(iov, nr_segs); + size_t length = iov_iter_count(from); ssize_t ret; - - BUG_ON(iocb->ki_pos != pos); + loff_t pos = iocb->ki_pos; /* * Unaligned direct AIO must be serialized; see comment above @@ -114,7 +109,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && !is_sync_kiocb(iocb) && (file->f_flags & O_APPEND || - ext4_unaligned_aio(inode, iov, nr_segs, pos))) { + ext4_unaligned_aio(inode, from, pos))) { aio_mutex = ext4_aio_mutex(inode); mutex_lock(aio_mutex); ext4_unwritten_wait(inode); @@ -138,10 +133,8 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, goto errout; } - if (pos + length > sbi->s_bitmap_maxbytes) { - nr_segs = iov_shorten((struct iovec *)iov, nr_segs, - sbi->s_bitmap_maxbytes - pos); - } + if (pos + length > sbi->s_bitmap_maxbytes) + iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos); } if (o_direct) { @@ -179,7 +172,7 @@ ext4_file_write(struct kiocb *iocb, const struct iovec *iov, } } - ret = __generic_file_aio_write(iocb, iov, nr_segs); + ret = __generic_file_write_iter(iocb, from); mutex_unlock(&inode->i_mutex); if (ret > 0) { @@ -594,10 +587,10 @@ loff_t ext4_llseek(struct file *file, loff_t offset, int whence) const struct file_operations ext4_file_operations = { .llseek = ext4_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = generic_file_aio_read, - .aio_write = ext4_file_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = generic_file_read_iter, + .write_iter = ext4_file_write_iter, .unlocked_ioctl = ext4_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ext4_compat_ioctl, @@ -607,7 +600,7 @@ const struct file_operations ext4_file_operations = { .release = ext4_release_file, .fsync = ext4_sync_file, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, .fallocate = ext4_fallocate, }; diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c index 594009f5f52..8a57e9fcd1b 100644 --- a/fs/ext4/indirect.c +++ b/fs/ext4/indirect.c @@ -639,8 +639,7 @@ out: * VFS code falls back into buffered path in that case so we are safe. */ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; @@ -648,7 +647,7 @@ ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb, handle_t *handle; ssize_t ret; int orphan = 0; - size_t count = iov_length(iov, nr_segs); + size_t count = iov_iter_count(iter); int retries = 0; if (rw == WRITE) { @@ -687,18 +686,17 @@ retry: goto locked; } ret = __blockdev_direct_IO(rw, iocb, inode, - inode->i_sb->s_bdev, iov, - offset, nr_segs, + inode->i_sb->s_bdev, iter, offset, ext4_get_block, NULL, NULL, 0); inode_dio_done(inode); } else { locked: - ret = blockdev_direct_IO(rw, iocb, inode, iov, - offset, nr_segs, ext4_get_block); + ret = blockdev_direct_IO(rw, iocb, inode, iter, + offset, ext4_get_block); if (unlikely((rw & WRITE) && ret < 0)) { loff_t isize = i_size_read(inode); - loff_t end = offset + iov_length(iov, nr_segs); + loff_t end = offset + count; if (end > isize) ext4_truncate_failed_write(inode); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 7fcd68ee915..8a064734e6e 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3093,13 +3093,12 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset, * */ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; ssize_t ret; - size_t count = iov_length(iov, nr_segs); + size_t count = iov_iter_count(iter); int overwrite = 0; get_block_t *get_block_func = NULL; int dio_flags = 0; @@ -3108,7 +3107,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, /* Use the old path for reads and writes beyond i_size. */ if (rw != WRITE || final_size > inode->i_size) - return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); + return ext4_ind_direct_IO(rw, iocb, iter, offset); BUG_ON(iocb->private == NULL); @@ -3175,8 +3174,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, dio_flags = DIO_LOCKING; } ret = __blockdev_direct_IO(rw, iocb, inode, - inode->i_sb->s_bdev, iov, - offset, nr_segs, + inode->i_sb->s_bdev, iter, + offset, get_block_func, ext4_end_io_dio, NULL, @@ -3230,11 +3229,11 @@ retake_lock: } static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; + size_t count = iov_iter_count(iter); ssize_t ret; /* @@ -3247,13 +3246,12 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, if (ext4_has_inline_data(inode)) return 0; - trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); + trace_ext4_direct_IO_enter(inode, offset, count, rw); if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) - ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); + ret = ext4_ext_direct_IO(rw, iocb, iter, offset); else - ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); - trace_ext4_direct_IO_exit(inode, offset, - iov_length(iov, nr_segs), rw, ret); + ret = ext4_ind_direct_IO(rw, iocb, iter, offset); + trace_ext4_direct_IO_exit(inode, offset, count, rw, ret); return ret; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index c1fb6dd1091..0924521306b 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -1017,10 +1017,9 @@ static int f2fs_write_end(struct file *file, } static int check_direct_IO(struct inode *inode, int rw, - const struct iovec *iov, loff_t offset, unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { unsigned blocksize_mask = inode->i_sb->s_blocksize - 1; - int i; if (rw == READ) return 0; @@ -1028,14 +1027,14 @@ static int check_direct_IO(struct inode *inode, int rw, if (offset & blocksize_mask) return -EINVAL; - for (i = 0; i < nr_segs; i++) - if (iov[i].iov_len & blocksize_mask) - return -EINVAL; + if (iov_iter_alignment(iter) & blocksize_mask) + return -EINVAL; + return 0; } static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; @@ -1044,14 +1043,14 @@ static ssize_t f2fs_direct_IO(int rw, struct kiocb *iocb, if (f2fs_has_inline_data(inode)) return 0; - if (check_direct_IO(inode, rw, iov, offset, nr_segs)) + if (check_direct_IO(inode, rw, iter, offset)) return 0; /* clear fsync mark to recover these blocks */ fsync_mark_clear(F2FS_SB(inode->i_sb), inode->i_ino); - return blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, - get_data_block); + return blockdev_direct_IO(rw, iocb, inode, iter, offset, + get_data_block); } static void f2fs_invalidate_data_page(struct page *page, unsigned int offset, diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 9c49c593d8e..c58e3307571 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -808,10 +808,10 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) const struct file_operations f2fs_file_operations = { .llseek = f2fs_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, .open = generic_file_open, .mmap = f2fs_file_mmap, .fsync = f2fs_sync_file, @@ -821,5 +821,5 @@ const struct file_operations f2fs_file_operations = { .compat_ioctl = f2fs_compat_ioctl, #endif .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, }; diff --git a/fs/fat/file.c b/fs/fat/file.c index 9b104f54305..85f79a89e74 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -170,10 +170,10 @@ int fat_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync) const struct file_operations fat_file_operations = { .llseek = generic_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .release = fat_file_release, .unlocked_ioctl = fat_generic_ioctl, diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 9c83594d7fb..756aead10d9 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -247,12 +247,13 @@ static int fat_write_end(struct file *file, struct address_space *mapping, } static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, - loff_t offset, unsigned long nr_segs) + struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; + size_t count = iov_iter_count(iter); ssize_t ret; if (rw == WRITE) { @@ -265,7 +266,7 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, * * Return 0, and fallback to normal buffered write. */ - loff_t size = offset + iov_length(iov, nr_segs); + loff_t size = offset + count; if (MSDOS_I(inode)->mmu_private < size) return 0; } @@ -274,10 +275,9 @@ static ssize_t fat_direct_IO(int rw, struct kiocb *iocb, * FAT need to use the DIO_LOCKING for avoiding the race * condition of fat_get_block() and ->truncate(). */ - ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, - fat_get_block); + ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, fat_get_block); if (ret < 0 && (rw & WRITE)) - fat_write_failed(mapping, offset + iov_length(iov, nr_segs)); + fat_write_failed(mapping, offset + count); return ret; } diff --git a/fs/file.c b/fs/file.c index 8f294cfac69..66923fe3176 100644 --- a/fs/file.c +++ b/fs/file.c @@ -44,15 +44,10 @@ static void *alloc_fdmem(size_t size) return vmalloc(size); } -static void free_fdmem(void *ptr) -{ - is_vmalloc_addr(ptr) ? vfree(ptr) : kfree(ptr); -} - static void __free_fdtable(struct fdtable *fdt) { - free_fdmem(fdt->fd); - free_fdmem(fdt->open_fds); + kvfree(fdt->fd); + kvfree(fdt->open_fds); kfree(fdt); } @@ -130,7 +125,7 @@ static struct fdtable * alloc_fdtable(unsigned int nr) return fdt; out_arr: - free_fdmem(fdt->fd); + kvfree(fdt->fd); out_fdt: kfree(fdt); out: diff --git a/fs/file_table.c b/fs/file_table.c index 40bf4660f0a..385bfd31512 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -175,6 +175,12 @@ struct file *alloc_file(struct path *path, fmode_t mode, file->f_path = *path; file->f_inode = path->dentry->d_inode; file->f_mapping = path->dentry->d_inode->i_mapping; + if ((mode & FMODE_READ) && + likely(fop->read || fop->aio_read || fop->read_iter)) + mode |= FMODE_CAN_READ; + if ((mode & FMODE_WRITE) && + likely(fop->write || fop->aio_write || fop->write_iter)) + mode |= FMODE_CAN_WRITE; file->f_mode = mode; file->f_op = fop; if ((mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index 13b691a8a7d..966ace8b243 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -94,8 +94,10 @@ static ssize_t cuse_read(struct file *file, char __user *buf, size_t count, loff_t pos = 0; struct iovec iov = { .iov_base = buf, .iov_len = count }; struct fuse_io_priv io = { .async = 0, .file = file }; + struct iov_iter ii; + iov_iter_init(&ii, READ, &iov, 1, count); - return fuse_direct_io(&io, &iov, 1, count, &pos, FUSE_DIO_CUSE); + return fuse_direct_io(&io, &ii, &pos, FUSE_DIO_CUSE); } static ssize_t cuse_write(struct file *file, const char __user *buf, @@ -104,12 +106,14 @@ static ssize_t cuse_write(struct file *file, const char __user *buf, loff_t pos = 0; struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; struct fuse_io_priv io = { .async = 0, .file = file }; + struct iov_iter ii; + iov_iter_init(&ii, WRITE, &iov, 1, count); /* * No locking or generic_write_checks(), the server is * responsible for locking and sanity checks. */ - return fuse_direct_io(&io, &iov, 1, count, &pos, + return fuse_direct_io(&io, &ii, &pos, FUSE_DIO_WRITE | FUSE_DIO_CUSE); } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 903cbc9cd6b..6e16dad13e9 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -933,8 +933,7 @@ out: return err; } -static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to) { struct inode *inode = iocb->ki_filp->f_mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); @@ -945,14 +944,14 @@ static ssize_t fuse_file_aio_read(struct kiocb *iocb, const struct iovec *iov, * i_size is up to date). */ if (fc->auto_inval_data || - (pos + iov_length(iov, nr_segs) > i_size_read(inode))) { + (iocb->ki_pos + iov_iter_count(to) > i_size_read(inode))) { int err; err = fuse_update_attributes(inode, NULL, iocb->ki_filp, NULL); if (err) return err; } - return generic_file_aio_read(iocb, iov, nr_segs, pos); + return generic_file_read_iter(iocb, to); } static void fuse_write_fill(struct fuse_req *req, struct fuse_file *ff, @@ -1181,19 +1180,17 @@ static ssize_t fuse_perform_write(struct file *file, return res > 0 ? res : err; } -static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; - size_t count = 0; - size_t ocount = 0; + size_t count = iov_iter_count(from); ssize_t written = 0; ssize_t written_buffered = 0; struct inode *inode = mapping->host; ssize_t err; - struct iov_iter i; loff_t endbyte = 0; + loff_t pos = iocb->ki_pos; if (get_fuse_conn(inode)->writeback_cache) { /* Update size (EOF optimization) and mode (SUID clearing) */ @@ -1201,17 +1198,9 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (err) return err; - return generic_file_aio_write(iocb, iov, nr_segs, pos); + return generic_file_write_iter(iocb, from); } - WARN_ON(iocb->ki_pos != pos); - - ocount = 0; - err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); - if (err) - return err; - - count = ocount; mutex_lock(&inode->i_mutex); /* We can write back this queue in page reclaim */ @@ -1224,6 +1213,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (count == 0) goto out; + iov_iter_truncate(from, count); err = file_remove_suid(file); if (err) goto out; @@ -1233,16 +1223,13 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, goto out; if (file->f_flags & O_DIRECT) { - written = generic_file_direct_write(iocb, iov, &nr_segs, pos, - count, ocount); - if (written < 0 || written == count) + written = generic_file_direct_write(iocb, from, pos); + if (written < 0 || !iov_iter_count(from)) goto out; pos += written; - count -= written; - iov_iter_init(&i, iov, nr_segs, count, written); - written_buffered = fuse_perform_write(file, mapping, &i, pos); + written_buffered = fuse_perform_write(file, mapping, from, pos); if (written_buffered < 0) { err = written_buffered; goto out; @@ -1261,8 +1248,7 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, written += written_buffered; iocb->ki_pos = pos + written_buffered; } else { - iov_iter_init(&i, iov, nr_segs, count, 0); - written = fuse_perform_write(file, mapping, &i, pos); + written = fuse_perform_write(file, mapping, from, pos); if (written >= 0) iocb->ki_pos = pos + written; } @@ -1300,7 +1286,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, size_t nbytes = 0; /* # bytes already packed in req */ /* Special case for kernel I/O: can copy directly into the buffer */ - if (segment_eq(get_fs(), KERNEL_DS)) { + if (ii->type & ITER_KVEC) { unsigned long user_addr = fuse_get_user_addr(ii); size_t frag_size = fuse_get_frag_size(ii, *nbytesp); @@ -1316,35 +1302,26 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, while (nbytes < *nbytesp && req->num_pages < req->max_pages) { unsigned npages; - unsigned long user_addr = fuse_get_user_addr(ii); - unsigned offset = user_addr & ~PAGE_MASK; - size_t frag_size = fuse_get_frag_size(ii, *nbytesp - nbytes); - int ret; - + size_t start; unsigned n = req->max_pages - req->num_pages; - frag_size = min_t(size_t, frag_size, n << PAGE_SHIFT); - - npages = (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; - npages = clamp(npages, 1U, n); - - ret = get_user_pages_fast(user_addr, npages, !write, - &req->pages[req->num_pages]); + ssize_t ret = iov_iter_get_pages(ii, + &req->pages[req->num_pages], + n * PAGE_SIZE, &start); if (ret < 0) return ret; - npages = ret; - frag_size = min_t(size_t, frag_size, - (npages << PAGE_SHIFT) - offset); - iov_iter_advance(ii, frag_size); + iov_iter_advance(ii, ret); + nbytes += ret; + + ret += start; + npages = (ret + PAGE_SIZE - 1) / PAGE_SIZE; - req->page_descs[req->num_pages].offset = offset; + req->page_descs[req->num_pages].offset = start; fuse_page_descs_length_init(req, req->num_pages, npages); req->num_pages += npages; req->page_descs[req->num_pages - 1].length -= - (npages << PAGE_SHIFT) - offset - frag_size; - - nbytes += frag_size; + (PAGE_SIZE - ret) & (PAGE_SIZE - 1); } if (write) @@ -1359,24 +1336,11 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, static inline int fuse_iter_npages(const struct iov_iter *ii_p) { - struct iov_iter ii = *ii_p; - int npages = 0; - - while (iov_iter_count(&ii) && npages < FUSE_MAX_PAGES_PER_REQ) { - unsigned long user_addr = fuse_get_user_addr(&ii); - unsigned offset = user_addr & ~PAGE_MASK; - size_t frag_size = iov_iter_single_seg_count(&ii); - - npages += (frag_size + offset + PAGE_SIZE - 1) >> PAGE_SHIFT; - iov_iter_advance(&ii, frag_size); - } - - return min(npages, FUSE_MAX_PAGES_PER_REQ); + return iov_iter_npages(ii_p, FUSE_MAX_PAGES_PER_REQ); } -ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, - unsigned long nr_segs, size_t count, loff_t *ppos, - int flags) +ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, + loff_t *ppos, int flags) { int write = flags & FUSE_DIO_WRITE; int cuse = flags & FUSE_DIO_CUSE; @@ -1386,18 +1350,16 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, struct fuse_conn *fc = ff->fc; size_t nmax = write ? fc->max_write : fc->max_read; loff_t pos = *ppos; + size_t count = iov_iter_count(iter); pgoff_t idx_from = pos >> PAGE_CACHE_SHIFT; pgoff_t idx_to = (pos + count - 1) >> PAGE_CACHE_SHIFT; ssize_t res = 0; struct fuse_req *req; - struct iov_iter ii; - - iov_iter_init(&ii, iov, nr_segs, count, 0); if (io->async) - req = fuse_get_req_for_background(fc, fuse_iter_npages(&ii)); + req = fuse_get_req_for_background(fc, fuse_iter_npages(iter)); else - req = fuse_get_req(fc, fuse_iter_npages(&ii)); + req = fuse_get_req(fc, fuse_iter_npages(iter)); if (IS_ERR(req)) return PTR_ERR(req); @@ -1413,7 +1375,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, size_t nres; fl_owner_t owner = current->files; size_t nbytes = min(count, nmax); - int err = fuse_get_user_pages(req, &ii, &nbytes, write); + int err = fuse_get_user_pages(req, iter, &nbytes, write); if (err) { res = err; break; @@ -1443,9 +1405,9 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, fuse_put_request(fc, req); if (io->async) req = fuse_get_req_for_background(fc, - fuse_iter_npages(&ii)); + fuse_iter_npages(iter)); else - req = fuse_get_req(fc, fuse_iter_npages(&ii)); + req = fuse_get_req(fc, fuse_iter_npages(iter)); if (IS_ERR(req)) break; } @@ -1460,9 +1422,8 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, EXPORT_SYMBOL_GPL(fuse_direct_io); static ssize_t __fuse_direct_read(struct fuse_io_priv *io, - const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos, - size_t count) + struct iov_iter *iter, + loff_t *ppos) { ssize_t res; struct file *file = io->file; @@ -1471,7 +1432,7 @@ static ssize_t __fuse_direct_read(struct fuse_io_priv *io, if (is_bad_inode(inode)) return -EIO; - res = fuse_direct_io(io, iov, nr_segs, count, ppos, 0); + res = fuse_direct_io(io, iter, ppos, 0); fuse_invalidate_attr(inode); @@ -1483,22 +1444,26 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf, { struct fuse_io_priv io = { .async = 0, .file = file }; struct iovec iov = { .iov_base = buf, .iov_len = count }; - return __fuse_direct_read(&io, &iov, 1, ppos, count); + struct iov_iter ii; + iov_iter_init(&ii, READ, &iov, 1, count); + return __fuse_direct_read(&io, &ii, ppos); } static ssize_t __fuse_direct_write(struct fuse_io_priv *io, - const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) + struct iov_iter *iter, + loff_t *ppos) { struct file *file = io->file; struct inode *inode = file_inode(file); - size_t count = iov_length(iov, nr_segs); + size_t count = iov_iter_count(iter); ssize_t res; + res = generic_write_checks(file, ppos, &count, 0); - if (!res) - res = fuse_direct_io(io, iov, nr_segs, count, ppos, - FUSE_DIO_WRITE); + if (!res) { + iov_iter_truncate(iter, count); + res = fuse_direct_io(io, iter, ppos, FUSE_DIO_WRITE); + } fuse_invalidate_attr(inode); @@ -1512,13 +1477,15 @@ static ssize_t fuse_direct_write(struct file *file, const char __user *buf, struct inode *inode = file_inode(file); ssize_t res; struct fuse_io_priv io = { .async = 0, .file = file }; + struct iov_iter ii; + iov_iter_init(&ii, WRITE, &iov, 1, count); if (is_bad_inode(inode)) return -EIO; /* Don't allow parallel writes to the same file */ mutex_lock(&inode->i_mutex); - res = __fuse_direct_write(&io, &iov, 1, ppos); + res = __fuse_direct_write(&io, &ii, ppos); if (res > 0) fuse_write_update_size(inode, *ppos); mutex_unlock(&inode->i_mutex); @@ -2372,7 +2339,7 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov, if (!bytes) return 0; - iov_iter_init(&ii, iov, nr_segs, bytes, 0); + iov_iter_init(&ii, to_user ? READ : WRITE, iov, nr_segs, bytes); while (iov_iter_count(&ii)) { struct page *page = pages[page_idx++]; @@ -2894,8 +2861,8 @@ static inline loff_t fuse_round_up(loff_t off) } static ssize_t -fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t offset, unsigned long nr_segs) +fuse_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { ssize_t ret = 0; struct file *file = iocb->ki_filp; @@ -2904,7 +2871,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos = 0; struct inode *inode; loff_t i_size; - size_t count = iov_length(iov, nr_segs); + size_t count = iov_iter_count(iter); struct fuse_io_priv *io; pos = offset; @@ -2919,6 +2886,7 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, if (offset >= i_size) return 0; count = min_t(loff_t, count, fuse_round_up(i_size - offset)); + iov_iter_truncate(iter, count); } io = kmalloc(sizeof(struct fuse_io_priv), GFP_KERNEL); @@ -2948,9 +2916,9 @@ fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, io->async = false; if (rw == WRITE) - ret = __fuse_direct_write(io, iov, nr_segs, &pos); + ret = __fuse_direct_write(io, iter, &pos); else - ret = __fuse_direct_read(io, iov, nr_segs, &pos, count); + ret = __fuse_direct_read(io, iter, &pos); if (io->async) { fuse_aio_complete(io, ret < 0 ? ret : 0, -1); @@ -3061,10 +3029,10 @@ out: static const struct file_operations fuse_file_operations = { .llseek = fuse_file_llseek, - .read = do_sync_read, - .aio_read = fuse_file_aio_read, - .write = do_sync_write, - .aio_write = fuse_file_aio_write, + .read = new_sync_read, + .read_iter = fuse_file_read_iter, + .write = new_sync_write, + .write_iter = fuse_file_write_iter, .mmap = fuse_file_mmap, .open = fuse_open, .flush = fuse_flush, diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index 7aa5c75e0de..e8e47a6ab51 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -880,9 +880,8 @@ int fuse_do_open(struct fuse_conn *fc, u64 nodeid, struct file *file, /** CUSE pass fuse_direct_io() a file which f_mapping->host is not from FUSE */ #define FUSE_DIO_CUSE (1 << 1) -ssize_t fuse_direct_io(struct fuse_io_priv *io, const struct iovec *iov, - unsigned long nr_segs, size_t count, loff_t *ppos, - int flags); +ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter, + loff_t *ppos, int flags); long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg, unsigned int flags); long fuse_ioctl_common(struct file *file, unsigned int cmd, diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 492123cda64..805b37fed63 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -1040,8 +1040,7 @@ static int gfs2_ok_for_dio(struct gfs2_inode *ip, int rw, loff_t offset) static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; @@ -1081,7 +1080,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, */ if (mapping->nrpages) { loff_t lstart = offset & (PAGE_CACHE_SIZE - 1); - loff_t len = iov_length(iov, nr_segs); + loff_t len = iov_iter_count(iter); loff_t end = PAGE_ALIGN(offset + len) - 1; rv = 0; @@ -1096,9 +1095,9 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, truncate_inode_pages_range(mapping, lstart, end); } - rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, gfs2_get_block_direct, - NULL, NULL, 0); + rv = __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, + iter, offset, + gfs2_get_block_direct, NULL, NULL, 0); out: gfs2_glock_dq(&gh); gfs2_holder_uninit(&gh); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 6ab0cfb2e89..4fc3a304617 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -684,7 +684,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, } /** - * gfs2_file_aio_write - Perform a write to a file + * gfs2_file_write_iter - Perform a write to a file * @iocb: The io context * @iov: The data to write * @nr_segs: Number of @iov segments @@ -697,11 +697,9 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, * */ -static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t gfs2_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; - size_t writesize = iov_length(iov, nr_segs); struct gfs2_inode *ip = GFS2_I(file_inode(file)); int ret; @@ -709,7 +707,7 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (ret) return ret; - gfs2_size_hint(file, pos, writesize); + gfs2_size_hint(file, iocb->ki_pos, iov_iter_count(from)); if (file->f_flags & O_APPEND) { struct gfs2_holder gh; @@ -720,7 +718,7 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov, gfs2_glock_dq_uninit(&gh); } - return generic_file_aio_write(iocb, iov, nr_segs, pos); + return generic_file_write_iter(iocb, from); } static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len, @@ -1058,10 +1056,10 @@ static int gfs2_flock(struct file *file, int cmd, struct file_lock *fl) const struct file_operations gfs2_file_fops = { .llseek = gfs2_llseek, - .read = do_sync_read, - .aio_read = generic_file_aio_read, - .write = do_sync_write, - .aio_write = gfs2_file_aio_write, + .read = new_sync_read, + .read_iter = generic_file_read_iter, + .write = new_sync_write, + .write_iter = gfs2_file_write_iter, .unlocked_ioctl = gfs2_ioctl, .mmap = gfs2_mmap, .open = gfs2_open, @@ -1070,7 +1068,7 @@ const struct file_operations gfs2_file_fops = { .lock = gfs2_lock, .flock = gfs2_flock, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, .setlease = gfs2_setlease, .fallocate = gfs2_fallocate, }; @@ -1090,17 +1088,17 @@ const struct file_operations gfs2_dir_fops = { const struct file_operations gfs2_file_fops_nolock = { .llseek = gfs2_llseek, - .read = do_sync_read, - .aio_read = generic_file_aio_read, - .write = do_sync_write, - .aio_write = gfs2_file_aio_write, + .read = new_sync_read, + .read_iter = generic_file_read_iter, + .write = new_sync_write, + .write_iter = gfs2_file_write_iter, .unlocked_ioctl = gfs2_ioctl, .mmap = gfs2_mmap, .open = gfs2_open, .release = gfs2_release, .fsync = gfs2_fsync, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, .setlease = generic_setlease, .fallocate = gfs2_fallocate, }; diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index 9e2fecd62f6..d0929bc8178 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -125,15 +125,15 @@ static int hfs_releasepage(struct page *page, gfp_t mask) } static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = file_inode(file)->i_mapping->host; + size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, - hfs_get_block); + ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, hfs_get_block); /* * In case of error extending write may have instantiated a few @@ -141,7 +141,7 @@ static ssize_t hfs_direct_IO(int rw, struct kiocb *iocb, */ if (unlikely((rw & WRITE) && ret < 0)) { loff_t isize = i_size_read(inode); - loff_t end = offset + iov_length(iov, nr_segs); + loff_t end = offset + count; if (end > isize) hfs_write_failed(mapping, end); @@ -674,10 +674,10 @@ static int hfs_file_fsync(struct file *filp, loff_t start, loff_t end, static const struct file_operations hfs_file_operations = { .llseek = generic_file_llseek, - .read = do_sync_read, - .aio_read = generic_file_aio_read, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .read = new_sync_read, + .read_iter = generic_file_read_iter, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .splice_read = generic_file_splice_read, .fsync = hfs_file_fsync, diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index a4f45bd88a6..0cf786f2d04 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -123,14 +123,15 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask) } static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = file_inode(file)->i_mapping->host; + size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, + ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, hfsplus_get_block); /* @@ -139,7 +140,7 @@ static ssize_t hfsplus_direct_IO(int rw, struct kiocb *iocb, */ if (unlikely((rw & WRITE) && ret < 0)) { loff_t isize = i_size_read(inode); - loff_t end = offset + iov_length(iov, nr_segs); + loff_t end = offset + count; if (end > isize) hfsplus_write_failed(mapping, end); @@ -340,10 +341,10 @@ static const struct inode_operations hfsplus_file_inode_operations = { static const struct file_operations hfsplus_file_operations = { .llseek = generic_file_llseek, - .read = do_sync_read, - .aio_read = generic_file_aio_read, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .read = new_sync_read, + .read_iter = generic_file_read_iter, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .splice_read = generic_file_splice_read, .fsync = hfsplus_file_fsync, diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 9c470fde987..bb529f3b7f2 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -378,11 +378,11 @@ static int hostfs_fsync(struct file *file, loff_t start, loff_t end, static const struct file_operations hostfs_file_fops = { .llseek = generic_file_llseek, - .read = do_sync_read, + .read = new_sync_read, .splice_read = generic_file_splice_read, - .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, - .write = do_sync_write, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, + .write = new_sync_write, .mmap = generic_file_mmap, .open = hostfs_file_open, .release = hostfs_file_release, diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index 67c1a61e095..7f54e5f76ce 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -197,10 +197,10 @@ const struct address_space_operations hpfs_aops = { const struct file_operations hpfs_file_ops = { .llseek = generic_file_llseek, - .read = do_sync_read, - .aio_read = generic_file_aio_read, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .read = new_sync_read, + .read_iter = generic_file_read_iter, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .release = hpfs_file_release, .fsync = hpfs_file_fsync, diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 256cd19a3b7..64989ca9ba9 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -51,10 +51,10 @@ const struct file_operations jffs2_file_operations = { .llseek = generic_file_llseek, .open = generic_file_open, - .read = do_sync_read, - .aio_read = generic_file_aio_read, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .read = new_sync_read, + .read_iter = generic_file_read_iter, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .unlocked_ioctl=jffs2_ioctl, .mmap = generic_file_readonly_mmap, .fsync = jffs2_fsync, diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 794da944d5c..33aa0cc1f8b 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -151,13 +151,13 @@ const struct inode_operations jfs_file_inode_operations = { const struct file_operations jfs_file_operations = { .open = jfs_open, .llseek = generic_file_llseek, - .write = do_sync_write, - .read = do_sync_read, - .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, + .write = new_sync_write, + .read = new_sync_read, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, .fsync = jfs_fsync, .release = jfs_release, .unlocked_ioctl = jfs_ioctl, diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 6f8fe72c2a7..bd3df1ca3c9 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -331,15 +331,15 @@ static sector_t jfs_bmap(struct address_space *mapping, sector_t block) } static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = file->f_mapping->host; + size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, - jfs_get_block); + ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, jfs_get_block); /* * In case of error extending write may have instantiated a few @@ -347,7 +347,7 @@ static ssize_t jfs_direct_IO(int rw, struct kiocb *iocb, */ if (unlikely((rw & WRITE) && ret < 0)) { loff_t isize = i_size_read(inode); - loff_t end = offset + iov_length(iov, nr_segs); + loff_t end = offset + count; if (end > isize) jfs_write_failed(mapping, end); diff --git a/fs/logfs/file.c b/fs/logfs/file.c index 57914fc32b6..8538752df2f 100644 --- a/fs/logfs/file.c +++ b/fs/logfs/file.c @@ -264,15 +264,15 @@ const struct inode_operations logfs_reg_iops = { }; const struct file_operations logfs_reg_fops = { - .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, .fsync = logfs_fsync, .unlocked_ioctl = logfs_ioctl, .llseek = generic_file_llseek, .mmap = generic_file_readonly_mmap, .open = generic_file_open, - .read = do_sync_read, - .write = do_sync_write, + .read = new_sync_read, + .write = new_sync_write, }; const struct address_space_operations logfs_reg_aops = { diff --git a/fs/minix/file.c b/fs/minix/file.c index adc6f549423..a967de085ac 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c @@ -14,10 +14,10 @@ */ const struct file_operations minix_file_operations = { .llseek = generic_file_llseek, - .read = do_sync_read, - .aio_read = generic_file_aio_read, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .read = new_sync_read, + .read_iter = generic_file_read_iter, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .fsync = generic_file_fsync, .splice_read = generic_file_splice_read, diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4ad7bc38867..8f98138cbc4 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -212,20 +212,20 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, * shunt off direct read and write requests before the VFS gets them, * so this method is only ever called for swap. */ -ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) +ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos) { #ifndef CONFIG_NFS_SWAP dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n", - iocb->ki_filp, (long long) pos, nr_segs); + iocb->ki_filp, (long long) pos, iter->nr_segs); return -EINVAL; #else VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE); if (rw == READ || rw == KERNEL_READ) - return nfs_file_direct_read(iocb, iov, nr_segs, pos, + return nfs_file_direct_read(iocb, iter, pos, rw == READ ? true : false); - return nfs_file_direct_write(iocb, iov, nr_segs, pos, + return nfs_file_direct_write(iocb, iter, pos, rw == WRITE ? true : false); #endif /* CONFIG_NFS_SWAP */ } @@ -414,60 +414,37 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = { * handled automatically by nfs_direct_read_result(). Otherwise, if * no requests have been sent, just return an error. */ -static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc, - const struct iovec *iov, - loff_t pos, bool uio) -{ - struct nfs_direct_req *dreq = desc->pg_dreq; - struct nfs_open_context *ctx = dreq->ctx; - struct inode *inode = ctx->dentry->d_inode; - unsigned long user_addr = (unsigned long)iov->iov_base; - size_t count = iov->iov_len; - size_t rsize = NFS_SERVER(inode)->rsize; - unsigned int pgbase; - int result; - ssize_t started = 0; - struct page **pagevec = NULL; - unsigned int npages; - - do { - size_t bytes; - int i; - pgbase = user_addr & ~PAGE_MASK; - bytes = min(max_t(size_t, rsize, PAGE_SIZE), count); +static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, + struct iov_iter *iter, + loff_t pos) +{ + struct nfs_pageio_descriptor desc; + struct inode *inode = dreq->inode; + ssize_t result = -EINVAL; + size_t requested_bytes = 0; + size_t rsize = max_t(size_t, NFS_SERVER(inode)->rsize, PAGE_SIZE); - result = -ENOMEM; - npages = nfs_page_array_len(pgbase, bytes); - if (!pagevec) - pagevec = kmalloc(npages * sizeof(struct page *), - GFP_KERNEL); - if (!pagevec) - break; - if (uio) { - down_read(¤t->mm->mmap_sem); - result = get_user_pages(current, current->mm, user_addr, - npages, 1, 0, pagevec, NULL); - up_read(¤t->mm->mmap_sem); - if (result < 0) - break; - } else { - WARN_ON(npages != 1); - result = get_kernel_page(user_addr, 1, pagevec); - if (WARN_ON(result != 1)) - break; - } + nfs_pageio_init_read(&desc, dreq->inode, false, + &nfs_direct_read_completion_ops); + get_dreq(dreq); + desc.pg_dreq = dreq; + atomic_inc(&inode->i_dio_count); - if ((unsigned)result < npages) { - bytes = result * PAGE_SIZE; - if (bytes <= pgbase) { - nfs_direct_release_pages(pagevec, result); - break; - } - bytes -= pgbase; - npages = result; - } + while (iov_iter_count(iter)) { + struct page **pagevec; + size_t bytes; + size_t pgbase; + unsigned npages, i; + result = iov_iter_get_pages_alloc(iter, &pagevec, + rsize, &pgbase); + if (result < 0) + break; + + bytes = result; + iov_iter_advance(iter, bytes); + npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE; for (i = 0; i < npages; i++) { struct nfs_page *req; unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); @@ -480,56 +457,21 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de } req->wb_index = pos >> PAGE_SHIFT; req->wb_offset = pos & ~PAGE_MASK; - if (!nfs_pageio_add_request(desc, req)) { - result = desc->pg_error; + if (!nfs_pageio_add_request(&desc, req)) { + result = desc.pg_error; nfs_release_request(req); break; } pgbase = 0; bytes -= req_len; - started += req_len; - user_addr += req_len; + requested_bytes += req_len; pos += req_len; - count -= req_len; dreq->bytes_left -= req_len; } - /* The nfs_page now hold references to these pages */ nfs_direct_release_pages(pagevec, npages); - } while (count != 0 && result >= 0); - - kfree(pagevec); - - if (started) - return started; - return result < 0 ? (ssize_t) result : -EFAULT; -} - -static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos, bool uio) -{ - struct nfs_pageio_descriptor desc; - struct inode *inode = dreq->inode; - ssize_t result = -EINVAL; - size_t requested_bytes = 0; - unsigned long seg; - - nfs_pageio_init_read(&desc, dreq->inode, false, - &nfs_direct_read_completion_ops); - get_dreq(dreq); - desc.pg_dreq = dreq; - atomic_inc(&inode->i_dio_count); - - for (seg = 0; seg < nr_segs; seg++) { - const struct iovec *vec = &iov[seg]; - result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio); + kvfree(pagevec); if (result < 0) break; - requested_bytes += result; - if ((size_t)result < vec->iov_len) - break; - pos += vec->iov_len; } nfs_pageio_complete(&desc); @@ -552,8 +494,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, /** * nfs_file_direct_read - file direct read operation for NFS files * @iocb: target I/O control block - * @iov: vector of user buffers into which to read data - * @nr_segs: size of iov vector + * @iter: vector of user buffers into which to read data * @pos: byte offset in file where reading starts * * We use this function for direct reads instead of calling @@ -570,8 +511,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, * client must read the updated atime from the server back into its * cache. */ -ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos, bool uio) +ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, + loff_t pos, bool uio) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -579,9 +520,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, struct nfs_direct_req *dreq; struct nfs_lock_context *l_ctx; ssize_t result = -EINVAL; - size_t count; - - count = iov_length(iov, nr_segs); + size_t count = iov_iter_count(iter); nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n", @@ -604,7 +543,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, goto out_unlock; dreq->inode = inode; - dreq->bytes_left = iov_length(iov, nr_segs); + dreq->bytes_left = count; dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); l_ctx = nfs_get_lock_context(dreq->ctx); if (IS_ERR(l_ctx)) { @@ -615,8 +554,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - NFS_I(inode)->read_io += iov_length(iov, nr_segs); - result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio); + NFS_I(inode)->read_io += count; + result = nfs_direct_read_schedule_iovec(dreq, iter, pos); mutex_unlock(&inode->i_mutex); @@ -772,108 +711,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode } #endif -/* - * NB: Return the value of the first error return code. Subsequent - * errors after the first one are ignored. - */ -/* - * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE - * operation. If nfs_writedata_alloc() or get_user_pages() fails, - * bail and stop sending more writes. Write length accounting is - * handled automatically by nfs_direct_write_result(). Otherwise, if - * no requests have been sent, just return an error. - */ -static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc, - const struct iovec *iov, - loff_t pos, bool uio) -{ - struct nfs_direct_req *dreq = desc->pg_dreq; - struct nfs_open_context *ctx = dreq->ctx; - struct inode *inode = ctx->dentry->d_inode; - unsigned long user_addr = (unsigned long)iov->iov_base; - size_t count = iov->iov_len; - size_t wsize = NFS_SERVER(inode)->wsize; - unsigned int pgbase; - int result; - ssize_t started = 0; - struct page **pagevec = NULL; - unsigned int npages; - - do { - size_t bytes; - int i; - - pgbase = user_addr & ~PAGE_MASK; - bytes = min(max_t(size_t, wsize, PAGE_SIZE), count); - - result = -ENOMEM; - npages = nfs_page_array_len(pgbase, bytes); - if (!pagevec) - pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL); - if (!pagevec) - break; - - if (uio) { - down_read(¤t->mm->mmap_sem); - result = get_user_pages(current, current->mm, user_addr, - npages, 0, 0, pagevec, NULL); - up_read(¤t->mm->mmap_sem); - if (result < 0) - break; - } else { - WARN_ON(npages != 1); - result = get_kernel_page(user_addr, 0, pagevec); - if (WARN_ON(result != 1)) - break; - } - - if ((unsigned)result < npages) { - bytes = result * PAGE_SIZE; - if (bytes <= pgbase) { - nfs_direct_release_pages(pagevec, result); - break; - } - bytes -= pgbase; - npages = result; - } - - for (i = 0; i < npages; i++) { - struct nfs_page *req; - unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); - - req = nfs_create_request(dreq->ctx, pagevec[i], NULL, - pgbase, req_len); - if (IS_ERR(req)) { - result = PTR_ERR(req); - break; - } - nfs_lock_request(req); - req->wb_index = pos >> PAGE_SHIFT; - req->wb_offset = pos & ~PAGE_MASK; - if (!nfs_pageio_add_request(desc, req)) { - result = desc->pg_error; - nfs_unlock_and_release_request(req); - break; - } - pgbase = 0; - bytes -= req_len; - started += req_len; - user_addr += req_len; - pos += req_len; - count -= req_len; - dreq->bytes_left -= req_len; - } - /* The nfs_page now hold references to these pages */ - nfs_direct_release_pages(pagevec, npages); - } while (count != 0 && result >= 0); - - kfree(pagevec); - - if (started) - return started; - return result < 0 ? (ssize_t) result : -EFAULT; -} - static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) { struct nfs_direct_req *dreq = hdr->dreq; @@ -956,16 +793,27 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { .completion = nfs_direct_write_completion, }; + +/* + * NB: Return the value of the first error return code. Subsequent + * errors after the first one are ignored. + */ +/* + * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE + * operation. If nfs_writedata_alloc() or get_user_pages() fails, + * bail and stop sending more writes. Write length accounting is + * handled automatically by nfs_direct_write_result(). Otherwise, if + * no requests have been sent, just return an error. + */ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos, bool uio) + struct iov_iter *iter, + loff_t pos) { struct nfs_pageio_descriptor desc; struct inode *inode = dreq->inode; ssize_t result = 0; size_t requested_bytes = 0; - unsigned long seg; + size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false, &nfs_direct_write_completion_ops); @@ -973,16 +821,49 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, get_dreq(dreq); atomic_inc(&inode->i_dio_count); - NFS_I(dreq->inode)->write_io += iov_length(iov, nr_segs); - for (seg = 0; seg < nr_segs; seg++) { - const struct iovec *vec = &iov[seg]; - result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio); + NFS_I(inode)->write_io += iov_iter_count(iter); + while (iov_iter_count(iter)) { + struct page **pagevec; + size_t bytes; + size_t pgbase; + unsigned npages, i; + + result = iov_iter_get_pages_alloc(iter, &pagevec, + wsize, &pgbase); if (result < 0) break; - requested_bytes += result; - if ((size_t)result < vec->iov_len) + + bytes = result; + iov_iter_advance(iter, bytes); + npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE; + for (i = 0; i < npages; i++) { + struct nfs_page *req; + unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); + + req = nfs_create_request(dreq->ctx, pagevec[i], NULL, + pgbase, req_len); + if (IS_ERR(req)) { + result = PTR_ERR(req); + break; + } + nfs_lock_request(req); + req->wb_index = pos >> PAGE_SHIFT; + req->wb_offset = pos & ~PAGE_MASK; + if (!nfs_pageio_add_request(&desc, req)) { + result = desc.pg_error; + nfs_unlock_and_release_request(req); + break; + } + pgbase = 0; + bytes -= req_len; + requested_bytes += req_len; + pos += req_len; + dreq->bytes_left -= req_len; + } + nfs_direct_release_pages(pagevec, npages); + kvfree(pagevec); + if (result < 0) break; - pos += vec->iov_len; } nfs_pageio_complete(&desc); @@ -1004,8 +885,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, /** * nfs_file_direct_write - file direct write operation for NFS files * @iocb: target I/O control block - * @iov: vector of user buffers from which to write data - * @nr_segs: size of iov vector + * @iter: vector of user buffers from which to write data * @pos: byte offset in file where writing starts * * We use this function for direct writes instead of calling @@ -1023,8 +903,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * Note that O_APPEND is not supported for NFS direct writes, as there * is no atomic O_APPEND write facility in the NFS protocol. */ -ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos, bool uio) +ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, + loff_t pos, bool uio) { ssize_t result = -EINVAL; struct file *file = iocb->ki_filp; @@ -1033,9 +913,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, struct nfs_direct_req *dreq; struct nfs_lock_context *l_ctx; loff_t end; - size_t count; - - count = iov_length(iov, nr_segs); + size_t count = iov_iter_count(iter); end = (pos + count - 1) >> PAGE_CACHE_SHIFT; nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); @@ -1086,7 +964,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, if (!is_sync_kiocb(iocb)) dreq->iocb = iocb; - result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio); + result = nfs_direct_write_schedule_iovec(dreq, iter, pos); if (mapping->nrpages) { invalidate_inode_pages2_range(mapping, diff --git a/fs/nfs/file.c b/fs/nfs/file.c index c1edf733631..4042ff58fe3 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -165,22 +165,21 @@ nfs_file_flush(struct file *file, fl_owner_t id) EXPORT_SYMBOL_GPL(nfs_file_flush); ssize_t -nfs_file_read(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +nfs_file_read(struct kiocb *iocb, struct iov_iter *to) { struct inode *inode = file_inode(iocb->ki_filp); ssize_t result; if (iocb->ki_filp->f_flags & O_DIRECT) - return nfs_file_direct_read(iocb, iov, nr_segs, pos, true); + return nfs_file_direct_read(iocb, to, iocb->ki_pos, true); - dprintk("NFS: read(%pD2, %lu@%lu)\n", + dprintk("NFS: read(%pD2, %zu@%lu)\n", iocb->ki_filp, - (unsigned long) iov_length(iov, nr_segs), (unsigned long) pos); + iov_iter_count(to), (unsigned long) iocb->ki_pos); result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); if (!result) { - result = generic_file_aio_read(iocb, iov, nr_segs, pos); + result = generic_file_read_iter(iocb, to); if (result > 0) nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result); } @@ -635,24 +634,24 @@ static int nfs_need_sync_write(struct file *filp, struct inode *inode) return 0; } -ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file); unsigned long written = 0; ssize_t result; - size_t count = iov_length(iov, nr_segs); + size_t count = iov_iter_count(from); + loff_t pos = iocb->ki_pos; result = nfs_key_timeout_notify(file, inode); if (result) return result; if (file->f_flags & O_DIRECT) - return nfs_file_direct_write(iocb, iov, nr_segs, pos, true); + return nfs_file_direct_write(iocb, from, pos, true); - dprintk("NFS: write(%pD2, %lu@%Ld)\n", - file, (unsigned long) count, (long long) pos); + dprintk("NFS: write(%pD2, %zu@%Ld)\n", + file, count, (long long) pos); result = -EBUSY; if (IS_SWAPFILE(inode)) @@ -670,7 +669,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, if (!count) goto out; - result = generic_file_aio_write(iocb, iov, nr_segs, pos); + result = generic_file_write_iter(iocb, from); if (result > 0) written = result; @@ -691,36 +690,6 @@ out_swapfile: } EXPORT_SYMBOL_GPL(nfs_file_write); -ssize_t nfs_file_splice_write(struct pipe_inode_info *pipe, - struct file *filp, loff_t *ppos, - size_t count, unsigned int flags) -{ - struct inode *inode = file_inode(filp); - unsigned long written = 0; - ssize_t ret; - - dprintk("NFS splice_write(%pD2, %lu@%llu)\n", - filp, (unsigned long) count, (unsigned long long) *ppos); - - /* - * The combination of splice and an O_APPEND destination is disallowed. - */ - - ret = generic_file_splice_write(pipe, filp, ppos, count, flags); - if (ret > 0) - written = ret; - - if (ret >= 0 && nfs_need_sync_write(filp, inode)) { - int err = vfs_fsync(filp, 0); - if (err < 0) - ret = err; - } - if (ret > 0) - nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written); - return ret; -} -EXPORT_SYMBOL_GPL(nfs_file_splice_write); - static int do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local) { @@ -935,10 +904,10 @@ EXPORT_SYMBOL_GPL(nfs_setlease); const struct file_operations nfs_file_operations = { .llseek = nfs_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = nfs_file_read, - .aio_write = nfs_file_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = nfs_file_read, + .write_iter = nfs_file_write, .mmap = nfs_file_mmap, .open = nfs_file_open, .flush = nfs_file_flush, @@ -947,7 +916,7 @@ const struct file_operations nfs_file_operations = { .lock = nfs_lock, .flock = nfs_flock, .splice_read = nfs_file_splice_read, - .splice_write = nfs_file_splice_write, + .splice_write = iter_file_splice_write, .check_flags = nfs_check_flags, .setlease = nfs_setlease, }; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 8b69cba1bb0..82ddbf46660 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -327,16 +327,14 @@ int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *) int nfs_file_fsync_commit(struct file *, loff_t, loff_t, int); loff_t nfs_file_llseek(struct file *, loff_t, int); int nfs_file_flush(struct file *, fl_owner_t); -ssize_t nfs_file_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); +ssize_t nfs_file_read(struct kiocb *, struct iov_iter *); ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); int nfs_file_mmap(struct file *, struct vm_area_struct *); -ssize_t nfs_file_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); +ssize_t nfs_file_write(struct kiocb *, struct iov_iter *); int nfs_file_release(struct inode *, struct file *); int nfs_lock(struct file *, int, struct file_lock *); int nfs_flock(struct file *, int, struct file_lock *); -ssize_t nfs_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, - size_t, unsigned int); int nfs_check_flags(int); int nfs_setlease(struct file *, long, struct file_lock **); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 464db9dd631..a816f0627a6 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -117,10 +117,10 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync) const struct file_operations nfs4_file_operations = { .llseek = nfs_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = nfs_file_read, - .aio_write = nfs_file_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = nfs_file_read, + .write_iter = nfs_file_write, .mmap = nfs_file_mmap, .open = nfs4_file_open, .flush = nfs_file_flush, @@ -129,7 +129,7 @@ const struct file_operations nfs4_file_operations = { .lock = nfs_lock, .flock = nfs_flock, .splice_read = nfs_file_splice_read, - .splice_write = nfs_file_splice_write, + .splice_write = iter_file_splice_write, .check_flags = nfs_check_flags, .setlease = nfs_setlease, }; diff --git a/fs/nilfs2/file.c b/fs/nilfs2/file.c index f3a82fbcae0..24978153c0c 100644 --- a/fs/nilfs2/file.c +++ b/fs/nilfs2/file.c @@ -152,10 +152,10 @@ static int nilfs_file_mmap(struct file *file, struct vm_area_struct *vma) */ const struct file_operations nilfs_file_operations = { .llseek = generic_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, .unlocked_ioctl = nilfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = nilfs_compat_ioctl, diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index b9c5726120e..6252b173a46 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -298,19 +298,20 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping, } static ssize_t -nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, - loff_t offset, unsigned long nr_segs) +nilfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = file->f_mapping->host; + size_t count = iov_iter_count(iter); ssize_t size; if (rw == WRITE) return 0; /* Needs synchronization with the cleaner */ - size = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, + size = blockdev_direct_IO(rw, iocb, inode, iter, offset, nilfs_get_block); /* @@ -319,7 +320,7 @@ nilfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, */ if (unlikely((rw & WRITE) && size < 0)) { loff_t isize = i_size_read(inode); - loff_t end = offset + iov_length(iov, nr_segs); + loff_t end = offset + count; if (end > isize) nilfs_write_failed(mapping, end); diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 86ddab916b6..5c9e2c81cb1 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2090,10 +2090,7 @@ static ssize_t ntfs_file_aio_write_nolock(struct kiocb *iocb, size_t count; /* after file limit checks */ ssize_t written, err; - count = 0; - err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); - if (err) - return err; + count = iov_length(iov, nr_segs); pos = *ppos; /* We can write back this queue in page reclaim. */ current->backing_dev_info = mapping->backing_dev_info; @@ -2202,8 +2199,8 @@ static int ntfs_file_fsync(struct file *filp, loff_t start, loff_t end, const struct file_operations ntfs_file_ops = { .llseek = generic_file_llseek, /* Seek inside file. */ - .read = do_sync_read, /* Read from file. */ - .aio_read = generic_file_aio_read, /* Async read from file. */ + .read = new_sync_read, /* Read from file. */ + .read_iter = generic_file_read_iter, /* Async read from file. */ #ifdef NTFS_RW .write = do_sync_write, /* Write to file. */ .aio_write = ntfs_file_aio_write, /* Async write to file. */ diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index d310d12a9ad..4a231a166cf 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -599,9 +599,8 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait) static ssize_t ocfs2_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, - loff_t offset, - unsigned long nr_segs) + struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file_inode(file)->i_mapping->host; @@ -618,7 +617,7 @@ static ssize_t ocfs2_direct_IO(int rw, return 0; return __blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, - iov, offset, nr_segs, + iter, offset, ocfs2_direct_IO_get_blocks, ocfs2_dio_end_io, NULL, 0); } diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8eb6e5732d3..2930e231f3f 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2233,16 +2233,13 @@ out: return ret; } -static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos) +static ssize_t ocfs2_file_write_iter(struct kiocb *iocb, + struct iov_iter *from) { int ret, direct_io, appending, rw_level, have_alloc_sem = 0; int can_do_direct, has_refcount = 0; ssize_t written = 0; - size_t ocount; /* original count */ - size_t count; /* after file limit checks */ + size_t count = iov_iter_count(from); loff_t old_size, *ppos = &iocb->ki_pos; u32 old_clusters; struct file *file = iocb->ki_filp; @@ -2256,7 +2253,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, (unsigned long long)OCFS2_I(inode)->ip_blkno, file->f_path.dentry->d_name.len, file->f_path.dentry->d_name.name, - (unsigned int)nr_segs); + (unsigned int)from->nr_segs); /* GRRRRR */ if (iocb->ki_nbytes == 0) return 0; @@ -2354,29 +2351,21 @@ relock: /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_rw_locked(iocb, rw_level); - ret = generic_segment_checks(iov, &nr_segs, &ocount, - VERIFY_READ); - if (ret) - goto out_dio; - - count = ocount; ret = generic_write_checks(file, ppos, &count, S_ISBLK(inode->i_mode)); if (ret) goto out_dio; + iov_iter_truncate(from, count); if (direct_io) { - written = generic_file_direct_write(iocb, iov, &nr_segs, *ppos, - count, ocount); + written = generic_file_direct_write(iocb, from, *ppos); if (written < 0) { ret = written; goto out_dio; } } else { - struct iov_iter from; - iov_iter_init(&from, iov, nr_segs, count, 0); current->backing_dev_info = file->f_mapping->backing_dev_info; - written = generic_perform_write(file, &from, *ppos); + written = generic_perform_write(file, from, *ppos); if (likely(written >= 0)) iocb->ki_pos = *ppos + written; current->backing_dev_info = NULL; @@ -2441,84 +2430,6 @@ out_sems: return ret; } -static int ocfs2_splice_to_file(struct pipe_inode_info *pipe, - struct file *out, - struct splice_desc *sd) -{ - int ret; - - ret = ocfs2_prepare_inode_for_write(out, &sd->pos, - sd->total_len, 0, NULL, NULL); - if (ret < 0) { - mlog_errno(ret); - return ret; - } - - return splice_from_pipe_feed(pipe, sd, pipe_to_file); -} - -static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe, - struct file *out, - loff_t *ppos, - size_t len, - unsigned int flags) -{ - int ret; - struct address_space *mapping = out->f_mapping; - struct inode *inode = mapping->host; - struct splice_desc sd = { - .total_len = len, - .flags = flags, - .pos = *ppos, - .u.file = out, - }; - - - trace_ocfs2_file_splice_write(inode, out, out->f_path.dentry, - (unsigned long long)OCFS2_I(inode)->ip_blkno, - out->f_path.dentry->d_name.len, - out->f_path.dentry->d_name.name, len); - - pipe_lock(pipe); - - splice_from_pipe_begin(&sd); - do { - ret = splice_from_pipe_next(pipe, &sd); - if (ret <= 0) - break; - - mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); - ret = ocfs2_rw_lock(inode, 1); - if (ret < 0) - mlog_errno(ret); - else { - ret = ocfs2_splice_to_file(pipe, out, &sd); - ocfs2_rw_unlock(inode, 1); - } - mutex_unlock(&inode->i_mutex); - } while (ret > 0); - splice_from_pipe_end(pipe, &sd); - - pipe_unlock(pipe); - - if (sd.num_spliced) - ret = sd.num_spliced; - - if (ret > 0) { - int err; - - err = generic_write_sync(out, *ppos, ret); - if (err) - ret = err; - else - *ppos += ret; - - balance_dirty_pages_ratelimited(mapping); - } - - return ret; -} - static ssize_t ocfs2_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe, @@ -2534,7 +2445,7 @@ static ssize_t ocfs2_file_splice_read(struct file *in, in->f_path.dentry->d_name.name, len); /* - * See the comment in ocfs2_file_aio_read() + * See the comment in ocfs2_file_read_iter() */ ret = ocfs2_inode_lock_atime(inode, in->f_path.mnt, &lock_level); if (ret < 0) { @@ -2549,10 +2460,8 @@ bail: return ret; } -static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, - const struct iovec *iov, - unsigned long nr_segs, - loff_t pos) +static ssize_t ocfs2_file_read_iter(struct kiocb *iocb, + struct iov_iter *to) { int ret = 0, rw_level = -1, have_alloc_sem = 0, lock_level = 0; struct file *filp = iocb->ki_filp; @@ -2561,7 +2470,8 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, trace_ocfs2_file_aio_read(inode, filp, filp->f_path.dentry, (unsigned long long)OCFS2_I(inode)->ip_blkno, filp->f_path.dentry->d_name.len, - filp->f_path.dentry->d_name.name, nr_segs); + filp->f_path.dentry->d_name.name, + to->nr_segs); /* GRRRRR */ if (!inode) { @@ -2606,13 +2516,13 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, } ocfs2_inode_unlock(inode, lock_level); - ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos); + ret = generic_file_read_iter(iocb, to); trace_generic_file_aio_read_ret(ret); /* buffered aio wouldn't have proper lock coverage today */ BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); - /* see ocfs2_file_aio_write */ + /* see ocfs2_file_write_iter */ if (ret == -EIOCBQUEUED || !ocfs2_iocb_is_rw_locked(iocb)) { rw_level = -1; have_alloc_sem = 0; @@ -2705,14 +2615,14 @@ const struct inode_operations ocfs2_special_file_iops = { */ const struct file_operations ocfs2_fops = { .llseek = ocfs2_file_llseek, - .read = do_sync_read, - .write = do_sync_write, + .read = new_sync_read, + .write = new_sync_write, .mmap = ocfs2_mmap, .fsync = ocfs2_sync_file, .release = ocfs2_file_release, .open = ocfs2_file_open, - .aio_read = ocfs2_file_aio_read, - .aio_write = ocfs2_file_aio_write, + .read_iter = ocfs2_file_read_iter, + .write_iter = ocfs2_file_write_iter, .unlocked_ioctl = ocfs2_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ocfs2_compat_ioctl, @@ -2720,7 +2630,7 @@ const struct file_operations ocfs2_fops = { .lock = ocfs2_lock, .flock = ocfs2_flock, .splice_read = ocfs2_file_splice_read, - .splice_write = ocfs2_file_splice_write, + .splice_write = iter_file_splice_write, .fallocate = ocfs2_fallocate, }; @@ -2753,21 +2663,21 @@ const struct file_operations ocfs2_dops = { */ const struct file_operations ocfs2_fops_no_plocks = { .llseek = ocfs2_file_llseek, - .read = do_sync_read, - .write = do_sync_write, + .read = new_sync_read, + .write = new_sync_write, .mmap = ocfs2_mmap, .fsync = ocfs2_sync_file, .release = ocfs2_file_release, .open = ocfs2_file_open, - .aio_read = ocfs2_file_aio_read, - .aio_write = ocfs2_file_aio_write, + .read_iter = ocfs2_file_read_iter, + .write_iter = ocfs2_file_write_iter, .unlocked_ioctl = ocfs2_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = ocfs2_compat_ioctl, #endif .flock = ocfs2_flock, .splice_read = ocfs2_file_splice_read, - .splice_write = ocfs2_file_splice_write, + .splice_write = iter_file_splice_write, .fallocate = ocfs2_fallocate, }; diff --git a/fs/omfs/file.c b/fs/omfs/file.c index 54d57d6ba68..902e88527fc 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -337,10 +337,10 @@ static sector_t omfs_bmap(struct address_space *mapping, sector_t block) const struct file_operations omfs_file_operations = { .llseek = generic_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .fsync = generic_file_fsync, .splice_read = generic_file_splice_read, diff --git a/fs/open.c b/fs/open.c index 9d64679cec7..36662d03623 100644 --- a/fs/open.c +++ b/fs/open.c @@ -725,6 +725,12 @@ static int do_dentry_open(struct file *f, } if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ) i_readcount_inc(inode); + if ((f->f_mode & FMODE_READ) && + likely(f->f_op->read || f->f_op->aio_read || f->f_op->read_iter)) + f->f_mode |= FMODE_CAN_READ; + if ((f->f_mode & FMODE_WRITE) && + likely(f->f_op->write || f->f_op->aio_write || f->f_op->write_iter)) + f->f_mode |= FMODE_CAN_WRITE; f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); diff --git a/fs/pipe.c b/fs/pipe.c index 034bffac3f9..21981e58e2a 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -116,50 +116,6 @@ void pipe_wait(struct pipe_inode_info *pipe) pipe_lock(pipe); } -static int -pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len, - int atomic) -{ - unsigned long copy; - - while (len > 0) { - while (!iov->iov_len) - iov++; - copy = min_t(unsigned long, len, iov->iov_len); - - if (atomic) { - if (__copy_from_user_inatomic(to, iov->iov_base, copy)) - return -EFAULT; - } else { - if (copy_from_user(to, iov->iov_base, copy)) - return -EFAULT; - } - to += copy; - len -= copy; - iov->iov_base += copy; - iov->iov_len -= copy; - } - return 0; -} - -/* - * Pre-fault in the user memory, so we can use atomic copies. - */ -static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len) -{ - while (!iov->iov_len) - iov++; - - while (len > 0) { - unsigned long this_len; - - this_len = min_t(unsigned long, len, iov->iov_len); - fault_in_pages_readable(iov->iov_base, this_len); - len -= this_len; - iov++; - } -} - static void anon_pipe_buf_release(struct pipe_inode_info *pipe, struct pipe_buffer *buf) { @@ -271,24 +227,18 @@ static const struct pipe_buf_operations packet_pipe_buf_ops = { }; static ssize_t -pipe_read(struct kiocb *iocb, const struct iovec *_iov, - unsigned long nr_segs, loff_t pos) +pipe_read(struct kiocb *iocb, struct iov_iter *to) { + size_t total_len = iov_iter_count(to); struct file *filp = iocb->ki_filp; struct pipe_inode_info *pipe = filp->private_data; int do_wakeup; ssize_t ret; - struct iovec *iov = (struct iovec *)_iov; - size_t total_len; - struct iov_iter iter; - total_len = iov_length(iov, nr_segs); /* Null read succeeds. */ if (unlikely(total_len == 0)) return 0; - iov_iter_init(&iter, iov, nr_segs, total_len, 0); - do_wakeup = 0; ret = 0; __pipe_lock(pipe); @@ -312,7 +262,7 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov, break; } - written = copy_page_to_iter(buf->page, buf->offset, chars, &iter); + written = copy_page_to_iter(buf->page, buf->offset, chars, to); if (unlikely(written < chars)) { if (!ret) ret = -EFAULT; @@ -386,24 +336,19 @@ static inline int is_packetized(struct file *file) } static ssize_t -pipe_write(struct kiocb *iocb, const struct iovec *_iov, - unsigned long nr_segs, loff_t ppos) +pipe_write(struct kiocb *iocb, struct iov_iter *from) { struct file *filp = iocb->ki_filp; struct pipe_inode_info *pipe = filp->private_data; - ssize_t ret; - int do_wakeup; - struct iovec *iov = (struct iovec *)_iov; - size_t total_len; + ssize_t ret = 0; + int do_wakeup = 0; + size_t total_len = iov_iter_count(from); ssize_t chars; - total_len = iov_length(iov, nr_segs); /* Null write succeeds. */ if (unlikely(total_len == 0)) return 0; - do_wakeup = 0; - ret = 0; __pipe_lock(pipe); if (!pipe->readers) { @@ -422,38 +367,19 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov, int offset = buf->offset + buf->len; if (ops->can_merge && offset + chars <= PAGE_SIZE) { - int error, atomic = 1; - void *addr; - - error = ops->confirm(pipe, buf); + int error = ops->confirm(pipe, buf); if (error) goto out; - iov_fault_in_pages_read(iov, chars); -redo1: - if (atomic) - addr = kmap_atomic(buf->page); - else - addr = kmap(buf->page); - error = pipe_iov_copy_from_user(offset + addr, iov, - chars, atomic); - if (atomic) - kunmap_atomic(addr); - else - kunmap(buf->page); - ret = error; - do_wakeup = 1; - if (error) { - if (atomic) { - atomic = 0; - goto redo1; - } + ret = copy_page_from_iter(buf->page, offset, chars, from); + if (unlikely(ret < chars)) { + error = -EFAULT; goto out; } + do_wakeup = 1; buf->len += chars; - total_len -= chars; ret = chars; - if (!total_len) + if (!iov_iter_count(from)) goto out; } } @@ -472,8 +398,7 @@ redo1: int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1); struct pipe_buffer *buf = pipe->bufs + newbuf; struct page *page = pipe->tmp_page; - char *src; - int error, atomic = 1; + int copied; if (!page) { page = alloc_page(GFP_HIGHUSER); @@ -489,40 +414,19 @@ redo1: * FIXME! Is this really true? */ do_wakeup = 1; - chars = PAGE_SIZE; - if (chars > total_len) - chars = total_len; - - iov_fault_in_pages_read(iov, chars); -redo2: - if (atomic) - src = kmap_atomic(page); - else - src = kmap(page); - - error = pipe_iov_copy_from_user(src, iov, chars, - atomic); - if (atomic) - kunmap_atomic(src); - else - kunmap(page); - - if (unlikely(error)) { - if (atomic) { - atomic = 0; - goto redo2; - } + copied = copy_page_from_iter(page, 0, PAGE_SIZE, from); + if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) { if (!ret) - ret = error; + ret = -EFAULT; break; } - ret += chars; + ret += copied; /* Insert it into the buffer array */ buf->page = page; buf->ops = &anon_pipe_buf_ops; buf->offset = 0; - buf->len = chars; + buf->len = copied; buf->flags = 0; if (is_packetized(filp)) { buf->ops = &packet_pipe_buf_ops; @@ -531,8 +435,7 @@ redo2: pipe->nrbufs = ++bufs; pipe->tmp_page = NULL; - total_len -= chars; - if (!total_len) + if (!iov_iter_count(from)) break; } if (bufs < pipe->buffers) @@ -1044,10 +947,10 @@ err: const struct file_operations pipefifo_fops = { .open = fifo_open, .llseek = no_llseek, - .read = do_sync_read, - .aio_read = pipe_read, - .write = do_sync_write, - .aio_write = pipe_write, + .read = new_sync_read, + .read_iter = pipe_read, + .write = new_sync_write, + .write_iter = pipe_write, .poll = pipe_poll, .unlocked_ioctl = pipe_ioctl, .release = pipe_release, diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c index 1e56a4e8cf7..4f56de822d2 100644 --- a/fs/ramfs/file-mmu.c +++ b/fs/ramfs/file-mmu.c @@ -31,14 +31,14 @@ #include "internal.h" const struct file_operations ramfs_file_operations = { - .read = do_sync_read, - .aio_read = generic_file_aio_read, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .read = new_sync_read, + .read_iter = generic_file_read_iter, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .fsync = noop_fsync, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, .llseek = generic_file_llseek, }; diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 0b3d8e4cb2f..dda012ad420 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -37,13 +37,13 @@ static int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma); const struct file_operations ramfs_file_operations = { .mmap = ramfs_nommu_mmap, .get_unmapped_area = ramfs_nommu_get_unmapped_area, - .read = do_sync_read, - .aio_read = generic_file_aio_read, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .read = new_sync_read, + .read_iter = generic_file_read_iter, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .fsync = noop_fsync, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, .llseek = generic_file_llseek, }; diff --git a/fs/read_write.c b/fs/read_write.c index 31c6efa4318..009d8542a88 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -25,11 +25,12 @@ typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *, unsigned long, loff_t); +typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *); const struct file_operations generic_ro_fops = { .llseek = generic_file_llseek, - .read = do_sync_read, - .aio_read = generic_file_aio_read, + .read = new_sync_read, + .read_iter = generic_file_read_iter, .mmap = generic_file_readonly_mmap, .splice_read = generic_file_splice_read, }; @@ -390,13 +391,34 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp EXPORT_SYMBOL(do_sync_read); +ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) +{ + struct iovec iov = { .iov_base = buf, .iov_len = len }; + struct kiocb kiocb; + struct iov_iter iter; + ssize_t ret; + + init_sync_kiocb(&kiocb, filp); + kiocb.ki_pos = *ppos; + kiocb.ki_nbytes = len; + iov_iter_init(&iter, READ, &iov, 1, len); + + ret = filp->f_op->read_iter(&kiocb, &iter); + if (-EIOCBQUEUED == ret) + ret = wait_on_sync_kiocb(&kiocb); + *ppos = kiocb.ki_pos; + return ret; +} + +EXPORT_SYMBOL(new_sync_read); + ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) { ssize_t ret; if (!(file->f_mode & FMODE_READ)) return -EBADF; - if (!file->f_op->read && !file->f_op->aio_read) + if (!(file->f_mode & FMODE_CAN_READ)) return -EINVAL; if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) return -EFAULT; @@ -406,8 +428,10 @@ ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) count = ret; if (file->f_op->read) ret = file->f_op->read(file, buf, count, pos); - else + else if (file->f_op->aio_read) ret = do_sync_read(file, buf, count, pos); + else + ret = new_sync_read(file, buf, count, pos); if (ret > 0) { fsnotify_access(file); add_rchar(current, ret); @@ -439,13 +463,34 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof EXPORT_SYMBOL(do_sync_write); +ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) +{ + struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; + struct kiocb kiocb; + struct iov_iter iter; + ssize_t ret; + + init_sync_kiocb(&kiocb, filp); + kiocb.ki_pos = *ppos; + kiocb.ki_nbytes = len; + iov_iter_init(&iter, WRITE, &iov, 1, len); + + ret = filp->f_op->write_iter(&kiocb, &iter); + if (-EIOCBQUEUED == ret) + ret = wait_on_sync_kiocb(&kiocb); + *ppos = kiocb.ki_pos; + return ret; +} + +EXPORT_SYMBOL(new_sync_write); + ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos) { mm_segment_t old_fs; const char __user *p; ssize_t ret; - if (!file->f_op->write && !file->f_op->aio_write) + if (!(file->f_mode & FMODE_CAN_WRITE)) return -EINVAL; old_fs = get_fs(); @@ -455,8 +500,10 @@ ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t count = MAX_RW_COUNT; if (file->f_op->write) ret = file->f_op->write(file, p, count, pos); - else + else if (file->f_op->aio_write) ret = do_sync_write(file, p, count, pos); + else + ret = new_sync_write(file, p, count, pos); set_fs(old_fs); if (ret > 0) { fsnotify_modify(file); @@ -472,7 +519,7 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ if (!(file->f_mode & FMODE_WRITE)) return -EBADF; - if (!file->f_op->write && !file->f_op->aio_write) + if (!(file->f_mode & FMODE_CAN_WRITE)) return -EINVAL; if (unlikely(!access_ok(VERIFY_READ, buf, count))) return -EFAULT; @@ -483,8 +530,10 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_ file_start_write(file); if (file->f_op->write) ret = file->f_op->write(file, buf, count, pos); - else + else if (file->f_op->aio_write) ret = do_sync_write(file, buf, count, pos); + else + ret = new_sync_write(file, buf, count, pos); if (ret > 0) { fsnotify_modify(file); add_wchar(current, ret); @@ -601,6 +650,25 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) } EXPORT_SYMBOL(iov_shorten); +static ssize_t do_iter_readv_writev(struct file *filp, int rw, const struct iovec *iov, + unsigned long nr_segs, size_t len, loff_t *ppos, iter_fn_t fn) +{ + struct kiocb kiocb; + struct iov_iter iter; + ssize_t ret; + + init_sync_kiocb(&kiocb, filp); + kiocb.ki_pos = *ppos; + kiocb.ki_nbytes = len; + + iov_iter_init(&iter, rw, iov, nr_segs, len); + ret = fn(&kiocb, &iter); + if (ret == -EIOCBQUEUED) + ret = wait_on_sync_kiocb(&kiocb); + *ppos = kiocb.ki_pos; + return ret; +} + static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) { @@ -738,6 +806,7 @@ static ssize_t do_readv_writev(int type, struct file *file, ssize_t ret; io_fn_t fn; iov_fn_t fnv; + iter_fn_t iter_fn; ret = rw_copy_check_uvector(type, uvector, nr_segs, ARRAY_SIZE(iovstack), iovstack, &iov); @@ -753,13 +822,18 @@ static ssize_t do_readv_writev(int type, struct file *file, if (type == READ) { fn = file->f_op->read; fnv = file->f_op->aio_read; + iter_fn = file->f_op->read_iter; } else { fn = (io_fn_t)file->f_op->write; fnv = file->f_op->aio_write; + iter_fn = file->f_op->write_iter; file_start_write(file); } - if (fnv) + if (iter_fn) + ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, + pos, iter_fn); + else if (fnv) ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, pos, fnv); else @@ -785,7 +859,7 @@ ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, { if (!(file->f_mode & FMODE_READ)) return -EBADF; - if (!file->f_op->aio_read && !file->f_op->read) + if (!(file->f_mode & FMODE_CAN_READ)) return -EINVAL; return do_readv_writev(READ, file, vec, vlen, pos); @@ -798,7 +872,7 @@ ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, { if (!(file->f_mode & FMODE_WRITE)) return -EBADF; - if (!file->f_op->aio_write && !file->f_op->write) + if (!(file->f_mode & FMODE_CAN_WRITE)) return -EINVAL; return do_readv_writev(WRITE, file, vec, vlen, pos); @@ -912,6 +986,7 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, ssize_t ret; io_fn_t fn; iov_fn_t fnv; + iter_fn_t iter_fn; ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, UIO_FASTIOV, iovstack, &iov); @@ -927,13 +1002,18 @@ static ssize_t compat_do_readv_writev(int type, struct file *file, if (type == READ) { fn = file->f_op->read; fnv = file->f_op->aio_read; + iter_fn = file->f_op->read_iter; } else { fn = (io_fn_t)file->f_op->write; fnv = file->f_op->aio_write; + iter_fn = file->f_op->write_iter; file_start_write(file); } - if (fnv) + if (iter_fn) + ret = do_iter_readv_writev(file, type, iov, nr_segs, tot_len, + pos, iter_fn); + else if (fnv) ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, pos, fnv); else @@ -964,7 +1044,7 @@ static size_t compat_readv(struct file *file, goto out; ret = -EINVAL; - if (!file->f_op->aio_read && !file->f_op->read) + if (!(file->f_mode & FMODE_CAN_READ)) goto out; ret = compat_do_readv_writev(READ, file, vec, vlen, pos); @@ -1041,7 +1121,7 @@ static size_t compat_writev(struct file *file, goto out; ret = -EINVAL; - if (!file->f_op->aio_write && !file->f_op->write) + if (!(file->f_mode & FMODE_CAN_WRITE)) goto out; ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos); diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index 5f6c32c668b..db9e80ba53a 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -243,8 +243,8 @@ drop_write_lock: } const struct file_operations reiserfs_file_operations = { - .read = do_sync_read, - .write = do_sync_write, + .read = new_sync_read, + .write = new_sync_write, .unlocked_ioctl = reiserfs_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = reiserfs_compat_ioctl, @@ -253,10 +253,10 @@ const struct file_operations reiserfs_file_operations = { .open = reiserfs_file_open, .release = reiserfs_file_release, .fsync = reiserfs_sync_file, - .aio_read = generic_file_aio_read, - .aio_write = generic_file_aio_write, + .read_iter = generic_file_read_iter, + .write_iter = generic_file_write_iter, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, .llseek = generic_file_llseek, }; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index e3ca0489491..63b2b0ec49e 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -3279,15 +3279,15 @@ static int reiserfs_releasepage(struct page *page, gfp_t unused_gfp_flags) * to do in this section of the code. */ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, loff_t offset, - unsigned long nr_segs) + struct iov_iter *iter, loff_t offset) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; + size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, - reiserfs_get_blocks_direct_io); + ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, + reiserfs_get_blocks_direct_io); /* * In case of error extending write may have instantiated a few @@ -3295,7 +3295,7 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb, */ if (unlikely((rw & WRITE) && ret < 0)) { loff_t isize = i_size_read(inode); - loff_t end = offset + iov_length(iov, nr_segs); + loff_t end = offset + count; if ((end > isize) && inode_newsize_ok(inode, isize) == 0) { truncate_setsize(inode, isize); diff --git a/fs/romfs/mmap-nommu.c b/fs/romfs/mmap-nommu.c index f373bde8f54..ea06c755486 100644 --- a/fs/romfs/mmap-nommu.c +++ b/fs/romfs/mmap-nommu.c @@ -72,8 +72,8 @@ static int romfs_mmap(struct file *file, struct vm_area_struct *vma) const struct file_operations romfs_ro_fops = { .llseek = generic_file_llseek, - .read = do_sync_read, - .aio_read = generic_file_aio_read, + .read = new_sync_read, + .read_iter = generic_file_read_iter, .splice_read = generic_file_splice_read, .mmap = romfs_mmap, .get_unmapped_area = romfs_get_unmapped_area, diff --git a/fs/splice.c b/fs/splice.c index e246954ea48..f5cb9ba8451 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -32,6 +32,7 @@ #include <linux/gfp.h> #include <linux/socket.h> #include <linux/compat.h> +#include <linux/aio.h> #include "internal.h" /* @@ -717,63 +718,6 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe, sd->len, &pos, more); } -/* - * This is a little more tricky than the file -> pipe splicing. There are - * basically three cases: - * - * - Destination page already exists in the address space and there - * are users of it. For that case we have no other option that - * copying the data. Tough luck. - * - Destination page already exists in the address space, but there - * are no users of it. Make sure it's uptodate, then drop it. Fall - * through to last case. - * - Destination page does not exist, we can add the pipe page to - * the page cache and avoid the copy. - * - * If asked to move pages to the output file (SPLICE_F_MOVE is set in - * sd->flags), we attempt to migrate pages from the pipe to the output - * file address space page cache. This is possible if no one else has - * the pipe page referenced outside of the pipe and page cache. If - * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create - * a new page in the output file page cache and fill/dirty that. - */ -int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, - struct splice_desc *sd) -{ - struct file *file = sd->u.file; - struct address_space *mapping = file->f_mapping; - unsigned int offset, this_len; - struct page *page; - void *fsdata; - int ret; - - offset = sd->pos & ~PAGE_CACHE_MASK; - - this_len = sd->len; - if (this_len + offset > PAGE_CACHE_SIZE) - this_len = PAGE_CACHE_SIZE - offset; - - ret = pagecache_write_begin(file, mapping, sd->pos, this_len, - AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); - if (unlikely(ret)) - goto out; - - if (buf->page != page) { - char *src = kmap_atomic(buf->page); - char *dst = kmap_atomic(page); - - memcpy(dst + offset, src + buf->offset, this_len); - flush_dcache_page(page); - kunmap_atomic(dst); - kunmap_atomic(src); - } - ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len, - page, fsdata); -out: - return ret; -} -EXPORT_SYMBOL(pipe_to_file); - static void wakeup_pipe_writers(struct pipe_inode_info *pipe) { smp_mb(); @@ -802,7 +746,7 @@ static void wakeup_pipe_writers(struct pipe_inode_info *pipe) * locking is required around copying the pipe buffers to the * destination. */ -int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, +static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, splice_actor *actor) { int ret; @@ -849,7 +793,6 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, return 1; } -EXPORT_SYMBOL(splice_from_pipe_feed); /** * splice_from_pipe_next - wait for some data to splice from @@ -861,7 +804,7 @@ EXPORT_SYMBOL(splice_from_pipe_feed); * value (one) if pipe buffers are available. It will return zero * or -errno if no more data needs to be spliced. */ -int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) +static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) { while (!pipe->nrbufs) { if (!pipe->writers) @@ -886,7 +829,6 @@ int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) return 1; } -EXPORT_SYMBOL(splice_from_pipe_next); /** * splice_from_pipe_begin - start splicing from pipe @@ -897,12 +839,11 @@ EXPORT_SYMBOL(splice_from_pipe_next); * splice_from_pipe_next() and splice_from_pipe_feed() to * initialize the necessary fields of @sd. */ -void splice_from_pipe_begin(struct splice_desc *sd) +static void splice_from_pipe_begin(struct splice_desc *sd) { sd->num_spliced = 0; sd->need_wakeup = false; } -EXPORT_SYMBOL(splice_from_pipe_begin); /** * splice_from_pipe_end - finish splicing from pipe @@ -914,12 +855,11 @@ EXPORT_SYMBOL(splice_from_pipe_begin); * be called after a loop containing splice_from_pipe_next() and * splice_from_pipe_feed(). */ -void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd) +static void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd) { if (sd->need_wakeup) wakeup_pipe_writers(pipe); } -EXPORT_SYMBOL(splice_from_pipe_end); /** * __splice_from_pipe - splice data from a pipe to given actor @@ -985,7 +925,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, } /** - * generic_file_splice_write - splice data from a pipe to a file + * iter_file_splice_write - splice data from a pipe to a file * @pipe: pipe info * @out: file to write to * @ppos: position in @out @@ -995,40 +935,122 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, * Description: * Will either move or copy pages (determined by @flags options) from * the given pipe inode to the given file. + * This one is ->write_iter-based. * */ ssize_t -generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, +iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, loff_t *ppos, size_t len, unsigned int flags) { - struct address_space *mapping = out->f_mapping; - struct inode *inode = mapping->host; struct splice_desc sd = { .total_len = len, .flags = flags, .pos = *ppos, .u.file = out, }; + int nbufs = pipe->buffers; + struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec), + GFP_KERNEL); ssize_t ret; + if (unlikely(!array)) + return -ENOMEM; + pipe_lock(pipe); splice_from_pipe_begin(&sd); - do { + while (sd.total_len) { + struct iov_iter from; + struct kiocb kiocb; + size_t left; + int n, idx; + ret = splice_from_pipe_next(pipe, &sd); if (ret <= 0) break; - mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); - ret = file_remove_suid(out); - if (!ret) { - ret = file_update_time(out); - if (!ret) - ret = splice_from_pipe_feed(pipe, &sd, - pipe_to_file); + if (unlikely(nbufs < pipe->buffers)) { + kfree(array); + nbufs = pipe->buffers; + array = kcalloc(nbufs, sizeof(struct bio_vec), + GFP_KERNEL); + if (!array) { + ret = -ENOMEM; + break; + } } - mutex_unlock(&inode->i_mutex); - } while (ret > 0); + + /* build the vector */ + left = sd.total_len; + for (n = 0, idx = pipe->curbuf; left && n < pipe->nrbufs; n++, idx++) { + struct pipe_buffer *buf = pipe->bufs + idx; + size_t this_len = buf->len; + + if (this_len > left) + this_len = left; + + if (idx == pipe->buffers - 1) + idx = -1; + + ret = buf->ops->confirm(pipe, buf); + if (unlikely(ret)) { + if (ret == -ENODATA) + ret = 0; + goto done; + } + + array[n].bv_page = buf->page; + array[n].bv_len = this_len; + array[n].bv_offset = buf->offset; + left -= this_len; + } + + /* ... iov_iter */ + from.type = ITER_BVEC | WRITE; + from.bvec = array; + from.nr_segs = n; + from.count = sd.total_len - left; + from.iov_offset = 0; + + /* ... and iocb */ + init_sync_kiocb(&kiocb, out); + kiocb.ki_pos = sd.pos; + kiocb.ki_nbytes = sd.total_len - left; + + /* now, send it */ + ret = out->f_op->write_iter(&kiocb, &from); + if (-EIOCBQUEUED == ret) + ret = wait_on_sync_kiocb(&kiocb); + + if (ret <= 0) + break; + + sd.num_spliced += ret; + sd.total_len -= ret; + *ppos = sd.pos = kiocb.ki_pos; + + /* dismiss the fully eaten buffers, adjust the partial one */ + while (ret) { + struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; + if (ret >= buf->len) { + const struct pipe_buf_operations *ops = buf->ops; + ret -= buf->len; + buf->len = 0; + buf->ops = NULL; + ops->release(pipe, buf); + pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); + pipe->nrbufs--; + if (pipe->files) + sd.need_wakeup = true; + } else { + buf->offset += ret; + buf->len -= ret; + ret = 0; + } + } + } +done: + kfree(array); splice_from_pipe_end(pipe, &sd); pipe_unlock(pipe); @@ -1036,21 +1058,10 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, if (sd.num_spliced) ret = sd.num_spliced; - if (ret > 0) { - int err; - - err = generic_write_sync(out, *ppos, ret); - if (err) - ret = err; - else - *ppos += ret; - balance_dirty_pages_ratelimited(mapping); - } - return ret; } -EXPORT_SYMBOL(generic_file_splice_write); +EXPORT_SYMBOL(iter_file_splice_write); static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf, struct splice_desc *sd) @@ -1549,7 +1560,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov, goto out; count = ret; - iov_iter_init(&iter, iov, nr_segs, count, 0); + iov_iter_init(&iter, READ, iov, nr_segs, count); sd.len = 0; sd.total_len = count; diff --git a/fs/sysv/file.c b/fs/sysv/file.c index 9d4dc683179..b00811c75b2 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c @@ -21,10 +21,10 @@ */ const struct file_operations sysv_file_operations = { .llseek = generic_file_llseek, - .read = do_sync_read, - .aio_read = generic_file_aio_read, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .read = new_sync_read, + .read_iter = generic_file_read_iter, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .fsync = generic_file_fsync, .splice_read = generic_file_splice_read, diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index 0ab7f7dfb98..b5b593c4527 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -1364,17 +1364,17 @@ static inline int mctime_update_needed(const struct inode *inode, /** * update_ctime - update mtime and ctime of an inode. - * @c: UBIFS file-system description object * @inode: inode to update * * This function updates mtime and ctime of the inode if it is not equivalent to * current time. Returns zero in case of success and a negative error code in * case of failure. */ -static int update_mctime(struct ubifs_info *c, struct inode *inode) +static int update_mctime(struct inode *inode) { struct timespec now = ubifs_current_time(inode); struct ubifs_inode *ui = ubifs_inode(inode); + struct ubifs_info *c = inode->i_sb->s_fs_info; if (mctime_update_needed(inode, &now)) { int err, release; @@ -1397,18 +1397,13 @@ static int update_mctime(struct ubifs_info *c, struct inode *inode) return 0; } -static ssize_t ubifs_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t pos) +static ssize_t ubifs_write_iter(struct kiocb *iocb, struct iov_iter *from) { - int err; - struct inode *inode = iocb->ki_filp->f_mapping->host; - struct ubifs_info *c = inode->i_sb->s_fs_info; - - err = update_mctime(c, inode); + int err = update_mctime(file_inode(iocb->ki_filp)); if (err) return err; - return generic_file_aio_write(iocb, iov, nr_segs, pos); + return generic_file_write_iter(iocb, from); } static int ubifs_set_page_dirty(struct page *page) @@ -1582,15 +1577,15 @@ const struct inode_operations ubifs_symlink_inode_operations = { const struct file_operations ubifs_file_operations = { .llseek = generic_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = generic_file_aio_read, - .aio_write = ubifs_aio_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = generic_file_read_iter, + .write_iter = ubifs_write_iter, .mmap = ubifs_file_mmap, .fsync = ubifs_fsync, .unlocked_ioctl = ubifs_ioctl, .splice_read = generic_file_splice_read, - .splice_write = generic_file_splice_write, + .splice_write = iter_file_splice_write, #ifdef CONFIG_COMPAT .compat_ioctl = ubifs_compat_ioctl, #endif diff --git a/fs/udf/file.c b/fs/udf/file.c index d2c170f8b03..d80738fdf42 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -119,8 +119,8 @@ static int udf_adinicb_write_end(struct file *file, } static ssize_t udf_adinicb_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, - loff_t offset, unsigned long nr_segs) + struct iov_iter *iter, + loff_t offset) { /* Fallback to buffered I/O. */ return 0; @@ -134,8 +134,7 @@ const struct address_space_operations udf_adinicb_aops = { .direct_IO = udf_adinicb_direct_IO, }; -static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t ppos) +static ssize_t udf_file_write_iter(struct kiocb *iocb, struct iov_iter *from) { ssize_t retval; struct file *file = iocb->ki_filp; @@ -150,7 +149,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, if (file->f_flags & O_APPEND) pos = inode->i_size; else - pos = ppos; + pos = iocb->ki_pos; if (inode->i_sb->s_blocksize < (udf_file_entry_alloc_offset(inode) + @@ -171,7 +170,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov, } else up_write(&iinfo->i_data_sem); - retval = __generic_file_aio_write(iocb, iov, nr_segs); + retval = __generic_file_write_iter(iocb, from); mutex_unlock(&inode->i_mutex); if (retval > 0) { @@ -252,13 +251,13 @@ static int udf_release_file(struct inode *inode, struct file *filp) } const struct file_operations udf_file_operations = { - .read = do_sync_read, - .aio_read = generic_file_aio_read, + .read = new_sync_read, + .read_iter = generic_file_read_iter, .unlocked_ioctl = udf_ioctl, .open = generic_file_open, .mmap = generic_file_mmap, - .write = do_sync_write, - .aio_write = udf_file_aio_write, + .write = new_sync_write, + .write_iter = udf_file_write_iter, .release = udf_release_file, .fsync = generic_file_fsync, .splice_read = generic_file_splice_read, diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 5d643706212..236cd48184c 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -217,18 +217,18 @@ static int udf_write_begin(struct file *file, struct address_space *mapping, } static ssize_t udf_direct_IO(int rw, struct kiocb *iocb, - const struct iovec *iov, - loff_t offset, unsigned long nr_segs) + struct iov_iter *iter, + loff_t offset) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; + size_t count = iov_iter_count(iter); ssize_t ret; - ret = blockdev_direct_IO(rw, iocb, inode, iov, offset, nr_segs, - udf_get_block); + ret = blockdev_direct_IO(rw, iocb, inode, iter, offset, udf_get_block); if (unlikely(ret < 0 && (rw & WRITE))) - udf_write_failed(mapping, offset + iov_length(iov, nr_segs)); + udf_write_failed(mapping, offset + count); return ret; } diff --git a/fs/ufs/file.c b/fs/ufs/file.c index 33afa20d450..c84ec010a67 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -35,10 +35,10 @@ const struct file_operations ufs_file_operations = { .llseek = generic_file_llseek, - .read = do_sync_read, - .aio_read = generic_file_aio_read, - .write = do_sync_write, - .aio_write = generic_file_aio_write, + .read = new_sync_read, + .read_iter = generic_file_read_iter, + .write = new_sync_write, + .write_iter = generic_file_write_iter, .mmap = generic_file_mmap, .open = generic_file_open, .fsync = generic_file_fsync, diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index e32640eedea..faaf716e208 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -1486,9 +1486,8 @@ STATIC ssize_t xfs_vm_direct_IO( int rw, struct kiocb *iocb, - const struct iovec *iov, - loff_t offset, - unsigned long nr_segs) + struct iov_iter *iter, + loff_t offset) { struct inode *inode = iocb->ki_filp->f_mapping->host; struct block_device *bdev = xfs_find_bdev_for_inode(inode); @@ -1496,7 +1495,7 @@ xfs_vm_direct_IO( ssize_t ret; if (rw & WRITE) { - size_t size = iov_length(iov, nr_segs); + size_t size = iov_iter_count(iter); /* * We cannot preallocate a size update transaction here as we @@ -1508,17 +1507,15 @@ xfs_vm_direct_IO( if (offset + size > XFS_I(inode)->i_d.di_size) ioend->io_isdirect = 1; - ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, - offset, nr_segs, - xfs_get_blocks_direct, + ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter, + offset, xfs_get_blocks_direct, xfs_end_io_direct_write, NULL, DIO_ASYNC_EXTEND); if (ret != -EIOCBQUEUED && iocb->private) goto out_destroy_ioend; } else { - ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, - offset, nr_segs, - xfs_get_blocks_direct, + ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter, + offset, xfs_get_blocks_direct, NULL, NULL, 0); } diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 1b8160dc04d..1f66779d7a4 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -229,34 +229,27 @@ xfs_file_fsync( } STATIC ssize_t -xfs_file_aio_read( +xfs_file_read_iter( struct kiocb *iocb, - const struct iovec *iovp, - unsigned long nr_segs, - loff_t pos) + struct iov_iter *to) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; - size_t size = 0; + size_t size = iov_iter_count(to); ssize_t ret = 0; int ioflags = 0; xfs_fsize_t n; + loff_t pos = iocb->ki_pos; XFS_STATS_INC(xs_read_calls); - BUG_ON(iocb->ki_pos != pos); - if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; if (file->f_mode & FMODE_NOCMTIME) ioflags |= IO_INVIS; - ret = generic_segment_checks(iovp, &nr_segs, &size, VERIFY_WRITE); - if (ret < 0) - return ret; - if (unlikely(ioflags & IO_ISDIRECT)) { xfs_buftarg_t *target = XFS_IS_REALTIME_INODE(ip) ? @@ -309,7 +302,7 @@ xfs_file_aio_read( trace_xfs_file_read(ip, size, pos, ioflags); - ret = generic_file_aio_read(iocb, iovp, nr_segs, pos); + ret = generic_file_read_iter(iocb, to); if (ret > 0) XFS_STATS_ADD(xs_read_bytes, ret); @@ -350,47 +343,6 @@ xfs_file_splice_read( } /* - * xfs_file_splice_write() does not use xfs_rw_ilock() because - * generic_file_splice_write() takes the i_mutex itself. This, in theory, - * couuld cause lock inversions between the aio_write path and the splice path - * if someone is doing concurrent splice(2) based writes and write(2) based - * writes to the same inode. The only real way to fix this is to re-implement - * the generic code here with correct locking orders. - */ -STATIC ssize_t -xfs_file_splice_write( - struct pipe_inode_info *pipe, - struct file *outfilp, - loff_t *ppos, - size_t count, - unsigned int flags) -{ - struct inode *inode = outfilp->f_mapping->host; - struct xfs_inode *ip = XFS_I(inode); - int ioflags = 0; - ssize_t ret; - - XFS_STATS_INC(xs_write_calls); - - if (outfilp->f_mode & FMODE_NOCMTIME) - ioflags |= IO_INVIS; - - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) - return -EIO; - - xfs_ilock(ip, XFS_IOLOCK_EXCL); - - trace_xfs_file_splice_write(ip, count, *ppos, ioflags); - - ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); - if (ret > 0) - XFS_STATS_ADD(xs_write_bytes, ret); - - xfs_iunlock(ip, XFS_IOLOCK_EXCL); - return ret; -} - -/* * This routine is called to handle zeroing any space in the last block of the * file that is beyond the EOF. We do this since the size is being increased * without writing anything to that block and we don't want to read the @@ -625,10 +577,7 @@ restart: STATIC ssize_t xfs_file_dio_aio_write( struct kiocb *iocb, - const struct iovec *iovp, - unsigned long nr_segs, - loff_t pos, - size_t ocount) + struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -636,9 +585,10 @@ xfs_file_dio_aio_write( struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; ssize_t ret = 0; - size_t count = ocount; int unaligned_io = 0; int iolock; + size_t count = iov_iter_count(from); + loff_t pos = iocb->ki_pos; struct xfs_buftarg *target = XFS_IS_REALTIME_INODE(ip) ? mp->m_rtdev_targp : mp->m_ddev_targp; @@ -677,6 +627,7 @@ xfs_file_dio_aio_write( ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock); if (ret) goto out; + iov_iter_truncate(from, count); if (mapping->nrpages) { ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, @@ -698,8 +649,7 @@ xfs_file_dio_aio_write( } trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); - ret = generic_file_direct_write(iocb, iovp, - &nr_segs, pos, count, ocount); + ret = generic_file_direct_write(iocb, from, pos); out: xfs_rw_iunlock(ip, iolock); @@ -712,10 +662,7 @@ out: STATIC ssize_t xfs_file_buffered_aio_write( struct kiocb *iocb, - const struct iovec *iovp, - unsigned long nr_segs, - loff_t pos, - size_t count) + struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; @@ -724,7 +671,8 @@ xfs_file_buffered_aio_write( ssize_t ret; int enospc = 0; int iolock = XFS_IOLOCK_EXCL; - struct iov_iter from; + loff_t pos = iocb->ki_pos; + size_t count = iov_iter_count(from); xfs_rw_ilock(ip, iolock); @@ -732,13 +680,13 @@ xfs_file_buffered_aio_write( if (ret) goto out; - iov_iter_init(&from, iovp, nr_segs, count, 0); + iov_iter_truncate(from, count); /* We can write back this queue in page reclaim */ current->backing_dev_info = mapping->backing_dev_info; write_retry: trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); - ret = generic_perform_write(file, &from, pos); + ret = generic_perform_write(file, from, pos); if (likely(ret >= 0)) iocb->ki_pos = pos + ret; /* @@ -759,40 +707,29 @@ out: } STATIC ssize_t -xfs_file_aio_write( +xfs_file_write_iter( struct kiocb *iocb, - const struct iovec *iovp, - unsigned long nr_segs, - loff_t pos) + struct iov_iter *from) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; struct xfs_inode *ip = XFS_I(inode); ssize_t ret; - size_t ocount = 0; + size_t ocount = iov_iter_count(from); XFS_STATS_INC(xs_write_calls); - BUG_ON(iocb->ki_pos != pos); - - ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); - if (ret) - return ret; - if (ocount == 0) return 0; - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { - ret = -EIO; - goto out; - } + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + return -EIO; if (unlikely(file->f_flags & O_DIRECT)) - ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount); + ret = xfs_file_dio_aio_write(iocb, from); else - ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, - ocount); + ret = xfs_file_buffered_aio_write(iocb, from); if (ret > 0) { ssize_t err; @@ -804,8 +741,6 @@ xfs_file_aio_write( if (err < 0) ret = err; } - -out: return ret; } @@ -1461,12 +1396,12 @@ xfs_file_llseek( const struct file_operations xfs_file_operations = { .llseek = xfs_file_llseek, - .read = do_sync_read, - .write = do_sync_write, - .aio_read = xfs_file_aio_read, - .aio_write = xfs_file_aio_write, + .read = new_sync_read, + .write = new_sync_write, + .read_iter = xfs_file_read_iter, + .write_iter = xfs_file_write_iter, .splice_read = xfs_file_splice_read, - .splice_write = xfs_file_splice_write, + .splice_write = iter_file_splice_write, .unlocked_ioctl = xfs_file_ioctl, #ifdef CONFIG_COMPAT .compat_ioctl = xfs_file_compat_ioctl, diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 6910458915c..152f8278263 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1118,7 +1118,6 @@ DEFINE_RW_EVENT(xfs_file_read); DEFINE_RW_EVENT(xfs_file_buffered_write); DEFINE_RW_EVENT(xfs_file_direct_write); DEFINE_RW_EVENT(xfs_file_splice_read); -DEFINE_RW_EVENT(xfs_file_splice_write); DECLARE_EVENT_CLASS(xfs_page_class, TP_PROTO(struct inode *inode, struct page *page, unsigned long off, |