aboutsummaryrefslogtreecommitdiff
path: root/fs/nfs/direct.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs/direct.c')
-rw-r--r--fs/nfs/direct.c705
1 files changed, 349 insertions, 356 deletions
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 91ff089d341..f11b9eed0de 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -108,6 +108,97 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
return atomic_dec_and_test(&dreq->io_count);
}
+/*
+ * nfs_direct_select_verf - select the right verifier
+ * @dreq - direct request possibly spanning multiple servers
+ * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs
+ * @ds_idx - index of data server in data server list, only valid if ds_clp set
+ *
+ * returns the correct verifier to use given the role of the server
+ */
+static struct nfs_writeverf *
+nfs_direct_select_verf(struct nfs_direct_req *dreq,
+ struct nfs_client *ds_clp,
+ int ds_idx)
+{
+ struct nfs_writeverf *verfp = &dreq->verf;
+
+#ifdef CONFIG_NFS_V4_1
+ if (ds_clp) {
+ /* pNFS is in use, use the DS verf */
+ if (ds_idx >= 0 && ds_idx < dreq->ds_cinfo.nbuckets)
+ verfp = &dreq->ds_cinfo.buckets[ds_idx].direct_verf;
+ else
+ WARN_ON_ONCE(1);
+ }
+#endif
+ return verfp;
+}
+
+
+/*
+ * nfs_direct_set_hdr_verf - set the write/commit verifier
+ * @dreq - direct request possibly spanning multiple servers
+ * @hdr - pageio header to validate against previously seen verfs
+ *
+ * Set the server's (MDS or DS) "seen" verifier
+ */
+static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
+ struct nfs_pgio_header *hdr)
+{
+ struct nfs_writeverf *verfp;
+
+ verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
+ hdr->data->ds_idx);
+ WARN_ON_ONCE(verfp->committed >= 0);
+ memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
+ WARN_ON_ONCE(verfp->committed < 0);
+}
+
+/*
+ * nfs_direct_cmp_hdr_verf - compare verifier for pgio header
+ * @dreq - direct request possibly spanning multiple servers
+ * @hdr - pageio header to validate against previously seen verf
+ *
+ * set the server's "seen" verf if not initialized.
+ * returns result of comparison between @hdr->verf and the "seen"
+ * verf of the server used by @hdr (DS or MDS)
+ */
+static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
+ struct nfs_pgio_header *hdr)
+{
+ struct nfs_writeverf *verfp;
+
+ verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
+ hdr->data->ds_idx);
+ if (verfp->committed < 0) {
+ nfs_direct_set_hdr_verf(dreq, hdr);
+ return 0;
+ }
+ return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
+}
+
+#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
+/*
+ * nfs_direct_cmp_commit_data_verf - compare verifier for commit data
+ * @dreq - direct request possibly spanning multiple servers
+ * @data - commit data to validate against previously seen verf
+ *
+ * returns result of comparison between @data->verf and the verf of
+ * the server used by @data (DS or MDS)
+ */
+static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
+ struct nfs_commit_data *data)
+{
+ struct nfs_writeverf *verfp;
+
+ verfp = nfs_direct_select_verf(dreq, data->ds_clp,
+ data->ds_commit_index);
+ WARN_ON_ONCE(verfp->committed < 0);
+ return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf));
+}
+#endif
+
/**
* nfs_direct_IO - NFS address space operation for direct I/O
* @rw: direction (read or write)
@@ -121,21 +212,20 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
* shunt off direct read and write requests before the VFS gets them,
* so this method is only ever called for swap.
*/
-ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs)
+ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
{
#ifndef CONFIG_NFS_SWAP
- dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n",
- iocb->ki_filp->f_path.dentry->d_name.name,
- (long long) pos, nr_segs);
+ dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n",
+ iocb->ki_filp, (long long) pos, iter->nr_segs);
return -EINVAL;
#else
VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
if (rw == READ || rw == KERNEL_READ)
- return nfs_file_direct_read(iocb, iov, nr_segs, pos,
+ return nfs_file_direct_read(iocb, iter, pos,
rw == READ ? true : false);
- return nfs_file_direct_write(iocb, iov, nr_segs, pos,
+ return nfs_file_direct_write(iocb, iter, pos,
rw == WRITE ? true : false);
#endif /* CONFIG_NFS_SWAP */
}
@@ -169,6 +259,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
kref_get(&dreq->kref);
init_completion(&dreq->completion);
INIT_LIST_HEAD(&dreq->mds_cinfo.list);
+ dreq->verf.committed = NFS_INVALID_STABLE_HOW; /* not set yet */
INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
spin_lock_init(&dreq->lock);
@@ -223,14 +314,31 @@ out:
* Synchronous I/O uses a stack-allocated iocb. Thus we can't trust
* the iocb is still valid here if this is a synchronous request.
*/
-static void nfs_direct_complete(struct nfs_direct_req *dreq)
+static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)
{
+ struct inode *inode = dreq->inode;
+
+ if (dreq->iocb && write) {
+ loff_t pos = dreq->iocb->ki_pos + dreq->count;
+
+ spin_lock(&inode->i_lock);
+ if (i_size_read(inode) < pos)
+ i_size_write(inode, pos);
+ spin_unlock(&inode->i_lock);
+ }
+
+ if (write)
+ nfs_zap_mapping(inode, inode->i_mapping);
+
+ inode_dio_done(inode);
+
if (dreq->iocb) {
long res = (long) dreq->error;
if (!res)
res = (long) dreq->count;
aio_complete(dreq->iocb, res, 0);
}
+
complete_all(&dreq->completion);
nfs_direct_req_release(dreq);
@@ -238,9 +346,9 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
static void nfs_direct_readpage_release(struct nfs_page *req)
{
- dprintk("NFS: direct read done (%s/%lld %d@%lld)\n",
+ dprintk("NFS: direct read done (%s/%llu %d@%lld)\n",
req->wb_context->dentry->d_inode->i_sb->s_id,
- (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
+ (unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
req->wb_bytes,
(long long)req_offset(req));
nfs_release_request(req);
@@ -273,7 +381,7 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
}
out_put:
if (put_dreq(dreq))
- nfs_direct_complete(dreq);
+ nfs_direct_complete(dreq, false);
hdr->release(hdr);
}
@@ -306,66 +414,42 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
* handled automatically by nfs_direct_read_result(). Otherwise, if
* no requests have been sent, just return an error.
*/
-static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
- const struct iovec *iov,
- loff_t pos, bool uio)
-{
- struct nfs_direct_req *dreq = desc->pg_dreq;
- struct nfs_open_context *ctx = dreq->ctx;
- struct inode *inode = ctx->dentry->d_inode;
- unsigned long user_addr = (unsigned long)iov->iov_base;
- size_t count = iov->iov_len;
- size_t rsize = NFS_SERVER(inode)->rsize;
- unsigned int pgbase;
- int result;
- ssize_t started = 0;
- struct page **pagevec = NULL;
- unsigned int npages;
-
- do {
- size_t bytes;
- int i;
- pgbase = user_addr & ~PAGE_MASK;
- bytes = min(max_t(size_t, rsize, PAGE_SIZE), count);
+static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
+ struct iov_iter *iter,
+ loff_t pos)
+{
+ struct nfs_pageio_descriptor desc;
+ struct inode *inode = dreq->inode;
+ ssize_t result = -EINVAL;
+ size_t requested_bytes = 0;
+ size_t rsize = max_t(size_t, NFS_SERVER(inode)->rsize, PAGE_SIZE);
- result = -ENOMEM;
- npages = nfs_page_array_len(pgbase, bytes);
- if (!pagevec)
- pagevec = kmalloc(npages * sizeof(struct page *),
- GFP_KERNEL);
- if (!pagevec)
- break;
- if (uio) {
- down_read(&current->mm->mmap_sem);
- result = get_user_pages(current, current->mm, user_addr,
- npages, 1, 0, pagevec, NULL);
- up_read(&current->mm->mmap_sem);
- if (result < 0)
- break;
- } else {
- WARN_ON(npages != 1);
- result = get_kernel_page(user_addr, 1, pagevec);
- if (WARN_ON(result != 1))
- break;
- }
+ nfs_pageio_init_read(&desc, dreq->inode, false,
+ &nfs_direct_read_completion_ops);
+ get_dreq(dreq);
+ desc.pg_dreq = dreq;
+ atomic_inc(&inode->i_dio_count);
- if ((unsigned)result < npages) {
- bytes = result * PAGE_SIZE;
- if (bytes <= pgbase) {
- nfs_direct_release_pages(pagevec, result);
- break;
- }
- bytes -= pgbase;
- npages = result;
- }
+ while (iov_iter_count(iter)) {
+ struct page **pagevec;
+ size_t bytes;
+ size_t pgbase;
+ unsigned npages, i;
+ result = iov_iter_get_pages_alloc(iter, &pagevec,
+ rsize, &pgbase);
+ if (result < 0)
+ break;
+
+ bytes = result;
+ iov_iter_advance(iter, bytes);
+ npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
for (i = 0; i < npages; i++) {
struct nfs_page *req;
unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
/* XXX do we need to do the eof zeroing found in async_filler? */
- req = nfs_create_request(dreq->ctx, dreq->inode,
- pagevec[i],
+ req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
pgbase, req_len);
if (IS_ERR(req)) {
result = PTR_ERR(req);
@@ -373,54 +457,21 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
}
req->wb_index = pos >> PAGE_SHIFT;
req->wb_offset = pos & ~PAGE_MASK;
- if (!nfs_pageio_add_request(desc, req)) {
- result = desc->pg_error;
+ if (!nfs_pageio_add_request(&desc, req)) {
+ result = desc.pg_error;
nfs_release_request(req);
break;
}
pgbase = 0;
bytes -= req_len;
- started += req_len;
- user_addr += req_len;
+ requested_bytes += req_len;
pos += req_len;
- count -= req_len;
dreq->bytes_left -= req_len;
}
- /* The nfs_page now hold references to these pages */
nfs_direct_release_pages(pagevec, npages);
- } while (count != 0 && result >= 0);
-
- kfree(pagevec);
-
- if (started)
- return started;
- return result < 0 ? (ssize_t) result : -EFAULT;
-}
-
-static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t pos, bool uio)
-{
- struct nfs_pageio_descriptor desc;
- ssize_t result = -EINVAL;
- size_t requested_bytes = 0;
- unsigned long seg;
-
- NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode,
- &nfs_direct_read_completion_ops);
- get_dreq(dreq);
- desc.pg_dreq = dreq;
-
- for (seg = 0; seg < nr_segs; seg++) {
- const struct iovec *vec = &iov[seg];
- result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio);
+ kvfree(pagevec);
if (result < 0)
break;
- requested_bytes += result;
- if ((size_t)result < vec->iov_len)
- break;
- pos += vec->iov_len;
}
nfs_pageio_complete(&desc);
@@ -430,29 +481,69 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
* generic layer handle the completion.
*/
if (requested_bytes == 0) {
+ inode_dio_done(inode);
nfs_direct_req_release(dreq);
return result < 0 ? result : -EIO;
}
if (put_dreq(dreq))
- nfs_direct_complete(dreq);
+ nfs_direct_complete(dreq, false);
return 0;
}
-static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos, bool uio)
+/**
+ * nfs_file_direct_read - file direct read operation for NFS files
+ * @iocb: target I/O control block
+ * @iter: vector of user buffers into which to read data
+ * @pos: byte offset in file where reading starts
+ *
+ * We use this function for direct reads instead of calling
+ * generic_file_aio_read() in order to avoid gfar's check to see if
+ * the request starts before the end of the file. For that check
+ * to work, we must generate a GETATTR before each direct read, and
+ * even then there is a window between the GETATTR and the subsequent
+ * READ where the file size could change. Our preference is simply
+ * to do all reads the application wants, and the server will take
+ * care of managing the end of file boundary.
+ *
+ * This function also eliminates unnecessarily updating the file's
+ * atime locally, as the NFS server sets the file's atime, and this
+ * client must read the updated atime from the server back into its
+ * cache.
+ */
+ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t pos, bool uio)
{
- ssize_t result = -ENOMEM;
- struct inode *inode = iocb->ki_filp->f_mapping->host;
+ struct file *file = iocb->ki_filp;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
struct nfs_direct_req *dreq;
struct nfs_lock_context *l_ctx;
+ ssize_t result = -EINVAL;
+ size_t count = iov_iter_count(iter);
+ nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
+
+ dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n",
+ file, count, (long long) pos);
+
+ result = 0;
+ if (!count)
+ goto out;
+
+ mutex_lock(&inode->i_mutex);
+ result = nfs_sync_mapping(mapping);
+ if (result)
+ goto out_unlock;
+ task_io_account_read(count);
+
+ result = -ENOMEM;
dreq = nfs_direct_req_alloc();
if (dreq == NULL)
- goto out;
+ goto out_unlock;
dreq->inode = inode;
- dreq->bytes_left = iov_length(iov, nr_segs);
+ dreq->bytes_left = count;
dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
l_ctx = nfs_get_lock_context(dreq->ctx);
if (IS_ERR(l_ctx)) {
@@ -463,22 +554,28 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
if (!is_sync_kiocb(iocb))
dreq->iocb = iocb;
- NFS_I(inode)->read_io += iov_length(iov, nr_segs);
- result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio);
- if (!result)
+ NFS_I(inode)->read_io += count;
+ result = nfs_direct_read_schedule_iovec(dreq, iter, pos);
+
+ mutex_unlock(&inode->i_mutex);
+
+ if (!result) {
result = nfs_direct_wait(dreq);
+ if (result > 0)
+ iocb->ki_pos = pos + result;
+ }
+
+ nfs_direct_req_release(dreq);
+ return result;
+
out_release:
nfs_direct_req_release(dreq);
+out_unlock:
+ mutex_unlock(&inode->i_mutex);
out:
return result;
}
-static void nfs_inode_dio_write_done(struct inode *inode)
-{
- nfs_zap_mapping(inode, inode->i_mapping);
- inode_dio_done(inode);
-}
-
#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
{
@@ -497,7 +594,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
dreq->count = 0;
get_dreq(dreq);
- NFS_PROTO(dreq->inode)->write_pageio_init(&desc, dreq->inode, FLUSH_STABLE,
+ nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false,
&nfs_direct_write_completion_ops);
desc.pg_dreq = dreq;
@@ -536,7 +633,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
dprintk("NFS: %5u commit failed with error %d.\n",
data->task.tk_pid, status);
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
- } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
+ } else if (nfs_direct_cmp_commit_data_verf(dreq, data)) {
dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
}
@@ -594,8 +691,7 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)
nfs_direct_write_reschedule(dreq);
break;
default:
- nfs_inode_dio_write_done(dreq->inode);
- nfs_direct_complete(dreq);
+ nfs_direct_complete(dreq, true);
}
}
@@ -611,114 +707,10 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)
static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
{
- nfs_inode_dio_write_done(inode);
- nfs_direct_complete(dreq);
+ nfs_direct_complete(dreq, true);
}
#endif
-/*
- * NB: Return the value of the first error return code. Subsequent
- * errors after the first one are ignored.
- */
-/*
- * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
- * operation. If nfs_writedata_alloc() or get_user_pages() fails,
- * bail and stop sending more writes. Write length accounting is
- * handled automatically by nfs_direct_write_result(). Otherwise, if
- * no requests have been sent, just return an error.
- */
-static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
- const struct iovec *iov,
- loff_t pos, bool uio)
-{
- struct nfs_direct_req *dreq = desc->pg_dreq;
- struct nfs_open_context *ctx = dreq->ctx;
- struct inode *inode = ctx->dentry->d_inode;
- unsigned long user_addr = (unsigned long)iov->iov_base;
- size_t count = iov->iov_len;
- size_t wsize = NFS_SERVER(inode)->wsize;
- unsigned int pgbase;
- int result;
- ssize_t started = 0;
- struct page **pagevec = NULL;
- unsigned int npages;
-
- do {
- size_t bytes;
- int i;
-
- pgbase = user_addr & ~PAGE_MASK;
- bytes = min(max_t(size_t, wsize, PAGE_SIZE), count);
-
- result = -ENOMEM;
- npages = nfs_page_array_len(pgbase, bytes);
- if (!pagevec)
- pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
- if (!pagevec)
- break;
-
- if (uio) {
- down_read(&current->mm->mmap_sem);
- result = get_user_pages(current, current->mm, user_addr,
- npages, 0, 0, pagevec, NULL);
- up_read(&current->mm->mmap_sem);
- if (result < 0)
- break;
- } else {
- WARN_ON(npages != 1);
- result = get_kernel_page(user_addr, 0, pagevec);
- if (WARN_ON(result != 1))
- break;
- }
-
- if ((unsigned)result < npages) {
- bytes = result * PAGE_SIZE;
- if (bytes <= pgbase) {
- nfs_direct_release_pages(pagevec, result);
- break;
- }
- bytes -= pgbase;
- npages = result;
- }
-
- for (i = 0; i < npages; i++) {
- struct nfs_page *req;
- unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
-
- req = nfs_create_request(dreq->ctx, dreq->inode,
- pagevec[i],
- pgbase, req_len);
- if (IS_ERR(req)) {
- result = PTR_ERR(req);
- break;
- }
- nfs_lock_request(req);
- req->wb_index = pos >> PAGE_SHIFT;
- req->wb_offset = pos & ~PAGE_MASK;
- if (!nfs_pageio_add_request(desc, req)) {
- result = desc->pg_error;
- nfs_unlock_and_release_request(req);
- break;
- }
- pgbase = 0;
- bytes -= req_len;
- started += req_len;
- user_addr += req_len;
- pos += req_len;
- count -= req_len;
- dreq->bytes_left -= req_len;
- }
- /* The nfs_page now hold references to these pages */
- nfs_direct_release_pages(pagevec, npages);
- } while (count != 0 && result >= 0);
-
- kfree(pagevec);
-
- if (started)
- return started;
- return result < 0 ? (ssize_t) result : -EFAULT;
-}
-
static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
{
struct nfs_direct_req *dreq = hdr->dreq;
@@ -748,13 +740,13 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
bit = NFS_IOHDR_NEED_RESCHED;
else if (dreq->flags == 0) {
- memcpy(&dreq->verf, hdr->verf,
- sizeof(dreq->verf));
+ nfs_direct_set_hdr_verf(dreq, hdr);
bit = NFS_IOHDR_NEED_COMMIT;
dreq->flags = NFS_ODIRECT_DO_COMMIT;
} else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
- if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) {
- dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) {
+ dreq->flags =
+ NFS_ODIRECT_RESCHED_WRITES;
bit = NFS_IOHDR_NEED_RESCHED;
} else
bit = NFS_IOHDR_NEED_COMMIT;
@@ -764,6 +756,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
spin_unlock(&dreq->lock);
while (!list_empty(&hdr->pages)) {
+
req = nfs_list_entry(hdr->pages.next);
nfs_list_remove_request(req);
switch (bit) {
@@ -798,33 +791,77 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
.completion = nfs_direct_write_completion,
};
+
+/*
+ * NB: Return the value of the first error return code. Subsequent
+ * errors after the first one are ignored.
+ */
+/*
+ * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
+ * operation. If nfs_writedata_alloc() or get_user_pages() fails,
+ * bail and stop sending more writes. Write length accounting is
+ * handled automatically by nfs_direct_write_result(). Otherwise, if
+ * no requests have been sent, just return an error.
+ */
static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
- const struct iovec *iov,
- unsigned long nr_segs,
- loff_t pos, bool uio)
+ struct iov_iter *iter,
+ loff_t pos)
{
struct nfs_pageio_descriptor desc;
struct inode *inode = dreq->inode;
ssize_t result = 0;
size_t requested_bytes = 0;
- unsigned long seg;
+ size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);
- NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE,
+ nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false,
&nfs_direct_write_completion_ops);
desc.pg_dreq = dreq;
get_dreq(dreq);
atomic_inc(&inode->i_dio_count);
- NFS_I(dreq->inode)->write_io += iov_length(iov, nr_segs);
- for (seg = 0; seg < nr_segs; seg++) {
- const struct iovec *vec = &iov[seg];
- result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio);
+ NFS_I(inode)->write_io += iov_iter_count(iter);
+ while (iov_iter_count(iter)) {
+ struct page **pagevec;
+ size_t bytes;
+ size_t pgbase;
+ unsigned npages, i;
+
+ result = iov_iter_get_pages_alloc(iter, &pagevec,
+ wsize, &pgbase);
if (result < 0)
break;
- requested_bytes += result;
- if ((size_t)result < vec->iov_len)
+
+ bytes = result;
+ iov_iter_advance(iter, bytes);
+ npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
+ for (i = 0; i < npages; i++) {
+ struct nfs_page *req;
+ unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
+
+ req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
+ pgbase, req_len);
+ if (IS_ERR(req)) {
+ result = PTR_ERR(req);
+ break;
+ }
+ nfs_lock_request(req);
+ req->wb_index = pos >> PAGE_SHIFT;
+ req->wb_offset = pos & ~PAGE_MASK;
+ if (!nfs_pageio_add_request(&desc, req)) {
+ result = desc.pg_error;
+ nfs_unlock_and_release_request(req);
+ break;
+ }
+ pgbase = 0;
+ bytes -= req_len;
+ requested_bytes += req_len;
+ pos += req_len;
+ dreq->bytes_left -= req_len;
+ }
+ nfs_direct_release_pages(pagevec, npages);
+ kvfree(pagevec);
+ if (result < 0)
break;
- pos += vec->iov_len;
}
nfs_pageio_complete(&desc);
@@ -843,100 +880,10 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
return 0;
}
-static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos,
- size_t count, bool uio)
-{
- ssize_t result = -ENOMEM;
- struct inode *inode = iocb->ki_filp->f_mapping->host;
- struct nfs_direct_req *dreq;
- struct nfs_lock_context *l_ctx;
-
- dreq = nfs_direct_req_alloc();
- if (!dreq)
- goto out;
-
- dreq->inode = inode;
- dreq->bytes_left = count;
- dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
- l_ctx = nfs_get_lock_context(dreq->ctx);
- if (IS_ERR(l_ctx)) {
- result = PTR_ERR(l_ctx);
- goto out_release;
- }
- dreq->l_ctx = l_ctx;
- if (!is_sync_kiocb(iocb))
- dreq->iocb = iocb;
-
- result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio);
- if (!result)
- result = nfs_direct_wait(dreq);
-out_release:
- nfs_direct_req_release(dreq);
-out:
- return result;
-}
-
-/**
- * nfs_file_direct_read - file direct read operation for NFS files
- * @iocb: target I/O control block
- * @iov: vector of user buffers into which to read data
- * @nr_segs: size of iov vector
- * @pos: byte offset in file where reading starts
- *
- * We use this function for direct reads instead of calling
- * generic_file_aio_read() in order to avoid gfar's check to see if
- * the request starts before the end of the file. For that check
- * to work, we must generate a GETATTR before each direct read, and
- * even then there is a window between the GETATTR and the subsequent
- * READ where the file size could change. Our preference is simply
- * to do all reads the application wants, and the server will take
- * care of managing the end of file boundary.
- *
- * This function also eliminates unnecessarily updating the file's
- * atime locally, as the NFS server sets the file's atime, and this
- * client must read the updated atime from the server back into its
- * cache.
- */
-ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos, bool uio)
-{
- ssize_t retval = -EINVAL;
- struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
- size_t count;
-
- count = iov_length(iov, nr_segs);
- nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count);
-
- dfprintk(FILE, "NFS: direct read(%s/%s, %zd@%Ld)\n",
- file->f_path.dentry->d_parent->d_name.name,
- file->f_path.dentry->d_name.name,
- count, (long long) pos);
-
- retval = 0;
- if (!count)
- goto out;
-
- retval = nfs_sync_mapping(mapping);
- if (retval)
- goto out;
-
- task_io_account_read(count);
-
- retval = nfs_direct_read(iocb, iov, nr_segs, pos, uio);
- if (retval > 0)
- iocb->ki_pos = pos + retval;
-
-out:
- return retval;
-}
-
/**
* nfs_file_direct_write - file direct write operation for NFS files
* @iocb: target I/O control block
- * @iov: vector of user buffers from which to write data
- * @nr_segs: size of iov vector
+ * @iter: vector of user buffers from which to write data
* @pos: byte offset in file where writing starts
*
* We use this function for direct writes instead of calling
@@ -954,51 +901,97 @@ out:
* Note that O_APPEND is not supported for NFS direct writes, as there
* is no atomic O_APPEND write facility in the NFS protocol.
*/
-ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos, bool uio)
+ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t pos, bool uio)
{
- ssize_t retval = -EINVAL;
+ ssize_t result = -EINVAL;
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
- size_t count;
+ struct inode *inode = mapping->host;
+ struct nfs_direct_req *dreq;
+ struct nfs_lock_context *l_ctx;
+ loff_t end;
+ size_t count = iov_iter_count(iter);
+ end = (pos + count - 1) >> PAGE_CACHE_SHIFT;
- count = iov_length(iov, nr_segs);
nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count);
- dfprintk(FILE, "NFS: direct write(%s/%s, %zd@%Ld)\n",
- file->f_path.dentry->d_parent->d_name.name,
- file->f_path.dentry->d_name.name,
- count, (long long) pos);
+ dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n",
+ file, count, (long long) pos);
- retval = generic_write_checks(file, &pos, &count, 0);
- if (retval)
+ result = generic_write_checks(file, &pos, &count, 0);
+ if (result)
goto out;
- retval = -EINVAL;
+ result = -EINVAL;
if ((ssize_t) count < 0)
goto out;
- retval = 0;
+ result = 0;
if (!count)
goto out;
- retval = nfs_sync_mapping(mapping);
- if (retval)
- goto out;
+ mutex_lock(&inode->i_mutex);
+
+ result = nfs_sync_mapping(mapping);
+ if (result)
+ goto out_unlock;
+
+ if (mapping->nrpages) {
+ result = invalidate_inode_pages2_range(mapping,
+ pos >> PAGE_CACHE_SHIFT, end);
+ if (result)
+ goto out_unlock;
+ }
task_io_account_write(count);
- retval = nfs_direct_write(iocb, iov, nr_segs, pos, count, uio);
- if (retval > 0) {
- struct inode *inode = mapping->host;
+ result = -ENOMEM;
+ dreq = nfs_direct_req_alloc();
+ if (!dreq)
+ goto out_unlock;
- iocb->ki_pos = pos + retval;
- spin_lock(&inode->i_lock);
- if (i_size_read(inode) < iocb->ki_pos)
- i_size_write(inode, iocb->ki_pos);
- spin_unlock(&inode->i_lock);
+ dreq->inode = inode;
+ dreq->bytes_left = count;
+ dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
+ l_ctx = nfs_get_lock_context(dreq->ctx);
+ if (IS_ERR(l_ctx)) {
+ result = PTR_ERR(l_ctx);
+ goto out_release;
+ }
+ dreq->l_ctx = l_ctx;
+ if (!is_sync_kiocb(iocb))
+ dreq->iocb = iocb;
+
+ result = nfs_direct_write_schedule_iovec(dreq, iter, pos);
+
+ if (mapping->nrpages) {
+ invalidate_inode_pages2_range(mapping,
+ pos >> PAGE_CACHE_SHIFT, end);
}
+
+ mutex_unlock(&inode->i_mutex);
+
+ if (!result) {
+ result = nfs_direct_wait(dreq);
+ if (result > 0) {
+ struct inode *inode = mapping->host;
+
+ iocb->ki_pos = pos + result;
+ spin_lock(&inode->i_lock);
+ if (i_size_read(inode) < iocb->ki_pos)
+ i_size_write(inode, iocb->ki_pos);
+ spin_unlock(&inode->i_lock);
+ }
+ }
+ nfs_direct_req_release(dreq);
+ return result;
+
+out_release:
+ nfs_direct_req_release(dreq);
+out_unlock:
+ mutex_unlock(&inode->i_mutex);
out:
- return retval;
+ return result;
}
/**