diff options
Diffstat (limited to 'fs/nfs/direct.c')
| -rw-r--r-- | fs/nfs/direct.c | 705 | 
1 files changed, 349 insertions, 356 deletions
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 91ff089d341..f11b9eed0de 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -108,6 +108,97 @@ static inline int put_dreq(struct nfs_direct_req *dreq)  	return atomic_dec_and_test(&dreq->io_count);  } +/* + * nfs_direct_select_verf - select the right verifier + * @dreq - direct request possibly spanning multiple servers + * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs + * @ds_idx - index of data server in data server list, only valid if ds_clp set + * + * returns the correct verifier to use given the role of the server + */ +static struct nfs_writeverf * +nfs_direct_select_verf(struct nfs_direct_req *dreq, +		       struct nfs_client *ds_clp, +		       int ds_idx) +{ +	struct nfs_writeverf *verfp = &dreq->verf; + +#ifdef CONFIG_NFS_V4_1 +	if (ds_clp) { +		/* pNFS is in use, use the DS verf */ +		if (ds_idx >= 0 && ds_idx < dreq->ds_cinfo.nbuckets) +			verfp = &dreq->ds_cinfo.buckets[ds_idx].direct_verf; +		else +			WARN_ON_ONCE(1); +	} +#endif +	return verfp; +} + + +/* + * nfs_direct_set_hdr_verf - set the write/commit verifier + * @dreq - direct request possibly spanning multiple servers + * @hdr - pageio header to validate against previously seen verfs + * + * Set the server's (MDS or DS) "seen" verifier + */ +static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq, +				    struct nfs_pgio_header *hdr) +{ +	struct nfs_writeverf *verfp; + +	verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, +				      hdr->data->ds_idx); +	WARN_ON_ONCE(verfp->committed >= 0); +	memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); +	WARN_ON_ONCE(verfp->committed < 0); +} + +/* + * nfs_direct_cmp_hdr_verf - compare verifier for pgio header + * @dreq - direct request possibly spanning multiple servers + * @hdr - pageio header to validate against previously seen verf + * + * set the server's "seen" verf if not initialized. 
+ * returns result of comparison between @hdr->verf and the "seen" + * verf of the server used by @hdr (DS or MDS) + */ +static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq, +					  struct nfs_pgio_header *hdr) +{ +	struct nfs_writeverf *verfp; + +	verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp, +					 hdr->data->ds_idx); +	if (verfp->committed < 0) { +		nfs_direct_set_hdr_verf(dreq, hdr); +		return 0; +	} +	return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); +} + +#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) +/* + * nfs_direct_cmp_commit_data_verf - compare verifier for commit data + * @dreq - direct request possibly spanning multiple servers + * @data - commit data to validate against previously seen verf + * + * returns result of comparison between @data->verf and the verf of + * the server used by @data (DS or MDS) + */ +static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, +					   struct nfs_commit_data *data) +{ +	struct nfs_writeverf *verfp; + +	verfp = nfs_direct_select_verf(dreq, data->ds_clp, +					 data->ds_commit_index); +	WARN_ON_ONCE(verfp->committed < 0); +	return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf)); +} +#endif +  /**   * nfs_direct_IO - NFS address space operation for direct I/O   * @rw: direction (read or write) @@ -121,21 +212,20 @@ static inline int put_dreq(struct nfs_direct_req *dreq)   * shunt off direct read and write requests before the VFS gets them,   * so this method is only ever called for swap.   
*/ -ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t pos, unsigned long nr_segs) +ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter, loff_t pos)  {  #ifndef CONFIG_NFS_SWAP -	dprintk("NFS: nfs_direct_IO (%s) off/no(%Ld/%lu) EINVAL\n", -			iocb->ki_filp->f_path.dentry->d_name.name, -			(long long) pos, nr_segs); +	dprintk("NFS: nfs_direct_IO (%pD) off/no(%Ld/%lu) EINVAL\n", +			iocb->ki_filp, (long long) pos, iter->nr_segs);  	return -EINVAL;  #else  	VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);  	if (rw == READ || rw == KERNEL_READ) -		return nfs_file_direct_read(iocb, iov, nr_segs, pos, +		return nfs_file_direct_read(iocb, iter, pos,  				rw == READ ? true : false); -	return nfs_file_direct_write(iocb, iov, nr_segs, pos, +	return nfs_file_direct_write(iocb, iter, pos,  				rw == WRITE ? true : false);  #endif /* CONFIG_NFS_SWAP */  } @@ -169,6 +259,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)  	kref_get(&dreq->kref);  	init_completion(&dreq->completion);  	INIT_LIST_HEAD(&dreq->mds_cinfo.list); +	dreq->verf.committed = NFS_INVALID_STABLE_HOW;	/* not set yet */  	INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);  	spin_lock_init(&dreq->lock); @@ -223,14 +314,31 @@ out:   * Synchronous I/O uses a stack-allocated iocb.  Thus we can't trust   * the iocb is still valid here if this is a synchronous request.   
*/ -static void nfs_direct_complete(struct nfs_direct_req *dreq) +static void nfs_direct_complete(struct nfs_direct_req *dreq, bool write)  { +	struct inode *inode = dreq->inode; + +	if (dreq->iocb && write) { +		loff_t pos = dreq->iocb->ki_pos + dreq->count; + +		spin_lock(&inode->i_lock); +		if (i_size_read(inode) < pos) +			i_size_write(inode, pos); +		spin_unlock(&inode->i_lock); +	} + +	if (write) +		nfs_zap_mapping(inode, inode->i_mapping); + +	inode_dio_done(inode); +  	if (dreq->iocb) {  		long res = (long) dreq->error;  		if (!res)  			res = (long) dreq->count;  		aio_complete(dreq->iocb, res, 0);  	} +  	complete_all(&dreq->completion);  	nfs_direct_req_release(dreq); @@ -238,9 +346,9 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)  static void nfs_direct_readpage_release(struct nfs_page *req)  { -	dprintk("NFS: direct read done (%s/%lld %d@%lld)\n", +	dprintk("NFS: direct read done (%s/%llu %d@%lld)\n",  		req->wb_context->dentry->d_inode->i_sb->s_id, -		(long long)NFS_FILEID(req->wb_context->dentry->d_inode), +		(unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),  		req->wb_bytes,  		(long long)req_offset(req));  	nfs_release_request(req); @@ -273,7 +381,7 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)  	}  out_put:  	if (put_dreq(dreq)) -		nfs_direct_complete(dreq); +		nfs_direct_complete(dreq, false);  	hdr->release(hdr);  } @@ -306,66 +414,42 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {   * handled automatically by nfs_direct_read_result().  Otherwise, if   * no requests have been sent, just return an error.   
*/ -static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc, -						const struct iovec *iov, -						loff_t pos, bool uio) -{ -	struct nfs_direct_req *dreq = desc->pg_dreq; -	struct nfs_open_context *ctx = dreq->ctx; -	struct inode *inode = ctx->dentry->d_inode; -	unsigned long user_addr = (unsigned long)iov->iov_base; -	size_t count = iov->iov_len; -	size_t rsize = NFS_SERVER(inode)->rsize; -	unsigned int pgbase; -	int result; -	ssize_t started = 0; -	struct page **pagevec = NULL; -	unsigned int npages; - -	do { -		size_t bytes; -		int i; -		pgbase = user_addr & ~PAGE_MASK; -		bytes = min(max_t(size_t, rsize, PAGE_SIZE), count); +static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, +					      struct iov_iter *iter, +					      loff_t pos) +{ +	struct nfs_pageio_descriptor desc; +	struct inode *inode = dreq->inode; +	ssize_t result = -EINVAL; +	size_t requested_bytes = 0; +	size_t rsize = max_t(size_t, NFS_SERVER(inode)->rsize, PAGE_SIZE); -		result = -ENOMEM; -		npages = nfs_page_array_len(pgbase, bytes); -		if (!pagevec) -			pagevec = kmalloc(npages * sizeof(struct page *), -					  GFP_KERNEL); -		if (!pagevec) -			break; -		if (uio) { -			down_read(&current->mm->mmap_sem); -			result = get_user_pages(current, current->mm, user_addr, -					npages, 1, 0, pagevec, NULL); -			up_read(&current->mm->mmap_sem); -			if (result < 0) -				break; -		} else { -			WARN_ON(npages != 1); -			result = get_kernel_page(user_addr, 1, pagevec); -			if (WARN_ON(result != 1)) -				break; -		} +	nfs_pageio_init_read(&desc, dreq->inode, false, +			     &nfs_direct_read_completion_ops); +	get_dreq(dreq); +	desc.pg_dreq = dreq; +	atomic_inc(&inode->i_dio_count); -		if ((unsigned)result < npages) { -			bytes = result * PAGE_SIZE; -			if (bytes <= pgbase) { -				nfs_direct_release_pages(pagevec, result); -				break; -			} -			bytes -= pgbase; -			npages = result; -		} +	while (iov_iter_count(iter)) { +		struct page **pagevec; +		size_t bytes; +		size_t 
pgbase; +		unsigned npages, i; +		result = iov_iter_get_pages_alloc(iter, &pagevec,  +						  rsize, &pgbase); +		if (result < 0) +			break; +	 +		bytes = result; +		iov_iter_advance(iter, bytes); +		npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;  		for (i = 0; i < npages; i++) {  			struct nfs_page *req;  			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);  			/* XXX do we need to do the eof zeroing found in async_filler? */ -			req = nfs_create_request(dreq->ctx, dreq->inode, -						 pagevec[i], +			req = nfs_create_request(dreq->ctx, pagevec[i], NULL,  						 pgbase, req_len);  			if (IS_ERR(req)) {  				result = PTR_ERR(req); @@ -373,54 +457,21 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de  			}  			req->wb_index = pos >> PAGE_SHIFT;  			req->wb_offset = pos & ~PAGE_MASK; -			if (!nfs_pageio_add_request(desc, req)) { -				result = desc->pg_error; +			if (!nfs_pageio_add_request(&desc, req)) { +				result = desc.pg_error;  				nfs_release_request(req);  				break;  			}  			pgbase = 0;  			bytes -= req_len; -			started += req_len; -			user_addr += req_len; +			requested_bytes += req_len;  			pos += req_len; -			count -= req_len;  			dreq->bytes_left -= req_len;  		} -		/* The nfs_page now hold references to these pages */  		nfs_direct_release_pages(pagevec, npages); -	} while (count != 0 && result >= 0); - -	kfree(pagevec); - -	if (started) -		return started; -	return result < 0 ? 
(ssize_t) result : -EFAULT; -} - -static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq, -					      const struct iovec *iov, -					      unsigned long nr_segs, -					      loff_t pos, bool uio) -{ -	struct nfs_pageio_descriptor desc; -	ssize_t result = -EINVAL; -	size_t requested_bytes = 0; -	unsigned long seg; - -	NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode, -			     &nfs_direct_read_completion_ops); -	get_dreq(dreq); -	desc.pg_dreq = dreq; - -	for (seg = 0; seg < nr_segs; seg++) { -		const struct iovec *vec = &iov[seg]; -		result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio); +		kvfree(pagevec);  		if (result < 0)  			break; -		requested_bytes += result; -		if ((size_t)result < vec->iov_len) -			break; -		pos += vec->iov_len;  	}  	nfs_pageio_complete(&desc); @@ -430,29 +481,69 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,  	 * generic layer handle the completion.  	 */  	if (requested_bytes == 0) { +		inode_dio_done(inode);  		nfs_direct_req_release(dreq);  		return result < 0 ? result : -EIO;  	}  	if (put_dreq(dreq)) -		nfs_direct_complete(dreq); +		nfs_direct_complete(dreq, false);  	return 0;  } -static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov, -			       unsigned long nr_segs, loff_t pos, bool uio) +/** + * nfs_file_direct_read - file direct read operation for NFS files + * @iocb: target I/O control block + * @iter: vector of user buffers into which to read data + * @pos: byte offset in file where reading starts + * + * We use this function for direct reads instead of calling + * generic_file_aio_read() in order to avoid gfar's check to see if + * the request starts before the end of the file.  For that check + * to work, we must generate a GETATTR before each direct read, and + * even then there is a window between the GETATTR and the subsequent + * READ where the file size could change.  
Our preference is simply + * to do all reads the application wants, and the server will take + * care of managing the end of file boundary. + * + * This function also eliminates unnecessarily updating the file's + * atime locally, as the NFS server sets the file's atime, and this + * client must read the updated atime from the server back into its + * cache. + */ +ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, +				loff_t pos, bool uio)  { -	ssize_t result = -ENOMEM; -	struct inode *inode = iocb->ki_filp->f_mapping->host; +	struct file *file = iocb->ki_filp; +	struct address_space *mapping = file->f_mapping; +	struct inode *inode = mapping->host;  	struct nfs_direct_req *dreq;  	struct nfs_lock_context *l_ctx; +	ssize_t result = -EINVAL; +	size_t count = iov_iter_count(iter); +	nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); + +	dfprintk(FILE, "NFS: direct read(%pD2, %zd@%Ld)\n", +		file, count, (long long) pos); + +	result = 0; +	if (!count) +		goto out; + +	mutex_lock(&inode->i_mutex); +	result = nfs_sync_mapping(mapping); +	if (result) +		goto out_unlock; +	task_io_account_read(count); + +	result = -ENOMEM;  	dreq = nfs_direct_req_alloc();  	if (dreq == NULL) -		goto out; +		goto out_unlock;  	dreq->inode = inode; -	dreq->bytes_left = iov_length(iov, nr_segs); +	dreq->bytes_left = count;  	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));  	l_ctx = nfs_get_lock_context(dreq->ctx);  	if (IS_ERR(l_ctx)) { @@ -463,22 +554,28 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,  	if (!is_sync_kiocb(iocb))  		dreq->iocb = iocb; -	NFS_I(inode)->read_io += iov_length(iov, nr_segs); -	result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio); -	if (!result) +	NFS_I(inode)->read_io += count; +	result = nfs_direct_read_schedule_iovec(dreq, iter, pos); + +	mutex_unlock(&inode->i_mutex); + +	if (!result) {  		result = nfs_direct_wait(dreq); +		if (result > 0) +			iocb->ki_pos = 
pos + result; +	} + +	nfs_direct_req_release(dreq); +	return result; +  out_release:  	nfs_direct_req_release(dreq); +out_unlock: +	mutex_unlock(&inode->i_mutex);  out:  	return result;  } -static void nfs_inode_dio_write_done(struct inode *inode) -{ -	nfs_zap_mapping(inode, inode->i_mapping); -	inode_dio_done(inode); -} -  #if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)  static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)  { @@ -497,7 +594,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)  	dreq->count = 0;  	get_dreq(dreq); -	NFS_PROTO(dreq->inode)->write_pageio_init(&desc, dreq->inode, FLUSH_STABLE, +	nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false,  			      &nfs_direct_write_completion_ops);  	desc.pg_dreq = dreq; @@ -536,7 +633,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)  		dprintk("NFS: %5u commit failed with error %d.\n",  			data->task.tk_pid, status);  		dreq->flags = NFS_ODIRECT_RESCHED_WRITES; -	} else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) { +	} else if (nfs_direct_cmp_commit_data_verf(dreq, data)) {  		dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);  		dreq->flags = NFS_ODIRECT_RESCHED_WRITES;  	} @@ -594,8 +691,7 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)  			nfs_direct_write_reschedule(dreq);  			break;  		default: -			nfs_inode_dio_write_done(dreq->inode); -			nfs_direct_complete(dreq); +			nfs_direct_complete(dreq, true);  	}  } @@ -611,114 +707,10 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)  static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)  { -	nfs_inode_dio_write_done(inode); -	nfs_direct_complete(dreq); +	nfs_direct_complete(dreq, true);  }  #endif -/* - * NB: Return the value of the first error return code.  Subsequent - *     errors after the first one are ignored. 
- */ -/* - * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE - * operation.  If nfs_writedata_alloc() or get_user_pages() fails, - * bail and stop sending more writes.  Write length accounting is - * handled automatically by nfs_direct_write_result().  Otherwise, if - * no requests have been sent, just return an error. - */ -static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc, -						 const struct iovec *iov, -						 loff_t pos, bool uio) -{ -	struct nfs_direct_req *dreq = desc->pg_dreq; -	struct nfs_open_context *ctx = dreq->ctx; -	struct inode *inode = ctx->dentry->d_inode; -	unsigned long user_addr = (unsigned long)iov->iov_base; -	size_t count = iov->iov_len; -	size_t wsize = NFS_SERVER(inode)->wsize; -	unsigned int pgbase; -	int result; -	ssize_t started = 0; -	struct page **pagevec = NULL; -	unsigned int npages; - -	do { -		size_t bytes; -		int i; - -		pgbase = user_addr & ~PAGE_MASK; -		bytes = min(max_t(size_t, wsize, PAGE_SIZE), count); - -		result = -ENOMEM; -		npages = nfs_page_array_len(pgbase, bytes); -		if (!pagevec) -			pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL); -		if (!pagevec) -			break; - -		if (uio) { -			down_read(&current->mm->mmap_sem); -			result = get_user_pages(current, current->mm, user_addr, -						npages, 0, 0, pagevec, NULL); -			up_read(&current->mm->mmap_sem); -			if (result < 0) -				break; -		} else { -			WARN_ON(npages != 1); -			result = get_kernel_page(user_addr, 0, pagevec); -			if (WARN_ON(result != 1)) -				break; -		} - -		if ((unsigned)result < npages) { -			bytes = result * PAGE_SIZE; -			if (bytes <= pgbase) { -				nfs_direct_release_pages(pagevec, result); -				break; -			} -			bytes -= pgbase; -			npages = result; -		} - -		for (i = 0; i < npages; i++) { -			struct nfs_page *req; -			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); - -			req = nfs_create_request(dreq->ctx, dreq->inode, -						 pagevec[i], -						 pgbase, req_len); -			if 
(IS_ERR(req)) { -				result = PTR_ERR(req); -				break; -			} -			nfs_lock_request(req); -			req->wb_index = pos >> PAGE_SHIFT; -			req->wb_offset = pos & ~PAGE_MASK; -			if (!nfs_pageio_add_request(desc, req)) { -				result = desc->pg_error; -				nfs_unlock_and_release_request(req); -				break; -			} -			pgbase = 0; -			bytes -= req_len; -			started += req_len; -			user_addr += req_len; -			pos += req_len; -			count -= req_len; -			dreq->bytes_left -= req_len; -		} -		/* The nfs_page now hold references to these pages */ -		nfs_direct_release_pages(pagevec, npages); -	} while (count != 0 && result >= 0); - -	kfree(pagevec); - -	if (started) -		return started; -	return result < 0 ? (ssize_t) result : -EFAULT; -} -  static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)  {  	struct nfs_direct_req *dreq = hdr->dreq; @@ -748,13 +740,13 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)  			if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)  				bit = NFS_IOHDR_NEED_RESCHED;  			else if (dreq->flags == 0) { -				memcpy(&dreq->verf, hdr->verf, -				       sizeof(dreq->verf)); +				nfs_direct_set_hdr_verf(dreq, hdr);  				bit = NFS_IOHDR_NEED_COMMIT;  				dreq->flags = NFS_ODIRECT_DO_COMMIT;  			} else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) { -				if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) { -					dreq->flags = NFS_ODIRECT_RESCHED_WRITES; +				if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) { +					dreq->flags = +						NFS_ODIRECT_RESCHED_WRITES;  					bit = NFS_IOHDR_NEED_RESCHED;  				} else  					bit = NFS_IOHDR_NEED_COMMIT; @@ -764,6 +756,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)  	spin_unlock(&dreq->lock);  	while (!list_empty(&hdr->pages)) { +  		req = nfs_list_entry(hdr->pages.next);  		nfs_list_remove_request(req);  		switch (bit) { @@ -798,33 +791,77 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {  	.completion = nfs_direct_write_completion,  }; + +/* 
+ * NB: Return the value of the first error return code.  Subsequent + *     errors after the first one are ignored. + */ +/* + * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE + * operation.  If nfs_writedata_alloc() or get_user_pages() fails, + * bail and stop sending more writes.  Write length accounting is + * handled automatically by nfs_direct_write_result().  Otherwise, if + * no requests have been sent, just return an error. + */  static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, -					       const struct iovec *iov, -					       unsigned long nr_segs, -					       loff_t pos, bool uio) +					       struct iov_iter *iter, +					       loff_t pos)  {  	struct nfs_pageio_descriptor desc;  	struct inode *inode = dreq->inode;  	ssize_t result = 0;  	size_t requested_bytes = 0; -	unsigned long seg; +	size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); -	NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE, +	nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false,  			      &nfs_direct_write_completion_ops);  	desc.pg_dreq = dreq;  	get_dreq(dreq);  	atomic_inc(&inode->i_dio_count); -	NFS_I(dreq->inode)->write_io += iov_length(iov, nr_segs); -	for (seg = 0; seg < nr_segs; seg++) { -		const struct iovec *vec = &iov[seg]; -		result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio); +	NFS_I(inode)->write_io += iov_iter_count(iter); +	while (iov_iter_count(iter)) { +		struct page **pagevec; +		size_t bytes; +		size_t pgbase; +		unsigned npages, i; + +		result = iov_iter_get_pages_alloc(iter, &pagevec,  +						  wsize, &pgbase);  		if (result < 0)  			break; -		requested_bytes += result; -		if ((size_t)result < vec->iov_len) + +		bytes = result; +		iov_iter_advance(iter, bytes); +		npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE; +		for (i = 0; i < npages; i++) { +			struct nfs_page *req; +			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase); + +			req = 
nfs_create_request(dreq->ctx, pagevec[i], NULL, +						 pgbase, req_len); +			if (IS_ERR(req)) { +				result = PTR_ERR(req); +				break; +			} +			nfs_lock_request(req); +			req->wb_index = pos >> PAGE_SHIFT; +			req->wb_offset = pos & ~PAGE_MASK; +			if (!nfs_pageio_add_request(&desc, req)) { +				result = desc.pg_error; +				nfs_unlock_and_release_request(req); +				break; +			} +			pgbase = 0; +			bytes -= req_len; +			requested_bytes += req_len; +			pos += req_len; +			dreq->bytes_left -= req_len; +		} +		nfs_direct_release_pages(pagevec, npages); +		kvfree(pagevec); +		if (result < 0)  			break; -		pos += vec->iov_len;  	}  	nfs_pageio_complete(&desc); @@ -843,100 +880,10 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,  	return 0;  } -static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov, -				unsigned long nr_segs, loff_t pos, -				size_t count, bool uio) -{ -	ssize_t result = -ENOMEM; -	struct inode *inode = iocb->ki_filp->f_mapping->host; -	struct nfs_direct_req *dreq; -	struct nfs_lock_context *l_ctx; - -	dreq = nfs_direct_req_alloc(); -	if (!dreq) -		goto out; - -	dreq->inode = inode; -	dreq->bytes_left = count; -	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); -	l_ctx = nfs_get_lock_context(dreq->ctx); -	if (IS_ERR(l_ctx)) { -		result = PTR_ERR(l_ctx); -		goto out_release; -	} -	dreq->l_ctx = l_ctx; -	if (!is_sync_kiocb(iocb)) -		dreq->iocb = iocb; - -	result = nfs_direct_write_schedule_iovec(dreq, iov, nr_segs, pos, uio); -	if (!result) -		result = nfs_direct_wait(dreq); -out_release: -	nfs_direct_req_release(dreq); -out: -	return result; -} - -/** - * nfs_file_direct_read - file direct read operation for NFS files - * @iocb: target I/O control block - * @iov: vector of user buffers into which to read data - * @nr_segs: size of iov vector - * @pos: byte offset in file where reading starts - * - * We use this function for direct reads instead of calling - * 
generic_file_aio_read() in order to avoid gfar's check to see if - * the request starts before the end of the file.  For that check - * to work, we must generate a GETATTR before each direct read, and - * even then there is a window between the GETATTR and the subsequent - * READ where the file size could change.  Our preference is simply - * to do all reads the application wants, and the server will take - * care of managing the end of file boundary. - * - * This function also eliminates unnecessarily updating the file's - * atime locally, as the NFS server sets the file's atime, and this - * client must read the updated atime from the server back into its - * cache. - */ -ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, -				unsigned long nr_segs, loff_t pos, bool uio) -{ -	ssize_t retval = -EINVAL; -	struct file *file = iocb->ki_filp; -	struct address_space *mapping = file->f_mapping; -	size_t count; - -	count = iov_length(iov, nr_segs); -	nfs_add_stats(mapping->host, NFSIOS_DIRECTREADBYTES, count); - -	dfprintk(FILE, "NFS: direct read(%s/%s, %zd@%Ld)\n", -		file->f_path.dentry->d_parent->d_name.name, -		file->f_path.dentry->d_name.name, -		count, (long long) pos); - -	retval = 0; -	if (!count) -		goto out; - -	retval = nfs_sync_mapping(mapping); -	if (retval) -		goto out; - -	task_io_account_read(count); - -	retval = nfs_direct_read(iocb, iov, nr_segs, pos, uio); -	if (retval > 0) -		iocb->ki_pos = pos + retval; - -out: -	return retval; -} -  /**   * nfs_file_direct_write - file direct write operation for NFS files   * @iocb: target I/O control block - * @iov: vector of user buffers from which to write data - * @nr_segs: size of iov vector + * @iter: vector of user buffers from which to write data   * @pos: byte offset in file where writing starts   *   * We use this function for direct writes instead of calling @@ -954,51 +901,97 @@ out:   * Note that O_APPEND is not supported for NFS direct writes, as there   * is no atomic O_APPEND 
write facility in the NFS protocol.   */ -ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, -				unsigned long nr_segs, loff_t pos, bool uio) +ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, +				loff_t pos, bool uio)  { -	ssize_t retval = -EINVAL; +	ssize_t result = -EINVAL;  	struct file *file = iocb->ki_filp;  	struct address_space *mapping = file->f_mapping; -	size_t count; +	struct inode *inode = mapping->host; +	struct nfs_direct_req *dreq; +	struct nfs_lock_context *l_ctx; +	loff_t end; +	size_t count = iov_iter_count(iter); +	end = (pos + count - 1) >> PAGE_CACHE_SHIFT; -	count = iov_length(iov, nr_segs);  	nfs_add_stats(mapping->host, NFSIOS_DIRECTWRITTENBYTES, count); -	dfprintk(FILE, "NFS: direct write(%s/%s, %zd@%Ld)\n", -		file->f_path.dentry->d_parent->d_name.name, -		file->f_path.dentry->d_name.name, -		count, (long long) pos); +	dfprintk(FILE, "NFS: direct write(%pD2, %zd@%Ld)\n", +		file, count, (long long) pos); -	retval = generic_write_checks(file, &pos, &count, 0); -	if (retval) +	result = generic_write_checks(file, &pos, &count, 0); +	if (result)  		goto out; -	retval = -EINVAL; +	result = -EINVAL;  	if ((ssize_t) count < 0)  		goto out; -	retval = 0; +	result = 0;  	if (!count)  		goto out; -	retval = nfs_sync_mapping(mapping); -	if (retval) -		goto out; +	mutex_lock(&inode->i_mutex); + +	result = nfs_sync_mapping(mapping); +	if (result) +		goto out_unlock; + +	if (mapping->nrpages) { +		result = invalidate_inode_pages2_range(mapping, +					pos >> PAGE_CACHE_SHIFT, end); +		if (result) +			goto out_unlock; +	}  	task_io_account_write(count); -	retval = nfs_direct_write(iocb, iov, nr_segs, pos, count, uio); -	if (retval > 0) { -		struct inode *inode = mapping->host; +	result = -ENOMEM; +	dreq = nfs_direct_req_alloc(); +	if (!dreq) +		goto out_unlock; -		iocb->ki_pos = pos + retval; -		spin_lock(&inode->i_lock); -		if (i_size_read(inode) < iocb->ki_pos) -			i_size_write(inode, iocb->ki_pos); -		
spin_unlock(&inode->i_lock); +	dreq->inode = inode; +	dreq->bytes_left = count; +	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); +	l_ctx = nfs_get_lock_context(dreq->ctx); +	if (IS_ERR(l_ctx)) { +		result = PTR_ERR(l_ctx); +		goto out_release; +	} +	dreq->l_ctx = l_ctx; +	if (!is_sync_kiocb(iocb)) +		dreq->iocb = iocb; + +	result = nfs_direct_write_schedule_iovec(dreq, iter, pos); + +	if (mapping->nrpages) { +		invalidate_inode_pages2_range(mapping, +					      pos >> PAGE_CACHE_SHIFT, end);  	} + +	mutex_unlock(&inode->i_mutex); + +	if (!result) { +		result = nfs_direct_wait(dreq); +		if (result > 0) { +			struct inode *inode = mapping->host; + +			iocb->ki_pos = pos + result; +			spin_lock(&inode->i_lock); +			if (i_size_read(inode) < iocb->ki_pos) +				i_size_write(inode, iocb->ki_pos); +			spin_unlock(&inode->i_lock); +		} +	} +	nfs_direct_req_release(dreq); +	return result; + +out_release: +	nfs_direct_req_release(dreq); +out_unlock: +	mutex_unlock(&inode->i_mutex);  out: -	return retval; +	return result;  }  /**  | 
