diff options
Diffstat (limited to 'fs/splice.c')
| -rw-r--r-- | fs/splice.c | 657 | 
1 files changed, 312 insertions, 345 deletions
diff --git a/fs/splice.c b/fs/splice.c index 8f1dfaecc8f..f5cb9ba8451 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -25,12 +25,15 @@  #include <linux/mm_inline.h>  #include <linux/swap.h>  #include <linux/writeback.h> -#include <linux/buffer_head.h> -#include <linux/module.h> +#include <linux/export.h>  #include <linux/syscalls.h>  #include <linux/uio.h>  #include <linux/security.h>  #include <linux/gfp.h> +#include <linux/socket.h> +#include <linux/compat.h> +#include <linux/aio.h> +#include "internal.h"  /*   * Attempt to steal a page from a pipe buffer. This should perhaps go into @@ -132,10 +135,8 @@ error:  	return err;  } -static const struct pipe_buf_operations page_cache_pipe_buf_ops = { +const struct pipe_buf_operations page_cache_pipe_buf_ops = {  	.can_merge = 0, -	.map = generic_pipe_buf_map, -	.unmap = generic_pipe_buf_unmap,  	.confirm = page_cache_pipe_buf_confirm,  	.release = page_cache_pipe_buf_release,  	.steal = page_cache_pipe_buf_steal, @@ -154,14 +155,20 @@ static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,  static const struct pipe_buf_operations user_page_pipe_buf_ops = {  	.can_merge = 0, -	.map = generic_pipe_buf_map, -	.unmap = generic_pipe_buf_unmap,  	.confirm = generic_pipe_buf_confirm,  	.release = page_cache_pipe_buf_release,  	.steal = user_page_pipe_buf_steal,  	.get = generic_pipe_buf_get,  }; +static void wakeup_pipe_readers(struct pipe_inode_info *pipe) +{ +	smp_mb(); +	if (waitqueue_active(&pipe->wait)) +		wake_up_interruptible(&pipe->wait); +	kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); +} +  /**   * splice_to_pipe - fill passed data into a pipe   * @pipe:	pipe to fill @@ -209,7 +216,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,  			page_nr++;  			ret += buf->len; -			if (pipe->inode) +			if (pipe->files)  				do_wakeup = 1;  			if (!--spd->nr_pages) @@ -247,12 +254,8 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,  	pipe_unlock(pipe); -	if (do_wakeup) { -		smp_mb(); -		if (waitqueue_active(&pipe->wait)) -			wake_up_interruptible(&pipe->wait); -		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); -	} +	if (do_wakeup) +		wakeup_pipe_readers(pipe);  	while (page_nr < spd_pages)  		spd->spd_release(spd, page_nr++); @@ -260,7 +263,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe,  	return ret;  } -static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) +void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)  {  	page_cache_release(spd->pages[i]);  } @@ -269,13 +272,16 @@ static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)   * Check if we need to grow the arrays holding pages and partial page   * descriptions.   */ -int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) +int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)  { -	if (pipe->buffers <= PIPE_DEF_BUFFERS) +	unsigned int buffers = ACCESS_ONCE(pipe->buffers); + +	spd->nr_pages_max = buffers; +	if (buffers <= PIPE_DEF_BUFFERS)  		return 0; -	spd->pages = kmalloc(pipe->buffers * sizeof(struct page *), GFP_KERNEL); -	spd->partial = kmalloc(pipe->buffers * sizeof(struct partial_page), GFP_KERNEL); +	spd->pages = kmalloc(buffers * sizeof(struct page *), GFP_KERNEL); +	spd->partial = kmalloc(buffers * sizeof(struct partial_page), GFP_KERNEL);  	if (spd->pages && spd->partial)  		return 0; @@ -285,10 +291,9 @@ int splice_grow_spd(struct pipe_inode_info *pipe, struct splice_pipe_desc *spd)  	return -ENOMEM;  } -void splice_shrink_spd(struct pipe_inode_info *pipe, -		       struct splice_pipe_desc *spd) +void splice_shrink_spd(struct splice_pipe_desc *spd)  { -	if (pipe->buffers <= PIPE_DEF_BUFFERS) +	if (spd->nr_pages_max <= PIPE_DEF_BUFFERS)  		return;  	kfree(spd->pages); @@ -311,6 +316,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,  	struct splice_pipe_desc spd = {  		.pages = pages,  		.partial = partial, +		.nr_pages_max = PIPE_DEF_BUFFERS,  		.flags = flags,  		.ops = &page_cache_pipe_buf_ops,  		.spd_release = spd_release_page, @@ -322,7 +328,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,  	index = *ppos >> PAGE_CACHE_SHIFT;  	loff = *ppos & ~PAGE_CACHE_MASK;  	req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; -	nr_pages = min(req_pages, pipe->buffers); +	nr_pages = min(req_pages, spd.nr_pages_max);  	/*  	 * Lookup the (hopefully) full range of pages we need. @@ -493,7 +499,7 @@ fill_it:  	if (spd.nr_pages)  		error = splice_to_pipe(pipe, &spd); -	splice_shrink_spd(pipe, &spd); +	splice_shrink_spd(&spd);  	return error;  } @@ -538,14 +544,28 @@ EXPORT_SYMBOL(generic_file_splice_read);  static const struct pipe_buf_operations default_pipe_buf_ops = {  	.can_merge = 0, -	.map = generic_pipe_buf_map, -	.unmap = generic_pipe_buf_unmap,  	.confirm = generic_pipe_buf_confirm,  	.release = generic_pipe_buf_release,  	.steal = generic_pipe_buf_steal,  	.get = generic_pipe_buf_get,  }; +static int generic_pipe_buf_nosteal(struct pipe_inode_info *pipe, +				    struct pipe_buffer *buf) +{ +	return 1; +} + +/* Pipe buffer operations for a socket and similar. */ +const struct pipe_buf_operations nosteal_pipe_buf_ops = { +	.can_merge = 0, +	.confirm = generic_pipe_buf_confirm, +	.release = generic_pipe_buf_release, +	.steal = generic_pipe_buf_nosteal, +	.get = generic_pipe_buf_get, +}; +EXPORT_SYMBOL(nosteal_pipe_buf_ops); +  static ssize_t kernel_readv(struct file *file, const struct iovec *vec,  			    unsigned long vlen, loff_t offset)  { @@ -562,7 +582,7 @@ static ssize_t kernel_readv(struct file *file, const struct iovec *vec,  	return res;  } -static ssize_t kernel_write(struct file *file, const char *buf, size_t count, +ssize_t kernel_write(struct file *file, const char *buf, size_t count,  			    loff_t pos)  {  	mm_segment_t old_fs; @@ -571,11 +591,12 @@ static ssize_t kernel_write(struct file *file, const char *buf, size_t count,  	old_fs = get_fs();  	set_fs(get_ds());  	/* The cast to a user pointer is valid due to the set_fs() */ -	res = vfs_write(file, (const char __user *)buf, count, &pos); +	res = vfs_write(file, (__force const char __user *)buf, count, &pos);  	set_fs(old_fs);  	return res;  } +EXPORT_SYMBOL(kernel_write);  ssize_t default_file_splice_read(struct file *in, loff_t *ppos,  				 struct pipe_inode_info *pipe, size_t len, @@ -594,6 +615,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,  	struct splice_pipe_desc spd = {  		.pages = pages,  		.partial = partial, +		.nr_pages_max = PIPE_DEF_BUFFERS,  		.flags = flags,  		.ops = &default_pipe_buf_ops,  		.spd_release = spd_release_page, @@ -604,8 +626,8 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,  	res = -ENOMEM;  	vec = __vec; -	if (pipe->buffers > PIPE_DEF_BUFFERS) { -		vec = kmalloc(pipe->buffers * sizeof(struct iovec), GFP_KERNEL); +	if (spd.nr_pages_max > PIPE_DEF_BUFFERS) { +		vec = kmalloc(spd.nr_pages_max * sizeof(struct iovec), GFP_KERNEL);  		if (!vec)  			goto shrink_ret;  	} @@ -613,7 +635,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,  	offset = *ppos & ~PAGE_CACHE_MASK;  	nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; -	for (i = 0; i < nr_pages && i < pipe->buffers && len; i++) { +	for (i = 0; i < nr_pages && i < spd.nr_pages_max && len; i++) {  		struct page *page;  		page = alloc_page(GFP_USER); @@ -661,7 +683,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,  shrink_ret:  	if (vec != __vec)  		kfree(vec); -	splice_shrink_spd(pipe, &spd); +	splice_shrink_spd(&spd);  	return res;  err: @@ -682,87 +704,19 @@ static int pipe_to_sendpage(struct pipe_inode_info *pipe,  {  	struct file *file = sd->u.file;  	loff_t pos = sd->pos; -	int ret, more; - -	ret = buf->ops->confirm(pipe, buf); -	if (!ret) { -		more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len; -		if (file->f_op && file->f_op->sendpage) -			ret = file->f_op->sendpage(file, buf->page, buf->offset, -						   sd->len, &pos, more); -		else -			ret = -EINVAL; -	} - -	return ret; -} +	int more; -/* - * This is a little more tricky than the file -> pipe splicing. There are - * basically three cases: - * - *	- Destination page already exists in the address space and there - *	  are users of it. For that case we have no other option that - *	  copying the data. Tough luck. - *	- Destination page already exists in the address space, but there - *	  are no users of it. Make sure it's uptodate, then drop it. Fall - *	  through to last case. - *	- Destination page does not exist, we can add the pipe page to - *	  the page cache and avoid the copy. - * - * If asked to move pages to the output file (SPLICE_F_MOVE is set in - * sd->flags), we attempt to migrate pages from the pipe to the output - * file address space page cache. This is possible if no one else has - * the pipe page referenced outside of the pipe and page cache. If - * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create - * a new page in the output file page cache and fill/dirty that. - */ -int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, -		 struct splice_desc *sd) -{ -	struct file *file = sd->u.file; -	struct address_space *mapping = file->f_mapping; -	unsigned int offset, this_len; -	struct page *page; -	void *fsdata; -	int ret; - -	/* -	 * make sure the data in this buffer is uptodate -	 */ -	ret = buf->ops->confirm(pipe, buf); -	if (unlikely(ret)) -		return ret; - -	offset = sd->pos & ~PAGE_CACHE_MASK; - -	this_len = sd->len; -	if (this_len + offset > PAGE_CACHE_SIZE) -		this_len = PAGE_CACHE_SIZE - offset; +	if (!likely(file->f_op->sendpage)) +		return -EINVAL; -	ret = pagecache_write_begin(file, mapping, sd->pos, this_len, -				AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata); -	if (unlikely(ret)) -		goto out; +	more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0; -	if (buf->page != page) { -		/* -		 * Careful, ->map() uses KM_USER0! -		 */ -		char *src = buf->ops->map(pipe, buf, 1); -		char *dst = kmap_atomic(page, KM_USER1); +	if (sd->len < sd->total_len && pipe->nrbufs > 1) +		more |= MSG_SENDPAGE_NOTLAST; -		memcpy(dst + offset, src + buf->offset, this_len); -		flush_dcache_page(page); -		kunmap_atomic(dst, KM_USER1); -		buf->ops->unmap(pipe, buf, src); -	} -	ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len, -				page, fsdata); -out: -	return ret; +	return file->f_op->sendpage(file, buf->page, buf->offset, +				    sd->len, &pos, more);  } -EXPORT_SYMBOL(pipe_to_file);  static void wakeup_pipe_writers(struct pipe_inode_info *pipe)  { @@ -792,7 +746,7 @@ static void wakeup_pipe_writers(struct pipe_inode_info *pipe)   *    locking is required around copying the pipe buffers to the   *    destination.   */ -int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, +static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,  			  splice_actor *actor)  {  	int ret; @@ -805,12 +759,17 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,  		if (sd->len > sd->total_len)  			sd->len = sd->total_len; -		ret = actor(pipe, buf, sd); -		if (ret <= 0) { +		ret = buf->ops->confirm(pipe, buf); +		if (unlikely(ret)) {  			if (ret == -ENODATA)  				ret = 0;  			return ret;  		} + +		ret = actor(pipe, buf, sd); +		if (ret <= 0) +			return ret; +  		buf->offset += ret;  		buf->len -= ret; @@ -824,7 +783,7 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,  			ops->release(pipe, buf);  			pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);  			pipe->nrbufs--; -			if (pipe->inode) +			if (pipe->files)  				sd->need_wakeup = true;  		} @@ -834,7 +793,6 @@ int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd,  	return 1;  } -EXPORT_SYMBOL(splice_from_pipe_feed);  /**   * splice_from_pipe_next - wait for some data to splice from @@ -846,7 +804,7 @@ EXPORT_SYMBOL(splice_from_pipe_feed);   *    value (one) if pipe buffers are available.  It will return zero   *    or -errno if no more data needs to be spliced.   */ -int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) +static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)  {  	while (!pipe->nrbufs) {  		if (!pipe->writers) @@ -871,7 +829,6 @@ int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd)  	return 1;  } -EXPORT_SYMBOL(splice_from_pipe_next);  /**   * splice_from_pipe_begin - start splicing from pipe @@ -882,12 +839,11 @@ EXPORT_SYMBOL(splice_from_pipe_next);   *    splice_from_pipe_next() and splice_from_pipe_feed() to   *    initialize the necessary fields of @sd.   */ -void splice_from_pipe_begin(struct splice_desc *sd) +static void splice_from_pipe_begin(struct splice_desc *sd)  {  	sd->num_spliced = 0;  	sd->need_wakeup = false;  } -EXPORT_SYMBOL(splice_from_pipe_begin);  /**   * splice_from_pipe_end - finish splicing from pipe @@ -899,12 +855,11 @@ EXPORT_SYMBOL(splice_from_pipe_begin);   *    be called after a loop containing splice_from_pipe_next() and   *    splice_from_pipe_feed().   */ -void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd) +static void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd)  {  	if (sd->need_wakeup)  		wakeup_pipe_writers(pipe);  } -EXPORT_SYMBOL(splice_from_pipe_end);  /**   * __splice_from_pipe - splice data from a pipe to given actor @@ -970,7 +925,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,  }  /** - * generic_file_splice_write - splice data from a pipe to a file + * iter_file_splice_write - splice data from a pipe to a file   * @pipe:	pipe info   * @out:	file to write to   * @ppos:	position in @out @@ -980,77 +935,144 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,   * Description:   *    Will either move or copy pages (determined by @flags options) from   *    the given pipe inode to the given file. + *    This one is ->write_iter-based.   *   */  ssize_t -generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, +iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,  			  loff_t *ppos, size_t len, unsigned int flags)  { -	struct address_space *mapping = out->f_mapping; -	struct inode *inode = mapping->host;  	struct splice_desc sd = {  		.total_len = len,  		.flags = flags,  		.pos = *ppos,  		.u.file = out,  	}; +	int nbufs = pipe->buffers; +	struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec), +					GFP_KERNEL);  	ssize_t ret; +	if (unlikely(!array)) +		return -ENOMEM; +  	pipe_lock(pipe);  	splice_from_pipe_begin(&sd); -	do { +	while (sd.total_len) { +		struct iov_iter from; +		struct kiocb kiocb; +		size_t left; +		int n, idx; +  		ret = splice_from_pipe_next(pipe, &sd);  		if (ret <= 0)  			break; -		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); -		ret = file_remove_suid(out); -		if (!ret) { -			file_update_time(out); -			ret = splice_from_pipe_feed(pipe, &sd, pipe_to_file); +		if (unlikely(nbufs < pipe->buffers)) { +			kfree(array); +			nbufs = pipe->buffers; +			array = kcalloc(nbufs, sizeof(struct bio_vec), +					GFP_KERNEL); +			if (!array) { +				ret = -ENOMEM; +				break; +			}  		} -		mutex_unlock(&inode->i_mutex); -	} while (ret > 0); -	splice_from_pipe_end(pipe, &sd); -	pipe_unlock(pipe); +		/* build the vector */ +		left = sd.total_len; +		for (n = 0, idx = pipe->curbuf; left && n < pipe->nrbufs; n++, idx++) { +			struct pipe_buffer *buf = pipe->bufs + idx; +			size_t this_len = buf->len; -	if (sd.num_spliced) -		ret = sd.num_spliced; +			if (this_len > left) +				this_len = left; -	if (ret > 0) { -		unsigned long nr_pages; -		int err; +			if (idx == pipe->buffers - 1) +				idx = -1; + +			ret = buf->ops->confirm(pipe, buf); +			if (unlikely(ret)) { +				if (ret == -ENODATA) +					ret = 0; +				goto done; +			} + +			array[n].bv_page = buf->page; +			array[n].bv_len = this_len; +			array[n].bv_offset = buf->offset; +			left -= this_len; +		} + +		/* ... iov_iter */ +		from.type = ITER_BVEC | WRITE; +		from.bvec = array; +		from.nr_segs = n; +		from.count = sd.total_len - left; +		from.iov_offset = 0; + +		/* ... and iocb */ +		init_sync_kiocb(&kiocb, out); +		kiocb.ki_pos = sd.pos; +		kiocb.ki_nbytes = sd.total_len - left; -		nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; +		/* now, send it */ +		ret = out->f_op->write_iter(&kiocb, &from); +		if (-EIOCBQUEUED == ret) +			ret = wait_on_sync_kiocb(&kiocb); -		err = generic_write_sync(out, *ppos, ret); -		if (err) -			ret = err; -		else -			*ppos += ret; -		balance_dirty_pages_ratelimited_nr(mapping, nr_pages); +		if (ret <= 0) +			break; + +		sd.num_spliced += ret; +		sd.total_len -= ret; +		*ppos = sd.pos = kiocb.ki_pos; + +		/* dismiss the fully eaten buffers, adjust the partial one */ +		while (ret) { +			struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; +			if (ret >= buf->len) { +				const struct pipe_buf_operations *ops = buf->ops; +				ret -= buf->len; +				buf->len = 0; +				buf->ops = NULL; +				ops->release(pipe, buf); +				pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1); +				pipe->nrbufs--; +				if (pipe->files) +					sd.need_wakeup = true; +			} else { +				buf->offset += ret; +				buf->len -= ret; +				ret = 0; +			} +		}  	} +done: +	kfree(array); +	splice_from_pipe_end(pipe, &sd); + +	pipe_unlock(pipe); + +	if (sd.num_spliced) +		ret = sd.num_spliced;  	return ret;  } -EXPORT_SYMBOL(generic_file_splice_write); +EXPORT_SYMBOL(iter_file_splice_write);  static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,  			  struct splice_desc *sd)  {  	int ret;  	void *data; +	loff_t tmp = sd->pos; -	ret = buf->ops->confirm(pipe, buf); -	if (ret) -		return ret; - -	data = buf->ops->map(pipe, buf, 0); -	ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos); -	buf->ops->unmap(pipe, buf, data); +	data = kmap(buf->page); +	ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp); +	kunmap(buf->page);  	return ret;  } @@ -1097,19 +1119,8 @@ static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,  {  	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *,  				loff_t *, size_t, unsigned int); -	int ret; - -	if (unlikely(!(out->f_mode & FMODE_WRITE))) -		return -EBADF; - -	if (unlikely(out->f_flags & O_APPEND)) -		return -EINVAL; -	ret = rw_verify_area(WRITE, out, ppos, len); -	if (unlikely(ret < 0)) -		return ret; - -	if (out->f_op && out->f_op->splice_write) +	if (out->f_op->splice_write)  		splice_write = out->f_op->splice_write;  	else  		splice_write = default_file_splice_write; @@ -1135,7 +1146,7 @@ static long do_splice_to(struct file *in, loff_t *ppos,  	if (unlikely(ret < 0))  		return ret; -	if (in->f_op && in->f_op->splice_read) +	if (in->f_op->splice_read)  		splice_read = in->f_op->splice_read;  	else  		splice_read = default_file_splice_read; @@ -1170,7 +1181,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,  	 * randomly drop data for eg socket -> socket splicing. Use the  	 * piped splicing for that!  	 */ -	i_mode = in->f_path.dentry->d_inode->i_mode; +	i_mode = file_inode(in)->i_mode;  	if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode)))  		return -EINVAL; @@ -1180,7 +1191,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,  	 */  	pipe = current->splice_pipe;  	if (unlikely(!pipe)) { -		pipe = alloc_pipe_info(NULL); +		pipe = alloc_pipe_info();  		if (!pipe)  			return -ENOMEM; @@ -1270,7 +1281,7 @@ static int direct_splice_actor(struct pipe_inode_info *pipe,  {  	struct file *file = sd->u.file; -	return do_splice_from(pipe, file, &file->f_pos, sd->total_len, +	return do_splice_from(pipe, file, sd->opos, sd->total_len,  			      sd->flags);  } @@ -1279,6 +1290,7 @@ static int direct_splice_actor(struct pipe_inode_info *pipe,   * @in:		file to splice from   * @ppos:	input file offset   * @out:	file to splice to + * @opos:	output file offset   * @len:	number of bytes to splice   * @flags:	splice modifier flags   * @@ -1290,7 +1302,7 @@ static int direct_splice_actor(struct pipe_inode_info *pipe,   *   */  long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, -		      size_t len, unsigned int flags) +		      loff_t *opos, size_t len, unsigned int flags)  {  	struct splice_desc sd = {  		.len		= len, @@ -1298,9 +1310,20 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,  		.flags		= flags,  		.pos		= *ppos,  		.u.file		= out, +		.opos		= opos,  	};  	long ret; +	if (unlikely(!(out->f_mode & FMODE_WRITE))) +		return -EBADF; + +	if (unlikely(out->f_flags & O_APPEND)) +		return -EINVAL; + +	ret = rw_verify_area(WRITE, out, opos, len); +	if (unlikely(ret < 0)) +		return ret; +  	ret = splice_direct_to_actor(in, &sd, direct_splice_actor);  	if (ret > 0)  		*ppos = sd.pos; @@ -1311,18 +1334,6 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,  static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,  			       struct pipe_inode_info *opipe,  			       size_t len, unsigned int flags); -/* - * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same - * location, so checking ->i_pipe is not enough to verify that this is a - * pipe. - */ -static inline struct pipe_inode_info *pipe_info(struct inode *inode) -{ -	if (S_ISFIFO(inode->i_mode)) -		return inode->i_pipe; - -	return NULL; -}  /*   * Determine where to splice to/from. @@ -1333,11 +1344,11 @@ static long do_splice(struct file *in, loff_t __user *off_in,  {  	struct pipe_inode_info *ipipe;  	struct pipe_inode_info *opipe; -	loff_t offset, *off; +	loff_t offset;  	long ret; -	ipipe = pipe_info(in->f_path.dentry->d_inode); -	opipe = pipe_info(out->f_path.dentry->d_inode); +	ipipe = get_pipe_info(in); +	opipe = get_pipe_info(out);  	if (ipipe && opipe) {  		if (off_in || off_out) @@ -1364,13 +1375,27 @@ static long do_splice(struct file *in, loff_t __user *off_in,  				return -EINVAL;  			if (copy_from_user(&offset, off_out, sizeof(loff_t)))  				return -EFAULT; -			off = &offset; -		} else -			off = &out->f_pos; +		} else { +			offset = out->f_pos; +		} + +		if (unlikely(!(out->f_mode & FMODE_WRITE))) +			return -EBADF; -		ret = do_splice_from(ipipe, out, off, len, flags); +		if (unlikely(out->f_flags & O_APPEND)) +			return -EINVAL; -		if (off_out && copy_to_user(off_out, off, sizeof(loff_t))) +		ret = rw_verify_area(WRITE, out, &offset, len); +		if (unlikely(ret < 0)) +			return ret; + +		file_start_write(out); +		ret = do_splice_from(ipipe, out, &offset, len, flags); +		file_end_write(out); + +		if (!off_out) +			out->f_pos = offset; +		else if (copy_to_user(off_out, &offset, sizeof(loff_t)))  			ret = -EFAULT;  		return ret; @@ -1384,13 +1409,15 @@ static long do_splice(struct file *in, loff_t __user *off_in,  				return -EINVAL;  			if (copy_from_user(&offset, off_in, sizeof(loff_t)))  				return -EFAULT; -			off = &offset; -		} else -			off = &in->f_pos; +		} else { +			offset = in->f_pos; +		} -		ret = do_splice_to(in, off, opipe, len, flags); +		ret = do_splice_to(in, &offset, opipe, len, flags); -		if (off_in && copy_to_user(off_in, off, sizeof(loff_t))) +		if (!off_in) +			in->f_pos = offset; +		else if (copy_to_user(off_in, &offset, sizeof(loff_t)))  			ret = -EFAULT;  		return ret; @@ -1408,7 +1435,7 @@ static long do_splice(struct file *in, loff_t __user *off_in,   */  static int get_iovec_page_array(const struct iovec __user *iov,  				unsigned int nr_vecs, struct page **pages, -				struct partial_page *partial, int aligned, +				struct partial_page *partial, bool aligned,  				unsigned int pipe_buffers)  {  	int buffers = 0, error = 0; @@ -1504,120 +1531,50 @@ static int get_iovec_page_array(const struct iovec __user *iov,  static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,  			struct splice_desc *sd)  { -	char *src; -	int ret; - -	ret = buf->ops->confirm(pipe, buf); -	if (unlikely(ret)) -		return ret; - -	/* -	 * See if we can use the atomic maps, by prefaulting in the -	 * pages and doing an atomic copy -	 */ -	if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) { -		src = buf->ops->map(pipe, buf, 1); -		ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset, -							sd->len); -		buf->ops->unmap(pipe, buf, src); -		if (!ret) { -			ret = sd->len; -			goto out; -		} -	} - -	/* -	 * No dice, use slow non-atomic map and copy - 	 */ -	src = buf->ops->map(pipe, buf, 0); - -	ret = sd->len; -	if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len)) -		ret = -EFAULT; - -	buf->ops->unmap(pipe, buf, src); -out: -	if (ret > 0) -		sd->u.userptr += ret; -	return ret; +	int n = copy_page_to_iter(buf->page, buf->offset, sd->len, sd->u.data); +	return n == sd->len ? n : -EFAULT;  }  /*   * For lack of a better implementation, implement vmsplice() to userspace   * as a simple copy of the pipes pages to the user iov.   */ -static long vmsplice_to_user(struct file *file, const struct iovec __user *iov, +static long vmsplice_to_user(struct file *file, const struct iovec __user *uiov,  			     unsigned long nr_segs, unsigned int flags)  {  	struct pipe_inode_info *pipe;  	struct splice_desc sd; -	ssize_t size; -	int error;  	long ret; +	struct iovec iovstack[UIO_FASTIOV]; +	struct iovec *iov = iovstack; +	struct iov_iter iter; +	ssize_t count; -	pipe = pipe_info(file->f_path.dentry->d_inode); +	pipe = get_pipe_info(file);  	if (!pipe)  		return -EBADF; -	pipe_lock(pipe); - -	error = ret = 0; -	while (nr_segs) { -		void __user *base; -		size_t len; - -		/* -		 * Get user address base and length for this iovec. -		 */ -		error = get_user(base, &iov->iov_base); -		if (unlikely(error)) -			break; -		error = get_user(len, &iov->iov_len); -		if (unlikely(error)) -			break; - -		/* -		 * Sanity check this iovec. 0 read succeeds. -		 */ -		if (unlikely(!len)) -			break; -		if (unlikely(!base)) { -			error = -EFAULT; -			break; -		} - -		if (unlikely(!access_ok(VERIFY_WRITE, base, len))) { -			error = -EFAULT; -			break; -		} - -		sd.len = 0; -		sd.total_len = len; -		sd.flags = flags; -		sd.u.userptr = base; -		sd.pos = 0; - -		size = __splice_from_pipe(pipe, &sd, pipe_to_user); -		if (size < 0) { -			if (!ret) -				ret = size; - -			break; -		} - -		ret += size; +	ret = rw_copy_check_uvector(READ, uiov, nr_segs, +				    ARRAY_SIZE(iovstack), iovstack, &iov); +	if (ret <= 0) +		goto out; -		if (size < len) -			break; +	count = ret; +	iov_iter_init(&iter, READ, iov, nr_segs, count); -		nr_segs--; -		iov++; -	} +	sd.len = 0; +	sd.total_len = count; +	sd.flags = flags; +	sd.u.data = &iter; +	sd.pos = 0; +	pipe_lock(pipe); +	ret = __splice_from_pipe(pipe, &sd, pipe_to_user);  	pipe_unlock(pipe); -	if (!ret) -		ret = error; +out: +	if (iov != iovstack) +		kfree(iov);  	return ret;  } @@ -1636,13 +1593,14 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,  	struct splice_pipe_desc spd = {  		.pages = pages,  		.partial = partial, +		.nr_pages_max = PIPE_DEF_BUFFERS,  		.flags = flags,  		.ops = &user_page_pipe_buf_ops,  		.spd_release = spd_release_page,  	};  	long ret; -	pipe = pipe_info(file->f_path.dentry->d_inode); +	pipe = get_pipe_info(file);  	if (!pipe)  		return -EBADF; @@ -1650,14 +1608,14 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,  		return -ENOMEM;  	spd.nr_pages = get_iovec_page_array(iov, nr_segs, spd.pages, -					    spd.partial, flags & SPLICE_F_GIFT, -					    pipe->buffers); +					    spd.partial, false, +					    spd.nr_pages_max);  	if (spd.nr_pages <= 0)  		ret = spd.nr_pages;  	else  		ret = splice_to_pipe(pipe, &spd); -	splice_shrink_spd(pipe, &spd); +	splice_shrink_spd(&spd);  	return ret;  } @@ -1680,9 +1638,8 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,  SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,  		unsigned long, nr_segs, unsigned int, flags)  { -	struct file *file; +	struct fd f;  	long error; -	int fput;  	if (unlikely(nr_segs > UIO_MAXIOV))  		return -EINVAL; @@ -1690,47 +1647,65 @@ SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,  		return 0;  	error = -EBADF; -	file = fget_light(fd, &fput); -	if (file) { -		if (file->f_mode & FMODE_WRITE) -			error = vmsplice_to_pipe(file, iov, nr_segs, flags); -		else if (file->f_mode & FMODE_READ) -			error = vmsplice_to_user(file, iov, nr_segs, flags); - -		fput_light(file, fput); +	f = fdget(fd); +	if (f.file) { +		if (f.file->f_mode & FMODE_WRITE) +			error = vmsplice_to_pipe(f.file, iov, nr_segs, flags); +		else if (f.file->f_mode & FMODE_READ) +			error = vmsplice_to_user(f.file, iov, nr_segs, flags); + +		fdput(f);  	}  	return error;  } +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, iov32, +		    unsigned int, nr_segs, unsigned int, flags) +{ +	unsigned i; +	struct iovec __user *iov; +	if (nr_segs > UIO_MAXIOV) +		return -EINVAL; +	iov = compat_alloc_user_space(nr_segs * sizeof(struct iovec)); +	for (i = 0; i < nr_segs; i++) { +		struct compat_iovec v; +		if (get_user(v.iov_base, &iov32[i].iov_base) || +		    get_user(v.iov_len, &iov32[i].iov_len) || +		    put_user(compat_ptr(v.iov_base), &iov[i].iov_base) || +		    put_user(v.iov_len, &iov[i].iov_len)) +			return -EFAULT; +	} +	return sys_vmsplice(fd, iov, nr_segs, flags); +} +#endif +  SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,  		int, fd_out, loff_t __user *, off_out,  		size_t, len, unsigned int, flags)  { +	struct fd in, out;  	long error; -	struct file *in, *out; -	int fput_in, fput_out;  	if (unlikely(!len))  		return 0;  	error = -EBADF; -	in = fget_light(fd_in, &fput_in); -	if (in) { -		if (in->f_mode & FMODE_READ) { -			out = fget_light(fd_out, &fput_out); -			if (out) { -				if (out->f_mode & FMODE_WRITE) -					error = do_splice(in, off_in, -							  out, off_out, +	in = fdget(fd_in); +	if (in.file) { +		if (in.file->f_mode & FMODE_READ) { +			out = fdget(fd_out); +			if (out.file) { +				if (out.file->f_mode & FMODE_WRITE) +					error = do_splice(in.file, off_in, +							  out.file, off_out,  							  len, flags); -				fput_light(out, fput_out); +				fdput(out);  			}  		} - -		fput_light(in, fput_in); +		fdput(in);  	} -  	return error;  } @@ -1919,12 +1894,9 @@ retry:  	/*  	 * If we put data in the output pipe, wakeup any potential readers.  	 */ -	if (ret > 0) { -		smp_mb(); -		if (waitqueue_active(&opipe->wait)) -			wake_up_interruptible(&opipe->wait); -		kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN); -	} +	if (ret > 0) +		wakeup_pipe_readers(opipe); +  	if (input_wakeup)  		wakeup_pipe_writers(ipipe); @@ -2003,12 +1975,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,  	/*  	 * If we put data in the output pipe, wakeup any potential readers.  	 */ -	if (ret > 0) { -		smp_mb(); -		if (waitqueue_active(&opipe->wait)) -			wake_up_interruptible(&opipe->wait); -		kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN); -	} +	if (ret > 0) +		wakeup_pipe_readers(opipe);  	return ret;  } @@ -2022,8 +1990,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,  static long do_tee(struct file *in, struct file *out, size_t len,  		   unsigned int flags)  { -	struct pipe_inode_info *ipipe = pipe_info(in->f_path.dentry->d_inode); -	struct pipe_inode_info *opipe = pipe_info(out->f_path.dentry->d_inode); +	struct pipe_inode_info *ipipe = get_pipe_info(in); +	struct pipe_inode_info *opipe = get_pipe_info(out);  	int ret = -EINVAL;  	/* @@ -2048,26 +2016,25 @@ static long do_tee(struct file *in, struct file *out, size_t len,  SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)  { -	struct file *in; -	int error, fput_in; +	struct fd in; +	int error;  	if (unlikely(!len))  		return 0;  	error = -EBADF; -	in = fget_light(fdin, &fput_in); -	if (in) { -		if (in->f_mode & FMODE_READ) { -			int fput_out; -			struct file *out = fget_light(fdout, &fput_out); - -			if (out) { -				if (out->f_mode & FMODE_WRITE) -					error = do_tee(in, out, len, flags); -				fput_light(out, fput_out); +	in = fdget(fdin); +	if (in.file) { +		if (in.file->f_mode & FMODE_READ) { +			struct fd out = fdget(fdout); +			if (out.file) { +				if (out.file->f_mode & FMODE_WRITE) +					error = do_tee(in.file, out.file, +							len, flags); +				fdput(out);  			}  		} - 		fput_light(in, fput_in); + 		fdput(in);   	}  	return error;  | 
