diff options
Diffstat (limited to 'fs/pipe.c')
| -rw-r--r-- | fs/pipe.c | 875 | 
1 files changed, 360 insertions, 515 deletions
diff --git a/fs/pipe.c b/fs/pipe.c index a8012a95572..21981e58e2a 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -13,6 +13,7 @@  #include <linux/fs.h>  #include <linux/log2.h>  #include <linux/mount.h> +#include <linux/magic.h>  #include <linux/pipe_fs_i.h>  #include <linux/uio.h>  #include <linux/highmem.h> @@ -20,10 +21,13 @@  #include <linux/audit.h>  #include <linux/syscalls.h>  #include <linux/fcntl.h> +#include <linux/aio.h>  #include <asm/uaccess.h>  #include <asm/ioctls.h> +#include "internal.h" +  /*   * The max size that a non-root user is allowed to grow the pipe. Can   * be set by root in /proc/sys/fs/pipe-max-size @@ -52,8 +56,8 @@ unsigned int pipe_min_size = PAGE_SIZE;  static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)  { -	if (pipe->inode) -		mutex_lock_nested(&pipe->inode->i_mutex, subclass); +	if (pipe->files) +		mutex_lock_nested(&pipe->mutex, subclass);  }  void pipe_lock(struct pipe_inode_info *pipe) @@ -67,11 +71,21 @@ EXPORT_SYMBOL(pipe_lock);  void pipe_unlock(struct pipe_inode_info *pipe)  { -	if (pipe->inode) -		mutex_unlock(&pipe->inode->i_mutex); +	if (pipe->files) +		mutex_unlock(&pipe->mutex);  }  EXPORT_SYMBOL(pipe_unlock); +static inline void __pipe_lock(struct pipe_inode_info *pipe) +{ +	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT); +} + +static inline void __pipe_unlock(struct pipe_inode_info *pipe) +{ +	mutex_unlock(&pipe->mutex); +} +  void pipe_double_lock(struct pipe_inode_info *pipe1,  		      struct pipe_inode_info *pipe2)  { @@ -102,99 +116,6 @@ void pipe_wait(struct pipe_inode_info *pipe)  	pipe_lock(pipe);  } -static int -pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len, -			int atomic) -{ -	unsigned long copy; - -	while (len > 0) { -		while (!iov->iov_len) -			iov++; -		copy = min_t(unsigned long, len, iov->iov_len); - -		if (atomic) { -			if (__copy_from_user_inatomic(to, iov->iov_base, copy)) -				return -EFAULT; -		} else { -			if (copy_from_user(to, iov->iov_base, copy)) -				return -EFAULT; -		} -		to += copy; -		len -= copy; -		iov->iov_base += copy; -		iov->iov_len -= copy; -	} -	return 0; -} - -static int -pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len, -		      int atomic) -{ -	unsigned long copy; - -	while (len > 0) { -		while (!iov->iov_len) -			iov++; -		copy = min_t(unsigned long, len, iov->iov_len); - -		if (atomic) { -			if (__copy_to_user_inatomic(iov->iov_base, from, copy)) -				return -EFAULT; -		} else { -			if (copy_to_user(iov->iov_base, from, copy)) -				return -EFAULT; -		} -		from += copy; -		len -= copy; -		iov->iov_base += copy; -		iov->iov_len -= copy; -	} -	return 0; -} - -/* - * Attempt to pre-fault in the user memory, so we can use atomic copies. - * Returns the number of bytes not faulted in. - */ -static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len) -{ -	while (!iov->iov_len) -		iov++; - -	while (len > 0) { -		unsigned long this_len; - -		this_len = min_t(unsigned long, len, iov->iov_len); -		if (fault_in_pages_writeable(iov->iov_base, this_len)) -			break; - -		len -= this_len; -		iov++; -	} - -	return len; -} - -/* - * Pre-fault in the user memory, so we can use atomic copies. - */ -static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len) -{ -	while (!iov->iov_len) -		iov++; - -	while (len > 0) { -		unsigned long this_len; - -		this_len = min_t(unsigned long, len, iov->iov_len); -		fault_in_pages_readable(iov->iov_base, this_len); -		len -= this_len; -		iov++; -	} -} -  static void anon_pipe_buf_release(struct pipe_inode_info *pipe,  				  struct pipe_buffer *buf)  { @@ -212,52 +133,6 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe,  }  /** - * generic_pipe_buf_map - virtually map a pipe buffer - * @pipe:	the pipe that the buffer belongs to - * @buf:	the buffer that should be mapped - * @atomic:	whether to use an atomic map - * - * Description: - *	This function returns a kernel virtual address mapping for the - *	pipe_buffer passed in @buf. If @atomic is set, an atomic map is provided - *	and the caller has to be careful not to fault before calling - *	the unmap function. - * - *	Note that this function occupies KM_USER0 if @atomic != 0. - */ -void *generic_pipe_buf_map(struct pipe_inode_info *pipe, -			   struct pipe_buffer *buf, int atomic) -{ -	if (atomic) { -		buf->flags |= PIPE_BUF_FLAG_ATOMIC; -		return kmap_atomic(buf->page, KM_USER0); -	} - -	return kmap(buf->page); -} -EXPORT_SYMBOL(generic_pipe_buf_map); - -/** - * generic_pipe_buf_unmap - unmap a previously mapped pipe buffer - * @pipe:	the pipe that the buffer belongs to - * @buf:	the buffer that should be unmapped - * @map_data:	the data that the mapping function returned - * - * Description: - *	This function undoes the mapping that ->map() provided. - */ -void generic_pipe_buf_unmap(struct pipe_inode_info *pipe, -			    struct pipe_buffer *buf, void *map_data) -{ -	if (buf->flags & PIPE_BUF_FLAG_ATOMIC) { -		buf->flags &= ~PIPE_BUF_FLAG_ATOMIC; -		kunmap_atomic(map_data, KM_USER0); -	} else -		kunmap(buf->page); -} -EXPORT_SYMBOL(generic_pipe_buf_unmap); - -/**   * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer   * @pipe:	the pipe that the buffer belongs to   * @buf:	the buffer to attempt to steal @@ -337,8 +212,14 @@ EXPORT_SYMBOL(generic_pipe_buf_release);  static const struct pipe_buf_operations anon_pipe_buf_ops = {  	.can_merge = 1, -	.map = generic_pipe_buf_map, -	.unmap = generic_pipe_buf_unmap, +	.confirm = generic_pipe_buf_confirm, +	.release = anon_pipe_buf_release, +	.steal = generic_pipe_buf_steal, +	.get = generic_pipe_buf_get, +}; + +static const struct pipe_buf_operations packet_pipe_buf_ops = { +	.can_merge = 0,  	.confirm = generic_pipe_buf_confirm,  	.release = anon_pipe_buf_release,  	.steal = generic_pipe_buf_steal, @@ -346,35 +227,30 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = {  };  static ssize_t -pipe_read(struct kiocb *iocb, const struct iovec *_iov, -	   unsigned long nr_segs, loff_t pos) +pipe_read(struct kiocb *iocb, struct iov_iter *to)  { +	size_t total_len = iov_iter_count(to);  	struct file *filp = iocb->ki_filp; -	struct inode *inode = filp->f_path.dentry->d_inode; -	struct pipe_inode_info *pipe; +	struct pipe_inode_info *pipe = filp->private_data;  	int do_wakeup;  	ssize_t ret; -	struct iovec *iov = (struct iovec *)_iov; -	size_t total_len; -	total_len = iov_length(iov, nr_segs);  	/* Null read succeeds. */  	if (unlikely(total_len == 0))  		return 0;  	do_wakeup = 0;  	ret = 0; -	mutex_lock(&inode->i_mutex); -	pipe = inode->i_pipe; +	__pipe_lock(pipe);  	for (;;) {  		int bufs = pipe->nrbufs;  		if (bufs) {  			int curbuf = pipe->curbuf;  			struct pipe_buffer *buf = pipe->bufs + curbuf;  			const struct pipe_buf_operations *ops = buf->ops; -			void *addr;  			size_t chars = buf->len; -			int error, atomic; +			size_t written; +			int error;  			if (chars > total_len)  				chars = total_len; @@ -386,26 +262,22 @@ pipe_read(struct kiocb *iocb, const struct iovec *_iov,  				break;  			} -			atomic = !iov_fault_in_pages_write(iov, chars); -redo: -			addr = ops->map(pipe, buf, atomic); -			error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic); -			ops->unmap(pipe, buf, addr); -			if (unlikely(error)) { -				/* -				 * Just retry with the slow path if we failed. -				 */ -				if (atomic) { -					atomic = 0; -					goto redo; -				} +			written = copy_page_to_iter(buf->page, buf->offset, chars, to); +			if (unlikely(written < chars)) {  				if (!ret) -					ret = error; +					ret = -EFAULT;  				break;  			}  			ret += chars;  			buf->offset += chars;  			buf->len -= chars; + +			/* Was it a packet buffer? Clean up and exit */ +			if (buf->flags & PIPE_BUF_FLAG_PACKET) { +				total_len = chars; +				buf->len = 0; +			} +  			if (!buf->len) {  				buf->ops = NULL;  				ops->release(pipe, buf); @@ -441,16 +313,16 @@ redo:  			break;  		}  		if (do_wakeup) { -			wake_up_interruptible_sync(&pipe->wait); +			wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT | POLLWRNORM);   			kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);  		}  		pipe_wait(pipe);  	} -	mutex_unlock(&inode->i_mutex); +	__pipe_unlock(pipe);  	/* Signal writers asynchronously that there is more room. */  	if (do_wakeup) { -		wake_up_interruptible_sync(&pipe->wait); +		wake_up_interruptible_sync_poll(&pipe->wait, POLLOUT | POLLWRNORM);  		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);  	}  	if (ret > 0) @@ -458,28 +330,26 @@ redo:  	return ret;  } +static inline int is_packetized(struct file *file) +{ +	return (file->f_flags & O_DIRECT) != 0; +} +  static ssize_t -pipe_write(struct kiocb *iocb, const struct iovec *_iov, -	    unsigned long nr_segs, loff_t ppos) +pipe_write(struct kiocb *iocb, struct iov_iter *from)  {  	struct file *filp = iocb->ki_filp; -	struct inode *inode = filp->f_path.dentry->d_inode; -	struct pipe_inode_info *pipe; -	ssize_t ret; -	int do_wakeup; -	struct iovec *iov = (struct iovec *)_iov; -	size_t total_len; +	struct pipe_inode_info *pipe = filp->private_data; +	ssize_t ret = 0; +	int do_wakeup = 0; +	size_t total_len = iov_iter_count(from);  	ssize_t chars; -	total_len = iov_length(iov, nr_segs);  	/* Null write succeeds. */  	if (unlikely(total_len == 0))  		return 0; -	do_wakeup = 0; -	ret = 0; -	mutex_lock(&inode->i_mutex); -	pipe = inode->i_pipe; +	__pipe_lock(pipe);  	if (!pipe->readers) {  		send_sig(SIGPIPE, current, 0); @@ -497,32 +367,19 @@ pipe_write(struct kiocb *iocb, const struct iovec *_iov,  		int offset = buf->offset + buf->len;  		if (ops->can_merge && offset + chars <= PAGE_SIZE) { -			int error, atomic = 1; -			void *addr; - -			error = ops->confirm(pipe, buf); +			int error = ops->confirm(pipe, buf);  			if (error)  				goto out; -			iov_fault_in_pages_read(iov, chars); -redo1: -			addr = ops->map(pipe, buf, atomic); -			error = pipe_iov_copy_from_user(offset + addr, iov, -							chars, atomic); -			ops->unmap(pipe, buf, addr); -			ret = error; -			do_wakeup = 1; -			if (error) { -				if (atomic) { -					atomic = 0; -					goto redo1; -				} +			ret = copy_page_from_iter(buf->page, offset, chars, from); +			if (unlikely(ret < chars)) { +				error = -EFAULT;  				goto out;  			} +			do_wakeup = 1;  			buf->len += chars; -			total_len -= chars;  			ret = chars; -			if (!total_len) +			if (!iov_iter_count(from))  				goto out;  		}  	} @@ -541,8 +398,7 @@ redo1:  			int newbuf = (pipe->curbuf + bufs) & (pipe->buffers-1);  			struct pipe_buffer *buf = pipe->bufs + newbuf;  			struct page *page = pipe->tmp_page; -			char *src; -			int error, atomic = 1; +			int copied;  			if (!page) {  				page = alloc_page(GFP_HIGHUSER); @@ -558,45 +414,28 @@ redo1:  			 * FIXME! Is this really true?  			 */  			do_wakeup = 1; -			chars = PAGE_SIZE; -			if (chars > total_len) -				chars = total_len; - -			iov_fault_in_pages_read(iov, chars); -redo2: -			if (atomic) -				src = kmap_atomic(page, KM_USER0); -			else -				src = kmap(page); - -			error = pipe_iov_copy_from_user(src, iov, chars, -							atomic); -			if (atomic) -				kunmap_atomic(src, KM_USER0); -			else -				kunmap(page); - -			if (unlikely(error)) { -				if (atomic) { -					atomic = 0; -					goto redo2; -				} +			copied = copy_page_from_iter(page, 0, PAGE_SIZE, from); +			if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {  				if (!ret) -					ret = error; +					ret = -EFAULT;  				break;  			} -			ret += chars; +			ret += copied;  			/* Insert it into the buffer array */  			buf->page = page;  			buf->ops = &anon_pipe_buf_ops;  			buf->offset = 0; -			buf->len = chars; +			buf->len = copied; +			buf->flags = 0; +			if (is_packetized(filp)) { +				buf->ops = &packet_pipe_buf_ops; +				buf->flags = PIPE_BUF_FLAG_PACKET; +			}  			pipe->nrbufs = ++bufs;  			pipe->tmp_page = NULL; -			total_len -= chars; -			if (!total_len) +			if (!iov_iter_count(from))  				break;  		}  		if (bufs < pipe->buffers) @@ -612,7 +451,7 @@ redo2:  			break;  		}  		if (do_wakeup) { -			wake_up_interruptible_sync(&pipe->wait); +			wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM);  			kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);  			do_wakeup = 0;  		} @@ -621,39 +460,28 @@ redo2:  		pipe->waiting_writers--;  	}  out: -	mutex_unlock(&inode->i_mutex); +	__pipe_unlock(pipe);  	if (do_wakeup) { -		wake_up_interruptible_sync(&pipe->wait); +		wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLRDNORM);  		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);  	} -	if (ret > 0) -		file_update_time(filp); +	if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) { +		int err = file_update_time(filp); +		if (err) +			ret = err; +		sb_end_write(file_inode(filp)->i_sb); +	}  	return ret;  } -static ssize_t -bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos) -{ -	return -EBADF; -} - -static ssize_t -bad_pipe_w(struct file *filp, const char __user *buf, size_t count, -	   loff_t *ppos) -{ -	return -EBADF; -} -  static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)  { -	struct inode *inode = filp->f_path.dentry->d_inode; -	struct pipe_inode_info *pipe; +	struct pipe_inode_info *pipe = filp->private_data;  	int count, buf, nrbufs;  	switch (cmd) {  		case FIONREAD: -			mutex_lock(&inode->i_mutex); -			pipe = inode->i_pipe; +			__pipe_lock(pipe);  			count = 0;  			buf = pipe->curbuf;  			nrbufs = pipe->nrbufs; @@ -661,11 +489,11 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)  				count += pipe->bufs[buf].len;  				buf = (buf+1) & (pipe->buffers - 1);  			} -			mutex_unlock(&inode->i_mutex); +			__pipe_unlock(pipe);  			return put_user(count, (int __user *)arg);  		default: -			return -EINVAL; +			return -ENOIOCTLCMD;  	}  } @@ -674,8 +502,7 @@ static unsigned int  pipe_poll(struct file *filp, poll_table *wait)  {  	unsigned int mask; -	struct inode *inode = filp->f_path.dentry->d_inode; -	struct pipe_inode_info *pipe = inode->i_pipe; +	struct pipe_inode_info *pipe = filp->private_data;  	int nrbufs;  	poll_wait(filp, &pipe->wait, wait); @@ -702,195 +529,63 @@ pipe_poll(struct file *filp, poll_table *wait)  	return mask;  } -static int -pipe_release(struct inode *inode, int decr, int decw) +static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)  { -	struct pipe_inode_info *pipe; - -	mutex_lock(&inode->i_mutex); -	pipe = inode->i_pipe; -	pipe->readers -= decr; -	pipe->writers -= decw; +	int kill = 0; -	if (!pipe->readers && !pipe->writers) { -		free_pipe_info(inode); -	} else { -		wake_up_interruptible_sync(&pipe->wait); -		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); -		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); +	spin_lock(&inode->i_lock); +	if (!--pipe->files) { +		inode->i_pipe = NULL; +		kill = 1;  	} -	mutex_unlock(&inode->i_mutex); +	spin_unlock(&inode->i_lock); -	return 0; +	if (kill) +		free_pipe_info(pipe);  }  static int -pipe_read_fasync(int fd, struct file *filp, int on) +pipe_release(struct inode *inode, struct file *file)  { -	struct inode *inode = filp->f_path.dentry->d_inode; -	int retval; +	struct pipe_inode_info *pipe = file->private_data; -	mutex_lock(&inode->i_mutex); -	retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers); -	mutex_unlock(&inode->i_mutex); +	__pipe_lock(pipe); +	if (file->f_mode & FMODE_READ) +		pipe->readers--; +	if (file->f_mode & FMODE_WRITE) +		pipe->writers--; -	return retval; -} - - -static int -pipe_write_fasync(int fd, struct file *filp, int on) -{ -	struct inode *inode = filp->f_path.dentry->d_inode; -	int retval; - -	mutex_lock(&inode->i_mutex); -	retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers); -	mutex_unlock(&inode->i_mutex); +	if (pipe->readers || pipe->writers) { +		wake_up_interruptible_sync_poll(&pipe->wait, POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM | POLLERR | POLLHUP); +		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); +		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); +	} +	__pipe_unlock(pipe); -	return retval; +	put_pipe_info(inode, pipe); +	return 0;  } -  static int -pipe_rdwr_fasync(int fd, struct file *filp, int on) +pipe_fasync(int fd, struct file *filp, int on)  { -	struct inode *inode = filp->f_path.dentry->d_inode; -	struct pipe_inode_info *pipe = inode->i_pipe; -	int retval; +	struct pipe_inode_info *pipe = filp->private_data; +	int retval = 0; -	mutex_lock(&inode->i_mutex); -	retval = fasync_helper(fd, filp, on, &pipe->fasync_readers); -	if (retval >= 0) { +	__pipe_lock(pipe); +	if (filp->f_mode & FMODE_READ) +		retval = fasync_helper(fd, filp, on, &pipe->fasync_readers); +	if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {  		retval = fasync_helper(fd, filp, on, &pipe->fasync_writers); -		if (retval < 0) /* this can happen only if on == T */ +		if (retval < 0 && (filp->f_mode & FMODE_READ)) +			/* this can happen only if on == T */  			fasync_helper(-1, filp, 0, &pipe->fasync_readers);  	} -	mutex_unlock(&inode->i_mutex); +	__pipe_unlock(pipe);  	return retval;  } - -static int -pipe_read_release(struct inode *inode, struct file *filp) -{ -	return pipe_release(inode, 1, 0); -} - -static int -pipe_write_release(struct inode *inode, struct file *filp) -{ -	return pipe_release(inode, 0, 1); -} - -static int -pipe_rdwr_release(struct inode *inode, struct file *filp) -{ -	int decr, decw; - -	decr = (filp->f_mode & FMODE_READ) != 0; -	decw = (filp->f_mode & FMODE_WRITE) != 0; -	return pipe_release(inode, decr, decw); -} - -static int -pipe_read_open(struct inode *inode, struct file *filp) -{ -	int ret = -ENOENT; - -	mutex_lock(&inode->i_mutex); - -	if (inode->i_pipe) { -		ret = 0; -		inode->i_pipe->readers++; -	} - -	mutex_unlock(&inode->i_mutex); - -	return ret; -} - -static int -pipe_write_open(struct inode *inode, struct file *filp) -{ -	int ret = -ENOENT; - -	mutex_lock(&inode->i_mutex); - -	if (inode->i_pipe) { -		ret = 0; -		inode->i_pipe->writers++; -	} - -	mutex_unlock(&inode->i_mutex); - -	return ret; -} - -static int -pipe_rdwr_open(struct inode *inode, struct file *filp) -{ -	int ret = -ENOENT; - -	mutex_lock(&inode->i_mutex); - -	if (inode->i_pipe) { -		ret = 0; -		if (filp->f_mode & FMODE_READ) -			inode->i_pipe->readers++; -		if (filp->f_mode & FMODE_WRITE) -			inode->i_pipe->writers++; -	} - -	mutex_unlock(&inode->i_mutex); - -	return ret; -} - -/* - * The file_operations structs are not static because they - * are also used in linux/fs/fifo.c to do operations on FIFOs. - * - * Pipes reuse fifos' file_operations structs. - */ -const struct file_operations read_pipefifo_fops = { -	.llseek		= no_llseek, -	.read		= do_sync_read, -	.aio_read	= pipe_read, -	.write		= bad_pipe_w, -	.poll		= pipe_poll, -	.unlocked_ioctl	= pipe_ioctl, -	.open		= pipe_read_open, -	.release	= pipe_read_release, -	.fasync		= pipe_read_fasync, -}; - -const struct file_operations write_pipefifo_fops = { -	.llseek		= no_llseek, -	.read		= bad_pipe_r, -	.write		= do_sync_write, -	.aio_write	= pipe_write, -	.poll		= pipe_poll, -	.unlocked_ioctl	= pipe_ioctl, -	.open		= pipe_write_open, -	.release	= pipe_write_release, -	.fasync		= pipe_write_fasync, -}; - -const struct file_operations rdwr_pipefifo_fops = { -	.llseek		= no_llseek, -	.read		= do_sync_read, -	.aio_read	= pipe_read, -	.write		= do_sync_write, -	.aio_write	= pipe_write, -	.poll		= pipe_poll, -	.unlocked_ioctl	= pipe_ioctl, -	.open		= pipe_rdwr_open, -	.release	= pipe_rdwr_release, -	.fasync		= pipe_rdwr_fasync, -}; - -struct pipe_inode_info * alloc_pipe_info(struct inode *inode) +struct pipe_inode_info *alloc_pipe_info(void)  {  	struct pipe_inode_info *pipe; @@ -900,8 +595,8 @@ struct pipe_inode_info * alloc_pipe_info(struct inode *inode)  		if (pipe->bufs) {  			init_waitqueue_head(&pipe->wait);  			pipe->r_counter = pipe->w_counter = 1; -			pipe->inode = inode;  			pipe->buffers = PIPE_DEF_BUFFERS; +			mutex_init(&pipe->mutex);  			return pipe;  		}  		kfree(pipe); @@ -910,7 +605,7 @@ struct pipe_inode_info * alloc_pipe_info(struct inode *inode)  	return NULL;  } -void __free_pipe_info(struct pipe_inode_info *pipe) +void free_pipe_info(struct pipe_inode_info *pipe)  {  	int i; @@ -925,12 +620,6 @@ void __free_pipe_info(struct pipe_inode_info *pipe)  	kfree(pipe);  } -void free_pipe_info(struct inode *inode) -{ -	__free_pipe_info(inode->i_pipe); -	inode->i_pipe = NULL; -} -  static struct vfsmount *pipe_mnt __read_mostly;  /* @@ -948,7 +637,7 @@ static const struct dentry_operations pipefs_dentry_operations = {  static struct inode * get_pipe_inode(void)  { -	struct inode *inode = new_inode(pipe_mnt->mnt_sb); +	struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);  	struct pipe_inode_info *pipe;  	if (!inode) @@ -956,13 +645,14 @@ static struct inode * get_pipe_inode(void)  	inode->i_ino = get_next_ino(); -	pipe = alloc_pipe_info(inode); +	pipe = alloc_pipe_info();  	if (!pipe)  		goto fail_iput; -	inode->i_pipe = pipe; +	inode->i_pipe = pipe; +	pipe->files = 2;  	pipe->readers = pipe->writers = 1; -	inode->i_fop = &rdwr_pipefifo_fops; +	inode->i_fop = &pipefifo_fops;  	/*  	 * Mark the inode dirty from the very beginning, @@ -985,88 +675,67 @@ fail_inode:  	return NULL;  } -struct file *create_write_pipe(int flags) +int create_pipe_files(struct file **res, int flags)  {  	int err; -	struct inode *inode; +	struct inode *inode = get_pipe_inode();  	struct file *f;  	struct path path; -	struct qstr name = { .name = "" }; +	static struct qstr name = { .name = "" }; -	err = -ENFILE; -	inode = get_pipe_inode();  	if (!inode) -		goto err; +		return -ENFILE;  	err = -ENOMEM; -	path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name); +	path.dentry = d_alloc_pseudo(pipe_mnt->mnt_sb, &name);  	if (!path.dentry)  		goto err_inode;  	path.mnt = mntget(pipe_mnt); -	path.dentry->d_op = &pipefs_dentry_operations;  	d_instantiate(path.dentry, inode);  	err = -ENFILE; -	f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops); -	if (!f) +	f = alloc_file(&path, FMODE_WRITE, &pipefifo_fops); +	if (IS_ERR(f))  		goto err_dentry; -	f->f_mapping = inode->i_mapping; -	f->f_flags = O_WRONLY | (flags & O_NONBLOCK); -	f->f_version = 0; +	f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); +	f->private_data = inode->i_pipe; -	return f; +	res[0] = alloc_file(&path, FMODE_READ, &pipefifo_fops); +	if (IS_ERR(res[0])) +		goto err_file; - err_dentry: -	free_pipe_info(inode); -	path_put(&path); -	return ERR_PTR(err); - - err_inode: -	free_pipe_info(inode); -	iput(inode); - err: -	return ERR_PTR(err); -} +	path_get(&path); +	res[0]->private_data = inode->i_pipe; +	res[0]->f_flags = O_RDONLY | (flags & O_NONBLOCK); +	res[1] = f; +	return 0; -void free_write_pipe(struct file *f) -{ -	free_pipe_info(f->f_dentry->d_inode); -	path_put(&f->f_path); +err_file:  	put_filp(f); -} - -struct file *create_read_pipe(struct file *wrf, int flags) -{ -	/* Grab pipe from the writer */ -	struct file *f = alloc_file(&wrf->f_path, FMODE_READ, -				    &read_pipefifo_fops); -	if (!f) -		return ERR_PTR(-ENFILE); - -	path_get(&wrf->f_path); -	f->f_flags = O_RDONLY | (flags & O_NONBLOCK); +err_dentry: +	free_pipe_info(inode->i_pipe); +	path_put(&path); +	return err; -	return f; +err_inode: +	free_pipe_info(inode->i_pipe); +	iput(inode); +	return err;  } -int do_pipe_flags(int *fd, int flags) +static int __do_pipe_flags(int *fd, struct file **files, int flags)  { -	struct file *fw, *fr;  	int error;  	int fdw, fdr; -	if (flags & ~(O_CLOEXEC | O_NONBLOCK)) +	if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT))  		return -EINVAL; -	fw = create_write_pipe(flags); -	if (IS_ERR(fw)) -		return PTR_ERR(fw); -	fr = create_read_pipe(fw, flags); -	error = PTR_ERR(fr); -	if (IS_ERR(fr)) -		goto err_write_pipe; +	error = create_pipe_files(files, flags); +	if (error) +		return error;  	error = get_unused_fd_flags(flags);  	if (error < 0) @@ -1079,20 +748,26 @@ int do_pipe_flags(int *fd, int flags)  	fdw = error;  	audit_fd_pair(fdr, fdw); -	fd_install(fdr, fr); -	fd_install(fdw, fw);  	fd[0] = fdr;  	fd[1] = fdw; -  	return 0;   err_fdr:  	put_unused_fd(fdr);   err_read_pipe: -	path_put(&fr->f_path); -	put_filp(fr); - err_write_pipe: -	free_write_pipe(fw); +	fput(files[0]); +	fput(files[1]); +	return error; +} + +int do_pipe_flags(int *fd, int flags) +{ +	struct file *files[2]; +	int error = __do_pipe_flags(fd, files, flags); +	if (!error) { +		fd_install(fd[0], files[0]); +		fd_install(fd[1], files[1]); +	}  	return error;  } @@ -1102,15 +777,21 @@ int do_pipe_flags(int *fd, int flags)   */  SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)  { +	struct file *files[2];  	int fd[2];  	int error; -	error = do_pipe_flags(fd, flags); +	error = __do_pipe_flags(fd, files, flags);  	if (!error) { -		if (copy_to_user(fildes, fd, sizeof(fd))) { -			sys_close(fd[0]); -			sys_close(fd[1]); +		if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) { +			fput(files[0]); +			fput(files[1]); +			put_unused_fd(fd[0]); +			put_unused_fd(fd[1]);  			error = -EFAULT; +		} else { +			fd_install(fd[0], files[0]); +			fd_install(fd[1], files[1]);  		}  	}  	return error; @@ -1121,6 +802,161 @@ SYSCALL_DEFINE1(pipe, int __user *, fildes)  	return sys_pipe2(fildes, 0);  } +static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt) +{ +	int cur = *cnt;	 + +	while (cur == *cnt) { +		pipe_wait(pipe); +		if (signal_pending(current)) +			break; +	} +	return cur == *cnt ? -ERESTARTSYS : 0; +} + +static void wake_up_partner(struct pipe_inode_info *pipe) +{ +	wake_up_interruptible(&pipe->wait); +} + +static int fifo_open(struct inode *inode, struct file *filp) +{ +	struct pipe_inode_info *pipe; +	bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC; +	int ret; + +	filp->f_version = 0; + +	spin_lock(&inode->i_lock); +	if (inode->i_pipe) { +		pipe = inode->i_pipe; +		pipe->files++; +		spin_unlock(&inode->i_lock); +	} else { +		spin_unlock(&inode->i_lock); +		pipe = alloc_pipe_info(); +		if (!pipe) +			return -ENOMEM; +		pipe->files = 1; +		spin_lock(&inode->i_lock); +		if (unlikely(inode->i_pipe)) { +			inode->i_pipe->files++; +			spin_unlock(&inode->i_lock); +			free_pipe_info(pipe); +			pipe = inode->i_pipe; +		} else { +			inode->i_pipe = pipe; +			spin_unlock(&inode->i_lock); +		} +	} +	filp->private_data = pipe; +	/* OK, we have a pipe and it's pinned down */ + +	__pipe_lock(pipe); + +	/* We can only do regular read/write on fifos */ +	filp->f_mode &= (FMODE_READ | FMODE_WRITE); + +	switch (filp->f_mode) { +	case FMODE_READ: +	/* +	 *  O_RDONLY +	 *  POSIX.1 says that O_NONBLOCK means return with the FIFO +	 *  opened, even when there is no process writing the FIFO. +	 */ +		pipe->r_counter++; +		if (pipe->readers++ == 0) +			wake_up_partner(pipe); + +		if (!is_pipe && !pipe->writers) { +			if ((filp->f_flags & O_NONBLOCK)) { +				/* suppress POLLHUP until we have +				 * seen a writer */ +				filp->f_version = pipe->w_counter; +			} else { +				if (wait_for_partner(pipe, &pipe->w_counter)) +					goto err_rd; +			} +		} +		break; +	 +	case FMODE_WRITE: +	/* +	 *  O_WRONLY +	 *  POSIX.1 says that O_NONBLOCK means return -1 with +	 *  errno=ENXIO when there is no process reading the FIFO. +	 */ +		ret = -ENXIO; +		if (!is_pipe && (filp->f_flags & O_NONBLOCK) && !pipe->readers) +			goto err; + +		pipe->w_counter++; +		if (!pipe->writers++) +			wake_up_partner(pipe); + +		if (!is_pipe && !pipe->readers) { +			if (wait_for_partner(pipe, &pipe->r_counter)) +				goto err_wr; +		} +		break; +	 +	case FMODE_READ | FMODE_WRITE: +	/* +	 *  O_RDWR +	 *  POSIX.1 leaves this case "undefined" when O_NONBLOCK is set. +	 *  This implementation will NEVER block on a O_RDWR open, since +	 *  the process can at least talk to itself. +	 */ + +		pipe->readers++; +		pipe->writers++; +		pipe->r_counter++; +		pipe->w_counter++; +		if (pipe->readers == 1 || pipe->writers == 1) +			wake_up_partner(pipe); +		break; + +	default: +		ret = -EINVAL; +		goto err; +	} + +	/* Ok! */ +	__pipe_unlock(pipe); +	return 0; + +err_rd: +	if (!--pipe->readers) +		wake_up_interruptible(&pipe->wait); +	ret = -ERESTARTSYS; +	goto err; + +err_wr: +	if (!--pipe->writers) +		wake_up_interruptible(&pipe->wait); +	ret = -ERESTARTSYS; +	goto err; + +err: +	__pipe_unlock(pipe); + +	put_pipe_info(inode, pipe); +	return ret; +} + +const struct file_operations pipefifo_fops = { +	.open		= fifo_open, +	.llseek		= no_llseek, +	.read		= new_sync_read, +	.read_iter	= pipe_read, +	.write		= new_sync_write, +	.write_iter	= pipe_write, +	.poll		= pipe_poll, +	.unlocked_ioctl	= pipe_ioctl, +	.release	= pipe_release, +	.fasync		= pipe_fasync, +}; +  /*   * Allocate a new array of pipe buffers and copy the info over. Returns the   * pipe size if successful, or return -ERROR on error. @@ -1138,7 +974,7 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long nr_pages)  	if (nr_pages < pipe->nrbufs)  		return -EBUSY; -	bufs = kcalloc(nr_pages, sizeof(struct pipe_buffer), GFP_KERNEL); +	bufs = kcalloc(nr_pages, sizeof(*bufs), GFP_KERNEL | __GFP_NOWARN);  	if (unlikely(!bufs))  		return -ENOMEM; @@ -1199,16 +1035,26 @@ int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,  	return ret;  } +/* + * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same + * location, so checking ->i_pipe is not enough to verify that this is a + * pipe. + */ +struct pipe_inode_info *get_pipe_info(struct file *file) +{ +	return file->f_op == &pipefifo_fops ? file->private_data : NULL; +} +  long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)  {  	struct pipe_inode_info *pipe;  	long ret; -	pipe = file->f_path.dentry->d_inode->i_pipe; +	pipe = get_pipe_info(file);  	if (!pipe)  		return -EBADF; -	mutex_lock(&pipe->inode->i_mutex); +	__pipe_lock(pipe);  	switch (cmd) {  	case F_SETPIPE_SZ: { @@ -1237,10 +1083,15 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)  	}  out: -	mutex_unlock(&pipe->inode->i_mutex); +	__pipe_unlock(pipe);  	return ret;  } +static const struct super_operations pipefs_ops = { +	.destroy_inode = free_inode_nonrcu, +	.statfs = simple_statfs, +}; +  /*   * pipefs should _never_ be mounted by userland - too much of security hassle,   * no real gain from having the whole whorehouse mounted. So we don't need @@ -1250,7 +1101,8 @@ out:  static struct dentry *pipefs_mount(struct file_system_type *fs_type,  			 int flags, const char *dev_name, void *data)  { -	return mount_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC); +	return mount_pseudo(fs_type, "pipe:", &pipefs_ops, +			&pipefs_dentry_operations, PIPEFS_MAGIC);  }  static struct file_system_type pipe_fs_type = { @@ -1273,11 +1125,4 @@ static int __init init_pipe_fs(void)  	return err;  } -static void __exit exit_pipe_fs(void) -{ -	unregister_filesystem(&pipe_fs_type); -	mntput(pipe_mnt); -} -  fs_initcall(init_pipe_fs); -module_exit(exit_pipe_fs);  | 
