diff options
Diffstat (limited to 'net/socket.c')
| -rw-r--r-- | net/socket.c | 1346 |
1 files changed, 836 insertions, 510 deletions
diff --git a/net/socket.c b/net/socket.c index 769c386bd42..abf56b2a14f 100644 --- a/net/socket.c +++ b/net/socket.c @@ -69,10 +69,10 @@ #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/mutex.h> -#include <linux/wanrouter.h> #include <linux/if_bridge.h> #include <linux/if_frad.h> #include <linux/if_vlan.h> +#include <linux/ptp_classify.h> #include <linux/init.h> #include <linux/poll.h> #include <linux/cache.h> @@ -87,12 +87,15 @@ #include <linux/wireless.h> #include <linux/nsproxy.h> #include <linux/magic.h> +#include <linux/slab.h> +#include <linux/xattr.h> #include <asm/uaccess.h> #include <asm/unistd.h> #include <net/compat.h> #include <net/wext.h> +#include <net/cls_cgroup.h> #include <net/sock.h> #include <linux/netfilter.h> @@ -102,6 +105,12 @@ #include <linux/route.h> #include <linux/sockios.h> #include <linux/atalk.h> +#include <net/busy_poll.h> + +#ifdef CONFIG_NET_RX_BUSY_POLL +unsigned int sysctl_net_busy_read __read_mostly; +unsigned int sysctl_net_busy_poll __read_mostly; +#endif static int sock_no_open(struct inode *irrelevant, struct file *dontcare); static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, @@ -122,7 +131,7 @@ static int sock_fasync(int fd, struct file *filp, int on); static ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more); static ssize_t sock_splice_read(struct file *file, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, + struct pipe_inode_info *pipe, size_t len, unsigned int flags); /* @@ -154,13 +163,13 @@ static const struct file_operations socket_file_ops = { */ static DEFINE_SPINLOCK(net_family_lock); -static const struct net_proto_family *net_families[NPROTO] __read_mostly; +static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly; /* * Statistics counters of the socket lists */ -static DEFINE_PER_CPU(int, sockets_in_use) = 0; +static DEFINE_PER_CPU(int, sockets_in_use); /* * Support routines. @@ -168,15 +177,6 @@ static DEFINE_PER_CPU(int, sockets_in_use) = 0; * divide and look after the messy bits. */ -#define MAX_SOCK_ADDR 128 /* 108 for Unix domain - - 16 for IP, 16 for IPX, - 24 for IPv6, - about 80 for AX.25 - must be at least one bigger than - the AF_UNIX size (see net/unix/af_unix.c - :unix_mkname()). - */ - /** * move_addr_to_kernel - copy a socket address into kernel space * @uaddr: Address in user space @@ -188,7 +188,7 @@ static DEFINE_PER_CPU(int, sockets_in_use) = 0; * invalid addresses -EFAULT is returned. On a success 0 is returned. */ -int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr) +int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr) { if (ulen < 0 || ulen > sizeof(struct sockaddr_storage)) return -EINVAL; @@ -216,18 +216,19 @@ int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr) * specified. Zero is returned for a success. */ -int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr, - int __user *ulen) +static int move_addr_to_user(struct sockaddr_storage *kaddr, int klen, + void __user *uaddr, int __user *ulen) { int err; int len; + BUG_ON(klen > sizeof(struct sockaddr_storage)); err = get_user(len, ulen); if (err) return err; if (len > klen) len = klen; - if (len < 0 || len > sizeof(struct sockaddr_storage)) + if (len < 0) return -EINVAL; if (len) { if (audit_sockaddr(klen, kaddr)) @@ -247,13 +248,20 @@ static struct kmem_cache *sock_inode_cachep __read_mostly; static struct inode *sock_alloc_inode(struct super_block *sb) { struct socket_alloc *ei; + struct socket_wq *wq; ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL); if (!ei) return NULL; - init_waitqueue_head(&ei->socket.wait); + wq = kmalloc(sizeof(*wq), GFP_KERNEL); + if (!wq) { + kmem_cache_free(sock_inode_cachep, ei); + return NULL; + } + init_waitqueue_head(&wq->wait); + wq->fasync_list = NULL; + RCU_INIT_POINTER(ei->socket.wq, wq); - ei->socket.fasync_list = NULL; ei->socket.state = SS_UNCONNECTED; ei->socket.flags = 0; ei->socket.ops = NULL; @@ -265,8 +273,13 @@ static struct inode *sock_alloc_inode(struct super_block *sb) static void sock_destroy_inode(struct inode *inode) { - kmem_cache_free(sock_inode_cachep, - container_of(inode, struct socket_alloc, vfs_inode)); + struct socket_alloc *ei; + struct socket_wq *wq; + + ei = container_of(inode, struct socket_alloc, vfs_inode); + wq = rcu_dereference_protected(ei->socket.wq, 1); + kfree_rcu(wq, rcu); + kmem_cache_free(sock_inode_cachep, ei); } static void init_once(void *foo) @@ -291,25 +304,9 @@ static int init_inodecache(void) } static const struct super_operations sockfs_ops = { - .alloc_inode = sock_alloc_inode, - .destroy_inode =sock_destroy_inode, - .statfs = simple_statfs, -}; - -static int sockfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - struct vfsmount *mnt) -{ - return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, - mnt); -} - -static struct vfsmount *sock_mnt __read_mostly; - -static struct file_system_type sock_fs_type = { - .name = "sockfs", - .get_sb = sockfs_get_sb, - .kill_sb = kill_anon_super, + .alloc_inode = sock_alloc_inode, + .destroy_inode = sock_destroy_inode, + .statfs = simple_statfs, }; /* @@ -325,6 +322,21 @@ static const struct dentry_operations sockfs_dentry_operations = { .d_dname = sockfs_dname, }; +static struct dentry *sockfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return mount_pseudo(fs_type, "socket:", &sockfs_ops, + &sockfs_dentry_operations, SOCKFS_MAGIC); +} + +static struct vfsmount *sock_mnt __read_mostly; + +static struct file_system_type sock_fs_type = { + .name = "sockfs", + .mount = sockfs_mount, + .kill_sb = kill_anon_super, +}; + /* * Obtains the first available file descriptor and sets it up for use. * @@ -342,59 +354,61 @@ static const struct dentry_operations sockfs_dentry_operations = { * but we take care of internal coherence yet. */ -static int sock_alloc_file(struct socket *sock, struct file **f, int flags) +struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname) { struct qstr name = { .name = "" }; struct path path; struct file *file; - int fd; - - fd = get_unused_fd_flags(flags); - if (unlikely(fd < 0)) - return fd; - path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); - if (unlikely(!path.dentry)) { - put_unused_fd(fd); - return -ENOMEM; + if (dname) { + name.name = dname; + name.len = strlen(name.name); + } else if (sock->sk) { + name.name = sock->sk->sk_prot_creator->name; + name.len = strlen(name.name); } + path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); + if (unlikely(!path.dentry)) + return ERR_PTR(-ENOMEM); path.mnt = mntget(sock_mnt); - path.dentry->d_op = &sockfs_dentry_operations; d_instantiate(path.dentry, SOCK_INODE(sock)); SOCK_INODE(sock)->i_fop = &socket_file_ops; file = alloc_file(&path, FMODE_READ | FMODE_WRITE, &socket_file_ops); - if (unlikely(!file)) { + if (unlikely(IS_ERR(file))) { /* drop dentry, keep inode */ - atomic_inc(&path.dentry->d_inode->i_count); + ihold(path.dentry->d_inode); path_put(&path); - put_unused_fd(fd); - return -ENFILE; + return file; } sock->file = file; file->f_flags = O_RDWR | (flags & O_NONBLOCK); - file->f_pos = 0; file->private_data = sock; - - *f = file; - return fd; + return file; } +EXPORT_SYMBOL(sock_alloc_file); -int sock_map_fd(struct socket *sock, int flags) +static int sock_map_fd(struct socket *sock, int flags) { struct file *newfile; - int fd = sock_alloc_file(sock, &newfile, flags); + int fd = get_unused_fd_flags(flags); + if (unlikely(fd < 0)) + return fd; - if (likely(fd >= 0)) + newfile = sock_alloc_file(sock, flags, NULL); + if (likely(!IS_ERR(newfile))) { fd_install(fd, newfile); + return fd; + } - return fd; + put_unused_fd(fd); + return PTR_ERR(newfile); } -static struct socket *sock_from_file(struct file *file, int *err) +struct socket *sock_from_file(struct file *file, int *err) { if (file->f_op == &socket_file_ops) return file->private_data; /* set in sock_map_fd */ @@ -402,9 +416,10 @@ static struct socket *sock_from_file(struct file *file, int *err) *err = -ENOTSOCK; return NULL; } +EXPORT_SYMBOL(sock_from_file); /** - * sockfd_lookup - Go from a file number to its socket slot + * sockfd_lookup - Go from a file number to its socket slot * @fd: file handle * @err: pointer to an error code return * @@ -432,23 +447,87 @@ struct socket *sockfd_lookup(int fd, int *err) fput(file); return sock; } +EXPORT_SYMBOL(sockfd_lookup); static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) { - struct file *file; + struct fd f = fdget(fd); struct socket *sock; *err = -EBADF; - file = fget_light(fd, fput_needed); - if (file) { - sock = sock_from_file(file, err); - if (sock) + if (f.file) { + sock = sock_from_file(f.file, err); + if (likely(sock)) { + *fput_needed = f.flags; return sock; - fput_light(file, *fput_needed); + } + fdput(f); } return NULL; } +#define XATTR_SOCKPROTONAME_SUFFIX "sockprotoname" +#define XATTR_NAME_SOCKPROTONAME (XATTR_SYSTEM_PREFIX XATTR_SOCKPROTONAME_SUFFIX) +#define XATTR_NAME_SOCKPROTONAME_LEN (sizeof(XATTR_NAME_SOCKPROTONAME)-1) +static ssize_t sockfs_getxattr(struct dentry *dentry, + const char *name, void *value, size_t size) +{ + const char *proto_name; + size_t proto_size; + int error; + + error = -ENODATA; + if (!strncmp(name, XATTR_NAME_SOCKPROTONAME, XATTR_NAME_SOCKPROTONAME_LEN)) { + proto_name = dentry->d_name.name; + proto_size = strlen(proto_name); + + if (value) { + error = -ERANGE; + if (proto_size + 1 > size) + goto out; + + strncpy(value, proto_name, proto_size + 1); + } + error = proto_size + 1; + } + +out: + return error; +} + +static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer, + size_t size) +{ + ssize_t len; + ssize_t used = 0; + + len = security_inode_listsecurity(dentry->d_inode, buffer, size); + if (len < 0) + return len; + used += len; + if (buffer) { + if (size < used) + return -ERANGE; + buffer += len; + } + + len = (XATTR_NAME_SOCKPROTONAME_LEN + 1); + used += len; + if (buffer) { + if (size < used) + return -ERANGE; + memcpy(buffer, XATTR_NAME_SOCKPROTONAME, len); + buffer += len; + } + + return used; +} + +static const struct inode_operations sockfs_inode_ops = { + .getxattr = sockfs_getxattr, + .listxattr = sockfs_listxattr, +}; + /** * sock_alloc - allocate a socket * @@ -462,18 +541,20 @@ static struct socket *sock_alloc(void) struct inode *inode; struct socket *sock; - inode = new_inode(sock_mnt->mnt_sb); + inode = new_inode_pseudo(sock_mnt->mnt_sb); if (!inode) return NULL; sock = SOCKET_I(inode); kmemcheck_annotate_bitfield(sock, type); + inode->i_ino = get_next_ino(); inode->i_mode = S_IFSOCK | S_IRWXUGO; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); + inode->i_op = &sockfs_inode_ops; - percpu_add(sockets_in_use, 1); + this_cpu_add(sockets_in_use, 1); return sock; } @@ -491,6 +572,7 @@ static int sock_no_open(struct inode *irrelevant, struct file *dontcare) const struct file_operations bad_sock_fops = { .owner = THIS_MODULE, .open = sock_no_open, + .llseek = noop_llseek, }; /** @@ -512,47 +594,54 @@ void sock_release(struct socket *sock) module_put(owner); } - if (sock->fasync_list) - printk(KERN_ERR "sock_release: fasync list not empty!\n"); + if (rcu_dereference_protected(sock->wq, 1)->fasync_list) + pr_err("%s: fasync list not empty!\n", __func__); + + if (test_bit(SOCK_EXTERNALLY_ALLOCATED, &sock->flags)) + return; - percpu_sub(sockets_in_use, 1); + this_cpu_sub(sockets_in_use, 1); if (!sock->file) { iput(SOCK_INODE(sock)); return; } sock->file = NULL; } +EXPORT_SYMBOL(sock_release); -int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, - union skb_shared_tx *shtx) +void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) { - shtx->flags = 0; + *tx_flags = 0; if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) - shtx->hardware = 1; + *tx_flags |= SKBTX_HW_TSTAMP; if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) - shtx->software = 1; - return 0; + *tx_flags |= SKBTX_SW_TSTAMP; + if (sock_flag(sk, SOCK_WIFI_STATUS)) + *tx_flags |= SKBTX_WIFI_STATUS; } EXPORT_SYMBOL(sock_tx_timestamp); -static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, - struct msghdr *msg, size_t size) +static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t size) { struct sock_iocb *si = kiocb_to_siocb(iocb); - int err; si->sock = sock; si->scm = NULL; si->msg = msg; si->size = size; - err = security_socket_sendmsg(sock, msg, size); - if (err) - return err; - return sock->ops->sendmsg(iocb, sock, msg, size); } +static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t size) +{ + int err = security_socket_sendmsg(sock, msg, size); + + return err ?: __sock_sendmsg_nosec(iocb, sock, msg, size); +} + int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) { struct kiocb iocb; @@ -566,6 +655,21 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) ret = wait_on_sync_kiocb(&iocb); return ret; } +EXPORT_SYMBOL(sock_sendmsg); + +static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size) +{ + struct kiocb iocb; + struct sock_iocb siocb; + int ret; + + init_sync_kiocb(&iocb, NULL); + iocb.private = &siocb; + ret = __sock_sendmsg_nosec(&iocb, sock, msg, size); + if (-EIOCBQUEUED == ret) + ret = wait_on_sync_kiocb(&iocb); + return ret; +} int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size) @@ -584,16 +688,7 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, set_fs(oldfs); return result; } - -static int ktime2ts(ktime_t kt, struct timespec *ts) -{ - if (kt.tv64) { - *ts = ktime_to_timespec(kt); - return 1; - } else { - return 0; - } -} +EXPORT_SYMBOL(kernel_sendmsg); /* * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) @@ -619,49 +714,62 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv); } else { - struct timespec ts; - skb_get_timestampns(skb, &ts); + skb_get_timestampns(skb, &ts[0]); put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, - sizeof(ts), &ts); + sizeof(ts[0]), &ts[0]); } } memset(ts, 0, sizeof(ts)); - if (skb->tstamp.tv64 && - sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) { - skb_get_timestampns(skb, ts + 0); + if (sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE) && + ktime_to_timespec_cond(skb->tstamp, ts + 0)) empty = 0; - } if (shhwtstamps) { if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) && - ktime2ts(shhwtstamps->syststamp, ts + 1)) + ktime_to_timespec_cond(shhwtstamps->syststamp, ts + 1)) empty = 0; if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && - ktime2ts(shhwtstamps->hwtstamp, ts + 2)) + ktime_to_timespec_cond(shhwtstamps->hwtstamp, ts + 2)) empty = 0; } if (!empty) put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING, sizeof(ts), &ts); } - EXPORT_SYMBOL_GPL(__sock_recv_timestamp); -inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) +void __sock_recv_wifi_status(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) +{ + int ack; + + if (!sock_flag(sk, SOCK_WIFI_STATUS)) + return; + if (!skb->wifi_acked_valid) + return; + + ack = skb->wifi_acked; + + put_cmsg(msg, SOL_SOCKET, SCM_WIFI_STATUS, sizeof(ack), &ack); +} +EXPORT_SYMBOL_GPL(__sock_recv_wifi_status); + +static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) { if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount) put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, sizeof(__u32), &skb->dropcount); } -void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, +void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { sock_recv_timestamp(msg, sk, skb); sock_recv_drops(msg, sk, skb); } -EXPORT_SYMBOL_GPL(sock_recv_ts_and_drops); +EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) @@ -699,6 +807,7 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, ret = wait_on_sync_kiocb(&iocb); return ret; } +EXPORT_SYMBOL(sock_recvmsg); static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size, int flags) @@ -715,6 +824,21 @@ static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, return ret; } +/** + * kernel_recvmsg - Receive a message from a socket (kernel space) + * @sock: The socket to receive the message from + * @msg: Received message + * @vec: Input s/g array for message data + * @num: Size of input s/g array + * @size: Number of bytes to read + * @flags: Message flags (MSG_DONTWAIT, etc...) + * + * On return the msg structure contains the scatter/gather array passed in the + * vec argument. The array is modified so that it consists of the unfilled + * portion of the original array. + * + * The returned value is the total number of bytes received, or an error. + */ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size, int flags) { @@ -731,11 +855,7 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, set_fs(oldfs); return result; } - -static void sock_aio_dtor(struct kiocb *iocb) -{ - kfree(iocb->private); -} +EXPORT_SYMBOL(kernel_recvmsg); static ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more) @@ -745,15 +865,15 @@ static ssize_t sock_sendpage(struct file *file, struct page *page, sock = file->private_data; - flags = !(file->f_flags & O_NONBLOCK) ? 0 : MSG_DONTWAIT; - if (more) - flags |= MSG_MORE; + flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; + /* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */ + flags |= more; return kernel_sendpage(sock, page, offset, size, flags); } static ssize_t sock_splice_read(struct file *file, loff_t *ppos, - struct pipe_inode_info *pipe, size_t len, + struct pipe_inode_info *pipe, size_t len, unsigned int flags) { struct socket *sock = file->private_data; @@ -767,12 +887,8 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, struct sock_iocb *siocb) { - if (!is_sync_kiocb(iocb)) { - siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); - if (!siocb) - return NULL; - iocb->ki_dtor = sock_aio_dtor; - } + if (!is_sync_kiocb(iocb)) + BUG(); siocb->kiocb = iocb; iocb->private = siocb; @@ -809,7 +925,7 @@ static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, if (pos != 0) return -ESPIPE; - if (iocb->ki_left == 0) /* Match SYS5 behaviour */ + if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */ return 0; @@ -864,7 +980,7 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, */ static DEFINE_MUTEX(br_ioctl_mutex); -static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg) = NULL; +static int (*br_ioctl_hook) (struct net *, unsigned int cmd, void __user *arg); void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) { @@ -872,7 +988,6 @@ void brioctl_set(int (*hook) (struct net *, unsigned int, void __user *)) br_ioctl_hook = hook; mutex_unlock(&br_ioctl_mutex); } - EXPORT_SYMBOL(brioctl_set); static DEFINE_MUTEX(vlan_ioctl_mutex); @@ -884,7 +999,6 @@ void vlan_ioctl_set(int (*hook) (struct net *, void __user *)) vlan_ioctl_hook = hook; mutex_unlock(&vlan_ioctl_mutex); } - EXPORT_SYMBOL(vlan_ioctl_set); static DEFINE_MUTEX(dlci_ioctl_mutex); @@ -896,7 +1010,6 @@ void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) dlci_ioctl_hook = hook; mutex_unlock(&dlci_ioctl_mutex); } - EXPORT_SYMBOL(dlci_ioctl_set); static long sock_do_ioctl(struct net *net, struct socket *sock, @@ -1024,17 +1137,29 @@ out_release: sock = NULL; goto out; } +EXPORT_SYMBOL(sock_create_lite); /* No kernel lock held - perfect */ static unsigned int sock_poll(struct file *file, poll_table *wait) { + unsigned int busy_flag = 0; struct socket *sock; /* * We can't return errors to poll, so it's either yes or no. */ sock = file->private_data; - return sock->ops->poll(file, sock, wait); + + if (sk_can_busy_loop(sock->sk)) { + /* this socket can poll_ll so tell the system call */ + busy_flag = POLL_BUSY_LOOP; + + /* once, only if requested by syscall */ + if (wait && (wait->_key & POLL_BUSY_LOOP)) + sk_busy_loop(sock->sk, 1); + } + + return busy_flag | sock->ops->poll(file, sock, wait); } static int sock_mmap(struct file *file, struct vm_area_struct *vma) @@ -1046,15 +1171,6 @@ static int sock_mmap(struct file *file, struct vm_area_struct *vma) static int sock_close(struct inode *inode, struct file *filp) { - /* - * It was possible the inode is NULL we were - * closing an unfinished socket. - */ - - if (!inode) { - printk(KERN_DEBUG "sock_close: NULL inode\n"); - return 0; - } sock_release(SOCKET_I(inode)); return 0; } @@ -1067,87 +1183,45 @@ static int sock_close(struct inode *inode, struct file *filp) * 1. fasync_list is modified only under process context socket lock * i.e. under semaphore. * 2. fasync_list is used under read_lock(&sk->sk_callback_lock) - * or under socket lock. - * 3. fasync_list can be used from softirq context, so that - * modification under socket lock have to be enhanced with - * write_lock_bh(&sk->sk_callback_lock). - * --ANK (990710) + * or under socket lock */ static int sock_fasync(int fd, struct file *filp, int on) { - struct fasync_struct *fa, *fna = NULL, **prev; - struct socket *sock; - struct sock *sk; - - if (on) { - fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); - if (fna == NULL) - return -ENOMEM; - } - - sock = filp->private_data; + struct socket *sock = filp->private_data; + struct sock *sk = sock->sk; + struct socket_wq *wq; - sk = sock->sk; - if (sk == NULL) { - kfree(fna); + if (sk == NULL) return -EINVAL; - } lock_sock(sk); + wq = rcu_dereference_protected(sock->wq, sock_owned_by_user(sk)); + fasync_helper(fd, filp, on, &wq->fasync_list); - spin_lock(&filp->f_lock); - if (on) - filp->f_flags |= FASYNC; + if (!wq->fasync_list) + sock_reset_flag(sk, SOCK_FASYNC); else - filp->f_flags &= ~FASYNC; - spin_unlock(&filp->f_lock); - - prev = &(sock->fasync_list); - - for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) - if (fa->fa_file == filp) - break; - - if (on) { - if (fa != NULL) { - write_lock_bh(&sk->sk_callback_lock); - fa->fa_fd = fd; - write_unlock_bh(&sk->sk_callback_lock); - - kfree(fna); - goto out; - } - fna->fa_file = filp; - fna->fa_fd = fd; - fna->magic = FASYNC_MAGIC; - fna->fa_next = sock->fasync_list; - write_lock_bh(&sk->sk_callback_lock); - sock->fasync_list = fna; sock_set_flag(sk, SOCK_FASYNC); - write_unlock_bh(&sk->sk_callback_lock); - } else { - if (fa != NULL) { - write_lock_bh(&sk->sk_callback_lock); - *prev = fa->fa_next; - if (!sock->fasync_list) - sock_reset_flag(sk, SOCK_FASYNC); - write_unlock_bh(&sk->sk_callback_lock); - kfree(fa); - } - } -out: - release_sock(sock->sk); + release_sock(sk); return 0; } -/* This function may be called only under socket lock or callback_lock */ +/* This function may be called only under socket lock or callback_lock or rcu_lock */ int sock_wake_async(struct socket *sock, int how, int band) { - if (!sock || !sock->fasync_list) + struct socket_wq *wq; + + if (!sock) return -1; + rcu_read_lock(); + wq = rcu_dereference(sock->wq); + if (!wq || !wq->fasync_list) { + rcu_read_unlock(); + return -1; + } switch (how) { case SOCK_WAKE_WAITD: if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) @@ -1159,15 +1233,17 @@ int sock_wake_async(struct socket *sock, int how, int band) /* fall through */ case SOCK_WAKE_IO: call_kill: - __kill_fasync(sock->fasync_list, SIGIO, band); + kill_fasync(&wq->fasync_list, SIGIO, band); break; case SOCK_WAKE_URG: - __kill_fasync(sock->fasync_list, SIGURG, band); + kill_fasync(&wq->fasync_list, SIGURG, band); } + rcu_read_unlock(); return 0; } +EXPORT_SYMBOL(sock_wake_async); -static int __sock_create(struct net *net, int family, int type, int protocol, +int __sock_create(struct net *net, int family, int type, int protocol, struct socket **res, int kern) { int err; @@ -1191,8 +1267,8 @@ static int __sock_create(struct net *net, int family, int type, int protocol, static int warned; if (!warned) { warned = 1; - printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", - current->comm); + pr_info("%s uses obsolete (PF_INET,SOCK_PACKET)\n", + current->comm); } family = PF_PACKET; } @@ -1208,8 +1284,7 @@ static int __sock_create(struct net *net, int family, int type, int protocol, */ sock = sock_alloc(); if (!sock) { - if (net_ratelimit()) - printk(KERN_WARNING "socket: no more sockets\n"); + net_warn_ratelimited("socket: no more sockets\n"); return -ENFILE; /* Not exactly a match, but its the closest posix thing */ } @@ -1223,7 +1298,7 @@ static int __sock_create(struct net *net, int family, int type, int protocol, * requested real, full-featured networking support upon configuration. * Otherwise module support will break! */ - if (net_families[family] == NULL) + if (rcu_access_pointer(net_families[family]) == NULL) request_module("net-pf-%d", family); #endif @@ -1279,16 +1354,19 @@ out_release: rcu_read_unlock(); goto out_sock_release; } +EXPORT_SYMBOL(__sock_create); int sock_create(int family, int type, int protocol, struct socket **res) { return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0); } +EXPORT_SYMBOL(sock_create); int sock_create_kern(int family, int type, int protocol, struct socket **res) { return __sock_create(&init_net, family, type, protocol, res, 1); } +EXPORT_SYMBOL(sock_create_kern); SYSCALL_DEFINE3(socket, int, family, int, type, int, protocol) { @@ -1364,38 +1442,66 @@ SYSCALL_DEFINE4(socketpair, int, family, int, type, int, protocol, if (err < 0) goto out_release_both; - fd1 = sock_alloc_file(sock1, &newfile1, flags); + fd1 = get_unused_fd_flags(flags); if (unlikely(fd1 < 0)) { err = fd1; goto out_release_both; } - fd2 = sock_alloc_file(sock2, &newfile2, flags); + fd2 = get_unused_fd_flags(flags); if (unlikely(fd2 < 0)) { err = fd2; - fput(newfile1); - put_unused_fd(fd1); - sock_release(sock2); - goto out; + goto out_put_unused_1; } + newfile1 = sock_alloc_file(sock1, flags, NULL); + if (unlikely(IS_ERR(newfile1))) { + err = PTR_ERR(newfile1); + goto out_put_unused_both; + } + + newfile2 = sock_alloc_file(sock2, flags, NULL); + if (IS_ERR(newfile2)) { + err = PTR_ERR(newfile2); + goto out_fput_1; + } + + err = put_user(fd1, &usockvec[0]); + if (err) + goto out_fput_both; + + err = put_user(fd2, &usockvec[1]); + if (err) + goto out_fput_both; + audit_fd_pair(fd1, fd2); + fd_install(fd1, newfile1); fd_install(fd2, newfile2); /* fd1 and fd2 may be already another descriptors. * Not kernel problem. */ - err = put_user(fd1, &usockvec[0]); - if (!err) - err = put_user(fd2, &usockvec[1]); - if (!err) - return 0; + return 0; - sys_close(fd2); - sys_close(fd1); - return err; +out_fput_both: + fput(newfile2); + fput(newfile1); + put_unused_fd(fd2); + put_unused_fd(fd1); + goto out; + +out_fput_1: + fput(newfile1); + put_unused_fd(fd2); + put_unused_fd(fd1); + sock_release(sock2); + goto out; +out_put_unused_both: + put_unused_fd(fd2); +out_put_unused_1: + put_unused_fd(fd1); out_release_both: sock_release(sock2); out_release_1: @@ -1420,7 +1526,7 @@ SYSCALL_DEFINE3(bind, int, fd, struct sockaddr __user *, umyaddr, int, addrlen) sock = sockfd_lookup_light(fd, &err, &fput_needed); if (sock) { - err = move_addr_to_kernel(umyaddr, addrlen, (struct sockaddr *)&address); + err = move_addr_to_kernel(umyaddr, addrlen, &address); if (err >= 0) { err = security_socket_bind(sock, (struct sockaddr *)&address, @@ -1450,7 +1556,7 @@ SYSCALL_DEFINE2(listen, int, fd, int, backlog) sock = sockfd_lookup_light(fd, &err, &fput_needed); if (sock) { somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn; - if ((unsigned)backlog > somaxconn) + if ((unsigned int)backlog > somaxconn) backlog = somaxconn; err = security_socket_listen(sock, backlog); @@ -1493,7 +1599,8 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, goto out; err = -ENFILE; - if (!(newsock = sock_alloc())) + newsock = sock_alloc(); + if (!newsock) goto out_put; newsock->type = sock->type; @@ -1505,12 +1612,19 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, */ __module_get(newsock->ops->owner); - newfd = sock_alloc_file(newsock, &newfile, flags); + newfd = get_unused_fd_flags(flags); if (unlikely(newfd < 0)) { err = newfd; sock_release(newsock); goto out_put; } + newfile = sock_alloc_file(newsock, flags, sock->sk->sk_prot_creator->name); + if (unlikely(IS_ERR(newfile))) { + err = PTR_ERR(newfile); + put_unused_fd(newfd); + sock_release(newsock); + goto out_put; + } err = security_socket_accept(sock, newsock); if (err) @@ -1526,7 +1640,7 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, err = -ECONNABORTED; goto out_fd; } - err = move_addr_to_user((struct sockaddr *)&address, + err = move_addr_to_user(&address, len, upeer_sockaddr, upeer_addrlen); if (err < 0) goto out_fd; @@ -1575,7 +1689,7 @@ SYSCALL_DEFINE3(connect, int, fd, struct sockaddr __user *, uservaddr, sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; - err = move_addr_to_kernel(uservaddr, addrlen, (struct sockaddr *)&address); + err = move_addr_to_kernel(uservaddr, addrlen, &address); if (err < 0) goto out_put; @@ -1615,7 +1729,7 @@ SYSCALL_DEFINE3(getsockname, int, fd, struct sockaddr __user *, usockaddr, err = sock->ops->getname(sock, (struct sockaddr *)&address, &len, 0); if (err) goto out_put; - err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, usockaddr_len); + err = move_addr_to_user(&address, len, usockaddr, usockaddr_len); out_put: fput_light(sock->file, fput_needed); @@ -1647,7 +1761,7 @@ SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, sock->ops->getname(sock, (struct sockaddr *)&address, &len, 1); if (!err) - err = move_addr_to_user((struct sockaddr *)&address, len, usockaddr, + err = move_addr_to_user(&address, len, usockaddr, usockaddr_len); fput_light(sock->file, fput_needed); } @@ -1661,7 +1775,7 @@ SYSCALL_DEFINE3(getpeername, int, fd, struct sockaddr __user *, usockaddr, */ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, - unsigned, flags, struct sockaddr __user *, addr, + unsigned int, flags, struct sockaddr __user *, addr, int, addr_len) { struct socket *sock; @@ -1671,6 +1785,8 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, struct iovec iov; int fput_needed; + if (len > INT_MAX) + len = INT_MAX; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; @@ -1684,7 +1800,7 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, msg.msg_controllen = 0; msg.msg_namelen = 0; if (addr) { - err = move_addr_to_kernel(addr, addr_len, (struct sockaddr *)&address); + err = move_addr_to_kernel(addr, addr_len, &address); if (err < 0) goto out_put; msg.msg_name = (struct sockaddr *)&address; @@ -1706,7 +1822,7 @@ out: */ SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len, - unsigned, flags) + unsigned int, flags) { return sys_sendto(fd, buff, len, flags, NULL, 0); } @@ -1718,7 +1834,7 @@ SYSCALL_DEFINE4(send, int, fd, void __user *, buff, size_t, len, */ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, - unsigned, flags, struct sockaddr __user *, addr, + unsigned int, flags, struct sockaddr __user *, addr, int __user *, addr_len) { struct socket *sock; @@ -1728,6 +1844,8 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, int err, err2; int fput_needed; + if (size > INT_MAX) + size = INT_MAX; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; @@ -1738,14 +1856,16 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, msg.msg_iov = &iov; iov.iov_len = size; iov.iov_base = ubuf; - msg.msg_name = (struct sockaddr *)&address; - msg.msg_namelen = sizeof(address); + /* Save some cycles and don't copy the address if not needed */ + msg.msg_name = addr ? (struct sockaddr *)&address : NULL; + /* We assume all kernel code knows the size of sockaddr_storage */ + msg.msg_namelen = 0; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &msg, size, flags); if (err >= 0 && addr != NULL) { - err2 = move_addr_to_user((struct sockaddr *)&address, + err2 = move_addr_to_user(&address, msg.msg_namelen, addr, addr_len); if (err2 < 0) err = err2; @@ -1760,8 +1880,8 @@ out: * Receive a datagram from a socket. */ -asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, - unsigned flags) +SYSCALL_DEFINE4(recv, int, fd, void __user *, ubuf, size_t, size, + unsigned int, flags) { return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); } @@ -1857,77 +1977,82 @@ SYSCALL_DEFINE2(shutdown, int, fd, int, how) #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) -/* - * BSD sendmsg interface - */ +struct used_address { + struct sockaddr_storage name; + unsigned int name_len; +}; + +static int copy_msghdr_from_user(struct msghdr *kmsg, + struct msghdr __user *umsg) +{ + if (copy_from_user(kmsg, umsg, sizeof(struct msghdr))) + return -EFAULT; + + if (kmsg->msg_namelen < 0) + return -EINVAL; + + if (kmsg->msg_namelen > sizeof(struct sockaddr_storage)) + kmsg->msg_namelen = sizeof(struct sockaddr_storage); + return 0; +} -SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) +static int ___sys_sendmsg(struct socket *sock, struct msghdr __user *msg, + struct msghdr *msg_sys, unsigned int flags, + struct used_address *used_address) { struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; - struct socket *sock; struct sockaddr_storage address; struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; unsigned char ctl[sizeof(struct cmsghdr) + 20] __attribute__ ((aligned(sizeof(__kernel_size_t)))); /* 20 is size of ipv6_pktinfo */ unsigned char *ctl_buf = ctl; - struct msghdr msg_sys; - int err, ctl_len, iov_size, total_len; - int fput_needed; + int err, ctl_len, total_len; err = -EFAULT; if (MSG_CMSG_COMPAT & flags) { - if (get_compat_msghdr(&msg_sys, msg_compat)) + if (get_compat_msghdr(msg_sys, msg_compat)) return -EFAULT; + } else { + err = copy_msghdr_from_user(msg_sys, msg); + if (err) + return err; } - else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) - return -EFAULT; - - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (!sock) - goto out; - /* do not move before msg_sys is valid */ - err = -EMSGSIZE; - if (msg_sys.msg_iovlen > UIO_MAXIOV) - goto out_put; - - /* Check whether to allocate the iovec area */ - err = -ENOMEM; - iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); - if (msg_sys.msg_iovlen > UIO_FASTIOV) { - iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); + if (msg_sys->msg_iovlen > UIO_FASTIOV) { + err = -EMSGSIZE; + if (msg_sys->msg_iovlen > UIO_MAXIOV) + goto out; + err = -ENOMEM; + iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec), + GFP_KERNEL); if (!iov) - goto out_put; + goto out; } /* This will also move the address data into kernel space */ if (MSG_CMSG_COMPAT & flags) { - err = verify_compat_iovec(&msg_sys, iov, - (struct sockaddr *)&address, - VERIFY_READ); + err = verify_compat_iovec(msg_sys, iov, &address, VERIFY_READ); } else - err = verify_iovec(&msg_sys, iov, - (struct sockaddr *)&address, - VERIFY_READ); + err = verify_iovec(msg_sys, iov, &address, VERIFY_READ); if (err < 0) goto out_freeiov; total_len = err; err = -ENOBUFS; - if (msg_sys.msg_controllen > INT_MAX) + if (msg_sys->msg_controllen > INT_MAX) goto out_freeiov; - ctl_len = msg_sys.msg_controllen; + ctl_len = msg_sys->msg_controllen; if ((MSG_CMSG_COMPAT & flags) && ctl_len) { err = - cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, + cmsghdr_from_user_compat_to_kern(msg_sys, sock->sk, ctl, sizeof(ctl)); if (err) goto out_freeiov; - ctl_buf = msg_sys.msg_control; - ctl_len = msg_sys.msg_controllen; + ctl_buf = msg_sys->msg_control; + ctl_len = msg_sys->msg_controllen; } else if (ctl_len) { if (ctl_len > sizeof(ctl)) { ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); @@ -1936,42 +2061,160 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) } err = -EFAULT; /* - * Careful! Before this, msg_sys.msg_control contains a user pointer. + * Careful! Before this, msg_sys->msg_control contains a user pointer. * Afterwards, it will be a kernel pointer. Thus the compiler-assisted * checking falls down on this. */ - if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, + if (copy_from_user(ctl_buf, + (void __user __force *)msg_sys->msg_control, ctl_len)) goto out_freectl; - msg_sys.msg_control = ctl_buf; + msg_sys->msg_control = ctl_buf; } - msg_sys.msg_flags = flags; + msg_sys->msg_flags = flags; if (sock->file->f_flags & O_NONBLOCK) - msg_sys.msg_flags |= MSG_DONTWAIT; - err = sock_sendmsg(sock, &msg_sys, total_len); + msg_sys->msg_flags |= MSG_DONTWAIT; + /* + * If this is sendmmsg() and current destination address is same as + * previously succeeded address, omit asking LSM's decision. + * used_address->name_len is initialized to UINT_MAX so that the first + * destination address never matches. + */ + if (used_address && msg_sys->msg_name && + used_address->name_len == msg_sys->msg_namelen && + !memcmp(&used_address->name, msg_sys->msg_name, + used_address->name_len)) { + err = sock_sendmsg_nosec(sock, msg_sys, total_len); + goto out_freectl; + } + err = sock_sendmsg(sock, msg_sys, total_len); + /* + * If this is sendmmsg() and sending to current destination address was + * successful, remember it. + */ + if (used_address && err >= 0) { + used_address->name_len = msg_sys->msg_namelen; + if (msg_sys->msg_name) + memcpy(&used_address->name, msg_sys->msg_name, + used_address->name_len); + } out_freectl: if (ctl_buf != ctl) sock_kfree_s(sock->sk, ctl_buf, ctl_len); out_freeiov: if (iov != iovstack) - sock_kfree_s(sock->sk, iov, iov_size); -out_put: + kfree(iov); +out: + return err; +} + +/* + * BSD sendmsg interface + */ + +long __sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) +{ + int fput_needed, err; + struct msghdr msg_sys; + struct socket *sock; + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (!sock) + goto out; + + err = ___sys_sendmsg(sock, msg, &msg_sys, flags, NULL); + fput_light(sock->file, fput_needed); out: return err; } -static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, - struct msghdr *msg_sys, unsigned flags, int nosec) +SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned int, flags) +{ + if (flags & MSG_CMSG_COMPAT) + return -EINVAL; + return __sys_sendmsg(fd, msg, flags); +} + +/* + * Linux sendmmsg interface + */ + +int __sys_sendmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, + unsigned int flags) +{ + int fput_needed, err, datagrams; + struct socket *sock; + struct mmsghdr __user *entry; + struct compat_mmsghdr __user *compat_entry; + struct msghdr msg_sys; + struct used_address used_address; + + if (vlen > UIO_MAXIOV) + vlen = UIO_MAXIOV; + + datagrams = 0; + + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (!sock) + return err; + + used_address.name_len = UINT_MAX; + entry = mmsg; + compat_entry = (struct compat_mmsghdr __user *)mmsg; + err = 0; + + while (datagrams < vlen) { + if (MSG_CMSG_COMPAT & flags) { + err = ___sys_sendmsg(sock, (struct msghdr __user *)compat_entry, + &msg_sys, flags, &used_address); + if (err < 0) + break; + err = __put_user(err, &compat_entry->msg_len); + ++compat_entry; + } else { + err = ___sys_sendmsg(sock, + (struct msghdr __user *)entry, + &msg_sys, flags, &used_address); + if (err < 0) + break; + err = put_user(err, &entry->msg_len); + ++entry; + } + + if (err) + break; + ++datagrams; + } + + fput_light(sock->file, fput_needed); + + /* We only return an error if no datagrams were able to be sent */ + if (datagrams != 0) + return datagrams; + + return err; +} + +SYSCALL_DEFINE4(sendmmsg, int, fd, struct mmsghdr __user *, mmsg, + unsigned int, vlen, unsigned int, flags) +{ + if (flags & MSG_CMSG_COMPAT) + return -EINVAL; + return __sys_sendmmsg(fd, mmsg, vlen, flags); +} + +static int ___sys_recvmsg(struct socket *sock, struct msghdr __user *msg, + struct msghdr *msg_sys, unsigned int flags, int nosec) { struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; struct iovec iovstack[UIO_FASTIOV]; struct iovec *iov = iovstack; unsigned long cmsg_ptr; - int err, iov_size, total_len, len; + int err, total_len, len; /* kernel mode address */ struct sockaddr_storage addr; @@ -1983,38 +2226,32 @@ static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(msg_sys, msg_compat)) return -EFAULT; + } else { + err = copy_msghdr_from_user(msg_sys, msg); + if (err) + return err; } - else if (copy_from_user(msg_sys, msg, sizeof(struct msghdr))) - return -EFAULT; - - err = -EMSGSIZE; - if (msg_sys->msg_iovlen > UIO_MAXIOV) - goto out; - /* Check whether to allocate the iovec area */ - err = -ENOMEM; - iov_size = msg_sys->msg_iovlen * sizeof(struct iovec); if (msg_sys->msg_iovlen > UIO_FASTIOV) { - iov = sock_kmalloc(sock->sk, iov_size, GFP_KERNEL); + err = -EMSGSIZE; + if (msg_sys->msg_iovlen > UIO_MAXIOV) + goto out; + err = -ENOMEM; + iov = kmalloc(msg_sys->msg_iovlen * sizeof(struct iovec), + GFP_KERNEL); if (!iov) goto out; } - /* - * Save the user-mode address (verify_iovec will change the - * kernel msghdr to use the kernel address space) + /* Save the user-mode address (verify_iovec will change the + * kernel msghdr to use the kernel address space) */ - uaddr = (__force void __user *)msg_sys->msg_name; uaddr_len = COMPAT_NAMELEN(msg); - if (MSG_CMSG_COMPAT & flags) { - err = verify_compat_iovec(msg_sys, iov, - (struct sockaddr *)&addr, - VERIFY_WRITE); - } else - err = verify_iovec(msg_sys, iov, - (struct sockaddr *)&addr, - VERIFY_WRITE); + if (MSG_CMSG_COMPAT & flags) + err = verify_compat_iovec(msg_sys, iov, &addr, VERIFY_WRITE); + else + err = verify_iovec(msg_sys, iov, &addr, VERIFY_WRITE); if (err < 0) goto out_freeiov; total_len = err; @@ -2022,6 +2259,9 @@ static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, cmsg_ptr = (unsigned long)msg_sys->msg_control; msg_sys->msg_flags = flags & (MSG_CMSG_CLOEXEC|MSG_CMSG_COMPAT); + /* We assume all kernel code knows the size of sockaddr_storage */ + msg_sys->msg_namelen = 0; + if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = (nosec ? sock_recvmsg_nosec : sock_recvmsg)(sock, msg_sys, @@ -2031,7 +2271,7 @@ static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, len = err; if (uaddr != NULL) { - err = move_addr_to_user((struct sockaddr *)&addr, + err = move_addr_to_user(&addr, msg_sys->msg_namelen, uaddr, uaddr_len); if (err < 0) @@ -2053,7 +2293,7 @@ static int __sys_recvmsg(struct socket *sock, struct msghdr __user *msg, out_freeiov: if (iov != iovstack) - sock_kfree_s(sock->sk, iov, iov_size); + kfree(iov); out: return err; } @@ -2062,23 +2302,31 @@ out: * BSD recvmsg interface */ -SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, - unsigned int, flags) +long __sys_recvmsg(int fd, struct msghdr __user *msg, unsigned flags) { int fput_needed, err; struct msghdr msg_sys; - struct socket *sock = sockfd_lookup_light(fd, &err, &fput_needed); + struct socket *sock; + sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; - err = __sys_recvmsg(sock, msg, &msg_sys, flags, 0); + err = ___sys_recvmsg(sock, msg, &msg_sys, flags, 0); fput_light(sock->file, fput_needed); out: return err; } +SYSCALL_DEFINE3(recvmsg, int, fd, struct msghdr __user *, msg, + unsigned int, flags) +{ + if (flags & MSG_CMSG_COMPAT) + return -EINVAL; + return __sys_recvmsg(fd, msg, flags); +} + /* * Linux recvmmsg interface */ @@ -2116,15 +2364,18 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, * No need to ask LSM for more than the first datagram. */ if (MSG_CMSG_COMPAT & flags) { - err = __sys_recvmsg(sock, (struct msghdr __user *)compat_entry, - &msg_sys, flags, datagrams); + err = ___sys_recvmsg(sock, (struct msghdr __user *)compat_entry, + &msg_sys, flags & ~MSG_WAITFORONE, + datagrams); if (err < 0) break; err = __put_user(err, &compat_entry->msg_len); ++compat_entry; } else { - err = __sys_recvmsg(sock, (struct msghdr __user *)entry, - &msg_sys, flags, datagrams); + err = ___sys_recvmsg(sock, + (struct msghdr __user *)entry, + &msg_sys, flags & ~MSG_WAITFORONE, + datagrams); if (err < 0) break; err = put_user(err, &entry->msg_len); @@ -2135,6 +2386,10 @@ int __sys_recvmmsg(int fd, struct mmsghdr __user *mmsg, unsigned int vlen, break; ++datagrams; + /* MSG_WAITFORONE turns on MSG_DONTWAIT after one packet */ + if (flags & MSG_WAITFORONE) + flags |= MSG_DONTWAIT; + if (timeout) { ktime_get_ts(timeout); *timeout = timespec_sub(end_time, *timeout); @@ -2187,6 +2442,9 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, int datagrams; struct timespec timeout_sys; + if (flags & MSG_CMSG_COMPAT) + return -EINVAL; + if (!timeout) return __sys_recvmmsg(fd, mmsg, vlen, flags, NULL); @@ -2205,11 +2463,11 @@ SYSCALL_DEFINE5(recvmmsg, int, fd, struct mmsghdr __user *, mmsg, #ifdef __ARCH_WANT_SYS_SOCKETCALL /* Argument list sizes for sys_socketcall */ #define AL(x) ((x) * sizeof(unsigned long)) -static const unsigned char nargs[20] = { - AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), - AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), - AL(6),AL(2),AL(5),AL(5),AL(3),AL(3), - AL(4),AL(5) +static const unsigned char nargs[21] = { + AL(0), AL(3), AL(3), AL(3), AL(2), AL(3), + AL(3), AL(3), AL(4), AL(4), AL(4), AL(6), + AL(6), AL(2), AL(5), AL(5), AL(3), AL(3), + AL(4), AL(5), AL(4) }; #undef AL @@ -2224,12 +2482,12 @@ static const unsigned char nargs[20] = { SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) { - unsigned long a[6]; + unsigned long a[AUDITSC_ARGS]; unsigned long a0, a1; int err; unsigned int len; - if (call < 1 || call > SYS_RECVMMSG) + if (call < 1 || call > SYS_SENDMMSG) return -EINVAL; len = nargs[call]; @@ -2240,7 +2498,9 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) if (copy_from_user(a, args, len)) return -EFAULT; - audit_socketcall(nargs[call] / sizeof(unsigned long), a); + err = audit_socketcall(nargs[call] / sizeof(unsigned long), a); + if (err) + return err; a0 = a[0]; a1 = a[1]; @@ -2304,6 +2564,9 @@ SYSCALL_DEFINE2(socketcall, int, call, unsigned long __user *, args) case SYS_SENDMSG: err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); break; + case SYS_SENDMMSG: + err = sys_sendmmsg(a0, (struct mmsghdr __user *)a1, a[2], a[3]); + break; case SYS_RECVMSG: err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); break; @@ -2338,23 +2601,24 @@ int sock_register(const struct net_proto_family *ops) int err; if (ops->family >= NPROTO) { - printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, - NPROTO); + pr_crit("protocol %d >= NPROTO(%d)\n", ops->family, NPROTO); return -ENOBUFS; } spin_lock(&net_family_lock); - if (net_families[ops->family]) + if (rcu_dereference_protected(net_families[ops->family], + lockdep_is_held(&net_family_lock))) err = -EEXIST; else { - net_families[ops->family] = ops; + rcu_assign_pointer(net_families[ops->family], ops); err = 0; } spin_unlock(&net_family_lock); - printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); + pr_info("NET: Registered protocol family %d\n", ops->family); return err; } +EXPORT_SYMBOL(sock_register); /** * sock_unregister - remove a protocol handler @@ -2374,21 +2638,24 @@ void sock_unregister(int family) BUG_ON(family < 0 || family >= NPROTO); spin_lock(&net_family_lock); - net_families[family] = NULL; + RCU_INIT_POINTER(net_families[family], NULL); spin_unlock(&net_family_lock); synchronize_rcu(); - printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); + pr_info("NET: Unregistered protocol family %d\n", family); } +EXPORT_SYMBOL(sock_unregister); static int __init sock_init(void) { + int err; /* - * Initialize sock SLAB cache. + * Initialize the network sysctl infrastructure. */ - - sk_init(); + err = net_sysctl_init(); + if (err) + goto out; /* * Initialize skbuff SLAB cache @@ -2400,17 +2667,34 @@ static int __init sock_init(void) */ init_inodecache(); - register_filesystem(&sock_fs_type); + + err = register_filesystem(&sock_fs_type); + if (err) + goto out_fs; sock_mnt = kern_mount(&sock_fs_type); + if (IS_ERR(sock_mnt)) { + err = PTR_ERR(sock_mnt); + goto out_mount; + } /* The real protocol initialization is performed in later initcalls. */ #ifdef CONFIG_NETFILTER - netfilter_init(); + err = netfilter_init(); + if (err) + goto out; #endif - return 0; + ptp_classifier_init(); + +out: + return err; + +out_mount: + unregister_filesystem(&sock_fs_type); +out_fs: + goto out; } core_initcall(sock_init); /* early initcall */ @@ -2434,7 +2718,7 @@ void socket_seq_show(struct seq_file *seq) #ifdef CONFIG_COMPAT static int do_siocgstamp(struct net *net, struct socket *sock, - unsigned int cmd, struct compat_timeval __user *up) + unsigned int cmd, void __user *up) { mm_segment_t old_fs = get_fs(); struct timeval ktv; @@ -2443,15 +2727,14 @@ static int do_siocgstamp(struct net *net, struct socket *sock, set_fs(KERNEL_DS); err = sock_do_ioctl(net, sock, cmd, (unsigned long)&ktv); set_fs(old_fs); - if (!err) { - err = put_user(ktv.tv_sec, &up->tv_sec); - err |= __put_user(ktv.tv_usec, &up->tv_usec); - } + if (!err) + err = compat_put_timeval(&ktv, up); + return err; } static int do_siocgstampns(struct net *net, struct socket *sock, - unsigned int cmd, struct compat_timespec __user *up) + unsigned int cmd, void __user *up) { mm_segment_t old_fs = get_fs(); struct timespec kts; @@ -2460,10 +2743,9 @@ static int do_siocgstampns(struct net *net, struct socket *sock, set_fs(KERNEL_DS); err = sock_do_ioctl(net, sock, cmd, (unsigned long)&kts); set_fs(old_fs); - if (!err) { - err = put_user(kts.tv_sec, &up->tv_sec); - err |= __put_user(kts.tv_nsec, &up->tv_nsec); - } + if (!err) + err = compat_put_timespec(&kts, up); + return err; } @@ -2499,19 +2781,20 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) if (copy_from_user(&ifc32, uifc32, sizeof(struct compat_ifconf))) return -EFAULT; + memset(&ifc, 0, sizeof(ifc)); if (ifc32.ifcbuf == 0) { ifc32.ifc_len = 0; ifc.ifc_len = 0; ifc.ifc_req = NULL; uifc = compat_alloc_user_space(sizeof(struct ifconf)); } else { - size_t len =((ifc32.ifc_len / sizeof (struct compat_ifreq)) + 1) * - sizeof (struct ifreq); + size_t len = ((ifc32.ifc_len / sizeof(struct compat_ifreq)) + 1) * + sizeof(struct ifreq); uifc = compat_alloc_user_space(sizeof(struct ifconf) + len); ifc.ifc_len = len; ifr = ifc.ifc_req = (void __user *)(uifc + 1); ifr32 = compat_ptr(ifc32.ifcbuf); - for (i = 0; i < ifc32.ifc_len; i += sizeof (struct compat_ifreq)) { + for (i = 0; i < ifc32.ifc_len; i += sizeof(struct compat_ifreq)) { if (copy_in_user(ifr, ifr32, sizeof(struct compat_ifreq))) return -EFAULT; ifr++; @@ -2531,9 +2814,9 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) ifr = ifc.ifc_req; ifr32 = compat_ptr(ifc32.ifcbuf); for (i = 0, j = 0; - i + sizeof (struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len; - i += sizeof (struct compat_ifreq), j += sizeof (struct ifreq)) { - if (copy_in_user(ifr32, ifr, sizeof (struct compat_ifreq))) + i + sizeof(struct compat_ifreq) <= ifc32.ifc_len && j < ifc.ifc_len; + i += sizeof(struct compat_ifreq), j += sizeof(struct ifreq)) { + if (copy_in_user(ifr32, ifr, sizeof(struct compat_ifreq))) return -EFAULT; ifr32++; ifr++; @@ -2557,23 +2840,123 @@ static int dev_ifconf(struct net *net, struct compat_ifconf __user *uifc32) static int ethtool_ioctl(struct net *net, struct compat_ifreq __user *ifr32) { + struct compat_ethtool_rxnfc __user *compat_rxnfc; + bool convert_in = false, convert_out = false; + size_t buf_size = ALIGN(sizeof(struct ifreq), 8); + struct ethtool_rxnfc __user *rxnfc; struct ifreq __user *ifr; + u32 rule_cnt = 0, actual_rule_cnt; + u32 ethcmd; u32 data; - void __user *datap; + int ret; - ifr = compat_alloc_user_space(sizeof(*ifr)); + if (get_user(data, &ifr32->ifr_ifru.ifru_data)) + return -EFAULT; - if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) + compat_rxnfc = compat_ptr(data); + + if (get_user(ethcmd, &compat_rxnfc->cmd)) return -EFAULT; - if (get_user(data, &ifr32->ifr_ifru.ifru_data)) + /* Most ethtool structures are defined without padding. + * Unfortunately struct ethtool_rxnfc is an exception. + */ + switch (ethcmd) { + default: + break; + case ETHTOOL_GRXCLSRLALL: + /* Buffer size is variable */ + if (get_user(rule_cnt, &compat_rxnfc->rule_cnt)) + return -EFAULT; + if (rule_cnt > KMALLOC_MAX_SIZE / sizeof(u32)) + return -ENOMEM; + buf_size += rule_cnt * sizeof(u32); + /* fall through */ + case ETHTOOL_GRXRINGS: + case ETHTOOL_GRXCLSRLCNT: + case ETHTOOL_GRXCLSRULE: + case ETHTOOL_SRXCLSRLINS: + convert_out = true; + /* fall through */ + case ETHTOOL_SRXCLSRLDEL: + buf_size += sizeof(struct ethtool_rxnfc); + convert_in = true; + break; + } + + ifr = compat_alloc_user_space(buf_size); + rxnfc = (void __user *)ifr + ALIGN(sizeof(struct ifreq), 8); + + if (copy_in_user(&ifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) return -EFAULT; - datap = compat_ptr(data); - if (put_user(datap, &ifr->ifr_ifru.ifru_data)) + if (put_user(convert_in ? rxnfc : compat_ptr(data), + &ifr->ifr_ifru.ifru_data)) return -EFAULT; - return dev_ioctl(net, SIOCETHTOOL, ifr); + if (convert_in) { + /* We expect there to be holes between fs.m_ext and + * fs.ring_cookie and at the end of fs, but nowhere else. + */ + BUILD_BUG_ON(offsetof(struct compat_ethtool_rxnfc, fs.m_ext) + + sizeof(compat_rxnfc->fs.m_ext) != + offsetof(struct ethtool_rxnfc, fs.m_ext) + + sizeof(rxnfc->fs.m_ext)); + BUILD_BUG_ON( + offsetof(struct compat_ethtool_rxnfc, fs.location) - + offsetof(struct compat_ethtool_rxnfc, fs.ring_cookie) != + offsetof(struct ethtool_rxnfc, fs.location) - + offsetof(struct ethtool_rxnfc, fs.ring_cookie)); + + if (copy_in_user(rxnfc, compat_rxnfc, + (void __user *)(&rxnfc->fs.m_ext + 1) - + (void __user *)rxnfc) || + copy_in_user(&rxnfc->fs.ring_cookie, + &compat_rxnfc->fs.ring_cookie, + (void __user *)(&rxnfc->fs.location + 1) - + (void __user *)&rxnfc->fs.ring_cookie) || + copy_in_user(&rxnfc->rule_cnt, &compat_rxnfc->rule_cnt, + sizeof(rxnfc->rule_cnt))) + return -EFAULT; + } + + ret = dev_ioctl(net, SIOCETHTOOL, ifr); + if (ret) + return ret; + + if (convert_out) { + if (copy_in_user(compat_rxnfc, rxnfc, + (const void __user *)(&rxnfc->fs.m_ext + 1) - + (const void __user *)rxnfc) || + copy_in_user(&compat_rxnfc->fs.ring_cookie, + &rxnfc->fs.ring_cookie, + (const void __user *)(&rxnfc->fs.location + 1) - + (const void __user *)&rxnfc->fs.ring_cookie) || + copy_in_user(&compat_rxnfc->rule_cnt, &rxnfc->rule_cnt, + sizeof(rxnfc->rule_cnt))) + return -EFAULT; + + if (ethcmd == ETHTOOL_GRXCLSRLALL) { + /* As an optimisation, we only copy the actual + * number of rules that the underlying + * function returned. Since Mallory might + * change the rule count in user memory, we + * check that it is less than the rule count + * originally given (as the user buffer size), + * which has been range-checked. + */ + if (get_user(actual_rule_cnt, &rxnfc->rule_cnt)) + return -EFAULT; + if (actual_rule_cnt < rule_cnt) + rule_cnt = actual_rule_cnt; + if (copy_in_user(&compat_rxnfc->rule_locs[0], + &rxnfc->rule_locs[0], + rule_cnt * sizeof(u32))) + return -EFAULT; + } + } + + return 0; } static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32) @@ -2582,7 +2965,7 @@ static int compat_siocwandev(struct net *net, struct compat_ifreq __user *uifr32 compat_uptr_t uptr32; struct ifreq __user *uifr; - uifr = compat_alloc_user_space(sizeof (*uifr)); + uifr = compat_alloc_user_space(sizeof(*uifr)); if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) return -EFAULT; @@ -2601,11 +2984,8 @@ static int bond_ioctl(struct net *net, unsigned int cmd, struct compat_ifreq __user *ifr32) { struct ifreq kifr; - struct ifreq __user *uifr; mm_segment_t old_fs; int err; - u32 data; - void __user *datap; switch (cmd) { case SIOCBONDENSLAVE: @@ -2616,31 +2996,19 @@ static int bond_ioctl(struct net *net, unsigned int cmd, return -EFAULT; old_fs = get_fs(); - set_fs (KERNEL_DS); - err = dev_ioctl(net, cmd, &kifr); - set_fs (old_fs); + set_fs(KERNEL_DS); + err = dev_ioctl(net, cmd, + (struct ifreq __user __force *) &kifr); + set_fs(old_fs); return err; - case SIOCBONDSLAVEINFOQUERY: - case SIOCBONDINFOQUERY: - uifr = compat_alloc_user_space(sizeof(*uifr)); - if (copy_in_user(&uifr->ifr_name, &ifr32->ifr_name, IFNAMSIZ)) - return -EFAULT; - - if (get_user(data, &ifr32->ifr_ifru.ifru_data)) - return -EFAULT; - - datap = compat_ptr(data); - if (put_user(datap, &uifr->ifr_ifru.ifru_data)) - return -EFAULT; - - return dev_ioctl(net, cmd, uifr); default: - return -EINVAL; - }; + return -ENOIOCTLCMD; + } } -static int siocdevprivate_ioctl(struct net *net, unsigned int cmd, +/* Handle ioctls that use ifreq::ifr_data and just need struct ifreq converted */ +static int compat_ifr_data_ioctl(struct net *net, unsigned int cmd, struct compat_ifreq __user *u_ifreq32) { struct ifreq __user *u_ifreq64; @@ -2651,19 +3019,16 @@ static int siocdevprivate_ioctl(struct net *net, unsigned int cmd, if (copy_from_user(&tmp_buf[0], &(u_ifreq32->ifr_ifrn.ifrn_name[0]), IFNAMSIZ)) return -EFAULT; - if (__get_user(data32, &u_ifreq32->ifr_ifru.ifru_data)) + if (get_user(data32, &u_ifreq32->ifr_ifru.ifru_data)) return -EFAULT; data64 = compat_ptr(data32); u_ifreq64 = compat_alloc_user_space(sizeof(*u_ifreq64)); - /* Don't check these user accesses, just let that get trapped - * in the ioctl handler instead. - */ if (copy_to_user(&u_ifreq64->ifr_ifrn.ifrn_name[0], &tmp_buf[0], IFNAMSIZ)) return -EFAULT; - if (__put_user(data64, &u_ifreq64->ifr_ifru.ifru_data)) + if (put_user(data64, &u_ifreq64->ifr_ifru.ifru_data)) return -EFAULT; return dev_ioctl(net, cmd, u_ifreq64); @@ -2715,70 +3080,49 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd, uifmap32 = &uifr32->ifr_ifru.ifru_map; err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name)); - err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); - err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); - err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); - err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq); - err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma); - err |= __get_user(ifr.ifr_map.port, &uifmap32->port); + err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); + err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); + err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); + err |= get_user(ifr.ifr_map.irq, &uifmap32->irq); + err |= get_user(ifr.ifr_map.dma, &uifmap32->dma); + err |= get_user(ifr.ifr_map.port, &uifmap32->port); if (err) return -EFAULT; old_fs = get_fs(); - set_fs (KERNEL_DS); - err = dev_ioctl(net, cmd, (void __user *)&ifr); - set_fs (old_fs); + set_fs(KERNEL_DS); + err = dev_ioctl(net, cmd, (void __user __force *)&ifr); + set_fs(old_fs); if (cmd == SIOCGIFMAP && !err) { err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name)); - err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); - err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); - err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); - err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq); - err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma); - err |= __put_user(ifr.ifr_map.port, &uifmap32->port); + err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start); + err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end); + err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr); + err |= put_user(ifr.ifr_map.irq, &uifmap32->irq); + err |= put_user(ifr.ifr_map.dma, &uifmap32->dma); + err |= put_user(ifr.ifr_map.port, &uifmap32->port); if (err) err = -EFAULT; } return err; } -static int compat_siocshwtstamp(struct net *net, struct compat_ifreq __user *uifr32) -{ - void __user *uptr; - compat_uptr_t uptr32; - struct ifreq __user *uifr; - - uifr = compat_alloc_user_space(sizeof (*uifr)); - if (copy_in_user(uifr, uifr32, sizeof(struct compat_ifreq))) - return -EFAULT; - - if (get_user(uptr32, &uifr32->ifr_data)) - return -EFAULT; - - uptr = compat_ptr(uptr32); - - if (put_user(uptr, &uifr->ifr_data)) - return -EFAULT; - - return dev_ioctl(net, SIOCSHWTSTAMP, uifr); -} - struct rtentry32 { - u32 rt_pad1; + u32 rt_pad1; struct sockaddr rt_dst; /* target address */ struct sockaddr rt_gateway; /* gateway addr (RTF_GATEWAY) */ struct sockaddr rt_genmask; /* target network mask (IP) */ - unsigned short rt_flags; - short rt_pad2; - u32 rt_pad3; - unsigned char rt_tos; - unsigned char rt_class; - short rt_pad4; - short rt_metric; /* +1 for binary compatibility! */ + unsigned short rt_flags; + short rt_pad2; + u32 rt_pad3; + unsigned char rt_tos; + unsigned char rt_class; + short rt_pad4; + short rt_metric; /* +1 for binary compatibility! */ /* char * */ u32 rt_dev; /* forcing the device at add */ - u32 rt_mtu; /* per route MTU/Window */ - u32 rt_window; /* Window clamping */ + u32 rt_mtu; /* per route MTU/Window */ + u32 rt_window; /* Window clamping */ unsigned short rt_irtt; /* Initial RTT */ }; @@ -2808,30 +3152,31 @@ static int routing_ioctl(struct net *net, struct socket *sock, if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */ struct in6_rtmsg32 __user *ur6 = argp; - ret = copy_from_user (&r6.rtmsg_dst, &(ur6->rtmsg_dst), + ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst), 3 * sizeof(struct in6_addr)); - ret |= __get_user (r6.rtmsg_type, &(ur6->rtmsg_type)); - ret |= __get_user (r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); - ret |= __get_user (r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); - ret |= __get_user (r6.rtmsg_metric, &(ur6->rtmsg_metric)); - ret |= __get_user (r6.rtmsg_info, &(ur6->rtmsg_info)); - ret |= __get_user (r6.rtmsg_flags, &(ur6->rtmsg_flags)); - ret |= __get_user (r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); + ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type)); + ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len)); + ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len)); + ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric)); + ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info)); + ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags)); + ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex)); r = (void *) &r6; } else { /* ipv4 */ struct rtentry32 __user *ur4 = argp; - ret = copy_from_user (&r4.rt_dst, &(ur4->rt_dst), + ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst), 3 * sizeof(struct sockaddr)); - ret |= __get_user (r4.rt_flags, &(ur4->rt_flags)); - ret |= __get_user (r4.rt_metric, &(ur4->rt_metric)); - ret |= __get_user (r4.rt_mtu, &(ur4->rt_mtu)); - ret |= __get_user (r4.rt_window, &(ur4->rt_window)); - ret |= __get_user (r4.rt_irtt, &(ur4->rt_irtt)); - ret |= __get_user (rtdev, &(ur4->rt_dev)); + ret |= get_user(r4.rt_flags, &(ur4->rt_flags)); + ret |= get_user(r4.rt_metric, &(ur4->rt_metric)); + ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu)); + ret |= get_user(r4.rt_window, &(ur4->rt_window)); + ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt)); + ret |= get_user(rtdev, &(ur4->rt_dev)); if (rtdev) { - ret |= copy_from_user (devname, compat_ptr(rtdev), 15); - r4.rt_dev = devname; devname[15] = 0; + ret |= copy_from_user(devname, compat_ptr(rtdev), 15); + r4.rt_dev = (char __user __force *)devname; + devname[15] = 0; } else r4.rt_dev = NULL; @@ -2843,9 +3188,9 @@ static int routing_ioctl(struct net *net, struct socket *sock, goto out; } - set_fs (KERNEL_DS); + set_fs(KERNEL_DS); ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r); - set_fs (old_fs); + set_fs(old_fs); out: return ret; @@ -2853,7 +3198,7 @@ out: /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE * for some operations; this forces use of the newer bridge-utils that - * use compatiable ioctls + * use compatible ioctls */ static int old_bridge_ioctl(compat_ulong_t __user *argp) { @@ -2874,7 +3219,7 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, struct net *net = sock_net(sk); if (cmd >= SIOCDEVPRIVATE && cmd <= (SIOCDEVPRIVATE + 15)) - return siocdevprivate_ioctl(net, cmd, argp); + return compat_ifr_data_ioctl(net, cmd, argp); switch (cmd) { case SIOCSIFBR: @@ -2894,8 +3239,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, case SIOCBONDENSLAVE: case SIOCBONDRELEASE: case SIOCBONDSETHWADDR: - case SIOCBONDSLAVEINFOQUERY: - case SIOCBONDINFOQUERY: case SIOCBONDCHANGEACTIVE: return bond_ioctl(net, cmd, argp); case SIOCADDRT: @@ -2905,8 +3248,11 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, return do_siocgstamp(net, sock, cmd, argp); case SIOCGSTAMPNS: return do_siocgstampns(net, sock, cmd, argp); + case SIOCBONDSLAVEINFOQUERY: + case SIOCBONDINFOQUERY: case SIOCSHWTSTAMP: - return compat_siocshwtstamp(net, argp); + case SIOCGHWTSTAMP: + return compat_ifr_data_ioctl(net, cmd, argp); case FIOSETOWN: case SIOCSPGRP: @@ -2962,24 +3308,10 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock, return sock_do_ioctl(net, sock, cmd, arg); } - /* Prevent warning from compat_sys_ioctl, these always - * result in -EINVAL in the native case anyway. */ - switch (cmd) { - case SIOCRTMSG: - case SIOCGIFCOUNT: - case SIOCSRARP: - case SIOCGRARP: - case SIOCDRARP: - case SIOCSIFLINK: - case SIOCGIFSLAVE: - case SIOCSIFSLAVE: - return -EINVAL; - } - return -ENOIOCTLCMD; } -static long compat_sock_ioctl(struct file *file, unsigned cmd, +static long compat_sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct socket *sock = file->private_data; @@ -3008,11 +3340,13 @@ int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) { return sock->ops->bind(sock, addr, addrlen); } +EXPORT_SYMBOL(kernel_bind); int kernel_listen(struct socket *sock, int backlog) { return sock->ops->listen(sock, backlog); } +EXPORT_SYMBOL(kernel_listen); int kernel_accept(struct socket *sock, struct socket **newsock, int flags) { @@ -3037,56 +3371,70 @@ int kernel_accept(struct socket *sock, struct socket **newsock, int flags) done: return err; } +EXPORT_SYMBOL(kernel_accept); int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, int flags) { return sock->ops->connect(sock, addr, addrlen, flags); } +EXPORT_SYMBOL(kernel_connect); int kernel_getsockname(struct socket *sock, struct sockaddr *addr, int *addrlen) { return sock->ops->getname(sock, addr, addrlen, 0); } +EXPORT_SYMBOL(kernel_getsockname); int kernel_getpeername(struct socket *sock, struct sockaddr *addr, int *addrlen) { return sock->ops->getname(sock, addr, addrlen, 1); } +EXPORT_SYMBOL(kernel_getpeername); int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen) { mm_segment_t oldfs = get_fs(); + char __user *uoptval; + int __user *uoptlen; int err; + uoptval = (char __user __force *) optval; + uoptlen = (int __user __force *) optlen; + set_fs(KERNEL_DS); if (level == SOL_SOCKET) - err = sock_getsockopt(sock, level, optname, optval, optlen); + err = sock_getsockopt(sock, level, optname, uoptval, uoptlen); else - err = sock->ops->getsockopt(sock, level, optname, optval, - optlen); + err = sock->ops->getsockopt(sock, level, optname, uoptval, + uoptlen); set_fs(oldfs); return err; } +EXPORT_SYMBOL(kernel_getsockopt); int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval, unsigned int optlen) { mm_segment_t oldfs = get_fs(); + char __user *uoptval; int err; + uoptval = (char __user __force *) optval; + set_fs(KERNEL_DS); if (level == SOL_SOCKET) - err = sock_setsockopt(sock, level, optname, optval, optlen); + err = sock_setsockopt(sock, level, optname, uoptval, optlen); else - err = sock->ops->setsockopt(sock, level, optname, optval, + err = sock->ops->setsockopt(sock, level, optname, uoptval, optlen); set_fs(oldfs); return err; } +EXPORT_SYMBOL(kernel_setsockopt); int kernel_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) @@ -3096,6 +3444,7 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset, return sock_no_sendpage(sock, page, offset, size, flags); } +EXPORT_SYMBOL(kernel_sendpage); int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) { @@ -3108,33 +3457,10 @@ int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) return err; } +EXPORT_SYMBOL(kernel_sock_ioctl); int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how) { return sock->ops->shutdown(sock, how); } - -EXPORT_SYMBOL(sock_create); -EXPORT_SYMBOL(sock_create_kern); -EXPORT_SYMBOL(sock_create_lite); -EXPORT_SYMBOL(sock_map_fd); -EXPORT_SYMBOL(sock_recvmsg); -EXPORT_SYMBOL(sock_register); -EXPORT_SYMBOL(sock_release); -EXPORT_SYMBOL(sock_sendmsg); -EXPORT_SYMBOL(sock_unregister); -EXPORT_SYMBOL(sock_wake_async); -EXPORT_SYMBOL(sockfd_lookup); -EXPORT_SYMBOL(kernel_sendmsg); -EXPORT_SYMBOL(kernel_recvmsg); -EXPORT_SYMBOL(kernel_bind); -EXPORT_SYMBOL(kernel_listen); -EXPORT_SYMBOL(kernel_accept); -EXPORT_SYMBOL(kernel_connect); -EXPORT_SYMBOL(kernel_getsockname); -EXPORT_SYMBOL(kernel_getpeername); -EXPORT_SYMBOL(kernel_getsockopt); -EXPORT_SYMBOL(kernel_setsockopt); -EXPORT_SYMBOL(kernel_sendpage); -EXPORT_SYMBOL(kernel_sock_ioctl); EXPORT_SYMBOL(kernel_sock_shutdown); |
