From 9c55e01c0cc835818475a6ce8c4d684df9949ac8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 6 Nov 2007 23:30:13 -0800 Subject: [TCP]: Splice receive support. Support for network splice receive. Signed-off-by: Jens Axboe Signed-off-by: David S. Miller --- net/socket.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'net/socket.c') diff --git a/net/socket.c b/net/socket.c index 74784dfe8e5..92fab9e1c60 100644 --- a/net/socket.c +++ b/net/socket.c @@ -112,6 +112,9 @@ static long compat_sock_ioctl(struct file *file, static int sock_fasync(int fd, struct file *filp, int on); static ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more); +static ssize_t sock_splice_read(struct file *file, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags); /* * Socket files have a set of 'special' operations as well as the generic file ones. These don't appear @@ -134,6 +137,7 @@ static const struct file_operations socket_file_ops = { .fasync = sock_fasync, .sendpage = sock_sendpage, .splice_write = generic_splice_sendpage, + .splice_read = sock_splice_read, }; /* @@ -691,6 +695,15 @@ static ssize_t sock_sendpage(struct file *file, struct page *page, return sock->ops->sendpage(sock, page, offset, size, flags); } +static ssize_t sock_splice_read(struct file *file, loff_t *ppos, + struct pipe_inode_info *pipe, size_t len, + unsigned int flags) +{ + struct socket *sock = file->private_data; + + return sock->ops->splice_read(sock, ppos, pipe, len, flags); +} + static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, struct sock_iocb *siocb) { -- cgit v1.2.3-70-g09d2 From de0fa95c14bc4d4b545fae26439371ebfdcb8534 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 14 Nov 2007 16:01:43 -0800 Subject: [NET]: Use sockfd_lookup_light in the rest of the net/socket.c Some time ago a sockfd_lookup_light was introduced and most of the socket.c file was patched to use it. 
However two routines were left - sys_sendto and sys_recvfrom. Patch them as well, since this helper does exactly what these two need. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/socket.c | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) (limited to 'net/socket.c') diff --git a/net/socket.c b/net/socket.c index 92fab9e1c60..aeeab388cc3 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1594,16 +1594,11 @@ asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, struct msghdr msg; struct iovec iov; int fput_needed; - struct file *sock_file; - sock_file = fget_light(fd, &fput_needed); - err = -EBADF; - if (!sock_file) + sock = sockfd_lookup_light(fd, &err, &fput_needed); + if (!sock) goto out; - sock = sock_from_file(sock_file, &err); - if (!sock) - goto out_put; iov.iov_base = buff; iov.iov_len = len; msg.msg_name = NULL; @@ -1625,7 +1620,7 @@ asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, err = sock_sendmsg(sock, &msg, len); out_put: - fput_light(sock_file, fput_needed); + fput_light(sock->file, fput_needed); out: return err; } @@ -1654,17 +1649,11 @@ asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, struct msghdr msg; char address[MAX_SOCK_ADDR]; int err, err2; - struct file *sock_file; int fput_needed; - sock_file = fget_light(fd, &fput_needed); - err = -EBADF; - if (!sock_file) - goto out; - - sock = sock_from_file(sock_file, &err); + sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) - goto out_put; + goto out; msg.msg_control = NULL; msg.msg_controllen = 0; @@ -1683,8 +1672,8 @@ asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, if (err2 < 0) err = err2; } -out_put: - fput_light(sock_file, fput_needed); + + fput_light(sock->file, fput_needed); out: return err; } -- cgit v1.2.3-70-g09d2 From 8d8ad9d7c4bfe79bc91b7fc419ecfb9dcdfe6a51 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 26 Nov 2007 20:10:50 +0800 Subject: [NET]: 
Name magic constants in sock_wake_async() The sock_wake_async() performs slightly different actions depending on the "how" argument. Unfortunately this argument only has numerical magic values. I propose to give names to these constants to help people reading this function's callers understand what's going on without looking into this function all the time. I suppose this is 2.6.25 material, but if it's not (or the naming seems poor/bad/awful), I can rework it against the current net-2.6 tree. Signed-off-by: Pavel Emelyanov Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/net.h | 7 +++++++ net/atm/common.c | 2 +- net/core/sock.c | 8 ++++---- net/core/stream.c | 2 +- net/dccp/input.c | 8 ++++---- net/dccp/output.c | 2 +- net/ipv4/tcp_input.c | 12 ++++++------ net/rxrpc/af_rxrpc.c | 2 +- net/sctp/socket.c | 3 ++- net/socket.c | 9 ++++----- net/unix/af_unix.c | 8 ++++---- 11 files changed, 35 insertions(+), 28 deletions(-) (limited to 'net/socket.c') diff --git a/include/linux/net.h b/include/linux/net.h index 0235d917d5c..f95f12c5840 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -186,6 +186,13 @@ struct net_proto_family { struct iovec; struct kvec; +enum { + SOCK_WAKE_IO, + SOCK_WAKE_WAITD, + SOCK_WAKE_SPACE, + SOCK_WAKE_URG, +}; + extern int sock_wake_async(struct socket *sk, int how, int band); extern int sock_register(const struct net_proto_family *fam); extern void sock_unregister(int family); diff --git a/net/atm/common.c b/net/atm/common.c index eba09a04f6b..c865517ba44 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -113,7 +113,7 @@ static void vcc_write_space(struct sock *sk) if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible(sk->sk_sleep); - sk_wake_async(sk, 2, POLL_OUT); + sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } read_unlock(&sk->sk_callback_lock); diff --git a/net/core/sock.c b/net/core/sock.c index eac7aa0721d..118214047ed 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1498,7 
+1498,7 @@ static void sock_def_error_report(struct sock *sk) read_lock(&sk->sk_callback_lock); if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible(sk->sk_sleep); - sk_wake_async(sk,0,POLL_ERR); + sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); read_unlock(&sk->sk_callback_lock); } @@ -1507,7 +1507,7 @@ static void sock_def_readable(struct sock *sk, int len) read_lock(&sk->sk_callback_lock); if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible(sk->sk_sleep); - sk_wake_async(sk,1,POLL_IN); + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); read_unlock(&sk->sk_callback_lock); } @@ -1524,7 +1524,7 @@ static void sock_def_write_space(struct sock *sk) /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) - sk_wake_async(sk, 2, POLL_OUT); + sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } read_unlock(&sk->sk_callback_lock); @@ -1539,7 +1539,7 @@ void sk_send_sigurg(struct sock *sk) { if (sk->sk_socket && sk->sk_socket->file) if (send_sigurg(&sk->sk_socket->file->f_owner)) - sk_wake_async(sk, 3, POLL_PRI); + sk_wake_async(sk, SOCK_WAKE_URG, POLL_PRI); } void sk_reset_timer(struct sock *sk, struct timer_list* timer, diff --git a/net/core/stream.c b/net/core/stream.c index b2fb846f42a..5586879bb9b 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -35,7 +35,7 @@ void sk_stream_write_space(struct sock *sk) if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible(sk->sk_sleep); if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) - sock_wake_async(sock, 2, POLL_OUT); + sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT); } } diff --git a/net/dccp/input.c b/net/dccp/input.c index df0fb2c149a..ef299fbd7c2 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -37,7 +37,7 @@ static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb) dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED); dccp_fin(sk, skb); dccp_set_state(sk, DCCP_CLOSED); - sk_wake_async(sk, 1, POLL_HUP); + sk_wake_async(sk, 
SOCK_WAKE_WAITD, POLL_HUP); } static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb) @@ -90,7 +90,7 @@ static void dccp_rcv_reset(struct sock *sk, struct sk_buff *skb) dccp_fin(sk, skb); if (err && !sock_flag(sk, SOCK_DEAD)) - sk_wake_async(sk, 0, POLL_ERR); + sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR); dccp_time_wait(sk, DCCP_TIME_WAIT, 0); } @@ -416,7 +416,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk, if (!sock_flag(sk, SOCK_DEAD)) { sk->sk_state_change(sk); - sk_wake_async(sk, 0, POLL_OUT); + sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); } if (sk->sk_write_pending || icsk->icsk_ack.pingpong || @@ -624,7 +624,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb, switch (old_state) { case DCCP_PARTOPEN: sk->sk_state_change(sk); - sk_wake_async(sk, 0, POLL_OUT); + sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); break; } } else if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) { diff --git a/net/dccp/output.c b/net/dccp/output.c index f49544618f2..33ce737ef3a 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -170,7 +170,7 @@ void dccp_write_space(struct sock *sk) wake_up_interruptible(sk->sk_sleep); /* Should agree with poll, otherwise some programs break */ if (sock_writeable(sk)) - sk_wake_async(sk, 2, POLL_OUT); + sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); read_unlock(&sk->sk_callback_lock); } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 79996b16b94..97ea3eda206 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3595,9 +3595,9 @@ static void tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th) /* Do not send POLL_HUP for half duplex close. 
*/ if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == TCP_CLOSE) - sk_wake_async(sk, 1, POLL_HUP); + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP); else - sk_wake_async(sk, 1, POLL_IN); + sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN); } } @@ -4956,7 +4956,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, if (!sock_flag(sk, SOCK_DEAD)) { sk->sk_state_change(sk); - sk_wake_async(sk, 0, POLL_OUT); + sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT); } if (sk->sk_write_pending || @@ -5186,9 +5186,9 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * are not waked up, because sk->sk_sleep == * NULL and sk->sk_socket == NULL. */ - if (sk->sk_socket) { - sk_wake_async(sk,0,POLL_OUT); - } + if (sk->sk_socket) + sk_wake_async(sk, + SOCK_WAKE_IO, POLL_OUT); tp->snd_una = TCP_SKB_CB(skb)->ack_seq; tp->snd_wnd = ntohs(th->window) << diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index d6389450c4b..5e82f1c0afb 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -65,7 +65,7 @@ static void rxrpc_write_space(struct sock *sk) if (rxrpc_writable(sk)) { if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible(sk->sk_sleep); - sk_wake_async(sk, 2, POLL_OUT); + sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } read_unlock(&sk->sk_callback_lock); } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ea9649ca0b2..dc2f9221f09 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -6008,7 +6008,8 @@ static void __sctp_write_space(struct sctp_association *asoc) */ if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN)) - sock_wake_async(sock, 2, POLL_OUT); + sock_wake_async(sock, + SOCK_WAKE_SPACE, POLL_OUT); } } } diff --git a/net/socket.c b/net/socket.c index aeeab388cc3..9ebca5c695d 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1070,20 +1070,19 @@ int sock_wake_async(struct socket *sock, int how, int band) if (!sock || !sock->fasync_list) return -1; switch (how) { - case 1: - + case 
SOCK_WAKE_WAITD: if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) break; goto call_kill; - case 2: + case SOCK_WAKE_SPACE: if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags)) break; /* fall through */ - case 0: + case SOCK_WAKE_IO: call_kill: __kill_fasync(sock->fasync_list, SIGIO, band); break; - case 3: + case SOCK_WAKE_URG: __kill_fasync(sock->fasync_list, SIGURG, band); } return 0; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 0f1ecbf86d0..393197afb19 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -317,7 +317,7 @@ static void unix_write_space(struct sock *sk) if (unix_writable(sk)) { if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) wake_up_interruptible_sync(sk->sk_sleep); - sk_wake_async(sk, 2, POLL_OUT); + sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); } read_unlock(&sk->sk_callback_lock); } @@ -403,7 +403,7 @@ static int unix_release_sock (struct sock *sk, int embrion) unix_state_unlock(skpair); skpair->sk_state_change(skpair); read_lock(&skpair->sk_callback_lock); - sk_wake_async(skpair,1,POLL_HUP); + sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP); read_unlock(&skpair->sk_callback_lock); } sock_put(skpair); /* It may now die */ @@ -1900,9 +1900,9 @@ static int unix_shutdown(struct socket *sock, int mode) other->sk_state_change(other); read_lock(&other->sk_callback_lock); if (peer_mode == SHUTDOWN_MASK) - sk_wake_async(other,1,POLL_HUP); + sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP); else if (peer_mode & RCV_SHUTDOWN) - sk_wake_async(other,1,POLL_IN); + sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN); read_unlock(&other->sk_callback_lock); } if (other) -- cgit v1.2.3-70-g09d2 From b8e1f9b5c37e77cc8f978a58859b35fe5edd5542 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Sat, 8 Dec 2007 00:12:33 -0800 Subject: [NET] sysctl: make sysctl_somaxconn per-namespace Just move the variable into struct net and adjust its usage. Other sysctls from the sys.net.core table are more difficult to virtualize (i.e. 
make them per-namespace), but I'll look at them as well a bit later. Signed-off-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/linux/socket.h | 1 - include/net/net_namespace.h | 1 + net/core/sysctl_net_core.c | 4 +++- net/socket.c | 8 ++++---- 4 files changed, 8 insertions(+), 6 deletions(-) (limited to 'net/socket.c') diff --git a/include/linux/socket.h b/include/linux/socket.h index eb5bdd59a64..bd2b30a74e7 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -24,7 +24,6 @@ struct __kernel_sockaddr_storage { #include /* pid_t */ #include /* __user */ -extern int sysctl_somaxconn; #ifdef CONFIG_PROC_FS struct seq_file; extern void socket_seq_show(struct seq_file *seq); diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index d5936115d97..b62e31fca47 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -39,6 +39,7 @@ struct net { /* core sysctls */ struct ctl_table_header *sysctl_core_hdr; + int sysctl_somaxconn; /* List of all packet sockets. */ rwlock_t packet_sklist_lock; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index dc4cf7dda9d..130338f83ae 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -127,7 +127,7 @@ static struct ctl_table net_core_table[] = { { .ctl_name = NET_CORE_SOMAXCONN, .procname = "somaxconn", - .data = &sysctl_somaxconn, + .data = &init_net.sysctl_somaxconn, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &proc_dointvec @@ -161,6 +161,8 @@ static __net_init int sysctl_core_net_init(struct net *net) { struct ctl_table *tbl, *tmp; + net->sysctl_somaxconn = SOMAXCONN; + tbl = net_core_table; if (net != &init_net) { tbl = kmemdup(tbl, sizeof(net_core_table), GFP_KERNEL); diff --git a/net/socket.c b/net/socket.c index 9ebca5c695d..7651de00850 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1365,17 +1365,17 @@ asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) * ready for listening. 
*/ -int sysctl_somaxconn __read_mostly = SOMAXCONN; - asmlinkage long sys_listen(int fd, int backlog) { struct socket *sock; int err, fput_needed; + int somaxconn; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (sock) { - if ((unsigned)backlog > sysctl_somaxconn) - backlog = sysctl_somaxconn; + somaxconn = sock->sk->sk_net->sysctl_somaxconn; + if ((unsigned)backlog > somaxconn) + backlog = somaxconn; err = security_socket_listen(sock, backlog); if (!err) -- cgit v1.2.3-70-g09d2