diff options
Diffstat (limited to 'net/sunrpc/svcsock.c')
| -rw-r--r-- | net/sunrpc/svcsock.c | 806 | 
1 files changed, 459 insertions, 347 deletions
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 07919e16be3..b507cd327d9 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -21,6 +21,7 @@  #include <linux/kernel.h>  #include <linux/sched.h> +#include <linux/module.h>  #include <linux/errno.h>  #include <linux/fcntl.h>  #include <linux/net.h> @@ -42,6 +43,7 @@  #include <net/tcp_states.h>  #include <asm/uaccess.h>  #include <asm/ioctls.h> +#include <trace/events/skb.h>  #include <linux/sunrpc/types.h>  #include <linux/sunrpc/clnt.h> @@ -51,12 +53,14 @@  #include <linux/sunrpc/stats.h>  #include <linux/sunrpc/xprt.h> +#include "sunrpc.h" +  #define RPCDBG_FACILITY	RPCDBG_SVCXPRT  static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *, -					 int *errp, int flags); -static void		svc_udp_data_ready(struct sock *, int); +					 int flags); +static void		svc_udp_data_ready(struct sock *);  static int		svc_udp_recvfrom(struct svc_rqst *);  static int		svc_udp_sendto(struct svc_rqst *);  static void		svc_sock_detach(struct svc_xprt *); @@ -66,6 +70,13 @@ static void		svc_sock_free(struct svc_xprt *);  static struct svc_xprt *svc_create_socket(struct svc_serv *, int,  					  struct net *, struct sockaddr *,  					  int, int); +#if defined(CONFIG_SUNRPC_BACKCHANNEL) +static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int, +					     struct net *, struct sockaddr *, +					     int, int); +static void svc_bc_sock_free(struct svc_xprt *xprt); +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ +  #ifdef CONFIG_DEBUG_LOCK_ALLOC  static struct lock_class_key svc_key[2];  static struct lock_class_key svc_slock_key[2]; @@ -73,7 +84,11 @@ static struct lock_class_key svc_slock_key[2];  static void svc_reclassify_socket(struct socket *sock)  {  	struct sock *sk = sock->sk; -	BUG_ON(sock_owned_by_user(sk)); + +	WARN_ON_ONCE(sock_owned_by_user(sk)); +	if (sock_owned_by_user(sk)) +		return; +  	switch (sk->sk_family) {  	case AF_INET:  		sock_lock_init_class_and_name(sk, "slock-AF_INET-NFSD", @@ -134,19 +149,20 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)  			cmh->cmsg_level = SOL_IP;  			cmh->cmsg_type = IP_PKTINFO;  			pki->ipi_ifindex = 0; -			pki->ipi_spec_dst.s_addr = rqstp->rq_daddr.addr.s_addr; +			pki->ipi_spec_dst.s_addr = +				 svc_daddr_in(rqstp)->sin_addr.s_addr;  			cmh->cmsg_len = CMSG_LEN(sizeof(*pki));  		}  		break;  	case AF_INET6: {  			struct in6_pktinfo *pki = CMSG_DATA(cmh); +			struct sockaddr_in6 *daddr = svc_daddr_in6(rqstp);  			cmh->cmsg_level = SOL_IPV6;  			cmh->cmsg_type = IPV6_PKTINFO; -			pki->ipi6_ifindex = 0; -			ipv6_addr_copy(&pki->ipi6_addr, -					&rqstp->rq_daddr.addr6); +			pki->ipi6_ifindex = daddr->sin6_scope_id; +			pki->ipi6_addr = daddr->sin6_addr;  			cmh->cmsg_len = CMSG_LEN(sizeof(*pki));  		}  		break; @@ -275,12 +291,14 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)  				&inet_sk(sk)->inet_rcv_saddr,  				inet_sk(sk)->inet_num);  		break; +#if IS_ENABLED(CONFIG_IPV6)  	case PF_INET6:  		len = snprintf(buf, remaining, "ipv6 %s %pI6 %d\n",  				proto_name, -				&inet6_sk(sk)->rcv_saddr, +				&sk->sk_v6_rcv_saddr,  				inet_sk(sk)->inet_num);  		break; +#endif  	default:  		len = snprintf(buf, remaining, "*unknown-%d*\n",  				sk->sk_family); @@ -293,55 +311,6 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining)  	return len;  } -/** - * svc_sock_names - construct a list of listener names in a string - * @serv: pointer to RPC service - * @buf: pointer to a buffer to fill in with socket names - * @buflen: size of the buffer to be filled - * @toclose: pointer to '\0'-terminated C string containing the name - *		of a listener to be closed - * - * Fills in @buf with a '\n'-separated list of names of listener - * sockets.  If @toclose is not NULL, the socket named by @toclose - * is closed, and is not included in the output list. - * - * Returns positive length of the socket name string, or a negative - * errno value on error. - */ -int svc_sock_names(struct svc_serv *serv, char *buf, const size_t buflen, -		   const char *toclose) -{ -	struct svc_sock *svsk, *closesk = NULL; -	int len = 0; - -	if (!serv) -		return 0; - -	spin_lock_bh(&serv->sv_lock); -	list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list) { -		int onelen = svc_one_sock_name(svsk, buf + len, buflen - len); -		if (onelen < 0) { -			len = onelen; -			break; -		} -		if (toclose && strcmp(toclose, buf + len) == 0) -			closesk = svsk; -		else -			len += onelen; -	} -	spin_unlock_bh(&serv->sv_lock); - -	if (closesk) -		/* Should unregister with portmap, but you cannot -		 * unregister just one protocol... -		 */ -		svc_close_xprt(&closesk->sk_xprt); -	else if (toclose) -		return -ENOENT; -	return len; -} -EXPORT_SYMBOL_GPL(svc_sock_names); -  /*   * Check input queue length   */ @@ -378,6 +347,33 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,  	return len;  } +static int svc_partial_recvfrom(struct svc_rqst *rqstp, +				struct kvec *iov, int nr, +				int buflen, unsigned int base) +{ +	size_t save_iovlen; +	void *save_iovbase; +	unsigned int i; +	int ret; + +	if (base == 0) +		return svc_recvfrom(rqstp, iov, nr, buflen); + +	for (i = 0; i < nr; i++) { +		if (iov[i].iov_len > base) +			break; +		base -= iov[i].iov_len; +	} +	save_iovlen = iov[i].iov_len; +	save_iovbase = iov[i].iov_base; +	iov[i].iov_len -= base; +	iov[i].iov_base += base; +	ret = svc_recvfrom(rqstp, &iov[i], nr - i, buflen); +	iov[i].iov_len = save_iovlen; +	iov[i].iov_base = save_iovbase; +	return ret; +} +  /*   * Set socket snd and rcv buffer lengths   */ @@ -400,27 +396,33 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,  	lock_sock(sock->sk);  	sock->sk->sk_sndbuf = snd * 2;  	sock->sk->sk_rcvbuf = rcv * 2; -	sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;  	sock->sk->sk_write_space(sock->sk);  	release_sock(sock->sk);  #endif  } + +static int svc_sock_secure_port(struct svc_rqst *rqstp) +{ +	return svc_port_is_privileged(svc_addr(rqstp)); +} +  /*   * INET callback when data has been received on the socket.   */ -static void svc_udp_data_ready(struct sock *sk, int count) +static void svc_udp_data_ready(struct sock *sk)  {  	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data; +	wait_queue_head_t *wq = sk_sleep(sk);  	if (svsk) { -		dprintk("svc: socket %p(inet %p), count=%d, busy=%d\n", -			svsk, sk, count, +		dprintk("svc: socket %p(inet %p), busy=%d\n", +			svsk, sk,  			test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));  		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);  		svc_xprt_enqueue(&svsk->sk_xprt);  	} -	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) -		wake_up_interruptible(sk_sleep(sk)); +	if (wq && waitqueue_active(wq)) +		wake_up_interruptible(wq);  }  /* @@ -429,6 +431,7 @@ static void svc_udp_data_ready(struct sock *sk, int count)  static void svc_write_space(struct sock *sk)  {  	struct svc_sock	*svsk = (struct svc_sock *)(sk->sk_user_data); +	wait_queue_head_t *wq = sk_sleep(sk);  	if (svsk) {  		dprintk("svc: socket %p(inet %p), write_space busy=%d\n", @@ -436,10 +439,10 @@ static void svc_write_space(struct sock *sk)  		svc_xprt_enqueue(&svsk->sk_xprt);  	} -	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) { +	if (wq && waitqueue_active(wq)) {  		dprintk("RPC svc_write_space: someone sleeping on %p\n",  		       svsk); -		wake_up_interruptible(sk_sleep(sk)); +		wake_up_interruptible(wq);  	}  } @@ -447,7 +450,7 @@ static void svc_tcp_write_space(struct sock *sk)  {  	struct socket *sock = sk->sk_socket; -	if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) +	if (sk_stream_is_writeable(sk) && sock)  		clear_bit(SOCK_NOSPACE, &sock->flags);  	svc_write_space(sk);  } @@ -459,22 +462,31 @@ static int svc_udp_get_dest_address4(struct svc_rqst *rqstp,  				     struct cmsghdr *cmh)  {  	struct in_pktinfo *pki = CMSG_DATA(cmh); +	struct sockaddr_in *daddr = svc_daddr_in(rqstp); +  	if (cmh->cmsg_type != IP_PKTINFO)  		return 0; -	rqstp->rq_daddr.addr.s_addr = pki->ipi_spec_dst.s_addr; + +	daddr->sin_family = AF_INET; +	daddr->sin_addr.s_addr = pki->ipi_spec_dst.s_addr;  	return 1;  }  /* - * See net/ipv6/datagram.c : datagram_recv_ctl + * See net/ipv6/datagram.c : ip6_datagram_recv_ctl   */  static int svc_udp_get_dest_address6(struct svc_rqst *rqstp,  				     struct cmsghdr *cmh)  {  	struct in6_pktinfo *pki = CMSG_DATA(cmh); +	struct sockaddr_in6 *daddr = svc_daddr_in6(rqstp); +  	if (cmh->cmsg_type != IPV6_PKTINFO)  		return 0; -	ipv6_addr_copy(&rqstp->rq_daddr.addr6, &pki->ipi6_addr); + +	daddr->sin6_family = AF_INET6; +	daddr->sin6_addr = pki->ipi6_addr; +	daddr->sin6_scope_id = pki->ipi6_ifindex;  	return 1;  } @@ -547,11 +559,9 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)  			dprintk("svc: recvfrom returned error %d\n", -err);  			set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);  		} -		return -EAGAIN; +		return 0;  	}  	len = svc_addr_len(svc_addr(rqstp)); -	if (len == 0) -		return -EAFNOSUPPORT;  	rqstp->rq_addrlen = len;  	if (skb->tstamp.tv64 == 0) {  		skb->tstamp = ktime_get_real(); @@ -567,14 +577,11 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)  	rqstp->rq_prot = IPPROTO_UDP;  	if (!svc_udp_get_dest_address(rqstp, cmh)) { -		if (net_ratelimit()) -			printk(KERN_WARNING -				"svc: received unknown control message %d/%d; " -				"dropping RPC reply datagram\n", -					cmh->cmsg_level, cmh->cmsg_type); -		skb_free_datagram_locked(svsk->sk_sk, skb); -		return 0; +		net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n", +				     cmh->cmsg_level, cmh->cmsg_type); +		goto out_free;  	} +	rqstp->rq_daddrlen = svc_addr_len(svc_daddr(rqstp));  	if (skb_is_nonlinear(skb)) {  		/* we have to copy */ @@ -582,8 +589,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)  		if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) {  			local_bh_enable();  			/* checksum error */ -			skb_free_datagram_locked(svsk->sk_sk, skb); -			return 0; +			goto out_free;  		}  		local_bh_enable();  		skb_free_datagram_locked(svsk->sk_sk, skb); @@ -592,10 +598,8 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)  		rqstp->rq_arg.head[0].iov_base = skb->data +  			sizeof(struct udphdr);  		rqstp->rq_arg.head[0].iov_len = len; -		if (skb_checksum_complete(skb)) { -			skb_free_datagram_locked(svsk->sk_sk, skb); -			return 0; -		} +		if (skb_checksum_complete(skb)) +			goto out_free;  		rqstp->rq_xprt_ctxt = skb;  	} @@ -609,11 +613,16 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)  		rqstp->rq_respages = rqstp->rq_pages + 1 +  			DIV_ROUND_UP(rqstp->rq_arg.page_len, PAGE_SIZE);  	} +	rqstp->rq_next_page = rqstp->rq_respages+1;  	if (serv->sv_stats)  		serv->sv_stats->netudpcnt++;  	return len; +out_free: +	trace_kfree_skb(skb, svc_udp_recvfrom); +	skb_free_datagram_locked(svsk->sk_sk, skb); +	return 0;  }  static int @@ -675,6 +684,7 @@ static struct svc_xprt_ops svc_udp_ops = {  	.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,  	.xpo_has_wspace = svc_udp_has_wspace,  	.xpo_accept = svc_udp_accept, +	.xpo_secure_port = svc_sock_secure_port,  };  static struct svc_xprt_class svc_udp_class = { @@ -688,7 +698,8 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)  {  	int err, level, optname, one = 1; -	svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv); +	svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class, +		      &svsk->sk_xprt, serv);  	clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);  	svsk->sk_sk->sk_data_ready = svc_udp_data_ready;  	svsk->sk_sk->sk_write_space = svc_write_space; @@ -727,9 +738,10 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)   * A data_ready event on a listening socket means there's a connection   * pending. Do not use state_change as a substitute for it.   */ -static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused) +static void svc_tcp_listen_data_ready(struct sock *sk)  {  	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data; +	wait_queue_head_t *wq;  	dprintk("svc: socket %p TCP (listen) state change %d\n",  		sk, sk->sk_state); @@ -752,8 +764,9 @@ static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused)  			printk("svc: socket %p: no user data\n", sk);  	} -	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) -		wake_up_interruptible_all(sk_sleep(sk)); +	wq = sk_sleep(sk); +	if (wq && waitqueue_active(wq)) +		wake_up_interruptible_all(wq);  }  /* @@ -762,6 +775,7 @@ static void svc_tcp_listen_data_ready(struct sock *sk, int count_unused)  static void svc_tcp_state_change(struct sock *sk)  {  	struct svc_sock	*svsk = (struct svc_sock *)sk->sk_user_data; +	wait_queue_head_t *wq = sk_sleep(sk);  	dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",  		sk, sk->sk_state, sk->sk_user_data); @@ -772,13 +786,14 @@ static void svc_tcp_state_change(struct sock *sk)  		set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);  		svc_xprt_enqueue(&svsk->sk_xprt);  	} -	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) -		wake_up_interruptible_all(sk_sleep(sk)); +	if (wq && waitqueue_active(wq)) +		wake_up_interruptible_all(wq);  } -static void svc_tcp_data_ready(struct sock *sk, int count) +static void svc_tcp_data_ready(struct sock *sk)  {  	struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; +	wait_queue_head_t *wq = sk_sleep(sk);  	dprintk("svc: socket %p TCP data ready (svsk %p)\n",  		sk, sk->sk_user_data); @@ -786,8 +801,8 @@ static void svc_tcp_data_ready(struct sock *sk, int count)  		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);  		svc_xprt_enqueue(&svsk->sk_xprt);  	} -	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) -		wake_up_interruptible(sk_sleep(sk)); +	if (wq && waitqueue_active(wq)) +		wake_up_interruptible(wq);  }  /* @@ -815,18 +830,17 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)  		if (err == -ENOMEM)  			printk(KERN_WARNING "%s: no more sockets!\n",  			       serv->sv_name); -		else if (err != -EAGAIN && net_ratelimit()) -			printk(KERN_WARNING "%s: accept failed (err %d)!\n", -				   serv->sv_name, -err); +		else if (err != -EAGAIN) +			net_warn_ratelimited("%s: accept failed (err %d)!\n", +					     serv->sv_name, -err);  		return NULL;  	}  	set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags);  	err = kernel_getpeername(newsock, sin, &slen);  	if (err < 0) { -		if (net_ratelimit()) -			printk(KERN_WARNING "%s: peername failed (err %d)!\n", -				   serv->sv_name, -err); +		net_warn_ratelimited("%s: peername failed (err %d)!\n", +				     serv->sv_name, -err);  		goto failed;		/* aborted connection or whatever */  	} @@ -835,8 +849,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)  	 * tell us anything.  For now just warn about unpriv connections.  	 */  	if (!svc_port_is_privileged(sin)) { -		dprintk(KERN_WARNING -			"%s: connect from unprivileged port: %s\n", +		dprintk("%s: connect from unprivileged port: %s\n",  			serv->sv_name,  			__svc_print_addr(sin, buf, sizeof(buf)));  	} @@ -848,8 +861,9 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)  	 */  	newsock->sk->sk_sndtimeo = HZ*30; -	if (!(newsvsk = svc_setup_socket(serv, newsock, &err, -				 (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY)))) +	newsvsk = svc_setup_socket(serv, newsock, +				 (SVC_SOCK_ANONYMOUS | SVC_SOCK_TEMPORARY)); +	if (IS_ERR(newsvsk))  		goto failed;  	svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen);  	err = kernel_getsockname(newsock, sin, &slen); @@ -859,6 +873,10 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)  	}  	svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen); +	if (sock_is_loopback(newsock->sk)) +		set_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags); +	else +		clear_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);  	if (serv->sv_stats)  		serv->sv_stats->nettcpconn++; @@ -869,40 +887,76 @@ failed:  	return NULL;  } +static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) +{ +	unsigned int i, len, npages; + +	if (svsk->sk_datalen == 0) +		return 0; +	len = svsk->sk_datalen; +	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; +	for (i = 0; i < npages; i++) { +		if (rqstp->rq_pages[i] != NULL) +			put_page(rqstp->rq_pages[i]); +		BUG_ON(svsk->sk_pages[i] == NULL); +		rqstp->rq_pages[i] = svsk->sk_pages[i]; +		svsk->sk_pages[i] = NULL; +	} +	rqstp->rq_arg.head[0].iov_base = page_address(rqstp->rq_pages[0]); +	return len; +} + +static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) +{ +	unsigned int i, len, npages; + +	if (svsk->sk_datalen == 0) +		return; +	len = svsk->sk_datalen; +	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; +	for (i = 0; i < npages; i++) { +		svsk->sk_pages[i] = rqstp->rq_pages[i]; +		rqstp->rq_pages[i] = NULL; +	} +} + +static void svc_tcp_clear_pages(struct svc_sock *svsk) +{ +	unsigned int i, len, npages; + +	if (svsk->sk_datalen == 0) +		goto out; +	len = svsk->sk_datalen; +	npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; +	for (i = 0; i < npages; i++) { +		if (svsk->sk_pages[i] == NULL) { +			WARN_ON_ONCE(1); +			continue; +		} +		put_page(svsk->sk_pages[i]); +		svsk->sk_pages[i] = NULL; +	} +out: +	svsk->sk_tcplen = 0; +	svsk->sk_datalen = 0; +} +  /* - * Receive data. + * Receive fragment record header.   * If we haven't gotten the record length yet, get the next four bytes. - * Otherwise try to gobble up as much as possible up to the complete - * record length.   */  static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)  {  	struct svc_serv	*serv = svsk->sk_xprt.xpt_server; +	unsigned int want;  	int len; -	if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags)) -		/* sndbuf needs to have room for one request -		 * per thread, otherwise we can stall even when the -		 * network isn't a bottleneck. -		 * -		 * We count all threads rather than threads in a -		 * particular pool, which provides an upper bound -		 * on the number of threads which will access the socket. -		 * -		 * rcvbuf just needs to be able to hold a few requests. -		 * Normally they will be removed from the queue -		 * as soon a a complete request arrives. -		 */ -		svc_sock_setbufsize(svsk->sk_sock, -				    (serv->sv_nrthreads+3) * serv->sv_max_mesg, -				    3 * serv->sv_max_mesg); -  	clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);  	if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { -		int		want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;  		struct kvec	iov; +		want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;  		iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen;  		iov.iov_len  = want;  		if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0) @@ -912,106 +966,91 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)  		if (len < want) {  			dprintk("svc: short recvfrom while reading record "  				"length (%d of %d)\n", len, want); -			goto err_again; /* record header not complete */ -		} - -		svsk->sk_reclen = ntohl(svsk->sk_reclen); -		if (!(svsk->sk_reclen & RPC_LAST_STREAM_FRAGMENT)) { -			/* FIXME: technically, a record can be fragmented, -			 *  and non-terminal fragments will not have the top -			 *  bit set in the fragment length header. -			 *  But apparently no known nfs clients send fragmented -			 *  records. */ -			if (net_ratelimit()) -				printk(KERN_NOTICE "RPC: multiple fragments " -					"per record not supported\n"); -			goto err_delete; +			return -EAGAIN;  		} -		svsk->sk_reclen &= RPC_FRAGMENT_SIZE_MASK; -		dprintk("svc: TCP record, %d bytes\n", svsk->sk_reclen); -		if (svsk->sk_reclen > serv->sv_max_mesg) { -			if (net_ratelimit()) -				printk(KERN_NOTICE "RPC: " -					"fragment too large: 0x%08lx\n", -					(unsigned long)svsk->sk_reclen); +		dprintk("svc: TCP record, %d bytes\n", svc_sock_reclen(svsk)); +		if (svc_sock_reclen(svsk) + svsk->sk_datalen > +							serv->sv_max_mesg) { +			net_notice_ratelimited("RPC: fragment too large: %d\n", +					svc_sock_reclen(svsk));  			goto err_delete;  		}  	} -	/* Check whether enough data is available */ -	len = svc_recv_available(svsk); -	if (len < 0) -		goto error; - -	if (len < svsk->sk_reclen) { -		dprintk("svc: incomplete TCP record (%d of %d)\n", -			len, svsk->sk_reclen); -		goto err_again;	/* record not complete */ -	} -	len = svsk->sk_reclen; -	set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); - -	return len; - error: -	if (len == -EAGAIN) -		dprintk("RPC: TCP recv_record got EAGAIN\n"); +	return svc_sock_reclen(svsk); +error: +	dprintk("RPC: TCP recv_record got %d\n", len);  	return len; - err_delete: +err_delete:  	set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); - err_again:  	return -EAGAIN;  } -static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp, -			       struct rpc_rqst **reqpp, struct kvec *vec) +static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)  { +	struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt;  	struct rpc_rqst *req = NULL; -	u32 *p; -	u32 xid; -	u32 calldir; -	int len; - -	len = svc_recvfrom(rqstp, vec, 1, 8); -	if (len < 0) -		goto error; +	struct kvec *src, *dst; +	__be32 *p = (__be32 *)rqstp->rq_arg.head[0].iov_base; +	__be32 xid; +	__be32 calldir; -	p = (u32 *)rqstp->rq_arg.head[0].iov_base;  	xid = *p++;  	calldir = *p; -	if (calldir == 0) { -		/* REQUEST is the most common case */ -		vec[0] = rqstp->rq_arg.head[0]; -	} else { -		/* REPLY */ -		if (svsk->sk_bc_xprt) -			req = xprt_lookup_rqst(svsk->sk_bc_xprt, xid); - -		if (!req) { -			printk(KERN_NOTICE -				"%s: Got unrecognized reply: " -				"calldir 0x%x sk_bc_xprt %p xid %08x\n", -				__func__, ntohl(calldir), -				svsk->sk_bc_xprt, xid); -			vec[0] = rqstp->rq_arg.head[0]; -			goto out; -		} +	if (bc_xprt) +		req = xprt_lookup_rqst(bc_xprt, xid); -		memcpy(&req->rq_private_buf, &req->rq_rcv_buf, -		       sizeof(struct xdr_buf)); -		/* copy the xid and call direction */ -		memcpy(req->rq_private_buf.head[0].iov_base, -		       rqstp->rq_arg.head[0].iov_base, 8); -		vec[0] = req->rq_private_buf.head[0]; +	if (!req) { +		printk(KERN_NOTICE +			"%s: Got unrecognized reply: " +			"calldir 0x%x xpt_bc_xprt %p xid %08x\n", +			__func__, ntohl(calldir), +			bc_xprt, xid); +		return -EAGAIN;  	} - out: -	vec[0].iov_base += 8; -	vec[0].iov_len -= 8; -	len = svsk->sk_reclen - 8; - error: -	*reqpp = req; -	return len; + +	memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); +	/* +	 * XXX!: cheating for now!  Only copying HEAD. +	 * But we know this is good enough for now (in fact, for any +	 * callback reply in the forseeable future). +	 */ +	dst = &req->rq_private_buf.head[0]; +	src = &rqstp->rq_arg.head[0]; +	if (dst->iov_len < src->iov_len) +		return -EAGAIN; /* whatever; just giving up. */ +	memcpy(dst->iov_base, src->iov_base, src->iov_len); +	xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len); +	rqstp->rq_arg.len = 0; +	return 0; +} + +static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) +{ +	int i = 0; +	int t = 0; + +	while (t < len) { +		vec[i].iov_base = page_address(pages[i]); +		vec[i].iov_len = PAGE_SIZE; +		i++; +		t += PAGE_SIZE; +	} +	return i; +} + +static void svc_tcp_fragment_received(struct svc_sock *svsk) +{ +	/* If we have more data, signal svc_xprt_enqueue() to try again */ +	if (svc_recv_available(svsk) > sizeof(rpc_fraghdr)) +		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); +	dprintk("svc: TCP %s record (%d bytes)\n", +		svc_sock_final_rec(svsk) ? "final" : "nonfinal", +		svc_sock_reclen(svsk)); +	svsk->sk_tcplen = 0; +	svsk->sk_reclen = 0;  }  /* @@ -1024,8 +1063,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)  	struct svc_serv	*serv = svsk->sk_xprt.xpt_server;  	int		len;  	struct kvec *vec; -	int pnum, vlen; -	struct rpc_rqst *req = NULL; +	unsigned int want, base; +	__be32 *p; +	__be32 calldir; +	int pnum;  	dprintk("svc: tcp_recv %p data %d conn %d close %d\n",  		svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), @@ -1036,87 +1077,82 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)  	if (len < 0)  		goto error; +	base = svc_tcp_restore_pages(svsk, rqstp); +	want = svc_sock_reclen(svsk) - (svsk->sk_tcplen - sizeof(rpc_fraghdr)); +  	vec = rqstp->rq_vec; -	vec[0] = rqstp->rq_arg.head[0]; -	vlen = PAGE_SIZE; -	/* -	 * We have enough data for the whole tcp record. Let's try and read the -	 * first 8 bytes to get the xid and the call direction. We can use this -	 * to figure out if this is a call or a reply to a callback. If -	 * sk_reclen is < 8 (xid and calldir), then this is a malformed packet. -	 * In that case, don't bother with the calldir and just read the data. -	 * It will be rejected in svc_process. -	 */ -	if (len >= 8) { -		len = svc_process_calldir(svsk, rqstp, &req, vec); -		if (len < 0) -			goto err_again; -		vlen -= 8; -	} +	pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], +						svsk->sk_datalen + want); -	pnum = 1; -	while (vlen < len) { -		vec[pnum].iov_base = (req) ? -			page_address(req->rq_private_buf.pages[pnum - 1]) : -			page_address(rqstp->rq_pages[pnum]); -		vec[pnum].iov_len = PAGE_SIZE; -		pnum++; -		vlen += PAGE_SIZE; -	}  	rqstp->rq_respages = &rqstp->rq_pages[pnum]; +	rqstp->rq_next_page = rqstp->rq_respages + 1;  	/* Now receive data */ -	len = svc_recvfrom(rqstp, vec, pnum, len); -	if (len < 0) -		goto err_again; - -	/* -	 * Account for the 8 bytes we read earlier -	 */ -	len += 8; +	len = svc_partial_recvfrom(rqstp, vec, pnum, want, base); +	if (len >= 0) { +		svsk->sk_tcplen += len; +		svsk->sk_datalen += len; +	} +	if (len != want || !svc_sock_final_rec(svsk)) { +		svc_tcp_save_pages(svsk, rqstp); +		if (len < 0 && len != -EAGAIN) +			goto err_delete; +		if (len == want) +			svc_tcp_fragment_received(svsk); +		else +			dprintk("svc: incomplete TCP record (%d of %d)\n", +				(int)(svsk->sk_tcplen - sizeof(rpc_fraghdr)), +				svc_sock_reclen(svsk)); +		goto err_noclose; +	} -	if (req) { -		xprt_complete_rqst(req->rq_task, len); -		len = 0; -		goto out; +	if (svsk->sk_datalen < 8) { +		svsk->sk_datalen = 0; +		goto err_delete; /* client is nuts. */  	} -	dprintk("svc: TCP complete record (%d bytes)\n", len); -	rqstp->rq_arg.len = len; + +	rqstp->rq_arg.len = svsk->sk_datalen;  	rqstp->rq_arg.page_base = 0; -	if (len <= rqstp->rq_arg.head[0].iov_len) { -		rqstp->rq_arg.head[0].iov_len = len; +	if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) { +		rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len;  		rqstp->rq_arg.page_len = 0; -	} else { -		rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len; -	} +	} else +		rqstp->rq_arg.page_len = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;  	rqstp->rq_xprt_ctxt   = NULL;  	rqstp->rq_prot	      = IPPROTO_TCP; +	rqstp->rq_local	      = !!test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags); + +	p = (__be32 *)rqstp->rq_arg.head[0].iov_base; +	calldir = p[1]; +	if (calldir) +		len = receive_cb_reply(svsk, rqstp); -out:  	/* Reset TCP read info */ -	svsk->sk_reclen = 0; -	svsk->sk_tcplen = 0; +	svsk->sk_datalen = 0; +	svc_tcp_fragment_received(svsk); + +	if (len < 0) +		goto error;  	svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt);  	if (serv->sv_stats)  		serv->sv_stats->nettcpcnt++; -	return len; +	return rqstp->rq_arg.len; -err_again: -	if (len == -EAGAIN) { -		dprintk("RPC: TCP recvfrom got EAGAIN\n"); -		return len; -	}  error: -	if (len != -EAGAIN) { -		printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", -		       svsk->sk_xprt.xpt_server->sv_name, -len); -		set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); -	} -	return -EAGAIN; +	if (len != -EAGAIN) +		goto err_delete; +	dprintk("RPC: TCP recvfrom got EAGAIN\n"); +	return 0; +err_delete: +	printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", +	       svsk->sk_xprt.xpt_server->sv_name, -len); +	set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); +err_noclose: +	return 0;	/* record not complete */  }  /* @@ -1170,7 +1206,9 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt)  	if (test_bit(XPT_LISTENER, &xprt->xpt_flags))  		return 1;  	required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg; -	if (sk_stream_wspace(svsk->sk_sk) >= required) +	if (sk_stream_wspace(svsk->sk_sk) >= required || +	    (sk_stream_min_wspace(svsk->sk_sk) == 0 && +	     atomic_read(&xprt->xpt_reserved) == 0))  		return 1;  	set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);  	return 0; @@ -1184,6 +1222,58 @@ static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,  	return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);  } +#if defined(CONFIG_SUNRPC_BACKCHANNEL) +static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int, +					     struct net *, struct sockaddr *, +					     int, int); +static void svc_bc_sock_free(struct svc_xprt *xprt); + +static struct svc_xprt *svc_bc_tcp_create(struct svc_serv *serv, +				       struct net *net, +				       struct sockaddr *sa, int salen, +				       int flags) +{ +	return svc_bc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags); +} + +static void svc_bc_tcp_sock_detach(struct svc_xprt *xprt) +{ +} + +static struct svc_xprt_ops svc_tcp_bc_ops = { +	.xpo_create = svc_bc_tcp_create, +	.xpo_detach = svc_bc_tcp_sock_detach, +	.xpo_free = svc_bc_sock_free, +	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr, +	.xpo_secure_port = svc_sock_secure_port, +}; + +static struct svc_xprt_class svc_tcp_bc_class = { +	.xcl_name = "tcp-bc", +	.xcl_owner = THIS_MODULE, +	.xcl_ops = &svc_tcp_bc_ops, +	.xcl_max_payload = RPCSVC_MAXPAYLOAD_TCP, +}; + +static void svc_init_bc_xprt_sock(void) +{ +	svc_reg_xprt_class(&svc_tcp_bc_class); +} + +static void svc_cleanup_bc_xprt_sock(void) +{ +	svc_unreg_xprt_class(&svc_tcp_bc_class); +} +#else /* CONFIG_SUNRPC_BACKCHANNEL */ +static void svc_init_bc_xprt_sock(void) +{ +} + +static void svc_cleanup_bc_xprt_sock(void) +{ +} +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ +  static struct svc_xprt_ops svc_tcp_ops = {  	.xpo_create = svc_tcp_create,  	.xpo_recvfrom = svc_tcp_recvfrom, @@ -1194,6 +1284,7 @@ static struct svc_xprt_ops svc_tcp_ops = {  	.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,  	.xpo_has_wspace = svc_tcp_has_wspace,  	.xpo_accept = svc_tcp_accept, +	.xpo_secure_port = svc_sock_secure_port,  };  static struct svc_xprt_class svc_tcp_class = { @@ -1207,19 +1298,22 @@ void svc_init_xprt_sock(void)  {  	svc_reg_xprt_class(&svc_tcp_class);  	svc_reg_xprt_class(&svc_udp_class); +	svc_init_bc_xprt_sock();  }  void svc_cleanup_xprt_sock(void)  {  	svc_unreg_xprt_class(&svc_tcp_class);  	svc_unreg_xprt_class(&svc_udp_class); +	svc_cleanup_bc_xprt_sock();  }  static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)  {  	struct sock	*sk = svsk->sk_sk; -	svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv); +	svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_tcp_class, +		      &svsk->sk_xprt, serv);  	set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);  	if (sk->sk_state == TCP_LISTEN) {  		dprintk("setting up TCP socket for listening\n"); @@ -1234,18 +1328,11 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)  		svsk->sk_reclen = 0;  		svsk->sk_tcplen = 0; +		svsk->sk_datalen = 0; +		memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));  		tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF; -		/* initialise setting must have enough space to -		 * receive and respond to one request. -		 * svc_tcp_recvfrom will re-adjust if necessary -		 */ -		svc_sock_setbufsize(svsk->sk_sock, -				    3 * svsk->sk_xprt.xpt_server->sv_max_mesg, -				    3 * svsk->sk_xprt.xpt_server->sv_max_mesg); - -		set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);  		set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);  		if (sk->sk_state != TCP_ESTABLISHED)  			set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); @@ -1263,8 +1350,6 @@ void svc_sock_update_bufs(struct svc_serv *serv)  	spin_lock_bh(&serv->sv_lock);  	list_for_each_entry(svsk, &serv->sv_permsocks, sk_xprt.xpt_list)  		set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags); -	list_for_each_entry(svsk, &serv->sv_tempsocks, sk_xprt.xpt_list) -		set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);  	spin_unlock_bh(&serv->sv_lock);  }  EXPORT_SYMBOL_GPL(svc_sock_update_bufs); @@ -1275,28 +1360,29 @@ EXPORT_SYMBOL_GPL(svc_sock_update_bufs);   */  static struct svc_sock *svc_setup_socket(struct svc_serv *serv,  						struct socket *sock, -						int *errp, int flags) +						int flags)  {  	struct svc_sock	*svsk;  	struct sock	*inet;  	int		pmap_register = !(flags & SVC_SOCK_ANONYMOUS); +	int		err = 0;  	dprintk("svc: svc_setup_socket %p\n", sock); -	if (!(svsk = kzalloc(sizeof(*svsk), GFP_KERNEL))) { -		*errp = -ENOMEM; -		return NULL; -	} +	svsk = kzalloc(sizeof(*svsk), GFP_KERNEL); +	if (!svsk) +		return ERR_PTR(-ENOMEM);  	inet = sock->sk;  	/* Register socket with portmapper */ -	if (*errp >= 0 && pmap_register) -		*errp = svc_register(serv, inet->sk_family, inet->sk_protocol, +	if (pmap_register) +		err = svc_register(serv, sock_net(sock->sk), inet->sk_family, +				     inet->sk_protocol,  				     ntohs(inet_sk(inet)->inet_sport)); -	if (*errp < 0) { +	if (err < 0) {  		kfree(svsk); -		return NULL; +		return ERR_PTR(err);  	}  	inet->sk_user_data = svsk; @@ -1309,8 +1395,14 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,  	/* Initialize the socket */  	if (sock->type == SOCK_DGRAM)  		svc_udp_init(svsk, serv); -	else +	else { +		/* initialise setting must have enough space to +		 * receive and respond to one request. +		 */ +		svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg, +					4 * serv->sv_max_mesg);  		svc_tcp_init(svsk, serv); +	}  	dprintk("svc: svc_setup_socket created %p (inet %p)\n",  				svsk, svsk->sk_sk); @@ -1318,6 +1410,22 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,  	return svsk;  } +bool svc_alien_sock(struct net *net, int fd) +{ +	int err; +	struct socket *sock = sockfd_lookup(fd, &err); +	bool ret = false; + +	if (!sock) +		goto out; +	if (sock_net(sock->sk) != net) +		ret = true; +	sockfd_put(sock); +out: +	return ret; +} +EXPORT_SYMBOL_GPL(svc_alien_sock); +  /**   * svc_addsock - add a listener socket to an RPC service   * @serv: pointer to RPC service to which to add a new listener @@ -1335,42 +1443,38 @@ int svc_addsock(struct svc_serv *serv, const int fd, char *name_return,  	int err = 0;  	struct socket *so = sockfd_lookup(fd, &err);  	struct svc_sock *svsk = NULL; +	struct sockaddr_storage addr; +	struct sockaddr *sin = (struct sockaddr *)&addr; +	int salen;  	if (!so)  		return err; +	err = -EAFNOSUPPORT;  	if ((so->sk->sk_family != PF_INET) && (so->sk->sk_family != PF_INET6)) -		err =  -EAFNOSUPPORT; -	else if (so->sk->sk_protocol != IPPROTO_TCP && +		goto out; +	err =  -EPROTONOSUPPORT; +	if (so->sk->sk_protocol != IPPROTO_TCP &&  	    so->sk->sk_protocol != IPPROTO_UDP) -		err =  -EPROTONOSUPPORT; -	else if (so->state > SS_UNCONNECTED) -		err = -EISCONN; -	else { -		if (!try_module_get(THIS_MODULE)) -			err = -ENOENT; -		else -			svsk = svc_setup_socket(serv, so, &err, -						SVC_SOCK_DEFAULTS); -		if (svsk) { -			struct sockaddr_storage addr; -			struct sockaddr *sin = (struct sockaddr *)&addr; -			int salen; -			if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0) -				svc_xprt_set_local(&svsk->sk_xprt, sin, salen); -			clear_bit(XPT_TEMP, &svsk->sk_xprt.xpt_flags); -			spin_lock_bh(&serv->sv_lock); -			list_add(&svsk->sk_xprt.xpt_list, &serv->sv_permsocks); -			spin_unlock_bh(&serv->sv_lock); -			svc_xprt_received(&svsk->sk_xprt); -			err = 0; -		} else -			module_put(THIS_MODULE); -	} -	if (err) { -		sockfd_put(so); -		return err; +		goto out; +	err = -EISCONN; +	if (so->state > SS_UNCONNECTED) +		goto out; +	err = -ENOENT; +	if (!try_module_get(THIS_MODULE)) +		goto out; +	svsk = svc_setup_socket(serv, so, SVC_SOCK_DEFAULTS); +	if (IS_ERR(svsk)) { +		module_put(THIS_MODULE); +		err = PTR_ERR(svsk); +		goto out;  	} +	if (kernel_getsockname(svsk->sk_sock, sin, &salen) == 0) +		svc_xprt_set_local(&svsk->sk_xprt, sin, salen); +	svc_add_new_perm_xprt(serv, &svsk->sk_xprt);  	return svc_one_sock_name(svsk, name_return, len); +out: +	sockfd_put(so); +	return err;  }  EXPORT_SYMBOL_GPL(svc_addsock); @@ -1433,7 +1537,7 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,  					(char *)&val, sizeof(val));  	if (type == SOCK_STREAM) -		sock->sk->sk_reuse = 1;		/* allow address reuse */ +		sock->sk->sk_reuse = SK_CAN_REUSE; /* allow address reuse */  	error = kernel_bind(sock, sin, len);  	if (error < 0)  		goto bummer; @@ -1448,11 +1552,13 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,  			goto bummer;  	} -	if ((svsk = svc_setup_socket(serv, sock, &error, flags)) != NULL) { -		svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen); -		return (struct svc_xprt *)svsk; +	svsk = svc_setup_socket(serv, sock, flags); +	if (IS_ERR(svsk)) { +		error = PTR_ERR(svsk); +		goto bummer;  	} - +	svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen); +	return (struct svc_xprt *)svsk;  bummer:  	dprintk("svc: svc_create_socket error = %d\n", -error);  	sock_release(sock); @@ -1467,6 +1573,7 @@ static void svc_sock_detach(struct svc_xprt *xprt)  {  	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);  	struct sock *sk = svsk->sk_sk; +	wait_queue_head_t *wq;  	dprintk("svc: svc_sock_detach(%p)\n", svsk); @@ -1475,8 +1582,9 @@ static void svc_sock_detach(struct svc_xprt *xprt)  	sk->sk_data_ready = svsk->sk_odata;  	sk->sk_write_space = svsk->sk_owspace; -	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk))) -		wake_up_interruptible(sk_sleep(sk)); +	wq = sk_sleep(sk); +	if (wq && waitqueue_active(wq)) +		wake_up_interruptible(wq);  }  /* @@ -1490,8 +1598,10 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)  	svc_sock_detach(xprt); -	if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) +	if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) { +		svc_tcp_clear_pages(svsk);  		kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR); +	}  }  /* @@ -1509,41 +1619,43 @@ static void svc_sock_free(struct svc_xprt *xprt)  	kfree(svsk);  } +#if defined(CONFIG_SUNRPC_BACKCHANNEL)  /* - * Create a svc_xprt. - * - * For internal use only (e.g. nfsv4.1 backchannel). - * Callers should typically use the xpo_create() method. + * Create a back channel svc_xprt which shares the fore channel socket.   */ -struct svc_xprt *svc_sock_create(struct svc_serv *serv, int prot) +static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv, +					     int protocol, +					     struct net *net, +					     struct sockaddr *sin, int len, +					     int flags)  {  	struct svc_sock *svsk; -	struct svc_xprt *xprt = NULL; +	struct svc_xprt *xprt; + +	if (protocol != IPPROTO_TCP) { +		printk(KERN_WARNING "svc: only TCP sockets" +			" supported on shared back channel\n"); +		return ERR_PTR(-EINVAL); +	} -	dprintk("svc: %s\n", __func__);  	svsk = kzalloc(sizeof(*svsk), GFP_KERNEL);  	if (!svsk) -		goto out; +		return ERR_PTR(-ENOMEM);  	xprt = &svsk->sk_xprt; -	if (prot == IPPROTO_TCP) -		svc_xprt_init(&svc_tcp_class, xprt, serv); -	else if (prot == IPPROTO_UDP) -		svc_xprt_init(&svc_udp_class, xprt, serv); -	else -		BUG(); -out: -	dprintk("svc: %s return %p\n", __func__, xprt); +	svc_xprt_init(net, &svc_tcp_bc_class, xprt, serv); + +	serv->sv_bc_xprt = xprt; +  	return xprt;  } -EXPORT_SYMBOL_GPL(svc_sock_create);  /* - * Destroy a svc_sock. + * Free a back channel svc_sock.   */ -void svc_sock_destroy(struct svc_xprt *xprt) +static void svc_bc_sock_free(struct svc_xprt *xprt)  {  	if (xprt)  		kfree(container_of(xprt, struct svc_sock, sk_xprt));  } -EXPORT_SYMBOL_GPL(svc_sock_destroy); +#endif /* CONFIG_SUNRPC_BACKCHANNEL */  | 
