Diffstat (limited to 'net/sunrpc/svc_xprt.c')
-rw-r--r--	net/sunrpc/svc_xprt.c	464
1 file changed, 274 insertions(+), 190 deletions(-)
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index c82fe739fbd..b4737fbdec1 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -5,7 +5,6 @@
  */
 
 #include <linux/sched.h>
-#include <linux/smp_lock.h>
 #include <linux/errno.h>
 #include <linux/freezer.h>
 #include <linux/kthread.h>
@@ -14,6 +13,8 @@
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/svc_xprt.h>
 #include <linux/sunrpc/svcsock.h>
+#include <linux/sunrpc/xprt.h>
+#include <linux/module.h>
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
@@ -21,6 +22,7 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
 static int svc_deferred_recv(struct svc_rqst *rqstp);
 static struct cache_deferred_req *svc_defer(struct cache_req *req);
 static void svc_age_temp_xprts(unsigned long closure);
+static void svc_delete_xprt(struct svc_xprt *xprt);
 
 /* apparently the "standard" is that clients close
  * idle connections after 5 minutes, servers after
@@ -129,6 +131,9 @@ static void svc_xprt_free(struct kref *kref)
 	if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags))
 		svcauth_unix_info_release(xprt);
 	put_net(xprt->xpt_net);
+	/* See comment on corresponding get in xs_setup_bc_tcp(): */
+	if (xprt->xpt_bc_xprt)
+		xprt_put(xprt->xpt_bc_xprt);
 	xprt->xpt_ops->xpo_free(xprt);
 	module_put(owner);
 }
@@ -143,8 +148,8 @@ EXPORT_SYMBOL_GPL(svc_xprt_put);
  * Called by transport drivers to initialize the transport independent
  * portion of the transport instance.
  */
-void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
-		   struct svc_serv *serv)
+void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
+		   struct svc_xprt *xprt, struct svc_serv *serv)
 {
 	memset(xprt, 0, sizeof(*xprt));
 	xprt->xpt_class = xcl;
@@ -159,7 +164,7 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
 	spin_lock_init(&xprt->xpt_lock);
 	set_bit(XPT_BUSY, &xprt->xpt_flags);
 	rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
-	xprt->xpt_net = get_net(&init_net);
+	xprt->xpt_net = get_net(net);
 }
 EXPORT_SYMBOL_GPL(svc_xprt_init);
 
@@ -175,13 +180,13 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
 		.sin_addr.s_addr	= htonl(INADDR_ANY),
 		.sin_port		= htons(port),
 	};
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	struct sockaddr_in6 sin6 = {
 		.sin6_family		= AF_INET6,
 		.sin6_addr		= IN6ADDR_ANY_INIT,
 		.sin6_port		= htons(port),
 	};
-#endif	/* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
+#endif
 	struct sockaddr *sap;
 	size_t len;
 
@@ -190,12 +195,12 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
 		sap = (struct sockaddr *)&sin;
 		len = sizeof(sin);
 		break;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#if IS_ENABLED(CONFIG_IPV6)
 	case PF_INET6:
 		sap = (struct sockaddr *)&sin6;
 		len = sizeof(sin6);
 		break;
-#endif	/* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
+#endif
 	default:
 		return ERR_PTR(-EAFNOSUPPORT);
 	}
@@ -203,6 +208,37 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
 	return xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
 }
 
+/*
+ * svc_xprt_received conditionally queues the transport for processing
+ * by another thread. The caller must hold the XPT_BUSY bit and must
+ * not thereafter touch transport data.
+ *
+ * Note: XPT_DATA only gets cleared when a read-attempt finds no (or
+ * insufficient) data.
+ */
+static void svc_xprt_received(struct svc_xprt *xprt)
+{
+	WARN_ON_ONCE(!test_bit(XPT_BUSY, &xprt->xpt_flags));
+	if (!test_bit(XPT_BUSY, &xprt->xpt_flags))
+		return;
+	/* As soon as we clear busy, the xprt could be closed and
+	 * 'put', so we need a reference to call svc_xprt_enqueue with:
+	 */
+	svc_xprt_get(xprt);
+	clear_bit(XPT_BUSY, &xprt->xpt_flags);
+	svc_xprt_enqueue(xprt);
+	svc_xprt_put(xprt);
+}
+
+void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
+{
+	clear_bit(XPT_TEMP, &new->xpt_flags);
+	spin_lock_bh(&serv->sv_lock);
+	list_add(&new->xpt_list, &serv->sv_permsocks);
+	spin_unlock_bh(&serv->sv_lock);
+	svc_xprt_received(new);
+}
+
 int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
 		    struct net *net, const int family,
 		    const unsigned short port, int flags)
@@ -213,6 +249,7 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
 	spin_lock(&svc_xprt_class_lock);
 	list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
 		struct svc_xprt *newxprt;
+		unsigned short newport;
 
 		if (strcmp(xprt_name, xcl->xcl_name))
 			continue;
@@ -226,13 +263,9 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
 			module_put(xcl->xcl_owner);
 			return PTR_ERR(newxprt);
 		}
-
-		clear_bit(XPT_TEMP, &newxprt->xpt_flags);
-		spin_lock_bh(&serv->sv_lock);
-		list_add(&newxprt->xpt_list, &serv->sv_permsocks);
-		spin_unlock_bh(&serv->sv_lock);
-		clear_bit(XPT_BUSY, &newxprt->xpt_flags);
-		return svc_xprt_local_port(newxprt);
+		svc_add_new_perm_xprt(serv, newxprt);
+		newport = svc_xprt_local_port(newxprt);
+		return newport;
 	}
  err:
 	spin_unlock(&svc_xprt_class_lock);
@@ -249,8 +282,6 @@ EXPORT_SYMBOL_GPL(svc_create_xprt);
  */
 void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt)
 {
-	struct sockaddr *sin;
-
 	memcpy(&rqstp->rq_addr, &xprt->xpt_remote, xprt->xpt_remotelen);
 	rqstp->rq_addrlen = xprt->xpt_remotelen;
 
@@ -258,15 +289,8 @@ void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt)
 	 * Destination address in request is needed for binding the
 	 * source address in RPC replies/callbacks later.
 	 */
-	sin = (struct sockaddr *)&xprt->xpt_local;
-	switch (sin->sa_family) {
-	case AF_INET:
-		rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr;
-		break;
-	case AF_INET6:
-		rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr;
-		break;
-	}
+	memcpy(&rqstp->rq_daddr, &xprt->xpt_local, xprt->xpt_locallen);
+	rqstp->rq_daddrlen = xprt->xpt_locallen;
 }
 EXPORT_SYMBOL_GPL(svc_xprt_copy_addrs);
 
@@ -302,6 +326,15 @@ static void svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp)
 	list_del(&rqstp->rq_list);
 }
 
+static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
+{
+	if (xprt->xpt_flags & ((1<<XPT_CONN)|(1<<XPT_CLOSE)))
+		return true;
+	if (xprt->xpt_flags & ((1<<XPT_DATA)|(1<<XPT_DEFERRED)))
+		return xprt->xpt_ops->xpo_has_wspace(xprt);
+	return false;
+}
+
 /*
  * Queue up a transport with data pending. If there are idle nfsd
  * processes, wake 'em up.
@@ -309,13 +342,11 @@ static void svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp)
  */
 void svc_xprt_enqueue(struct svc_xprt *xprt)
 {
-	struct svc_serv	*serv = xprt->xpt_server;
 	struct svc_pool *pool;
 	struct svc_rqst	*rqstp;
 	int cpu;
 
-	if (!(xprt->xpt_flags &
-	      ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
+	if (!svc_xprt_has_something_to_do(xprt))
 		return;
 
 	cpu = get_cpu();
@@ -342,28 +373,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 		dprintk("svc: transport %p busy, not enqueued\n", xprt);
 		goto out_unlock;
 	}
-	BUG_ON(xprt->xpt_pool != NULL);
-	xprt->xpt_pool = pool;
-
-	/* Handle pending connection */
-	if (test_bit(XPT_CONN, &xprt->xpt_flags))
-		goto process;
-
-	/* Handle close in-progress */
-	if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
-		goto process;
-
-	/* Check if we have space to reply to a request */
-	if (!xprt->xpt_ops->xpo_has_wspace(xprt)) {
-		/* Don't enqueue while not enough space for reply */
-		dprintk("svc: no write space, transport %p  not enqueued\n",
-			xprt);
-		xprt->xpt_pool = NULL;
-		clear_bit(XPT_BUSY, &xprt->xpt_flags);
-		goto out_unlock;
-	}
-
- process:
+
 	if (!list_empty(&pool->sp_threads)) {
 		rqstp = list_entry(pool->sp_threads.next,
 				   struct svc_rqst,
@@ -377,16 +387,12 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 				rqstp, rqstp->rq_xprt);
 		rqstp->rq_xprt = xprt;
 		svc_xprt_get(xprt);
-		rqstp->rq_reserved = serv->sv_max_mesg;
-		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
 		pool->sp_stats.threads_woken++;
-		BUG_ON(xprt->xpt_pool != pool);
 		wake_up(&rqstp->rq_wait);
 	} else {
 		dprintk("svc: transport %p put into queue\n", xprt);
 		list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
 		pool->sp_stats.sockets_queued++;
-		BUG_ON(xprt->xpt_pool != pool);
 	}
 
 out_unlock:
@@ -414,23 +420,6 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool)
 	return xprt;
 }
 
-/*
- * svc_xprt_received conditionally queues the transport for processing
- * by another thread. The caller must hold the XPT_BUSY bit and must
- * not thereafter touch transport data.
- *
- * Note: XPT_DATA only gets cleared when a read-attempt finds no (or
- * insufficient) data.
- */
-void svc_xprt_received(struct svc_xprt *xprt)
-{
-	BUG_ON(!test_bit(XPT_BUSY, &xprt->xpt_flags));
-	xprt->xpt_pool = NULL;
-	clear_bit(XPT_BUSY, &xprt->xpt_flags);
-	svc_xprt_enqueue(xprt);
-}
-EXPORT_SYMBOL_GPL(svc_xprt_received);
-
 /**
  * svc_reserve - change the space reserved for the reply to a request.
  * @rqstp:  The request in question
@@ -510,7 +499,8 @@ void svc_wake_up(struct svc_serv *serv)
 			rqstp->rq_xprt = NULL;
 			 */
 			wake_up(&rqstp->rq_wait);
-		}
+		} else
+			pool->sp_task_pending = 1;
 		spin_unlock_bh(&pool->sp_lock);
 	}
 }
@@ -557,14 +547,11 @@ static void svc_check_conn_limits(struct svc_serv *serv)
 		struct svc_xprt *xprt = NULL;
 		spin_lock_bh(&serv->sv_lock);
 		if (!list_empty(&serv->sv_tempsocks)) {
-			if (net_ratelimit()) {
-				/* Try to help the admin */
-				printk(KERN_NOTICE "%s: too many open  "
-				       "connections, consider increasing %s\n",
-				       serv->sv_name, serv->sv_maxconn ?
-				       "the max number of connections." :
-				       "the number of threads.");
-			}
+			/* Try to help the admin */
+			net_notice_ratelimited("%s: too many open connections, consider increasing the %s\n",
+					       serv->sv_name, serv->sv_maxconn ?
+					       "max number of connections" :
+					       "number of threads");
 			/*
 			 * Always select the oldest connection. It's not fair,
 			 * but so is life
@@ -584,36 +571,19 @@ static void svc_check_conn_limits(struct svc_serv *serv)
 	}
 }
 
-/*
- * Receive the next request on any transport.  This code is carefully
- * organised not to touch any cachelines in the shared svc_serv
- * structure, only cachelines in the local svc_pool.
- */
-int svc_recv(struct svc_rqst *rqstp, long timeout)
+static int svc_alloc_arg(struct svc_rqst *rqstp)
 {
-	struct svc_xprt		*xprt = NULL;
-	struct svc_serv		*serv = rqstp->rq_server;
-	struct svc_pool		*pool = rqstp->rq_pool;
-	int			len, i;
-	int			pages;
-	struct xdr_buf		*arg;
-	DECLARE_WAITQUEUE(wait, current);
-	long			time_left;
-
-	dprintk("svc: server %p waiting for data (to = %ld)\n",
-		rqstp, timeout);
-
-	if (rqstp->rq_xprt)
-		printk(KERN_ERR
-			"svc_recv: service %p, transport not NULL!\n",
-			 rqstp);
-	if (waitqueue_active(&rqstp->rq_wait))
-		printk(KERN_ERR
-			"svc_recv: service %p, wait queue active!\n",
-			 rqstp);
+	struct svc_serv *serv = rqstp->rq_server;
+	struct xdr_buf *arg;
+	int pages;
+	int i;
 
 	/* now allocate needed pages.  If we get a failure, sleep briefly */
 	pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE;
+	WARN_ON_ONCE(pages >= RPCSVC_MAXPAGES);
+	if (pages >= RPCSVC_MAXPAGES)
+		/* use as many pages as possible */
+		pages = RPCSVC_MAXPAGES - 1;
 	for (i = 0; i < pages ; i++)
 		while (rqstp->rq_pages[i] == NULL) {
 			struct page *p = alloc_page(GFP_KERNEL);
@@ -627,8 +597,8 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 			}
 			rqstp->rq_pages[i] = p;
 		}
+	rqstp->rq_page_end = &rqstp->rq_pages[i];
 	rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */
-	BUG_ON(pages >= RPCSVC_MAXPAGES);
 
 	/* Make arg->head point to first page and arg->pages point to rest */
 	arg = &rqstp->rq_arg;
@@ -640,11 +610,15 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 	arg->page_len = (pages-2)*PAGE_SIZE;
 	arg->len = (pages-1)*PAGE_SIZE;
 	arg->tail[0].iov_len = 0;
+	return 0;
+}
 
-	try_to_freeze();
-	cond_resched();
-	if (signalled() || kthread_should_stop())
-		return -EINTR;
+static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
+{
+	struct svc_xprt *xprt;
+	struct svc_pool		*pool = rqstp->rq_pool;
+	DECLARE_WAITQUEUE(wait, current);
+	long			time_left;
 
 	/* Normally we will wait up to 5 seconds for any required
 	 * cache information to be provided.
@@ -656,15 +630,19 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 	if (xprt) {
 		rqstp->rq_xprt = xprt;
 		svc_xprt_get(xprt);
-		rqstp->rq_reserved = serv->sv_max_mesg;
-		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
 
 		/* As there is a shortage of threads and this request
 		 * had to be queued, don't allow the thread to wait so
 		 * long for cache updates.
 		 */
 		rqstp->rq_chandle.thread_wait = 1*HZ;
+		pool->sp_task_pending = 0;
 	} else {
+		if (pool->sp_task_pending) {
+			pool->sp_task_pending = 0;
+			spin_unlock_bh(&pool->sp_lock);
+			return ERR_PTR(-EAGAIN);
+		}
 		/* No data pending. Go to sleep */
 		svc_thread_enqueue(pool, rqstp);
 
@@ -684,7 +662,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 		if (kthread_should_stop()) {
 			set_current_state(TASK_RUNNING);
 			spin_unlock_bh(&pool->sp_lock);
-			return -EINTR;
+			return ERR_PTR(-EINTR);
 		}
 
 		add_wait_queue(&rqstp->rq_wait, &wait);
@@ -705,72 +683,129 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 			spin_unlock_bh(&pool->sp_lock);
 			dprintk("svc: server %p, no data yet\n", rqstp);
 			if (signalled() || kthread_should_stop())
-				return -EINTR;
+				return ERR_PTR(-EINTR);
 			else
-				return -EAGAIN;
+				return ERR_PTR(-EAGAIN);
 		}
 	}
 	spin_unlock_bh(&pool->sp_lock);
+	return xprt;
+}
+
+static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt)
+{
+	spin_lock_bh(&serv->sv_lock);
+	set_bit(XPT_TEMP, &newxpt->xpt_flags);
+	list_add(&newxpt->xpt_list, &serv->sv_tempsocks);
+	serv->sv_tmpcnt++;
+	if (serv->sv_temptimer.function == NULL) {
+		/* setup timer to age temp transports */
+		setup_timer(&serv->sv_temptimer, svc_age_temp_xprts,
+			    (unsigned long)serv);
+		mod_timer(&serv->sv_temptimer,
+			  jiffies + svc_conn_age_period * HZ);
+	}
+	spin_unlock_bh(&serv->sv_lock);
+	svc_xprt_received(newxpt);
+}
+
+static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
+{
+	struct svc_serv *serv = rqstp->rq_server;
+	int len = 0;
 
-	len = 0;
 	if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) {
 		dprintk("svc_recv: found XPT_CLOSE\n");
 		svc_delete_xprt(xprt);
-	} else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
+		/* Leave XPT_BUSY set on the dead xprt: */
+		return 0;
+	}
+	if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
 		struct svc_xprt *newxpt;
+		/*
+		 * We know this module_get will succeed because the
+		 * listener holds a reference too
+		 */
+		__module_get(xprt->xpt_class->xcl_owner);
+		svc_check_conn_limits(xprt->xpt_server);
 		newxpt = xprt->xpt_ops->xpo_accept(xprt);
-		if (newxpt) {
-			/*
-			 * We know this module_get will succeed because the
-			 * listener holds a reference too
-			 */
-			__module_get(newxpt->xpt_class->xcl_owner);
-			svc_check_conn_limits(xprt->xpt_server);
-			spin_lock_bh(&serv->sv_lock);
-			set_bit(XPT_TEMP, &newxpt->xpt_flags);
-			list_add(&newxpt->xpt_list, &serv->sv_tempsocks);
-			serv->sv_tmpcnt++;
-			if (serv->sv_temptimer.function == NULL) {
-				/* setup timer to age temp transports */
-				setup_timer(&serv->sv_temptimer,
-					    svc_age_temp_xprts,
-					    (unsigned long)serv);
-				mod_timer(&serv->sv_temptimer,
-					  jiffies + svc_conn_age_period * HZ);
-			}
-			spin_unlock_bh(&serv->sv_lock);
-			svc_xprt_received(newxpt);
-		}
-		svc_xprt_received(xprt);
-	} else {
+		if (newxpt)
+			svc_add_new_temp_xprt(serv, newxpt);
+		else
+			module_put(xprt->xpt_class->xcl_owner);
+	} else if (xprt->xpt_ops->xpo_has_wspace(xprt)) {
+		/* XPT_DATA|XPT_DEFERRED case: */
 		dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
-			rqstp, pool->sp_id, xprt,
+			rqstp, rqstp->rq_pool->sp_id, xprt,
			atomic_read(&xprt->xpt_ref.refcount));
 		rqstp->rq_deferred = svc_deferred_dequeue(xprt);
-		if (rqstp->rq_deferred) {
-			svc_xprt_received(xprt);
+		if (rqstp->rq_deferred)
 			len = svc_deferred_recv(rqstp);
-		} else {
+		else
 			len = xprt->xpt_ops->xpo_recvfrom(rqstp);
-			svc_xprt_received(xprt);
-		}
 		dprintk("svc: got len=%d\n", len);
+		rqstp->rq_reserved = serv->sv_max_mesg;
+		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
 	}
+	/* clear XPT_BUSY: */
+	svc_xprt_received(xprt);
+	return len;
+}
+
+/*
+ * Receive the next request on any transport.  This code is carefully
+ * organised not to touch any cachelines in the shared svc_serv
+ * structure, only cachelines in the local svc_pool.
+ */
+int svc_recv(struct svc_rqst *rqstp, long timeout)
+{
+	struct svc_xprt		*xprt = NULL;
+	struct svc_serv		*serv = rqstp->rq_server;
+	int			len, err;
+
+	dprintk("svc: server %p waiting for data (to = %ld)\n",
+		rqstp, timeout);
+
+	if (rqstp->rq_xprt)
+		printk(KERN_ERR
+			"svc_recv: service %p, transport not NULL!\n",
+			 rqstp);
+	if (waitqueue_active(&rqstp->rq_wait))
+		printk(KERN_ERR
+			"svc_recv: service %p, wait queue active!\n",
+			 rqstp);
+
+	err = svc_alloc_arg(rqstp);
+	if (err)
+		return err;
+
+	try_to_freeze();
+	cond_resched();
+	if (signalled() || kthread_should_stop())
+		return -EINTR;
+
+	xprt = svc_get_next_xprt(rqstp, timeout);
+	if (IS_ERR(xprt))
+		return PTR_ERR(xprt);
+
+	len = svc_handle_xprt(rqstp, xprt);
 
 	/* No data, incomplete (TCP) read, or accept() */
-	if (len == 0 || len == -EAGAIN) {
-		rqstp->rq_res.len = 0;
-		svc_xprt_release(rqstp);
-		return -EAGAIN;
-	}
+	if (len <= 0)
+		goto out;
+
 	clear_bit(XPT_OLD, &xprt->xpt_flags);
 
 	rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp));
 	rqstp->rq_chandle.defer = svc_defer;
 
 	if (serv->sv_stats)
 		serv->sv_stats->netcnt++;
 	return len;
+out:
+	rqstp->rq_res.len = 0;
+	svc_xprt_release(rqstp);
	return -EAGAIN;
 }
 EXPORT_SYMBOL_GPL(svc_recv);
 
@@ -808,7 +843,8 @@ int svc_send(struct svc_rqst *rqstp)
 
 	/* Grab mutex to serialize outgoing data. */
 	mutex_lock(&xprt->xpt_mutex);
-	if (test_bit(XPT_DEAD, &xprt->xpt_flags))
+	if (test_bit(XPT_DEAD, &xprt->xpt_flags)
+			|| test_bit(XPT_CLOSE, &xprt->xpt_flags))
 		len = -ENOTCONN;
 	else
 		len = xprt->xpt_ops->xpo_sendto(rqstp);
@@ -830,7 +866,6 @@ static void svc_age_temp_xprts(unsigned long closure)
 	struct svc_serv *serv = (struct svc_serv *)closure;
 	struct svc_xprt *xprt;
 	struct list_head *le, *next;
-	LIST_HEAD(to_be_aged);
 
 	dprintk("svc_age_temp_xprts\n");
 
@@ -851,25 +886,15 @@ static void svc_age_temp_xprts(unsigned long closure)
 		if (atomic_read(&xprt->xpt_ref.refcount) > 1 ||
 		    test_bit(XPT_BUSY, &xprt->xpt_flags))
 			continue;
-		svc_xprt_get(xprt);
-		list_move(le, &to_be_aged);
+		list_del_init(le);
 		set_bit(XPT_CLOSE, &xprt->xpt_flags);
 		set_bit(XPT_DETACHED, &xprt->xpt_flags);
-	}
-	spin_unlock_bh(&serv->sv_lock);
-
-	while (!list_empty(&to_be_aged)) {
-		le = to_be_aged.next;
-		/* fiddling the xpt_list node is safe 'cos we're XPT_DETACHED */
-		list_del_init(le);
-		xprt = list_entry(le, struct svc_xprt, xpt_list);
-
 		dprintk("queuing xprt %p for closing\n", xprt);
 
 		/* a thread will dequeue and close it soon */
 		svc_xprt_enqueue(xprt);
-		svc_xprt_put(xprt);
 	}
+	spin_unlock_bh(&serv->sv_lock);
 
 	mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
 }
@@ -890,7 +915,7 @@ static void call_xpt_users(struct svc_xprt *xprt)
 /*
  * Remove a dead transport
 */
-void svc_delete_xprt(struct svc_xprt *xprt)
+static void svc_delete_xprt(struct svc_xprt *xprt)
 {
 	struct svc_serv	*serv = xprt->xpt_server;
 	struct svc_deferred_req *dr;
@@ -905,13 +930,7 @@ void svc_delete_xprt(struct svc_xprt *xprt)
 	spin_lock_bh(&serv->sv_lock);
 	if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags))
 		list_del_init(&xprt->xpt_list);
-	/*
-	 * We used to delete the transport from whichever list
-	 * it's sk_xprt.xpt_ready node was on, but we don't actually
-	 * need to.  This is because the only time we're called
-	 * while still attached to a queue, the queue itself
-	 * is about to be destroyed (in svc_destroy).
-	 */
+	WARN_ON_ONCE(!list_empty(&xprt->xpt_ready));
 	if (test_bit(XPT_TEMP, &xprt->xpt_flags))
 		serv->sv_tmpcnt--;
 	spin_unlock_bh(&serv->sv_lock);
@@ -929,26 +948,87 @@ void svc_close_xprt(struct svc_xprt *xprt)
 	if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
 		/* someone else will have to effect the close */
 		return;
-
+	/*
+	 * We expect svc_close_xprt() to work even when no threads are
+	 * running (e.g., while configuring the server before starting
+	 * any threads), so if the transport isn't busy, we delete
+	 * it ourself:
+	 */
 	svc_delete_xprt(xprt);
 }
 EXPORT_SYMBOL_GPL(svc_close_xprt);
 
-void svc_close_all(struct list_head *xprt_list)
+static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net)
 {
 	struct svc_xprt *xprt;
-	struct svc_xprt *tmp;
+	int ret = 0;
 
-	list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) {
+	spin_lock(&serv->sv_lock);
+	list_for_each_entry(xprt, xprt_list, xpt_list) {
+		if (xprt->xpt_net != net)
+			continue;
+		ret++;
 		set_bit(XPT_CLOSE, &xprt->xpt_flags);
-		if (test_bit(XPT_BUSY, &xprt->xpt_flags)) {
-			/* Waiting to be processed, but no threads left,
-			 * So just remove it from the waiting list
-			 */
+		svc_xprt_enqueue(xprt);
+	}
+	spin_unlock(&serv->sv_lock);
+	return ret;
+}
+
+static struct svc_xprt *svc_dequeue_net(struct svc_serv *serv, struct net *net)
+{
+	struct svc_pool *pool;
+	struct svc_xprt *xprt;
+	struct svc_xprt *tmp;
+	int i;
+
+	for (i = 0; i < serv->sv_nrpools; i++) {
+		pool = &serv->sv_pools[i];
+
+		spin_lock_bh(&pool->sp_lock);
+		list_for_each_entry_safe(xprt, tmp, &pool->sp_sockets, xpt_ready) {
+			if (xprt->xpt_net != net)
+				continue;
 			list_del_init(&xprt->xpt_ready);
-			clear_bit(XPT_BUSY, &xprt->xpt_flags);
+			spin_unlock_bh(&pool->sp_lock);
+			return xprt;
 		}
-		svc_close_xprt(xprt);
+		spin_unlock_bh(&pool->sp_lock);
+	}
+	return NULL;
+}
+
+static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net)
+{
+	struct svc_xprt *xprt;
+
+	while ((xprt = svc_dequeue_net(serv, net))) {
+		set_bit(XPT_CLOSE, &xprt->xpt_flags);
+		svc_delete_xprt(xprt);
+	}
+}
+
+/*
+ * Server threads may still be running (especially in the case where the
+ * service is still running in other network namespaces).
+ *
+ * So we shut down sockets the same way we would on a running server, by
+ * setting XPT_CLOSE, enqueuing, and letting a thread pick it up to do
+ * the close.  In the case there are no such other threads
+ * running, svc_clean_up_xprts() does a simple version of a
+ * server's main event loop, and in the case where there are other
+ * threads, we may need to wait a little while and then check again to
+ * see if they're done.
+ */
+void svc_close_net(struct svc_serv *serv, struct net *net)
+{
+	int delay = 0;
+
+	while (svc_close_list(serv, &serv->sv_permsocks, net) +
+	       svc_close_list(serv, &serv->sv_tempsocks, net)) {
+
+		svc_clean_up_xprts(serv, net);
+		msleep(delay++);
 	}
 }
 
@@ -1022,6 +1102,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
 	}
 	svc_xprt_get(rqstp->rq_xprt);
 	dr->xprt = rqstp->rq_xprt;
+	rqstp->rq_dropme = true;
 
 	dr->handle.revisit = svc_revisit;
 	return &dr->handle;
@@ -1059,14 +1140,13 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
 	if (!test_bit(XPT_DEFERRED, &xprt->xpt_flags))
 		return NULL;
 	spin_lock(&xprt->xpt_lock);
-	clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
 	if (!list_empty(&xprt->xpt_deferred)) {
 		dr = list_entry(xprt->xpt_deferred.next,
 				struct svc_deferred_req,
 				handle.recent);
 		list_del_init(&dr->handle.recent);
-		set_bit(XPT_DEFERRED, &xprt->xpt_flags);
-	}
+	} else
+		clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
 	spin_unlock(&xprt->xpt_lock);
 	return dr;
 }
@@ -1075,6 +1155,7 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
  * svc_find_xprt - find an RPC transport instance
  * @serv: pointer to svc_serv to search
  * @xcl_name: C string containing transport's class name
+ * @net: owner net pointer
  * @af: Address family of transport's local address
 * @port: transport's IP port number
 *
@@ -1087,7 +1168,8 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
 * service's list that has a matching class name.
 */
 struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
-			       const sa_family_t af, const unsigned short port)
+			       struct net *net, const sa_family_t af,
+			       const unsigned short port)
 {
 	struct svc_xprt *xprt;
 	struct svc_xprt *found = NULL;
@@ -1098,6 +1180,8 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
 
 	spin_lock_bh(&serv->sv_lock);
 	list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) {
+		if (xprt->xpt_net != net)
+			continue;
 		if (strcmp(xprt->xpt_class->xcl_name, xcl_name))
 			continue;
 		if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family)
