Diffstat (limited to 'net/sunrpc/svc_xprt.c')
-rw-r--r--	net/sunrpc/svc_xprt.c	810
1 file changed, 544 insertions(+), 266 deletions(-)
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 332eb47539e..b4737fbdec1 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -6,31 +6,15 @@
 #include <linux/sched.h>
 #include <linux/errno.h>
-#include <linux/fcntl.h>
-#include <linux/net.h>
-#include <linux/in.h>
-#include <linux/inet.h>
-#include <linux/udp.h>
-#include <linux/tcp.h>
-#include <linux/unistd.h>
-#include <linux/slab.h>
-#include <linux/netdevice.h>
-#include <linux/skbuff.h>
-#include <linux/file.h>
 #include <linux/freezer.h>
+#include <linux/kthread.h>
+#include <linux/slab.h>
 #include <net/sock.h>
-#include <net/checksum.h>
-#include <net/ip.h>
-#include <net/ipv6.h>
-#include <net/tcp_states.h>
-#include <linux/uaccess.h>
-#include <asm/ioctls.h>
-
-#include <linux/sunrpc/types.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/stats.h>
 #include <linux/sunrpc/svc_xprt.h>
+#include <linux/sunrpc/svcsock.h>
+#include <linux/sunrpc/xprt.h>
+#include <linux/module.h>
 
 #define RPCDBG_FACILITY	RPCDBG_SVCXPRT
 
@@ -38,6 +22,7 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
 static int svc_deferred_recv(struct svc_rqst *rqstp);
 static struct cache_deferred_req *svc_defer(struct cache_req *req);
 static void svc_age_temp_xprts(unsigned long closure);
+static void svc_delete_xprt(struct svc_xprt *xprt);
 
 /* apparently the "standard" is that clients close
  * idle connections after 5 minutes, servers after
@@ -117,16 +102,14 @@ EXPORT_SYMBOL_GPL(svc_unreg_xprt_class);
  */
 int svc_print_xprts(char *buf, int maxlen)
 {
-	struct list_head *le;
+	struct svc_xprt_class *xcl;
 	char tmpstr[80];
 	int len = 0;
 	buf[0] = '\0';
 
 	spin_lock(&svc_xprt_class_lock);
-	list_for_each(le, &svc_xprt_class_list) {
+	list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
 		int slen;
-		struct svc_xprt_class *xcl =
-			list_entry(le, struct svc_xprt_class, xcl_list);
 
 		sprintf(tmpstr, "%s %d\n", xcl->xcl_name, xcl->xcl_max_payload);
 		slen = strlen(tmpstr);
@@ -145,9 +128,12 @@ static void svc_xprt_free(struct kref *kref)
 	struct svc_xprt *xprt =
 		container_of(kref, struct svc_xprt, xpt_ref);
 	struct module *owner = xprt->xpt_class->xcl_owner;
-	if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags) && xprt->xpt_auth_cache != NULL)
-		svcauth_unix_info_release(xprt->xpt_auth_cache);
+	if (test_bit(XPT_CACHE_AUTH, &xprt->xpt_flags))
+		svcauth_unix_info_release(xprt);
+	put_net(xprt->xpt_net);
+	/* See comment on corresponding get in xs_setup_bc_tcp(): */
+	if (xprt->xpt_bc_xprt)
+		xprt_put(xprt->xpt_bc_xprt);
 	xprt->xpt_ops->xpo_free(xprt);
 	module_put(owner);
 }
@@ -162,8 +148,8 @@ EXPORT_SYMBOL_GPL(svc_xprt_put);
 * Called by transport drivers to initialize the transport independent
 * portion of the transport instance.
 */
-void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
-		   struct svc_serv *serv)
+void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
+		   struct svc_xprt *xprt, struct svc_serv *serv)
 {
 	memset(xprt, 0, sizeof(*xprt));
 	xprt->xpt_class = xcl;
@@ -173,25 +159,97 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
 	INIT_LIST_HEAD(&xprt->xpt_list);
 	INIT_LIST_HEAD(&xprt->xpt_ready);
 	INIT_LIST_HEAD(&xprt->xpt_deferred);
+	INIT_LIST_HEAD(&xprt->xpt_users);
 	mutex_init(&xprt->xpt_mutex);
 	spin_lock_init(&xprt->xpt_lock);
 	set_bit(XPT_BUSY, &xprt->xpt_flags);
+	rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
+	xprt->xpt_net = get_net(net);
 }
 EXPORT_SYMBOL_GPL(svc_xprt_init);
 
-int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
-		    int flags)
+static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl,
+					 struct svc_serv *serv,
+					 struct net *net,
+					 const int family,
+					 const unsigned short port,
+					 int flags)
 {
-	struct svc_xprt_class *xcl;
 	struct sockaddr_in sin = {
 		.sin_family		= AF_INET,
 		.sin_addr.s_addr	= htonl(INADDR_ANY),
 		.sin_port		= htons(port),
 	};
+#if IS_ENABLED(CONFIG_IPV6)
+	struct sockaddr_in6 sin6 = {
+		.sin6_family		= AF_INET6,
+		.sin6_addr		= IN6ADDR_ANY_INIT,
+		.sin6_port		= htons(port),
+	};
+#endif
+	struct sockaddr *sap;
+	size_t len;
+
+	switch (family) {
+	case PF_INET:
+		sap = (struct sockaddr *)&sin;
+		len = sizeof(sin);
+		break;
+#if IS_ENABLED(CONFIG_IPV6)
+	case PF_INET6:
+		sap = (struct sockaddr *)&sin6;
+		len = sizeof(sin6);
+		break;
+#endif
+	default:
+		return ERR_PTR(-EAFNOSUPPORT);
+	}
+
+	return xcl->xcl_ops->xpo_create(serv, net, sap, len, flags);
+}
+
+/*
+ * svc_xprt_received conditionally queues the transport for processing
+ * by another thread. The caller must hold the XPT_BUSY bit and must
+ * not thereafter touch transport data.
+ *
+ * Note: XPT_DATA only gets cleared when a read-attempt finds no (or
+ * insufficient) data.
+ */
+static void svc_xprt_received(struct svc_xprt *xprt)
+{
+	WARN_ON_ONCE(!test_bit(XPT_BUSY, &xprt->xpt_flags));
+	if (!test_bit(XPT_BUSY, &xprt->xpt_flags))
+		return;
+	/* As soon as we clear busy, the xprt could be closed and
+	 * 'put', so we need a reference to call svc_xprt_enqueue with:
+	 */
+	svc_xprt_get(xprt);
+	clear_bit(XPT_BUSY, &xprt->xpt_flags);
+	svc_xprt_enqueue(xprt);
+	svc_xprt_put(xprt);
+}
+
+void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new)
+{
+	clear_bit(XPT_TEMP, &new->xpt_flags);
+	spin_lock_bh(&serv->sv_lock);
+	list_add(&new->xpt_list, &serv->sv_permsocks);
+	spin_unlock_bh(&serv->sv_lock);
+	svc_xprt_received(new);
+}
+
+int svc_create_xprt(struct svc_serv *serv, const char *xprt_name,
+		    struct net *net, const int family,
+		    const unsigned short port, int flags)
+{
+	struct svc_xprt_class *xcl;
+
 	dprintk("svc: creating transport %s[%d]\n", xprt_name, port);
 	spin_lock(&svc_xprt_class_lock);
 	list_for_each_entry(xcl, &svc_xprt_class_list, xcl_list) {
 		struct svc_xprt *newxprt;
+		unsigned short newport;
 
 		if (strcmp(xprt_name, xcl->xcl_name))
 			continue;
@@ -200,25 +258,22 @@ int svc_create_xprt(struct svc_serv *serv, char *xprt_name, unsigned short port,
 			goto err;
 
 		spin_unlock(&svc_xprt_class_lock);
-		newxprt = xcl->xcl_ops->
-			xpo_create(serv, (struct sockaddr *)&sin, sizeof(sin),
-				   flags);
+		newxprt = __svc_xpo_create(xcl, serv, net, family, port, flags);
 		if (IS_ERR(newxprt)) {
 			module_put(xcl->xcl_owner);
 			return PTR_ERR(newxprt);
 		}
-
-		clear_bit(XPT_TEMP, &newxprt->xpt_flags);
-		spin_lock_bh(&serv->sv_lock);
-		list_add(&newxprt->xpt_list, &serv->sv_permsocks);
-		spin_unlock_bh(&serv->sv_lock);
-		clear_bit(XPT_BUSY, &newxprt->xpt_flags);
-		return svc_xprt_local_port(newxprt);
+		svc_add_new_perm_xprt(serv, newxprt);
+		newport = svc_xprt_local_port(newxprt);
+		return newport;
 	}
  err:
 	spin_unlock(&svc_xprt_class_lock);
 	dprintk("svc: transport %s not found\n", xprt_name);
-	return -ENOENT;
+
+	/* This errno is exposed to user space.  Provide a reasonable
+	 * perror msg for a bad transport. */
+	return -EPROTONOSUPPORT;
 }
 EXPORT_SYMBOL_GPL(svc_create_xprt);
 
@@ -227,8 +282,6 @@ EXPORT_SYMBOL_GPL(svc_create_xprt);
  */
 void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt)
 {
-	struct sockaddr *sin;
-
 	memcpy(&rqstp->rq_addr, &xprt->xpt_remote, xprt->xpt_remotelen);
 	rqstp->rq_addrlen = xprt->xpt_remotelen;
 
@@ -236,15 +289,8 @@ void svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt)
 	 * Destination address in request is needed for binding the
 	 * source address in RPC replies/callbacks later.
 	 */
-	sin = (struct sockaddr *)&xprt->xpt_local;
-	switch (sin->sa_family) {
-	case AF_INET:
-		rqstp->rq_daddr.addr = ((struct sockaddr_in *)sin)->sin_addr;
-		break;
-	case AF_INET6:
-		rqstp->rq_daddr.addr6 = ((struct sockaddr_in6 *)sin)->sin6_addr;
-		break;
-	}
+	memcpy(&rqstp->rq_daddr, &xprt->xpt_local, xprt->xpt_locallen);
+	rqstp->rq_daddrlen = xprt->xpt_locallen;
 }
 EXPORT_SYMBOL_GPL(svc_xprt_copy_addrs);
 
@@ -280,6 +326,15 @@ static void svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp)
 	list_del(&rqstp->rq_list);
 }
 
+static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
+{
+	if (xprt->xpt_flags & ((1<<XPT_CONN)|(1<<XPT_CLOSE)))
+		return true;
+	if (xprt->xpt_flags & ((1<<XPT_DATA)|(1<<XPT_DEFERRED)))
+		return xprt->xpt_ops->xpo_has_wspace(xprt);
+	return false;
+}
+
 /*
  * Queue up a transport with data pending. If there are idle nfsd
 * processes, wake 'em up.
@@ -287,15 +342,11 @@ static void svc_thread_dequeue(struct svc_pool *pool, struct svc_rqst *rqstp)
 */
 void svc_xprt_enqueue(struct svc_xprt *xprt)
 {
-	struct svc_serv *serv = xprt->xpt_server;
 	struct svc_pool *pool;
 	struct svc_rqst *rqstp;
 	int cpu;
 
-	if (!(xprt->xpt_flags &
-	      ((1<<XPT_CONN)|(1<<XPT_DATA)|(1<<XPT_CLOSE)|(1<<XPT_DEFERRED))))
-		return;
-	if (test_bit(XPT_DEAD, &xprt->xpt_flags))
+	if (!svc_xprt_has_something_to_do(xprt))
 		return;
 
 	cpu = get_cpu();
@@ -310,11 +361,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 		       "svc_xprt_enqueue: "
 		       "threads and transports both waiting??\n");
 
-	if (test_bit(XPT_DEAD, &xprt->xpt_flags)) {
-		/* Don't enqueue dead transports */
-		dprintk("svc: transport %p is dead, not enqueued\n", xprt);
-		goto out_unlock;
-	}
+	pool->sp_stats.packets++;
 
 	/* Mark transport as busy. It will remain in this state until
 	 * the provider calls svc_xprt_received. We update XPT_BUSY
@@ -326,28 +373,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 		dprintk("svc: transport %p busy, not enqueued\n", xprt);
 		goto out_unlock;
 	}
-	BUG_ON(xprt->xpt_pool != NULL);
-	xprt->xpt_pool = pool;
-
-	/* Handle pending connection */
-	if (test_bit(XPT_CONN, &xprt->xpt_flags))
-		goto process;
-
-	/* Handle close in-progress */
-	if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
-		goto process;
-
-	/* Check if we have space to reply to a request */
-	if (!xprt->xpt_ops->xpo_has_wspace(xprt)) {
-		/* Don't enqueue while not enough space for reply */
-		dprintk("svc: no write space, transport %p not enqueued\n",
-			xprt);
-		xprt->xpt_pool = NULL;
-		clear_bit(XPT_BUSY, &xprt->xpt_flags);
-		goto out_unlock;
-	}
- process:
 	if (!list_empty(&pool->sp_threads)) {
 		rqstp = list_entry(pool->sp_threads.next,
 				   struct svc_rqst,
@@ -361,14 +387,12 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
 			rqstp, rqstp->rq_xprt);
 		rqstp->rq_xprt = xprt;
 		svc_xprt_get(xprt);
-		rqstp->rq_reserved = serv->sv_max_mesg;
-		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
-		BUG_ON(xprt->xpt_pool != pool);
+		pool->sp_stats.threads_woken++;
 		wake_up(&rqstp->rq_wait);
 	} else {
 		dprintk("svc: transport %p put into queue\n", xprt);
 		list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
-		BUG_ON(xprt->xpt_pool != pool);
+		pool->sp_stats.sockets_queued++;
 	}
 
 out_unlock:
@@ -396,23 +420,6 @@ static struct svc_xprt *svc_xprt_dequeue(struct svc_pool *pool)
 	return xprt;
 }
 
-/*
- * svc_xprt_received conditionally queues the transport for processing
- * by another thread. The caller must hold the XPT_BUSY bit and must
- * not thereafter touch transport data.
- *
- * Note: XPT_DATA only gets cleared when a read-attempt finds no (or
- * insufficient) data.
- */
-void svc_xprt_received(struct svc_xprt *xprt)
-{
-	BUG_ON(!test_bit(XPT_BUSY, &xprt->xpt_flags));
-	xprt->xpt_pool = NULL;
-	clear_bit(XPT_BUSY, &xprt->xpt_flags);
-	svc_xprt_enqueue(xprt);
-}
-EXPORT_SYMBOL_GPL(svc_xprt_received);
-
 /**
 * svc_reserve - change the space reserved for the reply to a request.
 * @rqstp: The request in question
@@ -435,7 +442,7 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
 		svc_xprt_enqueue(xprt);
 	}
 }
-EXPORT_SYMBOL(svc_reserve);
+EXPORT_SYMBOL_GPL(svc_reserve);
 
 static void svc_xprt_release(struct svc_rqst *rqstp)
 {
@@ -443,6 +450,9 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
 
 	rqstp->rq_xprt->xpt_ops->xpo_release_rqst(rqstp);
 
+	kfree(rqstp->rq_deferred);
+	rqstp->rq_deferred = NULL;
+
 	svc_free_res_pages(rqstp);
 	rqstp->rq_res.page_len = 0;
 	rqstp->rq_res.page_base = 0;
@@ -489,11 +499,12 @@ void svc_wake_up(struct svc_serv *serv)
 			rqstp->rq_xprt = NULL;
 			 */
 			wake_up(&rqstp->rq_wait);
-		}
+		} else
+			pool->sp_task_pending = 1;
 		spin_unlock_bh(&pool->sp_lock);
 	}
 }
-EXPORT_SYMBOL(svc_wake_up);
+EXPORT_SYMBOL_GPL(svc_wake_up);
 
 int svc_port_is_privileged(struct sockaddr *sin)
 {
@@ -510,8 +521,10 @@ int svc_port_is_privileged(struct sockaddr *sin)
 }
 
 /*
- * Make sure that we don't have too many active connections. If we
- * have, something must be dropped.
+ * Make sure that we don't have too many active connections. If we have,
+ * something must be dropped. It's not clear what will happen if we allow
+ * "too many" connections, but when dealing with network-facing software,
+ * we have to code defensively. Here we do that by imposing hard limits.
 *
 * There's no point in trying to do random drop here for DoS
 * prevention. The NFS client does 1 reconnect in 15 seconds. An
@@ -520,20 +533,25 @@ int svc_port_is_privileged(struct sockaddr *sin)
 * The only somewhat efficient mechanism would be if drop old
 * connections from the same IP first. But right now we don't even
 * record the client IP in svc_sock.
+ *
+ * single-threaded services that expect a lot of clients will probably
+ * need to set sv_maxconn to override the default value which is based
+ * on the number of threads
 */
 static void svc_check_conn_limits(struct svc_serv *serv)
 {
-	if (serv->sv_tmpcnt > (serv->sv_nrthreads+3)*20) {
+	unsigned int limit = serv->sv_maxconn ? serv->sv_maxconn :
+				(serv->sv_nrthreads+3) * 20;
+
+	if (serv->sv_tmpcnt > limit) {
 		struct svc_xprt *xprt = NULL;
 		spin_lock_bh(&serv->sv_lock);
 		if (!list_empty(&serv->sv_tempsocks)) {
-			if (net_ratelimit()) {
-				/* Try to help the admin */
-				printk(KERN_NOTICE "%s: too many open "
-				       "connections, consider increasing the "
-				       "number of nfsd threads\n",
-				       serv->sv_name);
-			}
+			/* Try to help the admin */
+			net_notice_ratelimited("%s: too many open connections, consider increasing the %s\n",
+					       serv->sv_name, serv->sv_maxconn ?
					       "max number of connections" :
+					       "number of threads");
 			/*
 			 * Always select the oldest connection. It's not fair,
 			 * but so is life
@@ -553,46 +571,34 @@ static void svc_check_conn_limits(struct svc_serv *serv)
 	}
 }
 
-/*
- * Receive the next request on any transport.  This code is carefully
- * organised not to touch any cachelines in the shared svc_serv
- * structure, only cachelines in the local svc_pool.
- */
-int svc_recv(struct svc_rqst *rqstp, long timeout)
+static int svc_alloc_arg(struct svc_rqst *rqstp)
 {
-	struct svc_xprt *xprt = NULL;
-	struct svc_serv *serv = rqstp->rq_server;
-	struct svc_pool *pool = rqstp->rq_pool;
-	int len, i;
-	int pages;
-	struct xdr_buf *arg;
-	DECLARE_WAITQUEUE(wait, current);
-
-	dprintk("svc: server %p waiting for data (to = %ld)\n",
-		rqstp, timeout);
-
-	if (rqstp->rq_xprt)
-		printk(KERN_ERR
-			"svc_recv: service %p, transport not NULL!\n",
-			rqstp);
-	if (waitqueue_active(&rqstp->rq_wait))
-		printk(KERN_ERR
-			"svc_recv: service %p, wait queue active!\n",
-			rqstp);
+	struct svc_serv *serv = rqstp->rq_server;
+	struct xdr_buf *arg;
+	int pages;
+	int i;
 
 	/* now allocate needed pages.  If we get a failure, sleep briefly */
 	pages = (serv->sv_max_mesg + PAGE_SIZE) / PAGE_SIZE;
+	WARN_ON_ONCE(pages >= RPCSVC_MAXPAGES);
+	if (pages >= RPCSVC_MAXPAGES)
+		/* use as many pages as possible */
+		pages = RPCSVC_MAXPAGES - 1;
 	for (i = 0; i < pages ; i++)
 		while (rqstp->rq_pages[i] == NULL) {
 			struct page *p = alloc_page(GFP_KERNEL);
 			if (!p) {
-				int j = msecs_to_jiffies(500);
-				schedule_timeout_uninterruptible(j);
+				set_current_state(TASK_INTERRUPTIBLE);
+				if (signalled() || kthread_should_stop()) {
+					set_current_state(TASK_RUNNING);
+					return -EINTR;
+				}
+				schedule_timeout(msecs_to_jiffies(500));
 			}
 			rqstp->rq_pages[i] = p;
 		}
+	rqstp->rq_page_end = &rqstp->rq_pages[i];
 	rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */
-	BUG_ON(pages >= RPCSVC_MAXPAGES);
 
 	/* Make arg->head point to first page and arg->pages point to rest */
 	arg = &rqstp->rq_arg;
@@ -604,20 +610,39 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 	arg->page_len = (pages-2)*PAGE_SIZE;
 	arg->len = (pages-1)*PAGE_SIZE;
 	arg->tail[0].iov_len = 0;
+	return 0;
+}
 
-	try_to_freeze();
-	cond_resched();
-	if (signalled())
-		return -EINTR;
+static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
+{
+	struct svc_xprt *xprt;
+	struct svc_pool *pool = rqstp->rq_pool;
+	DECLARE_WAITQUEUE(wait, current);
+	long time_left;
+
+	/* Normally we will wait up to 5 seconds for any required
+	 * cache information to be provided.
+	 */
+	rqstp->rq_chandle.thread_wait = 5*HZ;
 
 	spin_lock_bh(&pool->sp_lock);
 	xprt = svc_xprt_dequeue(pool);
 	if (xprt) {
 		rqstp->rq_xprt = xprt;
 		svc_xprt_get(xprt);
-		rqstp->rq_reserved = serv->sv_max_mesg;
-		atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
+
+		/* As there is a shortage of threads and this request
+		 * had to be queued, don't allow the thread to wait so
+		 * long for cache updates.
+		 */
+		rqstp->rq_chandle.thread_wait = 1*HZ;
+		pool->sp_task_pending = 0;
 	} else {
+		if (pool->sp_task_pending) {
+			pool->sp_task_pending = 0;
+			spin_unlock_bh(&pool->sp_lock);
+			return ERR_PTR(-EAGAIN);
+		}
 		/* No data pending. Go to sleep */
 		svc_thread_enqueue(pool, rqstp);
 
@@ -626,85 +651,163 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
 		 * to bring down the daemons ...
 		 */
 		set_current_state(TASK_INTERRUPTIBLE);
+
+		/*
+		 * checking kthread_should_stop() here allows us to avoid
+		 * locking and signalling when stopping kthreads that call
+		 * svc_recv. If the thread has already been woken up, then
+		 * we can exit here without sleeping. If not, then
If not, then it + * it'll be woken up quickly during the schedule_timeout + */ + if (kthread_should_stop()) { + set_current_state(TASK_RUNNING); + spin_unlock_bh(&pool->sp_lock); + return ERR_PTR(-EINTR); + } + add_wait_queue(&rqstp->rq_wait, &wait); spin_unlock_bh(&pool->sp_lock); - schedule_timeout(timeout); + time_left = schedule_timeout(timeout); try_to_freeze(); spin_lock_bh(&pool->sp_lock); remove_wait_queue(&rqstp->rq_wait, &wait); + if (!time_left) + pool->sp_stats.threads_timedout++; xprt = rqstp->rq_xprt; if (!xprt) { svc_thread_dequeue(pool, rqstp); spin_unlock_bh(&pool->sp_lock); dprintk("svc: server %p, no data yet\n", rqstp); - return signalled()? -EINTR : -EAGAIN; + if (signalled() || kthread_should_stop()) + return ERR_PTR(-EINTR); + else + return ERR_PTR(-EAGAIN); } } spin_unlock_bh(&pool->sp_lock); + return xprt; +} + +static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt) +{ + spin_lock_bh(&serv->sv_lock); + set_bit(XPT_TEMP, &newxpt->xpt_flags); + list_add(&newxpt->xpt_list, &serv->sv_tempsocks); + serv->sv_tmpcnt++; + if (serv->sv_temptimer.function == NULL) { + /* setup timer to age temp transports */ + setup_timer(&serv->sv_temptimer, svc_age_temp_xprts, + (unsigned long)serv); + mod_timer(&serv->sv_temptimer, + jiffies + svc_conn_age_period * HZ); + } + spin_unlock_bh(&serv->sv_lock); + svc_xprt_received(newxpt); +} + +static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) +{ + struct svc_serv *serv = rqstp->rq_server; + int len = 0; - len = 0; if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { dprintk("svc_recv: found XPT_CLOSE\n"); svc_delete_xprt(xprt); - } else if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { + /* Leave XPT_BUSY set on the dead xprt: */ + return 0; + } + if (test_bit(XPT_LISTENER, &xprt->xpt_flags)) { struct svc_xprt *newxpt; + /* + * We know this module_get will succeed because the + * listener holds a reference too + */ + __module_get(xprt->xpt_class->xcl_owner); + svc_check_conn_limits(xprt->xpt_server); newxpt = xprt->xpt_ops->xpo_accept(xprt); - if (newxpt) { - /* - * We know this module_get will succeed because the - * listener holds a reference too - */ - __module_get(newxpt->xpt_class->xcl_owner); - svc_check_conn_limits(xprt->xpt_server); - spin_lock_bh(&serv->sv_lock); - set_bit(XPT_TEMP, &newxpt->xpt_flags); - list_add(&newxpt->xpt_list, &serv->sv_tempsocks); - serv->sv_tmpcnt++; - if (serv->sv_temptimer.function == NULL) { - /* setup timer to age temp transports */ - setup_timer(&serv->sv_temptimer, - svc_age_temp_xprts, - (unsigned long)serv); - mod_timer(&serv->sv_temptimer, - jiffies + svc_conn_age_period * HZ); - } - spin_unlock_bh(&serv->sv_lock); - svc_xprt_received(newxpt); - } - svc_xprt_received(xprt); - } else { + if (newxpt) + svc_add_new_temp_xprt(serv, newxpt); + else + module_put(xprt->xpt_class->xcl_owner); + } else if (xprt->xpt_ops->xpo_has_wspace(xprt)) { + /* XPT_DATA|XPT_DEFERRED case: */ dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n", - rqstp, pool->sp_id, xprt, + rqstp, rqstp->rq_pool->sp_id, xprt, atomic_read(&xprt->xpt_ref.refcount)); rqstp->rq_deferred = svc_deferred_dequeue(xprt); - if (rqstp->rq_deferred) { - svc_xprt_received(xprt); + if (rqstp->rq_deferred) len = svc_deferred_recv(rqstp); - } else + else len = xprt->xpt_ops->xpo_recvfrom(rqstp); dprintk("svc: got len=%d\n", len); + rqstp->rq_reserved = serv->sv_max_mesg; + atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); } + /* clear XPT_BUSY: */ + svc_xprt_received(xprt); + return len; +} + +/* + * 
+ * Receive the next request on any transport.  This code is carefully
+ * organised not to touch any cachelines in the shared svc_serv
+ * structure, only cachelines in the local svc_pool.
+ */
+int svc_recv(struct svc_rqst *rqstp, long timeout)
+{
+	struct svc_xprt *xprt = NULL;
+	struct svc_serv *serv = rqstp->rq_server;
+	int len, err;
+
+	dprintk("svc: server %p waiting for data (to = %ld)\n",
+		rqstp, timeout);
+
+	if (rqstp->rq_xprt)
+		printk(KERN_ERR
+			"svc_recv: service %p, transport not NULL!\n",
+			rqstp);
+	if (waitqueue_active(&rqstp->rq_wait))
+		printk(KERN_ERR
+			"svc_recv: service %p, wait queue active!\n",
+			rqstp);
+
+	err = svc_alloc_arg(rqstp);
+	if (err)
+		return err;
+
+	try_to_freeze();
+	cond_resched();
+	if (signalled() || kthread_should_stop())
+		return -EINTR;
+
+	xprt = svc_get_next_xprt(rqstp, timeout);
+	if (IS_ERR(xprt))
+		return PTR_ERR(xprt);
+
+	len = svc_handle_xprt(rqstp, xprt);
 
 	/* No data, incomplete (TCP) read, or accept() */
-	if (len == 0 || len == -EAGAIN) {
-		rqstp->rq_res.len = 0;
-		svc_xprt_release(rqstp);
-		return -EAGAIN;
-	}
+	if (len <= 0)
+		goto out;
+
 	clear_bit(XPT_OLD, &xprt->xpt_flags);
 
-	rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp));
+	rqstp->rq_secure = xprt->xpt_ops->xpo_secure_port(rqstp);
 	rqstp->rq_chandle.defer = svc_defer;
 
 	if (serv->sv_stats)
 		serv->sv_stats->netcnt++;
 	return len;
+out:
+	rqstp->rq_res.len = 0;
+	svc_xprt_release(rqstp);
+	return -EAGAIN;
 }
-EXPORT_SYMBOL(svc_recv);
+EXPORT_SYMBOL_GPL(svc_recv);
 
 /*
 * Drop request
@@ -714,7 +817,7 @@ void svc_drop(struct svc_rqst *rqstp)
 	dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt);
 	svc_xprt_release(rqstp);
 }
-EXPORT_SYMBOL(svc_drop);
+EXPORT_SYMBOL_GPL(svc_drop);
 
 /*
 * Return reply to client.
@@ -740,11 +843,13 @@ int svc_send(struct svc_rqst *rqstp)
 
 	/* Grab mutex to serialize outgoing data. */
 	mutex_lock(&xprt->xpt_mutex);
-	if (test_bit(XPT_DEAD, &xprt->xpt_flags))
+	if (test_bit(XPT_DEAD, &xprt->xpt_flags)
+			|| test_bit(XPT_CLOSE, &xprt->xpt_flags))
 		len = -ENOTCONN;
 	else
 		len = xprt->xpt_ops->xpo_sendto(rqstp);
 	mutex_unlock(&xprt->xpt_mutex);
+	rpc_wake_up(&xprt->xpt_bc_pending);
 	svc_xprt_release(rqstp);
 
 	if (len == -ECONNREFUSED || len == -ENOTCONN || len == -EAGAIN)
@@ -761,7 +866,6 @@ static void svc_age_temp_xprts(unsigned long closure)
 	struct svc_serv *serv = (struct svc_serv *)closure;
 	struct svc_xprt *xprt;
 	struct list_head *le, *next;
-	LIST_HEAD(to_be_aged);
 
 	dprintk("svc_age_temp_xprts\n");
 
@@ -779,38 +883,46 @@ static void svc_age_temp_xprts(unsigned long closure)
 		 * through, close it.
		 */
		if (!test_and_set_bit(XPT_OLD, &xprt->xpt_flags))
			continue;
-		if (atomic_read(&xprt->xpt_ref.refcount) > 1
-		    || test_bit(XPT_BUSY, &xprt->xpt_flags))
+		if (atomic_read(&xprt->xpt_ref.refcount) > 1 ||
+		    test_bit(XPT_BUSY, &xprt->xpt_flags))
			continue;
-		svc_xprt_get(xprt);
-		list_move(le, &to_be_aged);
+		list_del_init(le);
		set_bit(XPT_CLOSE, &xprt->xpt_flags);
		set_bit(XPT_DETACHED, &xprt->xpt_flags);
-	}
-	spin_unlock_bh(&serv->sv_lock);
-
-	while (!list_empty(&to_be_aged)) {
-		le = to_be_aged.next;
-		/* fiddling the xpt_list node is safe 'cos we're XPT_DETACHED */
-		list_del_init(le);
-		xprt = list_entry(le, struct svc_xprt, xpt_list);
-
		dprintk("queuing xprt %p for closing\n", xprt);

		/* a thread will dequeue and close it soon */
		svc_xprt_enqueue(xprt);
-		svc_xprt_put(xprt);
	}
+	spin_unlock_bh(&serv->sv_lock);

	mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ);
}

+static void call_xpt_users(struct svc_xprt *xprt)
+{
+	struct svc_xpt_user *u;
+
+	spin_lock(&xprt->xpt_lock);
+	while (!list_empty(&xprt->xpt_users)) {
+		u = list_first_entry(&xprt->xpt_users, struct svc_xpt_user, list);
+		list_del(&u->list);
+		u->callback(u);
+	}
+	spin_unlock(&xprt->xpt_lock);
+}
+
 /*
 * Remove a dead transport
 */
-void svc_delete_xprt(struct svc_xprt *xprt)
+static void svc_delete_xprt(struct svc_xprt *xprt)
 {
	struct svc_serv *serv = xprt->xpt_server;
+	struct svc_deferred_req *dr;
+
+	/* Only do this once */
+	if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags))
+		BUG();

	dprintk("svc: svc_delete_xprt(%p)\n", xprt);
	xprt->xpt_ops->xpo_detach(xprt);
@@ -818,20 +930,16 @@ void svc_delete_xprt(struct svc_xprt *xprt)
	spin_lock_bh(&serv->sv_lock);
	if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags))
		list_del_init(&xprt->xpt_list);
-	/*
-	 * We used to delete the transport from whichever list
-	 * it's sk_xprt.xpt_ready node was on, but we don't actually
-	 * need to.  This is because the only time we're called
-	 * while still attached to a queue, the queue itself
-	 * is about to be destroyed (in svc_destroy).
-	 */
-	if (!test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) {
-		BUG_ON(atomic_read(&xprt->xpt_ref.refcount) < 2);
-		if (test_bit(XPT_TEMP, &xprt->xpt_flags))
-			serv->sv_tmpcnt--;
-		svc_xprt_put(xprt);
-	}
+	WARN_ON_ONCE(!list_empty(&xprt->xpt_ready));
+	if (test_bit(XPT_TEMP, &xprt->xpt_flags))
+		serv->sv_tmpcnt--;
	spin_unlock_bh(&serv->sv_lock);
+
+	while ((dr = svc_deferred_dequeue(xprt)) != NULL)
+		kfree(dr);
+
+	call_xpt_users(xprt);
+	svc_xprt_put(xprt);
 }
 
 void svc_close_xprt(struct svc_xprt *xprt)
@@ -840,29 +948,87 @@ void svc_close_xprt(struct svc_xprt *xprt)
	if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags))
		/* someone else will have to effect the close */
		return;
-
-	svc_xprt_get(xprt);
+	/*
+	 * We expect svc_close_xprt() to work even when no threads are
+	 * running (e.g., while configuring the server before starting
+	 * any threads), so if the transport isn't busy, we delete
+	 * it ourself:
+	 */
	svc_delete_xprt(xprt);
-	clear_bit(XPT_BUSY, &xprt->xpt_flags);
-	svc_xprt_put(xprt);
 }
 EXPORT_SYMBOL_GPL(svc_close_xprt);
 
-void svc_close_all(struct list_head *xprt_list)
+static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, struct net *net)
 {
	struct svc_xprt *xprt;
-	struct svc_xprt *tmp;
+	int ret = 0;
 
-	list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) {
+	spin_lock(&serv->sv_lock);
+	list_for_each_entry(xprt, xprt_list, xpt_list) {
+		if (xprt->xpt_net != net)
+			continue;
+		ret++;
		set_bit(XPT_CLOSE, &xprt->xpt_flags);
-		if (test_bit(XPT_BUSY, &xprt->xpt_flags)) {
-			/* Waiting to be processed, but no threads left,
-			 * So just remove it from the waiting list
-			 */
+		svc_xprt_enqueue(xprt);
+	}
+	spin_unlock(&serv->sv_lock);
+	return ret;
+}
+
+static struct svc_xprt *svc_dequeue_net(struct svc_serv *serv, struct net *net)
+{
+	struct svc_pool *pool;
+	struct svc_xprt *xprt;
+	struct svc_xprt *tmp;
+	int i;
+
+	for (i = 0; i < serv->sv_nrpools; i++) {
+		pool = &serv->sv_pools[i];
+
+		spin_lock_bh(&pool->sp_lock);
+		list_for_each_entry_safe(xprt, tmp, &pool->sp_sockets, xpt_ready) {
+			if (xprt->xpt_net != net)
+				continue;
			list_del_init(&xprt->xpt_ready);
-			clear_bit(XPT_BUSY, &xprt->xpt_flags);
+			spin_unlock_bh(&pool->sp_lock);
+			return xprt;
 		}
-		svc_close_xprt(xprt);
+		spin_unlock_bh(&pool->sp_lock);
+	}
+	return NULL;
+}
+
+static void svc_clean_up_xprts(struct svc_serv *serv, struct net *net)
+{
+	struct svc_xprt *xprt;
+
+	while ((xprt = svc_dequeue_net(serv, net))) {
+		set_bit(XPT_CLOSE, &xprt->xpt_flags);
+		svc_delete_xprt(xprt);
+	}
+}
+
+/*
+ * Server threads may still be running (especially in the case where the
+ * service is still running in other network namespaces).
+ *
+ * So we shut down sockets the same way we would on a running server, by
+ * setting XPT_CLOSE, enqueuing, and letting a thread pick it up to do
+ * the close.  In the case there are no such other threads running,
+ * svc_clean_up_xprts() does a simple version of a server's main event
+ * loop, and in the case where there are other threads, we may need to
+ * wait a little while and then check again to see if they're done.
+ */
+void svc_close_net(struct svc_serv *serv, struct net *net)
+{
+	int delay = 0;
+
+	while (svc_close_list(serv, &serv->sv_permsocks, net) +
+	       svc_close_list(serv, &serv->sv_tempsocks, net)) {
+
+		svc_clean_up_xprts(serv, net);
+		msleep(delay++);
	}
 }
 
@@ -876,17 +1042,19 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
		container_of(dreq, struct svc_deferred_req, handle);
	struct svc_xprt *xprt = dr->xprt;
 
-	if (too_many) {
+	spin_lock(&xprt->xpt_lock);
+	set_bit(XPT_DEFERRED, &xprt->xpt_flags);
+	if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) {
+		spin_unlock(&xprt->xpt_lock);
+		dprintk("revisit canceled\n");
		svc_xprt_put(xprt);
		kfree(dr);
		return;
	}
	dprintk("revisit queued\n");
	dr->xprt = NULL;
-	spin_lock(&xprt->xpt_lock);
	list_add(&dr->handle.recent, &xprt->xpt_deferred);
	spin_unlock(&xprt->xpt_lock);
-	set_bit(XPT_DEFERRED, &xprt->xpt_flags);
	svc_xprt_enqueue(xprt);
	svc_xprt_put(xprt);
 }
@@ -905,7 +1073,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
	struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
	struct svc_deferred_req *dr;
 
-	if (rqstp->rq_arg.page_len)
+	if (rqstp->rq_arg.page_len || !rqstp->rq_usedeferral)
		return NULL; /* if more than a page, give up FIXME */
	if (rqstp->rq_deferred) {
		dr = rqstp->rq_deferred;
@@ -934,6 +1102,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
	}
	svc_xprt_get(rqstp->rq_xprt);
	dr->xprt = rqstp->rq_xprt;
+	rqstp->rq_dropme = true;
 
	dr->handle.revisit = svc_revisit;
	return &dr->handle;
@@ -971,19 +1140,25 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
	if (!test_bit(XPT_DEFERRED, &xprt->xpt_flags))
		return NULL;
	spin_lock(&xprt->xpt_lock);
-	clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
	if (!list_empty(&xprt->xpt_deferred)) {
		dr = list_entry(xprt->xpt_deferred.next,
				struct svc_deferred_req,
				handle.recent);
		list_del_init(&dr->handle.recent);
-		set_bit(XPT_DEFERRED, &xprt->xpt_flags);
-	}
+	} else
+		clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
	spin_unlock(&xprt->xpt_lock);
	return dr;
 }
 
-/*
+/**
+ * svc_find_xprt - find an RPC transport instance
+ * @serv: pointer to svc_serv to search
+ * @xcl_name: C string containing transport's class name
+ * @net: owner net pointer
+ * @af: Address family of transport's local address
+ * @port: transport's IP port number
+ *
 * Return the transport instance pointer for the endpoint accepting
 * connections/peer traffic from the specified transport class,
 * address family and port.
@@ -992,23 +1167,26 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
 * wild-card, and will result in matching the first transport in the
 * service's list that has a matching class name.
 */
-struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name,
-			       int af, int port)
+struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
+			       struct net *net, const sa_family_t af,
+			       const unsigned short port)
 {
	struct svc_xprt *xprt;
	struct svc_xprt *found = NULL;
 
	/* Sanity check the args */
-	if (!serv || !xcl_name)
+	if (serv == NULL || xcl_name == NULL)
		return found;
 
	spin_lock_bh(&serv->sv_lock);
	list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) {
+		if (xprt->xpt_net != net)
+			continue;
		if (strcmp(xprt->xpt_class->xcl_name, xcl_name))
			continue;
		if (af != AF_UNSPEC && af != xprt->xpt_local.ss_family)
			continue;
-		if (port && port != svc_xprt_local_port(xprt))
+		if (port != 0 && port != svc_xprt_local_port(xprt))
			continue;
		found = xprt;
		svc_xprt_get(xprt);
@@ -1019,37 +1197,137 @@ struct svc_xprt *svc_find_xprt(struct svc_serv *serv, char *xcl_name,
 }
 EXPORT_SYMBOL_GPL(svc_find_xprt);
 
-/*
- * Format a buffer with a list of the active transports. A zero for
- * the buflen parameter disables target buffer overflow checking.
+static int svc_one_xprt_name(const struct svc_xprt *xprt,
+			     char *pos, int remaining)
+{
+	int len;
+
+	len = snprintf(pos, remaining, "%s %u\n",
+			xprt->xpt_class->xcl_name,
+			svc_xprt_local_port(xprt));
+	if (len >= remaining)
+		return -ENAMETOOLONG;
+	return len;
+}
+
+/**
+ * svc_xprt_names - format a buffer with a list of transport names
+ * @serv: pointer to an RPC service
+ * @buf: pointer to a buffer to be filled in
+ * @buflen: length of buffer to be filled in
+ *
+ * Fills in @buf with a string containing a list of transport names,
+ * each name terminated with '\n'.
+ *
+ * Returns positive length of the filled-in string on success; otherwise
+ * a negative errno value is returned if an error occurs.
 */
-int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen)
+int svc_xprt_names(struct svc_serv *serv, char *buf, const int buflen)
 {
	struct svc_xprt *xprt;
-	char xprt_str[64];
-	int totlen = 0;
-	int len;
+	int len, totlen;
+	char *pos;
 
	/* Sanity check args */
	if (!serv)
		return 0;
 
	spin_lock_bh(&serv->sv_lock);
+
+	pos = buf;
+	totlen = 0;
	list_for_each_entry(xprt, &serv->sv_permsocks, xpt_list) {
-		len = snprintf(xprt_str, sizeof(xprt_str),
-			       "%s %d\n", xprt->xpt_class->xcl_name,
-			       svc_xprt_local_port(xprt));
-		/* If the string was truncated, replace with error string */
-		if (len >= sizeof(xprt_str))
-			strcpy(xprt_str, "name-too-long\n");
-		/* Don't overflow buffer */
-		len = strlen(xprt_str);
-		if (buflen && (len + totlen >= buflen))
+		len = svc_one_xprt_name(xprt, pos, buflen - totlen);
+		if (len < 0) {
+			*buf = '\0';
+			totlen = len;
+		}
+		if (len <= 0)
			break;
-		strcpy(buf+totlen, xprt_str);
+
+		pos += len;
		totlen += len;
	}
+
	spin_unlock_bh(&serv->sv_lock);
	return totlen;
 }
 EXPORT_SYMBOL_GPL(svc_xprt_names);
 
+
+/*----------------------------------------------------------------------------*/
+
+static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos)
+{
+	unsigned int pidx = (unsigned int)*pos;
+	struct svc_serv *serv = m->private;
+
+	dprintk("svc_pool_stats_start, *pidx=%u\n", pidx);
+
+	if (!pidx)
+		return SEQ_START_TOKEN;
+	return (pidx > serv->sv_nrpools ?
+		NULL : &serv->sv_pools[pidx-1]);
+}
+
+static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos)
+{
+	struct svc_pool *pool = p;
+	struct svc_serv *serv = m->private;
+
+	dprintk("svc_pool_stats_next, *pos=%llu\n", *pos);
+
+	if (p == SEQ_START_TOKEN) {
+		pool = &serv->sv_pools[0];
+	} else {
+		unsigned int pidx = (pool - &serv->sv_pools[0]);
+		if (pidx < serv->sv_nrpools-1)
+			pool = &serv->sv_pools[pidx+1];
+		else
+			pool = NULL;
+	}
+	++*pos;
+	return pool;
+}
+
+static void svc_pool_stats_stop(struct seq_file *m, void *p)
+{
+}
+
+static int svc_pool_stats_show(struct seq_file *m, void *p)
+{
+	struct svc_pool *pool = p;
+
+	if (p == SEQ_START_TOKEN) {
+		seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken threads-timedout\n");
+		return 0;
+	}
+
+	seq_printf(m, "%u %lu %lu %lu %lu\n",
+		pool->sp_id,
+		pool->sp_stats.packets,
+		pool->sp_stats.sockets_queued,
+		pool->sp_stats.threads_woken,
+		pool->sp_stats.threads_timedout);
+
+	return 0;
+}
+
+static const struct seq_operations svc_pool_stats_seq_ops = {
+	.start	= svc_pool_stats_start,
+	.next	= svc_pool_stats_next,
+	.stop	= svc_pool_stats_stop,
+	.show	= svc_pool_stats_show,
+};
+
+int svc_pool_stats_open(struct svc_serv *serv, struct file *file)
+{
+	int err;
+
+	err = seq_open(file, &svc_pool_stats_seq_ops);
+	if (!err)
+		((struct seq_file *) file->private_data)->private = serv;
+	return err;
+}
+EXPORT_SYMBOL(svc_pool_stats_open);
+
+/*----------------------------------------------------------------------------*/
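
Usage note: the reworked svc_create_xprt() takes a network namespace and an address family, and returns the bound port number or a negative errno (an unknown class name now yields -EPROTONOSUPPORT instead of -ENOENT). A minimal sketch of service setup code against this API; the function name is hypothetical, and SVC_SOCK_DEFAULTS is assumed from linux/sunrpc/svcsock.h:

static int example_setup_listeners(struct svc_serv *serv, struct net *net,
				   unsigned short port)
{
	int err;

	/* PF_INET maps to the sockaddr_in case in __svc_xpo_create() */
	err = svc_create_xprt(serv, "tcp", net, PF_INET, port,
			      SVC_SOCK_DEFAULTS);
	if (err < 0)
		return err;

	/*
	 * PF_INET6 is handled only under CONFIG_IPV6; tolerate
	 * -EAFNOSUPPORT on kernels built without it.
	 */
	err = svc_create_xprt(serv, "tcp", net, PF_INET6, port,
			      SVC_SOCK_DEFAULTS);
	if (err < 0 && err != -EAFNOSUPPORT)
		return err;
	return 0;
}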
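
The new xpt_users list gives holders of transport references a teardown hook: svc_delete_xprt() now runs every registered callback exactly once via call_xpt_users(). A sketch of a consumer, assuming the register_xpt_user() helper that accompanies struct svc_xpt_user in linux/sunrpc/svc_xprt.h; all example_* names are hypothetical:

struct example_conn {
	struct svc_xpt_user xpt_user;	/* must stay valid while registered */
	/* ... per-connection state ... */
};

static void example_conn_gone(struct svc_xpt_user *u)
{
	struct example_conn *conn =
		container_of(u, struct example_conn, xpt_user);

	/* the transport is being destroyed; drop state tied to it */
	kfree(conn);
}

static int example_watch_xprt(struct svc_xprt *xprt, struct example_conn *conn)
{
	conn->xpt_user.callback = example_conn_gone;
	/* fails if the transport is already on its way down */
	return register_xpt_user(xprt, &conn->xpt_user);
}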
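
The seq_file operations at the bottom back a per-pool statistics file (nfsd wires svc_pool_stats_open() up as /proc/fs/nfsd/pool_stats). The format is one header line followed by one line per pool, as emitted by svc_pool_stats_show(); the numbers below are invented for illustration:

# pool packets-arrived sockets-enqueued threads-woken threads-timedout
0 18683 4 18679 2
1 17362 3 17359 1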
