-rw-r--r--  Documentation/filesystems/nfs-rdma.txt    252
-rw-r--r--  fs/Kconfig    109
-rw-r--r--  fs/lockd/host.c    73
-rw-r--r--  fs/lockd/svc.c    150
-rw-r--r--  fs/lockd/svclock.c    6
-rw-r--r--  fs/lockd/svcshare.c    3
-rw-r--r--  fs/nfs/callback.c    93
-rw-r--r--  fs/nfs/symlink.c    1
-rw-r--r--  fs/nfsd/auth.c    1
-rw-r--r--  fs/nfsd/export.c    9
-rw-r--r--  fs/nfsd/nfs4callback.c    28
-rw-r--r--  fs/nfsd/nfs4idmap.c    2
-rw-r--r--  fs/nfsd/nfs4state.c    71
-rw-r--r--  fs/nfsd/nfs4xdr.c    13
-rw-r--r--  fs/nfsd/nfsctl.c    22
-rw-r--r--  fs/nfsd/nfsfh.c    228
-rw-r--r--  fs/nfsd/nfssvc.c    2
-rw-r--r--  fs/nfsd/vfs.c    35
-rw-r--r--  include/linux/exportfs.h    2
-rw-r--r--  include/linux/nfs3.h    2
-rw-r--r--  include/linux/nfsd/Kbuild    4
-rw-r--r--  include/linux/nfsd/cache.h    2
-rw-r--r--  include/linux/nfsd/nfsd.h    11
-rw-r--r--  include/linux/sunrpc/cache.h    1
-rw-r--r--  include/linux/sunrpc/gss_krb5.h    6
-rw-r--r--  include/linux/sunrpc/svc.h    1
-rw-r--r--  include/linux/sunrpc/svcauth.h    5
-rw-r--r--  include/linux/sunrpc/svcsock.h    4
-rw-r--r--  include/net/ipv6.h    9
-rw-r--r--  net/sunrpc/auth_gss/gss_generic_token.c    4
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_crypto.c    6
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_seal.c    9
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_seqnum.c    4
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_unseal.c    2
-rw-r--r--  net/sunrpc/auth_gss/gss_krb5_wrap.c    8
-rw-r--r--  net/sunrpc/auth_gss/gss_spkm3_seal.c    4
-rw-r--r--  net/sunrpc/auth_gss/svcauth_gss.c    9
-rw-r--r--  net/sunrpc/cache.c    1
-rw-r--r--  net/sunrpc/svc.c    25
-rw-r--r--  net/sunrpc/svc_xprt.c    30
-rw-r--r--  net/sunrpc/svcauth_unix.c    118
-rw-r--r--  net/sunrpc/svcsock.c    29
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c    2
43 files changed, 879 insertions, 517 deletions
diff --git a/Documentation/filesystems/nfs-rdma.txt b/Documentation/filesystems/nfs-rdma.txt
new file mode 100644
index 00000000000..1ae34879574
--- /dev/null
+++ b/Documentation/filesystems/nfs-rdma.txt
@@ -0,0 +1,252 @@
+################################################################################
+# #
+# NFS/RDMA README #
+# #
+################################################################################
+
+ Author: NetApp and Open Grid Computing
+ Date: February 25, 2008
+
+Table of Contents
+~~~~~~~~~~~~~~~~~
+ - Overview
+ - Getting Help
+ - Installation
+ - Check RDMA and NFS Setup
+ - NFS/RDMA Setup
+
+Overview
+~~~~~~~~
+
+ This document describes how to install and set up the Linux NFS/RDMA client
+ and server software.
+
+ The NFS/RDMA client was first included in Linux 2.6.24. The NFS/RDMA server
+ was first included in the following release, Linux 2.6.25.
+
+ In our testing, we have obtained excellent performance results (full 10Gbit
+ wire bandwidth at minimal client CPU) under many workloads. The code passes
+ the full Connectathon test suite and operates over both Infiniband and iWARP
+ RDMA adapters.
+
+Getting Help
+~~~~~~~~~~~~
+
+ If you get stuck, you can ask questions on the
+
+ nfs-rdma-devel@lists.sourceforge.net
+
+ mailing list.
+
+Installation
+~~~~~~~~~~~~
+
+ These instructions are a step-by-step guide to building a machine for
+ use with NFS/RDMA.
+
+ - Install an RDMA device
+
+ Any device supported by the drivers in drivers/infiniband/hw is acceptable.
+
+ Testing has been performed using several Mellanox-based IB cards, the
+ Ammasso AMS1100 iWARP adapter, and the Chelsio cxgb3 iWARP adapter.
+
+ - Install a Linux distribution and tools
+
+ The first kernel release to contain both the NFS/RDMA client and server was
+ Linux 2.6.25. Therefore, a distribution compatible with this and subsequent
+ Linux kernel releases should be installed.
+
+ The procedures described in this document have been tested with
+ distributions from Red Hat's Fedora Project (http://fedora.redhat.com/).
+
+ - Install nfs-utils-1.1.1 or greater on the client
+
+ An NFS/RDMA mount point can only be obtained by using the mount.nfs
+ command in nfs-utils-1.1.1 or greater. To see which version of mount.nfs
+ you are using, type:
+
+ > /sbin/mount.nfs -V
+
+ If the version is less than 1.1.1 or the command does not exist,
+ then you will need to install the latest version of nfs-utils.
+
+ Download the latest package from:
+
+ http://www.kernel.org/pub/linux/utils/nfs
+
+ Uncompress the package and follow the installation instructions.
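+
+ For example, assuming the package unpacks into a directory named
+ nfs-utils-1.1.1 (the file and directory names are illustrative), a typical
+ build sequence is:
+
+ > tar xzf nfs-utils-1.1.1.tar.gz
+ > cd nfs-utils-1.1.1
+ > ./configure
+ > make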
+
+ If you will not be using GSS and NFSv4, the installation process
+ can be simplified by disabling these features when running configure:
+
+ > ./configure --disable-gss --disable-nfsv4
+
+ For more information on this see the package's README and INSTALL files.
+
+ After building the nfs-utils package, there will be a mount.nfs binary in
+ the utils/mount directory. This binary can be used to initiate NFS v2, v3,
+ or v4 mounts. To initiate a v4 mount, the binary must be called mount.nfs4.
+ The standard technique is to create a symlink called mount.nfs4 to mount.nfs.
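+
+ For example, from the directory containing the newly built binary (the
+ location is illustrative):
+
+ > ln -s mount.nfs mount.nfs4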
+
+ NOTE: mount.nfs and therefore nfs-utils-1.1.1 or greater is only needed
+ on the NFS client machine. You do not need this specific version of
+ nfs-utils on the server. Furthermore, only the mount.nfs command from
+ nfs-utils-1.1.1 is needed on the client.
+
+ - Install a Linux kernel with NFS/RDMA
+
+ The NFS/RDMA client and server are both included in the mainline Linux
+ kernel version 2.6.25 and later. This and other versions of the 2.6 Linux
+ kernel can be found at:
+
+ ftp://ftp.kernel.org/pub/linux/kernel/v2.6/
+
+ Download the sources and place them in an appropriate location.
+
+ - Configure the RDMA stack
+
+ Make sure your kernel configuration has RDMA support enabled. Under
+ Device Drivers -> InfiniBand support, update the kernel configuration
+ to enable InfiniBand support [NOTE: the option name is misleading. Enabling
+ InfiniBand support is required for all RDMA devices (IB, iWARP, etc.)].
+
+ Enable the appropriate IB HCA support (mlx4, mthca, ehca, ipath, etc.) or
+ iWARP adapter support (amso, cxgb3, etc.).
+
+ If you are using InfiniBand, be sure to enable IP-over-InfiniBand support.
+
+ - Configure the NFS client and server
+
+ Your kernel configuration must also have NFS file system support and/or
+ NFS server support enabled. These and other NFS related configuration
+ options can be found under File Systems -> Network File Systems.
+
+ - Build, install, reboot
+
+ The NFS/RDMA code will be enabled automatically if NFS and RDMA
+ are turned on. The NFS/RDMA client and server are configured via the hidden
+ SUNRPC_XPRT_RDMA config option that depends on SUNRPC and INFINIBAND. The
+ value of SUNRPC_XPRT_RDMA will be:
+
+ - N if either SUNRPC or INFINIBAND is N, in this case the NFS/RDMA client
+ and server will not be built
+ - M if both SUNRPC and INFINIBAND are on (M or Y) and at least one is M,
+ in this case the NFS/RDMA client and server will be built as modules
+ - Y if both SUNRPC and INFINIBAND are Y, in this case the NFS/RDMA client
+ and server will be built into the kernel
+
+ Therefore, if you have followed the steps above and turned on NFS and RDMA,
+ the NFS/RDMA client and server will be built.
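+
+ As a point of reference, a configuration that builds the NFS/RDMA client
+ and server as modules might contain settings such as the following
+ (illustrative; option names can vary between kernel versions):
+
+ CONFIG_INFINIBAND=m
+ CONFIG_INFINIBAND_IPOIB=m
+ CONFIG_NFS_FS=m
+ CONFIG_NFSD=m
+ CONFIG_SUNRPC_XPRT_RDMA=m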
+
+ Build a new kernel, install it, boot it.
+
+Check RDMA and NFS Setup
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Before configuring the NFS/RDMA software, it is a good idea to test
+ your new kernel to ensure that the kernel is working correctly.
+ In particular, it is a good idea to verify that the RDMA stack
+ is functioning as expected and standard NFS over TCP/IP and/or UDP/IP
+ is working properly.
+
+ - Check RDMA Setup
+
+ If you built the RDMA components as modules, load them at
+ this time. For example, if you are using a Mellanox Tavor/Sinai/Arbel
+ card:
+
+ > modprobe ib_mthca
+ > modprobe ib_ipoib
+
+ If you are using InfiniBand, make sure there is a Subnet Manager (SM)
+ running on the network. If your IB switch has an embedded SM, you can
+ use it. Otherwise, you will need to run an SM, such as OpenSM, on one
+ of your end nodes.
+
+ If an SM is running on your network, you should see the following:
+
+ > cat /sys/class/infiniband/driverX/ports/1/state
+ 4: ACTIVE
+
+ where driverX is mthca0, ipath5, ehca3, etc.
+
+ To further test the InfiniBand software stack, use IPoIB (this
+ assumes you have two IB hosts named host1 and host2):
+
+ host1> ifconfig ib0 a.b.c.x
+ host2> ifconfig ib0 a.b.c.y
+ host1> ping a.b.c.y
+ host2> ping a.b.c.x
+
+ For other device types, follow the appropriate procedures.
+
+ - Check NFS Setup
+
+ For the NFS components enabled above (client and/or server),
+ test their functionality over standard Ethernet using TCP/IP or UDP/IP.
+
+NFS/RDMA Setup
+~~~~~~~~~~~~~~
+
+ We recommend that you use two machines, one to act as the client and
+ one to act as the server.
+
+ One time configuration:
+
+ - On the server system, configure the /etc/exports file and
+ start the NFS/RDMA server.
+
+ Exports entries with the following format have been tested:
+
+ /vol0 10.97.103.47(rw,async) 192.168.0.47(rw,async,insecure,no_root_squash)
+
+ Here the first IP address is the client's Ethernet address and the second
+ IP address is the client's IPoIB address.
+
+ Each time a machine boots:
+
+ - Load and configure the RDMA drivers
+
+ For InfiniBand using a Mellanox adapter:
+
+ > modprobe ib_mthca
+ > modprobe ib_ipoib
+ > ifconfig ib0 a.b.c.d
+
+ NOTE: use unique addresses for the client and server
+
+ - Start the NFS server
+
+ If the NFS/RDMA server was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in kernel config),
+ load the RDMA transport module:
+
+ > modprobe svcrdma
+
+ Regardless of how the server was built (module or built-in), start the server:
+
+ > /etc/init.d/nfs start
+
+ or
+
+ > service nfs start
+
+ Instruct the server to listen on the RDMA transport:
+
+ > echo rdma 2050 > /proc/fs/nfsd/portlist
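+
+ To confirm that the RDMA listener was added, the file can be read back;
+ the output should include an rdma entry (the ports shown are illustrative):
+
+ > cat /proc/fs/nfsd/portlist
+ rdma 2050
+ tcp 2049
+ udp 2049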
+
+ - On the client system
+
+ If the NFS/RDMA client was built as a module (CONFIG_SUNRPC_XPRT_RDMA=m in kernel config),
+ load the RDMA client module:
+
+ > modprobe xprtrdma
+
+ Regardless of how the client was built (module or built-in), issue the mount.nfs command:
+
+ > /path/to/your/mount.nfs <IPoIB-server-name-or-address>:/<export> /mnt -i -o rdma,port=2050
+
+ To verify that the mount is using RDMA, run "cat /proc/mounts" and check the
+ "proto" field for the given mount.
+
+ Congratulations! You're using NFS/RDMA!
diff --git a/fs/Kconfig b/fs/Kconfig
index 8b18a875867..56c83f40cdb 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -411,7 +411,7 @@ config JFS_STATISTICS
to be made available to the user in the /proc/fs/jfs/ directory.
config FS_POSIX_ACL
-# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs)
+# Posix ACL utility routines (for now, only ext2/ext3/jfs/reiserfs/nfs4)
#
# NOTE: you can implement Posix ACLs without these helpers (XFS does).
# Never use this symbol for ifdefs.
@@ -1694,75 +1694,80 @@ config NFSD
select LOCKD
select SUNRPC
select EXPORTFS
- select NFSD_V2_ACL if NFSD_V3_ACL
select NFS_ACL_SUPPORT if NFSD_V2_ACL
- select NFSD_TCP if NFSD_V4
- select CRYPTO_MD5 if NFSD_V4
- select CRYPTO if NFSD_V4
- select FS_POSIX_ACL if NFSD_V4
- select PROC_FS if NFSD_V4
- select PROC_FS if SUNRPC_GSS
- help
- If you want your Linux box to act as an NFS *server*, so that other
- computers on your local network which support NFS can access certain
- directories on your box transparently, you have two options: you can
- use the self-contained user space program nfsd, in which case you
- should say N here, or you can say Y and use the kernel based NFS
- server. The advantage of the kernel based solution is that it is
- faster.
-
- In either case, you will need support software; the respective
- locations are given in the file <file:Documentation/Changes> in the
- NFS section.
-
- If you say Y here, you will get support for version 2 of the NFS
- protocol (NFSv2). If you also want NFSv3, say Y to the next question
- as well.
-
- Please read the NFS-HOWTO, available from
- <http://www.tldp.org/docs.html#howto>.
-
- To compile the NFS server support as a module, choose M here: the
- module will be called nfsd. If unsure, say N.
+ help
+ Choose Y here if you want to allow other computers to access
+ files residing on this system using Sun's Network File System
+ protocol. To compile the NFS server support as a module,
+ choose M here: the module will be called nfsd.
+
+ You may choose to use a user-space NFS server instead, in which
+ case you can choose N here.
+
+ To export local file systems using NFS, you also need to install
+ user space programs which can be found in the Linux nfs-utils
+ package, available from http://linux-nfs.org/. More detail about
+ the Linux NFS server implementation is available via the
+ exports(5) man page.
+
+ Below you can choose which versions of the NFS protocol are
+ available to clients mounting the NFS server on this system.
+ Support for NFS version 2 (RFC 1094) is always available when
+ CONFIG_NFSD is selected.
+
+ If unsure, say N.
config NFSD_V2_ACL
bool
depends on NFSD
config NFSD_V3
- bool "Provide NFSv3 server support"
+ bool "NFS server support for NFS version 3"
depends on NFSD
help
- If you would like to include the NFSv3 server as well as the NFSv2
- server, say Y here. If unsure, say Y.
+ This option enables support in your system's NFS server for
+ version 3 of the NFS protocol (RFC 1813).
+
+ If unsure, say Y.
config NFSD_V3_ACL
- bool "Provide server support for the NFSv3 ACL protocol extension"
+ bool "NFS server support for the NFSv3 ACL protocol extension"
depends on NFSD_V3
+ select NFSD_V2_ACL
help
- Implement the NFSv3 ACL protocol extension for manipulating POSIX
- Access Control Lists on exported file systems. NFS clients should
- be compiled with the NFSv3 ACL protocol extension; see the
- CONFIG_NFS_V3_ACL option. If unsure, say N.
+ Solaris NFS servers support an auxiliary NFSv3 ACL protocol that
+ never became an official part of the NFS version 3 protocol.
+ This protocol extension allows applications on NFS clients to
+ manipulate POSIX Access Control Lists on files residing on NFS
+ servers. NFS servers enforce POSIX ACLs on local files whether
+ this protocol is available or not.
+
+ This option enables support in your system's NFS server for the
+ NFSv3 ACL protocol extension allowing NFS clients to manipulate
+ POSIX ACLs on files exported by your system's NFS server. NFS
+ clients which support the Solaris NFSv3 ACL protocol can then
+ access and modify ACLs on your NFS server.
+
+ To store ACLs on your NFS server, you also need to enable ACL-
+ related CONFIG options for your local file systems of choice.
+
+ If unsure, say N.
config NFSD_V4
- bool "Provide NFSv4 server support (EXPERIMENTAL)"
- depends on NFSD && NFSD_V3 && EXPERIMENTAL
+ bool "NFS server support for NFS version 4 (EXPERIMENTAL)"
+ depends on NFSD && PROC_FS && EXPERIMENTAL
+ select NFSD_V3
+ select FS_POSIX_ACL
select RPCSEC_GSS_KRB5
help
- If you would like to include the NFSv4 server as well as the NFSv2
- and NFSv3 servers, say Y here. This feature is experimental, and
- should only be used if you are interested in helping to test NFSv4.
- If unsure, say N.
+ This option enables support in your system's NFS server for
+ version 4 of the NFS protocol (RFC 3530).
-config NFSD_TCP
- bool "Provide NFS server over TCP support"
- depends on NFSD
- default y
- help
- If you want your NFS server to support TCP connections, say Y here.
- TCP connections usually perform better than the default UDP when
- the network is lossy or congested. If unsure, say Y.
+ To export files using NFSv4, you need to install additional user
+ space programs which can be found in the Linux nfs-utils package,
+ available from http://linux-nfs.org/.
+
+ If unsure, say N.
config ROOT_NFS
bool "Root file system on NFS"
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index f1ef49fff11..c7854791898 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -19,12 +19,11 @@
#define NLMDBG_FACILITY NLMDBG_HOSTCACHE
-#define NLM_HOST_MAX 64
#define NLM_HOST_NRHASH 32
#define NLM_ADDRHASH(addr) (ntohl(addr) & (NLM_HOST_NRHASH-1))
#define NLM_HOST_REBIND (60 * HZ)
-#define NLM_HOST_EXPIRE ((nrhosts > NLM_HOST_MAX)? 300 * HZ : 120 * HZ)
-#define NLM_HOST_COLLECT ((nrhosts > NLM_HOST_MAX)? 120 * HZ : 60 * HZ)
+#define NLM_HOST_EXPIRE (300 * HZ)
+#define NLM_HOST_COLLECT (120 * HZ)
static struct hlist_head nlm_hosts[NLM_HOST_NRHASH];
static unsigned long next_gc;
@@ -142,9 +141,7 @@ nlm_lookup_host(int server, const struct sockaddr_in *sin,
INIT_LIST_HEAD(&host->h_granted);
INIT_LIST_HEAD(&host->h_reclaim);
- if (++nrhosts > NLM_HOST_MAX)
- next_gc = 0;
-
+ nrhosts++;
out:
mutex_unlock(&nlm_host_mutex);
return host;
@@ -460,7 +457,7 @@ nlm_gc_hosts(void)
* Manage NSM handles
*/
static LIST_HEAD(nsm_handles);
-static DEFINE_MUTEX(nsm_mutex);
+static DEFINE_SPINLOCK(nsm_lock);
static struct nsm_handle *
__nsm_find(const struct sockaddr_in *sin,
@@ -468,7 +465,7 @@ __nsm_find(const struct sockaddr_in *sin,
int create)
{
struct nsm_handle *nsm = NULL;
- struct list_head *pos;
+ struct nsm_handle *pos;
if (!sin)
return NULL;
@@ -482,38 +479,43 @@ __nsm_find(const struct sockaddr_in *sin,
return NULL;
}
- mutex_lock(&nsm_mutex);
- list_for_each(pos, &nsm_handles) {
- nsm = list_entry(pos, struct nsm_handle, sm_link);
+retry:
+ spin_lock(&nsm_lock);
+ list_for_each_entry(pos, &nsm_handles, sm_link) {
if (hostname && nsm_use_hostnames) {
- if (strlen(nsm->sm_name) != hostname_len
- || memcmp(nsm->sm_name, hostname, hostname_len))
+ if (strlen(pos->sm_name) != hostname_len
+ || memcmp(pos->sm_name, hostname, hostname_len))
continue;
- } else if (!nlm_cmp_addr(&nsm->sm_addr, sin))
+ } else if (!nlm_cmp_addr(&pos->sm_addr, sin))
continue;
- atomic_inc(&nsm->sm_count);
- goto out;
+ atomic_inc(&pos->sm_count);
+ kfree(nsm);
+ nsm = pos;
+ goto found;
}
-
- if (!create) {
- nsm = NULL;
- goto out;
+ if (nsm) {
+ list_add(&nsm->sm_link, &nsm_handles);
+ goto found;
}
+ spin_unlock(&nsm_lock);
+
+ if (!create)
+ return NULL;
nsm = kzalloc(sizeof(*nsm) + hostname_len + 1, GFP_KERNEL);
- if (nsm != NULL) {
- nsm->sm_addr = *sin;
- nsm->sm_name = (char *) (nsm + 1);
- memcpy(nsm->sm_name, hostname, hostname_len);
- nsm->sm_name[hostname_len] = '\0';
- atomic_set(&nsm->sm_count, 1);
+ if (nsm == NULL)
+ return NULL;
- list_add(&nsm->sm_link, &nsm_handles);
- }
+ nsm->sm_addr = *sin;
+ nsm->sm_name = (char *) (nsm + 1);
+ memcpy(nsm->sm_name, hostname, hostname_len);
+ nsm->sm_name[hostname_len] = '\0';
+ atomic_set(&nsm->sm_count, 1);
+ goto retry;
-out:
- mutex_unlock(&nsm_mutex);
+found:
+ spin_unlock(&nsm_lock);
return nsm;
}
@@ -532,12 +534,9 @@ nsm_release(struct nsm_handle *nsm)
{
if (!nsm)
return;
- if (atomic_dec_and_test(&nsm->sm_count)) {
- mutex_lock(&nsm_mutex);
- if (atomic_read(&nsm->sm_count) == 0) {
- list_del(&nsm->sm_link);
- kfree(nsm);
- }
- mutex_unlock(&nsm_mutex);
+ if (atomic_dec_and_lock(&nsm->sm_count, &nsm_lock)) {
+ list_del(&nsm->sm_link);
+ spin_unlock(&nsm_lock);
+ kfree(nsm);
}
}
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 1ed8bd4de94..cf977bbcf30 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -25,6 +25,7 @@
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/mutex.h>
+#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/sunrpc/types.h>
@@ -48,14 +49,11 @@ EXPORT_SYMBOL(nlmsvc_ops);
static DEFINE_MUTEX(nlmsvc_mutex);
static unsigned int nlmsvc_users;
-static pid_t nlmsvc_pid;
+static struct task_struct *nlmsvc_task;
static struct svc_serv *nlmsvc_serv;
int nlmsvc_grace_period;
unsigned long nlmsvc_timeout;
-static DECLARE_COMPLETION(lockd_start_done);
-static DECLARE_WAIT_QUEUE_HEAD(lockd_exit);
-
/*
* These can be set at insmod time (useful for NFS as root filesystem),
* and also changed through the sysctl interface. -- Jamie Lokier, Aug 2003
@@ -111,35 +109,30 @@ static inline void clear_grace_period(void)
/*
* This is the lockd kernel thread
*/
-static void
-lockd(struct svc_rqst *rqstp)
+static int
+lockd(void *vrqstp)
{
- int err = 0;
+ int err = 0, preverr = 0;
+ struct svc_rqst *rqstp = vrqstp;
unsigned long grace_period_expire;
- /* Lock module and set up kernel thread */
- /* lockd_up is waiting for us to startup, so will
- * be holding a reference to this module, so it
- * is safe to just claim another reference
- */
- __module_get(THIS_MODULE);
- lock_kernel();
-
- /*
- * Let our maker know we're running.
- */
- nlmsvc_pid = current->pid;
- nlmsvc_serv = rqstp->rq_server;
- complete(&lockd_start_done);
-
- daemonize("lockd");
+ /* try_to_freeze() is called from svc_recv() */
set_freezable();
- /* Process request with signals blocked, but allow SIGKILL. */
+ /* Allow SIGKILL to tell lockd to drop all of its locks */
allow_signal(SIGKILL);
dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");
+ /*
+ * FIXME: it would be nice if lockd didn't spend its entire life
+ * running under the BKL. At the very least, it would be good to
+ * have someone clarify what it's intended to protect here. I've
+ * seen some handwavy posts about posix locking needing to be
+ * done under the BKL, but it's far from clear.
+ */
+ lock_kernel();
+
if (!nlm_timeout)
nlm_timeout = LOCKD_DFLT_TIMEO;
nlmsvc_timeout = nlm_timeout * HZ;
@@ -148,10 +141,9 @@ lockd(struct svc_rqst *rqstp)
/*
* The main request loop. We don't terminate until the last
- * NFS mount or NFS daemon has gone away, and we've been sent a
- * signal, or else another process has taken over our job.
+ * NFS mount or NFS daemon has gone away.
*/
- while ((nlmsvc_users || !signalled()) && nlmsvc_pid == current->pid) {
+ while (!kthread_should_stop()) {
long timeout = MAX_SCHEDULE_TIMEOUT;
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
@@ -161,6 +153,7 @@ lockd(struct svc_rqst *rqstp)
nlmsvc_invalidate_all();
grace_period_expire = set_grace_period();
}
+ continue;
}
/*
@@ -179,14 +172,20 @@ lockd(struct svc_rqst *rqstp)
* recvfrom routine.
*/
err = svc_recv(rqstp, timeout);
- if (err == -EAGAIN || err == -EINTR)
+ if (err == -EAGAIN || err == -EINTR) {
+ preverr = err;
continue;
+ }
if (err < 0) {
- printk(KERN_WARNING
- "lockd: terminating on error %d\n",
- -err);
- break;
+ if (err != preverr) {
+ printk(KERN_WARNING "%s: unexpected error "
+ "from svc_recv (%d)\n", __func__, err);
+ preverr = err;
+ }
+ schedule_timeout_interruptible(HZ);
+ continue;
}
+ preverr = err;
dprintk("lockd: request from %s\n",
svc_print_addr(rqstp, buf, sizeof(buf)));
@@ -195,28 +194,19 @@ lockd(struct svc_rqst *rqstp)
}
flush_signals(current);
+ if (nlmsvc_ops)
+ nlmsvc_invalidate_all();
+ nlm_shutdown_hosts();
- /*
- * Check whether there's a new lockd process before
- * shutting down the hosts and clearing the slot.
- */
- if (!nlmsvc_pid || current->pid == nlmsvc_pid) {
- if (nlmsvc_ops)
- nlmsvc_invalidate_all();
- nlm_shutdown_hosts();
- nlmsvc_pid = 0;
- nlmsvc_serv = NULL;
- } else
- printk(KERN_DEBUG
- "lockd: new process, skipping host shutdown\n");
- wake_up(&lockd_exit);
+ unlock_kernel();
+
+ nlmsvc_task = NULL;
+ nlmsvc_serv = NULL;
/* Exit the RPC thread */
svc_exit_thread(rqstp);
- /* Release module */
- unlock_kernel();
- module_put_and_exit(0);
+ return 0;
}
/*
@@ -261,14 +251,15 @@ static int make_socks(struct svc_serv *serv, int proto)
int
lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
{
- struct svc_serv * serv;
- int error = 0;
+ struct svc_serv *serv;
+ struct svc_rqst *rqstp;
+ int error = 0;
mutex_lock(&nlmsvc_mutex);
/*
* Check whether we're already up and running.
*/
- if (nlmsvc_pid) {
+ if (nlmsvc_serv) {
if (proto)
error = make_socks(nlmsvc_serv, proto);
goto out;
@@ -295,13 +286,28 @@ lockd_up(int proto) /* Maybe add a 'family' option when IPv6 is supported ?? */
/*
* Create the kernel thread and wait for it to start.
*/
- error = svc_create_thread(lockd, serv);
- if (error) {
+ rqstp = svc_prepare_thread(serv, &serv->sv_pools[0]);
+ if (IS_ERR(rqstp)) {
+ error = PTR_ERR(rqstp);
+ printk(KERN_WARNING
+ "lockd_up: svc_rqst allocation failed, error=%d\n",
+ error);
+ goto destroy_and_out;
+ }
+
+ svc_sock_update_bufs(serv);
+ nlmsvc_serv = rqstp->rq_server;
+
+ nlmsvc_task = kthread_run(lockd, rqstp, serv->sv_name);
+ if (IS_ERR(nlmsvc_task)) {
+ error = PTR_ERR(nlmsvc_task);
+ nlmsvc_task = NULL;
+ nlmsvc_serv = NULL;
printk(KERN_WARNING
- "lockd_up: create thread failed, error=%d\n", error);
+ "lockd_up: kthread_run failed, error=%d\n", error);
+ svc_exit_thread(rqstp);
goto destroy_and_out;
}
- wait_for_completion(&lockd_start_done);
/*
* Note: svc_serv structures have an initial use count of 1,
@@ -323,37 +329,21 @@ EXPORT_SYMBOL(lockd_up);
void
lockd_down(void)
{
- static int warned;
-
mutex_lock(&nlmsvc_mutex);
if (nlmsvc_users) {
if (--nlmsvc_users)
goto out;
- } else
- printk(KERN_WARNING "lockd_down: no users! pid=%d\n", nlmsvc_pid);
-
- if (!nlmsvc_pid) {
- if (warned++ == 0)
- printk(KERN_WARNING "lockd_down: no lockd running.\n");
- goto out;
+ } else {
+ printk(KERN_ERR "lockd_down: no users! task=%p\n",
+ nlmsvc_task);
+ BUG();
}
- warned = 0;
- kill_proc(nlmsvc_pid, SIGKILL, 1);
- /*
- * Wait for the lockd process to exit, but since we're holding
- * the lockd semaphore, we can't wait around forever ...
- */
- clear_thread_flag(TIF_SIGPENDING);
- interruptible_sleep_on_timeout(&lockd_exit, HZ);
- if (nlmsvc_pid) {
- printk(KERN_WARNING
- "lockd_down: lockd failed to exit, clearing pid\n");
- nlmsvc_pid = 0;
+ if (!nlmsvc_task) {
+ printk(KERN_ERR "lockd_down: no lockd running.\n");
+ BUG();
}
- spin_lock_irq(&current->sighand->siglock);
- recalc_sigpending();
- spin_unlock_irq(&current->sighand->siglock);
+ kthread_stop(nlmsvc_task);
out:
mutex_unlock(&