400 files changed, 10721 insertions, 14439 deletions
diff --git a/fs/9p/Kconfig b/fs/9p/Kconfig
index 795233702a4..7e051147679 100644
--- a/fs/9p/Kconfig
+++ b/fs/9p/Kconfig
@@ -17,3 +17,16 @@ config 9P_FSCACHE
 	  Choose Y here to enable persistent, read-only local
 	  caching support for 9p clients using FS-Cache
 
+
+config 9P_FS_POSIX_ACL
+	bool "9P POSIX Access Control Lists"
+	depends on 9P_FS
+	select FS_POSIX_ACL
+	help
+	  POSIX Access Control Lists (ACLs) support permissions for users and
+	  groups beyond the owner/group/world scheme.
+
+	  To learn more about Access Control Lists, visit the POSIX ACLs for
+	  Linux website <http://acl.bestbits.at/>.
+
+	  If you don't know what Access Control Lists are, say N
diff --git a/fs/9p/Makefile b/fs/9p/Makefile
index 91fba025fcb..f8ba37effd1 100644
--- a/fs/9p/Makefile
+++ b/fs/9p/Makefile
@@ -13,3 +13,4 @@ obj-$(CONFIG_9P_FS) := 9p.o
 	xattr_user.o
 
 9p-$(CONFIG_9P_FSCACHE) += cache.o
+9p-$(CONFIG_9P_FS_POSIX_ACL) += acl.o
diff --git a/fs/9p/acl.c b/fs/9p/acl.c
new file mode 100644
index 00000000000..12d602351db
--- /dev/null
+++ b/fs/9p/acl.c
@@ -0,0 +1,392 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/posix_acl_xattr.h>
+#include "xattr.h"
+#include "acl.h"
+#include "v9fs_vfs.h"
+#include "v9fs.h"
+
+static struct posix_acl *__v9fs_get_acl(struct p9_fid *fid, char *name)
+{
+	ssize_t size;
+	void *value = NULL;
+	struct posix_acl *acl = NULL;;
+
+	size = v9fs_fid_xattr_get(fid, name, NULL, 0);
+	if (size > 0) {
+		value = kzalloc(size, GFP_NOFS);
+		if (!value)
+			return ERR_PTR(-ENOMEM);
+		size = v9fs_fid_xattr_get(fid, name, value, size);
+		if (size > 0) {
+			acl = posix_acl_from_xattr(value, size);
+			if (IS_ERR(acl))
+				goto err_out;
+		}
+	} else if (size == -ENODATA || size == 0 ||
+		   size == -ENOSYS || size == -EOPNOTSUPP) {
+		acl = NULL;
+	} else
+		acl = ERR_PTR(-EIO);
+
+err_out:
+	kfree(value);
+	return acl;
+}
+
+int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
+{
+	int retval = 0;
+	struct posix_acl *pacl, *dacl;
+	struct v9fs_session_info *v9ses;
+
+	v9ses = v9fs_inode2v9ses(inode);
+	if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) {
+		set_cached_acl(inode, ACL_TYPE_DEFAULT, NULL);
+		set_cached_acl(inode, ACL_TYPE_ACCESS, NULL);
+		return 0;
+	}
+	/* get the default/access acl values and cache them */
+	dacl = __v9fs_get_acl(fid, POSIX_ACL_XATTR_DEFAULT);
+	pacl = __v9fs_get_acl(fid, POSIX_ACL_XATTR_ACCESS);
+
+	if (!IS_ERR(dacl) && !IS_ERR(pacl)) {
+		set_cached_acl(inode, ACL_TYPE_DEFAULT, dacl);
+		set_cached_acl(inode, ACL_TYPE_ACCESS, pacl);
+		posix_acl_release(dacl);
+		posix_acl_release(pacl);
+	} else
+		retval = -EIO;
+
+	return retval;
+}
+
+static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type)
+{
+	struct posix_acl *acl;
+	/*
+	 * 9p Always cache the acl value when
+	 * instantiating the inode (v9fs_inode_from_fid)
+	 */
+	acl = get_cached_acl(inode, type);
+	BUG_ON(acl == ACL_NOT_CACHED);
+	return acl;
+}
+
+int v9fs_check_acl(struct inode *inode, int mask)
+{
+	struct posix_acl *acl;
+	struct v9fs_session_info *v9ses;
+
+	v9ses = v9fs_inode2v9ses(inode);
+	if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) {
+		/*
+		 * On access = client mode get the acl
+		 * values from the server
+		 */
+		return 0;
+	}
+	acl = v9fs_get_cached_acl(inode, ACL_TYPE_ACCESS);
+
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (acl) {
+		int error = posix_acl_permission(inode, acl, mask);
+		posix_acl_release(acl);
+		return error;
+	}
+	return -EAGAIN;
+}
+
+static int v9fs_set_acl(struct dentry *dentry, int type, struct posix_acl *acl)
+{
+	int retval;
+	char *name;
+	size_t size;
+	void *buffer;
+	struct inode *inode = dentry->d_inode;
+
+	set_cached_acl(inode, type, acl);
+	/* Set a setxattr request to server */
+	size = posix_acl_xattr_size(acl->a_count);
+	buffer = kmalloc(size, GFP_KERNEL);
+	if (!buffer)
+		return -ENOMEM;
+	retval = posix_acl_to_xattr(acl, buffer, size);
+	if (retval < 0)
+		goto err_free_out;
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		name = POSIX_ACL_XATTR_ACCESS;
+		break;
+	case ACL_TYPE_DEFAULT:
+		name = POSIX_ACL_XATTR_DEFAULT;
+		break;
+	default:
+		BUG();
+	}
+	retval = v9fs_xattr_set(dentry, name, buffer, size, 0);
+err_free_out:
+	kfree(buffer);
+	return retval;
+}
+
+int v9fs_acl_chmod(struct dentry *dentry)
+{
+	int retval = 0;
+	struct posix_acl *acl, *clone;
+	struct inode *inode = dentry->d_inode;
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+	acl = v9fs_get_cached_acl(inode, ACL_TYPE_ACCESS);
+	if (acl) {
+		clone = posix_acl_clone(acl, GFP_KERNEL);
+		posix_acl_release(acl);
+		if (!clone)
+			return -ENOMEM;
+		retval = posix_acl_chmod_masq(clone, inode->i_mode);
+		if (!retval)
+			retval = v9fs_set_acl(dentry, ACL_TYPE_ACCESS, clone);
+		posix_acl_release(clone);
+	}
+	return retval;
+}
+
+int v9fs_set_create_acl(struct dentry *dentry,
+			struct posix_acl *dpacl, struct posix_acl *pacl)
+{
+	if (dpacl)
+		v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl);
+	if (pacl)
+		v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl);
+	posix_acl_release(dpacl);
+	posix_acl_release(pacl);
+	return 0;
+}
+
+int v9fs_acl_mode(struct inode *dir, mode_t *modep,
+		  struct posix_acl **dpacl, struct posix_acl **pacl)
+{
+	int retval = 0;
+	mode_t mode = *modep;
+	struct posix_acl *acl = NULL;
+
+	if (!S_ISLNK(mode)) {
+		acl = v9fs_get_cached_acl(dir, ACL_TYPE_DEFAULT);
+		if (IS_ERR(acl))
+			return PTR_ERR(acl);
+		if (!acl)
+			mode &= ~current_umask();
+	}
+	if (acl) {
+		struct posix_acl *clone;
+
+		if (S_ISDIR(mode))
+			*dpacl = acl;
+		clone = posix_acl_clone(acl, GFP_NOFS);
+		retval = -ENOMEM;
+		if (!clone)
+			goto cleanup;
+
+		retval = posix_acl_create_masq(clone, &mode);
+		if (retval < 0) {
+			posix_acl_release(clone);
+			goto cleanup;
+		}
+		if (retval > 0)
+			*pacl = clone;
+	}
+	*modep  = mode;
+	return 0;
+cleanup:
+	posix_acl_release(acl);
+	return retval;
+
+}
+
+static int v9fs_remote_get_acl(struct dentry *dentry, const char *name,
+			       void *buffer, size_t size, int type)
+{
+	char *full_name;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		full_name =  POSIX_ACL_XATTR_ACCESS;
+		break;
+	case ACL_TYPE_DEFAULT:
+		full_name = POSIX_ACL_XATTR_DEFAULT;
+		break;
+	default:
+		BUG();
+	}
+	return v9fs_xattr_get(dentry, full_name, buffer, size);
+}
+
+static int v9fs_xattr_get_acl(struct dentry *dentry, const char *name,
+			      void *buffer, size_t size, int type)
+{
+	struct v9fs_session_info *v9ses;
+	struct posix_acl *acl;
+	int error;
+
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+
+	v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	/*
+	 * We allow set/get/list of acl when access=client is not specified
+	 */
+	if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT)
+		return v9fs_remote_get_acl(dentry, name, buffer, size, type);
+
+	acl = v9fs_get_cached_acl(dentry->d_inode, type);
+	if (IS_ERR(acl))
+		return PTR_ERR(acl);
+	if (acl == NULL)
+		return -ENODATA;
+	error = posix_acl_to_xattr(acl, buffer, size);
+	posix_acl_release(acl);
+
+	return error;
+}
+
+static int v9fs_remote_set_acl(struct dentry *dentry, const char *name,
+			      const void *value, size_t size,
+			      int flags, int type)
+{
+	char *full_name;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		full_name =  POSIX_ACL_XATTR_ACCESS;
+		break;
+	case ACL_TYPE_DEFAULT:
+		full_name = POSIX_ACL_XATTR_DEFAULT;
+		break;
+	default:
+		BUG();
+	}
+	return v9fs_xattr_set(dentry, full_name, value, size, flags);
+}
+
+
+static int v9fs_xattr_set_acl(struct dentry *dentry, const char *name,
+			      const void *value, size_t size,
+			      int flags, int type)
+{
+	int retval;
+	struct posix_acl *acl;
+	struct v9fs_session_info *v9ses;
+	struct inode *inode = dentry->d_inode;
+
+	if (strcmp(name, "") != 0)
+		return -EINVAL;
+
+	v9ses = v9fs_inode2v9ses(dentry->d_inode);
+	/*
+	 * set the attribute on the remote. Without even looking at the
+	 * xattr value. We leave it to the server to validate
+	 */
+	if ((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT)
+		return v9fs_remote_set_acl(dentry, name,
+					   value, size, flags, type);
+
+	if (S_ISLNK(inode->i_mode))
+		return -EOPNOTSUPP;
+	if (!is_owner_or_cap(inode))
+		return -EPERM;
+	if (value) {
+		/* update the cached acl value */
+		acl = posix_acl_from_xattr(value, size);
+		if (IS_ERR(acl))
+			return PTR_ERR(acl);
+		else if (acl) {
+			retval = posix_acl_valid(acl);
+			if (retval)
+				goto err_out;
+		}
+	} else
+		acl = NULL;
+
+	switch (type) {
+	case ACL_TYPE_ACCESS:
+		name = POSIX_ACL_XATTR_ACCESS;
+		if (acl) {
+			mode_t mode = inode->i_mode;
+			retval = posix_acl_equiv_mode(acl, &mode);
+			if (retval < 0)
+				goto err_out;
+			else {
+				struct iattr iattr;
+				if (retval == 0) {
+					/*
+					 * ACL can be represented
+					 * by the mode bits. So don't
+					 * update ACL.
+					 */
+					acl = NULL;
+					value = NULL;
+					size = 0;
+				}
+				/* Updte the mode bits */
+				iattr.ia_mode = ((mode & S_IALLUGO) |
+						 (inode->i_mode & ~S_IALLUGO));
+				iattr.ia_valid = ATTR_MODE;
+				/* FIXME should we update ctime ?
+				 * What is the following setxattr update the
+				 * mode ?
+				 */
+				v9fs_vfs_setattr_dotl(dentry, &iattr);
+			}
+		}
+		break;
+	case ACL_TYPE_DEFAULT:
+		name = POSIX_ACL_XATTR_DEFAULT;
+		if (!S_ISDIR(inode->i_mode)) {
+			retval = -EINVAL;
+			goto err_out;
+		}
+		break;
+	default:
+		BUG();
+	}
+	retval = v9fs_xattr_set(dentry, name, value, size, flags);
+	if (!retval)
+		set_cached_acl(inode, type, acl);
+err_out:
+	posix_acl_release(acl);
+	return retval;
+}
+
+const struct xattr_handler v9fs_xattr_acl_access_handler = {
+	.prefix	= POSIX_ACL_XATTR_ACCESS,
+	.flags	= ACL_TYPE_ACCESS,
+	.get	= v9fs_xattr_get_acl,
+	.set	= v9fs_xattr_set_acl,
+};
+
+const struct xattr_handler v9fs_xattr_acl_default_handler = {
+	.prefix	= POSIX_ACL_XATTR_DEFAULT,
+	.flags	= ACL_TYPE_DEFAULT,
+	.get	= v9fs_xattr_get_acl,
+	.set	= v9fs_xattr_set_acl,
+};
diff --git a/fs/9p/acl.h b/fs/9p/acl.h
new file mode 100644
index 00000000000..59e18c2e8c7
--- /dev/null
+++ b/fs/9p/acl.h
@@ -0,0 +1,49 @@
+/*
+ * Copyright IBM Corporation, 2010
+ * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+ *
+ */
+#ifndef FS_9P_ACL_H
+#define FS_9P_ACL_H
+
+#ifdef CONFIG_9P_FS_POSIX_ACL
+extern int v9fs_get_acl(struct inode *, struct p9_fid *);
+extern int v9fs_check_acl(struct inode *inode, int mask);
+extern int v9fs_acl_chmod(struct dentry *);
+extern int v9fs_set_create_acl(struct dentry *,
+			       struct posix_acl *, struct posix_acl *);
+extern int v9fs_acl_mode(struct inode *dir, mode_t *modep,
+			 struct posix_acl **dpacl, struct posix_acl **pacl);
+#else
+#define v9fs_check_acl NULL
+static inline int v9fs_get_acl(struct inode *inode, struct p9_fid *fid)
+{
+	return 0;
+}
+static inline int v9fs_acl_chmod(struct dentry *dentry)
+{
+	return 0;
+}
+static inline int v9fs_set_create_acl(struct dentry *dentry,
+				      struct posix_acl *dpacl,
+				      struct posix_acl *pacl)
+{
+	return 0;
+}
+static inline int v9fs_acl_mode(struct inode *dir, mode_t *modep,
+				struct posix_acl **dpacl,
+				struct posix_acl **pacl)
+{
+	return 0;
+}
+
+#endif
+#endif /* FS_9P_XATTR_H */
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 6406f896bf9..b00223c99d7 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -149,6 +149,7 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
 	switch (access) {
 	case V9FS_ACCESS_SINGLE:
 	case V9FS_ACCESS_USER:
+	case V9FS_ACCESS_CLIENT:
 		uid = current_fsuid();
 		any = 0;
 		break;
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 38dc0e06759..2f77cd33ba8 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -193,7 +193,17 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
 				v9ses->flags |= V9FS_ACCESS_USER;
 			else if (strcmp(s, "any") == 0)
 				v9ses->flags |= V9FS_ACCESS_ANY;
-			else {
+			else if (strcmp(s, "client") == 0) {
+#ifdef CONFIG_9P_FS_POSIX_ACL
+				v9ses->flags |= V9FS_ACCESS_CLIENT;
+#else
+				P9_DPRINTK(P9_DEBUG_ERROR,
+					"access=client option not supported\n");
+				kfree(s);
+				ret = -EINVAL;
+				goto free_and_return;
+#endif
+			} else {
 				v9ses->flags |= V9FS_ACCESS_SINGLE;
 				v9ses->uid = simple_strtoul(s, &e, 10);
 				if (*e != '\0')
@@ -278,6 +288,16 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
 
 	v9ses->maxdata = v9ses->clnt->msize - P9_IOHDRSZ;
 
+	if (!v9fs_proto_dotl(v9ses) &&
+	    ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)) {
+		/*
+		 * We support ACCESS_CLIENT only for dotl.
+		 * Fall back to ACCESS_USER
+		 */
+		v9ses->flags &= ~V9FS_ACCESS_MASK;
+		v9ses->flags |= V9FS_ACCESS_USER;
+	}
+	/*FIXME !! */
 	/* for legacy mode, fall back to V9FS_ACCESS_ANY */
 	if (!(v9fs_proto_dotu(v9ses) || v9fs_proto_dotl(v9ses)) &&
 		((v9ses->flags&V9FS_ACCESS_MASK) == V9FS_ACCESS_USER)) {
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 4c963c9fc41..cb6396855e2 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -33,13 +33,17 @@
  *
  * Session flags reflect options selected by users at mount time
  */
+#define	V9FS_ACCESS_ANY (V9FS_ACCESS_SINGLE | \
+			 V9FS_ACCESS_USER |   \
+			 V9FS_ACCESS_CLIENT)
+#define V9FS_ACCESS_MASK V9FS_ACCESS_ANY
+
 enum p9_session_flags {
 	V9FS_PROTO_2000U	= 0x01,
 	V9FS_PROTO_2000L	= 0x02,
 	V9FS_ACCESS_SINGLE	= 0x04,
 	V9FS_ACCESS_USER	= 0x08,
-	V9FS_ACCESS_ANY		= 0x0C,
-	V9FS_ACCESS_MASK	= 0x0C,
+	V9FS_ACCESS_CLIENT	= 0x10
 };
 
 /* possible values of ->cache */
@@ -113,8 +117,6 @@ void v9fs_session_close(struct v9fs_session_info *v9ses);
 void v9fs_session_cancel(struct v9fs_session_info *v9ses);
 void v9fs_session_begin_cancel(struct v9fs_session_info *v9ses);
 
-#define V9FS_MAGIC 0x01021997
-
 /* other default globals */
 #define V9FS_PORT	564
 #define V9FS_DEFUSER	"nobody"
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 88418c419ea..bab0eac873f 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -64,3 +64,7 @@ int v9fs_uflags2omode(int uflags, int extended);
 
 ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
 void v9fs_blank_wstat(struct p9_wstat *wstat);
+int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *);
+int v9fs_file_fsync_dotl(struct file *filp, int datasync);
+
+#define P9_LOCK_TIMEOUT (30*HZ)
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 90e38449f4b..b7f2a8e3863 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -154,10 +154,40 @@ static int v9fs_launder_page(struct page *page)
 	return 0;
 }
 
+/**
+ * v9fs_direct_IO - 9P address space operation for direct I/O
+ * @rw: direction (read or write)
+ * @iocb: target I/O control block
+ * @iov: array of vectors that define I/O buffer
+ * @pos: offset in file to begin the operation
+ * @nr_segs: size of iovec array
+ *
+ * The presence of v9fs_direct_IO() in the address space ops vector
+ * allowes open() O_DIRECT flags which would have failed otherwise.
+ *
+ * In the non-cached mode, we shunt off direct read and write requests before
+ * the VFS gets them, so this method should never be called.
+ *
+ * Direct IO is not 'yet' supported in the cached mode. Hence when
+ * this routine is called through generic_file_aio_read(), the read/write fails
+ * with an error.
+ *
+ */
+ssize_t v9fs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
+		loff_t pos, unsigned long nr_segs)
+{
+	P9_DPRINTK(P9_DEBUG_VFS, "v9fs_direct_IO: v9fs_direct_IO (%s) "
+			"off/no(%lld/%lu) EINVAL\n",
+			iocb->ki_filp->f_path.dentry->d_name.name,
+			(long long) pos, nr_segs);
+
+	return -EINVAL;
+}
 const struct address_space_operations v9fs_addr_operations = {
       .readpage = v9fs_vfs_readpage,
       .readpages = v9fs_vfs_readpages,
       .releasepage = v9fs_release_page,
       .invalidatepage = v9fs_invalidate_page,
       .launder_page = v9fs_launder_page,
+      .direct_IO = v9fs_direct_IO,
 };
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 899f168fd19..b84ebe8cefe 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -242,7 +242,8 @@ static int v9fs_dir_readdir_dotl(struct file *filp, void *dirent,
 		while (rdir->head < rdir->tail) {
 
 			err = p9dirent_read(rdir->buf + rdir->head,
-						buflen - rdir->head, &curdirent,
+						rdir->tail - rdir->head,
+						&curdirent,
 						fid->clnt->proto_version);
 			if (err < 0) {
 				P9_DPRINTK(P9_DEBUG_VFS, "returned %d\n", err);
@@ -314,4 +315,5 @@ const struct file_operations v9fs_dir_operations_dotl = {
 	.readdir = v9fs_dir_readdir_dotl,
 	.open = v9fs_file_open,
 	.release = v9fs_dir_release,
+        .fsync = v9fs_file_fsync_dotl,
 };
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index e97c92bd6f1..240c3067439 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -33,6 +33,7 @@
 #include <linux/inet.h>
 #include <linux/list.h>
 #include <linux/pagemap.h>
+#include <linux/utsname.h>
 #include <asm/uaccess.h>
 #include <linux/idr.h>
 #include <net/9p/9p.h>
@@ -44,6 +45,7 @@
 #include "cache.h"
 
 static const struct file_operations v9fs_cached_file_operations;
+static const struct file_operations v9fs_cached_file_operations_dotl;
 
 /**
  * v9fs_file_open - open a file (or directory)
@@ -92,6 +94,8 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 		/* enable cached file options */
 		if(file->f_op == &v9fs_file_operations)
 			file->f_op = &v9fs_cached_file_operations;
+		else if (file->f_op == &v9fs_file_operations_dotl)
+			file->f_op = &v9fs_cached_file_operations_dotl;
 
 #ifdef CONFIG_9P_FSCACHE
 		v9fs_cache_inode_set_cookie(inode, file);
@@ -130,6 +134,206 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
 	return res;
 }
 
+static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
+{
+	struct p9_flock flock;
+	struct p9_fid *fid;
+	uint8_t status;
+	int res = 0;
+	unsigned char fl_type;
+
+	fid = filp->private_data;
+	BUG_ON(fid == NULL);
+
+	if ((fl->fl_flags & FL_POSIX) != FL_POSIX)
+		BUG();
+
+	res = posix_lock_file_wait(filp, fl);
+	if (res < 0)
+		goto out;
+
+	/* convert posix lock to p9 tlock args */
+	memset(&flock, 0, sizeof(flock));
+	flock.type = fl->fl_type;
+	flock.start = fl->fl_start;
+	if (fl->fl_end == OFFSET_MAX)
+		flock.length = 0;
+	else
+		flock.length = fl->fl_end - fl->fl_start + 1;
+	flock.proc_id = fl->fl_pid;
+	flock.client_id = utsname()->nodename;
+	if (IS_SETLKW(cmd))
+		flock.flags = P9_LOCK_FLAGS_BLOCK;
+
+	/*
+	 * if its a blocked request and we get P9_LOCK_BLOCKED as the status
+	 * for lock request, keep on trying
+	 */
+	for (;;) {
+		res = p9_client_lock_dotl(fid, &flock, &status);
+		if (res < 0)
+			break;
+
+		if (status != P9_LOCK_BLOCKED)
+			break;
+		if (status == P9_LOCK_BLOCKED && !IS_SETLKW(cmd))
+			break;
+		schedule_timeout_interruptible(P9_LOCK_TIMEOUT);
+	}
+
+	/* map 9p status to VFS status */
+	switch (status) {
+	case P9_LOCK_SUCCESS:
+		res = 0;
+		break;
+	case P9_LOCK_BLOCKED:
+		res = -EAGAIN;
+		break;
+	case P9_LOCK_ERROR:
+	case P9_LOCK_GRACE:
+		res = -ENOLCK;
+		break;
+	default:
+		BUG();
+	}
+
+	/*
+	 * incase server returned error for lock request, revert
+	 * it locally
+	 */
+	if (res < 0 && fl->fl_type != F_UNLCK) {
+		fl_type = fl->fl_type;
+		fl->fl_type = F_UNLCK;
+		res = posix_lock_file_wait(filp, fl);
+		fl->fl_type = fl_type;
+	}
+out:
+	return res;
+}
+
+static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
+{
+	struct p9_getlock glock;
+	struct p9_fid *fid;
+	int res = 0;
+
+	fid = filp->private_data;
+	BUG_ON(fid == NULL);
+
+	posix_test_lock(filp, fl);
+	/*
+	 * if we have a conflicting lock locally, no need to validate
+	 * with server
+	 */
+	if (fl->fl_type != F_UNLCK)
+		return res;
+
+	/* convert posix lock to p9 tgetlock args */
+	memset(&glock, 0, sizeof(glock));
+	glock.type = fl->fl_type;
+	glock.start = fl->fl_start;
+	if (fl->fl_end == OFFSET_MAX)
+		glock.length = 0;
+	else
+		glock.length = fl->fl_end - fl->fl_start + 1;
+	glock.proc_id = fl->fl_pid;
+	glock.client_id = utsname()->nodename;
+
+	res = p9_client_getlock_dotl(fid, &glock);
+	if (res < 0)
+		return res;
+	if (glock.type != F_UNLCK) {
+		fl->fl_type = glock.type;
+		fl->fl_start = glock.start;
+		if (glock.length == 0)
+			fl->fl_end = OFFSET_MAX;
+		else
+			fl->fl_end = glock.start + glock.length - 1;
+		fl->fl_pid = glock.proc_id;
+	} else
+		fl->fl_type = F_UNLCK;
+
+	return res;
+}
+
+/**
+ * v9fs_file_lock_dotl - lock a file (or directory)
+ * @filp: file to be locked
+ * @cmd: lock command
+ * @fl: file lock structure
+ *
+ */
+
+static int v9fs_file_lock_dotl(struct file *filp, int cmd, struct file_lock *fl)
+{
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	int ret = -ENOLCK;
+
+	P9_DPRINTK(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", filp,
+				cmd, fl, filp->f_path.dentry->d_name.name);
+
+	/* No mandatory locks */
+	if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
+		goto out_err;
+
+	if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
+		filemap_write_and_wait(inode->i_mapping);
+		invalidate_mapping_pages(&inode->i_data, 0, -1);
+	}
+
+	if (IS_SETLK(cmd) || IS_SETLKW(cmd))
+		ret = v9fs_file_do_lock(filp, cmd, fl);
+	else if (IS_GETLK(cmd))
+		ret = v9fs_file_getlock(filp, fl);
+	else
+		ret = -EINVAL;
+out_err:
+	return ret;
+}
+
+/**
+ * v9fs_file_flock_dotl - lock a file
+ * @filp: file to be locked
+ * @cmd: lock command
+ * @fl: file lock structure
+ *
+ */
+
+static int v9fs_file_flock_dotl(struct file *filp, int cmd,
+	struct file_lock *fl)
+{
+	struct inode *inode = filp->f_path.dentry->d_inode;
+	int ret = -ENOLCK;
+
+	P9_DPRINTK(P9_DEBUG_VFS, "filp: %p cmd:%d lock: %p name: %s\n", filp,
+				cmd, fl, filp->f_path.dentry->d_name.name);
+
+	/* No mandatory locks */
+	if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
+		goto out_err;
+
+	if (!(fl->fl_flags & FL_FLOCK))
+		goto out_err;
+
+	if ((IS_SETLK(cmd) || IS_SETLKW(cmd)) && fl->fl_type != F_UNLCK) {
+		filemap_write_and_wait(inode->i_mapping);
+		invalidate_mapping_pages(&inode->i_data, 0, -1);
+	}
+	/* Convert flock to posix lock */
+	fl->fl_owner = (fl_owner_t)filp;
+	fl->fl_start = 0;
+	fl->fl_end = OFFSET_MAX;
+	fl->fl_flags |= FL_POSIX;
+	fl->fl_flags ^= FL_FLOCK;
+
+	if (IS_SETLK(cmd) | IS_SETLKW(cmd))
+		ret = v9fs_file_do_lock(filp, cmd, fl);
+	else
+		ret = -EINVAL;
+out_err:
+	return ret;
+}
+
 /**
  * v9fs_file_readn - read from a file
  * @filp: file pointer to read
@@ -219,7 +423,9 @@ static ssize_t
 v9fs_file_write(struct file *filp, const char __user * data,
 		size_t count, loff_t * offset)
 {
-	int n, rsize, total = 0;
+	ssize_t retval;
+	size_t total = 0;
+	int n;
 	struct p9_fid *fid;
 	struct p9_client *clnt;
 	struct inode *inode = filp->f_path.dentry->d_inode;
@@ -232,14 +438,19 @@ v9fs_file_write(struct file *filp, const char __user * data,
 	fid = filp->private_data;
 	clnt = fid->clnt;
 
-	rsize = fid->iounit ? fid->iounit : clnt->msize - P9_IOHDRSZ;
+	retval = generic_write_checks(filp, &origin, &count, 0);
+	if (retval)
+		goto out;
 
-	do {
-		if (count < rsize)
-			rsize = count;
+	retval = -EINVAL;
+	if ((ssize_t) count < 0)
+		goto out;
+	retval = 0;
+	if (!count)
+		goto out;
 
-		n = p9_client_write(fid, NULL, data+total, origin+total,
-									rsize);
+	do {
+		n = p9_client_write(fid, NULL, data+total, origin+total, count);
 		if (n <= 0)
 			break;
 		count -= n;
@@ -258,9 +469,11 @@ v9fs_file_write(struct file *filp, const char __user * data,
 	}
 
 	if (n < 0)
-		return n;
-
-	return total;
+		retval = n;
+	else
+		retval = total;
+out:
+	return retval;
 }
 
 static int v9fs_file_fsync(struct file *filp, int datasync)
@@ -278,6 +491,20 @@ static int v9fs_file_fsync(struct file *filp, int datasync)
 	return retval;
 }
 
+int v9fs_file_fsync_dotl(struct file *filp, int datasync)
+{
+	struct p9_fid *fid;
+	int retval;
+
+	P9_DPRINTK(P9_DEBUG_VFS, "v9fs_file_fsync_dotl: filp %p datasync %x\n",
+			filp, datasync);
+
+	fid = filp->private_data;
+
+	retval = p9_client_fsync(fid, datasync);
+	return retval;
+}
+
 static const struct file_operations v9fs_cached_file_operations = {
 	.llseek = generic_file_llseek,
 	.read = do_sync_read,
@@ -290,6 +517,19 @@ static const struct file_operations v9fs_cached_file_operations = {
 	.fsync = v9fs_file_fsync,
 };
 
+static const struct file_operations v9fs_cached_file_operations_dotl = {
+	.llseek = generic_file_llseek,
+	.read = do_sync_read,
+	.aio_read = generic_file_aio_read,
+	.write = v9fs_file_write,
+	.open = v9fs_file_open,
+	.release = v9fs_dir_release,
+	.lock = v9fs_file_lock_dotl,
+	.flock = v9fs_file_flock_dotl,
+	.mmap = generic_file_readonly_mmap,
+	.fsync = v9fs_file_fsync_dotl,
+};
+
 const struct file_operations v9fs_file_operations = {
 	.llseek = generic_file_llseek,
 	.read = v9fs_file_read,
@@ -307,7 +547,8 @@ const struct file_operations v9fs_file_operations_dotl = {
 	.write = v9fs_file_write,
 	.open = v9fs_file_open,
 	.release = v9fs_dir_release,
-	.lock = v9fs_file_lock,
+	.lock = v9fs_file_lock_dotl,
+	.flock = v9fs_file_flock_dotl,
 	.mmap = generic_file_readonly_mmap,
-	.fsync = v9fs_file_fsync,
+	.fsync = v9fs_file_fsync_dotl,
 };
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 9e670d52764..34bf71b5654 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -36,6 +36,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/xattr.h>
+#include <linux/posix_acl.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 
@@ -44,6 +45,7 @@
 #include "fid.h"
 #include "cache.h"
 #include "xattr.h"
+#include "acl.h"
 
 static const struct inode_operations v9fs_dir_inode_operations;
 static const struct inode_operations v9fs_dir_inode_operations_dotu;
@@ -53,6 +55,10 @@ static const struct inode_operations v9fs_file_inode_operations_dotl;
 static const struct inode_operations v9fs_symlink_inode_operations;
 static const struct inode_operations v9fs_symlink_inode_operations_dotl;
 
+static int
+v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
+		    dev_t rdev);
+
 /**
  * unixmode2p9mode - convert unix mode bits to plan 9
  * @v9ses: v9fs session information
@@ -500,6 +506,11 @@ v9fs_inode_dotl(struct v9fs_session_info *v9ses, struct p9_fid *fid,
 	v9fs_vcookie_set_qid(ret, &st->qid);
 	v9fs_cache_inode_get_cookie(ret);
 #endif
+	err = v9fs_get_acl(ret, fid);
+	if (err) {
+		iput(ret);
+		goto error;
+	}
 	kfree(st);
 	return ret;
 error:
@@ -553,13 +564,6 @@ static int v9fs_remove(struct inode *dir, struct dentry *file, int rmdir)
 	return retval;
 }
 
-static int
-v9fs_open_created(struct inode *inode, struct file *file)
-{
-	return 0;
-}
-
-
 /**
  * v9fs_create - Create a file
  * @v9ses: session information
@@ -655,29 +659,37 @@ error:
  */
 
 static int
-v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode,
+v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode,
 		struct nameidata *nd)
 {
 	int err = 0;
 	char *name = NULL;
 	gid_t gid;
 	int flags;
+	mode_t mode;
 	struct v9fs_session_info *v9ses;
 	struct p9_fid *fid = NULL;
 	struct p9_fid *dfid, *ofid;
 	struct file *filp;
 	struct p9_qid qid;
 	struct inode *inode;
+	struct posix_acl *pacl = NULL, *dacl = NULL;
 
 	v9ses = v9fs_inode2v9ses(dir);
 	if (nd && nd->flags & LOOKUP_OPEN)
 		flags = nd->intent.open.flags - 1;
-	else
-		flags = O_RDWR;
+	else {
+		/*
+		 * create call without LOOKUP_OPEN is due
+		 * to mknod of regular files. So use mknod
+		 * operation.
+		 */
+		return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0);
+	}
 
 	name = (char *) dentry->d_name.name;
 	P9_DPRINTK(P9_DEBUG_VFS, "v9fs_vfs_create_dotl: name:%s flags:0x%x "
-			"mode:0x%x\n", name, flags, mode);
+			"mode:0x%x\n", name, flags, omode);
 
 	dfid = v9fs_fid_lookup(dentry->d_parent);
 	if (IS_ERR(dfid)) {
@@ -695,6 +707,15 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode,
 	}
 
 	gid = v9fs_get_fsgid_for_create(dir);
+
+	mode = omode;
+	/* Update mode based on ACL value */
+	err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
+	if (err) {
+		P9_DPRINTK(P9_DEBUG_VFS,
+			   "Failed to get acl values in creat %d\n", err);
+		goto error;
+	}
 	err = p9_client_create_dotl(ofid, name, flags, mode, gid, &qid);
 	if (err < 0) {
 		P9_DPRINTK(P9_DEBUG_VFS,
@@ -702,46 +723,52 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int mode,
 				err);
 		goto error;
 	}
+	/* instantiate inode and assign the unopened fid to the dentry */
+	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE ||
+	    (nd && nd->flags & LOOKUP_OPEN)) {
+		fid = p9_client_walk(dfid, 1, &name, 1);
+		if (IS_ERR(fid)) {
+			err = PTR_ERR(fid);
+			P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n",
+				err);
+			fid = NULL;
+			goto error;
+		}
 
-	/* No need to populate the inode if we are not opening the file AND
-	 * not in cached mode.
-	 */
-	if (!v9ses->cache && !(nd && nd->flags & LOOKUP_OPEN)) {
-		/* Not in cached mode. No need to populate inode with stat */
-		dentry->d_op = &v9fs_dentry_operations;
-		p9_client_clunk(ofid);
-		d_instantiate(dentry, NULL);
-		return 0;
-	}
-
-	/* Now walk from the parent so we can get an unopened fid. */
-	fid = p9_client_walk(dfid, 1, &name, 1);
-	if (IS_ERR(fid)) {
-		err = PTR_ERR(fid);
-		P9_DPRINTK(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
-		fid = NULL;
-		goto error;
-	}
-
-	/* instantiate inode and assign the unopened fid to dentry */
-	inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
-	if (IS_ERR(inode)) {
-		err = PTR_ERR(inode);
-		P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n", err);
-		goto error;
-	}
-	if (v9ses->cache)
+		inode = v9fs_inode_from_fid(v9ses, fid, dir->i_sb);
+		if (IS_ERR(inode)) {
+			err = PTR_ERR(inode);
+			P9_DPRINTK(P9_DEBUG_VFS, "inode creation failed %d\n",
+				err);
+			goto error;
+		}
 		dentry->d_op = &v9fs_cached_dentry_operations;
-	else
+		d_instantiate(dentry, inode);
+		err = v9fs_fid_add(dentry, fid);
+		if (err < 0)
+			goto error;
+		/* The fid would get clunked via a dput */
+		fid = NULL;
+	} else {
+		/*
+		 * Not in cached mode. No need to populate
+		 * inode with stat. We need to get an inode
+		 * so that we can set the acl with dentry
+		 */
+		inode = v9fs_get_inode(dir->i_sb, mode);
+		if (IS_ERR(inode)) {
+			err = PTR_ERR(inode);
+			goto error;
+		}
 		dentry->d_op = &v9fs_dentry_operations;
-	d_instantiate(dentry, inode);
-	err = v9fs_fid_add(dentry, fid);
-	if (err < 0)
-		goto error;
+		d_instantiate(dentry, inode);
+	}
+	/* Now set the ACL based on the default value */
+	v9fs_set_create_acl(dentry, dacl, pacl);
 
 	/* if we are opening a file, assign the open fid to the file */
 	if (nd && nd->flags & LOOKUP_OPEN) {
-		filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created);
+		filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
 		if (IS_ERR(filp)) {
 			p9_client_clunk(ofid);
 			return PTR_ERR(filp);
@@ -800,7 +827,7 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, int mode,
 
 	/* if we are opening a file, assign the open fid to the file */
 	if (nd && nd->flags & LOOKUP_OPEN) {
-		filp = lookup_instantiate_filp(nd, dentry, v9fs_open_created);
+		filp = lookup_instantiate_filp(nd, dentry, generic_file_open);
 		if (IS_ERR(filp)) {
 			err = PTR_ERR(filp);
 			goto error;
@@ -859,23 +886,28 @@ static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
  *
  */
 
-static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct dentry *dentry,
-					int mode)
+static int v9fs_vfs_mkdir_dotl(struct inode *dir,
+			       struct dentry *dentry, int omode)
 {
 	int err;
 	struct v9fs_session_info *v9ses;
 	struct p9_fid *fid = NULL, *dfid = NULL;
 	gid_t gid;
 	char *name;
+	mode_t mode;
 	struct inode *inode;
 	struct p9_qid qid;
 	struct dentry *dir_dentry;
+	struct posix_acl *dacl = NULL, *pacl = NULL;
 
 	P9_DPRINTK(P9_DEBUG_VFS, "name %s\n", dentry->d_name.name);
 	err = 0;
 	v9ses = v9fs_inode2v9ses(dir);
 
-	mode |= S_IFDIR;
+	omode |= S_IFDIR;
+	if (dir->i_mode & S_ISGID)
+		omode |= S_ISGID;
+
 	dir_dentry = v9fs_dentry_from_dir_inode(dir);
 	dfid = v9fs_fid_lookup(dir_dentry);
 	if (IS_ERR(dfid)) {
@@ -886,11 +918,14 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct dentry *dentry,
 	}
 
 	gid = v9fs_get_fsgid_for_create(dir);
-	if (gid < 0) {
-		P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_fsgid_for_create failed\n");
+	mode = omode;
+	/* Update mode based on ACL value */
+	err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
+	if (err) {
+		P9_DPRINTK(P9_DEBUG_VFS,
+			   "Failed to get acl values in mkdir %d\n", err);
 		goto error;
 	}
-
 	name = (char *) dentry->d_name.name;
 	err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid);
 	if (err < 0)
@@ -920,7 +955,23 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, struct dentry *dentry,
 		if (err < 0)
 			goto error;
 		fid = NULL;
+	} else {
+		/*
+		 * Not in cached mode. No need to populate
+		 * inode with stat. We need to get an inode
+		 * so that we can set the acl with dentry
+		 */
+		inode = v9fs_get_inode(dir->i_sb, mode);
+		if (IS_ERR(inode)) {
+			err = PTR_ERR(inode);
+			goto error;
+		}
+		dentry->d_op = &v9fs_dentry_operations;
+		d_instantiate(dentry, inode);
 	}
+	/* Now set the ACL based on the default value */
+	v9fs_set_create_acl(dentry, dacl, pacl);
+
 error:
 	if (fid)
 		p9_client_clunk(fid);
@@ -979,7 +1030,7 @@ static struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 
 	result = v9fs_fid_add(dentry, fid);
 	if (result < 0)
-		goto error;
+		goto error_iput;
 
 inst_out:
 	if (v9ses->cache)
@@ -990,6 +1041,8 @@ inst_out:
 	d_add(dentry, inode);
 	return NULL;
 
+error_iput:
+	iput(inode);
 error:
 	p9_client_clunk(fid);
 
@@ -1237,7 +1290,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
  *
  */
 
-static int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
+int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
 {
 	int retval;
 	struct v9fs_session_info *v9ses;
@@ -1279,6 +1332,12 @@ static int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
 
 	setattr_copy(dentry->d_inode, iattr);
 	mark_inode_dirty(dentry->d_inode);
+	if (iattr->ia_valid & ATTR_MODE) {
+		/* We also want to update ACL when we update mode bits */
+		retval = v9fs_acl_chmod(dentry);
+		if (retval < 0)
+			return retval;
+	}
 	return 0;
 }
 
@@ -1473,7 +1532,7 @@ static int v9fs_readlink(struct dentry *dentry, char *buffer, int buflen)
 	if (IS_ERR(fid))
 		return PTR_ERR(fid);
 
-	if (!v9fs_proto_dotu(v9ses) && !v9fs_proto_dotl(v9ses))
+	if (!v9fs_proto_dotu(v9ses))
 		return -EBADF;
 
 	st = p9_client_stat(fid);
@@ -1616,11 +1675,6 @@ v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
 
 	gid = v9fs_get_fsgid_for_create(dir);
 
-	if (gid < 0) {
-		P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_egid failed %d\n", gid);
-		goto error;
-	}
-
 	/* Server doesn't alter fid on TSYMLINK. Hence no need to clone it. */
 	err = p9_client_symlink(dfid, name, (char *)symname, gid, &qid);
 
@@ -1789,9 +1843,10 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
 		kfree(st);
 	} else {
 		/* Caching disabled. No need to get upto date stat info.
-		 * This dentry will be released immediately. So, just i_count++
+		 * This dentry will be released immediately. So, just hold the
+		 * inode
 		 */
-		atomic_inc(&old_dentry->d_inode->i_count);
+		ihold(old_dentry->d_inode);
 	}
 
 	dentry->d_op = old_dentry->d_op;
@@ -1854,21 +1909,23 @@ v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t rdev)
  *
  */
 static int
-v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int mode,
+v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode,
 		dev_t rdev)
 {
 	int err;
 	char *name;
+	mode_t mode;
 	struct v9fs_session_info *v9ses;
 	struct p9_fid *fid = NULL, *dfid = NULL;
 	struct inode *inode;
 	gid_t gid;
 	struct p9_qid qid;
 	struct dentry *dir_dentry;
+	struct posix_acl *dacl = NULL, *pacl = NULL;
 
 	P9_DPRINTK(P9_DEBUG_VFS,
 		" %lu,%s mode: %x MAJOR: %u MINOR: %u\n", dir->i_ino,
-		dentry->d_name.name, mode, MAJOR(rdev), MINOR(rdev));
+		dentry->d_name.name, omode, MAJOR(rdev), MINOR(rdev));
 
 	if (!new_valid_dev(rdev))
 		return -EINVAL;
@@ -1884,11 +1941,14 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int mode,
 	}
 
 	gid = v9fs_get_fsgid_for_create(dir);
-	if (gid < 0) {
-		P9_DPRINTK(P9_DEBUG_VFS, "v9fs_get_fsgid_for_create failed\n");
+	mode = omode;
+	/* Update mode based on ACL value */
+	err = v9fs_acl_mode(dir, &mode, &dacl, &pacl);
+	if (err) {
+		P9_DPRINTK(P9_DEBUG_VFS,
+			   "Failed to get acl values in mknod %d\n", err);
 		goto error;
 	}
-
 	name = (char *) dentry->d_name.name;
 
 	err = p9_client_mknod_dotl(dfid, name, mode, rdev, gid, &qid);
@@ -1932,13 +1992,68 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int mode,
 		dentry->d_op = &v9fs_dentry_operations;
 		d_instantiate(dentry, inode);
 	}
-
+	/* Now set the ACL based on the default value */
+	v9fs_set_create_acl(dentry, dacl, pacl);
 error:
 	if (fid)
 		p9_client_clunk(fid);
 	return err;
 }
 
+static int
+v9fs_vfs_readlink_dotl(struct dentry *dentry, char *buffer, int buflen)
+{
+	int retval;
+	struct p9_fid *fid;
+	char *target = NULL;
+
+	P9_DPRINTK(P9_DEBUG_VFS, " %s\n", dentry->d_name.name);
+	retval = -EPERM;
+	fid = v9fs_fid_lookup(dentry);
+	if (IS_ERR(fid))
+		return PTR_ERR(fid);
+
+	retval = p9_client_readlink(fid, &target);
+	if (retval < 0)
+		return retval;
+
+	strncpy(buffer, target, buflen);
+	P9_DPRINTK(P9_DEBUG_VFS, "%s -> %s\n", dentry->d_name.name, buffer);
+
+	retval = strnlen(buffer, buflen);
+	return retval;
+}
+
+/**
+ * v9fs_vfs_follow_link_dotl - follow a symlink path
+ * @dentry: dentry for symlink
+ * @nd: nameidata
+ *
+ */
+
+static void *
+v9fs_vfs_follow_link_dotl(struct dentry *dentry, struct nameidata *nd)
+{
+	int len = 0;
+	char *link = __getname();
+
+	P9_DPRINTK(P9_DEBUG_VFS, "%s n", dentry->d_name.name);
+
+	if (!link)
+		link = ERR_PTR(-ENOMEM);
+	else {
+		len = v9fs_vfs_readlink_dotl(dentry, link, PATH_MAX);
+		if (len < 0) {
+			__putname(link);
+			link = ERR_PTR(len);
+		} else
+			link[min(len, PATH_MAX-1)] = 0;
+	}
+	nd_set_link(nd, link);
+
+	return NULL;
+}
+
 static const struct inode_operations v9fs_dir_inode_operations_dotu = {
 	.create = v9fs_vfs_create,
 	.lookup = v9fs_vfs_lookup,
@@ -1969,7 +2084,7 @@ static const struct inode_operations v9fs_dir_inode_operations_dotl = {
 	.getxattr = generic_getxattr,
 	.removexattr = generic_removexattr,
 	.listxattr = v9fs_listxattr,
-
+	.check_acl = v9fs_check_acl,
 };
 
 static const struct inode_operations v9fs_dir_inode_operations = {
@@ -1996,6 +2111,7 @@ static const struct inode_operations v9fs_file_inode_operations_dotl = {
 	.getxattr = generic_getxattr,
 	.removexattr = generic_removexattr,
 	.listxattr = v9fs_listxattr,
+	.check_acl = v9fs_check_acl,
 };
 
 static const struct inode_operations v9fs_symlink_inode_operations = {
@@ -2007,8 +2123,8 @@ static const struct inode_operations v9fs_symlink_inode_operations = {
 };
 
 static const struct inode_operations v9fs_symlink_inode_operations_dotl = {
-	.readlink = generic_readlink,
-	.follow_link = v9fs_vfs_follow_link,
+	.readlink = v9fs_vfs_readlink_dotl,
+	.follow_link = v9fs_vfs_follow_link_dotl,
 	.put_link = v9fs_vfs_put_link,
 	.getattr = v9fs_vfs_getattr_dotl,
 	.setattr = v9fs_vfs_setattr_dotl,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 1d12ba0ed3d..c55c614500a 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -39,6 +39,7 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/statfs.h>
+#include <linux/magic.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 
@@ -46,6 +47,7 @@
 #include "v9fs_vfs.h"
 #include "fid.h"
 #include "xattr.h"
+#include "acl.h"
 
 static const struct super_operations v9fs_super_ops, v9fs_super_ops_dotl;
 
@@ -66,7 +68,7 @@ static int v9fs_set_super(struct super_block *s, void *data)
  * v9fs_fill_super - populate superblock with info
  * @sb: superblock
  * @v9ses: session information
- * @flags: flags propagated from v9fs_get_sb()
+ * @flags: flags propagated from v9fs_mount()
  *
  */
 
@@ -88,22 +90,25 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
 	sb->s_flags = flags | MS_ACTIVE | MS_SYNCHRONOUS | MS_DIRSYNC |
 	    MS_NOATIME;
 
+#ifdef CONFIG_9P_FS_POSIX_ACL
+	if ((v9ses->flags & V9FS_ACCESS_MASK) == V9FS_ACCESS_CLIENT)
+		sb->s_flags |= MS_POSIXACL;
+#endif
+
 	save_mount_options(sb, data);
 }
 
 /**
- * v9fs_get_sb - mount a superblock
+ * v9fs_mount - mount a superblock
  * @fs_type: file system type
  * @flags: mount flags
  * @dev_name: device name that was mounted
  * @data: mount options
- * @mnt: mountpoint record to be instantiated
  *
  */
 
-static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
-		       const char *dev_name, void *data,
-		       struct vfsmount *mnt)
+static struct dentry *v9fs_mount(struct file_system_type *fs_type, int flags,
+		       const char *dev_name, void *data)
 {
 	struct super_block *sb = NULL;
 	struct inode *inode = NULL;
@@ -117,7 +122,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 
 	v9ses = kzalloc(sizeof(struct v9fs_session_info), GFP_KERNEL);
 	if (!v9ses)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	fid = v9fs_session_init(v9ses, dev_name, data);
 	if (IS_ERR(fid)) {
@@ -149,7 +154,6 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 		goto release_sb;
 	}
 	sb->s_root = root;
-
 	if (v9fs_proto_dotl(v9ses)) {
 		struct p9_stat_dotl *st = NULL;
 		st = p9_client_getattr_dotl(fid, P9_STATS_BASIC);
@@ -174,19 +178,21 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 		p9stat_free(st);
 		kfree(st);
 	}
-
+	retval = v9fs_get_acl(inode, fid);
+	if (retval)
+		goto release_sb;
 	v9fs_fid_add(root, fid);
 
 	P9_DPRINTK(P9_DEBUG_VFS, " simple set mount, return 0\n");
-	simple_set_mnt(mnt, sb);
-	return 0;
+	return dget(sb->s_root);
 
 clunk_fid:
 	p9_client_clunk(fid);
 close_session:
 	v9fs_session_close(v9ses);
 	kfree(v9ses);
-	return retval;
+	return ERR_PTR(retval);
+
 release_sb:
 	/*
 	 * we will do the session_close and root dentry release
@@ -196,7 +202,7 @@ release_sb:
 	 */
 	p9_client_clunk(fid);
 	deactivate_locked_super(sb);
-	return retval;
+	return ERR_PTR(retval);
 }
 
 /**
@@ -249,7 +255,7 @@ static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	if (v9fs_proto_dotl(v9ses)) {
 		res = p9_client_statfs(fid, &rs);
 		if (res == 0) {
-			buf->f_type = rs.type;
+			buf->f_type = V9FS_MAGIC;
 			buf->f_bsize = rs.bsize;
 			buf->f_blocks = rs.blocks;
 			buf->f_bfree = rs.bfree;
@@ -292,7 +298,7 @@ static const struct super_operations v9fs_super_ops_dotl = {
 
 struct file_system_type v9fs_fs_type = {
 	.name = "9p",
-	.get_sb = v9fs_get_sb,
+	.mount = v9fs_mount,
 	.kill_sb = v9fs_kill_super,
 	.owner = THIS_MODULE,
 	.fs_flags = FS_RENAME_DOES_D_MOVE,
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index f88e5c2dc87..43ec7df8433 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -21,30 +21,13 @@
 #include "fid.h"
 #include "xattr.h"
 
-/*
- * v9fs_xattr_get()
- *
- * Copy an extended attribute into the buffer
- * provided, or compute the buffer size required.
- * Buffer is NULL to compute the size of the buffer required.
- *
- * Returns a negative error number on failure, or the number of bytes
- * used / required on success.
- */
-ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name,
-		       void *buffer, size_t buffer_size)
+ssize_t v9fs_fid_xattr_get(struct p9_fid *fid, const char *name,
+			   void *buffer, size_t buffer_size)
 {
 	ssize_t retval;
 	int msize, read_count;
 	u64 offset = 0, attr_size;
-	struct p9_fid *fid, *attr_fid;
-
-	P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu\n",
-		__func__, name, buffer_size);
-
-	fid = v9fs_fid_lookup(dentry);
-	if (IS_ERR(fid))
-		return PTR_ERR(fid);
+	struct p9_fid *attr_fid;
 
 	attr_fid = p9_client_xattrwalk(fid, name, &attr_size);
 	if (IS_ERR(attr_fid)) {
@@ -88,6 +71,31 @@ error:
 
 }
 
+
+/*
+ * v9fs_xattr_get()
+ *
+ * Copy an extended attribute into the buffer
+ * provided, or compute the buffer size required.
+ * Buffer is NULL to compute the size of the buffer required.
+ *
+ * Returns a negative error number on failure, or the number of bytes
+ * used / required on success.
+ */
+ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name,
+		       void *buffer, size_t buffer_size)
+{
+	struct p9_fid *fid;
+
+	P9_DPRINTK(P9_DEBUG_VFS, "%s: name = %s value_len = %zu\n",
+		__func__, name, buffer_size);
+	fid = v9fs_fid_lookup(dentry);
+	if (IS_ERR(fid))
+		return PTR_ERR(fid);
+
+	return v9fs_fid_xattr_get(fid, name, buffer, buffer_size);
+}
+
 /*
  * v9fs_xattr_set()
  *
@@ -156,5 +164,9 @@ ssize_t v9fs_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
 
 const struct xattr_handler *v9fs_xattr_handlers[] = {
 	&v9fs_xattr_user_handler,
+#ifdef CONFIG_9P_FS_POSIX_ACL
+	&v9fs_xattr_acl_access_handler,
+	&v9fs_xattr_acl_default_handler,
+#endif
 	NULL
 };
diff --git a/fs/9p/xattr.h b/fs/9p/xattr.h
index 9ddf672ae5c..eaa837c53bd 100644
--- a/fs/9p/xattr.h
+++ b/fs/9p/xattr.h
@@ -15,10 +15,16 @@
 #define FS_9P_XATTR_H
 
 #include <linux/xattr.h>
+#include <net/9p/9p.h>
+#include <net/9p/client.h>
 
 extern const struct xattr_handler *v9fs_xattr_handlers[];
 extern struct xattr_handler v9fs_xattr_user_handler;
+extern const struct xattr_handler v9fs_xattr_acl_access_handler;
+extern const struct xattr_handler v9fs_xattr_acl_default_handler;
 
+extern ssize_t v9fs_fid_xattr_get(struct p9_fid *, const char *,
+				  void *, size_t);
 extern ssize_t v9fs_xattr_get(struct dentry *, const char *,
 			      void *, size_t);
 extern int v9fs_xattr_set(struct dentry *, const char *,
diff --git a/fs/Kconfig b/fs/Kconfig
index 65781de44fc..771f457402d 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -47,10 +47,12 @@ source "fs/nilfs2/Kconfig"
 
 endif # BLOCK
 
+config EXPORTFS
+	tristate
+
 config FILE_LOCKING
 	bool "Enable POSIX file locking API" if EMBEDDED
 	default y
-	select BKL # while lockd still uses it.
 	help
 	  This option enables standard file locking support, required
           for filesystems like NFS and for the flock() system
@@ -60,7 +62,6 @@ source "fs/notify/Kconfig"
 
 source "fs/quota/Kconfig"
 
-source "fs/autofs/Kconfig"
 source "fs/autofs4/Kconfig"
 source "fs/fuse/Kconfig"
 
@@ -222,9 +223,6 @@ config LOCKD_V4
 	depends on FILE_LOCKING
 	default y
 
-config EXPORTFS
-	tristate
-
 config NFS_ACL_SUPPORT
 	tristate
 	select FS_POSIX_ACL
@@ -235,7 +233,6 @@ config NFS_COMMON
 	default y
 
 source "net/sunrpc/Kconfig"
-source "fs/smbfs/Kconfig"
 source "fs/ceph/Kconfig"
 source "fs/cifs/Kconfig"
 source "fs/ncpfs/Kconfig"
diff --git a/fs/Kconfig.binfmt b/fs/Kconfig.binfmt
index bb4cc5b8abc..79e2ca7973b 100644
--- a/fs/Kconfig.binfmt
+++ b/fs/Kconfig.binfmt
@@ -42,7 +42,7 @@ config BINFMT_ELF_FDPIC
 
 config CORE_DUMP_DEFAULT_ELF_HEADERS
 	bool "Write ELF core dumps with partial segments"
-	default n
+	default y
 	depends on BINFMT_ELF && ELF_CORE
 	help
 	  ELF core dump files describe each memory mapping of the crashed
@@ -60,7 +60,7 @@ config CORE_DUMP_DEFAULT_ELF_HEADERS
 	  inherited.  See Documentation/filesystems/proc.txt for details.
 
 	  This config option changes the default setting of coredump_filter
-	  seen at boot time.  If unsure, say N.
+	  seen at boot time.  If unsure, say Y.
 
 config BINFMT_FLAT
 	bool "Kernel support for flat binaries"
diff --git a/fs/Makefile b/fs/Makefile
index 26956fcec91..a7f7cef0c0c 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -88,7 +88,6 @@ obj-$(CONFIG_NFSD)		+= nfsd/
 obj-$(CONFIG_LOCKD)		+= lockd/
 obj-$(CONFIG_NLS)		+= nls/
 obj-$(CONFIG_SYSV_FS)		+= sysv/
-obj-$(CONFIG_SMB_FS)		+= smbfs/
 obj-$(CONFIG_CIFS)		+= cifs/
 obj-$(CONFIG_NCP_FS)		+= ncpfs/
 obj-$(CONFIG_HPFS_FS)		+= hpfs/
@@ -101,7 +100,6 @@ obj-$(CONFIG_UBIFS_FS)		+= ubifs/
 obj-$(CONFIG_AFFS_FS)		+= affs/
 obj-$(CONFIG_ROMFS_FS)		+= romfs/
 obj-$(CONFIG_QNX4FS_FS)		+= qnx4/
-obj-$(CONFIG_AUTOFS_FS)		+= autofs/
 obj-$(CONFIG_AUTOFS4_FS)	+= autofs4/
 obj-$(CONFIG_ADFS_FS)		+= adfs/
 obj-$(CONFIG_FUSE_FS)		+= fuse/
diff --git a/fs/adfs/super.c b/fs/adfs/super.c
index d9803f73236..959dbff2d42 100644
--- a/fs/adfs/super.c
+++ b/fs/adfs/super.c
@@ -490,17 +490,16 @@ error:
 	return -EINVAL;
 }
 
-static int adfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *adfs_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, adfs_fill_super,
-			   mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, adfs_fill_super);
 }
 
 static struct file_system_type adfs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "adfs",
-	.get_sb		= adfs_get_sb,
+	.mount		= adfs_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
diff --git a/fs/affs/file.c b/fs/affs/file.c
index c4a9875bd1a..0a90dcd46de 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -894,9 +894,9 @@ affs_truncate(struct inode *inode)
 		if (AFFS_SB(sb)->s_flags & SF_OFS) {
 			struct buffer_head *bh = affs_bread_ino(inode, last_blk, 0);
 			u32 tmp;
-			if (IS_ERR(ext_bh)) {
+			if (IS_ERR(bh)) {
 				affs_warning(sb, "truncate", "unexpected read error for last block %u (%d)",
-					     ext, PTR_ERR(ext_bh));
+					     ext, PTR_ERR(bh));
 				return;
 			}
 			tmp = be32_to_cpu(AFFS_DATA_HEAD(bh)->next);
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 3a0fdec175b..5d828903ac6 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -388,7 +388,7 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3
 		affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain));
 		mark_buffer_dirty_inode(inode_bh, inode);
 		inode->i_nlink = 2;
-		atomic_inc(&inode->i_count);
+		ihold(inode);
 	}
 	affs_fix_checksum(sb, bh);
 	mark_buffer_dirty_inode(bh, inode);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index fa4fbe1e238..0cf7f4384cb 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -573,17 +573,16 @@ affs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
-static int affs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *affs_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, affs_fill_super,
-			   mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, affs_fill_super);
 }
 
 static struct file_system_type affs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "affs",
-	.get_sb		= affs_get_sb,
+	.mount		= affs_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index 0d38c09bd55..5439e1bc9a8 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -1045,7 +1045,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
 	if (ret < 0)
 		goto link_error;
 
-	atomic_inc(&vnode->vfs_inode.i_count);
+	ihold(&vnode->vfs_inode);
 	d_instantiate(dentry, &vnode->vfs_inode);
 	key_put(key);
 	_leave(" = 0");
diff --git a/fs/afs/super.c b/fs/afs/super.c
index eacf76d98ae..27201cffece 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -29,9 +29,8 @@
 #define AFS_FS_MAGIC 0x6B414653 /* 'kAFS' */
 
 static void afs_i_init_once(void *foo);
-static int afs_get_sb(struct file_system_type *fs_type,
-		      int flags, const char *dev_name,
-		      void *data, struct vfsmount *mnt);
+static struct dentry *afs_mount(struct file_system_type *fs_type,
+		      int flags, const char *dev_name, void *data);
 static struct inode *afs_alloc_inode(struct super_block *sb);
 static void afs_put_super(struct super_block *sb);
 static void afs_destroy_inode(struct inode *inode);
@@ -40,7 +39,7 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf);
 struct file_system_type afs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "afs",
-	.get_sb		= afs_get_sb,
+	.mount		= afs_mount,
 	.kill_sb	= kill_anon_super,
 	.fs_flags	= 0,
 };
@@ -359,11 +358,8 @@ error:
 /*
  * get an AFS superblock
  */
-static int afs_get_sb(struct file_system_type *fs_type,
-		      int flags,
-		      const char *dev_name,
-		      void *options,
-		      struct vfsmount *mnt)
+static struct dentry *afs_mount(struct file_system_type *fs_type,
+		      int flags, const char *dev_name, void *options)
 {
 	struct afs_mount_params params;
 	struct super_block *sb;
@@ -427,12 +423,11 @@ static int afs_get_sb(struct file_system_type *fs_type,
 		ASSERTCMP(sb->s_flags, &, MS_ACTIVE);
 	}
 
-	simple_set_mnt(mnt, sb);
 	afs_put_volume(params.volume);
 	afs_put_cell(params.cell);
 	kfree(new_opts);
 	_leave(" = 0 [%p]", sb);
-	return 0;
+	return dget(sb->s_root);
 
 error:
 	afs_put_volume(params.volume);
@@ -440,7 +435,7 @@ error:
 	key_put(params.key);
 	kfree(new_opts);
 	_leave(" = %d", ret);
-	return ret;
+	return ERR_PTR(ret);
 }
 
 /*
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 722743b152d..15690bb1d3b 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -438,7 +438,6 @@ no_more:
  */
 int afs_writepage(struct page *page, struct writeback_control *wbc)
 {
-	struct backing_dev_info *bdi = page->mapping->backing_dev_info;
 	struct afs_writeback *wb;
 	int ret;
 
@@ -455,8 +454,6 @@ int afs_writepage(struct page *page, struct writeback_control *wbc)
 	}
 
 	wbc->nr_to_write -= ret;
-	if (wbc->nonblocking && bdi_write_congested(bdi))
-		wbc->encountered_congestion = 1;
 
 	_leave(" = 0");
 	return 0;
@@ -469,7 +466,6 @@ static int afs_writepages_region(struct address_space *mapping,
 				 struct writeback_control *wbc,
 				 pgoff_t index, pgoff_t end, pgoff_t *_next)
 {
-	struct backing_dev_info *bdi = mapping->backing_dev_info;
 	struct afs_writeback *wb;
 	struct page *page;
 	int ret, n;
@@ -529,11 +525,6 @@ static int afs_writepages_region(struct address_space *mapping,
 
 		wbc->nr_to_write -= ret;
 
-		if (wbc->nonblocking && bdi_write_congested(bdi)) {
-			wbc->encountered_congestion = 1;
-			break;
-		}
-
 		cond_resched();
 	} while (index < end && wbc->nr_to_write > 0);
 
@@ -548,24 +539,16 @@ static int afs_writepages_region(struct address_space *mapping,
 int afs_writepages(struct address_space *mapping,
 		   struct writeback_control *wbc)
 {
-	struct backing_dev_info *bdi = mapping->backing_dev_info;
 	pgoff_t start, end, next;
 	int ret;
 
 	_enter("");
 
-	if (wbc->nonblocking && bdi_write_congested(bdi)) {
-		wbc->encountered_congestion = 1;
-		_leave(" = 0 [congest]");
-		return 0;
-	}
-
 	if (wbc->range_cyclic) {
 		start = mapping->writeback_index;
 		end = -1;
 		ret = afs_writepages_region(mapping, wbc, start, end, &next);
-		if (start > 0 && wbc->nr_to_write > 0 && ret == 0 &&
-		    !(wbc->nonblocking && wbc->encountered_congestion))
+		if (start > 0 && wbc->nr_to_write > 0 && ret == 0)
 			ret = afs_writepages_region(mapping, wbc, 0, start,
 						    &next);
 		mapping->writeback_index = next;
diff --git a/fs/aio.c b/fs/aio.c
index 250b0a73c8a..8c8f6c5b6d7 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1543,7 +1543,19 @@ static void aio_batch_add(struct address_space *mapping,
 	}
 
 	abe = mempool_alloc(abe_pool, GFP_KERNEL);
-	BUG_ON(!igrab(mapping->host));
+
+	/*
+	 * we should be using igrab here, but
+	 * we don't want to hammer on the global
+	 * inode spinlock just to take an extra
+	 * reference on a file that we must already
+	 * have a reference to.
+	 *
+	 * When we're called, we always have a reference
+	 * on the file, so we must always have a reference
+	 * on the inode, so ihold() is safe here.
+	 */
+	ihold(mapping->host);
 	abe->mapping = mapping;
 	hlist_add_head(&abe->list, &batch_hash[bucket]);
 	return;
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index e4b75d6eda8..57ce55b2564 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -26,12 +26,10 @@ static struct vfsmount *anon_inode_mnt __read_mostly;
 static struct inode *anon_inode_inode;
 static const struct file_operations anon_inode_fops;
 
-static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags,
-			       const char *dev_name, void *data,
-			       struct vfsmount *mnt)
+static struct dentry *anon_inodefs_mount(struct file_system_type *fs_type,
+				int flags, const char *dev_name, void *data)
 {
-	return get_sb_pseudo(fs_type, "anon_inode:", NULL, ANON_INODE_FS_MAGIC,
-			     mnt);
+	return mount_pseudo(fs_type, "anon_inode:", NULL, ANON_INODE_FS_MAGIC);
 }
 
 /*
@@ -45,7 +43,7 @@ static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
 
 static struct file_system_type anon_inode_fs_type = {
 	.name		= "anon_inodefs",
-	.get_sb		= anon_inodefs_get_sb,
+	.mount		= anon_inodefs_mount,
 	.kill_sb	= kill_anon_super,
 };
 static const struct dentry_operations anon_inodefs_dentry_operations = {
@@ -111,10 +109,9 @@ struct file *anon_inode_getfile(const char *name,
 	path.mnt = mntget(anon_inode_mnt);
 	/*
 	 * We know the anon_inode inode count is always greater than zero,
-	 * so we can avoid doing an igrab() and we can use an open-coded
-	 * atomic_inc().
+	 * so ihold() is safe.
 	 */
-	atomic_inc(&anon_inode_inode->i_count);
+	ihold(anon_inode_inode);
 
 	path.dentry->d_op = &anon_inodefs_dentry_operations;
 	d_instantiate(path.dentry, anon_inode_inode);
@@ -194,6 +191,7 @@ static struct inode *anon_inode_mkinode(void)
 	if (!inode)
 		return ERR_PTR(-ENOMEM);
 
+	inode->i_ino = get_next_ino();
 	inode->i_fop = &anon_inode_fops;
 
 	inode->i_mapping->a_ops = &anon_aops;
diff --git a/fs/autofs/Kconfig b/fs/autofs/Kconfig
deleted file mode 100644
index 480e210c83a..00000000000
--- a/fs/autofs/Kconfig
+++ /dev/null
@@ -1,22 +0,0 @@
-config AUTOFS_FS
-	tristate "Kernel automounter support"
-	depends on BKL # unfixable, just use autofs4
-	help
-	  The automounter is a tool to automatically mount remote file systems
-	  on demand. This implementation is partially kernel-based to reduce
-	  overhead in the already-mounted case; this is unlike the BSD
-	  automounter (amd), which is a pure user space daemon.
-
-	  To use the automounter you need the user-space tools from the autofs
-	  package; you can find the location in <file:Documentation/Changes>.
-	  You also want to answer Y to "NFS file system support", below.
-
-	  If you want to use the newer version of the automounter with more
-	  features, say N here and say Y to "Kernel automounter v4 support",
-	  below.
-
-	  To compile this support as a module, choose M here: the module will be
-	  called autofs.
-
-	  If you are not a part of a fairly large, distributed network, you
-	  probably do not need an automounter, and can say N here.
diff --git a/fs/autofs/Makefile b/fs/autofs/Makefile
deleted file mode 100644
index 453a60f46d0..00000000000
--- a/fs/autofs/Makefile
+++ /dev/null
@@ -1,7 +0,0 @@
-#
-# Makefile for the linux autofs-filesystem routines.
-#
-
-obj-$(CONFIG_AUTOFS_FS) += autofs.o
-
-autofs-objs := dirhash.o init.o inode.o root.o symlink.o waitq.o
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
deleted file mode 100644
index 901a3e67ec4..00000000000
--- a/fs/autofs/autofs_i.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/* -*- linux-c -*- ------------------------------------------------------- *
- *   
- * linux/fs/autofs/autofs_i.h
- *
- *   Copyright 1997-1998 Transmeta Corporation - All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * ----------------------------------------------------------------------- */
-
-/* Internal header file for autofs */
-
-#include <linux/auto_fs.h>
-
-/* This is the range of ioctl() numbers we claim as ours */
-#define AUTOFS_IOC_FIRST     AUTOFS_IOC_READY
-#define AUTOFS_IOC_COUNT     32
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/time.h>
-#include <linux/string.h>
-#include <linux/wait.h>
-#include <linux/dcache.h>
-#include <linux/namei.h>
-#include <linux/mount.h>
-#include <linux/sched.h>
-
-#include <asm/current.h>
-#include <asm/uaccess.h>
-
-#ifdef DEBUG
-#define DPRINTK(D) (printk D)
-#else
-#define DPRINTK(D) ((void)0)
-#endif
-
-/*
- * If the daemon returns a negative response (AUTOFS_IOC_FAIL) then the
- * kernel will keep the negative response cached for up to the time given
- * here, although the time can be shorter if the kernel throws the dcache
- * entry away.  This probably should be settable from user space.
- */
-#define AUTOFS_NEGATIVE_TIMEOUT (60*HZ)	/* 1 minute */
-
-/* Structures associated with the root directory hash table */
-
-#define AUTOFS_HASH_SIZE 67
-
-struct autofs_dir_ent {
-	int hash;
-	char *name;
-	int len;
-	ino_t ino;
-	struct dentry *dentry;
-	/* Linked list of entries */
-	struct autofs_dir_ent *next;
-	struct autofs_dir_ent **back;
-	/* The following entries are for the expiry system */
-	unsigned long last_usage;
-	struct list_head exp;
-};
-
-struct autofs_dirhash {
-	struct autofs_dir_ent *h[AUTOFS_HASH_SIZE];
-	struct list_head expiry_head;
-};
-
-struct autofs_wait_queue {
-	wait_queue_head_t queue;
-	struct autofs_wait_queue *next;
-	autofs_wqt_t wait_queue_token;
-	/* We use the following to see what we are waiting for */
-	int hash;
-	int len;
-	char *name;
-	/* This is for status reporting upon return */
-	int status;
-	int wait_ctr;
-};
-
-struct autofs_symlink {
-	char *data;
-	int len;
-	time_t mtime;
-};
-
-#define AUTOFS_MAX_SYMLINKS 256
-
-#define AUTOFS_ROOT_INO      1
-#define AUTOFS_FIRST_SYMLINK 2
-#define AUTOFS_FIRST_DIR_INO (AUTOFS_FIRST_SYMLINK+AUTOFS_MAX_SYMLINKS)
-
-#define AUTOFS_SYMLINK_BITMAP_LEN \
-	((AUTOFS_MAX_SYMLINKS+((sizeof(long)*1)-1))/(sizeof(long)*8))
-
-#define AUTOFS_SBI_MAGIC 0x6d4a556d
-
-struct autofs_sb_info {
-	u32 magic;
-	struct file *pipe;
-	struct pid *oz_pgrp;
-	int catatonic;
-	struct super_block *sb;
-	unsigned long exp_timeout;
-	ino_t next_dir_ino;
-	struct autofs_wait_queue *queues; /* Wait queue pointer */
-	struct autofs_dirhash dirhash; /* Root directory hash */
-	struct autofs_symlink symlink[AUTOFS_MAX_SYMLINKS];
-	unsigned long symlink_bitmap[AUTOFS_SYMLINK_BITMAP_LEN];
-};
-
-static inline struct autofs_sb_info *autofs_sbi(struct super_block *sb)
-{
-	return (struct autofs_sb_info *)(sb->s_fs_info);
-}
-
-/* autofs_oz_mode(): do we see the man behind the curtain?  (The
-   processes which do manipulations for us in user space sees the raw
-   filesystem without "magic".) */
-
-static inline int autofs_oz_mode(struct autofs_sb_info *sbi) {
-	return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp;
-}
-
-/* Hash operations */
-
-void autofs_initialize_hash(struct autofs_dirhash *);
-struct autofs_dir_ent *autofs_hash_lookup(const struct autofs_dirhash *,struct qstr *);
-void autofs_hash_insert(struct autofs_dirhash *,struct autofs_dir_ent *);
-void autofs_hash_delete(struct autofs_dir_ent *);
-struct autofs_dir_ent *autofs_hash_enum(const struct autofs_dirhash *,off_t *,struct autofs_dir_ent *);
-void autofs_hash_dputall(struct autofs_dirhash *);
-void autofs_hash_nuke(struct autofs_sb_info *);
-
-/* Expiration-handling functions */
-
-void autofs_update_usage(struct autofs_dirhash *,struct autofs_dir_ent *);
-struct autofs_dir_ent *autofs_expire(struct super_block *,struct autofs_sb_info *, struct vfsmount *mnt);
-
-/* Operations structures */
-
-extern const struct inode_operations autofs_root_inode_operations;
-extern const struct inode_operations autofs_symlink_inode_operations;
-extern const struct file_operations autofs_root_operations;
-
-/* Initializing function */
-
-int autofs_fill_super(struct super_block *, void *, int);
-void autofs_kill_sb(struct super_block *sb);
-struct inode *autofs_iget(struct super_block *, unsigned long);
-
-/* Queue management functions */
-
-int autofs_wait(struct autofs_sb_info *,struct qstr *);
-int autofs_wait_release(struct autofs_sb_info *,autofs_wqt_t,int);
-void autofs_catatonic_mode(struct autofs_sb_info *);
-
-#ifdef DEBUG
-void autofs_say(const char *name, int len);
-#else
-#define autofs_say(n,l) ((void)0)
-#endif
diff --git a/fs/autofs/dirhash.c b/fs/autofs/dirhash.c
deleted file mode 100644
index e947915109e..00000000000
--- a/fs/autofs/dirhash.c
+++ /dev/null
@@ -1,250 +0,0 @@
-/* -*- linux-c -*- --------------------------------------------------------- *
- *
- * linux/fs/autofs/dirhash.c
- *
- *  Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
-
-#include "autofs_i.h"
-
-/* Functions for maintenance of expiry queue */
-
-static void autofs_init_usage(struct autofs_dirhash *dh,
-			      struct autofs_dir_ent *ent)
-{
-	list_add_tail(&ent->exp, &dh->expiry_head);
-	ent->last_usage = jiffies;
-}
-
-static void autofs_delete_usage(struct autofs_dir_ent *ent)
-{
-	list_del(&ent->exp);
-}
-
-void autofs_update_usage(struct autofs_dirhash *dh,
-			 struct autofs_dir_ent *ent)
-{
-	autofs_delete_usage(ent);   /* Unlink from current position */
-	autofs_init_usage(dh,ent);  /* Relink at queue tail */
-}
-
-struct autofs_dir_ent *autofs_expire(struct super_block *sb,
-				     struct autofs_sb_info *sbi,
-				     struct vfsmount *mnt)
-{
-	struct autofs_dirhash *dh = &sbi->dirhash;
-	struct autofs_dir_ent *ent;
-	unsigned long timeout = sbi->exp_timeout;
-
-	while (1) {
-		struct path path;
-		int umount_ok;
-
-		if ( list_empty(&dh->expiry_head) || sbi->catatonic )
-			return NULL;	/* No entries */
-		/* We keep the list sorted by last_usage and want old stuff */
-		ent = list_entry(dh->expiry_head.next, struct autofs_dir_ent, exp);
-		if (jiffies - ent->last_usage < timeout)
-			break;
-		/* Move to end of list in case expiry isn't desirable */
-		autofs_update_usage(dh, ent);
-
-		/* Check to see that entry is expirable */
-		if ( ent->ino < AUTOFS_FIRST_DIR_INO )
-			return ent; /* Symlinks are always expirable */
-
-		/* Get the dentry for the autofs subdirectory */
-		path.dentry = ent->dentry;
-
-		if (!path.dentry) {
-			/* Should only happen in catatonic mode */
-			printk("autofs: dentry == NULL but inode range is directory, entry %s\n", ent->name);
-			autofs_delete_usage(ent);
-			continue;
-		}
-
-		if (!path.dentry->d_inode) {
-			dput(path.dentry);
-			printk("autofs: negative dentry on expiry queue: %s\n",
-			       ent->name);
-			autofs_delete_usage(ent);
-			continue;
-		}
-
-		/* Make sure entry is mounted and unused; note that dentry will
-		   point to the mounted-on-top root. */
-		if (!S_ISDIR(path.dentry->d_inode->i_mode) ||
-		    !d_mountpoint(path.dentry)) {
-			DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
-			continue;
-		}
-		path.mnt = mnt;
-		path_get(&path);
-		if (!follow_down(&path)) {
-			path_put(&path);
-			DPRINTK(("autofs: not expirable (not a mounted directory): %s\n", ent->name));
-			continue;
-		}
-		while (d_mountpoint(path.dentry) && follow_down(&path))
-			;
-		umount_ok = may_umount(path.mnt);
-		path_put(&path);
-
-		if (umount_ok) {
-			DPRINTK(("autofs: signaling expire on %s\n", ent->name));
-			return ent; /* Expirable! */
-		}
-		DPRINTK(("autofs: didn't expire due to may_umount: %s\n", ent->name));
-	}
-	return NULL;		/* No expirable entries */
-}
-
-void autofs_initialize_hash(struct autofs_dirhash *dh) {
-	memset(&dh->h, 0, AUTOFS_HASH_SIZE*sizeof(struct autofs_dir_ent *));
-	INIT_LIST_HEAD(&dh->expiry_head);
-}
-
-struct autofs_dir_ent *autofs_hash_lookup(const struct autofs_dirhash *dh, struct qstr *name)
-{
-	struct autofs_dir_ent *dhn;
-
-	DPRINTK(("autofs_hash_lookup: hash = 0x%08x, name = ", name->hash));
-	autofs_say(name->name,name->len);
-
-	for ( dhn = dh->h[(unsigned) name->hash % AUTOFS_HASH_SIZE] ; dhn ; dhn = dhn->next ) {
-		if ( name->hash == dhn->hash &&
-		     name->len == dhn->len &&
-		     !memcmp(name->name, dhn->name, name->len) )
-			break;
-	}
-
-	return dhn;
-}
-
-void autofs_hash_insert(struct autofs_dirhash *dh, struct autofs_dir_ent *ent)
-{
-	struct autofs_dir_ent **dhnp;
-
-	DPRINTK(("autofs_hash_insert: hash = 0x%08x, name = ", ent->hash));
-	autofs_say(ent->name,ent->len);
-
-	autofs_init_usage(dh,ent);
-	if (ent->dentry)
-		dget(ent->dentry);
-
-	dhnp = &dh->h[(unsigned) ent->hash % AUTOFS_HASH_SIZE];
-	ent->next = *dhnp;
-	ent->back = dhnp;
-	*dhnp = ent;
-	if ( ent->next )
-		ent->next->back = &(ent->next);
-}
-
-void autofs_hash_delete(struct autofs_dir_ent *ent)
-{
-	*(ent->back) = ent->next;
-	if ( ent->next )
-		ent->next->back = ent->back;
-
-	autofs_delete_usage(ent);
-
-	if ( ent->dentry )
-		dput(ent->dentry);
-	kfree(ent->name);
-	kfree(ent);
-}
-
-/*
- * Used by readdir().  We must validate "ptr", so we can't simply make it
- * a pointer.  Values below 0xffff are reserved; calling with any value
- * <= 0x10000 will return the first entry found.
- *
- * "last" can be NULL or the value returned by the last search *if* we
- * want the next sequential entry.
- */
-struct autofs_dir_ent *autofs_hash_enum(const struct autofs_dirhash *dh,
-					off_t *ptr, struct autofs_dir_ent *last)
-{
-	int bucket, ecount, i;
-	struct autofs_dir_ent *ent;
-
-	bucket = (*ptr >> 16) - 1;
-	ecount = *ptr & 0xffff;
-
-	if ( bucket < 0 ) {
-		bucket = ecount = 0;
-	} 
-
-	DPRINTK(("autofs_hash_enum: bucket %d, entry %d\n", bucket, ecount));
-
-	ent = last ? last->next : NULL;
-
-	if ( ent ) {
-		ecount++;
-	} else {
-		while  ( bucket < AUTOFS_HASH_SIZE ) {
-			ent = dh->h[bucket];
-			for ( i = ecount ; ent && i ; i-- )
-				ent = ent->next;
-			
-			if (ent) {
-				ecount++; /* Point to *next* entry */
-				break;
-			}
-			
-			bucket++; ecount = 0;
-		}
-	}
-
-#ifdef DEBUG
-	if ( !ent )
-		printk("autofs_hash_enum: nothing found\n");
-	else {
-		printk("autofs_hash_enum: found hash %08x, name", ent->hash);
-		autofs_say(ent->name,ent->len);
-	}
-#endif
-
-	*ptr = ((bucket+1) << 16) + ecount;
-	return ent;
-}
-
-/* Iterate over all the ents, and remove all dentry pointers.  Used on
-   entering catatonic mode, in order to make the filesystem unmountable. */
-void autofs_hash_dputall(struct autofs_dirhash *dh)
-{
-	int i;
-	struct autofs_dir_ent *ent;
-
-	for ( i = 0 ; i < AUTOFS_HASH_SIZE ; i++ ) {
-		for ( ent = dh->h[i] ; ent ; ent = ent->next ) {
-			if ( ent->dentry ) {
-				dput(ent->dentry);
-				ent->dentry = NULL;
-			}
-		}
-	}
-}
-
-/* Delete everything.  This is used on filesystem destruction, so we
-   make no attempt to keep the pointers valid */
-void autofs_hash_nuke(struct autofs_sb_info *sbi)
-{
-	int i;
-	struct autofs_dir_ent *ent, *nent;
-
-	for ( i = 0 ; i < AUTOFS_HASH_SIZE ; i++ ) {
-		for ( ent = sbi->dirhash.h[i] ; ent ; ent = nent ) {
-			nent = ent->next;
-			if ( ent->dentry )
-				dput(ent->dentry);
-			kfree(ent->name);
-			kfree(ent);
-		}
-	}
-}
diff --git a/fs/autofs/init.c b/fs/autofs/init.c
deleted file mode 100644
index cea5219b4f3..00000000000
--- a/fs/autofs/init.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/* -*- linux-c -*- --------------------------------------------------------- *
- *
- * linux/fs/autofs/init.c
- *
- *  Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include "autofs_i.h"
-
-static int autofs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
-{
-	return get_sb_nodev(fs_type, flags, data, autofs_fill_super, mnt);
-}
-
-static struct file_system_type autofs_fs_type = {
-	.owner		= THIS_MODULE,
-	.name		= "autofs",
-	.get_sb		= autofs_get_sb,
-	.kill_sb	= autofs_kill_sb,
-};
-
-static int __init init_autofs_fs(void)
-{
-	return register_filesystem(&autofs_fs_type);
-}
-
-static void __exit exit_autofs_fs(void)
-{
-	unregister_filesystem(&autofs_fs_type);
-}
-
-module_init(init_autofs_fs);
-module_exit(exit_autofs_fs);
-
-#ifdef DEBUG
-void autofs_say(const char *name, int len)
-{
-	printk("(%d: ", len);
-	while ( len-- )
-		printk("%c", *name++);
-	printk(")\n");
-}
-#endif
-MODULE_LICENSE("GPL");
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
deleted file mode 100644
index e1734f2d6e2..00000000000
--- a/fs/autofs/inode.c
+++ /dev/null
@@ -1,288 +0,0 @@
-/* -*- linux-c -*- --------------------------------------------------------- *
- *
- * linux/fs/autofs/inode.c
- *
- *  Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
-
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/file.h>
-#include <linux/parser.h>
-#include <linux/bitops.h>
-#include <linux/magic.h>
-#include "autofs_i.h"
-#include <linux/module.h>
-
-void autofs_kill_sb(struct super_block *sb)
-{
-	struct autofs_sb_info *sbi = autofs_sbi(sb);
-	unsigned int n;
-
-	/*
-	 * In the event of a failure in get_sb_nodev the superblock
-	 * info is not present so nothing else has been setup, so
-	 * just call kill_anon_super when we are called from
-	 * deactivate_super.
-	 */
-	if (!sbi)
-		goto out_kill_sb;
-
-	if (!sbi->catatonic)
-		autofs_catatonic_mode(sbi); /* Free wait queues, close pipe */
-
-	put_pid(sbi->oz_pgrp);
-
-	autofs_hash_nuke(sbi);
-	for (n = 0; n < AUTOFS_MAX_SYMLINKS; n++) {
-		if (test_bit(n, sbi->symlink_bitmap))
-			kfree(sbi->symlink[n].data);
-	}
-
-	kfree(sb->s_fs_info);
-
-out_kill_sb:
-	DPRINTK(("autofs: shutting down\n"));
-	kill_anon_super(sb);
-}
-
-static const struct super_operations autofs_sops = {
-	.statfs		= simple_statfs,
-	.show_options	= generic_show_options,
-};
-
-enum {Opt_err, Opt_fd, Opt_uid, Opt_gid, Opt_pgrp, Opt_minproto, Opt_maxproto};
-
-static const match_table_t autofs_tokens = {
-	{Opt_fd, "fd=%u"},
-	{Opt_uid, "uid=%u"},
-	{Opt_gid, "gid=%u"},
-	{Opt_pgrp, "pgrp=%u"},
-	{Opt_minproto, "minproto=%u"},
-	{Opt_maxproto, "maxproto=%u"},
-	{Opt_err, NULL}
-};
-
-static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid,
-		pid_t *pgrp, int *minproto, int *maxproto)
-{
-	char *p;
-	substring_t args[MAX_OPT_ARGS];
-	int option;
-
-	*uid = current_uid();
-	*gid = current_gid();
-	*pgrp = task_pgrp_nr(current);
-
-	*minproto = *maxproto = AUTOFS_PROTO_VERSION;
-
-	*pipefd = -1;
-
-	if (!options)
-		return 1;
-
-	while ((p = strsep(&options, ",")) != NULL) {
-		int token;
-		if (!*p)
-			continue;
-
-		token = match_token(p, autofs_tokens, args);
-		switch (token) {
-		case Opt_fd:
-			if (match_int(&args[0], &option))
-				return 1;
-			*pipefd = option;
-			break;
-		case Opt_uid:
-			if (match_int(&args[0], &option))
-				return 1;
-			*uid = option;
-			break;
-		case Opt_gid:
-			if (match_int(&args[0], &option))
-				return 1;
-			*gid = option;
-			break;
-		case Opt_pgrp:
-			if (match_int(&args[0], &option))
-				return 1;
-			*pgrp = option;
-			break;
-		case Opt_minproto:
-			if (match_int(&args[0], &option))
-				return 1;
-			*minproto = option;
-			break;
-		case Opt_maxproto:
-			if (match_int(&args[0], &option))
-				return 1;
-			*maxproto = option;
-			break;
-		default:
-			return 1;
-		}
-	}
-	return (*pipefd < 0);
-}
-
-int autofs_fill_super(struct super_block *s, void *data, int silent)
-{
-	struct inode * root_inode;
-	struct dentry * root;
-	struct file * pipe;
-	int pipefd;
-	struct autofs_sb_info *sbi;
-	int minproto, maxproto;
-	pid_t pgid;
-
-	save_mount_options(s, data);
-
-	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
-	if (!sbi)
-		goto fail_unlock;
-	DPRINTK(("autofs: starting up, sbi = %p\n",sbi));
-
-	s->s_fs_info = sbi;
-	sbi->magic = AUTOFS_SBI_MAGIC;
-	sbi->pipe = NULL;
-	sbi->catatonic = 1;
-	sbi->exp_timeout = 0;
-	autofs_initialize_hash(&sbi->dirhash);
-	sbi->queues = NULL;
-	memset(sbi->symlink_bitmap, 0, sizeof(long)*AUTOFS_SYMLINK_BITMAP_LEN);
-	sbi->next_dir_ino = AUTOFS_FIRST_DIR_INO;
-	s->s_blocksize = 1024;
-	s->s_blocksize_bits = 10;
-	s->s_magic = AUTOFS_SUPER_MAGIC;
-	s->s_op = &autofs_sops;
-	s->s_time_gran = 1;
-	sbi->sb = s;
-
-	root_inode = autofs_iget(s, AUTOFS_ROOT_INO);
-	if (IS_ERR(root_inode))
-		goto fail_free;
-	root = d_alloc_root(root_inode);
-	pipe = NULL;
-
-	if (!root)
-		goto fail_iput;
-
-	/* Can this call block?  - WTF cares? s is locked. */
-	if (parse_options(data, &pipefd, &root_inode->i_uid,
-				&root_inode->i_gid, &pgid, &minproto,
-				&maxproto)) {
-		printk("autofs: called with bogus options\n");
-		goto fail_dput;
-	}
-
-	/* Couldn't this be tested earlier? */
-	if (minproto > AUTOFS_PROTO_VERSION ||
-	     maxproto < AUTOFS_PROTO_VERSION) {
-		printk("autofs: kernel does not match daemon version\n");
-		goto fail_dput;
-	}
-
-	DPRINTK(("autofs: pipe fd = %d, pgrp = %u\n", pipefd, pgid));
-	sbi->oz_pgrp = find_get_pid(pgid);
-
-	if (!sbi->oz_pgrp) {
-		printk("autofs: could not find process group %d\n", pgid);
-		goto fail_dput;
-	}
-
-	pipe = fget(pipefd);
-	
-	if (!pipe) {
-		printk("autofs: could not open pipe file descriptor\n");
-		goto fail_put_pid;
-	}
-
-	if (!pipe->f_op || !pipe->f_op->write)
-		goto fail_fput;
-	sbi->pipe = pipe;
-	sbi->catatonic = 0;
-
-	/*
-	 * Success! Install the root dentry now to indicate completion.
-	 */
-	s->s_root = root;
-	return 0;
-
-fail_fput:
-	printk("autofs: pipe file descriptor does not contain proper ops\n");
-	fput(pipe);
-fail_put_pid:
-	put_pid(sbi->oz_pgrp);
-fail_dput:
-	dput(root);
-	goto fail_free;
-fail_iput:
-	printk("autofs: get root dentry failed\n");
-	iput(root_inode);
-fail_free:
-	kfree(sbi);
-	s->s_fs_info = NULL;
-fail_unlock:
-	return -EINVAL;
-}
-
-struct inode *autofs_iget(struct super_block *sb, unsigned long ino)
-{
-	unsigned int n;
-	struct autofs_sb_info *sbi = autofs_sbi(sb);
-	struct inode *inode;
-
-	inode = iget_locked(sb, ino);
-	if (!inode)
-		return ERR_PTR(-ENOMEM);
-	if (!(inode->i_state & I_NEW))
-		return inode;
-
-	/* Initialize to the default case (stub directory) */
-
-	inode->i_op = &simple_dir_inode_operations;
-	inode->i_fop = &simple_dir_operations;
-	inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
-	inode->i_nlink = 2;
-	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
-
-	if (ino == AUTOFS_ROOT_INO) {
-		inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO | S_IWUSR;
-		inode->i_op = &autofs_root_inode_operations;
-		inode->i_fop = &autofs_root_operations;
-		goto done;
-	} 
-	
-	inode->i_uid = inode->i_sb->s_root->d_inode->i_uid;
-	inode->i_gid = inode->i_sb->s_root->d_inode->i_gid;
-	
-	if (ino >= AUTOFS_FIRST_SYMLINK && ino < AUTOFS_FIRST_DIR_INO) {
-		/* Symlink inode - should be in symlink list */
-		struct autofs_symlink *sl;
-
-		n = ino - AUTOFS_FIRST_SYMLINK;
-		if (n >= AUTOFS_MAX_SYMLINKS || !test_bit(n,sbi->symlink_bitmap)) {
-			printk("autofs: Looking for bad symlink inode %u\n", (unsigned int) ino);
-			goto done;
-		}
-		
-		inode->i_op = &autofs_symlink_inode_operations;
-		sl = &sbi->symlink[n];
-		inode->i_private = sl;
-		inode->i_mode = S_IFLNK | S_IRWXUGO;
-		inode->i_mtime.tv_sec = inode->i_ctime.tv_sec = sl->mtime;
-		inode->i_mtime.tv_nsec = inode->i_ctime.tv_nsec = 0;
-		inode->i_size = sl->len;
-		inode->i_nlink = 1;
-	}
-
-done:
-	unlock_new_inode(inode);
-	return inode;
-}
diff --git a/fs/autofs/root.c b/fs/autofs/root.c
deleted file mode 100644
index 0c4ca81aeae..00000000000
--- a/fs/autofs/root.c
+++ /dev/null
@@ -1,645 +0,0 @@
-/* -*- linux-c -*- --------------------------------------------------------- *
- *
- * linux/fs/autofs/root.c
- *
- *  Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
-
-#include <linux/capability.h>
-#include <linux/errno.h>
-#include <linux/stat.h>
-#include <linux/slab.h>
-#include <linux/param.h>
-#include <linux/time.h>
-#include <linux/compat.h>
-#include <linux/smp_lock.h>
-#include "autofs_i.h"
-
-static int autofs_root_readdir(struct file *,void *,filldir_t);
-static struct dentry *autofs_root_lookup(struct inode *,struct dentry *, struct nameidata *);
-static int autofs_root_symlink(struct inode *,struct dentry *,const char *);
-static int autofs_root_unlink(struct inode *,struct dentry *);
-static int autofs_root_rmdir(struct inode *,struct dentry *);
-static int autofs_root_mkdir(struct inode *,struct dentry *,int);
-static long autofs_root_ioctl(struct file *,unsigned int,unsigned long);
-#ifdef CONFIG_COMPAT
-static long autofs_root_compat_ioctl(struct file *,unsigned int,unsigned long);
-#endif
-
-const struct file_operations autofs_root_operations = {
-	.llseek		= generic_file_llseek,
-	.read		= generic_read_dir,
-	.readdir	= autofs_root_readdir,
-	.unlocked_ioctl	= autofs_root_ioctl,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl	= autofs_root_compat_ioctl,
-#endif
-};
-
-const struct inode_operations autofs_root_inode_operations = {
-        .lookup		= autofs_root_lookup,
-        .unlink		= autofs_root_unlink,
-        .symlink	= autofs_root_symlink,
-        .mkdir		= autofs_root_mkdir,
-        .rmdir		= autofs_root_rmdir,
-};
-
-static int autofs_root_readdir(struct file *filp, void *dirent, filldir_t filldir)
-{
-	struct autofs_dir_ent *ent = NULL;
-	struct autofs_dirhash *dirhash;
-	struct autofs_sb_info *sbi;
-	struct inode * inode = filp->f_path.dentry->d_inode;
-	off_t onr, nr;
-
-	lock_kernel();
-
-	sbi = autofs_sbi(inode->i_sb);
-	dirhash = &sbi->dirhash;
-	nr = filp->f_pos;
-
-	switch(nr)
-	{
-	case 0:
-		if (filldir(dirent, ".", 1, nr, inode->i_ino, DT_DIR) < 0)
-			goto out;
-		filp->f_pos = ++nr;
-		/* fall through */
-	case 1:
-		if (filldir(dirent, "..", 2, nr, inode->i_ino, DT_DIR) < 0)
-			goto out;
-		filp->f_pos = ++nr;
-		/* fall through */
-	default:
-		while (onr = nr, ent = autofs_hash_enum(dirhash,&nr,ent)) {
-			if (!ent->dentry || d_mountpoint(ent->dentry)) {
-				if (filldir(dirent,ent->name,ent->len,onr,ent->ino,DT_UNKNOWN) < 0)
-					goto out;
-				filp->f_pos = nr;
-			}
-		}
-		break;
-	}
-
-out:
-	unlock_kernel();
-	return 0;
-}
-
-static int try_to_fill_dentry(struct dentry *dentry, struct super_block *sb, struct autofs_sb_info *sbi)
-{
-	struct inode * inode;
-	struct autofs_dir_ent *ent;
-	int status = 0;
-
-	if (!(ent = autofs_hash_lookup(&sbi->dirhash, &dentry->d_name))) {
-		do {
-			if (status && dentry->d_inode) {
-				if (status != -ENOENT)
-					printk("autofs warning: lookup failure on positive dentry, status = %d, name = %s\n", status, dentry->d_name.name);
-				return 0; /* Try to get the kernel to invalidate this dentry */
-			}
-
-			/* Turn this into a real negative dentry? */
-			if (status == -ENOENT) {
-				dentry->d_time = jiffies + AUTOFS_NEGATIVE_TIMEOUT;
-				dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
-				return 1;
-			} else if (status) {
-				/* Return a negative dentry, but leave it "pending" */
-				return 1;
-			}
-			status = autofs_wait(sbi, &dentry->d_name);
-		} while (!(ent = autofs_hash_lookup(&sbi->dirhash, &dentry->d_name)));
-	}
-
-	/* Abuse this field as a pointer to the directory entry, used to
-	   find the expire list pointers */
-	dentry->d_time = (unsigned long) ent;
-	
-	if (!dentry->d_inode) {
-		inode = autofs_iget(sb, ent->ino);
-		if (IS_ERR(inode)) {
-			/* Failed, but leave pending for next time */
-			return 1;
-		}
-		dentry->d_inode = inode;
-	}
-
-	/* If this is a directory that isn't a mount point, bitch at the
-	   daemon and fix it in user space */
-	if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry)) {
-		return !autofs_wait(sbi, &dentry->d_name);
-	}
-
-	/* We don't update the usages for the autofs daemon itself, this
-	   is necessary for recursive autofs mounts */
-	if (!autofs_oz_mode(sbi)) {
-		autofs_update_usage(&sbi->dirhash,ent);
-	}
-
-	dentry->d_flags &= ~DCACHE_AUTOFS_PENDING;
-	return 1;
-}
-
-
-/*
- * Revalidate is called on every cache lookup.  Some of those
- * cache lookups may actually happen while the dentry is not
- * yet completely filled in, and revalidate has to delay such
- * lookups..
- */
-static int autofs_revalidate(struct dentry * dentry, struct nameidata *nd)
-{
-	struct inode * dir;
-	struct autofs_sb_info *sbi;
-	struct autofs_dir_ent *ent;
-	int res;
-
-	lock_kernel();
-	dir = dentry->d_parent->d_inode;
-	sbi = autofs_sbi(dir->i_sb);
-
-	/* Pending dentry */
-	if (dentry->d_flags & DCACHE_AUTOFS_PENDING) {
-		if (autofs_oz_mode(sbi))
-			res = 1;
-		else
-			res = try_to_fill_dentry(dentry, dir->i_sb, sbi);
-		unlock_kernel();
-		return res;
-	}
-
-	/* Negative dentry.. invalidate if "old" */
-	if (!dentry->d_inode) {
-		unlock_kernel();
-		return (dentry->d_time - jiffies <= AUTOFS_NEGATIVE_TIMEOUT);
-	}
-		
-	/* Check for a non-mountpoint directory */
-	if (S_ISDIR(dentry->d_inode->i_mode) && !d_mountpoint(dentry)) {
-		if (autofs_oz_mode(sbi))
-			res = 1;
-		else
-			res = try_to_fill_dentry(dentry, dir->i_sb, sbi);
-		unlock_kernel();
-		return res;
-	}
-
-	/* Update the usage list */
-	if (!autofs_oz_mode(sbi)) {
-		ent = (struct autofs_dir_ent *) dentry->d_time;
-		if (ent)
-			autofs_update_usage(&sbi->dirhash,ent);
-	}
-	unlock_kernel();
-	return 1;
-}
-
-static const struct dentry_operations autofs_dentry_operations = {
-	.d_revalidate	= autofs_revalidate,
-};
-
-static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
-{
-	struct autofs_sb_info *sbi;
-	int oz_mode;
-
-	DPRINTK(("autofs_root_lookup: name = "));
-	lock_kernel();
-	autofs_say(dentry->d_name.name,dentry->d_name.len);
-
-	if (dentry->d_name.len > NAME_MAX) {
-		unlock_kernel();
-		return ERR_PTR(-ENAMETOOLONG);/* File name too long to exist */
-	}
-
-	sbi = autofs_sbi(dir->i_sb);
-
-	oz_mode = autofs_oz_mode(sbi);
-	DPRINTK(("autofs_lookup: pid = %u, pgrp = %u, catatonic = %d, "
-				"oz_mode = %d\n", task_pid_nr(current),
-				task_pgrp_nr(current), sbi->catatonic,
-				oz_mode));
-
-	/*
-	 * Mark the dentry incomplete, but add it. This is needed so
-	 * that the VFS layer knows about the dentry, and we can count
-	 * on catching any lookups through the revalidate.
-	 *
-	 * Let all the hard work be done by the revalidate function that
-	 * needs to be able to do this anyway..
-	 *
-	 * We need to do this before we release the directory semaphore.
-	 */
-	dentry->d_op = &autofs_dentry_operations;
-	dentry->d_flags |= DCACHE_AUTOFS_PENDING;
-	d_add(dentry, NULL);
-
-	mutex_unlock(&dir->i_mutex);
-	autofs_revalidate(dentry, nd);
-	mutex_lock(&dir->i_mutex);
-
-	/*
-	 * If we are still pending, check if we had to handle
-	 * a signal. If so we can force a restart..
-	 */
-	if (dentry->d_flags & DCACHE_AUTOFS_PENDING) {
-		/* See if we were interrupted */
-		if (signal_pending(current)) {
-			sigset_t *sigset = &current->pending.signal;
-			if (sigismember (sigset, SIGKILL) ||
-			    sigismember (sigset, SIGQUIT) ||
-			    sigismember (sigset, SIGINT)) {
-				unlock_kernel();
-				return ERR_PTR(-ERESTARTNOINTR);
-			}
-		}
-	}
-	unlock_kernel();
-
-	/*
-	 * If this dentry is unhashed, then we shouldn't honour this
-	 * lookup even if the dentry is positive.  Returning ENOENT here
-	 * doesn't do the right thing for all system calls, but it should
-	 * be OK for the operations we permit from an autofs.
-	 */
-	if (dentry->d_inode && d_unhashed(dentry))
-		return ERR_PTR(-ENOENT);
-
-	return NULL;
-}
-
-static int autofs_root_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
-{
-	struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
-	struct autofs_dirhash *dh = &sbi->dirhash;
-	struct autofs_dir_ent *ent;
-	unsigned int n;
-	int slsize;
-	struct autofs_symlink *sl;
-	struct inode *inode;
-
-	DPRINTK(("autofs_root_symlink: %s <- ", symname));
-	autofs_say(dentry->d_name.name,dentry->d_name.len);
-
-	lock_kernel();
-	if (!autofs_oz_mode(sbi)) {
-		unlock_kernel();
-		return -EACCES;
-	}
-
-	if (autofs_hash_lookup(dh, &dentry->d_name)) {
-		unlock_kernel();
-		return -EEXIST;
-	}
-
-	n = find_first_zero_bit(sbi->symlink_bitmap,AUTOFS_MAX_SYMLINKS);
-	if (n >= AUTOFS_MAX_SYMLINKS) {
-		unlock_kernel();
-		return -ENOSPC;
-	}
-
-	set_bit(n,sbi->symlink_bitmap);
-	sl = &sbi->symlink[n];
-	sl->len = strlen(symname);
-	sl->data = kmalloc(slsize = sl->len+1, GFP_KERNEL);
-	if (!sl->data) {
-		clear_bit(n,sbi->symlink_bitmap);
-		unlock_kernel();
-		return -ENOSPC;
-	}
-
-	ent = kmalloc(sizeof(struct autofs_dir_ent), GFP_KERNEL);
-	if (!ent) {
-		kfree(sl->data);
-		clear_bit(n,sbi->symlink_bitmap);
-		unlock_kernel();
-		return -ENOSPC;
-	}
-
-	ent->name = kmalloc(dentry->d_name.len+1, GFP_KERNEL);
-	if (!ent->name) {
-		kfree(sl->data);
-		kfree(ent);
-		clear_bit(n,sbi->symlink_bitmap);
-		unlock_kernel();
-		return -ENOSPC;
-	}
-
-	memcpy(sl->data,symname,slsize);
-	sl->mtime = get_seconds();
-
-	ent->ino = AUTOFS_FIRST_SYMLINK + n;
-	ent->hash = dentry->d_name.hash;
-	memcpy(ent->name, dentry->d_name.name, 1+(ent->len = dentry->d_name.len));
-	ent->dentry = NULL;	/* We don't keep the dentry for symlinks */
-
-	autofs_hash_insert(dh,ent);
-
-	inode = autofs_iget(dir->i_sb, ent->ino);
-	if (IS_ERR(inode))
-		return PTR_ERR(inode);
-
-	d_instantiate(dentry, inode);
-	unlock_kernel();
-	return 0;
-}
-
-/*
- * NOTE!
- *
- * Normal filesystems would do a "d_delete()" to tell the VFS dcache
- * that the file no longer exists. However, doing that means that the
- * VFS layer can turn the dentry into a negative dentry, which we
- * obviously do not want (we're dropping the entry not because it
- * doesn't exist, but because it has timed out).
- *
- * Also see autofs_root_rmdir()..
- */
-static int autofs_root_unlink(struct inode *dir, struct dentry *dentry)
-{
-	struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
-	struct autofs_dirhash *dh = &sbi->dirhash;
-	struct autofs_dir_ent *ent;
-	unsigned int n;
-
-	/* This allows root to remove symlinks */
-	lock_kernel();
-	if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) {
-		unlock_kernel();
-		return -EACCES;
-	}
-
-	ent = autofs_hash_lookup(dh, &dentry->d_name);
-	if (!ent) {
-		unlock_kernel();
-		return -ENOENT;
-	}
-
-	n = ent->ino - AUTOFS_FIRST_SYMLINK;
-	if (n >= AUTOFS_MAX_SYMLINKS) {
-		unlock_kernel();
-		return -EISDIR;	/* It's a directory, dummy */
-	}
-	if (!test_bit(n,sbi->symlink_bitmap)) {
-		unlock_kernel();
-		return -EINVAL;	/* Nonexistent symlink?  Shouldn't happen */
-	}
-	
-	dentry->d_time = (unsigned long)(struct autofs_dirhash *)NULL;
-	autofs_hash_delete(ent);
-	clear_bit(n,sbi->symlink_bitmap);
-	kfree(sbi->symlink[n].data);
-	d_drop(dentry);
-	
-	unlock_kernel();
-	return 0;
-}
-
-static int autofs_root_rmdir(struct inode *dir, struct dentry *dentry)
-{
-	struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
-	struct autofs_dirhash *dh = &sbi->dirhash;
-	struct autofs_dir_ent *ent;
-
-	lock_kernel();
-	if (!autofs_oz_mode(sbi)) {
-		unlock_kernel();
-		return -EACCES;
-	}
-
-	ent = autofs_hash_lookup(dh, &dentry->d_name);
-	if (!ent) {
-		unlock_kernel();
-		return -ENOENT;
-	}
-
-	if ((unsigned int)ent->ino < AUTOFS_FIRST_DIR_INO) {
-		unlock_kernel();
-		return -ENOTDIR; /* Not a directory */
-	}
-
-	if (ent->dentry != dentry) {
-		printk("autofs_rmdir: odentry != dentry for entry %s\n", dentry->d_name.name);
-	}
-
-	dentry->d_time = (unsigned long)(struct autofs_dir_ent *)NULL;
-	autofs_hash_delete(ent);
-	drop_nlink(dir);
-	d_drop(dentry);
-	unlock_kernel();
-
-	return 0;
-}
-
-static int autofs_root_mkdir(struct inode *dir, struct dentry *dentry, int mode)
-{
-	struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
-	struct autofs_dirhash *dh = &sbi->dirhash;
-	struct autofs_dir_ent *ent;
-	struct inode *inode;
-	ino_t ino;
-
-	lock_kernel();
-	if (!autofs_oz_mode(sbi)) {
-		unlock_kernel();
-		return -EACCES;
-	}
-
-	ent = autofs_hash_lookup(dh, &dentry->d_name);
-	if (ent) {
-		unlock_kernel();
-		return -EEXIST;
-	}
-
-	if (sbi->next_dir_ino < AUTOFS_FIRST_DIR_INO) {
-		printk("autofs: Out of inode numbers -- what the heck did you do??\n");
-		unlock_kernel();
-		return -ENOSPC;
-	}
-	ino = sbi->next_dir_ino++;
-
-	ent = kmalloc(sizeof(struct autofs_dir_ent), GFP_KERNEL);
-	if (!ent) {
-		unlock_kernel();
-		return -ENOSPC;
-	}
-
-	ent->name = kmalloc(dentry->d_name.len+1, GFP_KERNEL);
-	if (!ent->name) {
-		kfree(ent);
-		unlock_kernel();
-		return -ENOSPC;
-	}
-
-	ent->hash = dentry->d_name.hash;
-	memcpy(ent->name, dentry->d_name.name, 1+(ent->len = dentry->d_name.len));
-	ent->ino = ino;
-	ent->dentry = dentry;
-	autofs_hash_insert(dh,ent);
-
-	inc_nlink(dir);
-
-	inode = autofs_iget(dir->i_sb, ino);
-	if (IS_ERR(inode)) {
-		drop_nlink(dir);
-		return PTR_ERR(inode);
-	}
-
-	d_instantiate(dentry, inode);
-	unlock_kernel();
-
-	return 0;
-}
-
-/* Get/set timeout ioctl() operation */
-#ifdef CONFIG_COMPAT
-static inline int autofs_compat_get_set_timeout(struct autofs_sb_info *sbi,
-					 unsigned int __user *p)
-{
-	unsigned long ntimeout;
-
-	if (get_user(ntimeout, p) ||
-	    put_user(sbi->exp_timeout / HZ, p))
-		return -EFAULT;
-
-	if (ntimeout > UINT_MAX/HZ)
-		sbi->exp_timeout = 0;
-	else
-		sbi->exp_timeout = ntimeout * HZ;
-
-	return 0;
-}
-#endif
-
-static inline int autofs_get_set_timeout(struct autofs_sb_info *sbi,
-					 unsigned long __user *p)
-{
-	unsigned long ntimeout;
-
-	if (get_user(ntimeout, p) ||
-	    put_user(sbi->exp_timeout / HZ, p))
-		return -EFAULT;
-
-	if (ntimeout > ULONG_MAX/HZ)
-		sbi->exp_timeout = 0;
-	else
-		sbi->exp_timeout = ntimeout * HZ;
-
-	return 0;
-}
-
-/* Return protocol version */
-static inline int autofs_get_protover(int __user *p)
-{
-	return put_user(AUTOFS_PROTO_VERSION, p);
-}
-
-/* Perform an expiry operation */
-static inline int autofs_expire_run(struct super_block *sb,
-				    struct autofs_sb_info *sbi,
-				    struct vfsmount *mnt,
-				    struct autofs_packet_expire __user *pkt_p)
-{
-	struct autofs_dir_ent *ent;
-	struct autofs_packet_expire pkt;
-
-	memset(&pkt,0,sizeof pkt);
-
-	pkt.hdr.proto_version = AUTOFS_PROTO_VERSION;
-	pkt.hdr.type = autofs_ptype_expire;
-
-	if (!sbi->exp_timeout || !(ent = autofs_expire(sb,sbi,mnt)))
-		return -EAGAIN;
-
-	pkt.len = ent->len;
-	memcpy(pkt.name, ent->name, pkt.len);
-	pkt.name[pkt.len] = '\0';
-
-	if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)))
-		return -EFAULT;
-
-	return 0;
-}
-
-/*
- * ioctl()'s on the root directory is the chief method for the daemon to
- * generate kernel reactions
- */
-static int autofs_do_root_ioctl(struct inode *inode, struct file *filp,
-			     unsigned int cmd, unsigned long arg)
-{
-	struct autofs_sb_info *sbi = autofs_sbi(inode->i_sb);
-	void __user *argp = (void __user *)arg;
-
-	DPRINTK(("autofs_ioctl: cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n",cmd,arg,sbi,task_pgrp_nr(current)));
-
-	if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) ||
-	     _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT)
-		return -ENOTTY;
-	
-	if (!autofs_oz_mode(sbi) && !capable(CAP_SYS_ADMIN))
-		return -EPERM;
-	
-	switch(cmd) {
-	case AUTOFS_IOC_READY:	/* Wait queue: go ahead and retry */
-		return autofs_wait_release(sbi,(autofs_wqt_t)arg,0);
-	case AUTOFS_IOC_FAIL:	/* Wait queue: fail with ENOENT */
-		return autofs_wait_release(sbi,(autofs_wqt_t)arg,-ENOENT);
-	case AUTOFS_IOC_CATATONIC: /* Enter catatonic mode (daemon shutdown) */
-		autofs_catatonic_mode(sbi);
-		return 0;
-	case AUTOFS_IOC_PROTOVER: /* Get protocol version */
-		return autofs_get_protover(argp);
-#ifdef CONFIG_COMPAT
-	case AUTOFS_IOC_SETTIMEOUT32:
-		return autofs_compat_get_set_timeout(sbi, argp);
-#endif
-	case AUTOFS_IOC_SETTIMEOUT:
-		return autofs_get_set_timeout(sbi, argp);
-	case AUTOFS_IOC_EXPIRE:
-		return autofs_expire_run(inode->i_sb, sbi, filp->f_path.mnt,
-					 argp);
-	default:
-		return -ENOSYS;
-	}
-
-}
-
-static long autofs_root_ioctl(struct file *filp,
-			     unsigned int cmd, unsigned long arg)
-{
-	int ret;
-
-	lock_kernel();
-	ret = autofs_do_root_ioctl(filp->f_path.dentry->d_inode,
-				   filp, cmd, arg);
-	unlock_kernel();
-
-	return ret;
-}
-
-#ifdef CONFIG_COMPAT
-static long autofs_root_compat_ioctl(struct file *filp,
-			     unsigned int cmd, unsigned long arg)
-{
-	struct inode *inode = filp->f_path.dentry->d_inode;
-	int ret;
-
-	lock_kernel();
-	if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL)
-		ret = autofs_do_root_ioctl(inode, filp, cmd, arg);
-	else
-		ret = autofs_do_root_ioctl(inode, filp, cmd,
-			(unsigned long)compat_ptr(arg));
-	unlock_kernel();
-
-	return ret;
-}
-#endif
diff --git a/fs/autofs/symlink.c b/fs/autofs/symlink.c
deleted file mode 100644
index 7ce9cb2c9ce..00000000000
--- a/fs/autofs/symlink.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/* -*- linux-c -*- --------------------------------------------------------- *
- *
- * linux/fs/autofs/symlink.c
- *
- *  Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
-
-#include "autofs_i.h"
-
-/* Nothing to release.. */
-static void *autofs_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-	char *s=((struct autofs_symlink *)dentry->d_inode->i_private)->data;
-	nd_set_link(nd, s);
-	return NULL;
-}
-
-const struct inode_operations autofs_symlink_inode_operations = {
-	.readlink	= generic_readlink,
-	.follow_link	= autofs_follow_link
-};
diff --git a/fs/autofs/waitq.c b/fs/autofs/waitq.c
deleted file mode 100644
index be46805972f..00000000000
--- a/fs/autofs/waitq.c
+++ /dev/null
@@ -1,205 +0,0 @@
-/* -*- linux-c -*- --------------------------------------------------------- *
- *
- * linux/fs/autofs/waitq.c
- *
- *  Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- *
- * This file is part of the Linux kernel and is made available under
- * the terms of the GNU General Public License, version 2, or at your
- * option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
-
-#include <linux/slab.h>
-#include <linux/time.h>
-#include <linux/signal.h>
-#include <linux/file.h>
-#include "autofs_i.h"
-
-/* We make this a static variable rather than a part of the superblock; it
-   is better if we don't reassign numbers easily even across filesystems */
-static autofs_wqt_t autofs_next_wait_queue = 1;
-
-/* These are the signals we allow interrupting a pending mount */
-#define SHUTDOWN_SIGS	(sigmask(SIGKILL) | sigmask(SIGINT) | sigmask(SIGQUIT))
-
-void autofs_catatonic_mode(struct autofs_sb_info *sbi)
-{
-	struct autofs_wait_queue *wq, *nwq;
-
-	DPRINTK(("autofs: entering catatonic mode\n"));
-
-	sbi->catatonic = 1;
-	wq = sbi->queues;
-	sbi->queues = NULL;	/* Erase all wait queues */
-	while ( wq ) {
-		nwq = wq->next;
-		wq->status = -ENOENT; /* Magic is gone - report failure */
-		kfree(wq->name);
-		wq->name = NULL;
-		wake_up(&wq->queue);
-		wq = nwq;
-	}
-	fput(sbi->pipe);	/* Close the pipe */
-	sbi->pipe = NULL;
-	autofs_hash_dputall(&sbi->dirhash); /* Remove all dentry pointers */
-}
-
-static int autofs_write(struct file *file, const void *addr, int bytes)
-{
-	unsigned long sigpipe, flags;
-	mm_segment_t fs;
-	const char *data = (const char *)addr;
-	ssize_t wr = 0;
-
-	/** WARNING: this is not safe for writing more than PIPE_BUF bytes! **/
-
-	sigpipe = sigismember(&current->pending.signal, SIGPIPE);
-
-	/* Save pointer to user space and point back to kernel space */
-	fs = get_fs();
-	set_fs(KERNEL_DS);
-
-	while (bytes &&
-	       (wr = file->f_op->write(file,data,bytes,&file->f_pos)) > 0) {
-		data += wr;
-		bytes -= wr;
-	}
-
-	set_fs(fs);
-
-	/* Keep the currently executing process from receiving a
-	   SIGPIPE unless it was already supposed to get one */
-	if (wr == -EPIPE && !sigpipe) {
-		spin_lock_irqsave(&current->sighand->siglock, flags);
-		sigdelset(&current->pending.signal, SIGPIPE);
-		recalc_sigpending();
-		spin_unlock_irqrestore(&current->sighand->siglock, flags);
-	}
-
-	return (bytes > 0);
-}
-	
-static void autofs_notify_daemon(struct autofs_sb_info *sbi, struct autofs_wait_queue *wq)
-{
-	struct autofs_packet_missing pkt;
-
-	DPRINTK(("autofs_wait: wait id = 0x%08lx, name = ", wq->wait_queue_token));
-	autofs_say(wq->name,wq->len);
-
-	memset(&pkt,0,sizeof pkt); /* For security reasons */
-
-	pkt.hdr.proto_version = AUTOFS_PROTO_VERSION;
-	pkt.hdr.type = autofs_ptype_missing;
-	pkt.wait_queue_token = wq->wait_queue_token;
-	pkt.len = wq->len;
-        memcpy(pkt.name, wq->name, pkt.len);
-	pkt.name[pkt.len] = '\0';
-
-	if ( autofs_write(sbi->pipe,&pkt,sizeof(struct autofs_packet_missing)) )
-		autofs_catatonic_mode(sbi);
-}
-
-int autofs_wait(struct autofs_sb_info *sbi, struct qstr *name)
-{
-	struct autofs_wait_queue *wq;
-	int status;
-
-	/* In catatonic mode, we don't wait for nobody */
-	if ( sbi->catatonic )
-		return -ENOENT;
-	
-	/* We shouldn't be able to get here, but just in case */
-	if ( name->len > NAME_MAX )
-		return -ENOENT;
-
-	for ( wq = sbi->queues ; wq ; wq = wq->next ) {
-		if ( wq->hash == name->hash &&
-		     wq->len == name->len &&
-		     wq->name && !memcmp(wq->name,name->name,name->len) )
-			break;
-	}
-	
-	if ( !wq ) {
-		/* Create a new wait queue */
-		wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL);
-		if ( !wq )
-			return -ENOMEM;
-
-		wq->name = kmalloc(name->len,GFP_KERNEL);
-		if ( !wq->name ) {
-			kfree(wq);
-			return -ENOMEM;
-		}
-		wq->wait_queue_token = autofs_next_wait_queue++;
-		init_waitqueue_head(&wq->queue);
-		wq->hash = name->hash;
-		wq->len = name->len;
-		wq->status = -EINTR; /* Status return if interrupted */
-		memcpy(wq->name, name->name, name->len);
-		wq->next = sbi->queues;
-		sbi->queues = wq;
-
-		/* autofs_notify_daemon() may block */
-		wq->wait_ctr = 2;
-		autofs_notify_daemon(sbi,wq);
-	} else
-		wq->wait_ctr++;
-
-	/* wq->name is NULL if and only if the lock is already released */
-
-	if ( sbi->catatonic ) {
-		/* We might have slept, so check again for catatonic mode */
-		wq->status = -ENOENT;
-		kfree(wq->name);
-		wq->name = NULL;
-	}
-
-	if ( wq->name ) {
-		/* Block all but "shutdown" signals while waiting */
-		sigset_t sigmask;
-
-		siginitsetinv(&sigmask, SHUTDOWN_SIGS);
-		sigprocmask(SIG_BLOCK, &sigmask, &sigmask);
-
-		interruptible_sleep_on(&wq->queue);
-
-		sigprocmask(SIG_SETMASK, &sigmask, NULL);
-	} else {
-		DPRINTK(("autofs_wait: skipped sleeping\n"));
-	}
-
-	status = wq->status;
-
-	if ( ! --wq->wait_ctr )	/* Are we the last process to need status? */
-		kfree(wq);
-
-	return status;
-}
-
-
-int autofs_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_token, int status)
-{
-	struct autofs_wait_queue *wq, **wql;
-
-	for (wql = &sbi->queues; (wq = *wql) != NULL; wql = &wq->next) {
-		if ( wq->wait_queue_token == wait_queue_token )
-			break;
-	}
-	if ( !wq )
-		return -EINVAL;
-
-	*wql = wq->next;	/* Unlink from chain */
-	kfree(wq->name);
-	wq->name = NULL;	/* Do not wait on this queue */
-
-	wq->status = status;
-
-	if ( ! --wq->wait_ctr )	/* Is anyone still waiting for this guy? */
-		kfree(wq);
-	else
-		wake_up(&wq->queue);
-
-	return 0;
-}
-
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c
index 9722e4bd895..c038727b405 100644
--- a/fs/autofs4/init.c
+++ b/fs/autofs4/init.c
@@ -14,16 +14,16 @@
 #include <linux/init.h>
 #include "autofs_i.h"
 
-static int autofs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *autofs_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_nodev(fs_type, flags, data, autofs4_fill_super, mnt);
+	return mount_nodev(fs_type, flags, data, autofs4_fill_super);
 }
 
 static struct file_system_type autofs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "autofs",
-	.get_sb		= autofs_get_sb,
+	.mount		= autofs_mount,
 	.kill_sb	= autofs4_kill_sb,
 };
 
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index 821b2b955da..ac87e49fa70 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -398,6 +398,7 @@ struct inode *autofs4_get_inode(struct super_block *sb,
 		inode->i_gid = sb->s_root->d_inode->i_gid;
 	}
 	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+	inode->i_ino = get_next_ino();
 
 	if (S_ISDIR(inf->mode)) {
 		inode->i_nlink = 2;
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index d5c1401f003..d34896cfb19 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -980,19 +980,11 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp,
 	}
 }
 
-static DEFINE_MUTEX(autofs4_ioctl_mutex);
-
 static long autofs4_root_ioctl(struct file *filp,
 			       unsigned int cmd, unsigned long arg)
 {
-	long ret;
 	struct inode *inode = filp->f_dentry->d_inode;
-
-	mutex_lock(&autofs4_ioctl_mutex);
-	ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg);
-	mutex_unlock(&autofs4_ioctl_mutex);
-
-	return ret;
+	return autofs4_root_ioctl_unlocked(inode, filp, cmd, arg);
 }
 
 #ifdef CONFIG_COMPAT
@@ -1002,13 +994,11 @@ static long autofs4_root_compat_ioctl(struct file *filp,
 	struct inode *inode = filp->f_path.dentry->d_inode;
 	int ret;
 
-	mutex_lock(&autofs4_ioctl_mutex);
 	if (cmd == AUTOFS_IOC_READY || cmd == AUTOFS_IOC_FAIL)
 		ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg);
 	else
 		ret = autofs4_root_ioctl_unlocked(inode, filp, cmd,
 			(unsigned long)compat_ptr(arg));
-	mutex_unlock(&autofs4_ioctl_mutex);
 
 	return ret;
 }
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index dc39d282488..aa4e7c7ae3c 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -913,18 +913,17 @@ befs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	return 0;
 }
 
-static int
-befs_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name,
-	    void *data, struct vfsmount *mnt)
+static struct dentry *
+befs_mount(struct file_system_type *fs_type, int flags, const char *dev_name,
+	    void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, befs_fill_super,
-			   mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, befs_fill_super);
 }
 
 static struct file_system_type befs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "befs",
-	.get_sb		= befs_get_sb,
+	.mount		= befs_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,	
 };
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c
index d967e052b77..685ecff3ab3 100644
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -176,7 +176,7 @@ static int bfs_link(struct dentry *old, struct inode *dir,
 	inc_nlink(inode);
 	inode->i_ctime = CURRENT_TIME_SEC;
 	mark_inode_dirty(inode);
-	atomic_inc(&inode->i_count);
+	ihold(inode);
 	d_instantiate(new, inode);
 	mutex_unlock(&info->bfs_lock);
 	return 0;
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 883e77acd5a..76db6d7d49b 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -450,16 +450,16 @@ out:
 	return ret;
 }
 
-static int bfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *bfs_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, bfs_fill_super, mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, bfs_fill_super);
 }
 
 static struct file_system_type bfs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "bfs",
-	.get_sb		= bfs_get_sb,
+	.mount		= bfs_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
index 139fc8083f5..1befe2ec818 100644
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -495,6 +495,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
 	struct inode * inode = new_inode(sb);
 
 	if (inode) {
+		inode->i_ino = get_next_ino();
 		inode->i_mode = mode;
 		inode->i_atime = inode->i_mtime = inode->i_ctime =
 			current_fs_time(inode->i_sb);
@@ -705,10 +706,10 @@ static int bm_fill_super(struct super_block * sb, void * data, int silent)
 	return err;
 }
 
-static int bm_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *bm_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_single(fs_type, flags, data, bm_fill_super, mnt);
+	return mount_single(fs_type, flags, data, bm_fill_super);
 }
 
 static struct linux_binfmt misc_format = {
@@ -719,7 +720,7 @@ static struct linux_binfmt misc_format = {
 static struct file_system_type bm_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "binfmt_misc",
-	.get_sb		= bm_get_sb,
+	.mount		= bm_mount,
 	.kill_sb	= kill_litter_super,
 };
 
diff --git a/fs/bio.c b/fs/bio.c
index 8abb2dfb2e7..4bd454fa844 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -370,6 +370,9 @@ struct bio *bio_kmalloc(gfp_t gfp_mask, int nr_iovecs)
 {
 	struct bio *bio;
 
+	if (nr_iovecs > UIO_MAXIOV)
+		return NULL;
+
 	bio = kmalloc(sizeof(struct bio) + nr_iovecs * sizeof(struct bio_vec),
 		      gfp_mask);
 	if (unlikely(!bio))
@@ -697,8 +700,12 @@ static void bio_free_map_data(struct bio_map_data *bmd)
 static struct bio_map_data *bio_alloc_map_data(int nr_segs, int iov_count,
 					       gfp_t gfp_mask)
 {
-	struct bio_map_data *bmd = kmalloc(sizeof(*bmd), gfp_mask);
+	struct bio_map_data *bmd;
 
+	if (iov_count > UIO_MAXIOV)
+		return NULL;
+
+	bmd = kmalloc(sizeof(*bmd), gfp_mask);
 	if (!bmd)
 		return NULL;
 
@@ -827,6 +834,12 @@ struct bio *bio_copy_user_iov(struct request_queue *q,
 		end = (uaddr + iov[i].iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 		start = uaddr >> PAGE_SHIFT;
 
+		/*
+		 * Overflow, abort
+		 */
+		if (end < start)
+			return ERR_PTR(-EINVAL);
+
 		nr_pages += end - start;
 		len += iov[i].iov_len;
 	}
@@ -955,6 +968,12 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
 		unsigned long end = (uaddr + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
 		unsigned long start = uaddr >> PAGE_SHIFT;
 
+		/*
+		 * Overflow, abort
+		 */
+		if (end < start)
+			return ERR_PTR(-EINVAL);
+
 		nr_pages += end - start;
 		/*
 		 * buffer must be aligned to at least hardsector size for now
@@ -982,7 +1001,7 @@ static struct bio *__bio_map_user_iov(struct request_queue *q,
 		unsigned long start = uaddr >> PAGE_SHIFT;
 		const int local_nr_pages = end - start;
 		const int page_limit = cur_page + local_nr_pages;
-		
+
 		ret = get_user_pages_fast(uaddr, local_nr_pages,
 				write_to_vm, &pages[cur_page]);
 		if (ret < local_nr_pages) {
diff --git a/fs/block_dev.c b/fs/block_dev.c
index b737451e2e9..4230252fd68 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -11,7 +11,6 @@
 #include <linux/slab.h>
 #include <linux/kmod.h>
 #include <linux/major.h>
-#include <linux/smp_lock.h>
 #include <linux/device_cgroup.h>
 #include <linux/highmem.h>
 #include <linux/blkdev.h>
@@ -48,6 +47,21 @@ inline struct block_device *I_BDEV(struct inode *inode)
 
 EXPORT_SYMBOL(I_BDEV);
 
+/*
+ * move the inode from it's current bdi to the a new bdi. if the inode is dirty
+ * we need to move it onto the dirty list of @dst so that the inode is always
+ * on the right list.
+ */
+static void bdev_inode_switch_bdi(struct inode *inode,
+			struct backing_dev_info *dst)
+{
+	spin_lock(&inode_lock);
+	inode->i_data.backing_dev_info = dst;
+	if (inode->i_state & I_DIRTY)
+		list_move(&inode->i_wb_list, &dst->wb.b_dirty);
+	spin_unlock(&inode_lock);
+}
+
 static sector_t max_block(struct block_device *bdev)
 {
 	sector_t retval = ~((sector_t)0);
@@ -449,15 +463,15 @@ static const struct super_operations bdev_sops = {
 	.evict_inode = bdev_evict_inode,
 };
 
-static int bd_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *bd_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576, mnt);
+	return mount_pseudo(fs_type, "bdev:", &bdev_sops, 0x62646576);
 }
 
 static struct file_system_type bd_type = {
 	.name		= "bdev",
-	.get_sb		= bd_get_sb,
+	.mount		= bd_mount,
 	.kill_sb	= kill_anon_super,
 };
 
@@ -550,7 +564,7 @@ EXPORT_SYMBOL(bdget);
  */
 struct block_device *bdgrab(struct block_device *bdev)
 {
-	atomic_inc(&bdev->bd_inode->i_count);
+	ihold(bdev->bd_inode);
 	return bdev;
 }
 
@@ -580,7 +594,7 @@ static struct block_device *bd_acquire(struct inode *inode)
 	spin_lock(&bdev_lock);
 	bdev = inode->i_bdev;
 	if (bdev) {
-		atomic_inc(&bdev->bd_inode->i_count);
+		ihold(bdev->bd_inode);
 		spin_unlock(&bdev_lock);
 		return bdev;
 	}
@@ -591,12 +605,12 @@ static struct block_device *bd_acquire(struct inode *inode)
 		spin_lock(&bdev_lock);
 		if (!inode->i_bdev) {
 			/*
-			 * We take an additional bd_inode->i_count for inode,
+			 * We take an additional reference to bd_inode,
 			 * and it's released in clear_inode() of inode.
 			 * So, we can access it via ->i_mapping always
 			 * without igrab().
 			 */
-			atomic_inc(&bdev->bd_inode->i_count);
+			ihold(bdev->bd_inode);
 			inode->i_bdev = bdev;
 			inode->i_mapping = bdev->bd_inode->i_mapping;
 			list_add(&inode->i_devices, &bdev->bd_inodes);
@@ -1390,7 +1404,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 				bdi = blk_get_backing_dev_info(bdev);
 				if (bdi == NULL)
 					bdi = &default_backing_dev_info;
-				bdev->bd_inode->i_data.backing_dev_info = bdi;
+				bdev_inode_switch_bdi(bdev->bd_inode, bdi);
 			}
 			if (bdev->bd_invalidated)
 				rescan_partitions(disk, bdev);
@@ -1405,8 +1419,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 			if (ret)
 				goto out_clear;
 			bdev->bd_contains = whole;
-			bdev->bd_inode->i_data.backing_dev_info =
-			   whole->bd_inode->i_data.backing_dev_info;
+			bdev_inode_switch_bdi(bdev->bd_inode,
+				whole->bd_inode->i_data.backing_dev_info);
 			bdev->bd_part = disk_get_part(disk, partno);
 			if (!(disk->flags & GENHD_FL_UP) ||
 			    !bdev->bd_part || !bdev->bd_part->nr_sects) {
@@ -1439,7 +1453,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
 	disk_put_part(bdev->bd_part);
 	bdev->bd_disk = NULL;
 	bdev->bd_part = NULL;
-	bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
+	bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info);
 	if (bdev != bdev->bd_contains)
 		__blkdev_put(bdev->bd_contains, mode, 1);
 	bdev->bd_contains = NULL;
@@ -1533,7 +1547,8 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 		disk_put_part(bdev->bd_part);
 		bdev->bd_part = NULL;
 		bdev->bd_disk = NULL;
-		bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
+		bdev_inode_switch_bdi(bdev->bd_inode,
+					&default_backing_dev_info);
 		if (bdev != bdev->bd_contains)
 			victim = bdev->bd_contains;
 		bdev->bd_contains = NULL;
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 396039b3a8a..b50bc4bd5c5 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -91,23 +91,10 @@ static inline int compressed_bio_size(struct btrfs_root *root,
 static struct bio *compressed_bio_alloc(struct block_device *bdev,
 					u64 first_byte, gfp_t gfp_flags)
 {
-	struct bio *bio;
 	int nr_vecs;
 
 	nr_vecs = bio_get_nr_vecs(bdev);
-	bio = bio_alloc(gfp_flags, nr_vecs);
-
-	if (bio == NULL && (current->flags & PF_MEMALLOC)) {
-		while (!bio && (nr_vecs /= 2))
-			bio = bio_alloc(gfp_flags, nr_vecs);
-	}
-
-	if (bio) {
-		bio->bi_size = 0;
-		bio->bi_bdev = bdev;
-		bio->bi_sector = first_byte >> 9;
-	}
-	return bio;
+	return btrfs_bio_alloc(bdev, first_byte >> 9, nr_vecs, gfp_flags);
 }
 
 static int check_compressed_csum(struct inode *inode,
@@ -163,7 +150,6 @@ fail:
  */
 static void end_compressed_bio_read(struct bio *bio, int err)
 {
-	struct extent_io_tree *tree;
 	struct compressed_bio *cb = bio->bi_private;
 	struct inode *inode;
 	struct page *page;
@@ -187,7 +173,6 @@ static void end_compressed_bio_read(struct bio *bio, int err)
 	/* ok, we're the last bio for this extent, lets start
 	 * the decompression.
 	 */
-	tree = &BTRFS_I(inode)->io_tree;
 	ret = btrfs_zlib_decompress_biovec(cb->compressed_pages,
 					cb->start,
 					cb->orig_bio->bi_io_vec,
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index c3df14ce2cc..9ac17159925 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -200,7 +200,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
 		      struct extent_buffer **cow_ret, u64 new_root_objectid)
 {
 	struct extent_buffer *cow;
-	u32 nritems;
 	int ret = 0;
 	int level;
 	struct btrfs_disk_key disk_key;
@@ -210,7 +209,6 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
 	WARN_ON(root->ref_cows && trans->transid != root->last_trans);
 
 	level = btrfs_header_level(buf);
-	nritems = btrfs_header_nritems(buf);
 	if (level == 0)
 		btrfs_item_key(buf, &disk_key, 0);
 	else
@@ -1008,7 +1006,6 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 	int wret;
 	int pslot;
 	int orig_slot = path->slots[level];
-	int err_on_enospc = 0;
 	u64 orig_ptr;
 
 	if (level == 0)
@@ -1071,8 +1068,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 	    BTRFS_NODEPTRS_PER_BLOCK(root) / 4)
 		return 0;
 
-	if (btrfs_header_nritems(mid) < 2)
-		err_on_enospc = 1;
+	btrfs_header_nritems(mid);
 
 	left = read_node_slot(root, parent, pslot - 1);
 	if (left) {
@@ -1103,8 +1099,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
 		wret = push_node_left(trans, root, left, mid, 1);
 		if (wret < 0)
 			ret = wret;
-		if (btrfs_header_nritems(mid) < 2)
-			err_on_enospc = 1;
+		btrfs_header_nritems(mid);
 	}
 
 	/*
@@ -1224,14 +1219,12 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans,
 	int wret;
 	int pslot;
 	int orig_slot = path->slots[level];
-	u64 orig_ptr;
 
 	if (level == 0)
 		return 1;
 
 	mid = path->nodes[level];
 	WARN_ON(btrfs_header_generation(mid) != trans->transid);
-	orig_ptr = btrfs_node_blockptr(mid, orig_slot);
 
 	if (level < BTRFS_MAX_LEVEL - 1)
 		parent = path->nodes[level + 1];
@@ -1577,13 +1570,33 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 	blocksize = btrfs_level_size(root, level - 1);
 
 	tmp = btrfs_find_tree_block(root, blocknr, blocksize);
-	if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
-		/*
-		 * we found an up to date block without sleeping, return
-		 * right away
-		 */
-		*eb_ret = tmp;
-		return 0;
+	if (tmp) {
+		if (btrfs_buffer_uptodate(tmp, 0)) {
+			if (btrfs_buffer_uptodate(tmp, gen)) {
+				/*
+				 * we found an up to date block without
+				 * sleeping, return
+				 * right away
+				 */
+				*eb_ret = tmp;
+				return 0;
+			}
+			/* the pages were up to date, but we failed
+			 * the generation number check.  Do a full
+			 * read for the generation number that is correct.
+			 * We must do this without dropping locks so
+			 * we can trust our generation number
+			 */
+			free_extent_buffer(tmp);
+			tmp = read_tree_block(root, blocknr, blocksize, gen);
+			if (tmp && btrfs_buffer_uptodate(tmp, gen)) {
+				*eb_ret = tmp;
+				return 0;
+			}
+			free_extent_buffer(tmp);
+			btrfs_release_path(NULL, p);
+			return -EIO;
+		}
 	}
 
 	/*
@@ -1596,8 +1609,7 @@ read_block_for_search(struct btrfs_trans_handle *trans,
 	btrfs_unlock_up_safe(p, level + 1);
 	btrfs_set_path_blocking(p);
 
-	if (tmp)
-		free_extent_buffer(tmp);
+	free_extent_buffer(tmp);
 	if (p->reada)
 		reada_for_search(root, p, level, slot, key->objectid);
 
@@ -2548,7 +2560,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 {
 	struct btrfs_disk_key disk_key;
 	struct extent_buffer *right = path->nodes[0];
-	int slot;
 	int i;
 	int push_space = 0;
 	int push_items = 0;
@@ -2560,8 +2571,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans,
 	u32 this_item_size;
 	u32 old_left_item_size;
 
-	slot = path->slots[1];
-
 	if (empty)
 		nr = min(right_nritems, max_slot);
 	else
@@ -3330,7 +3339,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
 {
 	int ret = 0;
 	int slot;
-	int slot_orig;
 	struct extent_buffer *leaf;
 	struct btrfs_item *item;
 	u32 nritems;
@@ -3340,7 +3348,6 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans,
 	unsigned int size_diff;
 	int i;
 
-	slot_orig = path->slots[0];
 	leaf = path->nodes[0];
 	slot = path->slots[0];
 
@@ -3445,7 +3452,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
 {
 	int ret = 0;
 	int slot;
-	int slot_orig;
 	struct extent_buffer *leaf;
 	struct btrfs_item *item;
 	u32 nritems;
@@ -3454,7 +3460,6 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans,
 	unsigned int old_size;
 	int i;
 
-	slot_orig = path->slots[0];
 	leaf = path->nodes[0];
 
 	nritems = btrfs_header_nritems(leaf);
@@ -3787,7 +3792,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
 			    struct btrfs_key *cpu_key, u32 *data_size,
 			    int nr)
 {
-	struct extent_buffer *leaf;
 	int ret = 0;
 	int slot;
 	int i;
@@ -3804,7 +3808,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans,
 	if (ret < 0)
 		goto out;
 
-	leaf = path->nodes[0];
 	slot = path->slots[0];
 	BUG_ON(slot < 0);
 
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index eaf286abad1..af52f6d7a4d 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -99,6 +99,9 @@ struct btrfs_ordered_sum;
  */
 #define BTRFS_EXTENT_CSUM_OBJECTID -10ULL
 
+/* For storing free space cache */
+#define BTRFS_FREE_SPACE_OBJECTID -11ULL
+
 /* dummy objectid represents multiple objectids */
 #define BTRFS_MULTIPLE_OBJECTIDS -255ULL
 
@@ -265,6 +268,22 @@ struct btrfs_chunk {
 	/* additional stripes go here */
 } __attribute__ ((__packed__));
 
+#define BTRFS_FREE_SPACE_EXTENT	1
+#define BTRFS_FREE_SPACE_BITMAP	2
+
+struct btrfs_free_space_entry {
+	__le64 offset;
+	__le64 bytes;
+	u8 type;
+} __attribute__ ((__packed__));
+
+struct btrfs_free_space_header {
+	struct btrfs_disk_key location;
+	__le64 generation;
+	__le64 num_entries;
+	__le64 num_bitmaps;
+} __attribute__ ((__packed__));
+
 static inline unsigned long btrfs_chunk_item_size(int num_stripes)
 {
 	BUG_ON(num_stripes == 0);
@@ -365,8 +384,10 @@ struct btrfs_super_block {
 
 	char label[BTRFS_LABEL_SIZE];
 
+	__le64 cache_generation;
+
 	/* future expansion */
-	__le64 reserved[32];
+	__le64 reserved[31];
 	u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
 } __attribute__ ((__packed__));
 
@@ -375,13 +396,15 @@ struct btrfs_super_block {
  * ones specified below then we will fail to mount
  */
 #define BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF	(1ULL << 0)
-#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL	(2ULL << 0)
+#define BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL	(1ULL << 1)
+#define BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS	(1ULL << 2)
 
 #define BTRFS_FEATURE_COMPAT_SUPP		0ULL
 #define BTRFS_FEATURE_COMPAT_RO_SUPP		0ULL
-#define BTRFS_FEATURE_INCOMPAT_SUPP		\
-	(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |	\
-	 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL)
+#define BTRFS_FEATURE_INCOMPAT_SUPP			\
+	(BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF |		\
+	 BTRFS_FEATURE_INCOMPAT_DEFAULT_SUBVOL |	\
+	 BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
 
 /*
  * A leaf is full of items. offset and size tell us where to find
@@ -675,7 +698,8 @@ struct btrfs_block_group_item {
 struct btrfs_space_info {
 	u64 flags;
 
-	u64 total_bytes;	/* total bytes in the space */
+	u64 total_bytes;	/* total bytes in the space,
+				   this doesn't take mirrors into account */
 	u64 bytes_used;		/* total bytes used,
 				   this does't take mirrors into account */
 	u64 bytes_pinned;	/* total bytes pinned, will be freed when the
@@ -687,6 +711,8 @@ struct btrfs_space_info {
 	u64 bytes_may_use;	/* number of bytes that may be used for
 				   delalloc/allocations */
 	u64 disk_used;		/* total bytes used on disk */
+	u64 disk_total;		/* total bytes on disk, takes mirrors into
+				   account */
 
 	int full;		/* indicates that we cannot allocate any more
 				   chunks for this space */
@@ -750,6 +776,14 @@ enum btrfs_caching_type {
 	BTRFS_CACHE_FINISHED	= 2,
 };
 
+enum btrfs_disk_cache_state {
+	BTRFS_DC_WRITTEN	= 0,
+	BTRFS_DC_ERROR		= 1,
+	BTRFS_DC_CLEAR		= 2,
+	BTRFS_DC_SETUP		= 3,
+	BTRFS_DC_NEED_WRITE	= 4,
+};
+
 struct btrfs_caching_control {
 	struct list_head list;
 	struct mutex mutex;
@@ -763,6 +797,7 @@ struct btrfs_block_group_cache {
 	struct btrfs_key key;
 	struct btrfs_block_group_item item;
 	struct btrfs_fs_info *fs_info;
+	struct inode *inode;
 	spinlock_t lock;
 	u64 pinned;
 	u64 reserved;
@@ -773,8 +808,11 @@ struct btrfs_block_group_cache {
 	int extents_thresh;
 	int free_extents;
 	int total_bitmaps;
-	int ro;
-	int dirty;
+	unsigned int ro:1;
+	unsigned int dirty:1;
+	unsigned int iref:1;
+
+	int disk_cache_state;
 
 	/* cache tracking stuff */
 	int cached;
@@ -863,6 +901,7 @@ struct btrfs_fs_info {
 	struct btrfs_transaction *running_transaction;
 	wait_queue_head_t transaction_throttle;
 	wait_queue_head_t transaction_wait;
+	wait_queue_head_t transaction_blocked_wait;
 	wait_queue_head_t async_submit_wait;
 
 	struct btrfs_super_block super_copy;
@@ -949,6 +988,7 @@ struct btrfs_fs_info {
 	struct btrfs_workers endio_meta_workers;
 	struct btrfs_workers endio_meta_write_workers;
 	struct btrfs_workers endio_write_workers;
+	struct btrfs_workers endio_freespace_worker;
 	struct btrfs_workers submit_workers;
 	/*
 	 * fixup workers take dirty pages that didn't properly go through
@@ -1192,6 +1232,9 @@ struct btrfs_root {
 #define BTRFS_MOUNT_NOSSD		(1 << 9)
 #define BTRFS_MOUNT_DISCARD		(1 << 10)
 #define BTRFS_MOUNT_FORCE_COMPRESS      (1 << 11)
+#define BTRFS_MOUNT_SPACE_CACHE		(1 << 12)
+#define BTRFS_MOUNT_CLEAR_CACHE		(1 << 13)
+#define BTRFS_MOUNT_USER_SUBVOL_RM_ALLOWED (1 << 14)
 
 #define btrfs_clear_opt(o, opt)		((o) &= ~BTRFS_MOUNT_##opt)
 #define btrfs_set_opt(o, opt)		((o) |= BTRFS_MOUNT_##opt)
@@ -1665,6 +1708,27 @@ static inline void btrfs_set_dir_item_key(struct extent_buffer *eb,
 	write_eb_member(eb, item, struct btrfs_dir_item, location, key);
 }
 
+BTRFS_SETGET_FUNCS(free_space_entries, struct btrfs_free_space_header,
+		   num_entries, 64);
+BTRFS_SETGET_FUNCS(free_space_bitmaps, struct btrfs_free_space_header,
+		   num_bitmaps, 64);
+BTRFS_SETGET_FUNCS(free_space_generation, struct btrfs_free_space_header,
+		   generation, 64);
+
+static inline void btrfs_free_space_key(struct extent_buffer *eb,
+					struct btrfs_free_space_header *h,
+					struct btrfs_disk_key *key)
+{
+	read_eb_member(eb, h, struct btrfs_free_space_header, location, key);
+}
+
+static inline void btrfs_set_free_space_key(struct extent_buffer *eb,
+					    struct btrfs_free_space_header *h,
+					    struct btrfs_disk_key *key)
+{
+	write_eb_member(eb, h, struct btrfs_free_space_header, location, key);
+}
+
 /* struct btrfs_disk_key */
 BTRFS_SETGET_STACK_FUNCS(disk_key_objectid, struct btrfs_disk_key,
 			 objectid, 64);
@@ -1876,6 +1940,8 @@ BTRFS_SETGET_STACK_FUNCS(super_incompat_flags, struct btrfs_super_block,
 			 incompat_flags, 64);
 BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
 			 csum_type, 16);
+BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
+			 cache_generation, 64);
 
 static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
 {
@@ -1988,6 +2054,12 @@ static inline struct dentry *fdentry(struct file *file)
 	return file->f_path.dentry;
 }
 
+static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info)
+{
+	return ((space_info->flags & BTRFS_BLOCK_GROUP_METADATA) &&
+		(space_info->flags & BTRFS_BLOCK_GROUP_DATA));
+}
+
 /* extent-tree.c */
 void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
@@ -2079,7 +2151,7 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes);
 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes);
 int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
 				struct btrfs_root *root,
-				int num_items, int *retries);
+				int num_items);
 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
 				struct btrfs_root *root);
 int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans,
@@ -2100,7 +2172,7 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
 int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root,
 			struct btrfs_block_rsv *block_rsv,
-			u64 num_bytes, int *retries);
+			u64 num_bytes);
 int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root,
 			  struct btrfs_block_rsv *block_rsv,
@@ -2115,6 +2187,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root,
 			     struct btrfs_block_group_cache *cache);
 int btrfs_set_block_group_rw(struct btrfs_root *root,
 			     struct btrfs_block_group_cache *cache);
+void btrfs_put_block_group_cache(struct btrfs_fs_info *info);
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
 		     int level, int *slot);
@@ -2373,7 +2446,8 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 			       u32 min_type);
 
 int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
-int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput);
+int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
+				   int sync);
 int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
 			      struct extent_state **cached_state);
 int btrfs_writepages(struct address_space *mapping,
@@ -2426,6 +2500,10 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root);
 int btrfs_prealloc_file_range(struct inode *inode, int mode,
 			      u64 start, u64 num_bytes, u64 min_size,
 			      loff_t actual_len, u64 *alloc_hint);
+int btrfs_prealloc_file_range_trans(struct inode *inode,
+				    struct btrfs_trans_handle *trans, int mode,
+				    u64 start, u64 num_bytes, u64 min_size,
+				    loff_t actual_len, u64 *alloc_hint);
 extern const struct dentry_operations btrfs_dentry_operations;
 
 /* ioctl.c */
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index e9103b3baa4..f0cad5ae5be 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -427,5 +427,5 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
 		ret = btrfs_truncate_item(trans, root, path,
 					  item_len - sub_item_len, 1);
 	}
-	return 0;
+	return ret;
 }
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5e789f4a3ed..51d2e4de34e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -28,6 +28,7 @@
 #include <linux/freezer.h>
 #include <linux/crc32c.h>
 #include <linux/slab.h>
+#include <linux/migrate.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
@@ -338,7 +339,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
 	struct extent_io_tree *tree;
 	u64 start = (u64)page->index << PAGE_CACHE_SHIFT;
 	u64 found_start;
-	int found_level;
 	unsigned long len;
 	struct extent_buffer *eb;
 	int ret;
@@ -356,6 +356,8 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
 	ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE,
 					     btrfs_header_generation(eb));
 	BUG_ON(ret);
+	WARN_ON(!btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN));
+
 	found_start = btrfs_header_bytenr(eb);
 	if (found_start != start) {
 		WARN_ON(1);
@@ -369,8 +371,6 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page)
 		WARN_ON(1);
 		goto err;
 	}
-	found_level = btrfs_header_level(eb);
-
 	csum_tree_block(root, eb, 0);
 err:
 	free_extent_buffer(eb);
@@ -481,9 +481,12 @@ static void end_workqueue_bio(struct bio *bio, int err)
 	end_io_wq->work.flags = 0;
 
 	if (bio->bi_rw & REQ_WRITE) {
-		if (end_io_wq->metadata)
+		if (end_io_wq->metadata == 1)
 			btrfs_queue_worker(&fs_info->endio_meta_write_workers,
 					   &end_io_wq->work);
+		else if (end_io_wq->metadata == 2)
+			btrfs_queue_worker(&fs_info->endio_freespace_worker,
+					   &end_io_wq->work);
 		else
 			btrfs_queue_worker(&fs_info->endio_write_workers,
 					   &end_io_wq->work);
@@ -497,6 +500,13 @@ static void end_workqueue_bio(struct bio *bio, int err)
 	}
 }
 
+/*
+ * For the metadata arg you want
+ *
+ * 0 - if data
+ * 1 - if normal metadta
+ * 2 - if writing to the free space cache area
+ */
 int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio,
 			int metadata)
 {
@@ -533,11 +543,9 @@ int btrfs_congested_async(struct btrfs_fs_info *info, int iodone)
 
 static void run_one_async_start(struct btrfs_work *work)
 {
-	struct btrfs_fs_info *fs_info;
 	struct async_submit_bio *async;
 
 	async = container_of(work, struct  async_submit_bio, work);
-	fs_info = BTRFS_I(async->inode)->root->fs_info;
 	async->submit_bio_start(async->inode, async->rw, async->bio,
 			       async->mirror_num, async->bio_flags,
 			       async->bio_offset);
@@ -688,6 +696,27 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 				   __btree_submit_bio_done);
 }
 
+#ifdef CONFIG_MIGRATION
+static int btree_migratepage(struct address_space *mapping,
+			struct page *newpage, struct page *page)
+{
+	/*
+	 * we can't safely write a btree page from here,
+	 * we haven't done the locking hook
+	 */
+	if (PageDirty(page))
+		return -EAGAIN;
+	/*
+	 * Buffers may be managed in a filesystem specific way.
+	 * We must have no buffers or drop them.
+	 */
+	if (page_has_private(page) &&
+	    !try_to_release_page(page, GFP_KERNEL))
+		return -EAGAIN;
+	return migrate_page(mapping, newpage, page);
+}
+#endif
+
 static int btree_writepage(struct page *page, struct writeback_control *wbc)
 {
 	struct extent_io_tree *tree;
@@ -702,8 +731,7 @@ static int btree_writepage(struct page *page, struct writeback_control *wbc)
 	}
 
 	redirty_page_for_writepage(wbc, page);
-	eb = btrfs_find_tree_block(root, page_offset(page),
-				      PAGE_CACHE_SIZE);
+	eb = btrfs_find_tree_block(root, page_offset(page), PAGE_CACHE_SIZE);
 	WARN_ON(!eb);
 
 	was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags);
@@ -794,6 +822,9 @@ static const struct address_space_operations btree_aops = {
 	.releasepage	= btree_releasepage,
 	.invalidatepage = btree_invalidatepage,
 	.sync_page	= block_sync_page,
+#ifdef CONFIG_MIGRATION
+	.migratepage	= btree_migratepage,
+#endif
 };
 
 int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize,
@@ -850,12 +881,8 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
 				      u32 blocksize, u64 parent_transid)
 {
 	struct extent_buffer *buf = NULL;
-	struct inode *btree_inode = root->fs_info->btree_inode;
-	struct extent_io_tree *io_tree;
 	int ret;
 
-	io_tree = &BTRFS_I(btree_inode)->io_tree;
-
 	buf = btrfs_find_create_tree_block(root, bytenr, blocksize);
 	if (!buf)
 		return NULL;
@@ -980,7 +1007,10 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
 	blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
 	root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
 				     blocksize, generation);
-	BUG_ON(!root->node);
+	if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) {
+		free_extent_buffer(root->node);
+		return -EIO;
+	}
 	root->commit_root = btrfs_root_node(root);
 	return 0;
 }
@@ -1377,7 +1407,6 @@ static int bio_ready_for_csum(struct bio *bio)
 	u64 start = 0;
 	struct page *page;
 	struct extent_io_tree *io_tree = NULL;
-	struct btrfs_fs_info *info = NULL;
 	struct bio_vec *bvec;
 	int i;
 	int ret;
@@ -1396,7 +1425,6 @@ static int bio_ready_for_csum(struct bio *bio)
 		buf_len = page->private >> 2;
 		start = page_offset(page) + bvec->bv_offset;
 		io_tree = &BTRFS_I(page->mapping->host)->io_tree;
-		info = BTRFS_I(page->mapping->host)->root->fs_info;
 	}
 	/* are we fully contained in this bio? */
 	if (buf_len <= length)
@@ -1539,10 +1567,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 						 GFP_NOFS);
 	struct btrfs_root *csum_root = kzalloc(sizeof(struct btrfs_root),
 						 GFP_NOFS);
-	struct btrfs_root *tree_root = kzalloc(sizeof(struct btrfs_root),
-					       GFP_NOFS);
-	struct btrfs_fs_info *fs_info = kzalloc(sizeof(*fs_info),
-						GFP_NOFS);
+	struct btrfs_root *tree_root = btrfs_sb(sb);
+	struct btrfs_fs_info *fs_info = tree_root->fs_info;
 	struct btrfs_root *chunk_root = kzalloc(sizeof(struct btrfs_root),
 						GFP_NOFS);
 	struct btrfs_root *dev_root = kzalloc(sizeof(struct btrfs_root),
@@ -1680,12 +1706,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 
 	init_waitqueue_head(&fs_info->transaction_throttle);
 	init_waitqueue_head(&fs_info->transaction_wait);
+	init_waitqueue_head(&fs_info->transaction_blocked_wait);
 	init_waitqueue_head(&fs_info->async_submit_wait);
 
 	__setup_root(4096, 4096, 4096, 4096, tree_root,
 		     fs_info, BTRFS_ROOT_TREE_OBJECTID);
 
-
 	bh = btrfs_read_dev_super(fs_devices->latest_bdev);
 	if (!bh)
 		goto fail_iput;
@@ -1775,6 +1801,8 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	btrfs_init_workers(&fs_info->endio_write_workers, "endio-write",
 			   fs_info->thread_pool_size,
 			   &fs_info->generic_worker);
+	btrfs_init_workers(&fs_info->endio_freespace_worker, "freespace-write",
+			   1, &fs_info->generic_worker);
 
 	/*
 	 * endios are largely parallel and should have a very
@@ -1795,6 +1823,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	btrfs_start_workers(&fs_info->endio_meta_workers, 1);
 	btrfs_start_workers(&fs_info->endio_meta_write_workers, 1);
 	btrfs_start_workers(&fs_info->endio_write_workers, 1);
+	btrfs_start_workers(&fs_info->endio_freespace_worker, 1);
 
 	fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
 	fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
@@ -1993,6 +2022,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
 	if (!(sb->s_flags & MS_RDONLY)) {
 		down_read(&fs_info->cleanup_work_sem);
 		btrfs_orphan_cleanup(fs_info->fs_root);
+		btrfs_orphan_cleanup(fs_info->tree_root);
 		up_read(&fs_info->cleanup_work_sem);
 	}
 
@@ -2035,6 +2065,7 @@ fail_sb_buffer:
 	btrfs_stop_workers(&fs_info->endio_meta_workers);
 	btrfs_stop_workers(&fs_info->endio_meta_write_workers);
 	btrfs_stop_workers(&fs_info->endio_write_workers);
+	btrfs_stop_workers(&fs_info->endio_freespace_worker);
 	btrfs_stop_workers(&fs_info->submit_workers);
 fail_iput:
 	invalidate_inode_pages2(fs_info->btree_inode->i_mapping);
@@ -2410,6 +2441,7 @@ int close_ctree(struct btrfs_root *root)
 	fs_info->closing = 1;
 	smp_mb();
 
+	btrfs_put_block_group_cache(fs_info);
 	if (!(fs_info->sb->s_flags & MS_RDONLY)) {
 		ret =  btrfs_commit_super(root);
 		if (ret)
@@ -2456,6 +2488,7 @@ int close_ctree(struct btrfs_root *root)
 	btrfs_stop_workers(&fs_info->endio_meta_workers);
 	btrfs_stop_workers(&fs_info->endio_meta_write_workers);
 	btrfs_stop_workers(&fs_info->endio_write_workers);
+	btrfs_stop_workers(&fs_info->endio_freespace_worker);
 	btrfs_stop_workers(&fs_info->submit_workers);
 
 	btrfs_close_devices(fs_info->fs_devices);
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 951ef09b82f..659f532d26a 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -166,7 +166,7 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
 static struct dentry *btrfs_get_parent(struct dentry *child)
 {
 	struct inode *dir = child->d_inode;
-	static struct dentry *dentry;
+	struct dentry *dentry;
 	struct btrfs_root *root = BTRFS_I(dir)->root;
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
@@ -232,9 +232,85 @@ fail:
 	return ERR_PTR(ret);
 }
 
+static int btrfs_get_name(struct dentry *parent, char *name,
+			  struct dentry *child)
+{
+	struct inode *inode = child->d_inode;
+	struct inode *dir = parent->d_inode;
+	struct btrfs_path *path;
+	struct btrfs_root *root = BTRFS_I(dir)->root;
+	struct btrfs_inode_ref *iref;
+	struct btrfs_root_ref *rref;
+	struct extent_buffer *leaf;
+	unsigned long name_ptr;
+	struct btrfs_key key;
+	int name_len;
+	int ret;
+
+	if (!dir || !inode)
+		return -EINVAL;
+
+	if (!S_ISDIR(dir->i_mode))
+		return -EINVAL;
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	path->leave_spinning = 1;
+
+	if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
+		key.objectid = BTRFS_I(inode)->root->root_key.objectid;
+		key.type = BTRFS_ROOT_BACKREF_KEY;
+		key.offset = (u64)-1;
+		root = root->fs_info->tree_root;
+	} else {
+		key.objectid = inode->i_ino;
+		key.offset = dir->i_ino;
+		key.type = BTRFS_INODE_REF_KEY;
+	}
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0) {
+		btrfs_free_path(path);
+		return ret;
+	} else if (ret > 0) {
+		if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
+			path->slots[0]--;
+		} else {
+			btrfs_free_path(path);
+			return -ENOENT;
+		}
+	}
+	leaf = path->nodes[0];
+
+	if (inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) {
+	       rref = btrfs_item_ptr(leaf, path->slots[0],
+				     struct btrfs_root_ref);
+	       name_ptr = (unsigned long)(rref + 1);
+	       name_len = btrfs_root_ref_name_len(leaf, rref);
+	} else {
+		iref = btrfs_item_ptr(leaf, path->slots[0],
+				      struct btrfs_inode_ref);
+		name_ptr = (unsigned long)(iref + 1);
+		name_len = btrfs_inode_ref_name_len(leaf, iref);
+	}
+
+	read_extent_buffer(leaf, name, name_ptr, name_len);
+	btrfs_free_path(path);
+
+	/*
+	 * have to add the null termination to make sure that reconnect_path
+	 * gets the right len for strlen
+	 */
+	name[name_len] = '\0';
+
+	return 0;
+}
+
 const struct export_operations btrfs_export_ops = {
 	.encode_fh	= btrfs_encode_fh,
 	.fh_to_dentry	= btrfs_fh_to_dentry,
 	.fh_to_parent	= btrfs_fh_to_parent,
 	.get_parent	= btrfs_get_parent,
+	.get_name	= btrfs_get_name,
 };
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 0b81ecdb101..227e5815d83 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -242,6 +242,12 @@ get_caching_control(struct btrfs_block_group_cache *cache)
 		return NULL;
 	}
 
+	/* We're loading it the fast way, so we don't have a caching_ctl. */
+	if (!cache->caching_ctl) {
+		spin_unlock(&cache->lock);
+		return NULL;
+	}
+
 	ctl = cache->caching_ctl;
 	atomic_inc(&ctl->count);
 	spin_unlock(&cache->lock);
@@ -421,7 +427,10 @@ err:
 	return 0;
 }
 
-static int cache_block_group(struct btrfs_block_group_cache *cache)
+static int cache_block_group(struct btrfs_block_group_cache *cache,
+			     struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root,
+			     int load_cache_only)
 {
 	struct btrfs_fs_info *fs_info = cache->fs_info;
 	struct btrfs_caching_control *caching_ctl;
@@ -432,6 +441,39 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
 	if (cache->cached != BTRFS_CACHE_NO)
 		return 0;
 
+	/*
+	 * We can't do the read from on-disk cache during a commit since we need
+	 * to have the normal tree locking.  Also if we are currently trying to
+	 * allocate blocks for the tree root we can't do the fast caching since
+	 * we likely hold important locks.
+	 */
+	if (!trans->transaction->in_commit &&
+	    (root && root != root->fs_info->tree_root)) {
+		spin_lock(&cache->lock);
+		if (cache->cached != BTRFS_CACHE_NO) {
+			spin_unlock(&cache->lock);
+			return 0;
+		}
+		cache->cached = BTRFS_CACHE_STARTED;
+		spin_unlock(&cache->lock);
+
+		ret = load_free_space_cache(fs_info, cache);
+
+		spin_lock(&cache->lock);
+		if (ret == 1) {
+			cache->cached = BTRFS_CACHE_FINISHED;
+			cache->last_byte_to_unpin = (u64)-1;
+		} else {
+			cache->cached = BTRFS_CACHE_NO;
+		}
+		spin_unlock(&cache->lock);
+		if (ret == 1)
+			return 0;
+	}
+
+	if (load_cache_only)
+		return 0;
+
 	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL);
 	BUG_ON(!caching_ctl);
 
@@ -509,7 +551,7 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(found, head, list) {
-		if (found->flags == flags) {
+		if (found->flags & flags) {
 			rcu_read_unlock();
 			return found;
 		}
@@ -542,6 +584,15 @@ static u64 div_factor(u64 num, int factor)
 	return num;
 }
 
+static u64 div_factor_fine(u64 num, int factor)
+{
+	if (factor == 100)
+		return num;
+	num *= factor;
+	do_div(num, 100);
+	return num;
+}
+
 u64 btrfs_find_block_group(struct btrfs_root *root,
 			   u64 search_start, u64 search_hint, int owner)
 {
@@ -2687,6 +2738,111 @@ next_block_group(struct btrfs_root *root,
 	return cache;
 }
 
+static int cache_save_setup(struct btrfs_block_group_cache *block_group,
+			    struct btrfs_trans_handle *trans,
+			    struct btrfs_path *path)
+{
+	struct btrfs_root *root = block_group->fs_info->tree_root;
+	struct inode *inode = NULL;
+	u64 alloc_hint = 0;
+	int dcs = BTRFS_DC_ERROR;
+	int num_pages = 0;
+	int retries = 0;
+	int ret = 0;
+
+	/*
+	 * If this block group is smaller than 100 megs don't bother caching the
+	 * block group.
+	 */
+	if (block_group->key.offset < (100 * 1024 * 1024)) {
+		spin_lock(&block_group->lock);
+		block_group->disk_cache_state = BTRFS_DC_WRITTEN;
+		spin_unlock(&block_group->lock);
+		return 0;
+	}
+
+again:
+	inode = lookup_free_space_inode(root, block_group, path);
+	if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) {
+		ret = PTR_ERR(inode);
+		btrfs_release_path(root, path);
+		goto out;
+	}
+
+	if (IS_ERR(inode)) {
+		BUG_ON(retries);
+		retries++;
+
+		if (block_group->ro)
+			goto out_free;
+
+		ret = create_free_space_inode(root, trans, block_group, path);
+		if (ret)
+			goto out_free;
+		goto again;
+	}
+
+	/*
+	 * We want to set the generation to 0, that way if anything goes wrong
+	 * from here on out we know not to trust this cache when we load up next
+	 * time.
+	 */
+	BTRFS_I(inode)->generation = 0;
+	ret = btrfs_update_inode(trans, root, inode);
+	WARN_ON(ret);
+
+	if (i_size_read(inode) > 0) {
+		ret = btrfs_truncate_free_space_cache(root, trans, path,
+						      inode);
+		if (ret)
+			goto out_put;
+	}
+
+	spin_lock(&block_group->lock);
+	if (block_group->cached != BTRFS_CACHE_FINISHED) {
+		/* We're not cached, don't bother trying to write stuff out */
+		dcs = BTRFS_DC_WRITTEN;
+		spin_unlock(&block_group->lock);
+		goto out_put;
+	}
+	spin_unlock(&block_group->lock);
+
+	num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024);
+	if (!num_pages)
+		num_pages = 1;
+
+	/*
+	 * Just to make absolutely sure we have enough space, we're going to
+	 * preallocate 12 pages worth of space for each block group.  In
+	 * practice we ought to use at most 8, but we need extra space so we can
+	 * add our header and have a terminator between the extents and the
+	 * bitmaps.
+	 */
+	num_pages *= 16;
+	num_pages *= PAGE_CACHE_SIZE;
+
+	ret = btrfs_check_data_free_space(inode, num_pages);
+	if (ret)
+		goto out_put;
+
+	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
+					      num_pages, num_pages,
+					      &alloc_hint);
+	if (!ret)
+		dcs = BTRFS_DC_SETUP;
+	btrfs_free_reserved_data_space(inode, num_pages);
+out_put:
+	iput(inode);
+out_free:
+	btrfs_release_path(root, path);
+out:
+	spin_lock(&block_group->lock);
+	block_group->disk_cache_state = dcs;
+	spin_unlock(&block_group->lock);
+
+	return ret;
+}
+
 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root)
 {
@@ -2699,6 +2855,25 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 	if (!path)
 		return -ENOMEM;
 
+again:
+	while (1) {
+		cache = btrfs_lookup_first_block_group(root->fs_info, last);
+		while (cache) {
+			if (cache->disk_cache_state == BTRFS_DC_CLEAR)
+				break;
+			cache = next_block_group(root, cache);
+		}
+		if (!cache) {
+			if (last == 0)
+				break;
+			last = 0;
+			continue;
+		}
+		err = cache_save_setup(cache, trans, path);
+		last = cache->key.objectid + cache->key.offset;
+		btrfs_put_block_group(cache);
+	}
+
 	while (1) {
 		if (last == 0) {
 			err = btrfs_run_delayed_refs(trans, root,
@@ -2708,6 +2883,11 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 
 		cache = btrfs_lookup_first_block_group(root->fs_info, last);
 		while (cache) {
+			if (cache->disk_cache_state == BTRFS_DC_CLEAR) {
+				btrfs_put_block_group(cache);
+				goto again;
+			}
+
 			if (cache->dirty)
 				break;
 			cache = next_block_group(root, cache);
@@ -2719,6 +2899,8 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 			continue;
 		}
 
+		if (cache->disk_cache_state == BTRFS_DC_SETUP)
+			cache->disk_cache_state = BTRFS_DC_NEED_WRITE;
 		cache->dirty = 0;
 		last = cache->key.objectid + cache->key.offset;
 
@@ -2727,6 +2909,52 @@ int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
 		btrfs_put_block_group(cache);
 	}
 
+	while (1) {
+		/*
+		 * I don't think this is needed since we're just marking our
+		 * preallocated extent as written, but just in case it can't
+		 * hurt.
+		 */
+		if (last == 0) {
+			err = btrfs_run_delayed_refs(trans, root,
+						     (unsigned long)-1);
+			BUG_ON(err);
+		}
+
+		cache = btrfs_lookup_first_block_group(root->fs_info, last);
+		while (cache) {
+			/*
+			 * Really this shouldn't happen, but it could if we
+			 * couldn't write the entire preallocated extent and
+			 * splitting the extent resulted in a new block.
+			 */
+			if (cache->dirty) {
+				btrfs_put_block_group(cache);
+				goto again;
+			}
+			if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
+				break;
+			cache = next_block_group(root, cache);
+		}
+		if (!cache) {
+			if (last == 0)
+				break;
+			last = 0;
+			continue;
+		}
+
+		btrfs_write_out_cache(root, trans, cache, path);
+
+		/*
+		 * If we didn't have an error then the cache state is still
+		 * NEED_WRITE, so we can set it to WRITTEN.
+		 */
+		if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE)
+			cache->disk_cache_state = BTRFS_DC_WRITTEN;
+		last = cache->key.objectid + cache->key.offset;
+		btrfs_put_block_group(cache);
+	}
+
 	btrfs_free_path(path);
 	return 0;
 }
@@ -2762,6 +2990,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 	if (found) {
 		spin_lock(&found->lock);
 		found->total_bytes += total_bytes;
+		found->disk_total += total_bytes * factor;
 		found->bytes_used += bytes_used;
 		found->disk_used += bytes_used * factor;
 		found->full = 0;
@@ -2781,6 +3010,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags,
 				BTRFS_BLOCK_GROUP_SYSTEM |
 				BTRFS_BLOCK_GROUP_METADATA);
 	found->total_bytes = total_bytes;
+	found->disk_total = total_bytes * factor;
 	found->bytes_used = bytes_used;
 	found->disk_used = bytes_used * factor;
 	found->bytes_pinned = 0;
@@ -2813,7 +3043,13 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 
 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
 {
-	u64 num_devices = root->fs_info->fs_devices->rw_devices;
+	/*
+	 * we add in the count of missing devices because we want
+	 * to make sure that any RAID levels on a degraded FS
+	 * continue to be honored.
+	 */
+	u64 num_devices = root->fs_info->fs_devices->rw_devices +
+		root->fs_info->fs_devices->missing_devices;
 
 	if (num_devices == 1)
 		flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
@@ -2882,11 +3118,16 @@ int btrfs_check_data_free_space(struct inode *inode, u64 bytes)
 	struct btrfs_space_info *data_sinfo;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	u64 used;
-	int ret = 0, committed = 0;
+	int ret = 0, committed = 0, alloc_chunk = 1;
 
 	/* make sure bytes are sectorsize aligned */
 	bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
 
+	if (root == root->fs_info->tree_root) {
+		alloc_chunk = 0;
+		committed = 1;
+	}
+
 	data_sinfo = BTRFS_I(inode)->space_info;
 	if (!data_sinfo)
 		goto alloc;
@@ -2905,7 +3146,7 @@ again:
 		 * if we don't have enough free bytes in this space then we need
 		 * to alloc a new chunk.
 		 */
-		if (!data_sinfo->full) {
+		if (!data_sinfo->full && alloc_chunk) {
 			u64 alloc_target;
 
 			data_sinfo->force_alloc = 1;
@@ -2997,10 +3238,11 @@ static void force_metadata_allocation(struct btrfs_fs_info *info)
 	rcu_read_unlock();
 }
 
-static int should_alloc_chunk(struct btrfs_space_info *sinfo,
-			      u64 alloc_bytes)
+static int should_alloc_chunk(struct btrfs_root *root,
+			      struct btrfs_space_info *sinfo, u64 alloc_bytes)
 {
 	u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly;
+	u64 thresh;
 
 	if (sinfo->bytes_used + sinfo->bytes_reserved +
 	    alloc_bytes + 256 * 1024 * 1024 < num_bytes)
@@ -3010,6 +3252,12 @@ static int should_alloc_chunk(struct btrfs_space_info *sinfo,
 	    alloc_bytes < div_factor(num_bytes, 8))
 		return 0;
 
+	thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);
+	thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));
+
+	if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
+		return 0;
+
 	return 1;
 }
 
@@ -3041,13 +3289,21 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans,
 		goto out;
 	}
 
-	if (!force && !should_alloc_chunk(space_info, alloc_bytes)) {
+	if (!force && !should_alloc_chunk(extent_root, space_info,
+					  alloc_bytes)) {
 		spin_unlock(&space_info->lock);
 		goto out;
 	}
 	spin_unlock(&space_info->lock);
 
 	/*
+	 * If we have mixed data/metadata chunks we want to make sure we keep
+	 * allocating mixed chunks instead of individual chunks.
+	 */
+	if (btrfs_mixed_space_info(space_info))
+		flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);
+
+	/*
 	 * if we're doing a data chunk, go ahead and make sure that
 	 * we keep a reasonable number of metadata chunks allocated in the
 	 * FS as well.
@@ -3072,55 +3328,25 @@ out:
 	return ret;
 }
 
-static int maybe_allocate_chunk(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root,
-				struct btrfs_space_info *sinfo, u64 num_bytes)
-{
-	int ret;
-	int end_trans = 0;
-
-	if (sinfo->full)
-		return 0;
-
-	spin_lock(&sinfo->lock);
-	ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024);
-	spin_unlock(&sinfo->lock);
-	if (!ret)
-		return 0;
-
-	if (!trans) {
-		trans = btrfs_join_transaction(root, 1);
-		BUG_ON(IS_ERR(trans));
-		end_trans = 1;
-	}
-
-	ret = do_chunk_alloc(trans, root->fs_info->extent_root,
-			     num_bytes + 2 * 1024 * 1024,
-			     get_alloc_profile(root, sinfo->flags), 0);
-
-	if (end_trans)
-		btrfs_end_transaction(trans, root);
-
-	return ret == 1 ? 1 : 0;
-}
-
 /*
  * shrink metadata reservation for delalloc
  */
 static int shrink_delalloc(struct btrfs_trans_handle *trans,
-			   struct btrfs_root *root, u64 to_reclaim)
+			   struct btrfs_root *root, u64 to_reclaim, int sync)
 {
 	struct btrfs_block_rsv *block_rsv;
+	struct btrfs_space_info *space_info;
 	u64 reserved;
 	u64 max_reclaim;
 	u64 reclaimed = 0;
 	int pause = 1;
-	int ret;
+	int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT;
 
 	block_rsv = &root->fs_info->delalloc_block_rsv;
-	spin_lock(&block_rsv->lock);
-	reserved = block_rsv->reserved;
-	spin_unlock(&block_rsv->lock);
+	space_info = block_rsv->space_info;
+
+	smp_mb();
+	reserved = space_info->bytes_reserved;
 
 	if (reserved == 0)
 		return 0;
@@ -3128,104 +3354,169 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans,
 	max_reclaim = min(reserved, to_reclaim);
 
 	while (1) {
-		ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0);
-		if (!ret) {
-			__set_current_state(TASK_INTERRUPTIBLE);
-			schedule_timeout(pause);
-			pause <<= 1;
-			if (pause > HZ / 10)
-				pause = HZ / 10;
-		} else {
-			pause = 1;
-		}
+		/* have the flusher threads jump in and do some IO */
+		smp_mb();
+		nr_pages = min_t(unsigned long, nr_pages,
+		       root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT);
+		writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages);
 
-		spin_lock(&block_rsv->lock);
-		if (reserved > block_rsv->reserved)
-			reclaimed = reserved - block_rsv->reserved;
-		reserved = block_rsv->reserved;
-		spin_unlock(&block_rsv->lock);
+		spin_lock(&space_info->lock);
+		if (reserved > space_info->bytes_reserved)
+			reclaimed += reserved - space_info->bytes_reserved;
+		reserved = space_info->bytes_reserved;
+		spin_unlock(&space_info->lock);
 
 		if (reserved == 0 || reclaimed >= max_reclaim)
 			break;
 
 		if (trans && trans->transaction->blocked)
 			return -EAGAIN;
+
+		__set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(pause);
+		pause <<= 1;
+		if (pause > HZ / 10)
+			pause = HZ / 10;
+
 	}
 	return reclaimed >= to_reclaim;
 }
 
-static int should_retry_reserve(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root,
-				struct btrfs_block_rsv *block_rsv,
-				u64 num_bytes, int *retries)
+/*
+ * Retries tells us how many times we've called reserve_metadata_bytes.  The
+ * idea is if this is the first call (retries == 0) then we will add to our
+ * reserved count if we can't make the allocation in order to hold our place
+ * while we go and try and free up space.  That way for retries > 1 we don't try
+ * and add space, we just check to see if the amount of unused space is >= the
+ * total space, meaning that our reservation is valid.
+ *
+ * However if we don't intend to retry this reservation, pass -1 as retries so
+ * that it short circuits this logic.
+ */
+static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
+				  struct btrfs_root *root,
+				  struct btrfs_block_rsv *block_rsv,
+				  u64 orig_bytes, int flush)
 {
 	struct btrfs_space_info *space_info = block_rsv->space_info;
-	int ret;
+	u64 unused;
+	u64 num_bytes = orig_bytes;
+	int retries = 0;
+	int ret = 0;
+	bool reserved = false;
+	bool committed = false;
 
-	if ((*retries) > 2)
-		return -ENOSPC;
+again:
+	ret = -ENOSPC;
+	if (reserved)
+		num_bytes = 0;
 
-	ret = maybe_allocate_chunk(trans, root, space_info, num_bytes);
-	if (ret)
-		return 1;
+	spin_lock(&space_info->lock);
+	unused = space_info->bytes_used + space_info->bytes_reserved +
+		 space_info->bytes_pinned + space_info->bytes_readonly +
+		 space_info->bytes_may_use;
 
-	if (trans && trans->transaction->in_commit)
-		return -ENOSPC;
+	/*
+	 * The idea here is that we've not already over-reserved the block group
+	 * then we can go ahead and save our reservation first and then start
+	 * flushing if we need to.  Otherwise if we've already overcommitted
+	 * lets start flushing stuff first and then come back and try to make
+	 * our reservation.
+	 */
+	if (unused <= space_info->total_bytes) {
+		unused = space_info->total_bytes - unused;
+		if (unused >= num_bytes) {
+			if (!reserved)
+				space_info->bytes_reserved += orig_bytes;
+			ret = 0;
+		} else {
+			/*
+			 * Ok set num_bytes to orig_bytes since we aren't
+			 * overocmmitted, this way we only try and reclaim what
+			 * we need.
+			 */
+			num_bytes = orig_bytes;
+		}
+	} else {
+		/*
+		 * Ok we're over committed, set num_bytes to the overcommitted
+		 * amount plus the amount of bytes that we need for this
+		 * reservation.
+		 */
+		num_bytes = unused - space_info->total_bytes +
+			(orig_bytes * (retries + 1));
+	}
 
-	ret = shrink_delalloc(trans, root, num_bytes);
-	if (ret)
-		return ret;
+	/*
+	 * Couldn't make our reservation, save our place so while we're trying
+	 * to reclaim space we can actually use it instead of somebody else
+	 * stealing it from us.
+	 */
+	if (ret && !reserved) {
+		space_info->bytes_reserved += orig_bytes;
+		reserved = true;
+	}
 
-	spin_lock(&space_info->lock);
-	if (space_info->bytes_pinned < num_bytes)
-		ret = 1;
 	spin_unlock(&space_info->lock);
-	if (ret)
-		return -ENOSPC;
 
-	(*retries)++;
-
-	if (trans)
-		return -EAGAIN;
+	if (!ret)
+		return 0;
 
-	trans = btrfs_join_transaction(root, 1);
-	BUG_ON(IS_ERR(trans));
-	ret = btrfs_commit_transaction(trans, root);
-	BUG_ON(ret);
+	if (!flush)
+		goto out;
 
-	return 1;
-}
+	/*
+	 * We do synchronous shrinking since we don't actually unreserve
+	 * metadata until after the IO is completed.
+	 */
+	ret = shrink_delalloc(trans, root, num_bytes, 1);
+	if (ret > 0)
+		return 0;
+	else if (ret < 0)
+		goto out;
 
-static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv,
-				  u64 num_bytes)
-{
-	struct btrfs_space_info *space_info = block_rsv->space_info;
-	u64 unused;
-	int ret = -ENOSPC;
+	/*
+	 * So if we were overcommitted it's possible that somebody else flushed
+	 * out enough space and we simply didn't have enough space to reclaim,
+	 * so go back around and try again.
+	 */
+	if (retries < 2) {
+		retries++;
+		goto again;
+	}
 
 	spin_lock(&space_info->lock);
-	unused = space_info->bytes_used + space_info->bytes_reserved +
-		 space_info->bytes_pinned + space_info->bytes_readonly;
+	/*
+	 * Not enough space to be reclaimed, don't bother committing the
+	 * transaction.
+	 */
+	if (space_info->bytes_pinned < orig_bytes)
+		ret = -ENOSPC;
+	spin_unlock(&space_info->lock);
+	if (ret)
+		goto out;
 
-	if (unused < space_info->total_bytes)
-		unused = space_info->total_bytes - unused;
-	else
-		unused = 0;
+	ret = -EAGAIN;
+	if (trans || committed)
+		goto out;
 
-	if (unused >= num_bytes) {
-		if (block_rsv->priority >= 10) {
-			space_info->bytes_reserved += num_bytes;
-			ret = 0;
-		} else {
-			if ((unused + block_rsv->reserved) *
-			    block_rsv->priority >=
-			    (num_bytes + block_rsv->reserved) * 10) {
-				space_info->bytes_reserved += num_bytes;
-				ret = 0;
-			}
-		}
+	ret = -ENOSPC;
+	trans = btrfs_join_transaction(root, 1);
+	if (IS_ERR(trans))
+		goto out;
+	ret = btrfs_commit_transaction(trans, root);
+	if (!ret) {
+		trans = NULL;
+		committed = true;
+		goto again;
+	}
+
+out:
+	if (reserved) {
+		spin_lock(&space_info->lock);
+		space_info->bytes_reserved -= orig_bytes;
+		spin_unlock(&space_info->lock);
 	}
-	spin_unlock(&space_info->lock);
 
 	return ret;
 }
@@ -3327,18 +3618,14 @@ struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root)
 {
 	struct btrfs_block_rsv *block_rsv;
 	struct btrfs_fs_info *fs_info = root->fs_info;
-	u64 alloc_target;
 
 	block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS);
 	if (!block_rsv)
 		return NULL;
 
 	btrfs_init_block_rsv(block_rsv);
-
-	alloc_target = btrfs_get_alloc_profile(root, 0);
 	block_rsv->space_info = __find_space_info(fs_info,
 						  BTRFS_BLOCK_GROUP_METADATA);
-
 	return block_rsv;
 }
 
@@ -3369,23 +3656,19 @@ void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
 int btrfs_block_rsv_add(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root,
 			struct btrfs_block_rsv *block_rsv,
-			u64 num_bytes, int *retries)
+			u64 num_bytes)
 {
 	int ret;
 
 	if (num_bytes == 0)
 		return 0;
-again:
-	ret = reserve_metadata_bytes(block_rsv, num_bytes);
+
+	ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1);
 	if (!ret) {
 		block_rsv_add_bytes(block_rsv, num_bytes, 1);
 		return 0;
 	}
 
-	ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries);
-	if (ret > 0)
-		goto again;
-
 	return ret;
 }
 
@@ -3420,7 +3703,8 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans,
 		return 0;
 
 	if (block_rsv->refill_used) {
-		ret = reserve_metadata_bytes(block_rsv, num_bytes);
+		ret = reserve_metadata_bytes(trans, root, block_rsv,
+					     num_bytes, 0);
 		if (!ret) {
 			block_rsv_add_bytes(block_rsv, num_bytes, 0);
 			return 0;
@@ -3499,6 +3783,8 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info)
 
 	sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
 	spin_lock(&sinfo->lock);
+	if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA)
+		data_used = 0;
 	meta_used = sinfo->bytes_used;
 	spin_unlock(&sinfo->lock);
 
@@ -3526,7 +3812,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
 	block_rsv->size = num_bytes;
 
 	num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
-		    sinfo->bytes_reserved + sinfo->bytes_readonly;
+		    sinfo->bytes_reserved + sinfo->bytes_readonly +
+		    sinfo->bytes_may_use;
 
 	if (sinfo->total_bytes > num_bytes) {
 		num_bytes = sinfo->total_bytes - num_bytes;
@@ -3597,7 +3884,7 @@ static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items)
 
 int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
 				 struct btrfs_root *root,
-				 int num_items, int *retries)
+				 int num_items)
 {
 	u64 num_bytes;
 	int ret;
@@ -3607,7 +3894,7 @@ int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans,
 
 	num_bytes = calc_trans_metadata_size(root, num_items);
 	ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv,
-				  num_bytes, retries);
+				  num_bytes);
 	if (!ret) {
 		trans->bytes_reserved += num_bytes;
 		trans->block_rsv = &root->fs_info->trans_block_rsv;
@@ -3681,14 +3968,13 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
 	struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
 	u64 to_reserve;
 	int nr_extents;
-	int retries = 0;
 	int ret;
 
 	if (btrfs_transaction_in_commit(root->fs_info))
 		schedule_timeout(1);
 
 	num_bytes = ALIGN(num_bytes, root->sectorsize);
-again:
+
 	spin_lock(&BTRFS_I(inode)->accounting_lock);
 	nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
 	if (nr_extents > BTRFS_I(inode)->reserved_extents) {
@@ -3698,18 +3984,14 @@ again:
 		nr_extents = 0;
 		to_reserve = 0;
 	}
+	spin_unlock(&BTRFS_I(inode)->accounting_lock);
 
 	to_reserve += calc_csum_metadata_size(inode, num_bytes);
-	ret = reserve_metadata_bytes(block_rsv, to_reserve);
-	if (ret) {
-		spin_unlock(&BTRFS_I(inode)->accounting_lock);
-		ret = should_retry_reserve(NULL, root, block_rsv, to_reserve,
-					   &retries);
-		if (ret > 0)
-			goto again;
+	ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1);
+	if (ret)
 		return ret;
-	}
 
+	spin_lock(&BTRFS_I(inode)->accounting_lock);
 	BTRFS_I(inode)->reserved_extents += nr_extents;
 	atomic_inc(&BTRFS_I(inode)->outstanding_extents);
 	spin_unlock(&BTRFS_I(inode)->accounting_lock);
@@ -3717,7 +3999,7 @@ again:
 	block_rsv_add_bytes(block_rsv, to_reserve, 1);
 
 	if (block_rsv->size > 512 * 1024 * 1024)
-		shrink_delalloc(NULL, root, to_reserve);
+		shrink_delalloc(NULL, root, to_reserve, 0);
 
 	return 0;
 }
@@ -3776,12 +4058,12 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 			      struct btrfs_root *root,
 			      u64 bytenr, u64 num_bytes, int alloc)
 {
-	struct btrfs_block_group_cache *cache;
+	struct btrfs_block_group_cache *cache = NULL;
 	struct btrfs_fs_info *info = root->fs_info;
-	int factor;
 	u64 total = num_bytes;
 	u64 old_val;
 	u64 byte_in_group;
+	int factor;
 
 	/* block accounting for super block */
 	spin_lock(&info->delalloc_lock);
@@ -3803,11 +4085,25 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 			factor = 2;
 		else
 			factor = 1;
+		/*
+		 * If this block group has free space cache written out, we
+		 * need to make sure to load it if we are removing space.  This
+		 * is because we need the unpinning stage to actually add the
+		 * space back to the block group, otherwise we will leak space.
+		 */
+		if (!alloc && cache->cached == BTRFS_CACHE_NO)
+			cache_block_group(cache, trans, NULL, 1);
+
 		byte_in_group = bytenr - cache->key.objectid;
 		WARN_ON(byte_in_group > cache->key.offset);
 
 		spin_lock(&cache->space_info->lock);
 		spin_lock(&cache->lock);
+
+		if (btrfs_super_cache_generation(&info->super_copy) != 0 &&
+		    cache->disk_cache_state < BTRFS_DC_CLEAR)
+			cache->disk_cache_state = BTRFS_DC_CLEAR;
+
 		cache->dirty = 1;
 		old_val = btrfs_block_group_used(&cache->item);
 		num_bytes = min(total, cache->key.offset - byte_in_group);
@@ -4554,6 +4850,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 	bool found_uncached_bg = false;
 	bool failed_cluster_refill = false;
 	bool failed_alloc = false;
+	bool use_cluster = true;
 	u64 ideal_cache_percent = 0;
 	u64 ideal_cache_offset = 0;
 
@@ -4568,16 +4865,24 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
 		return -ENOSPC;
 	}
 
+	/*
+	 * If the space info is for both data and metadata it means we have a
+	 * small filesystem and we can't use the clustering stuff.
+	 */
+	if (btrfs_mixed_space_info(space_info))
+		use_cluster = false;
+
 	if (orig_root->ref_cows || empty_size)
 		allowed_chunk_alloc = 1;
 
-	if (data & BTRFS_BLOCK_GROUP_METADATA) {
+	if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) {
 		last_ptr = &root->fs_info->meta_alloc_cluster;
 		if (!btrfs_test_opt(root, SSD))
 			empty_cluster = 64 * 1024;
 	}
 
-	if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) {
+	if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster &&
+	    btrfs_test_opt(root, SSD)) {
 		last_ptr = &root->fs_info->data_alloc_cluster;
 	}
 
@@ -4637,10 +4942,34 @@ search:
 		btrfs_get_block_group(block_group);
 		search_start = block_group->key.objectid;
 
+		/*
+		 * this can happen if we end up cycling through all the
+		 * raid types, but we want to make sure we only allocate
+		 * for the proper type.
+		 */
+		if (!block_group_bits(block_group, data)) {
+		    u64 extra = BTRFS_BLOCK_GROUP_DUP |
+				BTRFS_BLOCK_GROUP_RAID1 |
+				BTRFS_BLOCK_GROUP_RAID10;
+
+			/*
+			 * if they asked for extra copies and this block group
+			 * doesn't provide them, bail.  This does allow us to
+			 * fill raid0 from raid1.
+			 */
+			if ((data & extra) && !(block_group->flags & extra))
+				goto loop;
+		}
+
 have_block_group:
 		if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
 			u64 free_percent;
 
+			ret = cache_block_group(block_group, trans,
+						orig_root, 1);
+			if (block_group->cached == BTRFS_CACHE_FINISHED)
+				goto have_block_group;
+
 			free_percent = btrfs_block_group_used(&block_group->item);
 			free_percent *= 100;
 			free_percent = div64_u64(free_percent,
@@ -4661,7 +4990,8 @@ have_block_group:
 			if (loop > LOOP_CACHING_NOWAIT ||
 			    (loop > LOOP_FIND_IDEAL &&
 			     atomic_read(&space_info->caching_threads) < 2)) {
-				ret = cache_block_group(block_group);
+				ret = cache_block_group(block_group, trans,
+							orig_root, 0);
 				BUG_ON(ret);
 			}
 			found_uncached_bg = true;
@@ -5218,7 +5548,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
 	u64 num_bytes = ins->offset;
 
 	block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
-	cache_block_group(block_group);
+	cache_block_group(block_group, trans, NULL, 0);
 	caching_ctl = get_caching_control(block_group);
 
 	if (!caching_ctl) {
@@ -5308,7 +5638,8 @@ use_block_rsv(struct btrfs_trans_handle *trans,
 	block_rsv = get_block_rsv(trans, root);
 
 	if (block_rsv->size == 0) {
-		ret = reserve_metadata_bytes(block_rsv, blocksize);
+		ret = reserve_metadata_bytes(trans, root, block_rsv,
+					     blocksize, 0);
 		if (ret)
 			return ERR_PTR(ret);
 		return block_rsv;
@@ -5318,11 +5649,6 @@ use_block_rsv(struct btrfs_trans_handle *trans,
 	if (!ret)
 		return block_rsv;
 
-	WARN_ON(1);
-	printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n",
-		block_rsv->size, block_rsv->reserved,
-		block_rsv->freed[0], block_rsv->freed[1]);
-
 	return ERR_PTR(-ENOSPC);
 }
 
@@ -5421,7 +5747,6 @@ static noinline void reada_walk_down(struct btrfs_trans_handle *trans,
 	u64 generation;
 	u64 refs;
 	u64 flags;
-	u64 last = 0;
 	u32 nritems;
 	u32 blocksize;
 	struct btrfs_key key;
@@ -5489,7 +5814,6 @@ reada:
 					   generation);
 		if (ret)
 			break;
-		last = bytenr + blocksize;
 		nread++;
 	}
 	wc->reada_slot = slot;
@@ -6009,9 +6333,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
 					   NULL, NULL);
 		BUG_ON(ret < 0);
 		if (ret > 0) {
-			ret = btrfs_del_orphan_item(trans, tree_root,
-						    root->root_key.objectid);
-			BUG_ON(ret);
+			/* if we fail to delete the orphan item this time
+			 * around, it'll get picked up the next time.
+			 *
+			 * The most common failure here is just -ENOENT.
+			 */
+			btrfs_del_orphan_item(trans, tree_root,
+					      root->root_key.objectid);
 		}
 	}
 
@@ -7587,7 +7915,14 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
 	u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
 		BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
 
-	num_devices = root->fs_info->fs_devices->rw_devices;
+	/*
+	 * we add in the count of missing devices because we want
+	 * to make sure that any RAID levels on a degraded FS
+	 * continue to be honored.
+	 */
+	num_devices = root->fs_info->fs_devices->rw_devices +
+		root->fs_info->fs_devices->missing_devices;
+
 	if (num_devices == 1) {
 		stripped |= BTRFS_BLOCK_GROUP_DUP;
 		stripped = flags & ~stripped;
@@ -7813,6 +8148,40 @@ out:
 	return ret;
 }
 
+void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
+{
+	struct btrfs_block_group_cache *block_group;
+	u64 last = 0;
+
+	while (1) {
+		struct inode *inode;
+
+		block_group = btrfs_lookup_first_block_group(info, last);
+		while (block_group) {
+			spin_lock(&block_group->lock);
+			if (block_group->iref)
+				break;
+			spin_unlock(&block_group->lock);
+			block_group = next_block_group(info->tree_root,
+						       block_group);
+		}
+		if (!block_group) {
+			if (last == 0)
+				break;
+			last = 0;
+			continue;
+		}
+
+		inode = block_group->inode;
+		block_group->iref = 0;
+		block_group->inode = NULL;
+		spin_unlock(&block_group->lock);
+		iput(inode);
+		last = block_group->key.objectid + block_group->key.offset;
+		btrfs_put_block_group(block_group);
+	}
+}
+
 int btrfs_free_block_groups(struct btrfs_fs_info *info)
 {
 	struct btrfs_block_group_cache *block_group;
@@ -7896,6 +8265,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 	struct btrfs_key key;
 	struct btrfs_key found_key;
 	struct extent_buffer *leaf;
+	int need_clear = 0;
+	u64 cache_gen;
 
 	root = info->extent_root;
 	key.objectid = 0;
@@ -7905,13 +8276,21 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 	if (!path)
 		return -ENOMEM;
 
+	cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy);
+	if (cache_gen != 0 &&
+	    btrfs_super_generation(&root->fs_info->super_copy) != cache_gen)
+		need_clear = 1;
+	if (btrfs_test_opt(root, CLEAR_CACHE))
+		need_clear = 1;
+	if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen)
+		printk(KERN_INFO "btrfs: disk space caching is enabled\n");
+
 	while (1) {
 		ret = find_first_block_group(root, path, &key);
 		if (ret > 0)
 			break;
 		if (ret != 0)
 			goto error;
-
 		leaf = path->nodes[0];
 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 		cache = kzalloc(sizeof(*cache), GFP_NOFS);
@@ -7927,6 +8306,9 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 		INIT_LIST_HEAD(&cache->list);
 		INIT_LIST_HEAD(&cache->cluster_list);
 
+		if (need_clear)
+			cache->disk_cache_state = BTRFS_DC_CLEAR;
+
 		/*
 		 * we only want to have 32k of ram per block group for keeping
 		 * track of free space, and if we pass 1/2 of that we want to
@@ -8031,6 +8413,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
 	cache->key.offset = size;
 	cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
 	cache->sectorsize = root->sectorsize;
+	cache->fs_info = root->fs_info;
 
 	/*
 	 * we only want to have 32k of ram per block group for keeping track
@@ -8087,8 +8470,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	struct btrfs_path *path;
 	struct btrfs_block_group_cache *block_group;
 	struct btrfs_free_cluster *cluster;
+	struct btrfs_root *tree_root = root->fs_info->tree_root;
 	struct btrfs_key key;
+	struct inode *inode;
 	int ret;
+	int factor;
 
 	root = root->fs_info->extent_root;
 
@@ -8097,6 +8483,12 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	BUG_ON(!block_group->ro);
 
 	memcpy(&key, &block_group->key, sizeof(key));
+	if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP |
+				  BTRFS_BLOCK_GROUP_RAID1 |
+				  BTRFS_BLOCK_GROUP_RAID10))
+		factor = 2;
+	else
+		factor = 1;
 
 	/* make sure this block group isn't part of an allocation cluster */
 	cluster = &root->fs_info->data_alloc_cluster;
@@ -8116,6 +8508,40 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
 
+	inode = lookup_free_space_inode(root, block_group, path);
+	if (!IS_ERR(inode)) {
+		btrfs_orphan_add(trans, inode);
+		clear_nlink(inode);
+		/* One for the block groups ref */
+		spin_lock(&block_group->lock);
+		if (block_group->iref) {
+			block_group->iref = 0;
+			block_group->inode = NULL;
+			spin_unlock(&block_group->lock);
+			iput(inode);
+		} else {
+			spin_unlock(&block_group->lock);
+		}
+		/* One for our lookup ref */
+		iput(inode);
+	}
+
+	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+	key.offset = block_group->key.objectid;
+	key.type = 0;
+
+	ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1);
+	if (ret < 0)
+		goto out;
+	if (ret > 0)
+		btrfs_release_path(tree_root, path);
+	if (ret == 0) {
+		ret = btrfs_del_item(trans, tree_root, path);
+		if (ret)
+			goto out;
+		btrfs_release_path(tree_root, path);
+	}
+
 	spin_lock(&root->fs_info->block_group_cache_lock);
 	rb_erase(&block_group->cache_node,
 		 &root->fs_info->block_group_cache_tree);
@@ -8137,8 +8563,11 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
 	spin_lock(&block_group->space_info->lock);
 	block_group->space_info->total_bytes -= block_group->key.offset;
 	block_group->space_info->bytes_readonly -= block_group->key.offset;
+	block_group->space_info->disk_total -= block_group->key.offset * factor;
 	spin_unlock(&block_group->space_info->lock);
 
+	memcpy(&key, &block_group->key, sizeof(key));
+
 	btrfs_clear_space_info_full(root->fs_info);
 
 	btrfs_put_block_group(block_group);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index d74e6af9b53..3e86b9f3650 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -104,7 +104,7 @@ void extent_io_tree_init(struct extent_io_tree *tree,
 			  struct address_space *mapping, gfp_t mask)
 {
 	tree->state = RB_ROOT;
-	tree->buffer = RB_ROOT;
+	INIT_RADIX_TREE(&tree->buffer, GFP_ATOMIC);
 	tree->ops = NULL;
 	tree->dirty_bytes = 0;
 	spin_lock_init(&tree->lock);
@@ -235,50 +235,6 @@ static inline struct rb_node *tree_search(struct extent_io_tree *tree,
 	return ret;
 }
 
-static struct extent_buffer *buffer_tree_insert(struct extent_io_tree *tree,
-					  u64 offset, struct rb_node *node)
-{
-	struct rb_root *root = &tree->buffer;
-	struct rb_node **p = &root->rb_node;
-	struct rb_node *parent = NULL;
-	struct extent_buffer *eb;
-
-	while (*p) {
-		parent = *p;
-		eb = rb_entry(parent, struct extent_buffer, rb_node);
-
-		if (offset < eb->start)
-			p = &(*p)->rb_left;
-		else if (offset > eb->start)
-			p = &(*p)->rb_right;
-		else
-			return eb;
-	}
-
-	rb_link_node(node, parent, p);
-	rb_insert_color(node, root);
-	return NULL;
-}
-
-static struct extent_buffer *buffer_search(struct extent_io_tree *tree,
-					   u64 offset)
-{
-	struct rb_root *root = &tree->buffer;
-	struct rb_node *n = root->rb_node;
-	struct extent_buffer *eb;
-
-	while (n) {
-		eb = rb_entry(n, struct extent_buffer, rb_node);
-		if (offset < eb->start)
-			n = n->rb_left;
-		else if (offset > eb->start)
-			n = n->rb_right;
-		else
-			return eb;
-	}
-	return NULL;
-}
-
 static void merge_cb(struct extent_io_tree *tree, struct extent_state *new,
 		     struct extent_state *other)
 {
@@ -1872,9 +1828,9 @@ static void end_bio_extent_preparewrite(struct bio *bio, int err)
 	bio_put(bio);
 }
 
-static struct bio *
-extent_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
-		 gfp_t gfp_flags)
+struct bio *
+btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
+		gfp_t gfp_flags)
 {
 	struct bio *bio;
 
@@ -1901,10 +1857,8 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num,
 	struct page *page = bvec->bv_page;
 	struct extent_io_tree *tree = bio->bi_private;
 	u64 start;
-	u64 end;
 
 	start = ((u64)page->index << PAGE_CACHE_SHIFT) + bvec->bv_offset;
-	end = start + bvec->bv_len - 1;
 
 	bio->bi_private = NULL;
 
@@ -1965,7 +1919,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
 	else
 		nr = bio_get_nr_vecs(bdev);
 
-	bio = extent_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
+	bio = btrfs_bio_alloc(bdev, sector, nr, GFP_NOFS | __GFP_HIGH);
 
 	bio_add_page(bio, page, page_size, offset);
 	bio->bi_end_io = end_io_func;
@@ -2204,7 +2158,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 	u64 last_byte = i_size_read(inode);
 	u64 block_start;
 	u64 iosize;
-	u64 unlock_start;
 	sector_t sector;
 	struct extent_state *cached_state = NULL;
 	struct extent_map *em;
@@ -2329,7 +2282,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 		if (tree->ops && tree->ops->writepage_end_io_hook)
 			tree->ops->writepage_end_io_hook(page, start,
 							 page_end, NULL, 1);
-		unlock_start = page_end + 1;
 		goto done;
 	}
 
@@ -2340,7 +2292,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 			if (tree->ops && tree->ops->writepage_end_io_hook)
 				tree->ops->writepage_end_io_hook(page, cur,
 							 page_end, NULL, 1);
-			unlock_start = page_end + 1;
 			break;
 		}
 		em = epd->get_extent(inode, page, pg_offset, cur,
@@ -2387,7 +2338,6 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
 
 			cur += iosize;
 			pg_offset += iosize;
-			unlock_start = cur;
 			continue;
 		}
 		/* leave this out until we have a page_mkwrite call */
@@ -2473,7 +2423,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
 	pgoff_t index;
 	pgoff_t end;		/* Inclusive */
 	int scanned = 0;
-	int range_whole = 0;
 
 	pagevec_init(&pvec, 0);
 	if (wbc->range_cyclic) {
@@ -2482,8 +2431,6 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
 	} else {
 		index = wbc->range_start >> PAGE_CACHE_SHIFT;
 		end = wbc->range_end >> PAGE_CACHE_SHIFT;
-		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
-			range_whole = 1;
 		scanned = 1;
 	}
 retry:
@@ -2823,6 +2770,8 @@ int extent_prepare_write(struct extent_io_tree *tree,
 					 NULL, 1,
 					 end_bio_extent_preparewrite, 0,
 					 0, 0);
+			if (ret && !err)
+				err = ret;
 			iocount++;
 			block_start = block_start + iosize;
 		} else {
@@ -2952,21 +2901,53 @@ out:
 int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		__u64 start, __u64 len, get_extent_t *get_extent)
 {
-	int ret;
+	int ret = 0;
 	u64 off = start;
 	u64 max = start + len;
 	u32 flags = 0;
+	u32 found_type;
+	u64 last;
 	u64 disko = 0;
+	struct btrfs_key found_key;
 	struct extent_map *em = NULL;
 	struct extent_state *cached_state = NULL;
+	struct btrfs_path *path;
+	struct btrfs_file_extent_item *item;
 	int end = 0;
 	u64 em_start = 0, em_len = 0;
 	unsigned long emflags;
-	ret = 0;
+	int hole = 0;
 
 	if (len == 0)
 		return -EINVAL;
 
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+	path->leave_spinning = 1;
+
+	ret = btrfs_lookup_file_extent(NULL, BTRFS_I(inode)->root,
+				       path, inode->i_ino, -1, 0);
+	if (ret < 0) {
+		btrfs_free_path(path);
+		return ret;
+	}
+	WARN_ON(!ret);
+	path->slots[0]--;
+	item = btrfs_item_ptr(path->nodes[0], path->slots[0],
+			      struct btrfs_file_extent_item);
+	btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
+	found_type = btrfs_key_type(&found_key);
+
+	/* No extents, just return */
+	if (found_key.objectid != inode->i_ino ||
+	    found_type != BTRFS_EXTENT_DATA_KEY) {
+		btrfs_free_path(path);
+		return 0;
+	}
+	last = found_key.offset;
+	btrfs_free_path(path);
+
 	lock_extent_bits(&BTRFS_I(inode)->io_tree, start, start + len, 0,
 			 &cached_state, GFP_NOFS);
 	em = get_extent(inode, NULL, 0, off, max - off, 0);
@@ -2976,11 +2957,18 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		ret = PTR_ERR(em);
 		goto out;
 	}
+
 	while (!end) {
+		hole = 0;
 		off = em->start + em->len;
 		if (off >= max)
 			end = 1;
 
+		if (em->block_start == EXTENT_MAP_HOLE) {
+			hole = 1;
+			goto next;
+		}
+
 		em_start = em->start;
 		em_len = em->len;
 
@@ -2990,8 +2978,6 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		if (em->block_start == EXTENT_MAP_LAST_BYTE) {
 			end = 1;
 			flags |= FIEMAP_EXTENT_LAST;
-		} else if (em->block_start == EXTENT_MAP_HOLE) {
-			flags |= FIEMAP_EXTENT_UNWRITTEN;
 		} else if (em->block_start == EXTENT_MAP_INLINE) {
 			flags |= (FIEMAP_EXTENT_DATA_INLINE |
 				  FIEMAP_EXTENT_NOT_ALIGNED);
@@ -3004,10 +2990,10 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags))
 			flags |= FIEMAP_EXTENT_ENCODED;
 
+next:
 		emflags = em->flags;
 		free_extent_map(em);
 		em = NULL;
-
 		if (!end) {
 			em = get_extent(inode, NULL, 0, off, max - off, 0);
 			if (!em)
@@ -3018,15 +3004,23 @@ int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			}
 			emflags = em->flags;
 		}
+
 		if (test_bit(EXTENT_FLAG_VACANCY, &emflags)) {
 			flags |= FIEMAP_EXTENT_LAST;
 			end = 1;
 		}
 
-		ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
-					em_len, flags);
-		if (ret)
-			goto out_free;
+		if (em_start == last) {
+			flags |= FIEMAP_EXTENT_LAST;
+			end = 1;
+		}
+
+		if (!hole) {
+			ret = fiemap_fill_next_extent(fieinfo, em_start, disko,
+						em_len, flags);
+			if (ret)
+				goto out_free;
+		}
 	}
 out_free:
 	free_extent_map(em);
@@ -3104,6 +3098,39 @@ static void __free_extent_buffer(struct extent_buffer *eb)
 	kmem_cache_free(extent_buffer_cache, eb);
 }
 
+/*
+ * Helper for releasing extent buffer page.
+ */
+static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
+						unsigned long start_idx)
+{
+	unsigned long index;
+	struct page *page;
+
+	if (!eb->first_page)
+		return;
+
+	index = num_extent_pages(eb->start, eb->len);
+	if (start_idx >= index)
+		return;
+
+	do {
+		index--;
+		page = extent_buffer_page(eb, index);
+		if (page)
+			page_cache_release(page);
+	} while (index != start_idx);
+}
+
+/*
+ * Helper for releasing the extent buffer.
+ */
+static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
+{
+	btrfs_release_extent_buffer_page(eb, 0);
+	__free_extent_buffer(eb);
+}
+
 struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 					  u64 start, unsigned long len,
 					  struct page *page0,
@@ -3117,16 +3144,16 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 	struct page *p;
 	struct address_space *mapping = tree->mapping;
 	int uptodate = 1;
+	int ret;
 
-	spin_lock(&tree->buffer_lock);
-	eb = buffer_search(tree, start);
-	if (eb) {
-		atomic_inc(&eb->refs);
-		spin_unlock(&tree->buffer_lock);
+	rcu_read_lock();
+	eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+	if (eb && atomic_inc_not_zero(&eb->refs)) {
+		rcu_read_unlock();
 		mark_page_accessed(eb->first_page);
 		return eb;
 	}
-	spin_unlock(&tree->buffer_lock);
+	rcu_read_unlock();
 
 	eb = __alloc_extent_buffer(tree, start, len, mask);
 	if (!eb)
@@ -3165,26 +3192,31 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
 	if (uptodate)
 		set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
 
+	ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+	if (ret)
+		goto free_eb;
+
 	spin_lock(&tree->buffer_lock);
-	exists = buffer_tree_insert(tree, start, &eb->rb_node);
-	if (exists) {
+	ret = radix_tree_insert(&tree->buffer, start >> PAGE_CACHE_SHIFT, eb);
+	if (ret == -EEXIST) {
+		exists = radix_tree_lookup(&tree->buffer,
+						start >> PAGE_CACHE_SHIFT);
 		/* add one reference for the caller */
 		atomic_inc(&exists->refs);
 		spin_unlock(&tree->buffer_lock);
+		radix_tree_preload_end();
 		goto free_eb;
 	}
 	/* add one reference for the tree */
 	atomic_inc(&eb->refs);
 	spin_unlock(&tree->buffer_lock);
+	radix_tree_preload_end();
 	return eb;
 
 free_eb:
 	if (!atomic_dec_and_test(&eb->refs))
 		return exists;
-	for (index = 1; index < i; index++)
-		page_cache_release(extent_buffer_page(eb, index));
-	page_cache_release(extent_buffer_page(eb, 0));
-	__free_extent_buffer(eb);
+	btrfs_release_extent_buffer(eb);
 	return exists;
 }
 
@@ -3194,16 +3226,16 @@ struct extent_buffer *find_extent_buffer(struct extent_io_tree *tree,
 {
 	struct extent_buffer *eb;
 
-	spin_lock(&tree->buffer_lock);
-	eb = buffer_search(tree, start);
-	if (eb)
-		atomic_inc(&eb->refs);
-	spin_unlock(&tree->buffer_lock);
-
-	if (eb)
+	rcu_read_lock();
+	eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+	if (eb && atomic_inc_not_zero(&eb->refs)) {
+		rcu_read_unlock();
 		mark_page_accessed(eb->first_page);
+		return eb;
+	}
+	rcu_read_unlock();
 
-	return eb;
+	return NULL;
 }
 
 void free_extent_buffer(struct extent_buffer *eb)
@@ -3833,34 +3865,47 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 	}
 }
 
+static inline void btrfs_release_extent_buffer_rcu(struct rcu_head *head)
+{
+	struct extent_buffer *eb =
+			container_of(head, struct extent_buffer, rcu_head);
+
+	btrfs_release_extent_buffer(eb);
+}
+
 int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page)
 {
 	u64 start = page_offset(page);
 	struct extent_buffer *eb;
 	int ret = 1;
-	unsigned long i;
-	unsigned long num_pages;
 
 	spin_lock(&tree->buffer_lock);
-	eb = buffer_search(tree, start);
-	if (!eb)
-		goto out;
+	eb = radix_tree_lookup(&tree->buffer, start >> PAGE_CACHE_SHIFT);
+	if (!eb) {
+		spin_unlock(&tree->buffer_lock);
+		return ret;
+	}
 
-	if (atomic_read(&eb->refs) > 1) {
+	if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
 		ret = 0;
 		goto out;
 	}
-	if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) {
+
+	/*
+	 * set @eb->refs to 0 if it is already 1, and then release the @eb.
+	 * Or go back.
+	 */
+	if (atomic_cmpxchg(&eb->refs, 1, 0) != 1) {
 		ret = 0;
 		goto out;
 	}
-	/* at this point we can safely release the extent buffer */
-	num_pages = num_extent_pages(eb->start, eb->len);
-	for (i = 0; i < num_pages; i++)
-		page_cache_release(extent_buffer_page(eb, i));
-	rb_erase(&eb->rb_node, &tree->buffer);
-	__free_extent_buffer(eb);
+
+	radix_tree_delete(&tree->buffer, start >> PAGE_CACHE_SHIFT);
 out:
 	spin_unlock(&tree->buffer_lock);
+
+	/* at this point we can safely release the extent buffer */
+	if (atomic_read(&eb->refs) == 0)
+		call_rcu(&eb->rcu_head, btrfs_release_extent_buffer_rcu);
 	return ret;
 }
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 5691c7b590d..4183c8178f0 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -85,7 +85,7 @@ struct extent_io_ops {
 
 struct extent_io_tree {
 	struct rb_root state;
-	struct rb_root buffer;
+	struct radix_tree_root buffer;
 	struct address_space *mapping;
 	u64 dirty_bytes;
 	spinlock_t lock;
@@ -123,7 +123,7 @@ struct extent_buffer {
 	unsigned long bflags;
 	atomic_t refs;
 	struct list_head leak_list;
-	struct rb_node rb_node;
+	struct rcu_head rcu_head;
 
 	/* the spinlock is used to protect most operations */
 	spinlock_t lock;
@@ -310,4 +310,7 @@ int extent_clear_unlock_delalloc(struct inode *inode,
 				struct extent_io_tree *tree,
 				u64 start, u64 end, struct page *locked_page,
 				unsigned long op);
+struct bio *
+btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
+		gfp_t gfp_flags);
 #endif
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 454ca52d645..23cb8da3ff6 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -335,7 +335,7 @@ struct extent_map *lookup_extent_mapping(struct extent_map_tree *tree,
 		goto out;
 	}
 	if (IS_ERR(rb_node)) {
-		em = ERR_PTR(PTR_ERR(rb_node));
+		em = ERR_CAST(rb_node);
 		goto out;
 	}
 	em = rb_entry(rb_node, struct extent_map, rb_node);
@@ -384,7 +384,7 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
 		goto out;
 	}
 	if (IS_ERR(rb_node)) {
-		em = ERR_PTR(PTR_ERR(rb_node));
+		em = ERR_CAST(rb_node);
 		goto out;
 	}
 	em = rb_entry(rb_node, struct extent_map, rb_node);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index e354c33df08..66836d85763 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -48,30 +48,34 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
 					 struct page **prepared_pages,
 					 struct iov_iter *i)
 {
-	size_t copied;
+	size_t copied = 0;
 	int pg = 0;
 	int offset = pos & (PAGE_CACHE_SIZE - 1);
+	int total_copied = 0;
 
 	while (write_bytes > 0) {
 		size_t count = min_t(size_t,
 				     PAGE_CACHE_SIZE - offset, write_bytes);
 		struct page *page = prepared_pages[pg];
-again:
-		if (unlikely(iov_iter_fault_in_readable(i, count)))
-			return -EFAULT;
-
-		/* Copy data from userspace to the current page */
-		copied = iov_iter_copy_from_user(page, i, offset, count);
+		/*
+		 * Copy data from userspace to the current page
+		 *
+		 * Disable pagefault to avoid recursive lock since
+		 * the pages are already locked
+		 */
+		pagefault_disable();
+		copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
+		pagefault_enable();
 
 		/* Flush processor's dcache for this page */
 		flush_dcache_page(page);
 		iov_iter_advance(i, copied);
 		write_bytes -= copied;
+		total_copied += copied;
 
+		/* Return to btrfs_file_aio_write to fault page */
 		if (unlikely(copied == 0)) {
-			count = min_t(size_t, PAGE_CACHE_SIZE - offset,
-				      iov_iter_single_seg_count(i));
-			goto again;
+			break;
 		}
 
 		if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
@@ -81,7 +85,7 @@ again:
 			offset = 0;
 		}
 	}
-	return 0;
+	return total_copied;
 }
 
 /*
@@ -854,6 +858,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 	unsigned long last_index;
 	int will_write;
 	int buffered = 0;
+	int copied = 0;
+	int dirty_pages = 0;
 
 	will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
 		      (file->f_flags & O_DIRECT));
@@ -970,7 +976,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 		WARN_ON(num_pages > nrptrs);
 		memset(pages, 0, sizeof(struct page *) * nrptrs);
 
-		ret = btrfs_delalloc_reserve_space(inode, write_bytes);
+		/*
+		 * Fault pages before locking them in prepare_pages
+		 * to avoid recursive lock
+		 */
+		if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		ret = btrfs_delalloc_reserve_space(inode,
+					num_pages << PAGE_CACHE_SHIFT);
 		if (ret)
 			goto out;
 
@@ -978,37 +994,49 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 				    pos, first_index, last_index,
 				    write_bytes);
 		if (ret) {
-			btrfs_delalloc_release_space(inode, write_bytes);
+			btrfs_delalloc_release_space(inode,
+					num_pages << PAGE_CACHE_SHIFT);
 			goto out;
 		}
 
-		ret = btrfs_copy_from_user(pos, num_pages,
+		copied = btrfs_copy_from_user(pos, num_pages,
 					   write_bytes, pages, &i);
-		if (ret == 0) {
+		dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >>
+					PAGE_CACHE_SHIFT;
+
+		if (num_pages > dirty_pages) {
+			if (copied > 0)
+				atomic_inc(
+					&BTRFS_I(inode)->outstanding_extents);
+			btrfs_delalloc_release_space(inode,
+					(num_pages - dirty_pages) <<
+					PAGE_CACHE_SHIFT);
+		}
+
+		if (copied > 0) {
 			dirty_and_release_pages(NULL, root, file, pages,
-						num_pages, pos, write_bytes);
+						dirty_pages, pos, copied);
 		}
 
 		btrfs_drop_pages(pages, num_pages);
-		if (ret) {
-			btrfs_delalloc_release_space(inode, write_bytes);
-			goto out;
-		}
 
-		if (will_write) {
-			filemap_fdatawrite_range(inode->i_mapping, pos,
-						 pos + write_bytes - 1);
-		} else {
-			balance_dirty_pages_ratelimited_nr(inode->i_mapping,
-							   num_pages);
-			if (num_pages <
-			    (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
-				btrfs_btree_balance_dirty(root, 1);
-			btrfs_throttle(root);
+		if (copied > 0) {
+			if (will_write) {
+				filemap_fdatawrite_range(inode->i_mapping, pos,
+							 pos + copied - 1);
+			} else {
+				balance_dirty_pages_ratelimited_nr(
+							inode->i_mapping,
+							dirty_pages);
+				if (dirty_pages <
+				(root->leafsize >> PAGE_CACHE_SHIFT) + 1)
+					btrfs_btree_balance_dirty(root, 1);
+				btrfs_throttle(root);
+			}
 		}
 
-		pos += write_bytes;
-		num_written += write_bytes;
+		pos += copied;
+		num_written += copied;
 
 		cond_resched();
 	}
@@ -1047,8 +1075,14 @@ out:
 
 		if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) {
 			trans = btrfs_start_transaction(root, 0);
+			if (IS_ERR(trans)) {
+				num_written = PTR_ERR(trans);
+				goto done;
+			}
+			mutex_lock(&inode->i_mutex);
 			ret = btrfs_log_dentry_safe(trans, root,
 						    file->f_dentry);
+			mutex_unlock(&inode->i_mutex);
 			if (ret == 0) {
 				ret = btrfs_sync_log(trans, root);
 				if (ret == 0)
@@ -1067,6 +1101,7 @@ out:
 			     (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT);
 		}
 	}
+done:
 	current->backing_dev_info = NULL;
 	return num_written ? num_written : err;
 }
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index f488fac04d9..60d68426695 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -23,10 +23,763 @@
 #include "ctree.h"
 #include "free-space-cache.h"
 #include "transaction.h"
+#include "disk-io.h"
 
 #define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
 #define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)
 
+static void recalculate_thresholds(struct btrfs_block_group_cache
+				   *block_group);
+static int link_free_space(struct btrfs_block_group_cache *block_group,
+			   struct btrfs_free_space *info);
+
+struct inode *lookup_free_space_inode(struct btrfs_root *root,
+				      struct btrfs_block_group_cache
+				      *block_group, struct btrfs_path *path)
+{
+	struct btrfs_key key;
+	struct btrfs_key location;
+	struct btrfs_disk_key disk_key;
+	struct btrfs_free_space_header *header;
+	struct extent_buffer *leaf;
+	struct inode *inode = NULL;
+	int ret;
+
+	spin_lock(&block_group->lock);
+	if (block_group->inode)
+		inode = igrab(block_group->inode);
+	spin_unlock(&block_group->lock);
+	if (inode)
+		return inode;
+
+	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+	key.offset = block_group->key.objectid;
+	key.type = 0;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret < 0)
+		return ERR_PTR(ret);
+	if (ret > 0) {
+		btrfs_release_path(root, path);
+		return ERR_PTR(-ENOENT);
+	}
+
+	leaf = path->nodes[0];
+	header = btrfs_item_ptr(leaf, path->slots[0],
+				struct btrfs_free_space_header);
+	btrfs_free_space_key(leaf, header, &disk_key);
+	btrfs_disk_key_to_cpu(&location, &disk_key);
+	btrfs_release_path(root, path);
+
+	inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
+	if (!inode)
+		return ERR_PTR(-ENOENT);
+	if (IS_ERR(inode))
+		return inode;
+	if (is_bad_inode(inode)) {
+		iput(inode);
+		return ERR_PTR(-ENOENT);
+	}
+
+	spin_lock(&block_group->lock);
+	if (!root->fs_info->closing) {
+		block_group->inode = igrab(inode);
+		block_group->iref = 1;
+	}
+	spin_unlock(&block_group->lock);
+
+	return inode;
+}
+
+int create_free_space_inode(struct btrfs_root *root,
+			    struct btrfs_trans_handle *trans,
+			    struct btrfs_block_group_cache *block_group,
+			    struct btrfs_path *path)
+{
+	struct btrfs_key key;
+	struct btrfs_disk_key disk_key;
+	struct btrfs_free_space_header *header;
+	struct btrfs_inode_item *inode_item;
+	struct extent_buffer *leaf;
+	u64 objectid;
+	int ret;
+
+	ret = btrfs_find_free_objectid(trans, root, 0, &objectid);
+	if (ret < 0)
+		return ret;
+
+	ret = btrfs_insert_empty_inode(trans, root, path, objectid);
+	if (ret)
+		return ret;
+
+	leaf = path->nodes[0];
+	inode_item = btrfs_item_ptr(leaf, path->slots[0],
+				    struct btrfs_inode_item);
+	btrfs_item_key(leaf, &disk_key, path->slots[0]);
+	memset_extent_buffer(leaf, 0, (unsigned long)inode_item,
+			     sizeof(*inode_item));
+	btrfs_set_inode_generation(leaf, inode_item, trans->transid);
+	btrfs_set_inode_size(leaf, inode_item, 0);
+	btrfs_set_inode_nbytes(leaf, inode_item, 0);
+	btrfs_set_inode_uid(leaf, inode_item, 0);
+	btrfs_set_inode_gid(leaf, inode_item, 0);
+	btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600);
+	btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS |
+			      BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM);
+	btrfs_set_inode_nlink(leaf, inode_item, 1);
+	btrfs_set_inode_transid(leaf, inode_item, trans->transid);
+	btrfs_set_inode_block_group(leaf, inode_item,
+				    block_group->key.objectid);
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_release_path(root, path);
+
+	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+	key.offset = block_group->key.objectid;
+	key.type = 0;
+
+	ret = btrfs_insert_empty_item(trans, root, path, &key,
+				      sizeof(struct btrfs_free_space_header));
+	if (ret < 0) {
+		btrfs_release_path(root, path);
+		return ret;
+	}
+	leaf = path->nodes[0];
+	header = btrfs_item_ptr(leaf, path->slots[0],
+				struct btrfs_free_space_header);
+	memset_extent_buffer(leaf, 0, (unsigned long)header, sizeof(*header));
+	btrfs_set_free_space_key(leaf, header, &disk_key);
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_release_path(root, path);
+
+	return 0;
+}
+
+int btrfs_truncate_free_space_cache(struct btrfs_root *root,
+				    struct btrfs_trans_handle *trans,
+				    struct btrfs_path *path,
+				    struct inode *inode)
+{
+	loff_t oldsize;
+	int ret = 0;
+
+	trans->block_rsv = root->orphan_block_rsv;
+	ret = btrfs_block_rsv_check(trans, root,
+				    root->orphan_block_rsv,
+				    0, 5);
+	if (ret)
+		return ret;
+
+	oldsize = i_size_read(inode);
+	btrfs_i_size_write(inode, 0);
+	truncate_pagecache(inode, oldsize, 0);
+
+	/*
+	 * We don't need an orphan item because truncating the free space cache
+	 * will never be split across transactions.
+	 */
+	ret = btrfs_truncate_inode_items(trans, root, inode,
+					 0, BTRFS_EXTENT_DATA_KEY);
+	if (ret) {
+		WARN_ON(1);
+		return ret;
+	}
+
+	return btrfs_update_inode(trans, root, inode);
+}
+
+static int readahead_cache(struct inode *inode)
+{
+	struct file_ra_state *ra;
+	unsigned long last_index;
+
+	ra = kzalloc(sizeof(*ra), GFP_NOFS);
+	if (!ra)
+		return -ENOMEM;
+
+	file_ra_state_init(ra, inode->i_mapping);
+	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+
+	page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
+
+	kfree(ra);
+
+	return 0;
+}
+
+int load_free_space_cache(struct btrfs_fs_info *fs_info,
+			  struct btrfs_block_group_cache *block_group)
+{
+	struct btrfs_root *root = fs_info->tree_root;
+	struct inode *inode;
+	struct btrfs_free_space_header *header;
+	struct extent_buffer *leaf;
+	struct page *page;
+	struct btrfs_path *path;
+	u32 *checksums = NULL, *crc;
+	char *disk_crcs = NULL;
+	struct btrfs_key key;
+	struct list_head bitmaps;
+	u64 num_entries;
+	u64 num_bitmaps;
+	u64 generation;
+	u32 cur_crc = ~(u32)0;
+	pgoff_t index = 0;
+	unsigned long first_page_offset;
+	int num_checksums;
+	int ret = 0;
+
+	/*
+	 * If we're unmounting then just return, since this does a search on the
+	 * normal root and not the commit root and we could deadlock.
+	 */
+	smp_mb();
+	if (fs_info->closing)
+		return 0;
+
+	/*
+	 * If this block group has been marked to be cleared for one reason or
+	 * another then we can't trust the on disk cache, so just return.
+	 */
+	spin_lock(&block_group->lock);
+	if (block_group->disk_cache_state != BTRFS_DC_WRITTEN) {
+		spin_unlock(&block_group->lock);
+		return 0;
+	}
+	spin_unlock(&block_group->lock);
+
+	INIT_LIST_HEAD(&bitmaps);
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return 0;
+
+	inode = lookup_free_space_inode(root, block_group, path);
+	if (IS_ERR(inode)) {
+		btrfs_free_path(path);
+		return 0;
+	}
+
+	/* Nothing in the space cache, goodbye */
+	if (!i_size_read(inode)) {
+		btrfs_free_path(path);
+		goto out;
+	}
+
+	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+	key.offset = block_group->key.objectid;
+	key.type = 0;
+
+	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+	if (ret) {
+		btrfs_free_path(path);
+		goto out;
+	}
+
+	leaf = path->nodes[0];
+	header = btrfs_item_ptr(leaf, path->slots[0],
+				struct btrfs_free_space_header);
+	num_entries = btrfs_free_space_entries(leaf, header);
+	num_bitmaps = btrfs_free_space_bitmaps(leaf, header);
+	generation = btrfs_free_space_generation(leaf, header);
+	btrfs_free_path(path);
+
+	if (BTRFS_I(inode)->generation != generation) {
+		printk(KERN_ERR "btrfs: free space inode generation (%llu) did"
+		       " not match free space cache generation (%llu) for "
+		       "block group %llu\n",
+		       (unsigned long long)BTRFS_I(inode)->generation,
+		       (unsigned long long)generation,
+		       (unsigned long long)block_group->key.objectid);
+		goto free_cache;
+	}
+
+	if (!num_entries)
+		goto out;
+
+	/* Setup everything for doing checksumming */
+	num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
+	checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
+	if (!checksums)
+		goto out;
+	first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
+	disk_crcs = kzalloc(first_page_offset, GFP_NOFS);
+	if (!disk_crcs)
+		goto out;
+
+	ret = readahead_cache(inode);
+	if (ret) {
+		ret = 0;
+		goto out;
+	}
+
+	while (1) {
+		struct btrfs_free_space_entry *entry;
+		struct btrfs_free_space *e;
+		void *addr;
+		unsigned long offset = 0;
+		unsigned long start_offset = 0;
+		int need_loop = 0;
+
+		if (!num_entries && !num_bitmaps)
+			break;
+
+		if (index == 0) {
+			start_offset = first_page_offset;
+			offset = start_offset;
+		}
+
+		page = grab_cache_page(inode->i_mapping, index);
+		if (!page) {
+			ret = 0;
+			goto free_cache;
+		}
+
+		if (!PageUptodate(page)) {
+			btrfs_readpage(NULL, page);
+			lock_page(page);
+			if (!PageUptodate(page)) {
+				unlock_page(page);
+				page_cache_release(page);
+				printk(KERN_ERR "btrfs: error reading free "
+				       "space cache: %llu\n",
+				       (unsigned long long)
+				       block_group->key.objectid);
+				goto free_cache;
+			}
+		}
+		addr = kmap(page);
+
+		if (index == 0) {
+			u64 *gen;
+
+			memcpy(disk_crcs, addr, first_page_offset);
+			gen = addr + (sizeof(u32) * num_checksums);
+			if (*gen != BTRFS_I(inode)->generation) {
+				printk(KERN_ERR "btrfs: space cache generation"
+				       " (%llu) does not match inode (%llu) "
+				       "for block group %llu\n",
+				       (unsigned long long)*gen,
+				       (unsigned long long)
+				       BTRFS_I(inode)->generation,
+				       (unsigned long long)
+				       block_group->key.objectid);
+				kunmap(page);
+				unlock_page(page);
+				page_cache_release(page);
+				goto free_cache;
+			}
+			crc = (u32 *)disk_crcs;
+		}
+		entry = addr + start_offset;
+
+		/* First lets check our crc before we do anything fun */
+		cur_crc = ~(u32)0;
+		cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc,
+					  PAGE_CACHE_SIZE - start_offset);
+		btrfs_csum_final(cur_crc, (char *)&cur_crc);
+		if (cur_crc != *crc) {
+			printk(KERN_ERR "btrfs: crc mismatch for page %lu in "
+			       "block group %llu\n", index,
+			       (unsigned long long)block_group->key.objectid);
+			kunmap(page);
+			unlock_page(page);
+			page_cache_release(page);
+			goto free_cache;
+		}
+		crc++;
+
+		while (1) {
+			if (!num_entries)
+				break;
+
+			need_loop = 1;
+			e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
+			if (!e) {
+				kunmap(page);
+				unlock_page(page);
+				page_cache_release(page);
+				goto free_cache;
+			}
+
+			e->offset = le64_to_cpu(entry->offset);
+			e->bytes = le64_to_cpu(entry->bytes);
+			if (!e->bytes) {
+				kunmap(page);
+				kfree(e);
+				unlock_page(page);
+				page_cache_release(page);
+				goto free_cache;
+			}
+
+			if (entry->type == BTRFS_FREE_SPACE_EXTENT) {
+				spin_lock(&block_group->tree_lock);
+				ret = link_free_space(block_group, e);
+				spin_unlock(&block_group->tree_lock);
+				BUG_ON(ret);
+			} else {
+				e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
+				if (!e->bitmap) {
+					kunmap(page);
+					kfree(e);
+					unlock_page(page);
+					page_cache_release(page);
+					goto free_cache;
+				}
+				spin_lock(&block_group->tree_lock);
+				ret = link_free_space(block_group, e);
+				block_group->total_bitmaps++;
+				recalculate_thresholds(block_group);
+				spin_unlock(&block_group->tree_lock);
+				list_add_tail(&e->list, &bitmaps);
+			}
+
+			num_entries--;
+			offset += sizeof(struct btrfs_free_space_entry);
+			if (offset + sizeof(struct btrfs_free_space_entry) >=
+			    PAGE_CACHE_SIZE)
+				break;
+			entry++;
+		}
+
+		/*
+		 * We read an entry out of this page, we need to move on to the
+		 * next page.
+		 */
+		if (need_loop) {
+			kunmap(page);
+			goto next;
+		}
+
+		/*
+		 * We add the bitmaps at the end of the entries in order that
+		 * the bitmap entries are added to the cache.
+		 */
+		e = list_entry(bitmaps.next, struct btrfs_free_space, list);
+		list_del_init(&e->list);
+		memcpy(e->bitmap, addr, PAGE_CACHE_SIZE);
+		kunmap(page);
+		num_bitmaps--;
+next:
+		unlock_page(page);
+		page_cache_release(page);
+		index++;
+	}
+
+	ret = 1;
+out:
+	kfree(checksums);
+	kfree(disk_crcs);
+	iput(inode);
+	return ret;
+
+free_cache:
+	/* This cache is bogus, make sure it gets cleared */
+	spin_lock(&block_group->lock);
+	block_group->disk_cache_state = BTRFS_DC_CLEAR;
+	spin_unlock(&block_group->lock);
+	btrfs_remove_free_space_cache(block_group);
+	goto out;
+}
+
+int btrfs_write_out_cache(struct btrfs_root *root,
+			  struct btrfs_trans_handle *trans,
+			  struct btrfs_block_group_cache *block_group,
+			  struct btrfs_path *path)
+{
+	struct btrfs_free_space_header *header;
+	struct extent_buffer *leaf;
+	struct inode *inode;
+	struct rb_node *node;
+	struct list_head *pos, *n;
+	struct page *page;
+	struct extent_state *cached_state = NULL;
+	struct list_head bitmap_list;
+	struct btrfs_key key;
+	u64 bytes = 0;
+	u32 *crc, *checksums;
+	pgoff_t index = 0, last_index = 0;
+	unsigned long first_page_offset;
+	int num_checksums;
+	int entries = 0;
+	int bitmaps = 0;
+	int ret = 0;
+
+	root = root->fs_info->tree_root;
+
+	INIT_LIST_HEAD(&bitmap_list);
+
+	spin_lock(&block_group->lock);
+	if (block_group->disk_cache_state < BTRFS_DC_SETUP) {
+		spin_unlock(&block_group->lock);
+		return 0;
+	}
+	spin_unlock(&block_group->lock);
+
+	inode = lookup_free_space_inode(root, block_group, path);
+	if (IS_ERR(inode))
+		return 0;
+
+	if (!i_size_read(inode)) {
+		iput(inode);
+		return 0;
+	}
+
+	node = rb_first(&block_group->free_space_offset);
+	if (!node) {
+		iput(inode);
+		return 0;
+	}
+
+	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+	filemap_write_and_wait(inode->i_mapping);
+	btrfs_wait_ordered_range(inode, inode->i_size &
+				 ~(root->sectorsize - 1), (u64)-1);
+
+	/* We need a checksum per page. */
+	num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
+	crc = checksums  = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
+	if (!crc) {
+		iput(inode);
+		return 0;
+	}
+
+	/* Since the first page has all of our checksums and our generation we
+	 * need to calculate the offset into the page that we can start writing
+	 * our entries.
+	 */
+	first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
+
+	/*
+	 * Lock all pages first so we can lock the extent safely.
+	 *
+	 * NOTE: Because we hold the ref the entire time we're going to write to
+	 * the page find_get_page should never fail, so we don't do a check
+	 * after find_get_page at this point.  Just putting this here so people
+	 * know and don't freak out.
+	 */
+	while (index <= last_index) {
+		page = grab_cache_page(inode->i_mapping, index);
+		if (!page) {
+			pgoff_t i = 0;
+
+			while (i < index) {
+				page = find_get_page(inode->i_mapping, i);
+				unlock_page(page);
+				page_cache_release(page);
+				page_cache_release(page);
+				i++;
+			}
+			goto out_free;
+		}
+		index++;
+	}
+
+	index = 0;
+	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
+			 0, &cached_state, GFP_NOFS);
+
+	/* Write out the extent entries */
+	do {
+		struct btrfs_free_space_entry *entry;
+		void *addr;
+		unsigned long offset = 0;
+		unsigned long start_offset = 0;
+
+		if (index == 0) {
+			start_offset = first_page_offset;
+			offset = start_offset;
+		}
+
+		page = find_get_page(inode->i_mapping, index);
+
+		addr = kmap(page);
+		entry = addr + start_offset;
+
+		memset(addr, 0, PAGE_CACHE_SIZE);
+		while (1) {
+			struct btrfs_free_space *e;
+
+			e = rb_entry(node, struct btrfs_free_space, offset_index);
+			entries++;
+
+			entry->offset = cpu_to_le64(e->offset);
+			entry->bytes = cpu_to_le64(e->bytes);
+			if (e->bitmap) {
+				entry->type = BTRFS_FREE_SPACE_BITMAP;
+				list_add_tail(&e->list, &bitmap_list);
+				bitmaps++;
+			} else {
+				entry->type = BTRFS_FREE_SPACE_EXTENT;
+			}
+			node = rb_next(node);
+			if (!node)
+				break;
+			offset += sizeof(struct btrfs_free_space_entry);
+			if (offset + sizeof(struct btrfs_free_space_entry) >=
+			    PAGE_CACHE_SIZE)
+				break;
+			entry++;
+		}
+		*crc = ~(u32)0;
+		*crc = btrfs_csum_data(root, addr + start_offset, *crc,
+				       PAGE_CACHE_SIZE - start_offset);
+		kunmap(page);
+
+		btrfs_csum_final(*crc, (char *)crc);
+		crc++;
+
+		bytes += PAGE_CACHE_SIZE;
+
+		ClearPageChecked(page);
+		set_page_extent_mapped(page);
+		SetPageUptodate(page);
+		set_page_dirty(page);
+
+		/*
+		 * We need to release our reference we got for grab_cache_page,
+		 * except for the first page which will hold our checksums, we
+		 * do that below.
+		 */
+		if (index != 0) {
+			unlock_page(page);
+			page_cache_release(page);
+		}
+
+		page_cache_release(page);
+
+		index++;
+	} while (node);
+
+	/* Write out the bitmaps */
+	list_for_each_safe(pos, n, &bitmap_list) {
+		void *addr;
+		struct btrfs_free_space *entry =
+			list_entry(pos, struct btrfs_free_space, list);
+
+		page = find_get_page(inode->i_mapping, index);
+
+		addr = kmap(page);
+		memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
+		*crc = ~(u32)0;
+		*crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE);
+		kunmap(page);
+		btrfs_csum_final(*crc, (char *)crc);
+		crc++;
+		bytes += PAGE_CACHE_SIZE;
+
+		ClearPageChecked(page);
+		set_page_extent_mapped(page);
+		SetPageUptodate(page);
+		set_page_dirty(page);
+		unlock_page(page);
+		page_cache_release(page);
+		page_cache_release(page);
+		list_del_init(&entry->list);
+		index++;
+	}
+
+	/* Zero out the rest of the pages just to make sure */
+	while (index <= last_index) {
+		void *addr;
+
+		page = find_get_page(inode->i_mapping, index);
+
+		addr = kmap(page);
+		memset(addr, 0, PAGE_CACHE_SIZE);
+		kunmap(page);
+		ClearPageChecked(page);
+		set_page_extent_mapped(page);
+		SetPageUptodate(page);
+		set_page_dirty(page);
+		unlock_page(page);
+		page_cache_release(page);
+		page_cache_release(page);
+		bytes += PAGE_CACHE_SIZE;
+		index++;
+	}
+
+	btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
+
+	/* Write the checksums and trans id to the first page */
+	{
+		void *addr;
+		u64 *gen;
+
+		page = find_get_page(inode->i_mapping, 0);
+
+		addr = kmap(page);
+		memcpy(addr, checksums, sizeof(u32) * num_checksums);
+		gen = addr + (sizeof(u32) * num_checksums);
+		*gen = trans->transid;
+		kunmap(page);
+		ClearPageChecked(page);
+		set_page_extent_mapped(page);
+		SetPageUptodate(page);
+		set_page_dirty(page);
+		unlock_page(page);
+		page_cache_release(page);
+		page_cache_release(page);
+	}
+	BTRFS_I(inode)->generation = trans->transid;
+
+	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
+			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);
+
+	filemap_write_and_wait(inode->i_mapping);
+
+	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
+	key.offset = block_group->key.objectid;
+	key.type = 0;
+
+	ret = btrfs_search_slot(trans, root, &key, path, 1, 1);
+	if (ret < 0) {
+		ret = 0;
+		clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
+				 EXTENT_DIRTY | EXTENT_DELALLOC |
+				 EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS);
+		goto out_free;
+	}
+	leaf = path->nodes[0];
+	if (ret > 0) {
+		struct btrfs_key found_key;
+		BUG_ON(!path->slots[0]);
+		path->slots[0]--;
+		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
+		if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
+		    found_key.offset != block_group->key.objectid) {
+			ret = 0;
+			clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1,
+					 EXTENT_DIRTY | EXTENT_DELALLOC |
+					 EXTENT_DO_ACCOUNTING, 0, 0, NULL,
+					 GFP_NOFS);
+			btrfs_release_path(root, path);
+			goto out_free;
+		}
+	}
+	header = btrfs_item_ptr(leaf, path->slots[0],
+				struct btrfs_free_space_header);
+	btrfs_set_free_space_entries(leaf, header, entries);
+	btrfs_set_free_space_bitmaps(leaf, header, bitmaps);
+	btrfs_set_free_space_generation(leaf, header, trans->transid);
+	btrfs_mark_buffer_dirty(leaf);
+	btrfs_release_path(root, path);
+
+	ret = 1;
+
+out_free:
+	if (ret == 0) {
+		invalidate_inode_pages2_range(inode->i_mapping, 0, index);
+		spin_lock(&block_group->lock);
+		block_group->disk_cache_state = BTRFS_DC_ERROR;
+		spin_unlock(&block_group->lock);
+		BTRFS_I(inode)->generation = 0;
+	}
+	kfree(checksums);
+	btrfs_update_inode(trans, root, inode);
+	iput(inode);
+	return ret;
+}
+
 static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
 					  u64 offset)
 {
diff --git a/fs/btrfs/free-space-cache.h b/fs/btrfs/free-space-cache.h
index 890a8e79011..e49ca5c321b 100644
--- a/fs/btrfs/free-space-cache.h
+++ b/fs/btrfs/free-space-cache.h
@@ -27,6 +27,24 @@ struct btrfs_free_space {
 	struct list_head list;
 };
 
+struct inode *lookup_free_space_inode(struct btrfs_root *root,
+				      struct btrfs_block_group_cache
+				      *block_group, struct btrfs_path *path);
+int create_free_space_inode(struct btrfs_root *root,
+			    struct btrfs_trans_handle *trans,
+			    struct btrfs_block_group_cache *block_group,
+			    struct btrfs_path *path);
+
+int btrfs_truncate_free_space_cache(struct btrfs_root *root,
+				    struct btrfs_trans_handle *trans,
+				    struct btrfs_path *path,
+				    struct inode *inode);
+int load_free_space_cache(struct btrfs_fs_info *fs_info,
+			  struct btrfs_block_group_cache *block_group);
+int btrfs_write_out_cache(struct btrfs_root *root,
+			  struct btrfs_trans_handle *trans,
+			  struct btrfs_block_group_cache *block_group,
+			  struct btrfs_path *path);
 int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
 			 u64 bytenr, u64 size);
 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index c03864406af..72f31ecb5c9 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -319,8 +319,6 @@ static noinline int compress_file_range(struct inode *inode,
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_trans_handle *trans;
 	u64 num_bytes;
-	u64 orig_start;
-	u64 disk_num_bytes;
 	u64 blocksize = root->sectorsize;
 	u64 actual_end;
 	u64 isize = i_size_read(inode);
@@ -335,8 +333,6 @@ static noinline int compress_file_range(struct inode *inode,
 	int i;
 	int will_compress;
 
-	orig_start = start;
-
 	actual_end = min_t(u64, isize, end + 1);
 again:
 	will_compress = 0;
@@ -371,7 +367,6 @@ again:
 	total_compressed = min(total_compressed, max_uncompressed);
 	num_bytes = (end - start + blocksize) & ~(blocksize - 1);
 	num_bytes = max(blocksize,  num_bytes);
-	disk_num_bytes = num_bytes;
 	total_in = 0;
 	ret = 0;
 
@@ -467,7 +462,6 @@ again:
 		if (total_compressed >= total_in) {
 			will_compress = 0;
 		} else {
-			disk_num_bytes = total_compressed;
 			num_bytes = total_in;
 		}
 	}
@@ -501,7 +495,7 @@ again:
 		add_async_extent(async_cow, start, num_bytes,
 				 total_compressed, pages, nr_pages_ret);
 
-		if (start + num_bytes < end && start + num_bytes < actual_end) {
+		if (start + num_bytes < end) {
 			start += num_bytes;
 			pages = NULL;
 			cond_resched();
@@ -757,20 +751,17 @@ static noinline int cow_file_range(struct inode *inode,
 	u64 disk_num_bytes;
 	u64 cur_alloc_size;
 	u64 blocksize = root->sectorsize;
-	u64 actual_end;
-	u64 isize = i_size_read(inode);
 	struct btrfs_key ins;
 	struct extent_map *em;
 	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
 	int ret = 0;
 
+	BUG_ON(root == root->fs_info->tree_root);
 	trans = btrfs_join_transaction(root, 1);
 	BUG_ON(!trans);
 	btrfs_set_trans_block_group(trans, inode);
 	trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
-	actual_end = min_t(u64, isize, end + 1);
-
 	num_bytes = (end - start + blocksize) & ~(blocksize - 1);
 	num_bytes = max(blocksize,  num_bytes);
 	disk_num_bytes = num_bytes;
@@ -1035,10 +1026,16 @@ static noinline int run_delalloc_nocow(struct inode *inode,
 	int type;
 	int nocow;
 	int check_prev = 1;
+	bool nolock = false;
 
 	path = btrfs_alloc_path();
 	BUG_ON(!path);
-	trans = btrfs_join_transaction(root, 1);
+	if (root == root->fs_info->tree_root) {
+		nolock = true;
+		trans = btrfs_join_transaction_nolock(root, 1);
+	} else {
+		trans = btrfs_join_transaction(root, 1);
+	}
 	BUG_ON(!trans);
 
 	cow_start = (u64)-1;
@@ -1211,8 +1208,13 @@ out_check:
 		BUG_ON(ret);
 	}
 
-	ret = btrfs_end_transaction(trans, root);
-	BUG_ON(ret);
+	if (nolock) {
+		ret = btrfs_end_transaction_nolock(trans, root);
+		BUG_ON(ret);
+	} else {
+		ret = btrfs_end_transaction(trans, root);
+		BUG_ON(ret);
+	}
 	btrfs_free_path(path);
 	return 0;
 }
@@ -1289,6 +1291,8 @@ static int btrfs_set_bit_hook(struct inode *inode,
 	if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
 		struct btrfs_root *root = BTRFS_I(inode)->root;
 		u64 len = state->end + 1 - state->start;
+		int do_list = (root->root_key.objectid !=
+			       BTRFS_ROOT_TREE_OBJECTID);
 
 		if (*bits & EXTENT_FIRST_DELALLOC)
 			*bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1298,7 +1302,7 @@ static int btrfs_set_bit_hook(struct inode *inode,
 		spin_lock(&root->fs_info->delalloc_lock);
 		BTRFS_I(inode)->delalloc_bytes += len;
 		root->fs_info->delalloc_bytes += len;
-		if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
+		if (do_list && list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
 			list_add_tail(&BTRFS_I(inode)->delalloc_inodes,
 				      &root->fs_info->delalloc_inodes);
 		}
@@ -1321,6 +1325,8 @@ static int btrfs_clear_bit_hook(struct inode *inode,
 	if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
 		struct btrfs_root *root = BTRFS_I(inode)->root;
 		u64 len = state->end + 1 - state->start;
+		int do_list = (root->root_key.objectid !=
+			       BTRFS_ROOT_TREE_OBJECTID);
 
 		if (*bits & EXTENT_FIRST_DELALLOC)
 			*bits &= ~EXTENT_FIRST_DELALLOC;
@@ -1330,14 +1336,15 @@ static int btrfs_clear_bit_hook(struct inode *inode,
 		if (*bits & EXTENT_DO_ACCOUNTING)
 			btrfs_delalloc_release_metadata(inode, len);
 
-		if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID)
+		if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID
+		    && do_list)
 			btrfs_free_reserved_data_space(inode, len);
 
 		spin_lock(&root->fs_info->delalloc_lock);
 		root->fs_info->delalloc_bytes -= len;
 		BTRFS_I(inode)->delalloc_bytes -= len;
 
-		if (BTRFS_I(inode)->delalloc_bytes == 0 &&
+		if (do_list && BTRFS_I(inode)->delalloc_bytes == 0 &&
 		    !list_empty(&BTRFS_I(inode)->delalloc_inodes)) {
 			list_del_init(&BTRFS_I(inode)->delalloc_inodes);
 		}
@@ -1372,7 +1379,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
 
 	if (map_length < length + size)
 		return 1;
-	return 0;
+	return ret;
 }
 
 /*
@@ -1426,7 +1433,10 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 
 	skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
-	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
+	if (root == root->fs_info->tree_root)
+		ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2);
+	else
+		ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
 	BUG_ON(ret);
 
 	if (!(rw & REQ_WRITE)) {
@@ -1662,6 +1672,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 	struct extent_state *cached_state = NULL;
 	int compressed = 0;
 	int ret;
+	bool nolock = false;
 
 	ret = btrfs_dec_test_ordered_pending(inode, &ordered_extent, start,
 					     end - start + 1);
@@ -1669,11 +1680,17 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 		return 0;
 	BUG_ON(!ordered_extent);
 
+	nolock = (root == root->fs_info->tree_root);
+
 	if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
 		BUG_ON(!list_empty(&ordered_extent->list));
 		ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
 		if (!ret) {
-			trans = btrfs_join_transaction(root, 1);
+			if (nolock)
+				trans = btrfs_join_transaction_nolock(root, 1);
+			else
+				trans = btrfs_join_transaction(root, 1);
+			BUG_ON(!trans);
 			btrfs_set_trans_block_group(trans, inode);
 			trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 			ret = btrfs_update_inode(trans, root, inode);
@@ -1686,7 +1703,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 			 ordered_extent->file_offset + ordered_extent->len - 1,
 			 0, &cached_state, GFP_NOFS);
 
-	trans = btrfs_join_transaction(root, 1);
+	if (nolock)
+		trans = btrfs_join_transaction_nolock(root, 1);
+	else
+		trans = btrfs_join_transaction(root, 1);
 	btrfs_set_trans_block_group(trans, inode);
 	trans->block_rsv = &root->fs_info->delalloc_block_rsv;
 
@@ -1700,6 +1720,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 						ordered_extent->len);
 		BUG_ON(ret);
 	} else {
+		BUG_ON(root == root->fs_info->tree_root);
 		ret = insert_reserved_file_extent(trans, inode,
 						ordered_extent->file_offset,
 						ordered_extent->start,
@@ -1724,9 +1745,15 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
 	ret = btrfs_update_inode(trans, root, inode);
 	BUG_ON(ret);
 out:
-	btrfs_delalloc_release_metadata(inode, ordered_extent->len);
-	if (trans)
-		btrfs_end_transaction(trans, root);
+	if (nolock) {
+		if (trans)
+			btrfs_end_transaction_nolock(trans, root);
+	} else {
+		btrfs_delalloc_release_metadata(inode, ordered_extent->len);
+		if (trans)
+			btrfs_end_transaction(trans, root);
+	}
+
 	/* once for us */
 	btrfs_put_ordered_extent(ordered_extent);
 	/* once for the tree */
@@ -2237,7 +2264,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
 {
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
-	struct btrfs_item *item;
 	struct btrfs_key key, found_key;
 	struct btrfs_trans_handle *trans;
 	struct inode *inode;
@@ -2275,7 +2301,6 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
 
 		/* pull out the item */
 		leaf = path->nodes[0];
-		item = btrfs_item_nr(leaf, path->slots[0]);
 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 
 		/* make sure the item matches what we want */
@@ -2651,7 +2676,8 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans,
 
 	ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len,
 					   dir, index);
-	BUG_ON(ret);
+	if (ret == -ENOENT)
+		ret = 0;
 err:
 	btrfs_free_path(path);
 	if (ret)
@@ -2672,8 +2698,8 @@ static int check_path_shared(struct btrfs_root *root,
 {
 	struct extent_buffer *eb;
 	int level;
-	int ret;
 	u64 refs = 1;
+	int uninitialized_var(ret);
 
 	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
 		if (!path->nodes[level])
@@ -2686,7 +2712,7 @@ static int check_path_shared(struct btrfs_root *root,
 		if (refs > 1)
 			return 1;
 	}
-	return 0;
+	return ret; /* XXX callers? */
 }
 
 /*
@@ -3196,7 +3222,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
 
 	BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
 
-	if (root->ref_cows)
+	if (root->ref_cows || root == root->fs_info->tree_root)
 		btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
 
 	path = btrfs_alloc_path();
@@ -3344,7 +3370,8 @@ delete:
 		} else {
 			break;
 		}
-		if (found_extent && root->ref_cows) {
+		if (found_extent && (root->ref_cows ||
+				     root == root->fs_info->tree_root)) {
 			btrfs_set_path_blocking(path);
 			ret = btrfs_free_extent(trans, root, extent_start,
 						extent_num_bytes, 0,
@@ -3675,7 +3702,8 @@ void btrfs_evict_inode(struct inode *inode)
 	int ret;
 
 	truncate_inode_pages(&inode->i_data, 0);
-	if (inode->i_nlink && btrfs_root_refs(&root->root_item) != 0)
+	if (inode->i_nlink && (btrfs_root_refs(&root->root_item) != 0 ||
+			       root == root->fs_info->tree_root))
 		goto no_delete;
 
 	if (is_bad_inode(inode)) {
@@ -3849,7 +3877,7 @@ again:
 	p = &root->inode_tree.rb_node;
 	parent = NULL;
 
-	if (hlist_unhashed(&inode->i_hash))
+	if (inode_unhashed(inode))
 		return;
 
 	spin_lock(&root->inode_lock);
@@ -3888,7 +3916,14 @@ static void inode_tree_del(struct inode *inode)
 	}
 	spin_unlock(&root->inode_lock);
 
-	if (empty && btrfs_root_refs(&root->root_item) == 0) {
+	/*
+	 * Free space cache has inodes in the tree root, but the tree root has a
+	 * root_refs of 0, so this could end up dropping the tree root as a
+	 * snapshot, so we need the extra !root->fs_info->tree_root check to
+	 * make sure we don't drop it.
+	 */
+	if (empty && btrfs_root_refs(&root->root_item) == 0 &&
+	    root != root->fs_info->tree_root) {
 		synchronize_srcu(&root->fs_info->subvol_srcu);
 		spin_lock(&root->inode_lock);
 		empty = RB_EMPTY_ROOT(&root->inode_tree);
@@ -4282,14 +4317,24 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc)
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_trans_handle *trans;
 	int ret = 0;
+	bool nolock = false;
 
 	if (BTRFS_I(inode)->dummy_inode)
 		return 0;
 
+	smp_mb();
+	nolock = (root->fs_info->closing && root == root->fs_info->tree_root);
+
 	if (wbc->sync_mode == WB_SYNC_ALL) {
-		trans = btrfs_join_transaction(root, 1);
+		if (nolock)
+			trans = btrfs_join_transaction_nolock(root, 1);
+		else
+			trans = btrfs_join_transaction(root, 1);
 		btrfs_set_trans_block_group(trans, inode);
-		ret = btrfs_commit_transaction(trans, root);
+		if (nolock)
+			ret = btrfs_end_transaction_nolock(trans, root);
+		else
+			ret = btrfs_commit_transaction(trans, root);
 	}
 	return ret;
 }
@@ -4456,6 +4501,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
 	BTRFS_I(inode)->index_cnt = 2;
 	BTRFS_I(inode)->root = root;
 	BTRFS_I(inode)->generation = trans->transid;
+	inode->i_generation = BTRFS_I(inode)->generation;
 	btrfs_set_inode_space_info(root, inode);
 
 	if (mode & S_IFDIR)
@@ -4577,12 +4623,12 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
 }
 
 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
-			    struct dentry *dentry, struct inode *inode,
-			    int backref, u64 index)
+			    struct inode *dir, struct dentry *dentry,
+			    struct inode *inode, int backref, u64 index)
 {
-	int err = btrfs_add_link(trans, dentry->d_parent->d_inode,
-				 inode, dentry->d_name.name,
-				 dentry->d_name.len, backref, index);
+	int err = btrfs_add_link(trans, dir, inode,
+				 dentry->d_name.name, dentry->d_name.len,
+				 backref, index);
 	if (!err) {
 		d_instantiate(dentry, inode);
 		return 0;
@@ -4623,8 +4669,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
 	btrfs_set_trans_block_group(trans, dir);
 
 	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
-				dentry->d_name.len,
-				dentry->d_parent->d_inode->i_ino, objectid,
+				dentry->d_name.len, dir->i_ino, objectid,
 				BTRFS_I(dir)->block_group, mode, &index);
 	err = PTR_ERR(inode);
 	if (IS_ERR(inode))
@@ -4637,7 +4682,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
 	}
 
 	btrfs_set_trans_block_group(trans, inode);
-	err = btrfs_add_nondir(trans, dentry, inode, 0, index);
+	err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
 	if (err)
 		drop_inode = 1;
 	else {
@@ -4685,10 +4730,8 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
 	btrfs_set_trans_block_group(trans, dir);
 
 	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
-				dentry->d_name.len,
-				dentry->d_parent->d_inode->i_ino,
-				objectid, BTRFS_I(dir)->block_group, mode,
-				&index);
+				dentry->d_name.len, dir->i_ino, objectid,
+				BTRFS_I(dir)->block_group, mode, &index);
 	err = PTR_ERR(inode);
 	if (IS_ERR(inode))
 		goto out_unlock;
@@ -4700,7 +4743,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
 	}
 
 	btrfs_set_trans_block_group(trans, inode);
-	err = btrfs_add_nondir(trans, dentry, inode, 0, index);
+	err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
 	if (err)
 		drop_inode = 1;
 	else {
@@ -4742,6 +4785,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 		return -EPERM;
 
 	btrfs_inc_nlink(inode);
+	inode->i_ctime = CURRENT_TIME;
 
 	err = btrfs_set_inode_index(dir, &index);
 	if (err)
@@ -4758,17 +4802,19 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
 	}
 
 	btrfs_set_trans_block_group(trans, dir);
-	atomic_inc(&inode->i_count);
+	ihold(inode);
 
-	err = btrfs_add_nondir(trans, dentry, inode, 1, index);
+	err = btrfs_add_nondir(trans, dir, dentry, inode, 1, index);
 
 	if (err) {
 		drop_inode = 1;
 	} else {
+		struct dentry *parent = dget_parent(dentry);
 		btrfs_update_inode_block_group(trans, dir);
 		err = btrfs_update_inode(trans, root, inode);
 		BUG_ON(err);
-		btrfs_log_new_name(trans, inode, NULL, dentry->d_parent);
+		btrfs_log_new_name(trans, inode, NULL, parent);
+		dput(parent);
 	}
 
 	nr = trans->blocks_used;
@@ -4808,8 +4854,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	btrfs_set_trans_block_group(trans, dir);
 
 	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
-				dentry->d_name.len,
-				dentry->d_parent->d_inode->i_ino, objectid,
+				dentry->d_name.len, dir->i_ino, objectid,
 				BTRFS_I(dir)->block_group, S_IFDIR | mode,
 				&index);
 	if (IS_ERR(inode)) {
@@ -4832,9 +4877,8 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	if (err)
 		goto out_fail;
 
-	err = btrfs_add_link(trans, dentry->d_parent->d_inode,
-				 inode, dentry->d_name.name,
-				 dentry->d_name.len, 0, index);
+	err = btrfs_add_link(trans, dir, inode, dentry->d_name.name,
+			     dentry->d_name.len, 0, index);
 	if (err)
 		goto out_fail;
 
@@ -5490,13 +5534,21 @@ struct btrfs_dio_private {
 	u64 bytes;
 	u32 *csums;
 	void *private;
+
+	/* number of bios pending for this dio */
+	atomic_t pending_bios;
+
+	/* IO errors */
+	int errors;
+
+	struct bio *orig_bio;
 };
 
 static void btrfs_endio_direct_read(struct bio *bio, int err)
 {
+	struct btrfs_dio_private *dip = bio->bi_private;
 	struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
 	struct bio_vec *bvec = bio->bi_io_vec;
-	struct btrfs_dio_private *dip = bio->bi_private;
 	struct inode *inode = dip->inode;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	u64 start;
@@ -5550,15 +5602,18 @@ static void btrfs_endio_direct_write(struct bio *bio, int err)
 	struct btrfs_trans_handle *trans;
 	struct btrfs_ordered_extent *ordered = NULL;
 	struct extent_state *cached_state = NULL;
+	u64 ordered_offset = dip->logical_offset;
+	u64 ordered_bytes = dip->bytes;
 	int ret;
 
 	if (err)
 		goto out_done;
-
-	ret = btrfs_dec_test_ordered_pending(inode, &ordered,
-					     dip->logical_offset, dip->bytes);
+again:
+	ret = btrfs_dec_test_first_ordered_pending(inode, &ordered,
+						   &ordered_offset,
+						   ordered_bytes);
 	if (!ret)
-		goto out_done;
+		goto out_test;
 
 	BUG_ON(!ordered);
 
@@ -5618,8 +5673,20 @@ out_unlock:
 out:
 	btrfs_delalloc_release_metadata(inode, ordered->len);
 	btrfs_end_transaction(trans, root);
+	ordered_offset = ordered->file_offset + ordered->len;
 	btrfs_put_ordered_extent(ordered);
 	btrfs_put_ordered_extent(ordered);
+
+out_test:
+	/*
+	 * our bio might span multiple ordered extents.  If we haven't
+	 * completed the accounting for the whole dio, go back and try again
+	 */
+	if (ordered_offset < dip->logical_offset + dip->bytes) {
+		ordered_bytes = dip->logical_offset + dip->bytes -
+			ordered_offset;
+		goto again;
+	}
 out_done:
 	bio->bi_private = dip->private;
 
@@ -5639,13 +5706,182 @@ static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw,
 	return 0;
 }
 
+static void btrfs_end_dio_bio(struct bio *bio, int err)
+{
+	struct btrfs_dio_private *dip = bio->bi_private;
+
+	if (err) {
+		printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu "
+		      "sector %#Lx len %u err no %d\n",
+		      dip->inode->i_ino, bio->bi_rw,
+		      (unsigned long long)bio->bi_sector, bio->bi_size, err);
+		dip->errors = 1;
+
+		/*
+		 * before atomic variable goto zero, we must make sure
+		 * dip->errors is perceived to be set.
+		 */
+		smp_mb__before_atomic_dec();
+	}
+
+	/* if there are more bios still pending for this dio, just exit */
+	if (!atomic_dec_and_test(&dip->pending_bios))
+		goto out;
+
+	if (dip->errors)
+		bio_io_error(dip->orig_bio);
+	else {
+		set_bit(BIO_UPTODATE, &dip->orig_bio->bi_flags);
+		bio_endio(dip->orig_bio, 0);
+	}
+out:
+	bio_put(bio);
+}
+
+static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
+				       u64 first_sector, gfp_t gfp_flags)
+{
+	int nr_vecs = bio_get_nr_vecs(bdev);
+	return btrfs_bio_alloc(bdev, first_sector, nr_vecs, gfp_flags);
+}
+
+static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
+					 int rw, u64 file_offset, int skip_sum,
+					 u32 *csums)
+{
+	int write = rw & REQ_WRITE;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	int ret;
+
+	bio_get(bio);
+	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
+	if (ret)
+		goto err;
+
+	if (write && !skip_sum) {
+		ret = btrfs_wq_submit_bio(root->fs_info,
+				   inode, rw, bio, 0, 0,
+				   file_offset,
+				   __btrfs_submit_bio_start_direct_io,
+				   __btrfs_submit_bio_done);
+		goto err;
+	} else if (!skip_sum)
+		btrfs_lookup_bio_sums_dio(root, inode, bio,
+					  file_offset, csums);
+
+	ret = btrfs_map_bio(root, rw, bio, 0, 1);
+err:
+	bio_put(bio);
+	return ret;
+}
+
+static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
+				    int skip_sum)
+{
+	struct inode *inode = dip->inode;
+	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree;
+	struct bio *bio;
+	struct bio *orig_bio = dip->orig_bio;
+	struct bio_vec *bvec = orig_bio->bi_io_vec;
+	u64 start_sector = orig_bio->bi_sector;
+	u64 file_offset = dip->logical_offset;
+	u64 submit_len = 0;
+	u64 map_length;
+	int nr_pages = 0;
+	u32 *csums = dip->csums;
+	int ret = 0;
+
+	bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev, start_sector, GFP_NOFS);
+	if (!bio)
+		return -ENOMEM;
+	bio->bi_private = dip;
+	bio->bi_end_io = btrfs_end_dio_bio;
+	atomic_inc(&dip->pending_bios);
+
+	map_length = orig_bio->bi_size;
+	ret = btrfs_map_block(map_tree, READ, start_sector << 9,
+			      &map_length, NULL, 0);
+	if (ret) {
+		bio_put(bio);
+		return -EIO;
+	}
+
+	while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
+		if (unlikely(map_length < submit_len + bvec->bv_len ||
+		    bio_add_page(bio, bvec->bv_page, bvec->bv_len,
+				 bvec->bv_offset) < bvec->bv_len)) {
+			/*
+			 * inc the count before we submit the bio so
+			 * we know the end IO handler won't happen before
+			 * we inc the count. Otherwise, the dip might get freed
+			 * before we're done setting it up
+			 */
+			atomic_inc(&dip->pending_bios);
+			ret = __btrfs_submit_dio_bio(bio, inode, rw,
+						     file_offset, skip_sum,
+						     csums);
+			if (ret) {
+				bio_put(bio);
+				atomic_dec(&dip->pending_bios);
+				goto out_err;
+			}
+
+			if (!skip_sum)
+				csums = csums + nr_pages;
+			start_sector += submit_len >> 9;
+			file_offset += submit_len;
+
+			submit_len = 0;
+			nr_pages = 0;
+
+			bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev,
+						  start_sector, GFP_NOFS);
+			if (!bio)
+				goto out_err;
+			bio->bi_private = dip;
+			bio->bi_end_io = btrfs_end_dio_bio;
+
+			map_length = orig_bio->bi_size;
+			ret = btrfs_map_block(map_tree, READ, start_sector << 9,
+					      &map_length, NULL, 0);
+			if (ret) {
+				bio_put(bio);
+				goto out_err;
+			}
+		} else {
+			submit_len += bvec->bv_len;
+			nr_pages ++;
+			bvec++;
+		}
+	}
+
+	ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
+				     csums);
+	if (!ret)
+		return 0;
+
+	bio_put(bio);
+out_err:
+	dip->errors = 1;
+	/*
+	 * before atomic variable goto zero, we must
+	 * make sure dip->errors is perceived to be set.
+	 */
+	smp_mb__before_atomic_dec();
+	if (atomic_dec_and_test(&dip->pending_bios))
+		bio_io_error(dip->orig_bio);
+
+	/* bio_end_io() will handle error, so we needn't return it */
+	return 0;
+}
+
 static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
 				loff_t file_offset)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_dio_private *dip;
 	struct bio_vec *bvec = bio->bi_io_vec;
-	u64 start;
 	int skip_sum;
 	int write = rw & REQ_WRITE;
 	int ret = 0;
@@ -5671,7 +5907,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
 	dip->inode = inode;
 	dip->logical_offset = file_offset;
 
-	start = dip->logical_offset;
 	dip->bytes = 0;
 	do {
 		dip->bytes += bvec->bv_len;
@@ -5680,36 +5915,18 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
 
 	dip->disk_bytenr = (u64)bio->bi_sector << 9;
 	bio->bi_private = dip;
+	dip->errors = 0;
+	dip->orig_bio = bio;
+	atomic_set(&dip->pending_bios, 0);
 
 	if (write)
 		bio->bi_end_io = btrfs_endio_direct_write;
 	else
 		bio->bi_end_io = btrfs_endio_direct_read;
 
-	ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
-	if (ret)
-		goto out_err;
-
-	if (write && !skip_sum) {
-		ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info,
-				   inode, rw, bio, 0, 0,
-				   dip->logical_offset,
-				   __btrfs_submit_bio_start_direct_io,
-				   __btrfs_submit_bio_done);
-		if (ret)
-			goto out_err;
+	ret = btrfs_submit_direct_hook(rw, dip, skip_sum);
+	if (!ret)
 		return;
-	} else if (!skip_sum)
-		btrfs_lookup_bio_sums_dio(root, inode, bio,
-					  dip->logical_offset, dip->csums);
-
-	ret = btrfs_map_bio(root, rw, bio, 0, 1);
-	if (ret)
-		goto out_err;
-	return;
-out_err:
-	kfree(dip->csums);
-	kfree(dip);
 free_ordered:
 	/*
 	 * If this is a write, we need to clean up the reserved space and kill
@@ -5717,8 +5934,7 @@ free_ordered:
 	 */
 	if (write) {
 		struct btrfs_ordered_extent *ordered;
-		ordered = btrfs_lookup_ordered_extent(inode,
-						      dip->logical_offset);
+		ordered = btrfs_lookup_ordered_extent(inode, file_offset);
 		if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
 		    !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
 			btrfs_free_reserved_extent(root, ordered->start,
@@ -6308,6 +6524,21 @@ void btrfs_destroy_inode(struct inode *inode)
 		spin_unlock(&root->fs_info->ordered_extent_lock);
 	}
 
+	if (root == root->fs_info->tree_root) {
+		struct btrfs_block_group_cache *block_group;
+
+		block_group = btrfs_lookup_block_group(root->fs_info,
+						BTRFS_I(inode)->block_group);
+		if (block_group && block_group->inode == inode) {
+			spin_lock(&block_group->lock);
+			block_group->inode = NULL;
+			spin_unlock(&block_group->lock);
+			btrfs_put_block_group(block_group);
+		} else if (block_group) {
+			btrfs_put_block_group(block_group);
+		}
+	}
+
 	spin_lock(&root->orphan_lock);
 	if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
 		printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
@@ -6340,7 +6571,8 @@ int btrfs_drop_inode(struct inode *inode)
 {
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 
-	if (btrfs_root_refs(&root->root_item) == 0)
+	if (btrfs_root_refs(&root->root_item) == 0 &&
+	    root != root->fs_info->tree_root)
 		return 1;
 	else
 		return generic_drop_inode(inode);
@@ -6548,8 +6780,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	BUG_ON(ret);
 
 	if (old_inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
-		btrfs_log_new_name(trans, old_inode, old_dir,
-				   new_dentry->d_parent);
+		struct dentry *parent = dget_parent(new_dentry);
+		btrfs_log_new_name(trans, old_inode, old_dir, parent);
+		dput(parent);
 		btrfs_end_log_trans(root);
 	}
 out_fail:
@@ -6609,7 +6842,8 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
 	return 0;
 }
 
-int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
+int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput,
+				   int sync)
 {
 	struct btrfs_inode *binode;
 	struct inode *inode = NULL;
@@ -6631,7 +6865,26 @@ int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput)
 	spin_unlock(&root->fs_info->delalloc_lock);
 
 	if (inode) {
-		write_inode_now(inode, 0);
+		if (sync) {
+			filemap_write_and_wait(inode->i_mapping);
+			/*
+			 * We have to do this because compression doesn't
+			 * actually set PG_writeback until it submits the pages
+			 * for IO, which happens in an async thread, so we could
+			 * race and not actually wait for any writeback pages
+			 * because they've not been submitted yet.  Technically
+			 * this could still be the case for the ordered stuff
+			 * since the async thread may not have started to do its
+			 * work yet.  If this becomes the case then we need to
+			 * figure out a way to make sure that in writepage we
+			 * wait for any async pages to be submitted before
+			 * returning so that fdatawait does what its supposed to
+			 * do.
+			 */
+			btrfs_wait_ordered_range(inode, 0, (u64)-1);
+		} else {
+			filemap_flush(inode->i_mapping);
+		}
 		if (delay_iput)
 			btrfs_add_delayed_iput(inode);
 		else
@@ -6679,8 +6932,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 	btrfs_set_trans_block_group(trans, dir);
 
 	inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name,
-				dentry->d_name.len,
-				dentry->d_parent->d_inode->i_ino, objectid,
+				dentry->d_name.len, dir->i_ino, objectid,
 				BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO,
 				&index);
 	err = PTR_ERR(inode);
@@ -6694,7 +6946,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
 	}
 
 	btrfs_set_trans_block_group(trans, inode);
-	err = btrfs_add_nondir(trans, dentry, inode, 0, index);
+	err = btrfs_add_nondir(trans, dir, dentry, inode, 0, index);
 	if (err)
 		drop_inode = 1;
 	else {
@@ -6757,27 +7009,34 @@ out_unlock:
 	return err;
 }
 
-int btrfs_prealloc_file_range(struct inode *inode, int mode,
-			      u64 start, u64 num_bytes, u64 min_size,
-			      loff_t actual_len, u64 *alloc_hint)
+static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
+				       u64 start, u64 num_bytes, u64 min_size,
+				       loff_t actual_len, u64 *alloc_hint,
+				       struct btrfs_trans_handle *trans)
 {
-	struct btrfs_trans_handle *trans;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_key ins;
 	u64 cur_offset = start;
+	u64 i_size;
 	int ret = 0;
+	bool own_trans = true;
 
+	if (trans)
+		own_trans = false;
 	while (num_bytes > 0) {
-		trans = btrfs_start_transaction(root, 3);
-		if (IS_ERR(trans)) {
-			ret = PTR_ERR(trans);
-			break;
+		if (own_trans) {
+			trans = btrfs_start_transaction(root, 3);
+			if (IS_ERR(trans)) {
+				ret = PTR_ERR(trans);
+				break;
+			}
 		}
 
 		ret = btrfs_reserve_extent(trans, root, num_bytes, min_size,
 					   0, *alloc_hint, (u64)-1, &ins, 1);
 		if (ret) {
-			btrfs_end_transaction(trans, root);
+			if (own_trans)
+				btrfs_end_transaction(trans, root);
 			break;
 		}
 
@@ -6800,21 +7059,40 @@ int btrfs_prealloc_file_range(struct inode *inode, int mode,
 		    (actual_len > inode->i_size) &&
 		    (cur_offset > inode->i_size)) {
 			if (cur_offset > actual_len)
-				i_size_write(inode, actual_len);
+				i_size = actual_len;
 			else
-				i_size_write(inode, cur_offset);
-			i_size_write(inode, cur_offset);
-			btrfs_ordered_update_i_size(inode, cur_offset, NULL);
+				i_size = cur_offset;
+			i_size_write(inode, i_size);
+			btrfs_ordered_update_i_size(inode, i_size, NULL);
 		}
 
 		ret = btrfs_update_inode(trans, root, inode);
 		BUG_ON(ret);
 
-		btrfs_end_transaction(trans, root);
+		if (own_trans)
+			btrfs_end_transaction(trans, root);
 	}
 	return ret;
 }
 
+int btrfs_prealloc_file_range(struct inode *inode, int mode,
+			      u64 start, u64 num_bytes, u64 min_size,
+			      loff_t actual_len, u64 *alloc_hint)
+{
+	return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
+					   min_size, actual_len, alloc_hint,
+					   NULL);
+}
+
+int btrfs_prealloc_file_range_trans(struct inode *inode,
+				    struct btrfs_trans_handle *trans, int mode,
+				    u64 start, u64 num_bytes, u64 min_size,
+				    loff_t actual_len, u64 *alloc_hint)
+{
+	return __btrfs_prealloc_file_range(inode, mode, start, num_bytes,
+					   min_size, actual_len, alloc_hint, trans);
+}
+
 static long btrfs_fallocate(struct inode *inode, int mode,
 			    loff_t offset, loff_t len)
 {
@@ -6839,6 +7117,10 @@ static long btrfs_fallocate(struct inode *inode, int mode,
 	btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
 
 	mutex_lock(&inode->i_mutex);
+	ret = inode_newsize_ok(inode, alloc_end);
+	if (ret)
+		goto out;
+
 	if (alloc_start > inode->i_size) {
 		ret = btrfs_cont_expand(inode, alloc_start);
 		if (ret)
@@ -7035,6 +7317,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = {
 	.readlink	= generic_readlink,
 	.follow_link	= page_follow_link_light,
 	.put_link	= page_put_link,
+	.getattr	= btrfs_getattr,
 	.permission	= btrfs_permission,
 	.setxattr	= btrfs_setxattr,
 	.getxattr	= btrfs_getxattr,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 9254b3d58db..f87552a1d7e 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -224,7 +224,8 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
 
 static noinline int create_subvol(struct btrfs_root *root,
 				  struct dentry *dentry,
-				  char *name, int namelen)
+				  char *name, int namelen,
+				  u64 *async_transid)
 {
 	struct btrfs_trans_handle *trans;
 	struct btrfs_key key;
@@ -232,7 +233,8 @@ static noinline int create_subvol(struct btrfs_root *root,
 	struct btrfs_inode_item *inode_item;
 	struct extent_buffer *leaf;
 	struct btrfs_root *new_root;
-	struct inode *dir = dentry->d_parent->d_inode;
+	struct dentry *parent = dget_parent(dentry);
+	struct inode *dir;
 	int ret;
 	int err;
 	u64 objectid;
@@ -241,8 +243,13 @@ static noinline int create_subvol(struct btrfs_root *root,
 
 	ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root,
 				       0, &objectid);
-	if (ret)
+	if (ret) {
+		dput(parent);
 		return ret;
+	}
+
+	dir = parent->d_inode;
+
 	/*
 	 * 1 - inode item
 	 * 2 - refs
@@ -250,8 +257,10 @@ static noinline int create_subvol(struct btrfs_root *root,
 	 * 2 - dir items
 	 */
 	trans = btrfs_start_transaction(root, 6);
-	if (IS_ERR(trans))
+	if (IS_ERR(trans)) {
+		dput(parent);
 		return PTR_ERR(trans);
+	}
 
 	leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
 				      0, objectid, NULL, 0, 0, 0);
@@ -338,15 +347,23 @@ static noinline int create_subvol(struct btrfs_root *root,
 
 	d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
 fail:
-	err = btrfs_commit_transaction(trans, root);
+	dput(parent);
+	if (async_transid) {
+		*async_transid = trans->transid;
+		err = btrfs_commit_transaction_async(trans, root, 1);
+	} else {
+		err = btrfs_commit_transaction(trans, root);
+	}
 	if (err && !ret)
 		ret = err;
 	return ret;
 }
 
-static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
+static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
+			   char *name, int namelen, u64 *async_transid)
 {
 	struct inode *inode;
+	struct dentry *parent;
 	struct btrfs_pending_snapshot *pending_snapshot;
 	struct btrfs_trans_handle *trans;
 	int ret;
@@ -373,7 +390,14 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
 
 	list_add(&pending_snapshot->list,
 		 &trans->transaction->pending_snapshots);
-	ret = btrfs_commit_transaction(trans, root->fs_info->extent_root);
+	if (async_transid) {
+		*async_transid = trans->transid;
+		ret = btrfs_commit_transaction_async(trans,
+				     root->fs_info->extent_root, 1);
+	} else {
+		ret = btrfs_commit_transaction(trans,
+					       root->fs_info->extent_root);
+	}
 	BUG_ON(ret);
 
 	ret = pending_snapshot->error;
@@ -382,7 +406,9 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
 
 	btrfs_orphan_cleanup(pending_snapshot->snap);
 
-	inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
+	parent = dget_parent(dentry);
+	inode = btrfs_lookup_dentry(parent->d_inode, dentry);
+	dput(parent);
 	if (IS_ERR(inode)) {
 		ret = PTR_ERR(inode);
 		goto fail;
@@ -395,6 +421,76 @@ fail:
 	return ret;
 }
 
+/*  copy of check_sticky in fs/namei.c()
+* It's inline, so penalty for filesystems that don't use sticky bit is
+* minimal.
+*/
+static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
+{
+	uid_t fsuid = current_fsuid();
+
+	if (!(dir->i_mode & S_ISVTX))
+		return 0;
+	if (inode->i_uid == fsuid)
+		return 0;
+	if (dir->i_uid == fsuid)
+		return 0;
+	return !capable(CAP_FOWNER);
+}
+
+/*  copy of may_delete in fs/namei.c()
+ *	Check whether we can remove a link victim from directory dir, check
+ *  whether the type of victim is right.
+ *  1. We can't do it if dir is read-only (done in permission())
+ *  2. We should have write and exec permissions on dir
+ *  3. We can't remove anything from append-only dir
+ *  4. We can't do anything with immutable dir (done in permission())
+ *  5. If the sticky bit on dir is set we should either
+ *	a. be owner of dir, or
+ *	b. be owner of victim, or
+ *	c. have CAP_FOWNER capability
+ *  6. If the victim is append-only or immutable we can't do antyhing with
+ *     links pointing to it.
+ *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
+ *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
+ *  9. We can't remove a root or mountpoint.
+ * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
+ *     nfs_async_unlink().
+ */
+
+static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir)
+{
+	int error;
+
+	if (!victim->d_inode)
+		return -ENOENT;
+
+	BUG_ON(victim->d_parent->d_inode != dir);
+	audit_inode_child(victim, dir);
+
+	error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+	if (error)
+		return error;
+	if (IS_APPEND(dir))
+		return -EPERM;
+	if (btrfs_check_sticky(dir, victim->d_inode)||
+		IS_APPEND(victim->d_inode)||
+	    IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode))
+		return -EPERM;
+	if (isdir) {
+		if (!S_ISDIR(victim->d_inode->i_mode))
+			return -ENOTDIR;
+		if (IS_ROOT(victim))
+			return -EBUSY;
+	} else if (S_ISDIR(victim->d_inode->i_mode))
+		return -EISDIR;
+	if (IS_DEADDIR(dir))
+		return -ENOENT;
+	if (victim->d_flags & DCACHE_NFSFS_RENAMED)
+		return -EBUSY;
+	return 0;
+}
+
 /* copy of may_create in fs/namei.c() */
 static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
 {
@@ -412,7 +508,8 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
  */
 static noinline int btrfs_mksubvol(struct path *parent,
 				   char *name, int namelen,
-				   struct btrfs_root *snap_src)
+				   struct btrfs_root *snap_src,
+				   u64 *async_transid)
 {
 	struct inode *dir  = parent->dentry->d_inode;
 	struct dentry *dentry;
@@ -443,10 +540,11 @@ static noinline int btrfs_mksubvol(struct path *parent,
 		goto out_up_read;
 
 	if (snap_src) {
-		error = create_snapshot(snap_src, dentry);
+		error = create_snapshot(snap_src, dentry,
+					name, namelen, async_transid);
 	} else {
 		error = create_subvol(BTRFS_I(dir)->root, dentry,
-				      name, namelen);
+				      name, namelen, async_transid);
 	}
 	if (!error)
 		fsnotify_mkdir(dir, dentry);
@@ -708,7 +806,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
 	char *sizestr;
 	char *devstr = NULL;
 	int ret = 0;
-	int namelen;
 	int mod = 0;
 
 	if (root->fs_info->sb->s_flags & MS_RDONLY)
@@ -722,7 +819,6 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root,
 		return PTR_ERR(vol_args);
 
 	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
-	namelen = strlen(vol_args->name);
 
 	mutex_lock(&root->fs_info->volume_mutex);
 	sizestr = vol_args->name;
@@ -801,11 +897,13 @@ out_unlock:
 	return ret;
 }
 
-static noinline int btrfs_ioctl_snap_create(struct file *file,
-					    void __user *arg, int subvol)
+static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
+						    char *name,
+						    unsigned long fd,
+						    int subvol,
+						    u64 *transid)
 {
 	struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
-	struct btrfs_ioctl_vol_args *vol_args;
 	struct file *src_file;
 	int namelen;
 	int ret = 0;
@@ -813,23 +911,18 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
 	if (root->fs_info->sb->s_flags & MS_RDONLY)
 		return -EROFS;
 
-	vol_args = memdup_user(arg, sizeof(*vol_args));
-	if (IS_ERR(vol_args))
-		return PTR_ERR(vol_args);
-
-	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
-	namelen = strlen(vol_args->name);
-	if (strchr(vol_args->name, '/')) {
+	namelen = strlen(name);
+	if (strchr(name, '/')) {
 		ret = -EINVAL;
 		goto out;
 	}
 
 	if (subvol) {
-		ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
-				     NULL);
+		ret = btrfs_mksubvol(&file->f_path, name, namelen,
+				     NULL, transid);
 	} else {
 		struct inode *src_inode;
-		src_file = fget(vol_args->fd);
+		src_file = fget(fd);
 		if (!src_file) {
 			ret = -EINVAL;
 			goto out;
@@ -843,12 +936,68 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
 			fput(src_file);
 			goto out;
 		}
-		ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
-				     BTRFS_I(src_inode)->root);
+		ret = btrfs_mksubvol(&file->f_path, name, namelen,
+				     BTRFS_I(src_inode)->root,
+				     transid);
 		fput(src_file);
 	}
 out:
+	return ret;
+}
+
+static noinline int btrfs_ioctl_snap_create(struct file *file,
+					    void __user *arg, int subvol,
+					    int v2)
+{
+	struct btrfs_ioctl_vol_args *vol_args = NULL;
+	struct btrfs_ioctl_vol_args_v2 *vol_args_v2 = NULL;
+	char *name;
+	u64 fd;
+	int ret;
+
+	if (v2) {
+		u64 transid = 0;
+		u64 *ptr = NULL;
+
+		vol_args_v2 = memdup_user(arg, sizeof(*vol_args_v2));
+		if (IS_ERR(vol_args_v2))
+			return PTR_ERR(vol_args_v2);
+
+		if (vol_args_v2->flags & ~BTRFS_SUBVOL_CREATE_ASYNC) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		name = vol_args_v2->name;
+		fd = vol_args_v2->fd;
+		vol_args_v2->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
+
+		if (vol_args_v2->flags & BTRFS_SUBVOL_CREATE_ASYNC)
+			ptr = &transid;
+
+		ret = btrfs_ioctl_snap_create_transid(file, name, fd,
+						      subvol, ptr);
+
+		if (ret == 0 && ptr &&
+		    copy_to_user(arg +
+				 offsetof(struct btrfs_ioctl_vol_args_v2,
+					  transid), ptr, sizeof(*ptr)))
+			ret = -EFAULT;
+	} else {
+		vol_args = memdup_user(arg, sizeof(*vol_args));
+		if (IS_ERR(vol_args))
+			return PTR_ERR(vol_args);
+		name = vol_args->name;
+		fd = vol_args->fd;
+		vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+
+		ret = btrfs_ioctl_snap_create_transid(file, name, fd,
+						      subvol, NULL);
+	}
+out:
 	kfree(vol_args);
+	kfree(vol_args_v2);
+
 	return ret;
 }
 
@@ -1073,14 +1222,10 @@ static noinline int btrfs_ioctl_tree_search(struct file *file,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	args = kmalloc(sizeof(*args), GFP_KERNEL);
-	if (!args)
-		return -ENOMEM;
+	args = memdup_user(argp, sizeof(*args));
+	if (IS_ERR(args))
+		return PTR_ERR(args);
 
-	if (copy_from_user(args, argp, sizeof(*args))) {
-		kfree(args);
-		return -EFAULT;
-	}
 	inode = fdentry(file)->d_inode;
 	ret = search_ioctl(inode, args);
 	if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
@@ -1188,14 +1333,10 @@ static noinline int btrfs_ioctl_ino_lookup(struct file *file,
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	args = kmalloc(sizeof(*args), GFP_KERNEL);
-	if (!args)
-		return -ENOMEM;
+	args = memdup_user(argp, sizeof(*args));
+	if (IS_ERR(args))
+		return PTR_ERR(args);
 
-	if (copy_from_user(args, argp, sizeof(*args))) {
-		kfree(args);
-		return -EFAULT;
-	}
 	inode = fdentry(file)->d_inode;
 
 	if (args->treeid == 0)
@@ -1227,9 +1368,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 	int ret;
 	int err = 0;
 
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
-
 	vol_args = memdup_user(arg, sizeof(*vol_args));
 	if (IS_ERR(vol_args))
 		return PTR_ERR(vol_args);
@@ -1259,13 +1397,51 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 	}
 
 	inode = dentry->d_inode;
+	dest = BTRFS_I(inode)->root;
+	if (!capable(CAP_SYS_ADMIN)){
+		/*
+		 * Regular user.  Only allow this with a special mount
+		 * option, when the user has write+exec access to the
+		 * subvol root, and when rmdir(2) would have been
+		 * allowed.
+		 *
+		 * Note that this is _not_ check that the subvol is
+		 * empty or doesn't contain data that we wouldn't
+		 * otherwise be able to delete.
+		 *
+		 * Users who want to delete empty subvols should try
+		 * rmdir(2).
+		 */
+		err = -EPERM;
+		if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
+			goto out_dput;
+
+		/*
+		 * Do not allow deletion if the parent dir is the same
+		 * as the dir to be deleted.  That means the ioctl
+		 * must be called on the dentry referencing the root
+		 * of the subvol, not a random directory contained
+		 * within it.
+		 */
+		err = -EINVAL;
+		if (root == dest)
+			goto out_dput;
+
+		err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
+		if (err)
+			goto out_dput;
+
+		/* check if subvolume may be deleted by a non-root user */
+		err = btrfs_may_delete(dir, dentry, 1);
+		if (err)
+			goto out_dput;
+	}
+
 	if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
 		err = -EINVAL;
 		goto out_dput;
 	}
 
-	dest = BTRFS_I(inode)->root;
-
 	mutex_lock(&inode->i_mutex);
 	err = d_invalidate(dentry);
 	if (err)
@@ -1304,7 +1480,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
 		BUG_ON(ret);
 	}
 
-	ret = btrfs_commit_transaction(trans, root);
+	ret = btrfs_end_transaction(trans, root);
 	BUG_ON(ret);
 	inode->i_flags |= S_DEAD;
 out_up_write:
@@ -1502,11 +1678,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 	path->reada = 2;
 
 	if (inode < src) {
-		mutex_lock(&inode->i_mutex);
-		mutex_lock(&src->i_mutex);
+		mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
+		mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
 	} else {
-		mutex_lock(&src->i_mutex);
-		mutex_lock(&inode->i_mutex);
+		mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
+		mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
 	}
 
 	/* determine range to clone */
@@ -1517,12 +1693,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 		olen = len = src->i_size - off;
 	/* if we extend to eof, continue to block boundary */
 	if (off + len == src->i_size)
-		len = ((src->i_size + bs-1) & ~(bs-1))
-			- off;
+		len = ALIGN(src->i_size, bs) - off;
 
 	/* verify the end result is block aligned */
-	if ((off & (bs-1)) ||
-	    ((off + len) & (bs-1)))
+	if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
+	    !IS_ALIGNED(destoff, bs))
 		goto out_unlock;
 
 	/* do any pending delalloc/csum calc on src, one way or
@@ -1530,13 +1705,15 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 	while (1) {
 		struct btrfs_ordered_extent *ordered;
 		lock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
-		ordered = btrfs_lookup_first_ordered_extent(inode, off+len);
-		if (BTRFS_I(src)->delalloc_bytes == 0 && !ordered)
+		ordered = btrfs_lookup_first_ordered_extent(src, off+len);
+		if (!ordered &&
+		    !test_range_bit(&BTRFS_I(src)->io_tree, off, off+len,
+				   EXTENT_DELALLOC, 0, NULL))
 			break;
 		unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS);
 		if (ordered)
 			btrfs_put_ordered_extent(ordered);
-		btrfs_wait_ordered_range(src, off, off+len);
+		btrfs_wait_ordered_range(src, off, len);
 	}
 
 	/* clone data */
@@ -1605,7 +1782,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 			}
 			btrfs_release_path(root, path);
 
-			if (key.offset + datal < off ||
+			if (key.offset + datal <= off ||
 			    key.offset >= off+len)
 				goto next;
 
@@ -1720,8 +1897,8 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 			 * but shouldn't round up the file size
 			 */
 			endoff = new_key.offset + datal;
-			if (endoff > off+olen)
-				endoff = off+olen;
+			if (endoff > destoff+olen)
+				endoff = destoff+olen;
 			if (endoff > inode->i_size)
 				btrfs_i_size_write(inode, endoff);
 
@@ -1879,6 +2056,22 @@ static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
 	return 0;
 }
 
+static void get_block_group_info(struct list_head *groups_list,
+				 struct btrfs_ioctl_space_info *space)
+{
+	struct btrfs_block_group_cache *block_group;
+
+	space->total_bytes = 0;
+	space->used_bytes = 0;
+	space->flags = 0;
+	list_for_each_entry(block_group, groups_list, list) {
+		space->flags = block_group->flags;
+		space->total_bytes += block_group->key.offset;
+		space->used_bytes +=
+			btrfs_block_group_used(&block_group->item);
+	}
+}
+
 long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
 {
 	struct btrfs_ioctl_space_args space_args;
@@ -1887,27 +2080,56 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
 	struct btrfs_ioctl_space_info *dest_orig;
 	struct btrfs_ioctl_space_info *user_dest;
 	struct btrfs_space_info *info;
+	u64 types[] = {BTRFS_BLOCK_GROUP_DATA,
+		       BTRFS_BLOCK_GROUP_SYSTEM,
+		       BTRFS_BLOCK_GROUP_METADATA,
+		       BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA};
+	int num_types = 4;
 	int alloc_size;
 	int ret = 0;
 	int slot_count = 0;
+	int i, c;
 
 	if (copy_from_user(&space_args,
 			   (struct btrfs_ioctl_space_args __user *)arg,
 			   sizeof(space_args)))
 		return -EFAULT;
 
-	/* first we count slots */
-	rcu_read_lock();
-	list_for_each_entry_rcu(info, &root->fs_info->space_info, list)
-		slot_count++;
-	rcu_read_unlock();
+	for (i = 0; i < num_types; i++) {
+		struct btrfs_space_info *tmp;
+
+		info = NULL;
+		rcu_read_lock();
+		list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
+					list) {
+			if (tmp->flags == types[i]) {
+				info = tmp;
+				break;
+			}
+		}
+		rcu_read_unlock();
+
+		if (!info)
+			continue;
+
+		down_read(&info->groups_sem);
+		for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
+			if (!list_empty(&info->block_groups[c]))
+				slot_count++;
+		}
+		up_read(&info->groups_sem);
+	}
 
 	/* space_slots == 0 means they are asking for a count */
 	if (space_args.space_slots == 0) {
 		space_args.total_spaces = slot_count;
 		goto out;
 	}
+
+	slot_count = min_t(int, space_args.space_slots, slot_count);
+
 	alloc_size = sizeof(*dest) * slot_count;
+
 	/* we generally have at most 6 or so space infos, one for each raid
 	 * level.  So, a whole page should be more than enough for everyone
 	 */
@@ -1921,27 +2143,34 @@ long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
 	dest_orig = dest;
 
 	/* now we have a buffer to copy into */
-	rcu_read_lock();
-	list_for_each_entry_rcu(info, &root->fs_info->space_info, list) {
-		/* make sure we don't copy more than we allocated
-		 * in our buffer
-		 */
-		if (slot_count == 0)
-			break;
-		slot_count--;
-
-		/* make sure userland has enough room in their buffer */
-		if (space_args.total_spaces >= space_args.space_slots)
-			break;
+	for (i = 0; i < num_types; i++) {
+		struct btrfs_space_info *tmp;
+
+		info = NULL;
+		rcu_read_lock();
+		list_for_each_entry_rcu(tmp, &root->fs_info->space_info,
+					list) {
+			if (tmp->flags == types[i]) {
+				info = tmp;
+				break;
+			}
+		}
+		rcu_read_unlock();
 
-		space.flags = info->flags;
-		space.total_bytes = info->total_bytes;
-		space.used_bytes = info->bytes_used;
-		memcpy(dest, &space, sizeof(space));
-		dest++;
-		space_args.total_spaces++;
+		if (!info)
+			continue;
+		down_read(&info->groups_sem);
+		for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
+			if (!list_empty(&info->block_groups[c])) {
+				get_block_group_info(&info->block_groups[c],
+						     &space);
+				memcpy(dest, &space, sizeof(space));
+				dest++;
+				space_args.total_spaces++;
+			}
+		}
+		up_read(&info->groups_sem);
 	}
-	rcu_read_unlock();
 
 	user_dest = (struct btrfs_ioctl_space_info *)
 		(arg + sizeof(struct btrfs_ioctl_space_args));
@@ -1984,6 +2213,36 @@ long btrfs_ioctl_trans_end(struct file *file)
 	return 0;
 }
 
+static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp)
+{
+	struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
+	struct btrfs_trans_handle *trans;
+	u64 transid;
+
+	trans = btrfs_start_transaction(root, 0);
+	transid = trans->transid;
+	btrfs_commit_transaction_async(trans, root, 0);
+
+	if (argp)
+		if (copy_to_user(argp, &transid, sizeof(transid)))
+			return -EFAULT;
+	return 0;
+}
+
+static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp)
+{
+	struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
+	u64 transid;
+
+	if (argp) {
+		if (copy_from_user(&transid, argp, sizeof(transid)))
+			return -EFAULT;
+	} else {
+		transid = 0;  /* current trans */
+	}
+	return btrfs_wait_for_commit(root, transid);
+}
+
 long btrfs_ioctl(struct file *file, unsigned int
 		cmd, unsigned long arg)
 {
@@ -1998,9 +2257,11 @@ long btrfs_ioctl(struct file *file, unsigned int
 	case FS_IOC_GETVERSION:
 		return btrfs_ioctl_getversion(file, argp);
 	case BTRFS_IOC_SNAP_CREATE:
-		return btrfs_ioctl_snap_create(file, argp, 0);
+		return btrfs_ioctl_snap_create(file, argp, 0, 0);
+	case BTRFS_IOC_SNAP_CREATE_V2:
+		return btrfs_ioctl_snap_create(file, argp, 0, 1);
 	case BTRFS_IOC_SUBVOL_CREATE:
-		return btrfs_ioctl_snap_create(file, argp, 1);
+		return btrfs_ioctl_snap_create(file, argp, 1, 0);
 	case BTRFS_IOC_SNAP_DESTROY:
 		return btrfs_ioctl_snap_destroy(file, argp);
 	case BTRFS_IOC_DEFAULT_SUBVOL:
@@ -2034,6 +2295,10 @@ long btrfs_ioctl(struct file *file, unsigned int
 	case BTRFS_IOC_SYNC:
 		btrfs_sync_fs(file->f_dentry->d_sb, 1);
 		return 0;
+	case BTRFS_IOC_START_SYNC:
+		return btrfs_ioctl_start_sync(file, argp);
+	case BTRFS_IOC_WAIT_SYNC:
+		return btrfs_ioctl_wait_sync(file, argp);
 	}
 
 	return -ENOTTY;
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 424694aa517..c344d12c646 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -22,14 +22,25 @@
 
 #define BTRFS_IOCTL_MAGIC 0x94
 #define BTRFS_VOL_NAME_MAX 255
-#define BTRFS_PATH_NAME_MAX 4087
 
 /* this should be 4k */
+#define BTRFS_PATH_NAME_MAX 4087
 struct btrfs_ioctl_vol_args {
 	__s64 fd;
 	char name[BTRFS_PATH_NAME_MAX + 1];
 };
 
+#define BTRFS_SUBVOL_CREATE_ASYNC	(1ULL << 0)
+
+#define BTRFS_SUBVOL_NAME_MAX 4039
+struct btrfs_ioctl_vol_args_v2 {
+	__s64 fd;
+	__u64 transid;
+	__u64 flags;
+	__u64 unused[4];
+	char name[BTRFS_SUBVOL_NAME_MAX + 1];
+};
+
 #define BTRFS_INO_LOOKUP_PATH_MAX 4080
 struct btrfs_ioctl_ino_lookup_args {
 	__u64 treeid;
@@ -178,4 +189,8 @@ struct btrfs_ioctl_space_args {
 #define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64)
 #define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
 				    struct btrfs_ioctl_space_args)
+#define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
+#define BTRFS_IOC_WAIT_SYNC  _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
+#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
+				   struct btrfs_ioctl_vol_args_v2)
 #endif
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index e56c72bc5ad..ae7737e352c 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -250,6 +250,73 @@ int btrfs_add_ordered_sum(struct inode *inode,
 
 /*
  * this is used to account for finished IO across a given range
+ * of the file.  The IO may span ordered extents.  If
+ * a given ordered_extent is completely done, 1 is returned, otherwise
+ * 0.
+ *
+ * test_and_set_bit on a flag in the struct btrfs_ordered_extent is used
+ * to make sure this function only returns 1 once for a given ordered extent.
+ *
+ * file_offset is updated to one byte past the range that is recorded as
+ * complete.  This allows you to walk forward in the file.
+ */
+int btrfs_dec_test_first_ordered_pending(struct inode *inode,
+				   struct btrfs_ordered_extent **cached,
+				   u64 *file_offset, u64 io_size)
+{
+	struct btrfs_ordered_inode_tree *tree;
+	struct rb_node *node;
+	struct btrfs_ordered_extent *entry = NULL;
+	int ret;
+	u64 dec_end;
+	u64 dec_start;
+	u64 to_dec;
+
+	tree = &BTRFS_I(inode)->ordered_tree;
+	spin_lock(&tree->lock);
+	node = tree_search(tree, *file_offset);
+	if (!node) {
+		ret = 1;
+		goto out;
+	}
+
+	entry = rb_entry(node, struct btrfs_ordered_extent, rb_node);
+	if (!offset_in_entry(entry, *file_offset)) {
+		ret = 1;
+		goto out;
+	}
+
+	dec_start = max(*file_offset, entry->file_offset);
+	dec_end = min(*file_offset + io_size, entry->file_offset +
+		      entry->len);
+	*file_offset = dec_end;
+	if (dec_start > dec_end) {
+		printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n",
+		       (unsigned long long)dec_start,
+		       (unsigned long long)dec_end);
+	}
+	to_dec = dec_end - dec_start;
+	if (to_dec > entry->bytes_left) {
+		printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
+		       (unsigned long long)entry->bytes_left,
+		       (unsigned long long)to_dec);
+	}
+	entry->bytes_left -= to_dec;
+	if (entry->bytes_left == 0)
+		ret = test_and_set_bit(BTRFS_ORDERED_IO_DONE, &entry->flags);
+	else
+		ret = 1;
+out:
+	if (!ret && cached && entry) {
+		*cached = entry;
+		atomic_inc(&entry->refs);
+	}
+	spin_unlock(&tree->lock);
+	return ret == 0;
+}
+
+/*
+ * this is used to account for finished IO across a given range
  * of the file.  The IO should not span ordered extents.  If
  * a given ordered_extent is completely done, 1 is returned, otherwise
  * 0.
@@ -526,7 +593,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
 {
 	u64 end;
 	u64 orig_end;
-	u64 wait_end;
 	struct btrfs_ordered_extent *ordered;
 	int found;
 
@@ -537,7 +603,6 @@ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len)
 		if (orig_end > INT_LIMIT(loff_t))
 			orig_end = INT_LIMIT(loff_t);
 	}
-	wait_end = orig_end;
 again:
 	/* start IO across the range first to instantiate any delalloc
 	 * extents
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 8ac365492a3..61dca83119d 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -141,6 +141,9 @@ int btrfs_remove_ordered_extent(struct inode *inode,
 int btrfs_dec_test_ordered_pending(struct inode *inode,
 				   struct btrfs_ordered_extent **cached,
 				   u64 file_offset, u64 io_size);
+int btrfs_dec_test_first_ordered_pending(struct inode *inode,
+				   struct btrfs_ordered_extent **cached,
+				   u64 *file_offset, u64 io_size);
 int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
 			     u64 start, u64 len, u64 disk_len, int type);
 int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset,
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c
index 79cba5fbc28..f8be250963a 100644
--- a/fs/btrfs/orphan.c
+++ b/fs/btrfs/orphan.c
@@ -56,8 +56,12 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
 		return -ENOMEM;
 
 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
-	if (ret)
+	if (ret < 0)
 		goto out;
+	if (ret) {
+		ret = -ENOENT;
+		goto out;
+	}
 
 	ret = btrfs_del_item(trans, root, path);
 
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index b37d723b9d4..045c9c2b2d7 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -29,6 +29,7 @@
 #include "locking.h"
 #include "btrfs_inode.h"
 #include "async-thread.h"
+#include "free-space-cache.h"
 
 /*
  * backref_node, mapping_node and tree_block start with this
@@ -178,8 +179,6 @@ struct reloc_control {
 	u64 search_start;
 	u64 extents_found;
 
-	int block_rsv_retries;
-
 	unsigned int stage:8;
 	unsigned int create_reloc_tree:1;
 	unsigned int merge_reloc_tree:1;
@@ -2133,7 +2132,6 @@ int prepare_to_merge(struct reloc_control *rc, int err)
 	LIST_HEAD(reloc_roots);
 	u64 num_bytes = 0;
 	int ret;
-	int retries = 0;
 
 	mutex_lock(&root->fs_info->trans_mutex);
 	rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2;
@@ -2143,7 +2141,7 @@ again:
 	if (!err) {
 		num_bytes = rc->merging_rsv_size;
 		ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv,
-					  num_bytes, &retries);
+					  num_bytes);
 		if (ret)
 			err = ret;
 	}
@@ -2155,7 +2153,6 @@ again:
 			btrfs_end_transaction(trans, rc->extent_root);
 			btrfs_block_rsv_release(rc->extent_root,
 						rc->block_rsv, num_bytes);
-			retries = 0;
 			goto again;
 		}
 	}
@@ -2405,15 +2402,13 @@ static int reserve_metadata_space(struct btrfs_trans_handle *trans,
 	num_bytes = calcu_metadata_size(rc, node, 1) * 2;
 
 	trans->block_rsv = rc->block_rsv;
-	ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes,
-				  &rc->block_rsv_retries);
+	ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes);
 	if (ret) {
 		if (ret == -EAGAIN)
 			rc->commit_transaction = 1;
 		return ret;
 	}
 
-	rc->block_rsv_retries = 0;
 	return 0;
 }
 
@@ -3099,6 +3094,8 @@ static int add_tree_block(struct reloc_control *rc,
 		BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
 		ret = get_ref_objectid_v0(rc, path, extent_key,
 					  &ref_owner, NULL);
+		if (ret < 0)
+			return ret;
 		BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
 		level = (int)ref_owner;
 		/* FIXME: get real generation */
@@ -3191,6 +3188,54 @@ static int block_use_full_backref(struct reloc_control *rc,
 	return ret;
 }
 
+static int delete_block_group_cache(struct btrfs_fs_info *fs_info,
+				    struct inode *inode, u64 ino)
+{
+	struct btrfs_key key;
+	struct btrfs_path *path;
+	struct btrfs_root *root = fs_info->tree_root;
+	struct btrfs_trans_handle *trans;
+	unsigned long nr;
+	int ret = 0;
+
+	if (inode)
+		goto truncate;
+
+	key.objectid = ino;
+	key.type = BTRFS_INODE_ITEM_KEY;
+	key.offset = 0;
+
+	inode = btrfs_iget(fs_info->sb, &key, root, NULL);
+	if (!inode || IS_ERR(inode) || is_bad_inode(inode)) {
+		if (inode && !IS_ERR(inode))
+			iput(inode);
+		return -ENOENT;
+	}
+
+truncate:
+	path = btrfs_alloc_path();
+	if (!path) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	trans = btrfs_join_transaction(root, 0);
+	if (IS_ERR(trans)) {
+		btrfs_free_path(path);
+		goto out;
+	}
+
+	ret = btrfs_truncate_free_space_cache(root, trans, path, inode);
+
+	btrfs_free_path(path);
+	nr = trans->blocks_used;
+	btrfs_end_transaction(trans, root);
+	btrfs_btree_balance_dirty(root, nr);
+out:
+	iput(inode);
+	return ret;
+}
+
 /*
  * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY
  * this function scans fs tree to find blocks reference the data extent
@@ -3217,15 +3262,27 @@ static int find_data_references(struct reloc_control *rc,
 	int counted;
 	int ret;
 
-	path = btrfs_alloc_path();
-	if (!path)
-		return -ENOMEM;
-
 	ref_root = btrfs_extent_data_ref_root(leaf, ref);
 	ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
 	ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
 	ref_count = btrfs_extent_data_ref_count(leaf, ref);
 
+	/*
+	 * This is an extent belonging to the free space cache, lets just delete
+	 * it and redo the search.
+	 */
+	if (ref_root == BTRFS_ROOT_TREE_OBJECTID) {
+		ret = delete_block_group_cache(rc->extent_root->fs_info,
+					       NULL, ref_objectid);
+		if (ret != -ENOENT)
+			return ret;
+		ret = 0;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
 	root = read_fs_root(rc->extent_root->fs_info, ref_root);
 	if (IS_ERR(root)) {
 		err = PTR_ERR(root);
@@ -3554,8 +3611,7 @@ int prepare_to_relocate(struct reloc_control *rc)
 	 * is no reservation in transaction handle.
 	 */
 	ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv,
-				  rc->extent_root->nodesize * 256,
-				  &rc->block_rsv_retries);
+				  rc->extent_root->nodesize * 256);
 	if (ret)
 		return ret;
 
@@ -3567,7 +3623,6 @@ int prepare_to_relocate(struct reloc_control *rc)
 	rc->extents_found = 0;
 	rc->nodes_relocated = 0;
 	rc->merging_rsv_size = 0;
-	rc->block_rsv_retries = 0;
 
 	rc->create_reloc_tree = 1;
 	set_reloc_control(rc);
@@ -3860,6 +3915,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
 {
 	struct btrfs_fs_info *fs_info = extent_root->fs_info;
 	struct reloc_control *rc;
+	struct inode *inode;
+	struct btrfs_path *path;
 	int ret;
 	int rw = 0;
 	int err = 0;
@@ -3882,6 +3939,26 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
 		rw = 1;
 	}
 
+	path = btrfs_alloc_path();
+	if (!path) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group,
+					path);
+	btrfs_free_path(path);
+
+	if (!IS_ERR(inode))
+		ret = delete_block_group_cache(fs_info, inode, 0);
+	else
+		ret = PTR_ERR(inode);
+
+	if (ret && ret != -ENOENT) {
+		err = ret;
+		goto out;
+	}
+
 	rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
 	if (IS_ERR(rc->data_inode)) {
 		err = PTR_ERR(rc->data_inode);
@@ -4143,7 +4220,7 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
 		btrfs_add_ordered_sum(inode, ordered, sums);
 	}
 	btrfs_put_ordered_extent(ordered);
-	return 0;
+	return ret;
 }
 
 void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 2d958be761c..6a1086e83ff 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -181,7 +181,6 @@ int btrfs_insert_root(struct btrfs_trans_handle *trans, struct btrfs_root
 int btrfs_find_dead_roots(struct btrfs_root *root, u64 objectid)
 {
 	struct btrfs_root *dead_root;
-	struct btrfs_item *item;
 	struct btrfs_root_item *ri;
 	struct btrfs_key key;
 	struct btrfs_key found_key;
@@ -214,7 +213,6 @@ again:
 			nritems = btrfs_header_nritems(leaf);
 			slot = path->slots[0];
 		}
-		item = btrfs_item_nr(leaf, slot);
 		btrfs_item_key_to_cpu(leaf, &key, slot);
 		if (btrfs_key_type(&key) != BTRFS_ROOT_ITEM_KEY)
 			goto next;
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 144f8a5730f..883c6fa1367 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -61,6 +61,8 @@ static void btrfs_put_super(struct super_block *sb)
 
 	ret = close_ctree(root);
 	sb->s_fs_info = NULL;
+
+	(void)ret; /* FIXME: need to fix VFS to return error? */
 }
 
 enum {
@@ -68,7 +70,8 @@ enum {
 	Opt_nodatacow, Opt_max_inline, Opt_alloc_start, Opt_nobarrier, Opt_ssd,
 	Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl, Opt_compress,
 	Opt_compress_force, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
-	Opt_discard, Opt_err,
+	Opt_discard, Opt_space_cache, Opt_clear_cache, Opt_err,
+	Opt_user_subvol_rm_allowed,
 };
 
 static match_table_t tokens = {
@@ -92,6 +95,9 @@ static match_table_t tokens = {
 	{Opt_flushoncommit, "flushoncommit"},
 	{Opt_ratio, "metadata_ratio=%d"},
 	{Opt_discard, "discard"},
+	{Opt_space_cache, "space_cache"},
+	{Opt_clear_cache, "clear_cache"},
+	{Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"},
 	{Opt_err, NULL},
 };
 
@@ -235,6 +241,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
 		case Opt_discard:
 			btrfs_set_opt(info->mount_opt, DISCARD);
 			break;
+		case Opt_space_cache:
+			printk(KERN_INFO "btrfs: enabling disk space caching\n");
+			btrfs_set_opt(info->mount_opt, SPACE_CACHE);
+			break;
+		case Opt_clear_cache:
+			printk(KERN_INFO "btrfs: force clearing of disk cache\n");
+			btrfs_set_opt(info->mount_opt, CLEAR_CACHE);
+			break;
+		case Opt_user_subvol_rm_allowed:
+			btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED);
+			break;
 		case Opt_err:
 			printk(KERN_INFO "btrfs: unrecognized mount option "
 			       "'%s'\n", p);
@@ -380,7 +397,7 @@ static struct dentry *get_default_root(struct super_block *sb,
 find_root:
 	new_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
 	if (IS_ERR(new_root))
-		return ERR_PTR(PTR_ERR(new_root));
+		return ERR_CAST(new_root);
 
 	if (btrfs_root_refs(&new_root->root_item) == 0)
 		return ERR_PTR(-ENOENT);
@@ -436,7 +453,6 @@ static int btrfs_fill_super(struct super_block *sb,
 {
 	struct inode *inode;
 	struct dentry *root_dentry;
-	struct btrfs_super_block *disk_super;
 	struct btrfs_root *tree_root;
 	struct btrfs_key key;
 	int err;
@@ -458,7 +474,6 @@ static int btrfs_fill_super(struct super_block *sb,
 		return PTR_ERR(tree_root);
 	}
 	sb->s_fs_info = tree_root;
-	disk_super = &tree_root->fs_info->super_copy;
 
 	key.objectid = BTRFS_FIRST_FREE_OBJECTID;
 	key.type = BTRFS_INODE_ITEM_KEY;
@@ -548,30 +563,45 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
 
 static int btrfs_test_super(struct super_block *s, void *data)
 {
-	struct btrfs_fs_devices *test_fs_devices = data;
+	struct btrfs_root *test_root = data;
 	struct btrfs_root *root = btrfs_sb(s);
 
-	return root->fs_info->fs_devices == test_fs_devices;
+	/*
+	 * If this super block is going away, return false as it
+	 * can't match as an existing super block.
+	 */
+	if (!atomic_read(&s->s_active))
+		return 0;
+	return root->fs_info->fs_devices == test_root->fs_info->fs_devices;
+}
+
+static int btrfs_set_super(struct super_block *s, void *data)
+{
+	s->s_fs_info = data;
+
+	return set_anon_super(s, data);
 }
 
+
 /*
  * Find a superblock for the given device / mount point.
  *
  * Note:  This is based on get_sb_bdev from fs/super.c with a few additions
  *	  for multiple device setup.  Make sure to keep it in sync.
  */
-static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
-		const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
+		const char *dev_name, void *data)
 {
 	struct block_device *bdev = NULL;
 	struct super_block *s;
 	struct dentry *root;
 	struct btrfs_fs_devices *fs_devices = NULL;
+	struct btrfs_root *tree_root = NULL;
+	struct btrfs_fs_info *fs_info = NULL;
 	fmode_t mode = FMODE_READ;
 	char *subvol_name = NULL;
 	u64 subvol_objectid = 0;
 	int error = 0;
-	int found = 0;
 
 	if (!(flags & MS_RDONLY))
 		mode |= FMODE_WRITE;
@@ -580,7 +610,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
 					  &subvol_name, &subvol_objectid,
 					  &fs_devices);
 	if (error)
-		return error;
+		return ERR_PTR(error);
 
 	error = btrfs_scan_one_device(dev_name, mode, fs_type, &fs_devices);
 	if (error)
@@ -595,8 +625,24 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
 		goto error_close_devices;
 	}
 
+	/*
+	 * Setup a dummy root and fs_info for test/set super.  This is because
+	 * we don't actually fill this stuff out until open_ctree, but we need
+	 * it for searching for existing supers, so this lets us do that and
+	 * then open_ctree will properly initialize everything later.
+	 */
+	fs_info = kzalloc(sizeof(struct btrfs_fs_info), GFP_NOFS);
+	tree_root = kzalloc(sizeof(struct btrfs_root), GFP_NOFS);
+	if (!fs_info || !tree_root) {
+		error = -ENOMEM;
+		goto error_close_devices;
+	}
+	fs_info->tree_root = tree_root;
+	fs_info->fs_devices = fs_devices;
+	tree_root->fs_info = fs_info;
+
 	bdev = fs_devices->latest_bdev;
-	s = sget(fs_type, btrfs_test_super, set_anon_super, fs_devices);
+	s = sget(fs_type, btrfs_test_super, btrfs_set_super, tree_root);
 	if (IS_ERR(s))
 		goto error_s;
 
@@ -607,7 +653,6 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
 			goto error_close_devices;
 		}
 
-		found = 1;
 		btrfs_close_devices(fs_devices);
 	} else {
 		char b[BDEVNAME_SIZE];
@@ -629,7 +674,7 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
 	if (IS_ERR(root)) {
 		error = PTR_ERR(root);
 		deactivate_locked_super(s);
-		goto error;
+		goto error_free_subvol_name;
 	}
 	/* if they gave us a subvolume name bind mount into that */
 	if (strcmp(subvol_name, ".")) {
@@ -640,36 +685,34 @@ static int btrfs_get_sb(struct file_system_type *fs_type, int flags,
 		mutex_unlock(&root->d_inode->i_mutex);
 
 		if (IS_ERR(new_root)) {
+			dput(root);
 			deactivate_locked_super(s);
 			error = PTR_ERR(new_root);
-			dput(root);
-			goto error_close_devices;
+			goto error_free_subvol_name;
 		}
 		if (!new_root->d_inode) {
 			dput(root);
 			dput(new_root);
 			deactivate_locked_super(s);
 			error = -ENXIO;
-			goto error_close_devices;
+			goto error_free_subvol_name;
 		}
 		dput(root);
 		root = new_root;
 	}
 
-	mnt->mnt_sb = s;
-	mnt->mnt_root = root;
-
 	kfree(subvol_name);
-	return 0;
+	return root;
 
 error_s:
 	error = PTR_ERR(s);
 error_close_devices:
 	btrfs_close_devices(fs_devices);
+	kfree(fs_info);
+	kfree(tree_root);
 error_free_subvol_name:
 	kfree(subvol_name);
-error:
-	return error;
+	return ERR_PTR(error);
 }
 
 static int btrfs_remount(struct super_block *sb, int *flags, char *data)
@@ -716,18 +759,25 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	struct list_head *head = &root->fs_info->space_info;
 	struct btrfs_space_info *found;
 	u64 total_used = 0;
+	u64 total_used_data = 0;
 	int bits = dentry->d_sb->s_blocksize_bits;
 	__be32 *fsid = (__be32 *)root->fs_info->fsid;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(found, head, list)
+	list_for_each_entry_rcu(found, head, list) {
+		if (found->flags & (BTRFS_BLOCK_GROUP_METADATA |
+				    BTRFS_BLOCK_GROUP_SYSTEM))
+			total_used_data += found->disk_total;
+		else
+			total_used_data += found->disk_used;
 		total_used += found->disk_used;
+	}
 	rcu_read_unlock();
 
 	buf->f_namelen = BTRFS_NAME_LEN;
 	buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
 	buf->f_bfree = buf->f_blocks - (total_used >> bits);
-	buf->f_bavail = buf->f_bfree;
+	buf->f_bavail = buf->f_blocks - (total_used_data >> bits);
 	buf->f_bsize = dentry->d_sb->s_blocksize;
 	buf->f_type = BTRFS_SUPER_MAGIC;
 
@@ -746,7 +796,7 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 static struct file_system_type btrfs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "btrfs",
-	.get_sb		= btrfs_get_sb,
+	.mount		= btrfs_mount,
 	.kill_sb	= kill_anon_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 66e4c66cc63..f50e931fc21 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -163,6 +163,7 @@ enum btrfs_trans_type {
 	TRANS_START,
 	TRANS_JOIN,
 	TRANS_USERSPACE,
+	TRANS_JOIN_NOLOCK,
 };
 
 static int may_wait_transaction(struct btrfs_root *root, int type)
@@ -179,14 +180,14 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
 {
 	struct btrfs_trans_handle *h;
 	struct btrfs_transaction *cur_trans;
-	int retries = 0;
 	int ret;
 again:
 	h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
 	if (!h)
 		return ERR_PTR(-ENOMEM);
 
-	mutex_lock(&root->fs_info->trans_mutex);
+	if (type != TRANS_JOIN_NOLOCK)
+		mutex_lock(&root->fs_info->trans_mutex);
 	if (may_wait_transaction(root, type))
 		wait_current_trans(root);
 
@@ -195,7 +196,8 @@ again:
 
 	cur_trans = root->fs_info->running_transaction;
 	cur_trans->use_count++;
-	mutex_unlock(&root->fs_info->trans_mutex);
+	if (type != TRANS_JOIN_NOLOCK)
+		mutex_unlock(&root->fs_info->trans_mutex);
 
 	h->transid = cur_trans->transid;
 	h->transaction = cur_trans;
@@ -212,8 +214,7 @@ again:
 	}
 
 	if (num_items > 0) {
-		ret = btrfs_trans_reserve_metadata(h, root, num_items,
-						   &retries);
+		ret = btrfs_trans_reserve_metadata(h, root, num_items);
 		if (ret == -EAGAIN) {
 			btrfs_commit_transaction(h, root);
 			goto again;
@@ -224,9 +225,11 @@ again:
 		}
 	}
 
-	mutex_lock(&root->fs_info->trans_mutex);
+	if (type != TRANS_JOIN_NOLOCK)
+		mutex_lock(&root->fs_info->trans_mutex);
 	record_root_in_trans(h, root);
-	mutex_unlock(&root->fs_info->trans_mutex);
+	if (type != TRANS_JOIN_NOLOCK)
+		mutex_unlock(&root->fs_info->trans_mutex);
 
 	if (!current->journal_info && type != TRANS_USERSPACE)
 		current->journal_info = h;
@@ -244,6 +247,12 @@ struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
 	return start_transaction(root, 0, TRANS_JOIN);
 }
 
+struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
+							  int num_blocks)
+{
+	return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
+}
+
 struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
 							 int num_blocks)
 {
@@ -270,6 +279,58 @@ static noinline int wait_for_commit(struct btrfs_root *root,
 	return 0;
 }
 
+int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
+{
+	struct btrfs_transaction *cur_trans = NULL, *t;
+	int ret;
+
+	mutex_lock(&root->fs_info->trans_mutex);
+
+	ret = 0;
+	if (transid) {
+		if (transid <= root->fs_info->last_trans_committed)
+			goto out_unlock;
+
+		/* find specified transaction */
+		list_for_each_entry(t, &root->fs_info->trans_list, list) {
+			if (t->transid == transid) {
+				cur_trans = t;
+				break;
+			}
+			if (t->transid > transid)
+				break;
+		}
+		ret = -EINVAL;
+		if (!cur_trans)
+			goto out_unlock;  /* bad transid */
+	} else {
+		/* find newest transaction that is committing | committed */
+		list_for_each_entry_reverse(t, &root->fs_info->trans_list,
+					    list) {
+			if (t->in_commit) {
+				if (t->commit_done)
+					goto out_unlock;
+				cur_trans = t;
+				break;
+			}
+		}
+		if (!cur_trans)
+			goto out_unlock;  /* nothing committing|committed */
+	}
+
+	cur_trans->use_count++;
+	mutex_unlock(&root->fs_info->trans_mutex);
+
+	wait_for_commit(root, cur_trans);
+
+	mutex_lock(&root->fs_info->trans_mutex);
+	put_transaction(cur_trans);
+	ret = 0;
+out_unlock:
+	mutex_unlock(&root->fs_info->trans_mutex);
+	return ret;
+}
+
 #if 0
 /*
  * rate limit against the drop_snapshot code.  This helps to slow down new
@@ -348,7 +409,7 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
 }
 
 static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
-			  struct btrfs_root *root, int throttle)
+			  struct btrfs_root *root, int throttle, int lock)
 {
 	struct btrfs_transaction *cur_trans = trans->transaction;
 	struct btrfs_fs_info *info = root->fs_info;
@@ -376,26 +437,29 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 
 	btrfs_trans_release_metadata(trans, root);
 
-	if (!root->fs_info->open_ioctl_trans &&
+	if (lock && !root->fs_info->open_ioctl_trans &&
 	    should_end_transaction(trans, root))
 		trans->transaction->blocked = 1;
 
-	if (cur_trans->blocked && !cur_trans->in_commit) {
+	if (lock && cur_trans->blocked && !cur_trans->in_commit) {
 		if (throttle)
 			return btrfs_commit_transaction(trans, root);
 		else
 			wake_up_process(info->transaction_kthread);
 	}
 
-	mutex_lock(&info->trans_mutex);
+	if (lock)
+		mutex_lock(&info->trans_mutex);
 	WARN_ON(cur_trans != info->running_transaction);
 	WARN_ON(cur_trans->num_writers < 1);
 	cur_trans->num_writers--;
 
+	smp_mb();
 	if (waitqueue_active(&cur_trans->writer_wait))
 		wake_up(&cur_trans->writer_wait);
 	put_transaction(cur_trans);
-	mutex_unlock(&info->trans_mutex);
+	if (lock)
+		mutex_unlock(&info->trans_mutex);
 
 	if (current->journal_info == trans)
 		current->journal_info = NULL;
@@ -411,13 +475,19 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root)
 {
-	return __btrfs_end_transaction(trans, root, 0);
+	return __btrfs_end_transaction(trans, root, 0, 1);
 }
 
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root)
 {
-	return __btrfs_end_transaction(trans, root, 1);
+	return __btrfs_end_transaction(trans, root, 1, 1);
+}
+
+int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root)
+{
+	return __btrfs_end_transaction(trans, root, 0, 0);
 }
 
 /*
@@ -832,11 +902,11 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	struct btrfs_root *root = pending->root;
 	struct btrfs_root *parent_root;
 	struct inode *parent_inode;
+	struct dentry *parent;
 	struct dentry *dentry;
 	struct extent_buffer *tmp;
 	struct extent_buffer *old;
 	int ret;
-	int retries = 0;
 	u64 to_reserve = 0;
 	u64 index = 0;
 	u64 objectid;
@@ -858,7 +928,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 
 	if (to_reserve > 0) {
 		ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv,
-					  to_reserve, &retries);
+					  to_reserve);
 		if (ret) {
 			pending->error = ret;
 			goto fail;
@@ -872,7 +942,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 	trans->block_rsv = &pending->block_rsv;
 
 	dentry = pending->dentry;
-	parent_inode = dentry->d_parent->d_inode;
+	parent = dget_parent(dentry);
+	parent_inode = parent->d_inode;
 	parent_root = BTRFS_I(parent_inode)->root;
 	record_root_in_trans(trans, parent_root);
 
@@ -920,6 +991,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
 				 parent_inode->i_ino, index,
 				 dentry->d_name.name, dentry->d_name.len);
 	BUG_ON(ret);
+	dput(parent);
 
 	key.offset = (u64)-1;
 	pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key);
@@ -966,6 +1038,8 @@ static void update_super_roots(struct btrfs_root *root)
 	super->root = root_item->bytenr;
 	super->generation = root_item->generation;
 	super->root_level = root_item->level;
+	if (super->cache_generation != 0 || btrfs_test_opt(root, SPACE_CACHE))
+		super->cache_generation = root_item->generation;
 }
 
 int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
@@ -988,11 +1062,127 @@ int btrfs_transaction_blocked(struct btrfs_fs_info *info)
 	return ret;
 }
 
+/*
+ * wait for the current transaction commit to start and block subsequent
+ * transaction joins
+ */
+static void wait_current_trans_commit_start(struct btrfs_root *root,
+					    struct btrfs_transaction *trans)
+{
+	DEFINE_WAIT(wait);
+
+	if (trans->in_commit)
+		return;
+
+	while (1) {
+		prepare_to_wait(&root->fs_info->transaction_blocked_wait, &wait,
+				TASK_UNINTERRUPTIBLE);
+		if (trans->in_commit) {
+			finish_wait(&root->fs_info->transaction_blocked_wait,
+				    &wait);
+			break;
+		}
+		mutex_unlock(&root->fs_info->trans_mutex);
+		schedule();
+		mutex_lock(&root->fs_info->trans_mutex);
+		finish_wait(&root->fs_info->transaction_blocked_wait, &wait);
+	}
+}
+
+/*
+ * wait for the current transaction to start and then become unblocked.
+ * caller holds ref.
+ */
+static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
+					 struct btrfs_transaction *trans)
+{
+	DEFINE_WAIT(wait);
+
+	if (trans->commit_done || (trans->in_commit && !trans->blocked))
+		return;
+
+	while (1) {
+		prepare_to_wait(&root->fs_info->transaction_wait, &wait,
+				TASK_UNINTERRUPTIBLE);
+		if (trans->commit_done ||
+		    (trans->in_commit && !trans->blocked)) {
+			finish_wait(&root->fs_info->transaction_wait,
+				    &wait);
+			break;
+		}
+		mutex_unlock(&root->fs_info->trans_mutex);
+		schedule();
+		mutex_lock(&root->fs_info->trans_mutex);
+		finish_wait(&root->fs_info->transaction_wait,
+			    &wait);
+	}
+}
+
+/*
+ * commit transactions asynchronously. once btrfs_commit_transaction_async
+ * returns, any subsequent transaction will not be allowed to join.
+ */
+struct btrfs_async_commit {
+	struct btrfs_trans_handle *newtrans;
+	struct btrfs_root *root;
+	struct delayed_work work;
+};
+
+static void do_async_commit(struct work_struct *work)
+{
+	struct btrfs_async_commit *ac =
+		container_of(work, struct btrfs_async_commit, work.work);
+
+	btrfs_commit_transaction(ac->newtrans, ac->root);
+	kfree(ac);
+}
+
+int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   int wait_for_unblock)
+{
+	struct btrfs_async_commit *ac;
+	struct btrfs_transaction *cur_trans;
+
+	ac = kmalloc(sizeof(*ac), GFP_NOFS);
+	BUG_ON(!ac);
+
+	INIT_DELAYED_WORK(&ac->work, do_async_commit);
+	ac->root = root;
+	ac->newtrans = btrfs_join_transaction(root, 0);
+
+	/* take transaction reference */
+	mutex_lock(&root->fs_info->trans_mutex);
+	cur_trans = trans->transaction;
+	cur_trans->use_count++;
+	mutex_unlock(&root->fs_info->trans_mutex);
+
+	btrfs_end_transaction(trans, root);
+	schedule_delayed_work(&ac->work, 0);
+
+	/* wait for transaction to start and unblock */
+	mutex_lock(&root->fs_info->trans_mutex);
+	if (wait_for_unblock)
+		wait_current_trans_commit_start_and_unblock(root, cur_trans);
+	else
+		wait_current_trans_commit_start(root, cur_trans);
+	put_transaction(cur_trans);
+	mutex_unlock(&root->fs_info->trans_mutex);
+
+	return 0;
+}
+
+/*
+ * btrfs_transaction state sequence:
+ *    in_commit = 0, blocked = 0  (initial)
+ *    in_commit = 1, blocked = 1
+ *    blocked = 0
+ *    commit_done = 1
+ */
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root)
 {
 	unsigned long joined = 0;
-	unsigned long timeout = 1;
 	struct btrfs_transaction *cur_trans;
 	struct btrfs_transaction *prev_trans = NULL;
 	DEFINE_WAIT(wait);
@@ -1039,6 +1229,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 
 	trans->transaction->in_commit = 1;
 	trans->transaction->blocked = 1;
+	wake_up(&root->fs_info->transaction_blocked_wait);
+
 	if (cur_trans->list.prev != &root->fs_info->trans_list) {
 		prev_trans = list_entry(cur_trans->list.prev,
 					struct btrfs_transaction, list);
@@ -1063,11 +1255,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 			snap_pending = 1;
 
 		WARN_ON(cur_trans != trans->transaction);
-		if (cur_trans->num_writers > 1)
-			timeout = MAX_SCHEDULE_TIMEOUT;
-		else if (should_grow)
-			timeout = 1;
-
 		mutex_unlock(&root->fs_info->trans_mutex);
 
 		if (flush_on_commit || snap_pending) {
@@ -1089,8 +1276,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 				TASK_UNINTERRUPTIBLE);
 
 		smp_mb();
-		if (cur_trans->num_writers > 1 || should_grow)
-			schedule_timeout(timeout);
+		if (cur_trans->num_writers > 1)
+			schedule_timeout(MAX_SCHEDULE_TIMEOUT);
+		else if (should_grow)
+			schedule_timeout(1);
 
 		mutex_lock(&root->fs_info->trans_mutex);
 		finish_wait(&cur_trans->writer_wait, &wait);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index e104986d0bf..f104b57ad4e 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -87,12 +87,17 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans,
 
 int btrfs_end_transaction(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root);
+int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
+				 struct btrfs_root *root);
 struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
 						   int num_items);
 struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
 						  int num_blocks);
+struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root,
+							  int num_blocks);
 struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
 							 int num_blocks);
+int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid);
 int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
 				     struct btrfs_root *root);
 int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans,
@@ -104,6 +109,9 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly);
 int btrfs_clean_old_snapshots(struct btrfs_root *root);
 int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
 			     struct btrfs_root *root);
+int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
+				   struct btrfs_root *root,
+				   int wait_for_unblock);
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
 				   struct btrfs_root *root);
 int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c
index f7ac8e013ed..992ab425599 100644
--- a/fs/btrfs/tree-defrag.c
+++ b/fs/btrfs/tree-defrag.c
@@ -36,7 +36,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
 	int ret = 0;
 	int wret;
 	int level;
-	int orig_level;
 	int is_extent = 0;
 	int next_key_ret = 0;
 	u64 last_ret = 0;
@@ -64,7 +63,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans,
 		return -ENOMEM;
 
 	level = btrfs_header_level(root->node);
-	orig_level = level;
 
 	if (level == 0)
 		goto out;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index fb102a9aee9..054744ac571 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -786,7 +786,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
 {
 	struct inode *dir;
 	int ret;
-	struct btrfs_key location;
 	struct btrfs_inode_ref *ref;
 	struct btrfs_dir_item *di;
 	struct inode *inode;
@@ -795,10 +794,6 @@ static noinline int add_inode_ref(struct btrfs_trans_handle *trans,
 	unsigned long ref_ptr;
 	unsigned long ref_end;
 
-	location.objectid = key->objectid;
-	location.type = BTRFS_INODE_ITEM_KEY;
-	location.offset = 0;
-
 	/*
 	 * it is possible that we didn't log all the parent directories
 	 * for a given inode.  If we don't find the dir, just don't
@@ -1583,7 +1578,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
 	struct btrfs_path *path;
 	struct btrfs_root *root = wc->replay_dest;
 	struct btrfs_key key;
-	u32 item_size;
 	int level;
 	int i;
 	int ret;
@@ -1601,7 +1595,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
 	nritems = btrfs_header_nritems(eb);
 	for (i = 0; i < nritems; i++) {
 		btrfs_item_key_to_cpu(eb, &key, i);
-		item_size = btrfs_item_size_nr(eb, i);
 
 		/* inode keys are done during the first stage */
 		if (key.type == BTRFS_INODE_ITEM_KEY &&
@@ -1668,7 +1661,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
 				   struct walk_control *wc)
 {
 	u64 root_owner;
-	u64 root_gen;
 	u64 bytenr;
 	u64 ptr_gen;
 	struct extent_buffer *next;
@@ -1698,7 +1690,6 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
 
 		parent = path->nodes[*level];
 		root_owner = btrfs_header_owner(parent);
-		root_gen = btrfs_header_generation(parent);
 
 		next = btrfs_find_create_tree_block(root, bytenr, blocksize);
 
@@ -1749,7 +1740,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
 				 struct walk_control *wc)
 {
 	u64 root_owner;
-	u64 root_gen;
 	int i;
 	int slot;
 	int ret;
@@ -1757,8 +1747,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
 	for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
 		slot = path->slots[i];
 		if (slot + 1 < btrfs_header_nritems(path->nodes[i])) {
-			struct extent_buffer *node;
-			node = path->nodes[i];
 			path->slots[i]++;
 			*level = i;
 			WARN_ON(*level == 0);
@@ -1771,7 +1759,6 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
 				parent = path->nodes[*level + 1];
 
 			root_owner = btrfs_header_owner(parent);
-			root_gen = btrfs_header_generation(parent);
 			wc->process_func(root, path->nodes[*level], wc,
 				 btrfs_header_generation(path->nodes[*level]));
 			if (wc->free) {
@@ -2273,7 +2260,7 @@ fail:
 	}
 	btrfs_end_log_trans(root);
 
-	return 0;
+	return err;
 }
 
 /* see comments for btrfs_del_dir_entries_in_log */
@@ -2729,7 +2716,6 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
 	struct btrfs_key max_key;
 	struct btrfs_root *log = root->log_root;
 	struct extent_buffer *src = NULL;
-	u32 size;
 	int err = 0;
 	int ret;
 	int nritems;
@@ -2793,7 +2779,6 @@ again:
 			break;
 
 		src = path->nodes[0];
-		size = btrfs_item_size_nr(src, path->slots[0]);
 		if (ins_nr && ins_start_slot + ins_nr == path->slots[0]) {
 			ins_nr++;
 			goto next_slot;
@@ -2884,6 +2869,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
 {
 	int ret = 0;
 	struct btrfs_root *root;
+	struct dentry *old_parent = NULL;
 
 	/*
 	 * for regular files, if its inode is already on disk, we don't
@@ -2925,10 +2911,13 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
 		if (IS_ROOT(parent))
 			break;
 
-		parent = parent->d_parent;
+		parent = dget_parent(parent);
+		dput(old_parent);
+		old_parent = parent;
 		inode = parent->d_inode;
 
 	}
+	dput(old_parent);
 out:
 	return ret;
 }
@@ -2960,6 +2949,7 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
 {
 	int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL;
 	struct super_block *sb;
+	struct dentry *old_parent = NULL;
 	int ret = 0;
 	u64 last_committed = root->fs_info->last_trans_committed;
 
@@ -3031,10 +3021,13 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
 		if (IS_ROOT(parent))
 			break;
 
-		parent = parent->d_parent;
+		parent = dget_parent(parent);
+		dput(old_parent);
+		old_parent = parent;
 	}
 	ret = 0;
 end_trans:
+	dput(old_parent);
 	if (ret < 0) {
 		BUG_ON(ret != -ENOSPC);
 		root->fs_info->last_trans_log_full_commit = trans->transid;
@@ -3054,8 +3047,13 @@ end_no_trans:
 int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root, struct dentry *dentry)
 {
-	return btrfs_log_inode_parent(trans, root, dentry->d_inode,
-				      dentry->d_parent, 0);
+	struct dentry *parent = dget_parent(dentry);
+	int ret;
+
+	ret = btrfs_log_inode_parent(trans, root, dentry->d_inode, parent, 0);
+	dput(parent);
+
+	return ret;
 }
 
 /*
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index e25e46a8b4e..6b988450783 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -412,12 +412,16 @@ static noinline int device_list_add(const char *path,
 
 		device->fs_devices = fs_devices;
 		fs_devices->num_devices++;
-	} else if (strcmp(device->name, path)) {
+	} else if (!device->name || strcmp(device->name, path)) {
 		name = kstrdup(path, GFP_NOFS);
 		if (!name)
 			return -ENOMEM;
 		kfree(device->name);
 		device->name = name;
+		if (device->missing) {
+			fs_devices->missing_devices--;
+			device->missing = 0;
+		}
 	}
 
 	if (found_transid > fs_devices->latest_trans) {
@@ -1236,6 +1240,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 
 	device->fs_devices->num_devices--;
 
+	if (device->missing)
+		root->fs_info->fs_devices->missing_devices--;
+
 	next_device = list_entry(root->fs_info->fs_devices->devices.next,
 				 struct btrfs_device, dev_list);
 	if (device->bdev == root->fs_info->sb->s_bdev)
@@ -1898,7 +1905,6 @@ int btrfs_balance(struct btrfs_root *dev_root)
 	u64 size_to_free;
 	struct btrfs_path *path;
 	struct btrfs_key key;
-	struct btrfs_chunk *chunk;
 	struct btrfs_root *chunk_root = dev_root->fs_info->chunk_root;
 	struct btrfs_trans_handle *trans;
 	struct btrfs_key found_key;
@@ -1962,9 +1968,6 @@ int btrfs_balance(struct btrfs_root *dev_root)
 		if (found_key.objectid != key.objectid)
 			break;
 
-		chunk = btrfs_item_ptr(path->nodes[0],
-				       path->slots[0],
-				       struct btrfs_chunk);
 		/* chunk zero is special */
 		if (found_key.offset == 0)
 			break;
@@ -3031,8 +3034,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
 		}
 		bio->bi_sector = multi->stripes[dev_nr].physical >> 9;
 		dev = multi->stripes[dev_nr].dev;
-		BUG_ON(rw == WRITE && !dev->writeable);
-		if (dev && dev->bdev) {
+		if (dev && dev->bdev && (rw != WRITE || dev->writeable)) {
 			bio->bi_bdev = dev->bdev;
 			if (async_submit)
 				schedule_bio(root, dev, rw, bio);
@@ -3085,7 +3087,9 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
 	device->devid = devid;
 	device->work.func = pending_bios_fn;
 	device->fs_devices = fs_devices;
+	device->missing = 1;
 	fs_devices->num_devices++;
+	fs_devices->missing_devices++;
 	spin_lock_init(&device->io_lock);
 	INIT_LIST_HEAD(&device->dev_alloc_list);
 	memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
@@ -3283,6 +3287,15 @@ static int read_one_dev(struct btrfs_root *root,
 			device = add_missing_dev(root, devid, dev_uuid);
 			if (!device)
 				return -ENOMEM;
+		} else if (!device->missing) {
+			/*
+			 * this happens when a device that was properly setup
+			 * in the device info lists suddenly goes bad.
+			 * device->bdev is NULL, and so we have to set
+			 * device->missing to one here
+			 */
+			root->fs_info->fs_devices->missing_devices++;
+			device->missing = 1;
 		}
 	}
 
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2b638b6e4ee..2740db49eb0 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -44,6 +44,7 @@ struct btrfs_device {
 
 	int writeable;
 	int in_fs_metadata;
+	int missing;
 
 	spinlock_t io_lock;
 
@@ -93,6 +94,7 @@ struct btrfs_fs_devices {
 	u64 num_devices;
 	u64 open_devices;
 	u64 rw_devices;
+	u64 missing_devices;
 	u64 total_rw_bytes;
 	struct block_device *latest_bdev;
 
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 88ecbb21587..698fdd2c739 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -178,7 +178,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
 	struct inode *inode = dentry->d_inode;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
 	struct btrfs_path *path;
-	struct btrfs_item *item;
 	struct extent_buffer *leaf;
 	struct btrfs_dir_item *di;
 	int ret = 0, slot, advance;
@@ -234,7 +233,6 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
 		}
 		advance = 1;
 
-		item = btrfs_item_nr(leaf, slot);
 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
 
 		/* check to make sure this item is what we want */
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 3e2b90eaa23..b9cd5445f71 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -199,8 +199,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
 	int nr_pages = 0;
 	struct page *in_page = NULL;
 	struct page *out_page = NULL;
-	int out_written = 0;
-	int in_read = 0;
 	unsigned long bytes_left;
 
 	*out_pages = 0;
@@ -233,9 +231,6 @@ int btrfs_zlib_compress_pages(struct address_space *mapping,
 	workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
 	workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
 
-	out_written = 0;
-	in_read = 0;
-
 	while (workspace->def_strm.total_in < len) {
 		ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
 		if (ret != Z_OK) {
diff --git a/fs/buffer.c b/fs/buffer.c
index 7f0b9b083f7..5930e382959 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -905,7 +905,6 @@ try_again:
 
 		bh->b_state = 0;
 		atomic_set(&bh->b_count, 0);
-		bh->b_private = NULL;
 		bh->b_size = size;
 
 		/* Link the buffer to its page */
@@ -1706,7 +1705,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
 		 * and kswapd activity, but those code paths have their own
 		 * higher-level throttling.
 		 */
-		if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
+		if (wbc->sync_mode != WB_SYNC_NONE) {
 			lock_buffer(bh);
 		} else if (!trylock_buffer(bh)) {
 			redirty_page_for_writepage(wbc, page);
@@ -1834,9 +1833,11 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
 }
 EXPORT_SYMBOL(page_zero_new_buffers);
 
-int block_prepare_write(struct page *page, unsigned from, unsigned to,
+int __block_write_begin(struct page *page, loff_t pos, unsigned len,
 		get_block_t *get_block)
 {
+	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+	unsigned to = from + len;
 	struct inode *inode = page->mapping->host;
 	unsigned block_start, block_end;
 	sector_t block;
@@ -1916,7 +1917,7 @@ int block_prepare_write(struct page *page, unsigned from, unsigned to,
 	}
 	return err;
 }
-EXPORT_SYMBOL(block_prepare_write);
+EXPORT_SYMBOL(__block_write_begin);
 
 static int __block_commit_write(struct inode *inode, struct page *page,
 		unsigned from, unsigned to)
@@ -1953,15 +1954,6 @@ static int __block_commit_write(struct inode *inode, struct page *page,
 	return 0;
 }
 
-int __block_write_begin(struct page *page, loff_t pos, unsigned len,
-		get_block_t *get_block)
-{
-	unsigned start = pos & (PAGE_CACHE_SIZE - 1);
-
-	return block_prepare_write(page, start, start + len, get_block);
-}
-EXPORT_SYMBOL(__block_write_begin);
-
 /*
  * block_write_begin takes care of the basic task of block allocation and
  * bringing partial write blocks uptodate first.
@@ -2379,7 +2371,7 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
 	else
 		end = PAGE_CACHE_SIZE;
 
-	ret = block_prepare_write(page, 0, end, get_block);
+	ret = __block_write_begin(page, 0, end, get_block);
 	if (!ret)
 		ret = block_commit_write(page, 0, end);
 
@@ -2466,11 +2458,10 @@ int nobh_write_begin(struct address_space *mapping,
 	*fsdata = NULL;
 
 	if (page_has_buffers(page)) {
-		unlock_page(page);
-		page_cache_release(page);
-		*pagep = NULL;
-		return block_write_begin(mapping, pos, len, flags, pagep,
-					 get_block);
+		ret = __block_write_begin(page, pos, len, get_block);
+		if (unlikely(ret))
+			goto out_release;
+		return ret;
 	}
 
 	if (PageMappedToDisk(page))
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 51bcc5ce323..561438b6a50 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -204,7 +204,7 @@ static int readpage_nounlock(struct file *filp, struct page *page)
 	err = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
 				  page->index << PAGE_CACHE_SHIFT, &len,
 				  ci->i_truncate_seq, ci->i_truncate_size,
-				  &page, 1);
+				  &page, 1, 0);
 	if (err == -ENOENT)
 		err = 0;
 	if (err < 0) {
@@ -287,7 +287,7 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
 	rc = ceph_osdc_readpages(osdc, ceph_vino(inode), &ci->i_layout,
 				 offset, &len,
 				 ci->i_truncate_seq, ci->i_truncate_size,
-				 pages, nr_pages);
+				 pages, nr_pages, 0);
 	if (rc == -ENOENT)
 		rc = 0;
 	if (rc < 0)
@@ -591,7 +591,6 @@ static int ceph_writepages_start(struct address_space *mapping,
 				 struct writeback_control *wbc)
 {
 	struct inode *inode = mapping->host;
-	struct backing_dev_info *bdi = mapping->backing_dev_info;
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_fs_client *fsc;
 	pgoff_t index, start, end;
@@ -633,13 +632,6 @@ static int ceph_writepages_start(struct address_space *mapping,
 
 	pagevec_init(&pvec, 0);
 
-	/* ?? */
-	if (wbc->nonblocking && bdi_write_congested(bdi)) {
-		dout(" writepages congested\n");
-		wbc->encountered_congestion = 1;
-		goto out_final;
-	}
-
 	/* where to start/end? */
 	if (wbc->range_cyclic) {
 		start = mapping->writeback_index; /* Start from prev offset */
@@ -782,7 +774,7 @@ get_more_pages:
 					    snapc, do_sync,
 					    ci->i_truncate_seq,
 					    ci->i_truncate_size,
-					    &inode->i_mtime, true, 1);
+					    &inode->i_mtime, true, 1, 0);
 				max_pages = req->r_num_pages;
 
 				alloc_page_vec(fsc, req);
@@ -885,7 +877,6 @@ out:
 		rc = 0;  /* vfs expects us to return 0 */
 	ceph_put_snap_context(snapc);
 	dout("writepages done, rc = %d\n", rc);
-out_final:
 	return rc;
 }
 
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 98ab13e2b71..60d27bc9eb8 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1430,8 +1430,8 @@ static int try_nonblocking_invalidate(struct inode *inode)
 	    invalidating_gen == ci->i_rdcache_gen) {
 		/* success. */
 		dout("try_nonblocking_invalidate %p success\n", inode);
-		ci->i_rdcache_gen = 0;
-		ci->i_rdcache_revoking = 0;
+		/* save any racing async invalidate some trouble */
+		ci->i_rdcache_revoking = ci->i_rdcache_gen - 1;
 		return 0;
 	}
 	dout("try_nonblocking_invalidate %p failed\n", inode);
@@ -2273,8 +2273,7 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 {
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	int mds = session->s_mds;
-	unsigned seq = le32_to_cpu(grant->seq);
-	unsigned issue_seq = le32_to_cpu(grant->issue_seq);
+	int seq = le32_to_cpu(grant->seq);
 	int newcaps = le32_to_cpu(grant->caps);
 	int issued, implemented, used, wanted, dirty;
 	u64 size = le64_to_cpu(grant->size);
@@ -2286,8 +2285,8 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 	int revoked_rdcache = 0;
 	int queue_invalidate = 0;
 
-	dout("handle_cap_grant inode %p cap %p mds%d seq %u/%u %s\n",
-	     inode, cap, mds, seq, issue_seq, ceph_cap_string(newcaps));
+	dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
+	     inode, cap, mds, seq, ceph_cap_string(newcaps));
 	dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
 		inode->i_size);
 
@@ -2383,7 +2382,6 @@ static void handle_cap_grant(struct inode *inode, struct ceph_mds_caps *grant,
 	}
 
 	cap->seq = seq;
-	cap->issue_seq = issue_seq;
 
 	/* file layout may have changed */
 	ci->i_layout = grant->layout;
@@ -2691,6 +2689,11 @@ static void handle_cap_import(struct ceph_mds_client *mdsc,
 		     NULL /* no caps context */);
 	try_flush_caps(inode, session, NULL);
 	up_read(&mdsc->snap_rwsem);
+
+	/* make sure we re-request max_size, if necessary */
+	spin_lock(&inode->i_lock);
+	ci->i_requested_max_size = 0;
+	spin_unlock(&inode->i_lock);
 }
 
 /*
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index e0a2dc6fcaf..d902948a90d 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -40,7 +40,8 @@ int ceph_init_dentry(struct dentry *dentry)
 	if (dentry->d_fsdata)
 		return 0;
 
-	if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
+	if (dentry->d_parent == NULL ||   /* nfs fh_to_dentry */
+	    ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
 		dentry->d_op = &ceph_dentry_ops;
 	else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
 		dentry->d_op = &ceph_snapdir_dentry_ops;
@@ -114,8 +115,8 @@ static int __dcache_readdir(struct file *filp,
 	spin_lock(&dcache_lock);
 
 	/* start at beginning? */
-	if (filp->f_pos == 2 || (last &&
-				 filp->f_pos < ceph_dentry(last)->offset)) {
+	if (filp->f_pos == 2 || last == NULL ||
+	    filp->f_pos < ceph_dentry(last)->offset) {
 		if (list_empty(&parent->d_subdirs))
 			goto out_unlock;
 		p = parent->d_subdirs.prev;
@@ -336,7 +337,10 @@ more:
 		if (req->r_reply_info.dir_end) {
 			kfree(fi->last_name);
 			fi->last_name = NULL;
-			fi->next_offset = 2;
+			if (ceph_frag_is_rightmost(frag))
+				fi->next_offset = 2;
+			else
+				fi->next_offset = 0;
 		} else {
 			rinfo = &req->r_reply_info;
 			err = note_last_dentry(fi,
@@ -355,18 +359,22 @@ more:
 		u64 pos = ceph_make_fpos(frag, off);
 		struct ceph_mds_reply_inode *in =
 			rinfo->dir_in[off - fi->offset].in;
+		struct ceph_vino vino;
+		ino_t ino;
+
 		dout("readdir off %d (%d/%d) -> %lld '%.*s' %p\n",
 		     off, off - fi->offset, rinfo->dir_nr, pos,
 		     rinfo->dir_dname_len[off - fi->offset],
 		     rinfo->dir_dname[off - fi->offset], in);
 		BUG_ON(!in);
 		ftype = le32_to_cpu(in->mode) >> 12;
+		vino.ino = le64_to_cpu(in->ino);
+		vino.snap = le64_to_cpu(in->snapid);
+		ino = ceph_vino_to_ino(vino);
 		if (filldir(dirent,
 			    rinfo->dir_dname[off - fi->offset],
 			    rinfo->dir_dname_len[off - fi->offset],
-			    pos,
-			    le64_to_cpu(in->ino),
-			    ftype) < 0) {
+			    pos, ino, ftype) < 0) {
 			dout("filldir stopping us...\n");
 			return 0;
 		}
@@ -414,6 +422,7 @@ static void reset_readdir(struct ceph_file_info *fi)
 		fi->last_readdir = NULL;
 	}
 	kfree(fi->last_name);
+	fi->last_name = NULL;
 	fi->next_offset = 2;  /* compensate for . and .. */
 	if (fi->dentry) {
 		dput(fi->dentry);
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index e77c28cf369..7d0e4a82d89 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -154,11 +154,13 @@ int ceph_open(struct inode *inode, struct file *file)
 	}
 
 	/*
-	 * No need to block if we have any caps.  Update wanted set
+	 * No need to block if we have caps on the auth MDS (for
+	 * write) or any MDS (for read).  Update wanted set
 	 * asynchronously.
 	 */
 	spin_lock(&inode->i_lock);
-	if (__ceph_is_any_real_caps(ci)) {
+	if (__ceph_is_any_real_caps(ci) &&
+	    (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
 		int mds_wanted = __ceph_caps_mds_wanted(ci);
 		int issued = __ceph_caps_issued(ci, NULL);
 
@@ -280,11 +282,13 @@ int ceph_release(struct inode *inode, struct file *file)
 static int striped_read(struct inode *inode,
 			u64 off, u64 len,
 			struct page **pages, int num_pages,
-			int *checkeof)
+			int *checkeof, bool align_to_pages,
+			unsigned long buf_align)
 {
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	u64 pos, this_len;
+	int io_align, page_align;
 	int page_off = off & ~PAGE_CACHE_MASK; /* first byte's offset in page */
 	int left, pages_left;
 	int read;
@@ -300,14 +304,19 @@ static int striped_read(struct inode *inode,
 	page_pos = pages;
 	pages_left = num_pages;
 	read = 0;
+	io_align = off & ~PAGE_MASK;
 
 more:
+	if (align_to_pages)
+		page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
+	else
+		page_align = pos & ~PAGE_MASK;
 	this_len = left;
 	ret = ceph_osdc_readpages(&fsc->client->osdc, ceph_vino(inode),
 				  &ci->i_layout, pos, &this_len,
 				  ci->i_truncate_seq,
 				  ci->i_truncate_size,
-				  page_pos, pages_left);
+				  page_pos, pages_left, page_align);
 	hit_stripe = this_len < left;
 	was_short = ret >= 0 && ret < this_len;
 	if (ret == -ENOENT)
@@ -368,32 +377,34 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
 	struct inode *inode = file->f_dentry->d_inode;
 	struct page **pages;
 	u64 off = *poff;
-	int num_pages = calc_pages_for(off, len);
-	int ret;
+	int num_pages, ret;
 
 	dout("sync_read on file %p %llu~%u %s\n", file, off, len,
 	     (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
 
 	if (file->f_flags & O_DIRECT) {
-		pages = ceph_get_direct_page_vector(data, num_pages, off, len);
-
-		/*
-		 * flush any page cache pages in this range.  this
-		 * will make concurrent normal and O_DIRECT io slow,
-		 * but it will at least behave sensibly when they are
-		 * in sequence.
-		 */
+		num_pages = calc_pages_for((unsigned long)data, len);
+		pages = ceph_get_direct_page_vector(data, num_pages, true);
 	} else {
+		num_pages = calc_pages_for(off, len);
 		pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
 	}
 	if (IS_ERR(pages))
 		return PTR_ERR(pages);
 
+	/*
+	 * flush any page cache pages in this range.  this
+	 * will make concurrent normal and sync io slow,
+	 * but it will at least behave sensibly when they are
+	 * in sequence.
+	 */
 	ret = filemap_write_and_wait(inode->i_mapping);
 	if (ret < 0)
 		goto done;
 
-	ret = striped_read(inode, off, len, pages, num_pages, checkeof);
+	ret = striped_read(inode, off, len, pages, num_pages, checkeof,
+			   file->f_flags & O_DIRECT,
+			   (unsigned long)data & ~PAGE_MASK);
 
 	if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
 		ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
@@ -402,7 +413,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
 
 done:
 	if (file->f_flags & O_DIRECT)
-		ceph_put_page_vector(pages, num_pages);
+		ceph_put_page_vector(pages, num_pages, true);
 	else
 		ceph_release_page_vector(pages, num_pages);
 	dout("sync_read result %d\n", ret);
@@ -448,6 +459,8 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
 	int flags;
 	int do_sync = 0;
 	int check_caps = 0;
+	int page_align, io_align;
+	unsigned long buf_align;
 	int ret;
 	struct timespec mtime = CURRENT_TIME;
 
@@ -462,6 +475,9 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
 	else
 		pos = *offset;
 
+	io_align = pos & ~PAGE_MASK;
+	buf_align = (unsigned long)data & ~PAGE_MASK;
+
 	ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left);
 	if (ret < 0)
 		return ret;
@@ -486,20 +502,27 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
 	 */
 more:
 	len = left;
+	if (file->f_flags & O_DIRECT) {
+		/* write from beginning of first page, regardless of
+		   io alignment */
+		page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
+		num_pages = calc_pages_for((unsigned long)data, len);
+	} else {
+		page_align = pos & ~PAGE_MASK;
+		num_pages = calc_pages_for(pos, len);
+	}
 	req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
 				    ceph_vino(inode), pos, &len,
 				    CEPH_OSD_OP_WRITE, flags,
 				    ci->i_snap_realm->cached_context,
 				    do_sync,
 				    ci->i_truncate_seq, ci->i_truncate_size,
-				    &mtime, false, 2);
+				    &mtime, false, 2, page_align);
 	if (!req)
 		return -ENOMEM;
 
-	num_pages = calc_pages_for(pos, len);
-
 	if (file->f_flags & O_DIRECT) {
-		pages = ceph_get_direct_page_vector(data, num_pages, pos, len);
+		pages = ceph_get_direct_page_vector(data, num_pages, false);
 		if (IS_ERR(pages)) {
 			ret = PTR_ERR(pages);
 			goto out;
@@ -549,7 +572,7 @@ more:
 	}
 
 	if (file->f_flags & O_DIRECT)
-		ceph_put_page_vector(pages, num_pages);
+		ceph_put_page_vector(pages, num_pages, false);
 	else if (file->f_flags & O_SYNC)
 		ceph_release_page_vector(pages, num_pages);
 
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 1d6a45b5a04..bf1286588f2 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -2,7 +2,6 @@
 
 #include <linux/module.h>
 #include <linux/fs.h>
-#include <linux/smp_lock.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/uaccess.h>
@@ -471,7 +470,9 @@ void ceph_fill_file_time(struct inode *inode, int issued,
 
 	if (issued & (CEPH_CAP_FILE_EXCL|
 		      CEPH_CAP_FILE_WR|
-		      CEPH_CAP_FILE_BUFFER)) {
+		      CEPH_CAP_FILE_BUFFER|
+		      CEPH_CAP_AUTH_EXCL|
+		      CEPH_CAP_XATTR_EXCL)) {
 		if (timespec_compare(ctime, &inode->i_ctime) > 0) {
 			dout("ctime %ld.%09ld -> %ld.%09ld inc w/ cap\n",
 			     inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec,
@@ -511,7 +512,7 @@ void ceph_fill_file_time(struct inode *inode, int issued,
 			warn = 1;
 		}
 	} else {
-		/* we have no write caps; whatever the MDS says is true */
+		/* we have no write|excl caps; whatever the MDS says is true */
 		if (ceph_seq_cmp(time_warp_seq, ci->i_time_warp_seq) >= 0) {
 			inode->i_ctime = *ctime;
 			inode->i_mtime = *mtime;
@@ -567,12 +568,17 @@ static int fill_inode(struct inode *inode,
 
 	/*
 	 * provided version will be odd if inode value is projected,
-	 * even if stable.  skip the update if we have a newer info
-	 * (e.g., due to inode info racing form multiple MDSs), or if
-	 * we are getting projected (unstable) inode info.
+	 * even if stable.  skip the update if we have newer stable
+	 * info (ours>=theirs, e.g. due to racing mds replies), unless
+	 * we are getting projected (unstable) info (in which case the
+	 * version is odd, and we want ours>theirs).
+	 *   us   them
+	 *   2    2     skip
+	 *   3    2     skip
+	 *   3    3     update
 	 */
 	if (le64_to_cpu(info->version) > 0 &&
-	    (ci->i_version & ~1) > le64_to_cpu(info->version))
+	    (ci->i_version & ~1) >= le64_to_cpu(info->version))
 		goto no_change;
 
 	issued = __ceph_caps_issued(ci, &implemented);
@@ -606,7 +612,14 @@ static int fill_inode(struct inode *inode,
 			    le32_to_cpu(info->time_warp_seq),
 			    &ctime, &mtime, &atime);
 
-	ci->i_max_size = le64_to_cpu(info->max_size);
+	/* only update max_size on auth cap */
+	if ((info->cap.flags & CEPH_CAP_FLAG_AUTH) &&
+	    ci->i_max_size != le64_to_cpu(info->max_size)) {
+		dout("max_size %lld -> %llu\n", ci->i_max_size,
+		     le64_to_cpu(info->max_size));
+		ci->i_max_size = le64_to_cpu(info->max_size);
+	}
+
 	ci->i_layout = info->layout;
 	inode->i_blkbits = fls(le32_to_cpu(info->layout.fl_stripe_unit)) - 1;
 
@@ -1055,7 +1068,8 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
 		ininfo = rinfo->targeti.in;
 		vino.ino = le64_to_cpu(ininfo->ino);
 		vino.snap = le64_to_cpu(ininfo->snapid);
-		if (!dn->d_inode) {
+		in = dn->d_inode;
+		if (!in) {
 			in = ceph_get_inode(sb, vino);
 			if (IS_ERR(in)) {
 				pr_err("fill_trace bad get_inode "
@@ -1386,11 +1400,8 @@ static void ceph_invalidate_work(struct work_struct *work)
 	spin_lock(&inode->i_lock);
 	dout("invalidate_pages %p gen %d revoking %d\n", inode,
 	     ci->i_rdcache_gen, ci->i_rdcache_revoking);
-	if (ci->i_rdcache_gen == 0 ||
-	    ci->i_rdcache_revoking != ci->i_rdcache_gen) {
-		BUG_ON(ci->i_rdcache_revoking > ci->i_rdcache_gen);
+	if (ci->i_rdcache_revoking != ci->i_rdcache_gen) {
 		/* nevermind! */
-		ci->i_rdcache_revoking = 0;
 		spin_unlock(&inode->i_lock);
 		goto out;
 	}
@@ -1400,15 +1411,16 @@ static void ceph_invalidate_work(struct work_struct *work)
 	ceph_invalidate_nondirty_pages(inode->i_mapping);
 
 	spin_lock(&inode->i_lock);
-	if (orig_gen == ci->i_rdcache_gen) {
+	if (orig_gen == ci->i_rdcache_gen &&
+	    orig_gen == ci->i_rdcache_revoking) {
 		dout("invalidate_pages %p gen %d successful\n", inode,
 		     ci->i_rdcache_gen);
-		ci->i_rdcache_gen = 0;
-		ci->i_rdcache_revoking = 0;
+		ci->i_rdcache_revoking--;
 		check = 1;
 	} else {
-		dout("invalidate_pages %p gen %d raced, gen now %d\n",
-		     inode, orig_gen, ci->i_rdcache_gen);
+		dout("invalidate_pages %p gen %d raced, now %d revoking %d\n",
+		     inode, orig_gen, ci->i_rdcache_gen,
+		     ci->i_rdcache_revoking);
 	}
 	spin_unlock(&inode->i_lock);
 
@@ -1739,7 +1751,7 @@ int ceph_do_getattr(struct inode *inode, int mask)
 		return 0;
 	}
 
-	dout("do_getattr inode %p mask %s\n", inode, ceph_cap_string(mask));
+	dout("do_getattr inode %p mask %s mode 0%o\n", inode, ceph_cap_string(mask), inode->i_mode);
 	if (ceph_caps_issued_mask(ceph_inode(inode), mask, 1))
 		return 0;
 
diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h
index a6ce54e94eb..52e8fd74d45 100644
--- a/fs/ceph/ioctl.h
+++ b/fs/ceph/ioctl.h
@@ -4,7 +4,7 @@
 #include <linux/ioctl.h>
 #include <linux/types.h>
 
-#define CEPH_IOCTL_MAGIC 0x98
+#define CEPH_IOCTL_MAGIC 0x97
 
 /* just use u64 to align sanely on all archs */
 struct ceph_ioctl_layout {
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 40abde93c34..476b329867d 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -11,40 +11,68 @@
  * Implement fcntl and flock locking functions.
  */
 static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
-			     u64 pid, u64 pid_ns,
-			     int cmd, u64 start, u64 length, u8 wait)
+			     int cmd, u8 wait, struct file_lock *fl)
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	struct ceph_mds_client *mdsc =
 		ceph_sb_to_client(inode->i_sb)->mdsc;
 	struct ceph_mds_request *req;
 	int err;
+	u64 length = 0;
 
 	req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 	req->r_inode = igrab(inode);
 
+	/* mds requires start and length rather than start and end */
+	if (LLONG_MAX == fl->fl_end)
+		length = 0;
+	else
+		length = fl->fl_end - fl->fl_start + 1;
+
 	dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
 	     "length: %llu, wait: %d, type`: %d", (int)lock_type,
-	     (int)operation, pid, start, length, wait, cmd);
+	     (int)operation, (u64)fl->fl_pid, fl->fl_start,
+	     length, wait, fl->fl_type);
+
 
 	req->r_args.filelock_change.rule = lock_type;
 	req->r_args.filelock_change.type = cmd;
-	req->r_args.filelock_change.pid = cpu_to_le64(pid);
+	req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
 	/* This should be adjusted, but I'm not sure if
 	   namespaces actually get id numbers*/
 	req->r_args.filelock_change.pid_namespace =
-		cpu_to_le64((u64)pid_ns);
-	req->r_args.filelock_change.start = cpu_to_le64(start);
+		cpu_to_le64((u64)(unsigned long)fl->fl_nspid);
+	req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
 	req->r_args.filelock_change.length = cpu_to_le64(length);
 	req->r_args.filelock_change.wait = wait;
 
 	err = ceph_mdsc_do_request(mdsc, inode, req);
+
+	if ( operation == CEPH_MDS_OP_GETFILELOCK){
+		fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid);
+		if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
+			fl->fl_type = F_RDLCK;
+		else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type)
+			fl->fl_type = F_WRLCK;
+		else
+			fl->fl_type = F_UNLCK;
+
+		fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start);
+		length = le64_to_cpu(req->r_reply_info.filelock_reply->start) +
+						 le64_to_cpu(req->r_reply_info.filelock_reply->length);
+		if (length >= 1)
+			fl->fl_end = length -1;
+		else
+			fl->fl_end = 0;
+
+	}
 	ceph_mdsc_put_request(req);
 	dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
-	     "length: %llu, wait: %d, type`: %d err code %d", (int)lock_type,
-	     (int)operation, pid, start, length, wait, cmd, err);
+	     "length: %llu, wait: %d, type`: %d, err code %d", (int)lock_type,
+	     (int)operation, (u64)fl->fl_pid, fl->fl_start,
+	     length, wait, fl->fl_type, err);
 	return err;
 }
 
@@ -54,7 +82,6 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
  */
 int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 {
-	u64 length;
 	u8 lock_cmd;
 	int err;
 	u8 wait = 0;
@@ -76,29 +103,20 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 	else
 		lock_cmd = CEPH_LOCK_UNLOCK;
 
-	if (LLONG_MAX == fl->fl_end)
-		length = 0;
-	else
-		length = fl->fl_end - fl->fl_start + 1;
-
-	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
-				(u64)fl->fl_pid,
-				(u64)(unsigned long)fl->fl_nspid,
-				lock_cmd, fl->fl_start,
-				length, wait);
+	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
 	if (!err) {
-		dout("mds locked, locking locally");
-		err = posix_lock_file(file, fl, NULL);
-		if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
-			/* undo! This should only happen if the kernel detects
-			 * local deadlock. */
-			ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
-					  (u64)fl->fl_pid,
-					  (u64)(unsigned long)fl->fl_nspid,
-					  CEPH_LOCK_UNLOCK, fl->fl_start,
-					  length, 0);
-			dout("got %d on posix_lock_file, undid lock", err);
+		if ( op != CEPH_MDS_OP_GETFILELOCK ){
+			dout("mds locked, locking locally");
+			err = posix_lock_file(file, fl, NULL);
+			if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
+				/* undo! This should only happen if the kernel detects
+				 * local deadlock. */
+				ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
+						  CEPH_LOCK_UNLOCK, 0, fl);
+				dout("got %d on posix_lock_file, undid lock", err);
+			}
 		}
+
 	} else {
 		dout("mds returned error code %d", err);
 	}
@@ -107,7 +125,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 
 int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
 {
-	u64 length;
 	u8 lock_cmd;
 	int err;
 	u8 wait = 1;
@@ -127,26 +144,15 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
 		lock_cmd = CEPH_LOCK_EXCL;
 	else
 		lock_cmd = CEPH_LOCK_UNLOCK;
-	/* mds requires start and length rather than start and end */
-	if (LLONG_MAX == fl->fl_end)
-		length = 0;
-	else
-		length = fl->fl_end - fl->fl_start + 1;
 
 	err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
-				file, (u64)fl->fl_pid,
-				(u64)(unsigned long)fl->fl_nspid,
-				lock_cmd, fl->fl_start,
-				length, wait);
+				file, lock_cmd, wait, fl);
 	if (!err) {
 		err = flock_lock_file_wait(file, fl);
 		if (err) {
 			ceph_lock_message(CEPH_LOCK_FLOCK,
 					  CEPH_MDS_OP_SETFILELOCK,
-					  file, (u64)fl->fl_pid,
-					  (u64)(unsigned long)fl->fl_nspid,
-					  CEPH_LOCK_UNLOCK, fl->fl_start,
-					  length, 0);
+					  file, CEPH_LOCK_UNLOCK, 0, fl);
 			dout("got %d on flock_lock_file_wait, undid lock", err);
 		}
 	} else {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 3142b15940c..38800eaa81d 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -6,7 +6,6 @@
 #include <linux/sched.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
-#include <linux/smp_lock.h>
 
 #include "super.h"
 #include "mds_client.h"
@@ -203,6 +202,38 @@ out_bad:
 }
 
 /*
+ * parse fcntl F_GETLK results
+ */
+static int parse_reply_info_filelock(void **p, void *end,
+                struct ceph_mds_reply_info_parsed *info)
+{
+	if (*p + sizeof(*info->filelock_reply) > end)
+		goto bad;
+
+	info->filelock_reply = *p;
+	*p += sizeof(*info->filelock_reply);
+
+	if (unlikely(*p != end))
+		goto bad;
+	return 0;
+
+bad:
+	return -EIO;
+}
+
+/*
+ * parse extra results
+ */
+static int parse_reply_info_extra(void **p, void *end,
+                struct ceph_mds_reply_info_parsed *info)
+{
+	if (info->head->op == CEPH_MDS_OP_GETFILELOCK)
+		return parse_reply_info_filelock(p, end, info);
+	else
+		return parse_reply_info_dir(p, end, info);
+}
+
+/*
  * parse entire mds reply
  */
 static int parse_reply_info(struct ceph_msg *msg,
@@ -224,10 +255,10 @@ static int parse_reply_info(struct ceph_msg *msg,
 			goto out_bad;
 	}
 
-	/* dir content */
+	/* extra */
 	ceph_decode_32_safe(&p, end, len, bad);
 	if (len > 0) {
-		err = parse_reply_info_dir(&p, p+len, info);
+		err = parse_reply_info_extra(&p, p+len, info);
 		if (err < 0)
 			goto out_bad;
 	}
@@ -529,6 +560,9 @@ static void __register_request(struct ceph_mds_client *mdsc,
 	ceph_mdsc_get_request(req);
 	__insert_request(mdsc, req);
 
+	req->r_uid = current_fsuid();
+	req->r_gid = current_fsgid();
+
 	if (dir) {
 		struct ceph_inode_info *ci = ceph_inode(dir);
 
@@ -1588,8 +1622,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
 
 	head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
 	head->op = cpu_to_le32(req->r_op);
-	head->caller_uid = cpu_to_le32(current_fsuid());
-	head->caller_gid = cpu_to_le32(current_fsgid());
+	head->caller_uid = cpu_to_le32(req->r_uid);
+	head->caller_gid = cpu_to_le32(req->r_gid);
 	head->args = req->r_args;
 
 	ceph_encode_filepath(&p, end, ino1, path1);
@@ -2072,7 +2106,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 
 	mutex_lock(&session->s_mutex);
 	if (err < 0) {
-		pr_err("mdsc_handle_reply got corrupt reply mds%d\n", mds);
+		pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid);
 		ceph_msg_dump(msg);
 		goto out_err;
 	}
@@ -2092,7 +2126,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 	mutex_lock(&req->r_fill_mutex);
 	err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
 	if (err == 0) {
-		if (result == 0 && rinfo->dir_nr)
+		if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK &&
+		    rinfo->dir_nr)
 			ceph_readdir_prepopulate(req, req->r_session);
 		ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
 	}
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index d66d63c7235..aabe563b54d 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -42,26 +42,37 @@ struct ceph_mds_reply_info_in {
 };
 
 /*
- * parsed info about an mds reply, including information about the
- * target inode and/or its parent directory and dentry, and directory
- * contents (for readdir results).
+ * parsed info about an mds reply, including information about
+ * either: 1) the target inode and/or its parent directory and dentry,
+ * and directory contents (for readdir results), or
+ * 2) the file range lock info (for fcntl F_GETLK results).
  */
 struct ceph_mds_reply_info_parsed {
 	struct ceph_mds_reply_head    *head;
 
+	/* trace */
 	struct ceph_mds_reply_info_in diri, targeti;
 	struct ceph_mds_reply_dirfrag *dirfrag;
 	char                          *dname;
 	u32                           dname_len;
 	struct ceph_mds_reply_lease   *dlease;
 
-	struct ceph_mds_reply_dirfrag *dir_dir;
-	int                           dir_nr;
-	char                          **dir_dname;
-	u32                           *dir_dname_len;
-	struct ceph_mds_reply_lease   **dir_dlease;
-	struct ceph_mds_reply_info_in *dir_in;
-	u8                            dir_complete, dir_end;
+	/* extra */
+	union {
+		/* for fcntl F_GETLK results */
+		struct ceph_filelock *filelock_reply;
+
+		/* for readdir results */
+		struct {
+			struct ceph_mds_reply_dirfrag *dir_dir;
+			int                           dir_nr;
+			char                          **dir_dname;
+			u32                           *dir_dname_len;
+			struct ceph_mds_reply_lease   **dir_dlease;
+			struct ceph_mds_reply_info_in *dir_in;
+			u8                            dir_complete, dir_end;
+		};
+	};
 
 	/* encoded blob describing snapshot contexts for certain
 	   operations (e.g., open) */
@@ -170,6 +181,8 @@ struct ceph_mds_request {
 
 	union ceph_mds_request_args r_args;
 	int r_fmode;        /* file mode, if expecting cap */
+	uid_t r_uid;
+	gid_t r_gid;
 
 	/* for choosing which mds to send this request to */
 	int r_direct_mode;
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index d6e0e042189..08b460ae053 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -635,7 +635,7 @@ static struct dentry *open_root_dentry(struct ceph_fs_client *fsc,
 /*
  * mount: join the ceph cluster, and open root directory.
  */
-static int ceph_mount(struct ceph_fs_client *fsc, struct vfsmount *mnt,
+static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
 		      const char *path)
 {
 	int err;
@@ -678,16 +678,14 @@ static int ceph_mount(struct ceph_fs_client *fsc, struct vfsmount *mnt,
 		}
 	}
 
-	mnt->mnt_root = root;
-	mnt->mnt_sb = fsc->sb;
-
 	fsc->mount_state = CEPH_MOUNT_MOUNTED;
 	dout("mount success\n");
-	err = 0;
+	mutex_unlock(&fsc->client->mount_mutex);
+	return root;
 
 out:
 	mutex_unlock(&fsc->client->mount_mutex);
-	return err;
+	return ERR_PTR(err);
 
 fail:
 	if (first) {
@@ -777,41 +775,45 @@ static int ceph_register_bdi(struct super_block *sb,
 	return err;
 }
 
-static int ceph_get_sb(struct file_system_type *fs_type,
-		       int flags, const char *dev_name, void *data,
-		       struct vfsmount *mnt)
+static struct dentry *ceph_mount(struct file_system_type *fs_type,
+		       int flags, const char *dev_name, void *data)
 {
 	struct super_block *sb;
 	struct ceph_fs_client *fsc;
+	struct dentry *res;
 	int err;
 	int (*compare_super)(struct super_block *, void *) = ceph_compare_super;
 	const char *path = NULL;
 	struct ceph_mount_options *fsopt = NULL;
 	struct ceph_options *opt = NULL;
 
-	dout("ceph_get_sb\n");
+	dout("ceph_mount\n");
 	err = parse_mount_options(&fsopt, &opt, flags, data, dev_name, &path);
-	if (err < 0)
+	if (err < 0) {
+		res = ERR_PTR(err);
 		goto out_final;
+	}
 
 	/* create client (which we may/may not use) */
 	fsc = create_fs_client(fsopt, opt);
 	if (IS_ERR(fsc)) {
-		err = PTR_ERR(fsc);
+		res = ERR_CAST(fsc);
 		kfree(fsopt);
 		kfree(opt);
 		goto out_final;
 	}
 
 	err = ceph_mdsc_init(fsc);
-	if (err < 0)
+	if (err < 0) {
+		res = ERR_PTR(err);
 		goto out;
+	}
 
 	if (ceph_test_opt(fsc->client, NOSHARE))
 		compare_super = NULL;
 	sb = sget(fs_type, compare_super, ceph_set_super, fsc);
 	if (IS_ERR(sb)) {
-		err = PTR_ERR(sb);
+		res = ERR_CAST(sb);
 		goto out;
 	}
 
@@ -823,16 +825,18 @@ static int ceph_get_sb(struct file_system_type *fs_type,
 	} else {
 		dout("get_sb using new client %p\n", fsc);
 		err = ceph_register_bdi(sb, fsc);
-		if (err < 0)
+		if (err < 0) {
+			res = ERR_PTR(err);
 			goto out_splat;
+		}
 	}
 
-	err = ceph_mount(fsc, mnt, path);
-	if (err < 0)
+	res = ceph_real_mount(fsc, path);
+	if (IS_ERR(res))
 		goto out_splat;
-	dout("root %p inode %p ino %llx.%llx\n", mnt->mnt_root,
-	     mnt->mnt_root->d_inode, ceph_vinop(mnt->mnt_root->d_inode));
-	return 0;
+	dout("root %p inode %p ino %llx.%llx\n", res,
+	     res->d_inode, ceph_vinop(res->d_inode));
+	return res;
 
 out_splat:
 	ceph_mdsc_close_sessions(fsc->mdsc);
@@ -843,8 +847,8 @@ out:
 	ceph_mdsc_destroy(fsc);
 	destroy_fs_client(fsc);
 out_final:
-	dout("ceph_get_sb fail %d\n", err);
-	return err;
+	dout("ceph_mount fail %ld\n", PTR_ERR(res));
+	return res;
 }
 
 static void ceph_kill_sb(struct super_block *s)
@@ -860,7 +864,7 @@ static void ceph_kill_sb(struct super_block *s)
 static struct file_system_type ceph_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "ceph",
-	.get_sb		= ceph_get_sb,
+	.mount		= ceph_mount,
 	.kill_sb	= ceph_kill_sb,
 	.fs_flags	= FS_RENAME_DOES_D_MOVE,
 };
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 1886294e12f..7f01728a465 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -293,9 +293,7 @@ struct ceph_inode_info {
 	int i_rd_ref, i_rdcache_ref, i_wr_ref;
 	int i_wrbuffer_ref, i_wrbuffer_ref_head;
 	u32 i_shared_gen;       /* increment each time we get FILE_SHARED */
-	u32 i_rdcache_gen;      /* we increment this each time we get
-				   FILE_CACHE.  If it's non-zero, we
-				   _may_ have cached pages. */
+	u32 i_rdcache_gen;      /* incremented each time we get FILE_CACHE. */
 	u32 i_rdcache_revoking; /* RDCACHE gen to async invalidate, if any */
 
 	struct list_head i_unsafe_writes; /* uncommitted sync writes */
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 917b7d449bb..ee45648b0d1 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -2,6 +2,10 @@ config CIFS
 	tristate "CIFS support (advanced network filesystem, SMBFS successor)"
 	depends on INET
 	select NLS
+	select CRYPTO
+	select CRYPTO_MD5
+	select CRYPTO_HMAC
+	select CRYPTO_ARC4
 	help
 	  This is the client VFS module for the Common Internet File System
 	  (CIFS) protocol which is the successor to the Server Message Block
@@ -140,6 +144,13 @@ config CIFS_FSCACHE
 	    to be cached locally on disk through the general filesystem cache
 	    manager. If unsure, say N.
 
+config CIFS_ACL
+	  bool "Provide CIFS ACL support (EXPERIMENTAL)"
+	  depends on EXPERIMENTAL && CIFS_XATTR
+	  help
+	    Allows to fetch CIFS/NTFS ACL from the server.  The DACL blob
+	    is handed over to the application/caller.
+
 config CIFS_EXPERIMENTAL
 	  bool "CIFS Experimental Features (EXPERIMENTAL)"
 	  depends on CIFS && EXPERIMENTAL
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index adefa60a9bd..43b19dd3919 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -6,7 +6,9 @@ obj-$(CONFIG_CIFS) += cifs.o
 cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
 	  link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \
 	  md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
-	  readdir.o ioctl.o sess.o export.o cifsacl.o
+	  readdir.o ioctl.o sess.o export.o
+
+cifs-$(CONFIG_CIFS_ACL) += cifsacl.o
 
 cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o
 
diff --git a/fs/cifs/README b/fs/cifs/README
index ee68d103654..46af99ab361 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -337,6 +337,15 @@ A partial list of the supported mount options follows:
   wsize		default write size (default 57344)
 		maximum wsize currently allowed by CIFS is 57344 (fourteen
 		4096 byte pages)
+  actimeo=n	attribute cache timeout in seconds (default 1 second).
+		After this timeout, the cifs client requests fresh attribute
+		information from the server. This option allows to tune the
+		attribute cache timeout to suit the workload needs. Shorter
+		timeouts mean better the cache coherency, but increased number
+		of calls to the server. Longer timeouts mean reduced number
+		of calls to the server at the expense of less stricter cache
+		coherency checks (i.e. incorrect attribute cache for a short
+		period of time).
   rw		mount the network share read-write (note that the
 		server may still consider the share read-only)
   ro		mount network share read-only
diff --git a/fs/cifs/TODO b/fs/cifs/TODO
index 5aff46c61e5..355abcdcda9 100644
--- a/fs/cifs/TODO
+++ b/fs/cifs/TODO
@@ -81,7 +81,7 @@ u) DOS attrs - returned as pseudo-xattr in Samba format (check VFAT and NTFS for
 
 v) mount check for unmatched uids
 
-w) Add support for new vfs entry points for setlease and fallocate 
+w) Add support for new vfs entry point for fallocate
 
 x) Fix Samba 3 server to handle Linux kernel aio so dbench with lots of 
 processes can proceed better in parallel (on the server)
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 525ba59a410..7852cd67705 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -15,7 +15,7 @@
  *   the GNU Lesser General Public License for more details.
  *
  */
-#include <linux/radix-tree.h>
+#include <linux/rbtree.h>
 
 #ifndef _CIFS_FS_SB_H
 #define _CIFS_FS_SB_H
@@ -42,12 +42,13 @@
 #define CIFS_MOUNT_MULTIUSER	0x20000 /* multiuser mount */
 
 struct cifs_sb_info {
-	struct radix_tree_root tlink_tree;
-#define CIFS_TLINK_MASTER_TAG		0	/* is "master" (mount) tcon */
+	struct rb_root tlink_tree;
 	spinlock_t tlink_tree_lock;
+	struct tcon_link *master_tlink;
 	struct nls_table *local_nls;
 	unsigned int rsize;
 	unsigned int wsize;
+	unsigned long actimeo; /* attribute cache timeout (jiffies) */
 	atomic_t active;
 	uid_t	mnt_uid;
 	gid_t	mnt_gid;
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index c9b4792ae82..a437ec391a0 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -30,8 +30,6 @@
 #include "cifs_debug.h"
 
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
-
 static struct cifs_wksid wksidarr[NUM_WK_SIDS] = {
 	{{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"},
 	{{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"},
@@ -560,7 +558,7 @@ static struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb,
 	struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
 
 	if (IS_ERR(tlink))
-		return NULL;
+		return ERR_CAST(tlink);
 
 	xid = GetXid();
 	rc = CIFSSMBGetCIFSACL(xid, tlink_tcon(tlink), fid, &pntsd, pacllen);
@@ -568,7 +566,9 @@ static struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb,
 
 	cifs_put_tlink(tlink);
 
-	cFYI(1, "GetCIFSACL rc = %d ACL len %d", rc, *pacllen);
+	cFYI(1, "%s: rc = %d ACL len %d", __func__, rc, *pacllen);
+	if (rc)
+		return ERR_PTR(rc);
 	return pntsd;
 }
 
@@ -583,7 +583,7 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb,
 	struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
 
 	if (IS_ERR(tlink))
-		return NULL;
+		return ERR_CAST(tlink);
 
 	tcon = tlink_tcon(tlink);
 	xid = GetXid();
@@ -591,23 +591,22 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb,
 	rc = CIFSSMBOpen(xid, tcon, path, FILE_OPEN, READ_CONTROL, 0,
 			 &fid, &oplock, NULL, cifs_sb->local_nls,
 			 cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
-	if (rc) {
-		cERROR(1, "Unable to open file to get ACL");
-		goto out;
+	if (!rc) {
+		rc = CIFSSMBGetCIFSACL(xid, tcon, fid, &pntsd, pacllen);
+		CIFSSMBClose(xid, tcon, fid);
 	}
 
-	rc = CIFSSMBGetCIFSACL(xid, tcon, fid, &pntsd, pacllen);
-	cFYI(1, "GetCIFSACL rc = %d ACL len %d", rc, *pacllen);
-
-	CIFSSMBClose(xid, tcon, fid);
- out:
 	cifs_put_tlink(tlink);
 	FreeXid(xid);
+
+	cFYI(1, "%s: rc = %d ACL len %d", __func__, rc, *pacllen);
+	if (rc)
+		return ERR_PTR(rc);
 	return pntsd;
 }
 
 /* Retrieve an ACL from the server */
-static struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
+struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
 				      struct inode *inode, const char *path,
 				      u32 *pacllen)
 {
@@ -695,7 +694,7 @@ static int set_cifs_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
 }
 
 /* Translate the CIFS ACL (simlar to NTFS ACL) for a file into mode bits */
-void
+int
 cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
 		  struct inode *inode, const char *path, const __u16 *pfid)
 {
@@ -711,17 +710,21 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
 		pntsd = get_cifs_acl(cifs_sb, inode, path, &acllen);
 
 	/* if we can retrieve the ACL, now parse Access Control Entries, ACEs */
-	if (pntsd)
+	if (IS_ERR(pntsd)) {
+		rc = PTR_ERR(pntsd);
+		cERROR(1, "%s: error %d getting sec desc", __func__, rc);
+	} else {
 		rc = parse_sec_desc(pntsd, acllen, fattr);
-	if (rc)
-		cFYI(1, "parse sec desc failed rc = %d", rc);
+		kfree(pntsd);
+		if (rc)
+			cERROR(1, "parse sec desc failed rc = %d", rc);
+	}
 
-	kfree(pntsd);
-	return;
+	return rc;
 }
 
 /* Convert mode bits to an ACL so we can update the ACL on the server */
-int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
+int mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode)
 {
 	int rc = 0;
 	__u32 secdesclen = 0;
@@ -736,7 +739,10 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
 	/* Add three ACEs for owner, group, everyone getting rid of
 	   other ACEs as chmod disables ACEs and set the security descriptor */
 
-	if (pntsd) {
+	if (IS_ERR(pntsd)) {
+		rc = PTR_ERR(pntsd);
+		cERROR(1, "%s: error %d getting sec desc", __func__, rc);
+	} else {
 		/* allocate memory for the smb header,
 		   set security descriptor request security descriptor
 		   parameters, and secuirty descriptor itself */
@@ -766,4 +772,3 @@ int mode_to_acl(struct inode *inode, const char *path, __u64 nmode)
 
 	return rc;
 }
-#endif /* CONFIG_CIFS_EXPERIMENTAL */
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
index 6c8096cf515..c4ae7d03656 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/cifs/cifsacl.h
@@ -74,11 +74,7 @@ struct cifs_wksid {
 	char sidname[SIDNAMELENGTH];
 } __attribute__((packed));
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
-
 extern int match_sid(struct cifs_sid *);
 extern int compare_sids(const struct cifs_sid *, const struct cifs_sid *);
 
-#endif /*  CONFIG_CIFS_EXPERIMENTAL */
-
 #endif /* _CIFSACL_H */
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 7ac0056294c..f856732161a 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -43,18 +43,32 @@ extern void SMBencrypt(unsigned char *passwd, const unsigned char *c8,
 		       unsigned char *p24);
 
 static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu,
-				const struct session_key *key, char *signature)
+				struct TCP_Server_Info *server, char *signature)
 {
-	struct	MD5Context context;
+	int rc;
 
-	if ((cifs_pdu == NULL) || (signature == NULL) || (key == NULL))
+	if (cifs_pdu == NULL || signature == NULL || server == NULL)
 		return -EINVAL;
 
-	cifs_MD5_init(&context);
-	cifs_MD5_update(&context, (char *)&key->data, key->len);
-	cifs_MD5_update(&context, cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
+	if (!server->secmech.sdescmd5) {
+		cERROR(1, "%s: Can't generate signature\n", __func__);
+		return -1;
+	}
+
+	rc = crypto_shash_init(&server->secmech.sdescmd5->shash);
+	if (rc) {
+		cERROR(1, "%s: Oould not init md5\n", __func__);
+		return rc;
+	}
+
+	crypto_shash_update(&server->secmech.sdescmd5->shash,
+		server->session_key.response, server->session_key.len);
+
+	crypto_shash_update(&server->secmech.sdescmd5->shash,
+		cifs_pdu->Protocol, cifs_pdu->smb_buf_length);
+
+	rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
 
-	cifs_MD5_final(signature, &context);
 	return 0;
 }
 
@@ -79,8 +93,7 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
 	server->sequence_number++;
 	spin_unlock(&GlobalMid_Lock);
 
-	rc = cifs_calculate_signature(cifs_pdu, &server->session_key,
-				      smb_signature);
+	rc = cifs_calculate_signature(cifs_pdu, server, smb_signature);
 	if (rc)
 		memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
 	else
@@ -90,16 +103,28 @@ int cifs_sign_smb(struct smb_hdr *cifs_pdu, struct TCP_Server_Info *server,
 }
 
 static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
-				const struct session_key *key, char *signature)
+				struct TCP_Server_Info *server, char *signature)
 {
-	struct  MD5Context context;
 	int i;
+	int rc;
 
-	if ((iov == NULL) || (signature == NULL) || (key == NULL))
+	if (iov == NULL || signature == NULL || server == NULL)
 		return -EINVAL;
 
-	cifs_MD5_init(&context);
-	cifs_MD5_update(&context, (char *)&key->data, key->len);
+	if (!server->secmech.sdescmd5) {
+		cERROR(1, "%s: Can't generate signature\n", __func__);
+		return -1;
+	}
+
+	rc = crypto_shash_init(&server->secmech.sdescmd5->shash);
+	if (rc) {
+		cERROR(1, "%s: Oould not init md5\n", __func__);
+		return rc;
+	}
+
+	crypto_shash_update(&server->secmech.sdescmd5->shash,
+		server->session_key.response, server->session_key.len);
+
 	for (i = 0; i < n_vec; i++) {
 		if (iov[i].iov_len == 0)
 			continue;
@@ -112,18 +137,18 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec,
 		if (i == 0) {
 			if (iov[0].iov_len <= 8) /* cmd field at offset 9 */
 				break; /* nothing to sign or corrupt header */
-			cifs_MD5_update(&context, iov[0].iov_base+4,
-				  iov[0].iov_len-4);
+			crypto_shash_update(&server->secmech.sdescmd5->shash,
+				iov[i].iov_base + 4, iov[i].iov_len - 4);
 		} else
-			cifs_MD5_update(&context, iov[i].iov_base, iov[i].iov_len);
+			crypto_shash_update(&server->secmech.sdescmd5->shash,
+				iov[i].iov_base, iov[i].iov_len);
 	}
 
-	cifs_MD5_final(signature, &context);
+	rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature);
 
-	return 0;
+	return rc;
 }
 
-
 int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
 		   __u32 *pexpected_response_sequence_number)
 {
@@ -146,8 +171,7 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
 	server->sequence_number++;
 	spin_unlock(&GlobalMid_Lock);
 
-	rc = cifs_calc_signature2(iov, n_vec, &server->session_key,
-				      smb_signature);
+	rc = cifs_calc_signature2(iov, n_vec, server, smb_signature);
 	if (rc)
 		memset(cifs_pdu->Signature.SecuritySignature, 0, 8);
 	else
@@ -157,14 +181,14 @@ int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *server,
 }
 
 int cifs_verify_signature(struct smb_hdr *cifs_pdu,
-			  const struct session_key *session_key,
+			  struct TCP_Server_Info *server,
 			  __u32 expected_sequence_number)
 {
 	unsigned int rc;
 	char server_response_sig[8];
 	char what_we_think_sig_should_be[20];
 
-	if (cifs_pdu == NULL || session_key == NULL)
+	if (cifs_pdu == NULL || server == NULL)
 		return -EINVAL;
 
 	if (cifs_pdu->Command == SMB_COM_NEGOTIATE)
@@ -193,7 +217,7 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
 					cpu_to_le32(expected_sequence_number);
 	cifs_pdu->Signature.Sequence.Reserved = 0;
 
-	rc = cifs_calculate_signature(cifs_pdu, session_key,
+	rc = cifs_calculate_signature(cifs_pdu, server,
 		what_we_think_sig_should_be);
 
 	if (rc)
@@ -209,18 +233,28 @@ int cifs_verify_signature(struct smb_hdr *cifs_pdu,
 
 }
 
-/* We fill in key by putting in 40 byte array which was allocated by caller */
-int cifs_calculate_session_key(struct session_key *key, const char *rn,
-			   const char *password)
+/* first calculate 24 bytes ntlm response and then 16 byte session key */
+int setup_ntlm_response(struct cifsSesInfo *ses)
 {
-	char temp_key[16];
-	if ((key == NULL) || (rn == NULL))
+	unsigned int temp_len = CIFS_SESS_KEY_SIZE + CIFS_AUTH_RESP_SIZE;
+	char temp_key[CIFS_SESS_KEY_SIZE];
+
+	if (!ses)
 		return -EINVAL;
 
-	E_md4hash(password, temp_key);
-	mdfour(key->data.ntlm, temp_key, 16);
-	memcpy(key->data.ntlm+16, rn, CIFS_SESS_KEY_SIZE);
-	key->len = 40;
+	ses->auth_key.response = kmalloc(temp_len, GFP_KERNEL);
+	if (!ses->auth_key.response) {
+		cERROR(1, "NTLM can't allocate (%u bytes) memory", temp_len);
+		return -ENOMEM;
+	}
+	ses->auth_key.len = temp_len;
+
+	SMBNTencrypt(ses->password, ses->server->cryptkey,
+			ses->auth_key.response + CIFS_SESS_KEY_SIZE);
+
+	E_md4hash(ses->password, temp_key);
+	mdfour(ses->auth_key.response, temp_key, CIFS_SESS_KEY_SIZE);
+
 	return 0;
 }
 
@@ -294,15 +328,15 @@ build_avpair_blob(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
 	 * two times the unicode length of a server name +
 	 * size of a timestamp (which is 8 bytes).
 	 */
-	ses->tilen = size + 2 * (2 * dlen) + 2 * (2 * wlen) + 8;
-	ses->tiblob = kzalloc(ses->tilen, GFP_KERNEL);
-	if (!ses->tiblob) {
-		ses->tilen = 0;
+	ses->auth_key.len = size + 2 * (2 * dlen) + 2 * (2 * wlen) + 8;
+	ses->auth_key.response = kzalloc(ses->auth_key.len, GFP_KERNEL);
+	if (!ses->auth_key.response) {
+		ses->auth_key.len = 0;
 		cERROR(1, "Challenge target info allocation failure");
 		return -ENOMEM;
 	}
 
-	blobptr = ses->tiblob;
+	blobptr = ses->auth_key.response;
 	attrptr = (struct ntlmssp2_name *) blobptr;
 
 	attrptr->type = cpu_to_le16(NTLMSSP_AV_NB_DOMAIN_NAME);
@@ -357,7 +391,7 @@ build_avpair_blob(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
  * about target string i.e. for some, just user name might suffice.
  */
 static int
-find_domain_name(struct cifsSesInfo *ses)
+find_domain_name(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
 {
 	unsigned int attrsize;
 	unsigned int type;
@@ -366,11 +400,11 @@ find_domain_name(struct cifsSesInfo *ses)
 	unsigned char *blobend;
 	struct ntlmssp2_name *attrptr;
 
-	if (!ses->tilen || !ses->tiblob)
+	if (!ses->auth_key.len || !ses->auth_key.response)
 		return 0;
 
-	blobptr = ses->tiblob;
-	blobend = ses->tiblob + ses->tilen;
+	blobptr = ses->auth_key.response;
+	blobend = blobptr + ses->auth_key.len;
 
 	while (blobptr + onesize < blobend) {
 		attrptr = (struct ntlmssp2_name *) blobptr;
@@ -386,16 +420,13 @@ find_domain_name(struct cifsSesInfo *ses)
 			if (!attrsize)
 				break;
 			if (!ses->domainName) {
-				struct nls_table *default_nls;
 				ses->domainName =
 					kmalloc(attrsize + 1, GFP_KERNEL);
 				if (!ses->domainName)
 						return -ENOMEM;
-				default_nls = load_nls_default();
 				cifs_from_ucs2(ses->domainName,
 					(__le16 *)blobptr, attrsize, attrsize,
-					default_nls, false);
-				unload_nls(default_nls);
+					nls_cp, false);
 				break;
 			}
 		}
@@ -405,82 +436,136 @@ find_domain_name(struct cifsSesInfo *ses)
 	return 0;
 }
 
-static int calc_ntlmv2_hash(struct cifsSesInfo *ses,
+static int calc_ntlmv2_hash(struct cifsSesInfo *ses, char *ntlmv2_hash,
 			    const struct nls_table *nls_cp)
 {
 	int rc = 0;
 	int len;
-	char nt_hash[16];
-	struct HMACMD5Context *pctxt;
+	char nt_hash[CIFS_NTHASH_SIZE];
 	wchar_t *user;
 	wchar_t *domain;
+	wchar_t *server;
 
-	pctxt = kmalloc(sizeof(struct HMACMD5Context), GFP_KERNEL);
-
-	if (pctxt == NULL)
-		return -ENOMEM;
+	if (!ses->server->secmech.sdeschmacmd5) {
+		cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
+		return -1;
+	}
 
 	/* calculate md4 hash of password */
 	E_md4hash(ses->password, nt_hash);
 
-	/* convert Domainname to unicode and uppercase */
-	hmac_md5_init_limK_to_64(nt_hash, 16, pctxt);
+	crypto_shash_setkey(ses->server->secmech.hmacmd5, nt_hash,
+				CIFS_NTHASH_SIZE);
+
+	rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
+	if (rc) {
+		cERROR(1, "calc_ntlmv2_hash: could not init hmacmd5\n");
+		return rc;
+	}
 
 	/* convert ses->userName to unicode and uppercase */
 	len = strlen(ses->userName);
 	user = kmalloc(2 + (len * 2), GFP_KERNEL);
-	if (user == NULL)
+	if (user == NULL) {
+		cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n");
+		rc = -ENOMEM;
 		goto calc_exit_2;
+	}
 	len = cifs_strtoUCS((__le16 *)user, ses->userName, len, nls_cp);
 	UniStrupr(user);
-	hmac_md5_update((char *)user, 2*len, pctxt);
+
+	crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+				(char *)user, 2 * len);
 
 	/* convert ses->domainName to unicode and uppercase */
 	if (ses->domainName) {
 		len = strlen(ses->domainName);
 
 		domain = kmalloc(2 + (len * 2), GFP_KERNEL);
-		if (domain == NULL)
+		if (domain == NULL) {
+			cERROR(1, "calc_ntlmv2_hash: domain mem alloc failure");
+			rc = -ENOMEM;
 			goto calc_exit_1;
+		}
 		len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len,
 					nls_cp);
-		/* the following line was removed since it didn't work well
-		   with lower cased domain name that passed as an option.
-		   Maybe converting the domain name earlier makes sense */
-		/* UniStrupr(domain); */
-
-		hmac_md5_update((char *)domain, 2*len, pctxt);
-
+		crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+					(char *)domain, 2 * len);
 		kfree(domain);
+	} else if (ses->serverName) {
+		len = strlen(ses->serverName);
+
+		server = kmalloc(2 + (len * 2), GFP_KERNEL);
+		if (server == NULL) {
+			cERROR(1, "calc_ntlmv2_hash: server mem alloc failure");
+			rc = -ENOMEM;
+			goto calc_exit_1;
+		}
+		len = cifs_strtoUCS((__le16 *)server, ses->serverName, len,
+					nls_cp);
+		crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+					(char *)server, 2 * len);
+		kfree(server);
 	}
+
+	rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
+					ntlmv2_hash);
+
 calc_exit_1:
 	kfree(user);
 calc_exit_2:
-	/* BB FIXME what about bytes 24 through 40 of the signing key?
-	   compare with the NTLM example */
-	hmac_md5_final(ses->ntlmv2_hash, pctxt);
+	return rc;
+}
+
+static int
+CalcNTLMv2_response(const struct cifsSesInfo *ses, char *ntlmv2_hash)
+{
+	int rc;
+	unsigned int offset = CIFS_SESS_KEY_SIZE + 8;
+
+	if (!ses->server->secmech.sdeschmacmd5) {
+		cERROR(1, "calc_ntlmv2_hash: can't generate ntlmv2 hash\n");
+		return -1;
+	}
+
+	crypto_shash_setkey(ses->server->secmech.hmacmd5,
+				ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
+
+	rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
+	if (rc) {
+		cERROR(1, "CalcNTLMv2_response: could not init hmacmd5");
+		return rc;
+	}
+
+	if (ses->server->secType == RawNTLMSSP)
+		memcpy(ses->auth_key.response + offset,
+			ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
+	else
+		memcpy(ses->auth_key.response + offset,
+			ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
+	crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+		ses->auth_key.response + offset, ses->auth_key.len - offset);
+
+	rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
+		ses->auth_key.response + CIFS_SESS_KEY_SIZE);
 
-	kfree(pctxt);
 	return rc;
 }
 
+
 int
-setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
-		      const struct nls_table *nls_cp)
+setup_ntlmv2_rsp(struct cifsSesInfo *ses, const struct nls_table *nls_cp)
 {
 	int rc;
-	struct ntlmv2_resp *buf = (struct ntlmv2_resp *)resp_buf;
-	struct HMACMD5Context context;
-
-	buf->blob_signature = cpu_to_le32(0x00000101);
-	buf->reserved = 0;
-	buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
-	get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
-	buf->reserved2 = 0;
+	int baselen;
+	unsigned int tilen;
+	struct ntlmv2_resp *buf;
+	char ntlmv2_hash[16];
+	unsigned char *tiblob = NULL; /* target info blob */
 
 	if (ses->server->secType == RawNTLMSSP) {
 		if (!ses->domainName) {
-			rc = find_domain_name(ses);
+			rc = find_domain_name(ses, nls_cp);
 			if (rc) {
 				cERROR(1, "error %d finding domain name", rc);
 				goto setup_ntlmv2_rsp_ret;
@@ -490,51 +575,179 @@ setup_ntlmv2_rsp(struct cifsSesInfo *ses, char *resp_buf,
 		rc = build_avpair_blob(ses, nls_cp);
 		if (rc) {
 			cERROR(1, "error %d building av pair blob", rc);
-			return rc;
+			goto setup_ntlmv2_rsp_ret;
 		}
 	}
 
-	/* calculate buf->ntlmv2_hash */
-	rc = calc_ntlmv2_hash(ses, nls_cp);
+	baselen = CIFS_SESS_KEY_SIZE + sizeof(struct ntlmv2_resp);
+	tilen = ses->auth_key.len;
+	tiblob = ses->auth_key.response;
+
+	ses->auth_key.response = kmalloc(baselen + tilen, GFP_KERNEL);
+	if (!ses->auth_key.response) {
+		rc = ENOMEM;
+		ses->auth_key.len = 0;
+		cERROR(1, "%s: Can't allocate auth blob", __func__);
+		goto setup_ntlmv2_rsp_ret;
+	}
+	ses->auth_key.len += baselen;
+
+	buf = (struct ntlmv2_resp *)
+			(ses->auth_key.response + CIFS_SESS_KEY_SIZE);
+	buf->blob_signature = cpu_to_le32(0x00000101);
+	buf->reserved = 0;
+	buf->time = cpu_to_le64(cifs_UnixTimeToNT(CURRENT_TIME));
+	get_random_bytes(&buf->client_chal, sizeof(buf->client_chal));
+	buf->reserved2 = 0;
+
+	memcpy(ses->auth_key.response + baselen, tiblob, tilen);
+
+	/* calculate ntlmv2_hash */
+	rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp);
 	if (rc) {
 		cERROR(1, "could not get v2 hash rc %d", rc);
 		goto setup_ntlmv2_rsp_ret;
 	}
-	CalcNTLMv2_response(ses, resp_buf);
+
+	/* calculate first part of the client response (CR1) */
+	rc = CalcNTLMv2_response(ses, ntlmv2_hash);
+	if (rc) {
+		cERROR(1, "Could not calculate CR1  rc: %d", rc);
+		goto setup_ntlmv2_rsp_ret;
+	}
 
 	/* now calculate the session key for NTLMv2 */
-	hmac_md5_init_limK_to_64(ses->ntlmv2_hash, 16, &context);
-	hmac_md5_update(resp_buf, 16, &context);
-	hmac_md5_final(ses->auth_key.data.ntlmv2.key, &context);
+	crypto_shash_setkey(ses->server->secmech.hmacmd5,
+		ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
+
+	rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash);
+	if (rc) {
+		cERROR(1, "%s: Could not init hmacmd5\n", __func__);
+		goto setup_ntlmv2_rsp_ret;
+	}
 
-	memcpy(&ses->auth_key.data.ntlmv2.resp, resp_buf,
-	       sizeof(struct ntlmv2_resp));
-	ses->auth_key.len = 16 + sizeof(struct ntlmv2_resp);
+	crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
+		ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+		CIFS_HMAC_MD5_HASH_SIZE);
 
-	return 0;
+	rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash,
+		ses->auth_key.response);
 
 setup_ntlmv2_rsp_ret:
-	kfree(ses->tiblob);
-	ses->tiblob = NULL;
-	ses->tilen = 0;
+	kfree(tiblob);
 
 	return rc;
 }
 
-void CalcNTLMv2_response(const struct cifsSesInfo *ses,
-			 char *v2_session_response)
+int
+calc_seckey(struct cifsSesInfo *ses)
 {
-	struct HMACMD5Context context;
-	/* rest of v2 struct already generated */
-	memcpy(v2_session_response + 8, ses->cryptKey, 8);
-	hmac_md5_init_limK_to_64(ses->ntlmv2_hash, 16, &context);
+	int rc;
+	struct crypto_blkcipher *tfm_arc4;
+	struct scatterlist sgin, sgout;
+	struct blkcipher_desc desc;
+	unsigned char sec_key[CIFS_SESS_KEY_SIZE]; /* a nonce */
+
+	get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE);
+
+	tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
+	if (!tfm_arc4 || IS_ERR(tfm_arc4)) {
+		cERROR(1, "could not allocate crypto API arc4\n");
+		return PTR_ERR(tfm_arc4);
+	}
 
-	hmac_md5_update(v2_session_response+8,
-			sizeof(struct ntlmv2_resp) - 8, &context);
+	desc.tfm = tfm_arc4;
 
-	if (ses->tilen)
-		hmac_md5_update(ses->tiblob, ses->tilen, &context);
+	crypto_blkcipher_setkey(tfm_arc4, ses->auth_key.response,
+					CIFS_SESS_KEY_SIZE);
 
-	hmac_md5_final(v2_session_response, &context);
-/*	cifs_dump_mem("v2_sess_rsp: ", v2_session_response, 32); */
+	sg_init_one(&sgin, sec_key, CIFS_SESS_KEY_SIZE);
+	sg_init_one(&sgout, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
+
+	rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, CIFS_CPHTXT_SIZE);
+	if (rc) {
+		cERROR(1, "could not encrypt session key rc: %d\n", rc);
+		crypto_free_blkcipher(tfm_arc4);
+		return rc;
+	}
+
+	/* make secondary_key/nonce as session key */
+	memcpy(ses->auth_key.response, sec_key, CIFS_SESS_KEY_SIZE);
+	/* and make len as that of session key only */
+	ses->auth_key.len = CIFS_SESS_KEY_SIZE;
+
+	crypto_free_blkcipher(tfm_arc4);
+
+	return 0;
+}
+
+void
+cifs_crypto_shash_release(struct TCP_Server_Info *server)
+{
+	if (server->secmech.md5)
+		crypto_free_shash(server->secmech.md5);
+
+	if (server->secmech.hmacmd5)
+		crypto_free_shash(server->secmech.hmacmd5);
+
+	kfree(server->secmech.sdeschmacmd5);
+
+	kfree(server->secmech.sdescmd5);
+}
+
+int
+cifs_crypto_shash_allocate(struct TCP_Server_Info *server)
+{
+	int rc;
+	unsigned int size;
+
+	server->secmech.hmacmd5 = crypto_alloc_shash("hmac(md5)", 0, 0);
+	if (!server->secmech.hmacmd5 ||
+			IS_ERR(server->secmech.hmacmd5)) {
+		cERROR(1, "could not allocate crypto hmacmd5\n");
+		return PTR_ERR(server->secmech.hmacmd5);
+	}
+
+	server->secmech.md5 = crypto_alloc_shash("md5", 0, 0);
+	if (!server->secmech.md5 || IS_ERR(server->secmech.md5)) {
+		cERROR(1, "could not allocate crypto md5\n");
+		rc = PTR_ERR(server->secmech.md5);
+		goto crypto_allocate_md5_fail;
+	}
+
+	size = sizeof(struct shash_desc) +
+			crypto_shash_descsize(server->secmech.hmacmd5);
+	server->secmech.sdeschmacmd5 = kmalloc(size, GFP_KERNEL);
+	if (!server->secmech.sdeschmacmd5) {
+		cERROR(1, "cifs_crypto_shash_allocate: can't alloc hmacmd5\n");
+		rc = -ENOMEM;
+		goto crypto_allocate_hmacmd5_sdesc_fail;
+	}
+	server->secmech.sdeschmacmd5->shash.tfm = server->secmech.hmacmd5;
+	server->secmech.sdeschmacmd5->shash.flags = 0x0;
+
+
+	size = sizeof(struct shash_desc) +
+			crypto_shash_descsize(server->secmech.md5);
+	server->secmech.sdescmd5 = kmalloc(size, GFP_KERNEL);
+	if (!server->secmech.sdescmd5) {
+		cERROR(1, "cifs_crypto_shash_allocate: can't alloc md5\n");
+		rc = -ENOMEM;
+		goto crypto_allocate_md5_sdesc_fail;
+	}
+	server->secmech.sdescmd5->shash.tfm = server->secmech.md5;
+	server->secmech.sdescmd5->shash.flags = 0x0;
+
+	return 0;
+
+crypto_allocate_md5_sdesc_fail:
+	kfree(server->secmech.sdeschmacmd5);
+
+crypto_allocate_hmacmd5_sdesc_fail:
+	crypto_free_shash(server->secmech.md5);
+
+crypto_allocate_md5_fail:
+	crypto_free_shash(server->secmech.hmacmd5);
+
+	return rc;
 }
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 34371637f21..3936aa7f2c2 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -116,7 +116,7 @@ cifs_read_super(struct super_block *sb, void *data,
 		return -ENOMEM;
 
 	spin_lock_init(&cifs_sb->tlink_tree_lock);
-	INIT_RADIX_TREE(&cifs_sb->tlink_tree, GFP_KERNEL);
+	cifs_sb->tlink_tree = RB_ROOT;
 
 	rc = bdi_setup_and_register(&cifs_sb->bdi, "cifs", BDI_CAP_MAP_COPY);
 	if (rc) {
@@ -318,12 +318,10 @@ cifs_alloc_inode(struct super_block *sb)
 		return NULL;
 	cifs_inode->cifsAttrs = 0x20;	/* default */
 	cifs_inode->time = 0;
-	cifs_inode->write_behind_rc = 0;
 	/* Until the file is open and we have gotten oplock
 	info back from the server, can not assume caching of
 	file data or metadata */
-	cifs_inode->clientCanCacheRead = false;
-	cifs_inode->clientCanCacheAll = false;
+	cifs_set_oplock_level(cifs_inode, 0);
 	cifs_inode->delete_pending = false;
 	cifs_inode->invalid_mapping = false;
 	cifs_inode->vfs_inode.i_blkbits = 14;  /* 2**14 = CIFS_MAX_MSGSIZE */
@@ -460,9 +458,13 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
 		seq_printf(s, ",acl");
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS)
 		seq_printf(s, ",mfsymlinks");
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE)
+		seq_printf(s, ",fsc");
 
 	seq_printf(s, ",rsize=%d", cifs_sb->rsize);
 	seq_printf(s, ",wsize=%d", cifs_sb->wsize);
+	/* convert actimeo and display it in seconds */
+		seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ);
 
 	return 0;
 }
@@ -545,9 +547,9 @@ static const struct super_operations cifs_super_ops = {
 #endif
 };
 
-static int
-cifs_get_sb(struct file_system_type *fs_type,
-	    int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *
+cifs_do_mount(struct file_system_type *fs_type,
+	    int flags, const char *dev_name, void *data)
 {
 	int rc;
 	struct super_block *sb;
@@ -557,18 +559,17 @@ cifs_get_sb(struct file_system_type *fs_type,
 	cFYI(1, "Devname: %s flags: %d ", dev_name, flags);
 
 	if (IS_ERR(sb))
-		return PTR_ERR(sb);
+		return ERR_CAST(sb);
 
 	sb->s_flags = flags;
 
 	rc = cifs_read_super(sb, data, dev_name, flags & MS_SILENT ? 1 : 0);
 	if (rc) {
 		deactivate_locked_super(sb);
-		return rc;
+		return ERR_PTR(rc);
 	}
 	sb->s_flags |= MS_ACTIVE;
-	simple_set_mnt(mnt, sb);
-	return 0;
+	return dget(sb->s_root);
 }
 
 static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
@@ -634,7 +635,7 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
 struct file_system_type cifs_fs_type = {
 	.owner = THIS_MODULE,
 	.name = "cifs",
-	.get_sb = cifs_get_sb,
+	.mount = cifs_do_mount,
 	.kill_sb = kill_anon_super,
 	/*  .fs_flags */
 };
@@ -936,7 +937,6 @@ init_cifs(void)
 	GlobalCurrentXid = 0;
 	GlobalTotalActiveXid = 0;
 	GlobalMaxActiveXid = 0;
-	memset(Local_System_Name, 0, 15);
 	spin_lock_init(&cifs_tcp_ses_lock);
 	spin_lock_init(&cifs_file_list_lock);
 	spin_lock_init(&GlobalMid_Lock);
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index f35795a16b4..897b2b2b28b 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -112,5 +112,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern const struct export_operations cifs_export_ops;
 #endif /* EXPERIMENTAL */
 
-#define CIFS_VERSION   "1.67"
+#define CIFS_VERSION   "1.68"
 #endif				/* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 3365e77f6f2..7136c0c3e2f 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -25,6 +25,9 @@
 #include <linux/workqueue.h>
 #include "cifs_fs_sb.h"
 #include "cifsacl.h"
+#include <crypto/internal/hash.h>
+#include <linux/scatterlist.h>
+
 /*
  * The sizes of various internal tables and strings
  */
@@ -42,6 +45,16 @@
 #define CIFS_MIN_RCV_POOL 4
 
 /*
+ * default attribute cache timeout (jiffies)
+ */
+#define CIFS_DEF_ACTIMEO (1 * HZ)
+
+/*
+ * max attribute cache timeout (jiffies) - 2^30
+ */
+#define CIFS_MAX_ACTIMEO (1 << 30)
+
+/*
  * MAX_REQ is the maximum number of requests that WE will send
  * on one socket concurrently. It also matches the most common
  * value of max multiplex returned by servers.  We may
@@ -74,7 +87,7 @@
  * CIFS vfs client Status information (based on what we know.)
  */
 
- /* associated with each tcp and smb session */
+/* associated with each tcp and smb session */
 enum statusEnum {
 	CifsNew = 0,
 	CifsGood,
@@ -99,14 +112,29 @@ enum protocolEnum {
 
 struct session_key {
 	unsigned int len;
-	union {
-		char ntlm[CIFS_SESS_KEY_SIZE + 16];
-		char krb5[CIFS_SESS_KEY_SIZE + 16]; /* BB: length correct? */
-		struct {
-			char key[16];
-			struct ntlmv2_resp resp;
-		} ntlmv2;
-	} data;
+	char *response;
+};
+
+/* crypto security descriptor definition */
+struct sdesc {
+	struct shash_desc shash;
+	char ctx[];
+};
+
+/* crypto hashing related structure/fields, not specific to a sec mech */
+struct cifs_secmech {
+	struct crypto_shash *hmacmd5; /* hmac-md5 hash function */
+	struct crypto_shash *md5; /* md5 hash function */
+	struct sdesc *sdeschmacmd5;  /* ctxt to generate ntlmv2 hash, CR1 */
+	struct sdesc *sdescmd5; /* ctxt to generate cifs/smb signature */
+};
+
+/* per smb session structure/fields */
+struct ntlmssp_auth {
+	__u32 client_flags; /* sent by client in type 1 ntlmsssp exchange */
+	__u32 server_flags; /* sent by server in type 2 ntlmssp exchange */
+	unsigned char ciphertext[CIFS_CPHTXT_SIZE]; /* sent to server */
+	char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlmssp */
 };
 
 struct cifs_cred {
@@ -179,12 +207,14 @@ struct TCP_Server_Info {
 	int capabilities; /* allow selective disabling of caps by smb sess */
 	int timeAdj;  /* Adjust for difference in server time zone in sec */
 	__u16 CurrentMid;         /* multiplex id - rotating counter */
+	char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */
 	/* 16th byte of RFC1001 workstation name is always null */
 	char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
 	__u32 sequence_number; /* needed for CIFS PDU signature */
 	struct session_key session_key;
 	unsigned long lstrp; /* when we got last response from this server */
 	u16 dialect; /* dialect index that server chose */
+	struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */
 	/* extended security flavors that server supports */
 	bool	sec_kerberos;		/* supports plain Kerberos */
 	bool	sec_mskerberos;		/* supports legacy MS Kerberos */
@@ -222,11 +252,8 @@ struct cifsSesInfo {
 	char userName[MAX_USERNAME_SIZE + 1];
 	char *domainName;
 	char *password;
-	char cryptKey[CIFS_CRYPTO_KEY_SIZE];
 	struct session_key auth_key;
-	char ntlmv2_hash[16];
-	unsigned int tilen; /* length of the target info blob */
-	unsigned char *tiblob; /* target info blob in challenge response */
+	struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */
 	bool need_reconnect:1; /* connection reset, uid now invalid */
 };
 /* no more than one of the following three session flags may be set */
@@ -319,7 +346,8 @@ struct cifsTconInfo {
  * "get" on the container.
  */
 struct tcon_link {
-	unsigned long		tl_index;
+	struct rb_node		tl_rbnode;
+	uid_t			tl_uid;
 	unsigned long		tl_flags;
 #define TCON_LINK_MASTER	0
 #define TCON_LINK_PENDING	1
@@ -395,16 +423,19 @@ struct cifsFileInfo {
 	struct list_head llist; /* list of byte range locks we have. */
 	bool invalidHandle:1;	/* file closed via session abend */
 	bool oplock_break_cancelled:1;
-	atomic_t count;		/* reference count */
+	int count;		/* refcount protected by cifs_file_list_lock */
 	struct mutex fh_mutex; /* prevents reopen race after dead ses*/
 	struct cifs_search_info srch_inf;
 	struct work_struct oplock_break; /* work for oplock breaks */
 };
 
-/* Take a reference on the file private data */
+/*
+ * Take a reference on the file private data. Must be called with
+ * cifs_file_list_lock held.
+ */
 static inline void cifsFileInfo_get(struct cifsFileInfo *cifs_file)
 {
-	atomic_inc(&cifs_file->count);
+	++cifs_file->count;
 }
 
 void cifsFileInfo_put(struct cifsFileInfo *cifs_file);
@@ -417,7 +448,6 @@ struct cifsInodeInfo {
 	struct list_head lockList;
 	/* BB add in lists for dirty pages i.e. write caching info for oplock */
 	struct list_head openFileList;
-	int write_behind_rc;
 	__u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */
 	unsigned long time;	/* jiffies of last update/check of inode */
 	bool clientCanCacheRead:1;	/* read oplock */
@@ -668,7 +698,7 @@ require use of the stronger protocol */
  *  GlobalMid_Lock protects:
  *	list operations on pending_mid_q and oplockQ
  *      updates to XID counters, multiplex id  and SMB sequence numbers
- *  GlobalSMBSesLock protects:
+ *  cifs_file_list_lock protects:
  *	list operations on tcp and SMB session lists and tCon lists
  *  f_owner.lock protects certain per file struct operations
  *  mapping->page_lock protects certain per page operations
@@ -726,8 +756,6 @@ GLOBAL_EXTERN unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Sem */
 GLOBAL_EXTERN unsigned int GlobalMaxActiveXid;	/* prot by GlobalMid_Sem */
 GLOBAL_EXTERN spinlock_t GlobalMid_Lock;  /* protects above & list operations */
 					  /* on midQ entries */
-GLOBAL_EXTERN char Local_System_Name[15];
-
 /*
  *  Global counters, updated atomically
  */
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index b0f4b5656d4..de36b09763a 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -131,9 +131,20 @@
 #define CIFS_CRYPTO_KEY_SIZE (8)
 
 /*
+ * Size of the ntlm client response
+ */
+#define CIFS_AUTH_RESP_SIZE (24)
+
+/*
  * Size of the session key (crypto key encrypted with the password
  */
-#define CIFS_SESS_KEY_SIZE (24)
+#define CIFS_SESS_KEY_SIZE (16)
+
+#define CIFS_CLIENT_CHALLENGE_SIZE (8)
+#define CIFS_SERVER_CHALLENGE_SIZE (8)
+#define CIFS_HMAC_MD5_HASH_SIZE (16)
+#define CIFS_CPHTXT_SIZE (16)
+#define CIFS_NTHASH_SIZE (16)
 
 /*
  * Maximum user name length
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index e593c40ba7b..e6d1481b16c 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -54,7 +54,8 @@ do {								\
 	     __func__, curr_xid, (int)rc);			\
 } while (0)
 extern char *build_path_from_dentry(struct dentry *);
-extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb);
+extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb,
+					struct cifsTconInfo *tcon);
 extern char *build_wildcard_path_from_dentry(struct dentry *direntry);
 extern char *cifs_compose_mount_options(const char *sb_mountdata,
 		const char *fullpath, const struct dfs_info3_param *ref,
@@ -79,9 +80,7 @@ extern bool is_valid_oplock_break(struct smb_hdr *smb,
 				  struct TCP_Server_Info *);
 extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof);
 extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool);
-#ifdef CONFIG_CIFS_EXPERIMENTAL
 extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
-#endif
 extern unsigned int smbCalcSize(struct smb_hdr *ptr);
 extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
 extern int decode_negTokenInit(unsigned char *security_blob, int length,
@@ -104,6 +103,7 @@ extern struct timespec cifs_NTtimeToUnix(__le64 utc_nanoseconds_since_1601);
 extern u64 cifs_UnixTimeToNT(struct timespec);
 extern struct timespec cnvrtDosUnixTm(__le16 le_date, __le16 le_time,
 				      int offset);
+extern void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock);
 
 extern struct cifsFileInfo *cifs_new_fileinfo(__u16 fileHandle,
 				struct file *file, struct tcon_link *tlink,
@@ -129,10 +129,12 @@ extern int cifs_get_file_info_unix(struct file *filp);
 extern int cifs_get_inode_info_unix(struct inode **pinode,
 			const unsigned char *search_path,
 			struct super_block *sb, int xid);
-extern void cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
+extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
 			      struct cifs_fattr *fattr, struct inode *inode,
 			      const char *path, const __u16 *pfid);
-extern int mode_to_acl(struct inode *inode, const char *path, __u64);
+extern int mode_to_cifs_acl(struct inode *inode, const char *path, __u64);
+extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *,
+					const char *, u32 *);
 
 extern int cifs_mount(struct super_block *, struct cifs_sb_info *, char *,
 			const char *);
@@ -362,13 +364,15 @@ extern int cifs_sign_smb(struct smb_hdr *, struct TCP_Server_Info *, __u32 *);
 extern int cifs_sign_smb2(struct kvec *iov, int n_vec, struct TCP_Server_Info *,
 			  __u32 *);
 extern int cifs_verify_signature(struct smb_hdr *,
-				 const struct session_key *session_key,
+				 struct TCP_Server_Info *server,
 				__u32 expected_sequence_number);
-extern int cifs_calculate_session_key(struct session_key *key, const char *rn,
-				 const char *pass);
-extern void CalcNTLMv2_response(const struct cifsSesInfo *, char *);
-extern int setup_ntlmv2_rsp(struct cifsSesInfo *, char *,
-			     const struct nls_table *);
+extern void SMBNTencrypt(unsigned char *, unsigned char *, unsigned char *);
+extern int setup_ntlm_response(struct cifsSesInfo *);
+extern int setup_ntlmv2_rsp(struct cifsSesInfo *, const struct nls_table *);
+extern int cifs_crypto_shash_allocate(struct TCP_Server_Info *);
+extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
+extern int calc_seckey(struct cifsSesInfo *);
+
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 extern void calc_lanman_hash(const char *password, const char *cryptkey,
 				bool encrypt, char *lnm_session_key);
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index e98f1f317b1..67acfb3acad 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -503,7 +503,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 
 		if (rsp->EncryptionKeyLength ==
 				cpu_to_le16(CIFS_CRYPTO_KEY_SIZE)) {
-			memcpy(ses->cryptKey, rsp->EncryptionKey,
+			memcpy(ses->server->cryptkey, rsp->EncryptionKey,
 				CIFS_CRYPTO_KEY_SIZE);
 		} else if (server->secMode & SECMODE_PW_ENCRYPT) {
 			rc = -EIO; /* need cryptkey unless plain text */
@@ -574,7 +574,7 @@ CIFSSMBNegotiate(unsigned int xid, struct cifsSesInfo *ses)
 	server->timeAdj = (int)(__s16)le16_to_cpu(pSMBr->ServerTimeZone);
 	server->timeAdj *= 60;
 	if (pSMBr->EncryptionKeyLength == CIFS_CRYPTO_KEY_SIZE) {
-		memcpy(ses->cryptKey, pSMBr->u.EncryptionKey,
+		memcpy(ses->server->cryptkey, pSMBr->u.EncryptionKey,
 		       CIFS_CRYPTO_KEY_SIZE);
 	} else if ((pSMBr->hdr.Flags2 & SMBFLG2_EXT_SEC)
 			&& (pSMBr->EncryptionKeyLength == 0)) {
@@ -2478,95 +2478,6 @@ querySymLinkRetry:
 }
 
 #ifdef CONFIG_CIFS_EXPERIMENTAL
-/* Initialize NT TRANSACT SMB into small smb request buffer.
-   This assumes that all NT TRANSACTS that we init here have
-   total parm and data under about 400 bytes (to fit in small cifs
-   buffer size), which is the case so far, it easily fits. NB:
-	Setup words themselves and ByteCount
-	MaxSetupCount (size of returned setup area) and
-	MaxParameterCount (returned parms size) must be set by caller */
-static int
-smb_init_nttransact(const __u16 sub_command, const int setup_count,
-		   const int parm_len, struct cifsTconInfo *tcon,
-		   void **ret_buf)
-{
-	int rc;
-	__u32 temp_offset;
-	struct smb_com_ntransact_req *pSMB;
-
-	rc = small_smb_init(SMB_COM_NT_TRANSACT, 19 + setup_count, tcon,
-				(void **)&pSMB);
-	if (rc)
-		return rc;
-	*ret_buf = (void *)pSMB;
-	pSMB->Reserved = 0;
-	pSMB->TotalParameterCount = cpu_to_le32(parm_len);
-	pSMB->TotalDataCount  = 0;
-	pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
-					  MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
-	pSMB->ParameterCount = pSMB->TotalParameterCount;
-	pSMB->DataCount  = pSMB->TotalDataCount;
-	temp_offset = offsetof(struct smb_com_ntransact_req, Parms) +
-			(setup_count * 2) - 4 /* for rfc1001 length itself */;
-	pSMB->ParameterOffset = cpu_to_le32(temp_offset);
-	pSMB->DataOffset = cpu_to_le32(temp_offset + parm_len);
-	pSMB->SetupCount = setup_count; /* no need to le convert byte fields */
-	pSMB->SubCommand = cpu_to_le16(sub_command);
-	return 0;
-}
-
-static int
-validate_ntransact(char *buf, char **ppparm, char **ppdata,
-		   __u32 *pparmlen, __u32 *pdatalen)
-{
-	char *end_of_smb;
-	__u32 data_count, data_offset, parm_count, parm_offset;
-	struct smb_com_ntransact_rsp *pSMBr;
-
-	*pdatalen = 0;
-	*pparmlen = 0;
-
-	if (buf == NULL)
-		return -EINVAL;
-
-	pSMBr = (struct smb_com_ntransact_rsp *)buf;
-
-	/* ByteCount was converted from little endian in SendReceive */
-	end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount +
-			(char *)&pSMBr->ByteCount;
-
-	data_offset = le32_to_cpu(pSMBr->DataOffset);
-	data_count = le32_to_cpu(pSMBr->DataCount);
-	parm_offset = le32_to_cpu(pSMBr->ParameterOffset);
-	parm_count = le32_to_cpu(pSMBr->ParameterCount);
-
-	*ppparm = (char *)&pSMBr->hdr.Protocol + parm_offset;
-	*ppdata = (char *)&pSMBr->hdr.Protocol + data_offset;
-
-	/* should we also check that parm and data areas do not overlap? */
-	if (*ppparm > end_of_smb) {
-		cFYI(1, "parms start after end of smb");
-		return -EINVAL;
-	} else if (parm_count + *ppparm > end_of_smb) {
-		cFYI(1, "parm end after end of smb");
-		return -EINVAL;
-	} else if (*ppdata > end_of_smb) {
-		cFYI(1, "data starts after end of smb");
-		return -EINVAL;
-	} else if (data_count + *ppdata > end_of_smb) {
-		cFYI(1, "data %p + count %d (%p) past smb end %p start %p",
-			*ppdata, data_count, (data_count + *ppdata),
-			end_of_smb, pSMBr);
-		return -EINVAL;
-	} else if (parm_count + data_count > pSMBr->ByteCount) {
-		cFYI(1, "parm count and data count larger than SMB");
-		return -EINVAL;
-	}
-	*pdatalen = data_count;
-	*pparmlen = parm_count;
-	return 0;
-}
-
 int
 CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
 			const unsigned char *searchName,
@@ -3056,7 +2967,97 @@ GetExtAttrOut:
 
 #endif /* CONFIG_POSIX */
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CONFIG_CIFS_ACL
+/*
+ * Initialize NT TRANSACT SMB into small smb request buffer.  This assumes that
+ * all NT TRANSACTS that we init here have total parm and data under about 400
+ * bytes (to fit in small cifs buffer size), which is the case so far, it
+ * easily fits. NB: Setup words themselves and ByteCount MaxSetupCount (size of
+ * returned setup area) and MaxParameterCount (returned parms size) must be set
+ * by caller
+ */
+static int
+smb_init_nttransact(const __u16 sub_command, const int setup_count,
+		   const int parm_len, struct cifsTconInfo *tcon,
+		   void **ret_buf)
+{
+	int rc;
+	__u32 temp_offset;
+	struct smb_com_ntransact_req *pSMB;
+
+	rc = small_smb_init(SMB_COM_NT_TRANSACT, 19 + setup_count, tcon,
+				(void **)&pSMB);
+	if (rc)
+		return rc;
+	*ret_buf = (void *)pSMB;
+	pSMB->Reserved = 0;
+	pSMB->TotalParameterCount = cpu_to_le32(parm_len);
+	pSMB->TotalDataCount  = 0;
+	pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
+					  MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
+	pSMB->ParameterCount = pSMB->TotalParameterCount;
+	pSMB->DataCount  = pSMB->TotalDataCount;
+	temp_offset = offsetof(struct smb_com_ntransact_req, Parms) +
+			(setup_count * 2) - 4 /* for rfc1001 length itself */;
+	pSMB->ParameterOffset = cpu_to_le32(temp_offset);
+	pSMB->DataOffset = cpu_to_le32(temp_offset + parm_len);
+	pSMB->SetupCount = setup_count; /* no need to le convert byte fields */
+	pSMB->SubCommand = cpu_to_le16(sub_command);
+	return 0;
+}
+
+static int
+validate_ntransact(char *buf, char **ppparm, char **ppdata,
+		   __u32 *pparmlen, __u32 *pdatalen)
+{
+	char *end_of_smb;
+	__u32 data_count, data_offset, parm_count, parm_offset;
+	struct smb_com_ntransact_rsp *pSMBr;
+
+	*pdatalen = 0;
+	*pparmlen = 0;
+
+	if (buf == NULL)
+		return -EINVAL;
+
+	pSMBr = (struct smb_com_ntransact_rsp *)buf;
+
+	/* ByteCount was converted from little endian in SendReceive */
+	end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount +
+			(char *)&pSMBr->ByteCount;
+
+	data_offset = le32_to_cpu(pSMBr->DataOffset);
+	data_count = le32_to_cpu(pSMBr->DataCount);
+	parm_offset = le32_to_cpu(pSMBr->ParameterOffset);
+	parm_count = le32_to_cpu(pSMBr->ParameterCount);
+
+	*ppparm = (char *)&pSMBr->hdr.Protocol + parm_offset;
+	*ppdata = (char *)&pSMBr->hdr.Protocol + data_offset;
+
+	/* should we also check that parm and data areas do not overlap? */
+	if (*ppparm > end_of_smb) {
+		cFYI(1, "parms start after end of smb");
+		return -EINVAL;
+	} else if (parm_count + *ppparm > end_of_smb) {
+		cFYI(1, "parm end after end of smb");
+		return -EINVAL;
+	} else if (*ppdata > end_of_smb) {
+		cFYI(1, "data starts after end of smb");
+		return -EINVAL;
+	} else if (data_count + *ppdata > end_of_smb) {
+		cFYI(1, "data %p + count %d (%p) past smb end %p start %p",
+			*ppdata, data_count, (data_count + *ppdata),
+			end_of_smb, pSMBr);
+		return -EINVAL;
+	} else if (parm_count + data_count > pSMBr->ByteCount) {
+		cFYI(1, "parm count and data count larger than SMB");
+		return -EINVAL;
+	}
+	*pdatalen = data_count;
+	*pparmlen = parm_count;
+	return 0;
+}
+
 /* Get Security Descriptor (by handle) from remote server for a file or dir */
 int
 CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
@@ -3214,7 +3215,7 @@ setCifsAclRetry:
 	return (rc);
 }
 
-#endif /* CONFIG_CIFS_EXPERIMENTAL */
+#endif /* CONFIG_CIFS_ACL */
 
 /* Legacy Query Path Information call for lookup to old servers such
    as Win9x/WinME */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 7e73176acb5..cc1a8604a79 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -105,6 +105,7 @@ struct smb_vol {
 	unsigned int wsize;
 	bool sockopt_tcp_nodelay:1;
 	unsigned short int port;
+	unsigned long actimeo; /* attribute cache timeout (jiffies) */
 	char *prepath;
 	struct sockaddr_storage srcaddr; /* allow binding to a local IP */
 	struct nls_table *local_nls;
@@ -116,6 +117,7 @@ struct smb_vol {
 
 static int ipv4_connect(struct TCP_Server_Info *server);
 static int ipv6_connect(struct TCP_Server_Info *server);
+static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink);
 static void cifs_prune_tlinks(struct work_struct *work);
 
 /*
@@ -175,6 +177,9 @@ cifs_reconnect(struct TCP_Server_Info *server)
 	}
 	server->sequence_number = 0;
 	server->session_estab = false;
+	kfree(server->session_key.response);
+	server->session_key.response = NULL;
+	server->session_key.len = 0;
 
 	spin_lock(&GlobalMid_Lock);
 	list_for_each(tmp, &server->pending_mid_q) {
@@ -802,23 +807,20 @@ cifs_parse_mount_options(char *options, const char *devname,
 	short int override_gid = -1;
 	bool uid_specified = false;
 	bool gid_specified = false;
+	char *nodename = utsname()->nodename;
 
 	separator[0] = ',';
 	separator[1] = 0;
 
-	if (Local_System_Name[0] != 0)
-		memcpy(vol->source_rfc1001_name, Local_System_Name, 15);
-	else {
-		char *nodename = utsname()->nodename;
-		int n = strnlen(nodename, 15);
-		memset(vol->source_rfc1001_name, 0x20, 15);
-		for (i = 0; i < n; i++) {
-			/* does not have to be perfect mapping since field is
-			informational, only used for servers that do not support
-			port 445 and it can be overridden at mount time */
-			vol->source_rfc1001_name[i] = toupper(nodename[i]);
-		}
-	}
+	/*
+	 * does not have to be perfect mapping since field is
+	 * informational, only used for servers that do not support
+	 * port 445 and it can be overridden at mount time
+	 */
+	memset(vol->source_rfc1001_name, 0x20, 15);
+	for (i = 0; i < strnlen(nodename, 15); i++)
+		vol->source_rfc1001_name[i] = toupper(nodename[i]);
+
 	vol->source_rfc1001_name[15] = 0;
 	/* null target name indicates to use *SMBSERVR default called name
 	   if we end up sending RFC1001 session initialize */
@@ -836,6 +838,8 @@ cifs_parse_mount_options(char *options, const char *devname,
 	/* default to using server inode numbers where available */
 	vol->server_ino = 1;
 
+	vol->actimeo = CIFS_DEF_ACTIMEO;
+
 	if (!options)
 		return 1;
 
@@ -1064,7 +1068,7 @@ cifs_parse_mount_options(char *options, const char *devname,
 			}
 			i = cifs_convert_address((struct sockaddr *)&vol->srcaddr,
 						 value, strlen(value));
-			if (i < 0) {
+			if (i == 0) {
 				printk(KERN_WARNING "CIFS:  Could not parse"
 				       " srcaddr: %s\n",
 				       value);
@@ -1210,6 +1214,16 @@ cifs_parse_mount_options(char *options, const char *devname,
 					printk(KERN_WARNING "CIFS: server net"
 					"biosname longer than 15 truncated.\n");
 			}
+		} else if (strnicmp(data, "actimeo", 7) == 0) {
+			if (value && *value) {
+				vol->actimeo = HZ * simple_strtoul(value,
+								   &value, 0);
+				if (vol->actimeo > CIFS_MAX_ACTIMEO) {
+					cERROR(1, "CIFS: attribute cache"
+							"timeout too large");
+					return 1;
+				}
+			}
 		} else if (strnicmp(data, "credentials", 4) == 0) {
 			/* ignore */
 		} else if (strnicmp(data, "version", 3) == 0) {
@@ -1348,6 +1362,11 @@ cifs_parse_mount_options(char *options, const char *devname,
 				"supported. Instead set "
 				"/proc/fs/cifs/LookupCacheEnabled to 0\n");
 		} else if (strnicmp(data, "fsc", 3) == 0) {
+#ifndef CONFIG_CIFS_FSCACHE
+			cERROR(1, "FS-Cache support needs CONFIG_CIFS_FSCACHE"
+				  "kernel config option set");
+			return 1;
+#endif
 			vol->fsc = true;
 		} else if (strnicmp(data, "mfsymlinks", 10) == 0) {
 			vol->mfsymlinks = true;
@@ -1560,8 +1579,13 @@ cifs_put_tcp_session(struct TCP_Server_Info *server)
 	server->tcpStatus = CifsExiting;
 	spin_unlock(&GlobalMid_Lock);
 
+	cifs_crypto_shash_release(server);
 	cifs_fscache_release_client_cookie(server);
 
+	kfree(server->session_key.response);
+	server->session_key.response = NULL;
+	server->session_key.len = 0;
+
 	task = xchg(&server->tsk, NULL);
 	if (task)
 		force_sig(SIGKILL, task);
@@ -1614,10 +1638,16 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 		goto out_err;
 	}
 
+	rc = cifs_crypto_shash_allocate(tcp_ses);
+	if (rc) {
+		cERROR(1, "could not setup hash structures rc %d", rc);
+		goto out_err;
+	}
+
 	tcp_ses->hostname = extract_hostname(volume_info->UNC);
 	if (IS_ERR(tcp_ses->hostname)) {
 		rc = PTR_ERR(tcp_ses->hostname);
-		goto out_err;
+		goto out_err_crypto_release;
 	}
 
 	tcp_ses->noblocksnd = volume_info->noblocksnd;
@@ -1661,7 +1691,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 	}
 	if (rc < 0) {
 		cERROR(1, "Error connecting to socket. Aborting operation");
-		goto out_err;
+		goto out_err_crypto_release;
 	}
 
 	/*
@@ -1675,7 +1705,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 		rc = PTR_ERR(tcp_ses->tsk);
 		cERROR(1, "error %d create cifsd thread", rc);
 		module_put(THIS_MODULE);
-		goto out_err;
+		goto out_err_crypto_release;
 	}
 
 	/* thread spawned, put it on the list */
@@ -1687,6 +1717,9 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 
 	return tcp_ses;
 
+out_err_crypto_release:
+	cifs_crypto_shash_release(tcp_ses);
+
 out_err:
 	if (tcp_ses) {
 		if (!IS_ERR(tcp_ses->hostname))
@@ -1801,8 +1834,6 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
 	if (ses == NULL)
 		goto get_ses_fail;
 
-	ses->tilen = 0;
-	ses->tiblob = NULL;
 	/* new SMB session uses our server ref */
 	ses->server = server;
 	if (server->addr.sockAddr6.sin6_family == AF_INET6)
@@ -1823,10 +1854,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
 			goto get_ses_fail;
 	}
 	if (volume_info->domainname) {
-		int len = strlen(volume_info->domainname);
-		ses->domainName = kmalloc(len + 1, GFP_KERNEL);
-		if (ses->domainName)
-			strcpy(ses->domainName, volume_info->domainname);
+		ses->domainName = kstrdup(volume_info->domainname, GFP_KERNEL);
+		if (!ses->domainName)
+			goto get_ses_fail;
 	}
 	ses->cred_uid = volume_info->cred_uid;
 	ses->linux_uid = volume_info->linux_uid;
@@ -2551,6 +2581,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
 	cFYI(1, "file mode: 0x%x  dir mode: 0x%x",
 		cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode);
 
+	cifs_sb->actimeo = pvolume_info->actimeo;
+
 	if (pvolume_info->noperm)
 		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
 	if (pvolume_info->setuids)
@@ -2801,13 +2833,13 @@ remote_path_check:
 	/* check if a whole path (including prepath) is not remote */
 	if (!rc && cifs_sb->prepathlen && tcon) {
 		/* build_path_to_root works only when we have a valid tcon */
-		full_path = cifs_build_path_to_root(cifs_sb);
+		full_path = cifs_build_path_to_root(cifs_sb, tcon);
 		if (full_path == NULL) {
 			rc = -ENOMEM;
 			goto mount_fail_check;
 		}
 		rc = is_path_accessible(xid, tcon, cifs_sb, full_path);
-		if (rc != -EREMOTE) {
+		if (rc != 0 && rc != -EREMOTE) {
 			kfree(full_path);
 			goto mount_fail_check;
 		}
@@ -2886,24 +2918,16 @@ remote_path_check:
 		goto mount_fail_check;
 	}
 
-	tlink->tl_index = pSesInfo->linux_uid;
+	tlink->tl_uid = pSesInfo->linux_uid;
 	tlink->tl_tcon = tcon;
 	tlink->tl_time = jiffies;
 	set_bit(TCON_LINK_MASTER, &tlink->tl_flags);
 	set_bit(TCON_LINK_IN_TREE, &tlink->tl_flags);
 
-	rc = radix_tree_preload(GFP_KERNEL);
-	if (rc == -ENOMEM) {
-		kfree(tlink);
-		goto mount_fail_check;
-	}
-
+	cifs_sb->master_tlink = tlink;
 	spin_lock(&cifs_sb->tlink_tree_lock);
-	radix_tree_insert(&cifs_sb->tlink_tree, pSesInfo->linux_uid, tlink);
-	radix_tree_tag_set(&cifs_sb->tlink_tree, pSesInfo->linux_uid,
-			   CIFS_TLINK_MASTER_TAG);
+	tlink_rb_insert(&cifs_sb->tlink_tree, tlink);
 	spin_unlock(&cifs_sb->tlink_tree_lock);
-	radix_tree_preload_end();
 
 	queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks,
 				TLINK_IDLE_EXPIRE);
@@ -2985,13 +3009,13 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 		if ((global_secflags & CIFSSEC_MAY_LANMAN) &&
 		    (ses->server->secType == LANMAN))
-			calc_lanman_hash(tcon->password, ses->cryptKey,
+			calc_lanman_hash(tcon->password, ses->server->cryptkey,
 					 ses->server->secMode &
 					    SECMODE_PW_ENCRYPT ? true : false,
 					 bcc_ptr);
 		else
 #endif /* CIFS_WEAK_PW_HASH */
-		SMBNTencrypt(tcon->password, ses->cryptKey, bcc_ptr);
+		SMBNTencrypt(tcon->password, ses->server->cryptkey, bcc_ptr);
 
 		bcc_ptr += CIFS_SESS_KEY_SIZE;
 		if (ses->capabilities & CAP_UNICODE) {
@@ -3093,32 +3117,25 @@ CIFSTCon(unsigned int xid, struct cifsSesInfo *ses,
 int
 cifs_umount(struct super_block *sb, struct cifs_sb_info *cifs_sb)
 {
-	int i, ret;
+	struct rb_root *root = &cifs_sb->tlink_tree;
+	struct rb_node *node;
+	struct tcon_link *tlink;
 	char *tmp;
-	struct tcon_link *tlink[8];
-	unsigned long index = 0;
 
 	cancel_delayed_work_sync(&cifs_sb->prune_tlinks);
 
-	do {
-		spin_lock(&cifs_sb->tlink_tree_lock);
-		ret = radix_tree_gang_lookup(&cifs_sb->tlink_tree,
-					     (void **)tlink, index,
-					     ARRAY_SIZE(tlink));
-		/* increment index for next pass */
-		if (ret > 0)
-			index = tlink[ret - 1]->tl_index + 1;
-		for (i = 0; i < ret; i++) {
-			cifs_get_tlink(tlink[i]);
-			clear_bit(TCON_LINK_IN_TREE, &tlink[i]->tl_flags);
-			radix_tree_delete(&cifs_sb->tlink_tree,
-							tlink[i]->tl_index);
-		}
-		spin_unlock(&cifs_sb->tlink_tree_lock);
+	spin_lock(&cifs_sb->tlink_tree_lock);
+	while ((node = rb_first(root))) {
+		tlink = rb_entry(node, struct tcon_link, tl_rbnode);
+		cifs_get_tlink(tlink);
+		clear_bit(TCON_LINK_IN_TREE, &tlink->tl_flags);
+		rb_erase(node, root);
 
-		for (i = 0; i < ret; i++)
-			cifs_put_tlink(tlink[i]);
-	} while (ret != 0);
+		spin_unlock(&cifs_sb->tlink_tree_lock);
+		cifs_put_tlink(tlink);
+		spin_lock(&cifs_sb->tlink_tree_lock);
+	}
+	spin_unlock(&cifs_sb->tlink_tree_lock);
 
 	tmp = cifs_sb->prepath;
 	cifs_sb->prepathlen = 0;
@@ -3178,10 +3195,11 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses,
 	} else {
 		mutex_lock(&ses->server->srv_mutex);
 		if (!server->session_estab) {
-			memcpy(&server->session_key.data,
-				&ses->auth_key.data, ses->auth_key.len);
+			server->session_key.response = ses->auth_key.response;
 			server->session_key.len = ses->auth_key.len;
-			ses->server->session_estab = true;
+			server->sequence_number = 0x2;
+			server->session_estab = true;
+			ses->auth_key.response = NULL;
 		}
 		mutex_unlock(&server->srv_mutex);
 
@@ -3192,6 +3210,12 @@ int cifs_setup_session(unsigned int xid, struct cifsSesInfo *ses,
 		spin_unlock(&GlobalMid_Lock);
 	}
 
+	kfree(ses->auth_key.response);
+	ses->auth_key.response = NULL;
+	ses->auth_key.len = 0;
+	kfree(ses->ntlmssp);
+	ses->ntlmssp = NULL;
+
 	return rc;
 }
 
@@ -3250,22 +3274,10 @@ out:
 	return tcon;
 }
 
-static struct tcon_link *
+static inline struct tcon_link *
 cifs_sb_master_tlink(struct cifs_sb_info *cifs_sb)
 {
-	struct tcon_link *tlink;
-	unsigned int ret;
-
-	spin_lock(&cifs_sb->tlink_tree_lock);
-	ret = radix_tree_gang_lookup_tag(&cifs_sb->tlink_tree, (void **)&tlink,
-					0, 1, CIFS_TLINK_MASTER_TAG);
-	spin_unlock(&cifs_sb->tlink_tree_lock);
-
-	/* the master tcon should always be present */
-	if (ret == 0)
-		BUG();
-
-	return tlink;
+	return cifs_sb->master_tlink;
 }
 
 struct cifsTconInfo *
@@ -3281,6 +3293,47 @@ cifs_sb_tcon_pending_wait(void *unused)
 	return signal_pending(current) ? -ERESTARTSYS : 0;
 }
 
+/* find and return a tlink with given uid */
+static struct tcon_link *
+tlink_rb_search(struct rb_root *root, uid_t uid)
+{
+	struct rb_node *node = root->rb_node;
+	struct tcon_link *tlink;
+
+	while (node) {
+		tlink = rb_entry(node, struct tcon_link, tl_rbnode);
+
+		if (tlink->tl_uid > uid)
+			node = node->rb_left;
+		else if (tlink->tl_uid < uid)
+			node = node->rb_right;
+		else
+			return tlink;
+	}
+	return NULL;
+}
+
+/* insert a tcon_link into the tree */
+static void
+tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+	struct tcon_link *tlink;
+
+	while (*new) {
+		tlink = rb_entry(*new, struct tcon_link, tl_rbnode);
+		parent = *new;
+
+		if (tlink->tl_uid > new_tlink->tl_uid)
+			new = &((*new)->rb_left);
+		else
+			new = &((*new)->rb_right);
+	}
+
+	rb_link_node(&new_tlink->tl_rbnode, parent, new);
+	rb_insert_color(&new_tlink->tl_rbnode, root);
+}
+
 /*
  * Find or construct an appropriate tcon given a cifs_sb and the fsuid of the
  * current task.
@@ -3288,7 +3341,7 @@ cifs_sb_tcon_pending_wait(void *unused)
  * If the superblock doesn't refer to a multiuser mount, then just return
  * the master tcon for the mount.
  *
- * First, search the radix tree for an existing tcon for this fsuid. If one
+ * First, search the rbtree for an existing tcon for this fsuid. If one
  * exists, then check to see if it's pending construction. If it is then wait
  * for construction to complete. Once it's no longer pending, check to see if
  * it failed and either return an error or retry construction, depending on
@@ -3301,14 +3354,14 @@ struct tcon_link *
 cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
 {
 	int ret;
-	unsigned long fsuid = (unsigned long) current_fsuid();
+	uid_t fsuid = current_fsuid();
 	struct tcon_link *tlink, *newtlink;
 
 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
 		return cifs_get_tlink(cifs_sb_master_tlink(cifs_sb));
 
 	spin_lock(&cifs_sb->tlink_tree_lock);
-	tlink = radix_tree_lookup(&cifs_sb->tlink_tree, fsuid);
+	tlink = tlink_rb_search(&cifs_sb->tlink_tree, fsuid);
 	if (tlink)
 		cifs_get_tlink(tlink);
 	spin_unlock(&cifs_sb->tlink_tree_lock);
@@ -3317,36 +3370,24 @@ cifs_sb_tlink(struct cifs_sb_info *cifs_sb)
 		newtlink = kzalloc(sizeof(*tlink), GFP_KERNEL);
 		if (newtlink == NULL)
 			return ERR_PTR(-ENOMEM);
-		newtlink->tl_index = fsuid;
+		newtlink->tl_uid = fsuid;
 		newtlink->tl_tcon = ERR_PTR(-EACCES);
 		set_bit(TCON_LINK_PENDING, &newtlink->tl_flags);
 		set_bit(TCON_LINK_IN_TREE, &newtlink->tl_flags);
 		cifs_get_tlink(newtlink);
 
-		ret = radix_tree_preload(GFP_KERNEL);
-		if (ret != 0) {
-			kfree(newtlink);
-			return ERR_PTR(ret);
-		}
-
 		spin_lock(&cifs_sb->tlink_tree_lock);
 		/* was one inserted after previous search? */
-		tlink = radix_tree_lookup(&cifs_sb->tlink_tree, fsuid);
+		tlink = tlink_rb_search(&cifs_sb->tlink_tree, fsuid);
 		if (tlink) {
 			cifs_get_tlink(tlink);
 			spin_unlock(&cifs_sb->tlink_tree_lock);
-			radix_tree_preload_end();
 			kfree(newtlink);
 			goto wait_for_construction;
 		}
-		ret = radix_tree_insert(&cifs_sb->tlink_tree, fsuid, newtlink);
-		spin_unlock(&cifs_sb->tlink_tree_lock);
-		radix_tree_preload_end();
-		if (ret) {
-			kfree(newtlink);
-			return ERR_PTR(ret);
-		}
 		tlink = newtlink;
+		tlink_rb_insert(&cifs_sb->tlink_tree, tlink);
+		spin_unlock(&cifs_sb->tlink_tree_lock);
 	} else {
 wait_for_construction:
 		ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING,
@@ -3392,39 +3433,39 @@ cifs_prune_tlinks(struct work_struct *work)
 {
 	struct cifs_sb_info *cifs_sb = container_of(work, struct cifs_sb_info,
 						    prune_tlinks.work);
-	struct tcon_link *tlink[8];
-	unsigned long now = jiffies;
-	unsigned long index = 0;
-	int i, ret;
+	struct rb_root *root = &cifs_sb->tlink_tree;
+	struct rb_node *node = rb_first(root);
+	struct rb_node *tmp;
+	struct tcon_link *tlink;
 
-	do {
-		spin_lock(&cifs_sb->tlink_tree_lock);
-		ret = radix_tree_gang_lookup(&cifs_sb->tlink_tree,
-					     (void **)tlink, index,
-					     ARRAY_SIZE(tlink));
-		/* increment index for next pass */
-		if (ret > 0)
-			index = tlink[ret - 1]->tl_index + 1;
-		for (i = 0; i < ret; i++) {
-			if (test_bit(TCON_LINK_MASTER, &tlink[i]->tl_flags) ||
-			    atomic_read(&tlink[i]->tl_count) != 0 ||
-			    time_after(tlink[i]->tl_time + TLINK_IDLE_EXPIRE,
-				       now)) {
-				tlink[i] = NULL;
-				continue;
-			}
-			cifs_get_tlink(tlink[i]);
-			clear_bit(TCON_LINK_IN_TREE, &tlink[i]->tl_flags);
-			radix_tree_delete(&cifs_sb->tlink_tree,
-					  tlink[i]->tl_index);
-		}
-		spin_unlock(&cifs_sb->tlink_tree_lock);
+	/*
+	 * Because we drop the spinlock in the loop in order to put the tlink
+	 * it's not guarded against removal of links from the tree. The only
+	 * places that remove entries from the tree are this function and
+	 * umounts. Because this function is non-reentrant and is canceled
+	 * before umount can proceed, this is safe.
+	 */
+	spin_lock(&cifs_sb->tlink_tree_lock);
+	node = rb_first(root);
+	while (node != NULL) {
+		tmp = node;
+		node = rb_next(tmp);
+		tlink = rb_entry(tmp, struct tcon_link, tl_rbnode);
+
+		if (test_bit(TCON_LINK_MASTER, &tlink->tl_flags) ||
+		    atomic_read(&tlink->tl_count) != 0 ||
+		    time_after(tlink->tl_time + TLINK_IDLE_EXPIRE, jiffies))
+			continue;
 
-		for (i = 0; i < ret; i++) {
-			if (tlink[i] != NULL)
-				cifs_put_tlink(tlink[i]);
-		}
-	} while (ret != 0);
+		cifs_get_tlink(tlink);
+		clear_bit(TCON_LINK_IN_TREE, &tlink->tl_flags);
+		rb_erase(tmp, root);
+
+		spin_unlock(&cifs_sb->tlink_tree_lock);
+		cifs_put_tlink(tlink);
+		spin_lock(&cifs_sb->tlink_tree_lock);
+	}
+	spin_unlock(&cifs_sb->tlink_tree_lock);
 
 	queue_delayed_work(system_nrt_wq, &cifs_sb->prune_tlinks,
 				TLINK_IDLE_EXPIRE);
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index 0eb87026cad..548f06230a6 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -66,7 +66,7 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
 	/* Search for server name delimiter */
 	sep = memchr(hostname, '\\', len);
 	if (sep)
-		len = sep - unc;
+		len = sep - hostname;
 	else
 		cFYI(1, "%s: probably server name is whole unc: %s",
 		     __func__, unc);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 8c81e7b14d5..5a28660ca2b 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -131,8 +131,7 @@ static inline int cifs_open_inode_helper(struct inode *inode,
 			/* BB no need to lock inode until after invalidate
 			since namei code should already have it locked? */
 			rc = filemap_write_and_wait(inode->i_mapping);
-			if (rc != 0)
-				pCifsInode->write_behind_rc = rc;
+			mapping_set_error(inode->i_mapping, rc);
 		}
 		cFYI(1, "invalidating remote inode since open detected it "
 			 "changed");
@@ -147,12 +146,7 @@ client_can_cache:
 		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
 					 xid, NULL);
 
-	if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
-		pCifsInode->clientCanCacheAll = true;
-		pCifsInode->clientCanCacheRead = true;
-		cFYI(1, "Exclusive Oplock granted on inode %p", inode);
-	} else if ((oplock & 0xF) == OPLOCK_READ)
-		pCifsInode->clientCanCacheRead = true;
+	cifs_set_oplock_level(pCifsInode, oplock);
 
 	return rc;
 }
@@ -232,6 +226,7 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
 	if (pCifsFile == NULL)
 		return pCifsFile;
 
+	pCifsFile->count = 1;
 	pCifsFile->netfid = fileHandle;
 	pCifsFile->pid = current->tgid;
 	pCifsFile->uid = current_fsuid();
@@ -242,7 +237,6 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
 	mutex_init(&pCifsFile->fh_mutex);
 	mutex_init(&pCifsFile->lock_mutex);
 	INIT_LIST_HEAD(&pCifsFile->llist);
-	atomic_set(&pCifsFile->count, 1);
 	INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break);
 
 	spin_lock(&cifs_file_list_lock);
@@ -254,12 +248,7 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
 		list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList);
 	spin_unlock(&cifs_file_list_lock);
 
-	if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
-		pCifsInode->clientCanCacheAll = true;
-		pCifsInode->clientCanCacheRead = true;
-		cFYI(1, "Exclusive Oplock inode %p", inode);
-	} else if ((oplock & 0xF) == OPLOCK_READ)
-		pCifsInode->clientCanCacheRead = true;
+	cifs_set_oplock_level(pCifsInode, oplock);
 
 	file->private_data = pCifsFile;
 	return pCifsFile;
@@ -267,16 +256,18 @@ cifs_new_fileinfo(__u16 fileHandle, struct file *file,
 
 /*
  * Release a reference on the file private data. This may involve closing
- * the filehandle out on the server.
+ * the filehandle out on the server. Must be called without holding
+ * cifs_file_list_lock.
  */
 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 {
+	struct inode *inode = cifs_file->dentry->d_inode;
 	struct cifsTconInfo *tcon = tlink_tcon(cifs_file->tlink);
-	struct cifsInodeInfo *cifsi = CIFS_I(cifs_file->dentry->d_inode);
+	struct cifsInodeInfo *cifsi = CIFS_I(inode);
 	struct cifsLockInfo *li, *tmp;
 
 	spin_lock(&cifs_file_list_lock);
-	if (!atomic_dec_and_test(&cifs_file->count)) {
+	if (--cifs_file->count > 0) {
 		spin_unlock(&cifs_file_list_lock);
 		return;
 	}
@@ -288,8 +279,7 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 	if (list_empty(&cifsi->openFileList)) {
 		cFYI(1, "closing last open instance for inode %p",
 			cifs_file->dentry->d_inode);
-		cifsi->clientCanCacheRead = false;
-		cifsi->clientCanCacheAll  = false;
+		cifs_set_oplock_level(cifsi, 0);
 	}
 	spin_unlock(&cifs_file_list_lock);
 
@@ -605,11 +595,8 @@ reopen_success:
 
 	if (can_flush) {
 		rc = filemap_write_and_wait(inode->i_mapping);
-		if (rc != 0)
-			CIFS_I(inode)->write_behind_rc = rc;
+		mapping_set_error(inode->i_mapping, rc);
 
-		pCifsInode->clientCanCacheAll = false;
-		pCifsInode->clientCanCacheRead = false;
 		if (tcon->unix_ext)
 			rc = cifs_get_inode_info_unix(&inode,
 				full_path, inode->i_sb, xid);
@@ -623,18 +610,9 @@ reopen_success:
 	     invalidate the current end of file on the server
 	     we can not go to the server to get the new inod
 	     info */
-	if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
-		pCifsInode->clientCanCacheAll = true;
-		pCifsInode->clientCanCacheRead = true;
-		cFYI(1, "Exclusive Oplock granted on inode %p",
-			 pCifsFile->dentry->d_inode);
-	} else if ((oplock & 0xF) == OPLOCK_READ) {
-		pCifsInode->clientCanCacheRead = true;
-		pCifsInode->clientCanCacheAll = false;
-	} else {
-		pCifsInode->clientCanCacheRead = false;
-		pCifsInode->clientCanCacheAll = false;
-	}
+
+	cifs_set_oplock_level(pCifsInode, oplock);
+
 	cifs_relock_file(pCifsFile);
 
 reopen_error_exit:
@@ -776,12 +754,6 @@ int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
 
 	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
 	tcon = tlink_tcon(((struct cifsFileInfo *)file->private_data)->tlink);
-
-	if (file->private_data == NULL) {
-		rc = -EBADF;
-		FreeXid(xid);
-		return rc;
-	}
 	netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
 
 	if ((tcon->ses->capabilities & CAP_UNIX) &&
@@ -957,6 +929,7 @@ cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
 ssize_t cifs_user_write(struct file *file, const char __user *write_data,
 	size_t write_size, loff_t *poffset)
 {
+	struct inode *inode = file->f_path.dentry->d_inode;
 	int rc = 0;
 	unsigned int bytes_written = 0;
 	unsigned int total_written;
@@ -964,7 +937,7 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
 	struct cifsTconInfo *pTcon;
 	int xid, long_op;
 	struct cifsFileInfo *open_file;
-	struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
+	struct cifsInodeInfo *cifsi = CIFS_I(inode);
 
 	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
 
@@ -1030,21 +1003,17 @@ ssize_t cifs_user_write(struct file *file, const char __user *write_data,
 
 	cifs_stats_bytes_written(pTcon, total_written);
 
-	/* since the write may have blocked check these pointers again */
-	if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
-		struct inode *inode = file->f_path.dentry->d_inode;
 /* Do not update local mtime - server will set its actual value on write
- *		inode->i_ctime = inode->i_mtime =
- * 			current_fs_time(inode->i_sb);*/
-		if (total_written > 0) {
-			spin_lock(&inode->i_lock);
-			if (*poffset > file->f_path.dentry->d_inode->i_size)
-				i_size_write(file->f_path.dentry->d_inode,
-					*poffset);
-			spin_unlock(&inode->i_lock);
-		}
-		mark_inode_dirty_sync(file->f_path.dentry->d_inode);
+ *	inode->i_ctime = inode->i_mtime =
+ * 		current_fs_time(inode->i_sb);*/
+	if (total_written > 0) {
+		spin_lock(&inode->i_lock);
+		if (*poffset > inode->i_size)
+			i_size_write(inode, *poffset);
+		spin_unlock(&inode->i_lock);
 	}
+	mark_inode_dirty_sync(inode);
+
 	FreeXid(xid);
 	return total_written;
 }
@@ -1139,7 +1108,6 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file,
 	return total_written;
 }
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
 					bool fsuid_only)
 {
@@ -1173,13 +1141,12 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
 	spin_unlock(&cifs_file_list_lock);
 	return NULL;
 }
-#endif
 
 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
 					bool fsuid_only)
 {
 	struct cifsFileInfo *open_file;
-	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
+	struct cifs_sb_info *cifs_sb;
 	bool any_available = false;
 	int rc;
 
@@ -1193,6 +1160,8 @@ struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
 		return NULL;
 	}
 
+	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
+
 	/* only filter by fsuid on multiuser mounts */
 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
 		fsuid_only = false;
@@ -1303,7 +1272,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
 static int cifs_writepages(struct address_space *mapping,
 			   struct writeback_control *wbc)
 {
-	struct backing_dev_info *bdi = mapping->backing_dev_info;
 	unsigned int bytes_to_write;
 	unsigned int bytes_written;
 	struct cifs_sb_info *cifs_sb;
@@ -1326,15 +1294,6 @@ static int cifs_writepages(struct address_space *mapping,
 	int scanned = 0;
 	int xid, long_op;
 
-	/*
-	 * BB: Is this meaningful for a non-block-device file system?
-	 * If it is, we should test it again after we do I/O
-	 */
-	if (wbc->nonblocking && bdi_write_congested(bdi)) {
-		wbc->encountered_congestion = 1;
-		return 0;
-	}
-
 	cifs_sb = CIFS_SB(mapping->host->i_sb);
 
 	/*
@@ -1363,6 +1322,7 @@ static int cifs_writepages(struct address_space *mapping,
 	if (!experimEnabled && tcon->ses->server->secMode &
 			(SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
 		cifsFileInfo_put(open_file);
+		kfree(iov);
 		return generic_writepages(mapping, wbc);
 	}
 	cifsFileInfo_put(open_file);
@@ -1488,12 +1448,7 @@ retry:
 			if (rc || bytes_written < bytes_to_write) {
 				cERROR(1, "Write2 ret %d, wrote %d",
 					  rc, bytes_written);
-				/* BB what if continued retry is
-				   requested via mount flags? */
-				if (rc == -ENOSPC)
-					set_bit(AS_ENOSPC, &mapping->flags);
-				else
-					set_bit(AS_EIO, &mapping->flags);
+				mapping_set_error(mapping, rc);
 			} else {
 				cifs_stats_bytes_written(tcon, bytes_written);
 			}
@@ -1638,11 +1593,10 @@ int cifs_fsync(struct file *file, int datasync)
 
 	rc = filemap_write_and_wait(inode->i_mapping);
 	if (rc == 0) {
-		rc = CIFS_I(inode)->write_behind_rc;
-		CIFS_I(inode)->write_behind_rc = 0;
+		struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+
 		tcon = tlink_tcon(smbfile->tlink);
-		if (!rc && tcon && smbfile &&
-		   !(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
+		if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
 			rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
 	}
 
@@ -1687,21 +1641,8 @@ int cifs_flush(struct file *file, fl_owner_t id)
 	struct inode *inode = file->f_path.dentry->d_inode;
 	int rc = 0;
 
-	/* Rather than do the steps manually:
-	   lock the inode for writing
-	   loop through pages looking for write behind data (dirty pages)
-	   coalesce into contiguous 16K (or smaller) chunks to write to server
-	   send to server (prefer in parallel)
-	   deal with writebehind errors
-	   unlock inode for writing
-	   filemapfdatawrite appears easier for the time being */
-
-	rc = filemap_fdatawrite(inode->i_mapping);
-	/* reset wb rc if we were able to write out dirty pages */
-	if (!rc) {
-		rc = CIFS_I(inode)->write_behind_rc;
-		CIFS_I(inode)->write_behind_rc = 0;
-	}
+	if (file->f_mode & FMODE_WRITE)
+		rc = filemap_write_and_wait(inode->i_mapping);
 
 	cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
 
@@ -2280,7 +2221,7 @@ void cifs_oplock_break(struct work_struct *work)
 						  oplock_break);
 	struct inode *inode = cfile->dentry->d_inode;
 	struct cifsInodeInfo *cinode = CIFS_I(inode);
-	int rc, waitrc = 0;
+	int rc = 0;
 
 	if (inode && S_ISREG(inode->i_mode)) {
 		if (cinode->clientCanCacheRead)
@@ -2289,13 +2230,10 @@ void cifs_oplock_break(struct work_struct *work)
 			break_lease(inode, O_WRONLY);
 		rc = filemap_fdatawrite(inode->i_mapping);
 		if (cinode->clientCanCacheRead == 0) {
-			waitrc = filemap_fdatawait(inode->i_mapping);
+			rc = filemap_fdatawait(inode->i_mapping);
+			mapping_set_error(inode->i_mapping, rc);
 			invalidate_remote_inode(inode);
 		}
-		if (!rc)
-			rc = waitrc;
-		if (rc)
-			cinode->write_behind_rc = rc;
 		cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
 	}
 
@@ -2314,7 +2252,7 @@ void cifs_oplock_break(struct work_struct *work)
 	/*
 	 * We might have kicked in before is_valid_oplock_break()
 	 * finished grabbing reference for us.  Make sure it's done by
-	 * waiting for GlobalSMSSeslock.
+	 * waiting for cifs_file_list_lock.
 	 */
 	spin_lock(&cifs_file_list_lock);
 	spin_unlock(&cifs_file_list_lock);
@@ -2322,6 +2260,7 @@ void cifs_oplock_break(struct work_struct *work)
 	cifs_oplock_break_put(cfile);
 }
 
+/* must be called while holding cifs_file_list_lock */
 void cifs_oplock_break_get(struct cifsFileInfo *cfile)
 {
 	cifs_sb_active(cfile->dentry->d_sb);
@@ -2330,8 +2269,10 @@ void cifs_oplock_break_get(struct cifsFileInfo *cfile)
 
 void cifs_oplock_break_put(struct cifsFileInfo *cfile)
 {
+	struct super_block *sb = cfile->dentry->d_sb;
+
 	cifsFileInfo_put(cfile);
-	cifs_sb_deactive(cfile->dentry->d_sb);
+	cifs_sb_deactive(sb);
 }
 
 const struct address_space_operations cifs_addr_ops = {
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index a2ad94efcfe..297a43d0ff7 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -2,7 +2,7 @@
  *   fs/cifs/fscache.c - CIFS filesystem cache interface
  *
  *   Copyright (c) 2010 Novell, Inc.
- *   Author(s): Suresh Jayaraman (sjayaraman@suse.de>
+ *   Author(s): Suresh Jayaraman <sjayaraman@suse.de>
  *
  *   This library is free software; you can redistribute it and/or modify
  *   it under the terms of the GNU Lesser General Public License as published
@@ -67,10 +67,12 @@ static void cifs_fscache_enable_inode_cookie(struct inode *inode)
 	if (cifsi->fscache)
 		return;
 
-	cifsi->fscache = fscache_acquire_cookie(tcon->fscache,
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_FSCACHE) {
+		cifsi->fscache = fscache_acquire_cookie(tcon->fscache,
 				&cifs_fscache_inode_object_def, cifsi);
-	cFYI(1, "CIFS: got FH cookie (0x%p/0x%p)", tcon->fscache,
+		cFYI(1, "CIFS: got FH cookie (0x%p/0x%p)", tcon->fscache,
 				cifsi->fscache);
+	}
 }
 
 void cifs_fscache_release_inode_cookie(struct inode *inode)
@@ -101,10 +103,8 @@ void cifs_fscache_set_inode_cookie(struct inode *inode, struct file *filp)
 {
 	if ((filp->f_flags & O_ACCMODE) != O_RDONLY)
 		cifs_fscache_disable_inode_cookie(inode);
-	else {
+	else
 		cifs_fscache_enable_inode_cookie(inode);
-		cFYI(1, "CIFS: fscache inode cookie set");
-	}
 }
 
 void cifs_fscache_reset_inode_cookie(struct inode *inode)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 94979309698..589f3e3f6e0 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -686,13 +686,18 @@ int cifs_get_inode_info(struct inode **pinode,
 			cFYI(1, "cifs_sfu_type failed: %d", tmprc);
 	}
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CONFIG_CIFS_ACL
 	/* fill in 0777 bits from ACL */
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
-		cFYI(1, "Getting mode bits from ACL");
-		cifs_acl_to_fattr(cifs_sb, &fattr, *pinode, full_path, pfid);
+		rc = cifs_acl_to_fattr(cifs_sb, &fattr, *pinode, full_path,
+						pfid);
+		if (rc) {
+			cFYI(1, "%s: Getting ACL failed with error: %d",
+				__func__, rc);
+			goto cgii_exit;
+		}
 	}
-#endif
+#endif /* CONFIG_CIFS_ACL */
 
 	/* fill in remaining high mode bits e.g. SUID, VTX */
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)
@@ -723,12 +728,12 @@ static const struct inode_operations cifs_ipc_inode_ops = {
 	.lookup = cifs_lookup,
 };
 
-char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb)
+char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb,
+				struct cifsTconInfo *tcon)
 {
 	int pplen = cifs_sb->prepathlen;
 	int dfsplen;
 	char *full_path = NULL;
-	struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb);
 
 	/* if no prefix path, simply set path to the root of share to "" */
 	if (pplen == 0) {
@@ -870,7 +875,7 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
 	char *full_path;
 	struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb);
 
-	full_path = cifs_build_path_to_root(cifs_sb);
+	full_path = cifs_build_path_to_root(cifs_sb, tcon);
 	if (full_path == NULL)
 		return ERR_PTR(-ENOMEM);
 
@@ -881,8 +886,10 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
 		rc = cifs_get_inode_info(&inode, full_path, NULL, sb,
 						xid, NULL);
 
-	if (!inode)
-		return ERR_PTR(rc);
+	if (!inode) {
+		inode = ERR_PTR(rc);
+		goto out;
+	}
 
 #ifdef CONFIG_CIFS_FSCACHE
 	/* populate tcon->resource_id */
@@ -898,13 +905,11 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
 		inode->i_uid = cifs_sb->mnt_uid;
 		inode->i_gid = cifs_sb->mnt_gid;
 	} else if (rc) {
-		kfree(full_path);
-		_FreeXid(xid);
 		iget_failed(inode);
-		return ERR_PTR(rc);
+		inode = ERR_PTR(rc);
 	}
 
-
+out:
 	kfree(full_path);
 	/* can not call macro FreeXid here since in a void func
 	 * TODO: This is no longer true
@@ -1648,6 +1653,7 @@ static bool
 cifs_inode_needs_reval(struct inode *inode)
 {
 	struct cifsInodeInfo *cifs_i = CIFS_I(inode);
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 
 	if (cifs_i->clientCanCacheRead)
 		return false;
@@ -1658,19 +1664,21 @@ cifs_inode_needs_reval(struct inode *inode)
 	if (cifs_i->time == 0)
 		return true;
 
-	/* FIXME: the actimeo should be tunable */
-	if (time_after_eq(jiffies, cifs_i->time + HZ))
+	if (!time_in_range(jiffies, cifs_i->time,
+				cifs_i->time + cifs_sb->actimeo))
 		return true;
 
 	/* hardlinked files w/ noserverino get "special" treatment */
-	if (!(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) &&
+	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) &&
 	    S_ISREG(inode->i_mode) && inode->i_nlink != 1)
 		return true;
 
 	return false;
 }
 
-/* check invalid_mapping flag and zap the cache if it's set */
+/*
+ * Zap the cache. Called when invalid_mapping flag is set.
+ */
 static void
 cifs_invalidate_mapping(struct inode *inode)
 {
@@ -1682,8 +1690,7 @@ cifs_invalidate_mapping(struct inode *inode)
 	/* write back any cached data */
 	if (inode->i_mapping && inode->i_mapping->nrpages != 0) {
 		rc = filemap_write_and_wait(inode->i_mapping);
-		if (rc)
-			cifs_i->write_behind_rc = rc;
+		mapping_set_error(inode->i_mapping, rc);
 	}
 	invalidate_remote_inode(inode);
 	cifs_fscache_reset_inode_cookie(inode);
@@ -1943,10 +1950,8 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
 	 * the flush returns error?
 	 */
 	rc = filemap_write_and_wait(inode->i_mapping);
-	if (rc != 0) {
-		cifsInode->write_behind_rc = rc;
-		rc = 0;
-	}
+	mapping_set_error(inode->i_mapping, rc);
+	rc = 0;
 
 	if (attrs->ia_valid & ATTR_SIZE) {
 		rc = cifs_set_file_size(inode, attrs, xid, full_path);
@@ -2087,10 +2092,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
 	 * the flush returns error?
 	 */
 	rc = filemap_write_and_wait(inode->i_mapping);
-	if (rc != 0) {
-		cifsInode->write_behind_rc = rc;
-		rc = 0;
-	}
+	mapping_set_error(inode->i_mapping, rc);
+	rc = 0;
 
 	if (attrs->ia_valid & ATTR_SIZE) {
 		rc = cifs_set_file_size(inode, attrs, xid, full_path);
@@ -2119,11 +2122,16 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
 
 	if (attrs->ia_valid & ATTR_MODE) {
 		rc = 0;
-#ifdef CONFIG_CIFS_EXPERIMENTAL
-		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL)
-			rc = mode_to_acl(inode, full_path, mode);
-		else
-#endif
+#ifdef CONFIG_CIFS_ACL
+		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
+			rc = mode_to_cifs_acl(inode, full_path, mode);
+			if (rc) {
+				cFYI(1, "%s: Setting ACL failed with error: %d",
+					__func__, rc);
+				goto cifs_setattr_exit;
+			}
+		} else
+#endif /* CONFIG_CIFS_ACL */
 		if (((mode & S_IWUGO) == 0) &&
 		    (cifsInode->cifsAttrs & ATTR_READONLY) == 0) {
 
@@ -2182,7 +2190,6 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
 
 	setattr_copy(inode, attrs);
 	mark_inode_dirty(inode);
-	return 0;
 
 cifs_setattr_exit:
 	kfree(full_path);
diff --git a/fs/cifs/ioctl.c b/fs/cifs/ioctl.c
index 077bf756f34..0c98672d012 100644
--- a/fs/cifs/ioctl.c
+++ b/fs/cifs/ioctl.c
@@ -38,10 +38,10 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
 	struct cifs_sb_info *cifs_sb;
 #ifdef CONFIG_CIFS_POSIX
 	struct cifsFileInfo *pSMBFile = filep->private_data;
-	struct cifsTconInfo *tcon = tlink_tcon(pSMBFile->tlink);
+	struct cifsTconInfo *tcon;
 	__u64	ExtAttrBits = 0;
 	__u64	ExtAttrMask = 0;
-	__u64   caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
+	__u64   caps;
 #endif /* CONFIG_CIFS_POSIX */
 
 	xid = GetXid();
@@ -62,9 +62,11 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
 			break;
 #ifdef CONFIG_CIFS_POSIX
 		case FS_IOC_GETFLAGS:
+			if (pSMBFile == NULL)
+				break;
+			tcon = tlink_tcon(pSMBFile->tlink);
+			caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
 			if (CIFS_UNIX_EXTATTR_CAP & caps) {
-				if (pSMBFile == NULL)
-					break;
 				rc = CIFSGetExtAttr(xid, tcon, pSMBFile->netfid,
 					&ExtAttrBits, &ExtAttrMask);
 				if (rc == 0)
@@ -75,13 +77,15 @@ long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
 			break;
 
 		case FS_IOC_SETFLAGS:
+			if (pSMBFile == NULL)
+				break;
+			tcon = tlink_tcon(pSMBFile->tlink);
+			caps = le64_to_cpu(tcon->fsUnixInfo.Capability);
 			if (CIFS_UNIX_EXTATTR_CAP & caps) {
 				if (get_user(ExtAttrBits, (int __user *)arg)) {
 					rc = -EFAULT;
 					break;
 				}
-				if (pSMBFile == NULL)
-					break;
 				/* rc= CIFSGetExtAttr(xid,tcon,pSMBFile->netfid,
 					extAttrBits, &ExtAttrMask);*/
 			}
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 1c681f6a680..43f10281bc1 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -569,15 +569,14 @@ is_valid_oplock_break(struct smb_hdr *buf, struct TCP_Server_Info *srv)
 
 				cFYI(1, "file id match, oplock break");
 				pCifsInode = CIFS_I(netfile->dentry->d_inode);
-				pCifsInode->clientCanCacheAll = false;
-				if (pSMB->OplockLevel == 0)
-					pCifsInode->clientCanCacheRead = false;
 
+				cifs_set_oplock_level(pCifsInode,
+						      pSMB->OplockLevel);
 				/*
 				 * cifs_oplock_break_put() can't be called
 				 * from here.  Get reference after queueing
 				 * succeeded.  cifs_oplock_break() will
-				 * synchronize using GlobalSMSSeslock.
+				 * synchronize using cifs_file_list_lock.
 				 */
 				if (queue_work(system_nrt_wq,
 					       &netfile->oplock_break))
@@ -722,3 +721,23 @@ cifs_autodisable_serverino(struct cifs_sb_info *cifs_sb)
 			   cifs_sb_master_tcon(cifs_sb)->treeName);
 	}
 }
+
+void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock)
+{
+	oplock &= 0xF;
+
+	if (oplock == OPLOCK_EXCLUSIVE) {
+		cinode->clientCanCacheAll = true;
+		cinode->clientCanCacheRead = true;
+		cFYI(1, "Exclusive Oplock granted on inode %p",
+		     &cinode->vfs_inode);
+	} else if (oplock == OPLOCK_READ) {
+		cinode->clientCanCacheAll = false;
+		cinode->clientCanCacheRead = true;
+		cFYI(1, "Level II Oplock granted on inode %p",
+		    &cinode->vfs_inode);
+	} else {
+		cinode->clientCanCacheAll = false;
+		cinode->clientCanCacheRead = false;
+	}
+}
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index ef7bb7b50f5..a73eb9f4bda 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -226,26 +226,29 @@ static int initiate_cifs_search(const int xid, struct file *file)
 	char *full_path = NULL;
 	struct cifsFileInfo *cifsFile;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
-	struct tcon_link *tlink;
+	struct tcon_link *tlink = NULL;
 	struct cifsTconInfo *pTcon;
 
-	tlink = cifs_sb_tlink(cifs_sb);
-	if (IS_ERR(tlink))
-		return PTR_ERR(tlink);
-	pTcon = tlink_tcon(tlink);
-
-	if (file->private_data == NULL)
-		file->private_data =
-			kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
 	if (file->private_data == NULL) {
-		rc = -ENOMEM;
-		goto error_exit;
+		tlink = cifs_sb_tlink(cifs_sb);
+		if (IS_ERR(tlink))
+			return PTR_ERR(tlink);
+
+		cifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
+		if (cifsFile == NULL) {
+			rc = -ENOMEM;
+			goto error_exit;
+		}
+		file->private_data = cifsFile;
+		cifsFile->tlink = cifs_get_tlink(tlink);
+		pTcon = tlink_tcon(tlink);
+	} else {
+		cifsFile = file->private_data;
+		pTcon = tlink_tcon(cifsFile->tlink);
 	}
 
-	cifsFile = file->private_data;
 	cifsFile->invalidHandle = true;
 	cifsFile->srch_inf.endOfSearch = false;
-	cifsFile->tlink = cifs_get_tlink(tlink);
 
 	full_path = build_path_from_dentry(file->f_path.dentry);
 	if (full_path == NULL) {
@@ -756,18 +759,6 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir,
 	rc = filldir(direntry, qstring.name, qstring.len, file->f_pos,
 		     ino, fattr.cf_dtype);
 
-	/*
-	 * we can not return filldir errors to the caller since they are
-	 * "normal" when the stat blocksize is too small - we return remapped
-	 * error instead
-	 *
-	 * FIXME: This looks bogus. filldir returns -EOVERFLOW in the above
-	 * case already. Why should we be clobbering other errors from it?
-	 */
-	if (rc) {
-		cFYI(1, "filldir rc = %d", rc);
-		rc = -EOVERFLOW;
-	}
 	dput(tmp_dentry);
 	return rc;
 }
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 2a11efd9659..7b01d3f6eed 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -32,9 +32,6 @@
 #include <linux/slab.h>
 #include "cifs_spnego.h"
 
-extern void SMBNTencrypt(unsigned char *passwd, unsigned char *c8,
-			 unsigned char *p24);
-
 /*
  * Checks if this is the first smb session to be reconnected after
  * the socket has been reestablished (so we know whether to use vc 0).
@@ -402,23 +399,22 @@ static int decode_ntlmssp_challenge(char *bcc_ptr, int blob_len,
 		return -EINVAL;
 	}
 
-	memcpy(ses->cryptKey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE);
+	memcpy(ses->ntlmssp->cryptkey, pblob->Challenge, CIFS_CRYPTO_KEY_SIZE);
 	/* BB we could decode pblob->NegotiateFlags; some may be useful */
 	/* In particular we can examine sign flags */
 	/* BB spec says that if AvId field of MsvAvTimestamp is populated then
 		we must set the MIC field of the AUTHENTICATE_MESSAGE */
-
+	ses->ntlmssp->server_flags = le32_to_cpu(pblob->NegotiateFlags);
 	tioffset = cpu_to_le16(pblob->TargetInfoArray.BufferOffset);
 	tilen = cpu_to_le16(pblob->TargetInfoArray.Length);
-	ses->tilen = tilen;
-	if (ses->tilen) {
-		ses->tiblob = kmalloc(tilen, GFP_KERNEL);
-		if (!ses->tiblob) {
+	if (tilen) {
+		ses->auth_key.response = kmalloc(tilen, GFP_KERNEL);
+		if (!ses->auth_key.response) {
 			cERROR(1, "Challenge target info allocation failure");
-			ses->tilen = 0;
 			return -ENOMEM;
 		}
-		memcpy(ses->tiblob,  bcc_ptr + tioffset, ses->tilen);
+		memcpy(ses->auth_key.response, bcc_ptr + tioffset, tilen);
+		ses->auth_key.len = tilen;
 	}
 
 	return 0;
@@ -443,10 +439,12 @@ static void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
 		NTLMSSP_NEGOTIATE_128 | NTLMSSP_NEGOTIATE_UNICODE |
 		NTLMSSP_NEGOTIATE_NTLM;
 	if (ses->server->secMode &
-	   (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
+			(SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
 		flags |= NTLMSSP_NEGOTIATE_SIGN;
-	if (ses->server->secMode & SECMODE_SIGN_REQUIRED)
-		flags |= NTLMSSP_NEGOTIATE_ALWAYS_SIGN;
+		if (!ses->server->session_estab)
+			flags |= NTLMSSP_NEGOTIATE_KEY_XCH |
+				NTLMSSP_NEGOTIATE_EXTENDED_SEC;
+	}
 
 	sec_blob->NegotiateFlags |= cpu_to_le32(flags);
 
@@ -469,11 +467,9 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 				   const struct nls_table *nls_cp)
 {
 	int rc;
-	unsigned int size;
 	AUTHENTICATE_MESSAGE *sec_blob = (AUTHENTICATE_MESSAGE *)pbuffer;
 	__u32 flags;
 	unsigned char *tmp;
-	struct ntlmv2_resp ntlmv2_response = {};
 
 	memcpy(sec_blob->Signature, NTLMSSP_SIGNATURE, 8);
 	sec_blob->MessageType = NtLmAuthenticate;
@@ -497,25 +493,19 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 	sec_blob->LmChallengeResponse.MaximumLength = 0;
 
 	sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
-	rc = setup_ntlmv2_rsp(ses, (char *)&ntlmv2_response, nls_cp);
+	rc = setup_ntlmv2_rsp(ses, nls_cp);
 	if (rc) {
 		cERROR(1, "Error %d during NTLMSSP authentication", rc);
 		goto setup_ntlmv2_ret;
 	}
-	size =  sizeof(struct ntlmv2_resp);
-	memcpy(tmp, (char *)&ntlmv2_response, size);
-	tmp += size;
-	if (ses->tilen > 0) {
-		memcpy(tmp, ses->tiblob, ses->tilen);
-		tmp += ses->tilen;
-	}
+	memcpy(tmp, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+			ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+	tmp += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
 
-	sec_blob->NtChallengeResponse.Length = cpu_to_le16(size + ses->tilen);
+	sec_blob->NtChallengeResponse.Length =
+			cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
 	sec_blob->NtChallengeResponse.MaximumLength =
-				cpu_to_le16(size + ses->tilen);
-	kfree(ses->tiblob);
-	ses->tiblob = NULL;
-	ses->tilen = 0;
+			cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
 
 	if (ses->domainName == NULL) {
 		sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
@@ -554,9 +544,19 @@ static int build_ntlmssp_auth_blob(unsigned char *pbuffer,
 	sec_blob->WorkstationName.MaximumLength = 0;
 	tmp += 2;
 
-	sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
-	sec_blob->SessionKey.Length = 0;
-	sec_blob->SessionKey.MaximumLength = 0;
+	if ((ses->ntlmssp->server_flags & NTLMSSP_NEGOTIATE_KEY_XCH) &&
+			!calc_seckey(ses)) {
+		memcpy(tmp, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
+		sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
+		sec_blob->SessionKey.Length = cpu_to_le16(CIFS_CPHTXT_SIZE);
+		sec_blob->SessionKey.MaximumLength =
+				cpu_to_le16(CIFS_CPHTXT_SIZE);
+		tmp += CIFS_CPHTXT_SIZE;
+	} else {
+		sec_blob->SessionKey.BufferOffset = cpu_to_le32(tmp - pbuffer);
+		sec_blob->SessionKey.Length = 0;
+		sec_blob->SessionKey.MaximumLength = 0;
+	}
 
 setup_ntlmv2_ret:
 	*buflen = tmp - pbuffer;
@@ -600,8 +600,16 @@ CIFS_SessSetup(unsigned int xid, struct cifsSesInfo *ses,
 		return -EINVAL;
 
 	type = ses->server->secType;
-
 	cFYI(1, "sess setup type %d", type);
+	if (type == RawNTLMSSP) {
+		/* if memory allocation is successful, caller of this function
+		 * frees it.
+		 */
+		ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL);
+		if (!ses->ntlmssp)
+			return -ENOMEM;
+	}
+
 ssetup_ntlmssp_authenticate:
 	if (phase == NtLmChallenge)
 		phase = NtLmAuthenticate; /* if ntlmssp, now final phase */
@@ -666,10 +674,14 @@ ssetup_ntlmssp_authenticate:
 		/* no capabilities flags in old lanman negotiation */
 
 		pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_SESS_KEY_SIZE);
-		/* BB calculate hash with password */
-		/* and copy into bcc */
 
-		calc_lanman_hash(ses->password, ses->cryptKey,
+		/* Calculate hash with password and copy into bcc_ptr.
+		 * Encryption Key (stored as in cryptkey) gets used if the
+		 * security mode bit in Negottiate Protocol response states
+		 * to use challenge/response method (i.e. Password bit is 1).
+		 */
+
+		calc_lanman_hash(ses->password, ses->server->cryptkey,
 				 ses->server->secMode & SECMODE_PW_ENCRYPT ?
 					true : false, lnm_session_key);
 
@@ -687,24 +699,27 @@ ssetup_ntlmssp_authenticate:
 		ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
 #endif
 	} else if (type == NTLM) {
-		char ntlm_session_key[CIFS_SESS_KEY_SIZE];
-
 		pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
 		pSMB->req_no_secext.CaseInsensitivePasswordLength =
-			cpu_to_le16(CIFS_SESS_KEY_SIZE);
+			cpu_to_le16(CIFS_AUTH_RESP_SIZE);
 		pSMB->req_no_secext.CaseSensitivePasswordLength =
-			cpu_to_le16(CIFS_SESS_KEY_SIZE);
+			cpu_to_le16(CIFS_AUTH_RESP_SIZE);
+
+		/* calculate ntlm response and session key */
+		rc = setup_ntlm_response(ses);
+		if (rc) {
+			cERROR(1, "Error %d during NTLM authentication", rc);
+			goto ssetup_exit;
+		}
 
-		/* calculate session key */
-		SMBNTencrypt(ses->password, ses->cryptKey, ntlm_session_key);
+		/* copy ntlm response */
+		memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+				CIFS_AUTH_RESP_SIZE);
+		bcc_ptr += CIFS_AUTH_RESP_SIZE;
+		memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+				CIFS_AUTH_RESP_SIZE);
+		bcc_ptr += CIFS_AUTH_RESP_SIZE;
 
-		cifs_calculate_session_key(&ses->auth_key,
-					ntlm_session_key, ses->password);
-		/* copy session key */
-		memcpy(bcc_ptr, (char *)ntlm_session_key, CIFS_SESS_KEY_SIZE);
-		bcc_ptr += CIFS_SESS_KEY_SIZE;
-		memcpy(bcc_ptr, (char *)ntlm_session_key, CIFS_SESS_KEY_SIZE);
-		bcc_ptr += CIFS_SESS_KEY_SIZE;
 		if (ses->capabilities & CAP_UNICODE) {
 			/* unicode strings must be word aligned */
 			if (iov[0].iov_len % 2) {
@@ -715,47 +730,26 @@ ssetup_ntlmssp_authenticate:
 		} else
 			ascii_ssetup_strings(&bcc_ptr, ses, nls_cp);
 	} else if (type == NTLMv2) {
-		char *v2_sess_key =
-			kmalloc(sizeof(struct ntlmv2_resp), GFP_KERNEL);
-
-		/* BB FIXME change all users of v2_sess_key to
-		   struct ntlmv2_resp */
-
-		if (v2_sess_key == NULL) {
-			rc = -ENOMEM;
-			goto ssetup_exit;
-		}
-
 		pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
 
 		/* LM2 password would be here if we supported it */
 		pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
-		/*	cpu_to_le16(LM2_SESS_KEY_SIZE); */
 
-		/* calculate session key */
-		rc = setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
+		/* calculate nlmv2 response and session key */
+		rc = setup_ntlmv2_rsp(ses, nls_cp);
 		if (rc) {
 			cERROR(1, "Error %d during NTLMv2 authentication", rc);
-			kfree(v2_sess_key);
 			goto ssetup_exit;
 		}
-		memcpy(bcc_ptr, (char *)v2_sess_key,
-				sizeof(struct ntlmv2_resp));
-		bcc_ptr += sizeof(struct ntlmv2_resp);
-		kfree(v2_sess_key);
+		memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+				ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+		bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
+
 		/* set case sensitive password length after tilen may get
 		 * assigned, tilen is 0 otherwise.
 		 */
 		pSMB->req_no_secext.CaseSensitivePasswordLength =
-			cpu_to_le16(sizeof(struct ntlmv2_resp) + ses->tilen);
-		if (ses->tilen > 0) {
-			memcpy(bcc_ptr, ses->tiblob, ses->tilen);
-			bcc_ptr += ses->tilen;
-			/* we never did allocate ses->domainName to free */
-			kfree(ses->tiblob);
-			ses->tiblob = NULL;
-			ses->tilen = 0;
-		}
+			cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
 
 		if (ses->capabilities & CAP_UNICODE) {
 			if (iov[0].iov_len % 2) {
@@ -768,6 +762,7 @@ ssetup_ntlmssp_authenticate:
 	} else if (type == Kerberos) {
 #ifdef CONFIG_CIFS_UPCALL
 		struct cifs_spnego_msg *msg;
+
 		spnego_key = cifs_get_spnego_key(ses);
 		if (IS_ERR(spnego_key)) {
 			rc = PTR_ERR(spnego_key);
@@ -785,16 +780,17 @@ ssetup_ntlmssp_authenticate:
 			rc = -EKEYREJECTED;
 			goto ssetup_exit;
 		}
-		/* bail out if key is too long */
-		if (msg->sesskey_len >
-		    sizeof(ses->auth_key.data.krb5)) {
-			cERROR(1, "Kerberos signing key too long (%u bytes)",
-				msg->sesskey_len);
-			rc = -EOVERFLOW;
+
+		ses->auth_key.response = kmalloc(msg->sesskey_len, GFP_KERNEL);
+		if (!ses->auth_key.response) {
+			cERROR(1, "Kerberos can't allocate (%u bytes) memory",
+					msg->sesskey_len);
+			rc = -ENOMEM;
 			goto ssetup_exit;
 		}
+		memcpy(ses->auth_key.response, msg->data, msg->sesskey_len);
 		ses->auth_key.len = msg->sesskey_len;
-		memcpy(ses->auth_key.data.krb5, msg->data, msg->sesskey_len);
+
 		pSMB->req.hdr.Flags2 |= SMBFLG2_EXT_SEC;
 		capabilities |= CAP_EXTENDED_SECURITY;
 		pSMB->req.Capabilities = cpu_to_le32(capabilities);
@@ -897,8 +893,6 @@ ssetup_ntlmssp_authenticate:
 			  CIFS_STD_OP /* not long */ | CIFS_LOG_ERROR);
 	/* SMB request buf freed in SendReceive2 */
 
-	cFYI(1, "ssetup rc from sendrecv2 is %d", rc);
-
 	pSMB = (SESSION_SETUP_ANDX *)iov[0].iov_base;
 	smb_buf = (struct smb_hdr *)iov[0].iov_base;
 
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index a66c91eb6eb..e0588cdf4cc 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -543,7 +543,7 @@ SendReceive2(const unsigned int xid, struct cifsSesInfo *ses,
 		    (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
 					     SECMODE_SIGN_ENABLED))) {
 			rc = cifs_verify_signature(midQ->resp_buf,
-						&ses->server->session_key,
+						ses->server,
 						midQ->sequence_number+1);
 			if (rc) {
 				cERROR(1, "Unexpected SMB signature");
@@ -731,7 +731,7 @@ SendReceive(const unsigned int xid, struct cifsSesInfo *ses,
 		    (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
 					     SECMODE_SIGN_ENABLED))) {
 			rc = cifs_verify_signature(out_buf,
-						&ses->server->session_key,
+						ses->server,
 						midQ->sequence_number+1);
 			if (rc) {
 				cERROR(1, "Unexpected SMB signature");
@@ -981,7 +981,7 @@ SendReceiveBlockingLock(const unsigned int xid, struct cifsTconInfo *tcon,
 	    (ses->server->secMode & (SECMODE_SIGN_REQUIRED |
 				     SECMODE_SIGN_ENABLED))) {
 		rc = cifs_verify_signature(out_buf,
-					   &ses->server->session_key,
+					   ses->server,
 					   midQ->sequence_number+1);
 		if (rc) {
 			cERROR(1, "Unexpected SMB signature");
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index a264b744bb4..eae2a149160 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -30,10 +30,11 @@
 
 #define MAX_EA_VALUE_SIZE 65535
 #define CIFS_XATTR_DOS_ATTRIB "user.DosAttrib"
+#define CIFS_XATTR_CIFS_ACL "system.cifs_acl"
 #define CIFS_XATTR_USER_PREFIX "user."
 #define CIFS_XATTR_SYSTEM_PREFIX "system."
 #define CIFS_XATTR_OS2_PREFIX "os2."
-#define CIFS_XATTR_SECURITY_PREFIX ".security"
+#define CIFS_XATTR_SECURITY_PREFIX "security."
 #define CIFS_XATTR_TRUSTED_PREFIX "trusted."
 #define XATTR_TRUSTED_PREFIX_LEN  8
 #define XATTR_SECURITY_PREFIX_LEN 9
@@ -277,29 +278,8 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
 				cifs_sb->local_nls,
 				cifs_sb->mnt_cifs_flags &
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
-#ifdef CONFIG_CIFS_EXPERIMENTAL
-		else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
-			__u16 fid;
-			int oplock = 0;
-			struct cifs_ntsd *pacl = NULL;
-			__u32 buflen = 0;
-			if (experimEnabled)
-				rc = CIFSSMBOpen(xid, pTcon, full_path,
-					FILE_OPEN, GENERIC_READ, 0, &fid,
-					&oplock, NULL, cifs_sb->local_nls,
-					cifs_sb->mnt_cifs_flags &
-					CIFS_MOUNT_MAP_SPECIAL_CHR);
-			/* else rc is EOPNOTSUPP from above */
-
-			if (rc == 0) {
-				rc = CIFSSMBGetCIFSACL(xid, pTcon, fid, &pacl,
-						      &buflen);
-				CIFSSMBClose(xid, pTcon, fid);
-			}
-		}
-#endif /* EXPERIMENTAL */
 #else
-		cFYI(1, "query POSIX ACL not supported yet");
+		cFYI(1, "Query POSIX ACL not supported yet");
 #endif /* CONFIG_CIFS_POSIX */
 	} else if (strncmp(ea_name, POSIX_ACL_XATTR_DEFAULT,
 			  strlen(POSIX_ACL_XATTR_DEFAULT)) == 0) {
@@ -311,8 +291,33 @@ ssize_t cifs_getxattr(struct dentry *direntry, const char *ea_name,
 				cifs_sb->mnt_cifs_flags &
 					CIFS_MOUNT_MAP_SPECIAL_CHR);
 #else
-		cFYI(1, "query POSIX default ACL not supported yet");
-#endif
+		cFYI(1, "Query POSIX default ACL not supported yet");
+#endif /* CONFIG_CIFS_POSIX */
+	} else if (strncmp(ea_name, CIFS_XATTR_CIFS_ACL,
+				strlen(CIFS_XATTR_CIFS_ACL)) == 0) {
+#ifdef CONFIG_CIFS_ACL
+			u32 acllen;
+			struct cifs_ntsd *pacl;
+
+			pacl = get_cifs_acl(cifs_sb, direntry->d_inode,
+						full_path, &acllen);
+			if (IS_ERR(pacl)) {
+				rc = PTR_ERR(pacl);
+				cERROR(1, "%s: error %zd getting sec desc",
+						__func__, rc);
+			} else {
+				if (ea_value) {
+					if (acllen > buf_size)
+						acllen = -ERANGE;
+					else
+						memcpy(ea_value, pacl, acllen);
+				}
+				rc = acllen;
+				kfree(pacl);
+			}
+#else
+		cFYI(1, "Query CIFS ACL not supported yet");
+#endif /* CONFIG_CIFS_ACL */
 	} else if (strncmp(ea_name,
 		  CIFS_XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) == 0) {
 		cFYI(1, "Trusted xattr namespace not supported yet");
diff --git a/fs/coda/dir.c b/fs/coda/dir.c
index 96fbeab77f2..5d8b3553960 100644
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -276,7 +276,7 @@ static int coda_link(struct dentry *source_de, struct inode *dir_inode,
 	}
 
 	coda_dir_update_mtime(dir_inode);
-	atomic_inc(&inode->i_count);
+	ihold(inode);
 	d_instantiate(de, inode);
 	inc_nlink(inode);
 	return 0;
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 7993b96ca34..5ea57c8c7f9 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -306,16 +306,16 @@ static int coda_statfs(struct dentry *dentry, struct kstatfs *buf)
 
 /* init_coda: used by filesystems.c to register coda */
 
-static int coda_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *coda_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_nodev(fs_type, flags, data, coda_fill_super, mnt);
+	return mount_nodev(fs_type, flags, data, coda_fill_super);
 }
 
 struct file_system_type coda_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "coda",
-	.get_sb		= coda_get_sb,
+	.mount		= coda_mount,
 	.kill_sb	= kill_anon_super,
 	.fs_flags	= FS_BINARY_MOUNTDATA,
 };
diff --git a/fs/compat.c b/fs/compat.c
index f03abdadc40..eb1740ac8c0 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -29,8 +29,6 @@
 #include <linux/vfs.h>
 #include <linux/ioctl.h>
 #include <linux/init.h>
-#include <linux/smb.h>
-#include <linux/smb_mount.h>
 #include <linux/ncp_mount.h>
 #include <linux/nfs4_mount.h>
 #include <linux/syscalls.h>
@@ -51,6 +49,7 @@
 #include <linux/eventpoll.h>
 #include <linux/fs_struct.h>
 #include <linux/slab.h>
+#include <linux/pagemap.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -608,14 +607,14 @@ ssize_t compat_rw_copy_check_uvector(int type,
 	/*
 	 * Single unix specification:
 	 * We should -EINVAL if an element length is not >= 0 and fitting an
-	 * ssize_t.  The total length is fitting an ssize_t
+	 * ssize_t.
 	 *
-	 * Be careful here because iov_len is a size_t not an ssize_t
+	 * In Linux, the total length is limited to MAX_RW_COUNT, there is
+	 * no overflow possibility.
 	 */
 	tot_len = 0;
 	ret = -EINVAL;
 	for (seg = 0; seg < nr_segs; seg++) {
-		compat_ssize_t tmp = tot_len;
 		compat_uptr_t buf;
 		compat_ssize_t len;
 
@@ -626,13 +625,13 @@ ssize_t compat_rw_copy_check_uvector(int type,
 		}
 		if (len < 0)	/* size_t not fitting in compat_ssize_t .. */
 			goto out;
-		tot_len += len;
-		if (tot_len < tmp) /* maths overflow on the compat_ssize_t */
-			goto out;
 		if (!access_ok(vrfy_dir(type), compat_ptr(buf), len)) {
 			ret = -EFAULT;
 			goto out;
 		}
+		if (len > MAX_RW_COUNT - tot_len)
+			len = MAX_RW_COUNT - tot_len;
+		tot_len += len;
 		iov->iov_base = compat_ptr(buf);
 		iov->iov_len = (compat_size_t) len;
 		uvector++;
@@ -745,30 +744,6 @@ static void *do_ncp_super_data_conv(void *raw_data)
 	return raw_data;
 }
 
-struct compat_smb_mount_data {
-	compat_int_t version;
-	__compat_uid_t mounted_uid;
-	__compat_uid_t uid;
-	__compat_gid_t gid;
-	compat_mode_t file_mode;
-	compat_mode_t dir_mode;
-};
-
-static void *do_smb_super_data_conv(void *raw_data)
-{
-	struct smb_mount_data *s = raw_data;
-	struct compat_smb_mount_data *c_s = raw_data;
-
-	if (c_s->version != SMB_MOUNT_OLDVERSION)
-		goto out;
-	s->dir_mode = c_s->dir_mode;
-	s->file_mode = c_s->file_mode;
-	s->gid = c_s->gid;
-	s->uid = c_s->uid;
-	s->mounted_uid = c_s->mounted_uid;
- out:
-	return raw_data;
-}
 
 struct compat_nfs_string {
 	compat_uint_t len;
@@ -835,7 +810,6 @@ static int do_nfs4_super_data_conv(void *raw_data)
 	return 0;
 }
 
-#define SMBFS_NAME      "smbfs"
 #define NCPFS_NAME      "ncpfs"
 #define NFS4_NAME	"nfs4"
 
@@ -870,9 +844,7 @@ asmlinkage long compat_sys_mount(const char __user * dev_name,
 	retval = -EINVAL;
 
 	if (kernel_type && data_page) {
-		if (!strcmp(kernel_type, SMBFS_NAME)) {
-			do_smb_super_data_conv((void *)data_page);
-		} else if (!strcmp(kernel_type, NCPFS_NAME)) {
+		if (!strcmp(kernel_type, NCPFS_NAME)) {
 			do_ncp_super_data_conv((void *)data_page);
 		} else if (!strcmp(kernel_type, NFS4_NAME)) {
 			if (do_nfs4_super_data_conv((void *) data_page))
@@ -1378,6 +1350,10 @@ static int compat_count(compat_uptr_t __user *argv, int max)
 			argv++;
 			if (i++ >= max)
 				return -E2BIG;
+
+			if (fatal_signal_pending(current))
+				return -ERESTARTNOHAND;
+			cond_resched();
 		}
 	}
 	return i;
@@ -1419,6 +1395,12 @@ static int compat_copy_strings(int argc, compat_uptr_t __user *argv,
 		while (len > 0) {
 			int offset, bytes_to_copy;
 
+			if (fatal_signal_pending(current)) {
+				ret = -ERESTARTNOHAND;
+				goto out;
+			}
+			cond_resched();
+
 			offset = pos % PAGE_SIZE;
 			if (offset == 0)
 				offset = PAGE_SIZE;
@@ -1435,18 +1417,8 @@ static int compat_copy_strings(int argc, compat_uptr_t __user *argv,
 			if (!kmapped_page || kpos != (pos & PAGE_MASK)) {
 				struct page *page;
 
-#ifdef CONFIG_STACK_GROWSUP
-				ret = expand_stack_downwards(bprm->vma, pos);
-				if (ret < 0) {
-					/* We've exceed the stack rlimit. */
-					ret = -E2BIG;
-					goto out;
-				}
-#endif
-				ret = get_user_pages(current, bprm->mm, pos,
-						     1, 1, 1, &page, NULL);
-				if (ret <= 0) {
-					/* We've exceed the stack rlimit. */
+				page = get_arg_page(bprm, pos, 1);
+				if (!page) {
 					ret = -E2BIG;
 					goto out;
 				}
@@ -1567,8 +1539,10 @@ int compat_do_execve(char * filename,
 	return retval;
 
 out:
-	if (bprm->mm)
+	if (bprm->mm) {
+		acct_arg_size(bprm, 0);
 		mmput(bprm->mm);
+	}
 
 out_file:
 	if (bprm->file) {
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index d0ad09d5778..a60579b007b 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -19,7 +19,6 @@
 #include <linux/compiler.h>
 #include <linux/sched.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/ioctl.h>
 #include <linux/if.h>
 #include <linux/if_bridge.h>
@@ -46,7 +45,6 @@
 #include <linux/videodev.h>
 #include <linux/netdevice.h>
 #include <linux/raw.h>
-#include <linux/smb_fs.h>
 #include <linux/blkdev.h>
 #include <linux/elevator.h>
 #include <linux/rtc.h>
@@ -558,25 +556,6 @@ static int mt_ioctl_trans(unsigned int fd, unsigned int cmd, void __user *argp)
 
 #endif /* CONFIG_BLOCK */
 
-static int do_smb_getmountuid(unsigned int fd, unsigned int cmd,
-			compat_uid_t __user *argp)
-{
-	mm_segment_t old_fs = get_fs();
-	__kernel_uid_t kuid;
-	int err;
-
-	cmd = SMB_IOC_GETMOUNTUID;
-
-	set_fs(KERNEL_DS);
-	err = sys_ioctl(fd, cmd, (unsigned long)&kuid);
-	set_fs(old_fs);
-
-	if (err >= 0)
-		err = put_user(kuid, argp);
-
-	return err;
-}
-
 /* Bluetooth ioctls */
 #define HCIUARTSETPROTO		_IOW('U', 200, int)
 #define HCIUARTGETPROTO		_IOR('U', 201, int)
@@ -1199,8 +1178,9 @@ COMPATIBLE_IOCTL(SOUND_MIXER_PRIVATE5)
 COMPATIBLE_IOCTL(SOUND_MIXER_GETLEVELS)
 COMPATIBLE_IOCTL(SOUND_MIXER_SETLEVELS)
 COMPATIBLE_IOCTL(OSS_GETVERSION)
-/* SMB ioctls which do not need any translations */
-COMPATIBLE_IOCTL(SMB_IOC_NEWCONN)
+/* Raw devices */
+COMPATIBLE_IOCTL(RAW_SETBIND)
+COMPATIBLE_IOCTL(RAW_GETBIND)
 /* Watchdog */
 COMPATIBLE_IOCTL(WDIOC_GETSUPPORT)
 COMPATIBLE_IOCTL(WDIOC_GETSTATUS)
@@ -1458,10 +1438,6 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
 	case MTIOCPOS32:
 		return mt_ioctl_trans(fd, cmd, argp);
 #endif
-	/* One SMB ioctl needs translations. */
-#define SMB_IOC_GETMOUNTUID_32 _IOR('u', 1, compat_uid_t)
-	case SMB_IOC_GETMOUNTUID_32:
-		return do_smb_getmountuid(fd, cmd, argp);
 	/* Serial */
 	case TIOCGSERIAL:
 	case TIOCSSERIAL:
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index cf78d44a8d6..253476d78ed 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -135,6 +135,7 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd)
 {
 	struct inode * inode = new_inode(configfs_sb);
 	if (inode) {
+		inode->i_ino = get_next_ino();
 		inode->i_mapping->a_ops = &configfs_aops;
 		inode->i_mapping->backing_dev_info = &configfs_backing_dev_info;
 		inode->i_op = &configfs_inode_operations;
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index 8c8d64230c2..7d3607febe1 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -104,16 +104,16 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
 	return 0;
 }
 
-static int configfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *configfs_do_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_single(fs_type, flags, data, configfs_fill_super, mnt);
+	return mount_single(fs_type, flags, data, configfs_fill_super);
 }
 
 static struct file_system_type configfs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "configfs",
-	.get_sb		= configfs_get_sb,
+	.mount		= configfs_do_mount,
 	.kill_sb	= kill_litter_super,
 };
 
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 1e7a33028d3..32fd5fe9ca0 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -533,17 +533,16 @@ static const struct super_operations cramfs_ops = {
 	.statfs		= cramfs_statfs,
 };
 
-static int cramfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *cramfs_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, cramfs_fill_super,
-			   mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, cramfs_fill_super);
 }
 
 static struct file_system_type cramfs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "cramfs",
-	.get_sb		= cramfs_get_sb,
+	.mount		= cramfs_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
diff --git a/fs/dcache.c b/fs/dcache.c
index 83293be4814..23702a9d4e6 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -67,33 +67,43 @@ struct dentry_stat_t dentry_stat = {
 	.age_limit = 45,
 };
 
-static void __d_free(struct dentry *dentry)
+static struct percpu_counter nr_dentry __cacheline_aligned_in_smp;
+static struct percpu_counter nr_dentry_unused __cacheline_aligned_in_smp;
+
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
+int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
+		   size_t *lenp, loff_t *ppos)
+{
+	dentry_stat.nr_dentry = percpu_counter_sum_positive(&nr_dentry);
+	dentry_stat.nr_unused = percpu_counter_sum_positive(&nr_dentry_unused);
+	return proc_dointvec(table, write, buffer, lenp, ppos);
+}
+#endif
+
+static void __d_free(struct rcu_head *head)
 {
+	struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
+
 	WARN_ON(!list_empty(&dentry->d_alias));
 	if (dname_external(dentry))
 		kfree(dentry->d_name.name);
 	kmem_cache_free(dentry_cache, dentry); 
 }
 
-static void d_callback(struct rcu_head *head)
-{
-	struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
-	__d_free(dentry);
-}
-
 /*
- * no dcache_lock, please.  The caller must decrement dentry_stat.nr_dentry
- * inside dcache_lock.
+ * no dcache_lock, please.
  */
 static void d_free(struct dentry *dentry)
 {
+	percpu_counter_dec(&nr_dentry);
 	if (dentry->d_op && dentry->d_op->d_release)
 		dentry->d_op->d_release(dentry);
+
 	/* if dentry was never inserted into hash, immediate free is OK */
 	if (hlist_unhashed(&dentry->d_hash))
-		__d_free(dentry);
+		__d_free(&dentry->d_u.d_rcu);
 	else
-		call_rcu(&dentry->d_u.d_rcu, d_callback);
+		call_rcu(&dentry->d_u.d_rcu, __d_free);
 }
 
 /*
@@ -123,37 +133,34 @@ static void dentry_iput(struct dentry * dentry)
 }
 
 /*
- * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held.
+ * dentry_lru_(add|del|move_tail) must be called with dcache_lock held.
  */
 static void dentry_lru_add(struct dentry *dentry)
 {
-	list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
-	dentry->d_sb->s_nr_dentry_unused++;
-	dentry_stat.nr_unused++;
-}
-
-static void dentry_lru_add_tail(struct dentry *dentry)
-{
-	list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
-	dentry->d_sb->s_nr_dentry_unused++;
-	dentry_stat.nr_unused++;
+	if (list_empty(&dentry->d_lru)) {
+		list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
+		dentry->d_sb->s_nr_dentry_unused++;
+		percpu_counter_inc(&nr_dentry_unused);
+	}
 }
 
 static void dentry_lru_del(struct dentry *dentry)
 {
 	if (!list_empty(&dentry->d_lru)) {
-		list_del(&dentry->d_lru);
+		list_del_init(&dentry->d_lru);
 		dentry->d_sb->s_nr_dentry_unused--;
-		dentry_stat.nr_unused--;
+		percpu_counter_dec(&nr_dentry_unused);
 	}
 }
 
-static void dentry_lru_del_init(struct dentry *dentry)
+static void dentry_lru_move_tail(struct dentry *dentry)
 {
-	if (likely(!list_empty(&dentry->d_lru))) {
-		list_del_init(&dentry->d_lru);
-		dentry->d_sb->s_nr_dentry_unused--;
-		dentry_stat.nr_unused--;
+	if (list_empty(&dentry->d_lru)) {
+		list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
+		dentry->d_sb->s_nr_dentry_unused++;
+		percpu_counter_inc(&nr_dentry_unused);
+	} else {
+		list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
 	}
 }
 
@@ -172,7 +179,6 @@ static struct dentry *d_kill(struct dentry *dentry)
 	struct dentry *parent;
 
 	list_del(&dentry->d_u.d_child);
-	dentry_stat.nr_dentry--;	/* For d_free, below */
 	/*drops the locks, at that point nobody can reach this dentry */
 	dentry_iput(dentry);
 	if (IS_ROOT(dentry))
@@ -237,13 +243,15 @@ repeat:
 		if (dentry->d_op->d_delete(dentry))
 			goto unhash_it;
 	}
+
 	/* Unreachable? Get rid of it */
  	if (d_unhashed(dentry))
 		goto kill_it;
-  	if (list_empty(&dentry->d_lru)) {
-  		dentry->d_flags |= DCACHE_REFERENCED;
-		dentry_lru_add(dentry);
-  	}
+
+	/* Otherwise leave it cached and ensure it's on the LRU */
+	dentry->d_flags |= DCACHE_REFERENCED;
+	dentry_lru_add(dentry);
+
  	spin_unlock(&dentry->d_lock);
 	spin_unlock(&dcache_lock);
 	return;
@@ -318,11 +326,10 @@ int d_invalidate(struct dentry * dentry)
 EXPORT_SYMBOL(d_invalidate);
 
 /* This should be called _only_ with dcache_lock held */
-
 static inline struct dentry * __dget_locked(struct dentry *dentry)
 {
 	atomic_inc(&dentry->d_count);
-	dentry_lru_del_init(dentry);
+	dentry_lru_del(dentry);
 	return dentry;
 }
 
@@ -441,73 +448,27 @@ static void prune_one_dentry(struct dentry * dentry)
 
 		if (dentry->d_op && dentry->d_op->d_delete)
 			dentry->d_op->d_delete(dentry);
-		dentry_lru_del_init(dentry);
+		dentry_lru_del(dentry);
 		__d_drop(dentry);
 		dentry = d_kill(dentry);
 		spin_lock(&dcache_lock);
 	}
 }
 
-/*
- * Shrink the dentry LRU on a given superblock.
- * @sb   : superblock to shrink dentry LRU.
- * @count: If count is NULL, we prune all dentries on superblock.
- * @flags: If flags is non-zero, we need to do special processing based on
- * which flags are set. This means we don't need to maintain multiple
- * similar copies of this loop.
- */
-static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
+static void shrink_dentry_list(struct list_head *list)
 {
-	LIST_HEAD(referenced);
-	LIST_HEAD(tmp);
 	struct dentry *dentry;
-	int cnt = 0;
 
-	BUG_ON(!sb);
-	BUG_ON((flags & DCACHE_REFERENCED) && count == NULL);
-	spin_lock(&dcache_lock);
-	if (count != NULL)
-		/* called from prune_dcache() and shrink_dcache_parent() */
-		cnt = *count;
-restart:
-	if (count == NULL)
-		list_splice_init(&sb->s_dentry_lru, &tmp);
-	else {
-		while (!list_empty(&sb->s_dentry_lru)) {
-			dentry = list_entry(sb->s_dentry_lru.prev,
-					struct dentry, d_lru);
-			BUG_ON(dentry->d_sb != sb);
+	while (!list_empty(list)) {
+		dentry = list_entry(list->prev, struct dentry, d_lru);
+		dentry_lru_del(dentry);
 
-			spin_lock(&dentry->d_lock);
-			/*
-			 * If we are honouring the DCACHE_REFERENCED flag and
-			 * the dentry has this flag set, don't free it. Clear
-			 * the flag and put it back on the LRU.
-			 */
-			if ((flags & DCACHE_REFERENCED)
-				&& (dentry->d_flags & DCACHE_REFERENCED)) {
-				dentry->d_flags &= ~DCACHE_REFERENCED;
-				list_move(&dentry->d_lru, &referenced);
-				spin_unlock(&dentry->d_lock);
-			} else {
-				list_move_tail(&dentry->d_lru, &tmp);
-				spin_unlock(&dentry->d_lock);
-				cnt--;
-				if (!cnt)
-					break;
-			}
-			cond_resched_lock(&dcache_lock);
-		}
-	}
-	while (!list_empty(&tmp)) {
-		dentry = list_entry(tmp.prev, struct dentry, d_lru);
-		dentry_lru_del_init(dentry);
-		spin_lock(&dentry->d_lock);
 		/*
 		 * We found an inuse dentry which was not removed from
 		 * the LRU because of laziness during lookup.  Do not free
 		 * it - just keep it off the LRU list.
 		 */
+		spin_lock(&dentry->d_lock);
 		if (atomic_read(&dentry->d_count)) {
 			spin_unlock(&dentry->d_lock);
 			continue;
@@ -516,13 +477,60 @@ restart:
 		/* dentry->d_lock was dropped in prune_one_dentry() */
 		cond_resched_lock(&dcache_lock);
 	}
-	if (count == NULL && !list_empty(&sb->s_dentry_lru))
-		goto restart;
-	if (count != NULL)
-		*count = cnt;
+}
+
+/**
+ * __shrink_dcache_sb - shrink the dentry LRU on a given superblock
+ * @sb:		superblock to shrink dentry LRU.
+ * @count:	number of entries to prune
+ * @flags:	flags to control the dentry processing
+ *
+ * If flags contains DCACHE_REFERENCED reference dentries will not be pruned.
+ */
+static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
+{
+	/* called from prune_dcache() and shrink_dcache_parent() */
+	struct dentry *dentry;
+	LIST_HEAD(referenced);
+	LIST_HEAD(tmp);
+	int cnt = *count;
+
+	spin_lock(&dcache_lock);
+	while (!list_empty(&sb->s_dentry_lru)) {
+		dentry = list_entry(sb->s_dentry_lru.prev,
+				struct dentry, d_lru);
+		BUG_ON(dentry->d_sb != sb);
+
+		/*
+		 * If we are honouring the DCACHE_REFERENCED flag and the
+		 * dentry has this flag set, don't free it.  Clear the flag
+		 * and put it back on the LRU.
+		 */
+		if (flags & DCACHE_REFERENCED) {
+			spin_lock(&dentry->d_lock);
+			if (dentry->d_flags & DCACHE_REFERENCED) {
+				dentry->d_flags &= ~DCACHE_REFERENCED;
+				list_move(&dentry->d_lru, &referenced);
+				spin_unlock(&dentry->d_lock);
+				cond_resched_lock(&dcache_lock);
+				continue;
+			}
+			spin_unlock(&dentry->d_lock);
+		}
+
+		list_move_tail(&dentry->d_lru, &tmp);
+		if (!--cnt)
+			break;
+		cond_resched_lock(&dcache_lock);
+	}
+
+	*count = cnt;
+	shrink_dentry_list(&tmp);
+
 	if (!list_empty(&referenced))
 		list_splice(&referenced, &sb->s_dentry_lru);
 	spin_unlock(&dcache_lock);
+
 }
 
 /**
@@ -538,7 +546,7 @@ static void prune_dcache(int count)
 {
 	struct super_block *sb, *p = NULL;
 	int w_count;
-	int unused = dentry_stat.nr_unused;
+	int unused = percpu_counter_sum_positive(&nr_dentry_unused);
 	int prune_ratio;
 	int pruned;
 
@@ -608,13 +616,19 @@ static void prune_dcache(int count)
  * shrink_dcache_sb - shrink dcache for a superblock
  * @sb: superblock
  *
- * Shrink the dcache for the specified super block. This
- * is used to free the dcache before unmounting a file
- * system
+ * Shrink the dcache for the specified super block. This is used to free
+ * the dcache before unmounting a file system.
  */
-void shrink_dcache_sb(struct super_block * sb)
+void shrink_dcache_sb(struct super_block *sb)
 {
-	__shrink_dcache_sb(sb, NULL, 0);
+	LIST_HEAD(tmp);
+
+	spin_lock(&dcache_lock);
+	while (!list_empty(&sb->s_dentry_lru)) {
+		list_splice_init(&sb->s_dentry_lru, &tmp);
+		shrink_dentry_list(&tmp);
+	}
+	spin_unlock(&dcache_lock);
 }
 EXPORT_SYMBOL(shrink_dcache_sb);
 
@@ -632,7 +646,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 
 	/* detach this root from the system */
 	spin_lock(&dcache_lock);
-	dentry_lru_del_init(dentry);
+	dentry_lru_del(dentry);
 	__d_drop(dentry);
 	spin_unlock(&dcache_lock);
 
@@ -646,7 +660,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 			spin_lock(&dcache_lock);
 			list_for_each_entry(loop, &dentry->d_subdirs,
 					    d_u.d_child) {
-				dentry_lru_del_init(loop);
+				dentry_lru_del(loop);
 				__d_drop(loop);
 				cond_resched_lock(&dcache_lock);
 			}
@@ -703,20 +717,13 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
 			 * otherwise we ascend to the parent and move to the
 			 * next sibling if there is one */
 			if (!parent)
-				goto out;
-
+				return;
 			dentry = parent;
-
 		} while (list_empty(&dentry->d_subdirs));
 
 		dentry = list_entry(dentry->d_subdirs.next,
 				    struct dentry, d_u.d_child);
 	}
-out:
-	/* several dentries were freed, need to correct nr_dentry */
-	spin_lock(&dcache_lock);
-	dentry_stat.nr_dentry -= detached;
-	spin_unlock(&dcache_lock);
 }
 
 /*
@@ -830,14 +837,15 @@ resume:
 		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
 		next = tmp->next;
 
-		dentry_lru_del_init(dentry);
 		/* 
 		 * move only zero ref count dentries to the end 
 		 * of the unused list for prune_dcache
 		 */
 		if (!atomic_read(&dentry->d_count)) {
-			dentry_lru_add_tail(dentry);
+			dentry_lru_move_tail(dentry);
 			found++;
+		} else {
+			dentry_lru_del(dentry);
 		}
 
 		/*
@@ -900,12 +908,16 @@ EXPORT_SYMBOL(shrink_dcache_parent);
  */
 static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 {
+	int nr_unused;
+
 	if (nr) {
 		if (!(gfp_mask & __GFP_FS))
 			return -1;
 		prune_dcache(nr);
 	}
-	return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
+
+	nr_unused = percpu_counter_sum_positive(&nr_dentry_unused);
+	return (nr_unused / 100) * sysctl_vfs_cache_pressure;
 }
 
 static struct shrinker dcache_shrinker = {
@@ -972,9 +984,10 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 	spin_lock(&dcache_lock);
 	if (parent)
 		list_add(&dentry->d_u.d_child, &parent->d_subdirs);
-	dentry_stat.nr_dentry++;
 	spin_unlock(&dcache_lock);
 
+	percpu_counter_inc(&nr_dentry);
+
 	return dentry;
 }
 EXPORT_SYMBOL(d_alloc);
@@ -1478,33 +1491,26 @@ out:
  * This is used by ncpfs in its readdir implementation.
  * Zero is returned in the dentry is invalid.
  */
- 
-int d_validate(struct dentry *dentry, struct dentry *dparent)
+int d_validate(struct dentry *dentry, struct dentry *parent)
 {
-	struct hlist_head *base;
-	struct hlist_node *lhp;
+	struct hlist_head *head = d_hash(parent, dentry->d_name.hash);
+	struct hlist_node *node;
+	struct dentry *d;
 
 	/* Check whether the ptr might be valid at all.. */
 	if (!kmem_ptr_validate(dentry_cache, dentry))
-		goto out;
-
-	if (dentry->d_parent != dparent)
-		goto out;
+		return 0;
+	if (dentry->d_parent != parent)
+		return 0;
 
-	spin_lock(&dcache_lock);
-	base = d_hash(dparent, dentry->d_name.hash);
-	hlist_for_each(lhp,base) { 
-		/* hlist_for_each_entry_rcu() not required for d_hash list
-		 * as it is parsed under dcache_lock
-		 */
-		if (dentry == hlist_entry(lhp, struct dentry, d_hash)) {
-			__dget_locked(dentry);
-			spin_unlock(&dcache_lock);
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(d, node, head, d_hash) {
+		if (d == dentry) {
+			dget(dentry);
 			return 1;
 		}
 	}
-	spin_unlock(&dcache_lock);
-out:
+	rcu_read_unlock();
 	return 0;
 }
 EXPORT_SYMBOL(d_validate);
@@ -1994,7 +2000,7 @@ global_root:
  * Returns a pointer into the buffer or an error code if the
  * path was too long.
  *
- * "buflen" should be positive. Caller holds the dcache_lock.
+ * "buflen" should be positive.
  *
  * If path is not reachable from the supplied root, then the value of
  * root is changed (without modifying refcounts).
@@ -2006,10 +2012,12 @@ char *__d_path(const struct path *path, struct path *root,
 	int error;
 
 	prepend(&res, &buflen, "\0", 1);
+	spin_lock(&dcache_lock);
 	error = prepend_path(path, root, &res, &buflen);
+	spin_unlock(&dcache_lock);
+
 	if (error)
 		return ERR_PTR(error);
-
 	return res;
 }
 
@@ -2419,6 +2427,9 @@ static void __init dcache_init(void)
 {
 	int loop;
 
+	percpu_counter_init(&nr_dentry, 0);
+	percpu_counter_init(&nr_dentry_unused, 0);
+
 	/* 
 	 * A constructor could be added for stable state like the lists,
 	 * but it is probably not worth it because of the cache nature
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 30a87b3dbca..37a8ca7c122 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -40,6 +40,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d
 	struct inode *inode = new_inode(sb);
 
 	if (inode) {
+		inode->i_ino = get_next_ino();
 		inode->i_mode = mode;
 		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
 		switch (mode & S_IFMT) {
@@ -134,17 +135,17 @@ static int debug_fill_super(struct super_block *sb, void *data, int silent)
 	return simple_fill_super(sb, DEBUGFS_MAGIC, debug_files);
 }
 
-static int debug_get_sb(struct file_system_type *fs_type,
+static struct dentry *debug_mount(struct file_system_type *fs_type,
 			int flags, const char *dev_name,
-			void *data, struct vfsmount *mnt)
+			void *data)
 {
-	return get_sb_single(fs_type, flags, data, debug_fill_super, mnt);
+	return mount_single(fs_type, flags, data, debug_fill_super);
 }
 
 static struct file_system_type debug_fs_type = {
 	.owner =	THIS_MODULE,
 	.name =		"debugfs",
-	.get_sb =	debug_get_sb,
+	.mount =	debug_mount,
 	.kill_sb =	kill_litter_super,
 };
 
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 8b3ffd5b523..1bb547c9cad 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -331,7 +331,7 @@ static int compare_init_pts_sb(struct super_block *s, void *p)
 }
 
 /*
- * devpts_get_sb()
+ * devpts_mount()
  *
  *     If the '-o newinstance' mount option was specified, mount a new
  *     (private) instance of devpts.  PTYs created in this instance are
@@ -345,20 +345,20 @@ static int compare_init_pts_sb(struct super_block *s, void *p)
  *     semantics in devpts while preserving backward compatibility of the
  *     current 'single-namespace' semantics. i.e all mounts of devpts
  *     without the 'newinstance' mount option should bind to the initial
- *     kernel mount, like get_sb_single().
+ *     kernel mount, like mount_single().
  *
  *     Mounts with 'newinstance' option create a new, private namespace.
  *
  *     NOTE:
  *
- *     For single-mount semantics, devpts cannot use get_sb_single(),
- *     because get_sb_single()/sget() find and use the super-block from
+ *     For single-mount semantics, devpts cannot use mount_single(),
+ *     because mount_single()/sget() find and use the super-block from
  *     the most recent mount of devpts. But that recent mount may be a
- *     'newinstance' mount and get_sb_single() would pick the newinstance
+ *     'newinstance' mount and mount_single() would pick the newinstance
  *     super-block instead of the initial super-block.
  */
-static int devpts_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *devpts_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
 	int error;
 	struct pts_mount_opts opts;
@@ -366,7 +366,7 @@ static int devpts_get_sb(struct file_system_type *fs_type,
 
 	error = parse_mount_options(data, PARSE_MOUNT, &opts);
 	if (error)
-		return error;
+		return ERR_PTR(error);
 
 	if (opts.newinstance)
 		s = sget(fs_type, NULL, set_anon_super, NULL);
@@ -374,7 +374,7 @@ static int devpts_get_sb(struct file_system_type *fs_type,
 		s = sget(fs_type, compare_init_pts_sb, set_anon_super, NULL);
 
 	if (IS_ERR(s))
-		return PTR_ERR(s);
+		return ERR_CAST(s);
 
 	if (!s->s_root) {
 		s->s_flags = flags;
@@ -390,13 +390,11 @@ static int devpts_get_sb(struct file_system_type *fs_type,
 	if (error)
 		goto out_undo_sget;
 
-	simple_set_mnt(mnt, s);
-
-	return 0;
+	return dget(s->s_root);
 
 out_undo_sget:
 	deactivate_locked_super(s);
-	return error;
+	return ERR_PTR(error);
 }
 
 #else
@@ -404,10 +402,10 @@ out_undo_sget:
  * This supports only the legacy single-instance semantics (no
  * multiple-instance semantics)
  */
-static int devpts_get_sb(struct file_system_type *fs_type, int flags,
-		const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *devpts_mount(struct file_system_type *fs_type, int flags,
+		const char *dev_name, void *data)
 {
-	return get_sb_single(fs_type, flags, data, devpts_fill_super, mnt);
+	return mount_single(fs_type, flags, data, devpts_fill_super);
 }
 #endif
 
@@ -421,7 +419,7 @@ static void devpts_kill_sb(struct super_block *sb)
 
 static struct file_system_type devpts_fs_type = {
 	.name		= "devpts",
-	.get_sb		= devpts_get_sb,
+	.mount		= devpts_mount,
 	.kill_sb	= devpts_kill_sb,
 };
 
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 48d74c7391d..85882f6ba5f 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -218,7 +218,7 @@ static struct page *dio_get_page(struct dio *dio)
  * filesystems can use it to hold additional state between get_block calls and
  * dio_complete.
  */
-static int dio_complete(struct dio *dio, loff_t offset, int ret, bool is_async)
+static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is_async)
 {
 	ssize_t transferred = 0;
 
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 40186b95942..413a3c48f0b 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -377,6 +377,7 @@ struct ecryptfs_mount_crypt_stat {
 #define ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES      0x00000010
 #define ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK   0x00000020
 #define ECRYPTFS_GLOBAL_ENCFN_USE_FEK          0x00000040
+#define ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY    0x00000080
 	u32 flags;
 	struct list_head global_auth_tok_list;
 	struct mutex global_auth_tok_list_mutex;
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 3fbc9420338..9d1a22d6276 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -32,6 +32,7 @@
 #include <linux/crypto.h>
 #include <linux/fs_stack.h>
 #include <linux/slab.h>
+#include <linux/xattr.h>
 #include <asm/unaligned.h>
 #include "ecryptfs_kernel.h"
 
@@ -70,15 +71,19 @@ ecryptfs_create_underlying_file(struct inode *lower_dir_inode,
 	struct vfsmount *lower_mnt = ecryptfs_dentry_to_lower_mnt(dentry);
 	struct dentry *dentry_save;
 	struct vfsmount *vfsmount_save;
+	unsigned int flags_save;
 	int rc;
 
 	dentry_save = nd->path.dentry;
 	vfsmount_save = nd->path.mnt;
+	flags_save = nd->flags;
 	nd->path.dentry = lower_dentry;
 	nd->path.mnt = lower_mnt;
+	nd->flags &= ~LOOKUP_OPEN;
 	rc = vfs_create(lower_dir_inode, lower_dentry, mode, nd);
 	nd->path.dentry = dentry_save;
 	nd->path.mnt = vfsmount_save;
+	nd->flags = flags_save;
 	return rc;
 }
 
@@ -1108,10 +1113,8 @@ ecryptfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 		rc = -EOPNOTSUPP;
 		goto out;
 	}
-	mutex_lock(&lower_dentry->d_inode->i_mutex);
-	rc = lower_dentry->d_inode->i_op->setxattr(lower_dentry, name, value,
-						   size, flags);
-	mutex_unlock(&lower_dentry->d_inode->i_mutex);
+
+	rc = vfs_setxattr(lower_dentry, name, value, size, flags);
 out:
 	return rc;
 }
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 73811cfa2ea..b1f6858a522 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -446,6 +446,7 @@ out:
  */
 static int
 ecryptfs_find_auth_tok_for_sig(
+	struct key **auth_tok_key,
 	struct ecryptfs_auth_tok **auth_tok,
 	struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
 	char *sig)
@@ -453,12 +454,21 @@ ecryptfs_find_auth_tok_for_sig(
 	struct ecryptfs_global_auth_tok *global_auth_tok;
 	int rc = 0;
 
+	(*auth_tok_key) = NULL;
 	(*auth_tok) = NULL;
 	if (ecryptfs_find_global_auth_tok_for_sig(&global_auth_tok,
 						  mount_crypt_stat, sig)) {
-		struct key *auth_tok_key;
 
-		rc = ecryptfs_keyring_auth_tok_for_sig(&auth_tok_key, auth_tok,
+		/* if the flag ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY is set in the
+		 * mount_crypt_stat structure, we prevent to use auth toks that
+		 * are not inserted through the ecryptfs_add_global_auth_tok
+		 * function.
+		 */
+		if (mount_crypt_stat->flags
+				& ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY)
+			return -EINVAL;
+
+		rc = ecryptfs_keyring_auth_tok_for_sig(auth_tok_key, auth_tok,
 						       sig);
 	} else
 		(*auth_tok) = global_auth_tok->global_auth_tok;
@@ -509,6 +519,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
 			     char *filename, size_t filename_size)
 {
 	struct ecryptfs_write_tag_70_packet_silly_stack *s;
+	struct key *auth_tok_key = NULL;
 	int rc = 0;
 
 	s = kmalloc(sizeof(*s), GFP_KERNEL);
@@ -606,6 +617,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
 	}
 	dest[s->i++] = s->cipher_code;
 	rc = ecryptfs_find_auth_tok_for_sig(
+		&auth_tok_key,
 		&s->auth_tok, mount_crypt_stat,
 		mount_crypt_stat->global_default_fnek_sig);
 	if (rc) {
@@ -753,6 +765,8 @@ out_free_unlock:
 out_unlock:
 	mutex_unlock(s->tfm_mutex);
 out:
+	if (auth_tok_key)
+		key_put(auth_tok_key);
 	kfree(s);
 	return rc;
 }
@@ -798,6 +812,7 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
 			     char *data, size_t max_packet_size)
 {
 	struct ecryptfs_parse_tag_70_packet_silly_stack *s;
+	struct key *auth_tok_key = NULL;
 	int rc = 0;
 
 	(*packet_size) = 0;
@@ -910,7 +925,8 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
 	 * >= ECRYPTFS_MAX_IV_BYTES. */
 	memset(s->iv, 0, ECRYPTFS_MAX_IV_BYTES);
 	s->desc.info = s->iv;
-	rc = ecryptfs_find_auth_tok_for_sig(&s->auth_tok, mount_crypt_stat,
+	rc = ecryptfs_find_auth_tok_for_sig(&auth_tok_key,
+					    &s->auth_tok, mount_crypt_stat,
 					    s->fnek_sig_hex);
 	if (rc) {
 		printk(KERN_ERR "%s: Error attempting to find auth tok for "
@@ -986,6 +1002,8 @@ out:
 		(*filename_size) = 0;
 		(*filename) = NULL;
 	}
+	if (auth_tok_key)
+		key_put(auth_tok_key);
 	kfree(s);
 	return rc;
 }
@@ -1557,14 +1575,19 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
 		       ECRYPTFS_VERSION_MAJOR,
 		       ECRYPTFS_VERSION_MINOR);
 		rc = -EINVAL;
-		goto out;
+		goto out_release_key;
 	}
 	if ((*auth_tok)->token_type != ECRYPTFS_PASSWORD
 	    && (*auth_tok)->token_type != ECRYPTFS_PRIVATE_KEY) {
 		printk(KERN_ERR "Invalid auth_tok structure "
 		       "returned from key query\n");
 		rc = -EINVAL;
-		goto out;
+		goto out_release_key;
+	}
+out_release_key:
+	if (rc) {
+		key_put(*auth_tok_key);
+		(*auth_tok_key) = NULL;
 	}
 out:
 	return rc;
@@ -1688,6 +1711,7 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
 	struct ecryptfs_auth_tok_list_item *auth_tok_list_item;
 	size_t tag_11_contents_size;
 	size_t tag_11_packet_size;
+	struct key *auth_tok_key = NULL;
 	int rc = 0;
 
 	INIT_LIST_HEAD(&auth_tok_list);
@@ -1784,6 +1808,10 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
 	 * just one will be sufficient to decrypt to get the FEK. */
 find_next_matching_auth_tok:
 	found_auth_tok = 0;
+	if (auth_tok_key) {
+		key_put(auth_tok_key);
+		auth_tok_key = NULL;
+	}
 	list_for_each_entry(auth_tok_list_item, &auth_tok_list, list) {
 		candidate_auth_tok = &auth_tok_list_item->auth_tok;
 		if (unlikely(ecryptfs_verbosity > 0)) {
@@ -1800,10 +1828,11 @@ find_next_matching_auth_tok:
 			rc = -EINVAL;
 			goto out_wipe_list;
 		}
-		ecryptfs_find_auth_tok_for_sig(&matching_auth_tok,
+		rc = ecryptfs_find_auth_tok_for_sig(&auth_tok_key,
+					       &matching_auth_tok,
 					       crypt_stat->mount_crypt_stat,
 					       candidate_auth_tok_sig);
-		if (matching_auth_tok) {
+		if (!rc) {
 			found_auth_tok = 1;
 			goto found_matching_auth_tok;
 		}
@@ -1866,6 +1895,8 @@ found_matching_auth_tok:
 out_wipe_list:
 	wipe_auth_tok_list(&auth_tok_list);
 out:
+	if (auth_tok_key)
+		key_put(auth_tok_key);
 	return rc;
 }
 
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index cbd4e18adb2..a9dbd62518e 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -208,7 +208,8 @@ enum { ecryptfs_opt_sig, ecryptfs_opt_ecryptfs_sig,
        ecryptfs_opt_passthrough, ecryptfs_opt_xattr_metadata,
        ecryptfs_opt_encrypted_view, ecryptfs_opt_fnek_sig,
        ecryptfs_opt_fn_cipher, ecryptfs_opt_fn_cipher_key_bytes,
-       ecryptfs_opt_unlink_sigs, ecryptfs_opt_err };
+       ecryptfs_opt_unlink_sigs, ecryptfs_opt_mount_auth_tok_only,
+       ecryptfs_opt_err };
 
 static const match_table_t tokens = {
 	{ecryptfs_opt_sig, "sig=%s"},
@@ -223,6 +224,7 @@ static const match_table_t tokens = {
 	{ecryptfs_opt_fn_cipher, "ecryptfs_fn_cipher=%s"},
 	{ecryptfs_opt_fn_cipher_key_bytes, "ecryptfs_fn_key_bytes=%u"},
 	{ecryptfs_opt_unlink_sigs, "ecryptfs_unlink_sigs"},
+	{ecryptfs_opt_mount_auth_tok_only, "ecryptfs_mount_auth_tok_only"},
 	{ecryptfs_opt_err, NULL}
 };
 
@@ -406,6 +408,10 @@ static int ecryptfs_parse_options(struct ecryptfs_sb_info *sbi, char *options)
 		case ecryptfs_opt_unlink_sigs:
 			mount_crypt_stat->flags |= ECRYPTFS_UNLINK_SIGS;
 			break;
+		case ecryptfs_opt_mount_auth_tok_only:
+			mount_crypt_stat->flags |=
+				ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY;
+			break;
 		case ecryptfs_opt_err:
 		default:
 			printk(KERN_WARNING
@@ -540,9 +546,8 @@ out:
  *                        ecryptfs_interpose to perform most of the linking
  * ecryptfs_interpose(): links the lower filesystem into ecryptfs (inode.c)
  */
-static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags,
-			const char *dev_name, void *raw_data,
-			struct vfsmount *mnt)
+static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags,
+			const char *dev_name, void *raw_data)
 {
 	struct super_block *s;
 	struct ecryptfs_sb_info *sbi;
@@ -607,8 +612,7 @@ static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags,
 		err = "Reading sb failed";
 		goto out;
 	}
-	simple_set_mnt(mnt, s);
-	return 0;
+	return dget(s->s_root);
 
 out:
 	if (sbi) {
@@ -616,7 +620,7 @@ out:
 		kmem_cache_free(ecryptfs_sb_info_cache, sbi);
 	}
 	printk(KERN_ERR "%s; rc = [%d]\n", err, rc);
-	return rc;
+	return ERR_PTR(rc);
 }
 
 /**
@@ -639,7 +643,7 @@ static void ecryptfs_kill_block_super(struct super_block *sb)
 static struct file_system_type ecryptfs_fs_type = {
 	.owner = THIS_MODULE,
 	.name = "ecryptfs",
-	.get_sb = ecryptfs_get_sb,
+	.mount = ecryptfs_mount,
 	.kill_sb = ecryptfs_kill_block_super,
 	.fs_flags = 0
 };
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index f7fc286a3aa..2720178b771 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -28,7 +28,6 @@
 #include <linux/key.h>
 #include <linux/slab.h>
 #include <linux/seq_file.h>
-#include <linux/smp_lock.h>
 #include <linux/file.h>
 #include <linux/crypto.h>
 #include "ecryptfs_kernel.h"
@@ -180,6 +179,8 @@ static int ecryptfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 		seq_printf(m, ",ecryptfs_encrypted_view");
 	if (mount_crypt_stat->flags & ECRYPTFS_UNLINK_SIGS)
 		seq_printf(m, ",ecryptfs_unlink_sigs");
+	if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_MOUNT_AUTH_TOK_ONLY)
+		seq_printf(m, ",ecryptfs_mount_auth_tok_only");
 
 	return 0;
 }
diff --git a/fs/efs/super.c b/fs/efs/super.c
index f0494281081..5073a07652c 100644
--- a/fs/efs/super.c
+++ b/fs/efs/super.c
@@ -20,16 +20,16 @@
 static int efs_statfs(struct dentry *dentry, struct kstatfs *buf);
 static int efs_fill_super(struct super_block *s, void *d, int silent);
 
-static int efs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *efs_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, efs_fill_super, mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, efs_fill_super);
 }
 
 static struct file_system_type efs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "efs",
-	.get_sb		= efs_get_sb,
+	.mount		= efs_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 256bb7bb102..8cf07242067 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -77,9 +77,6 @@
 /* Maximum number of nesting allowed inside epoll sets */
 #define EP_MAX_NESTS 4
 
-/* Maximum msec timeout value storeable in a long int */
-#define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ)
-
 #define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event))
 
 #define EP_UNACTIVE_PTR ((void *) -1L)
@@ -1117,18 +1114,22 @@ static int ep_send_events(struct eventpoll *ep,
 static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
 		   int maxevents, long timeout)
 {
-	int res, eavail;
+	int res, eavail, timed_out = 0;
 	unsigned long flags;
-	long jtimeout;
+	long slack;
 	wait_queue_t wait;
-
-	/*
-	 * Calculate the timeout by checking for the "infinite" value (-1)
-	 * and the overflow condition. The passed timeout is in milliseconds,
-	 * that why (t * HZ) / 1000.
-	 */
-	jtimeout = (timeout < 0 || timeout >= EP_MAX_MSTIMEO) ?
-		MAX_SCHEDULE_TIMEOUT : (timeout * HZ + 999) / 1000;
+	struct timespec end_time;
+	ktime_t expires, *to = NULL;
+
+	if (timeout > 0) {
+		ktime_get_ts(&end_time);
+		timespec_add_ns(&end_time, (u64)timeout * NSEC_PER_MSEC);
+		slack = select_estimate_accuracy(&end_time);
+		to = &expires;
+		*to = timespec_to_ktime(end_time);
+	} else if (timeout == 0) {
+		timed_out = 1;
+	}
 
 retry:
 	spin_lock_irqsave(&ep->lock, flags);
@@ -1150,7 +1151,7 @@ retry:
 			 * to TASK_INTERRUPTIBLE before doing the checks.
 			 */
 			set_current_state(TASK_INTERRUPTIBLE);
-			if (!list_empty(&ep->rdllist) || !jtimeout)
+			if (!list_empty(&ep->rdllist) || timed_out)
 				break;
 			if (signal_pending(current)) {
 				res = -EINTR;
@@ -1158,7 +1159,9 @@ retry:
 			}
 
 			spin_unlock_irqrestore(&ep->lock, flags);
-			jtimeout = schedule_timeout(jtimeout);
+			if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
+				timed_out = 1;
+
 			spin_lock_irqsave(&ep->lock, flags);
 		}
 		__remove_wait_queue(&ep->wq, &wait);
@@ -1176,7 +1179,7 @@ retry:
 	 * more luck.
 	 */
 	if (!res && eavail &&
-	    !(res = ep_send_events(ep, events, maxevents)) && jtimeout)
+	    !(res = ep_send_events(ep, events, maxevents)) && !timed_out)
 		goto retry;
 
 	return res;
diff --git a/fs/exec.c b/fs/exec.c
index 6d2b6f93685..c62efcb959c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -54,6 +54,7 @@
 #include <linux/fsnotify.h>
 #include <linux/fs_struct.h>
 #include <linux/pipe_fs_i.h>
+#include <linux/oom.h>
 
 #include <asm/uaccess.h>
 #include <asm/mmu_context.h>
@@ -65,6 +66,12 @@ char core_pattern[CORENAME_MAX_SIZE] = "core";
 unsigned int core_pipe_limit;
 int suid_dumpable = 0;
 
+struct core_name {
+	char *corename;
+	int used, size;
+};
+static atomic_t call_count = ATOMIC_INIT(1);
+
 /* The maximal length of core_pattern is also specified in sysctl.c */
 
 static LIST_HEAD(formats);
@@ -157,7 +164,26 @@ out:
 
 #ifdef CONFIG_MMU
 
-static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
+{
+	struct mm_struct *mm = current->mm;
+	long diff = (long)(pages - bprm->vma_pages);
+
+	if (!mm || !diff)
+		return;
+
+	bprm->vma_pages = pages;
+
+#ifdef SPLIT_RSS_COUNTING
+	add_mm_counter(mm, MM_ANONPAGES, diff);
+#else
+	spin_lock(&mm->page_table_lock);
+	add_mm_counter(mm, MM_ANONPAGES, diff);
+	spin_unlock(&mm->page_table_lock);
+#endif
+}
+
+struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 		int write)
 {
 	struct page *page;
@@ -179,6 +205,8 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 		unsigned long size = bprm->vma->vm_end - bprm->vma->vm_start;
 		struct rlimit *rlim;
 
+		acct_arg_size(bprm, size / PAGE_SIZE);
+
 		/*
 		 * We've historically supported up to 32 pages (ARG_MAX)
 		 * of argument strings even with small stacks
@@ -247,6 +275,11 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
 	vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
 	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 	INIT_LIST_HEAD(&vma->anon_vma_chain);
+
+	err = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1);
+	if (err)
+		goto err;
+
 	err = insert_vm_struct(mm, vma);
 	if (err)
 		goto err;
@@ -269,7 +302,11 @@ static bool valid_arg_len(struct linux_binprm *bprm, long len)
 
 #else
 
-static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
+void acct_arg_size(struct linux_binprm *bprm, unsigned long pages)
+{
+}
+
+struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
 		int write)
 {
 	struct page *page;
@@ -759,6 +796,10 @@ static int exec_mmap(struct mm_struct *mm)
 	tsk->mm = mm;
 	tsk->active_mm = mm;
 	activate_mm(active_mm, mm);
+	if (old_mm && tsk->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
+		atomic_dec(&old_mm->oom_disable_count);
+		atomic_inc(&tsk->mm->oom_disable_count);
+	}
 	task_unlock(tsk);
 	arch_pick_mmap_layout(mm);
 	if (old_mm) {
@@ -992,13 +1033,14 @@ int flush_old_exec(struct linux_binprm * bprm)
 	/*
 	 * Release all of the old mmap stuff
 	 */
+	acct_arg_size(bprm, 0);
 	retval = exec_mmap(bprm->mm);
 	if (retval)
 		goto out;
 
 	bprm->mm = NULL;		/* We're using it now */
 
-	current->flags &= ~PF_RANDOMIZE;
+	current->flags &= ~(PF_RANDOMIZE | PF_KTHREAD);
 	flush_thread();
 	current->personality &= ~bprm->per_clear;
 
@@ -1078,14 +1120,14 @@ EXPORT_SYMBOL(setup_new_exec);
  */
 int prepare_bprm_creds(struct linux_binprm *bprm)
 {
-	if (mutex_lock_interruptible(&current->cred_guard_mutex))
+	if (mutex_lock_interruptible(&current->signal->cred_guard_mutex))
 		return -ERESTARTNOINTR;
 
 	bprm->cred = prepare_exec_creds();
 	if (likely(bprm->cred))
 		return 0;
 
-	mutex_unlock(&current->cred_guard_mutex);
+	mutex_unlock(&current->signal->cred_guard_mutex);
 	return -ENOMEM;
 }
 
@@ -1093,7 +1135,7 @@ void free_bprm(struct linux_binprm *bprm)
 {
 	free_arg_pages(bprm);
 	if (bprm->cred) {
-		mutex_unlock(&current->cred_guard_mutex);
+		mutex_unlock(&current->signal->cred_guard_mutex);
 		abort_creds(bprm->cred);
 	}
 	kfree(bprm);
@@ -1114,13 +1156,13 @@ void install_exec_creds(struct linux_binprm *bprm)
 	 * credentials; any time after this it may be unlocked.
 	 */
 	security_bprm_committed_creds(bprm);
-	mutex_unlock(&current->cred_guard_mutex);
+	mutex_unlock(&current->signal->cred_guard_mutex);
 }
 EXPORT_SYMBOL(install_exec_creds);
 
 /*
  * determine how safe it is to execute the proposed program
- * - the caller must hold current->cred_guard_mutex to protect against
+ * - the caller must hold ->cred_guard_mutex to protect against
  *   PTRACE_ATTACH
  */
 int check_unsafe_exec(struct linux_binprm *bprm)
@@ -1401,7 +1443,6 @@ int do_execve(const char * filename,
 	if (retval < 0)
 		goto out;
 
-	current->flags &= ~PF_KTHREAD;
 	retval = search_binary_handler(bprm,regs);
 	if (retval < 0)
 		goto out;
@@ -1416,8 +1457,10 @@ int do_execve(const char * filename,
 	return retval;
 
 out:
-	if (bprm->mm)
-		mmput (bprm->mm);
+	if (bprm->mm) {
+		acct_arg_size(bprm, 0);
+		mmput(bprm->mm);
+	}
 
 out_file:
 	if (bprm->file) {
@@ -1454,127 +1497,148 @@ void set_binfmt(struct linux_binfmt *new)
 
 EXPORT_SYMBOL(set_binfmt);
 
+static int expand_corename(struct core_name *cn)
+{
+	char *old_corename = cn->corename;
+
+	cn->size = CORENAME_MAX_SIZE * atomic_inc_return(&call_count);
+	cn->corename = krealloc(old_corename, cn->size, GFP_KERNEL);
+
+	if (!cn->corename) {
+		kfree(old_corename);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static int cn_printf(struct core_name *cn, const char *fmt, ...)
+{
+	char *cur;
+	int need;
+	int ret;
+	va_list arg;
+
+	va_start(arg, fmt);
+	need = vsnprintf(NULL, 0, fmt, arg);
+	va_end(arg);
+
+	if (likely(need < cn->size - cn->used - 1))
+		goto out_printf;
+
+	ret = expand_corename(cn);
+	if (ret)
+		goto expand_fail;
+
+out_printf:
+	cur = cn->corename + cn->used;
+	va_start(arg, fmt);
+	vsnprintf(cur, need + 1, fmt, arg);
+	va_end(arg);
+	cn->used += need;
+	return 0;
+
+expand_fail:
+	return ret;
+}
+
 /* format_corename will inspect the pattern parameter, and output a
  * name into corename, which must have space for at least
  * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
  */
-static int format_corename(char *corename, long signr)
+static int format_corename(struct core_name *cn, long signr)
 {
 	const struct cred *cred = current_cred();
 	const char *pat_ptr = core_pattern;
 	int ispipe = (*pat_ptr == '|');
-	char *out_ptr = corename;
-	char *const out_end = corename + CORENAME_MAX_SIZE;
-	int rc;
 	int pid_in_pattern = 0;
+	int err = 0;
+
+	cn->size = CORENAME_MAX_SIZE * atomic_read(&call_count);
+	cn->corename = kmalloc(cn->size, GFP_KERNEL);
+	cn->used = 0;
+
+	if (!cn->corename)
+		return -ENOMEM;
 
 	/* Repeat as long as we have more pattern to process and more output
 	   space */
 	while (*pat_ptr) {
 		if (*pat_ptr != '%') {
-			if (out_ptr == out_end)
+			if (*pat_ptr == 0)
 				goto out;
-			*out_ptr++ = *pat_ptr++;
+			err = cn_printf(cn, "%c", *pat_ptr++);
 		} else {
 			switch (*++pat_ptr) {
+			/* single % at the end, drop that */
 			case 0:
 				goto out;
 			/* Double percent, output one percent */
 			case '%':
-				if (out_ptr == out_end)
-					goto out;
-				*out_ptr++ = '%';
+				err = cn_printf(cn, "%c", '%');
 				break;
 			/* pid */
 			case 'p':
 				pid_in_pattern = 1;
-				rc = snprintf(out_ptr, out_end - out_ptr,
-					      "%d", task_tgid_vnr(current));
-				if (rc > out_end - out_ptr)
-					goto out;
-				out_ptr += rc;
+				err = cn_printf(cn, "%d",
+					      task_tgid_vnr(current));
 				break;
 			/* uid */
 			case 'u':
-				rc = snprintf(out_ptr, out_end - out_ptr,
-					      "%d", cred->uid);
-				if (rc > out_end - out_ptr)
-					goto out;
-				out_ptr += rc;
+				err = cn_printf(cn, "%d", cred->uid);
 				break;
 			/* gid */
 			case 'g':
-				rc = snprintf(out_ptr, out_end - out_ptr,
-					      "%d", cred->gid);
-				if (rc > out_end - out_ptr)
-					goto out;
-				out_ptr += rc;
+				err = cn_printf(cn, "%d", cred->gid);
 				break;
 			/* signal that caused the coredump */
 			case 's':
-				rc = snprintf(out_ptr, out_end - out_ptr,
-					      "%ld", signr);
-				if (rc > out_end - out_ptr)
-					goto out;
-				out_ptr += rc;
+				err = cn_printf(cn, "%ld", signr);
 				break;
 			/* UNIX time of coredump */
 			case 't': {
 				struct timeval tv;
 				do_gettimeofday(&tv);
-				rc = snprintf(out_ptr, out_end - out_ptr,
-					      "%lu", tv.tv_sec);
-				if (rc > out_end - out_ptr)
-					goto out;
-				out_ptr += rc;
+				err = cn_printf(cn, "%lu", tv.tv_sec);
 				break;
 			}
 			/* hostname */
 			case 'h':
 				down_read(&uts_sem);
-				rc = snprintf(out_ptr, out_end - out_ptr,
-					      "%s", utsname()->nodename);
+				err = cn_printf(cn, "%s",
+					      utsname()->nodename);
 				up_read(&uts_sem);
-				if (rc > out_end - out_ptr)
-					goto out;
-				out_ptr += rc;
 				break;
 			/* executable */
 			case 'e':
-				rc = snprintf(out_ptr, out_end - out_ptr,
-					      "%s", current->comm);
-				if (rc > out_end - out_ptr)
-					goto out;
-				out_ptr += rc;
+				err = cn_printf(cn, "%s", current->comm);
 				break;
 			/* core limit size */
 			case 'c':
-				rc = snprintf(out_ptr, out_end - out_ptr,
-					      "%lu", rlimit(RLIMIT_CORE));
-				if (rc > out_end - out_ptr)
-					goto out;
-				out_ptr += rc;
+				err = cn_printf(cn, "%lu",
+					      rlimit(RLIMIT_CORE));
 				break;
 			default:
 				break;
 			}
 			++pat_ptr;
 		}
+
+		if (err)
+			return err;
 	}
+
 	/* Backward compatibility with core_uses_pid:
 	 *
 	 * If core_pattern does not include a %p (as is the default)
 	 * and core_uses_pid is set, then .%pid will be appended to
 	 * the filename. Do not do this for piped commands. */
 	if (!ispipe && !pid_in_pattern && core_uses_pid) {
-		rc = snprintf(out_ptr, out_end - out_ptr,
-			      ".%d", task_tgid_vnr(current));
-		if (rc > out_end - out_ptr)
-			goto out;
-		out_ptr += rc;
+		err = cn_printf(cn, ".%d", task_tgid_vnr(current));
+		if (err)
+			return err;
 	}
 out:
-	*out_ptr = 0;
 	return ispipe;
 }
 
@@ -1851,7 +1915,7 @@ static int umh_pipe_setup(struct subprocess_info *info)
 void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 {
 	struct core_state core_state;
-	char corename[CORENAME_MAX_SIZE + 1];
+	struct core_name cn;
 	struct mm_struct *mm = current->mm;
 	struct linux_binfmt * binfmt;
 	const struct cred *old_cred;
@@ -1906,7 +1970,13 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 	 */
 	clear_thread_flag(TIF_SIGPENDING);
 
-	ispipe = format_corename(corename, signr);
+	ispipe = format_corename(&cn, signr);
+
+	if (ispipe == -ENOMEM) {
+		printk(KERN_WARNING "format_corename failed\n");
+		printk(KERN_WARNING "Aborting core\n");
+		goto fail_corename;
+	}
 
  	if (ispipe) {
 		int dump_count;
@@ -1943,7 +2013,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 			goto fail_dropcount;
 		}
 
-		helper_argv = argv_split(GFP_KERNEL, corename+1, NULL);
+		helper_argv = argv_split(GFP_KERNEL, cn.corename+1, NULL);
 		if (!helper_argv) {
 			printk(KERN_WARNING "%s failed to allocate memory\n",
 			       __func__);
@@ -1956,7 +2026,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 		argv_free(helper_argv);
 		if (retval) {
  			printk(KERN_INFO "Core dump to %s pipe failed\n",
-			       corename);
+			       cn.corename);
 			goto close_fail;
  		}
 	} else {
@@ -1965,7 +2035,7 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
 		if (cprm.limit < binfmt->min_coredump)
 			goto fail_unlock;
 
-		cprm.file = filp_open(corename,
+		cprm.file = filp_open(cn.corename,
 				 O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
 				 0600);
 		if (IS_ERR(cprm.file))
@@ -2007,6 +2077,8 @@ fail_dropcount:
 	if (ispipe)
 		atomic_dec(&core_dump_count);
 fail_unlock:
+	kfree(cn.corename);
+fail_corename:
 	coredump_finish(mm);
 	revert_creds(old_cred);
 fail_creds:
diff --git a/fs/exofs/file.c b/fs/exofs/file.c
index 68cb23e3bb9..b905c79b4f0 100644
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -46,10 +46,6 @@ static int exofs_file_fsync(struct file *filp, int datasync)
 {
 	int ret;
 	struct inode *inode = filp->f_mapping->host;
-	struct writeback_control wbc = {
-		.sync_mode = WB_SYNC_ALL,
-		.nr_to_write = 0, /* metadata-only; caller takes care of data */
-	};
 	struct super_block *sb;
 
 	if (!(inode->i_state & I_DIRTY))
@@ -57,7 +53,7 @@ static int exofs_file_fsync(struct file *filp, int datasync)
 	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
 		return 0;
 
-	ret = sync_inode(inode, &wbc);
+	ret = sync_inode_metadata(inode, 1);
 
 	/* This is a good place to write the sb */
 	/* TODO: Sechedule an sb-sync on create */
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index b7dd0c23686..264e95d0283 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -153,7 +153,7 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir,
 
 	inode->i_ctime = CURRENT_TIME;
 	inode_inc_link_count(inode);
-	atomic_inc(&inode->i_count);
+	ihold(inode);
 
 	return exofs_add_nondir(dentry, inode);
 }
diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index 047e92fa3af..79c3ae6e045 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -659,19 +659,19 @@ free_bdi:
 /*
  * Set up the superblock (calls exofs_fill_super eventually)
  */
-static int exofs_get_sb(struct file_system_type *type,
+static struct dentry *exofs_mount(struct file_system_type *type,
 			  int flags, const char *dev_name,
-			  void *data, struct vfsmount *mnt)
+			  void *data)
 {
 	struct exofs_mountopt opts;
 	int ret;
 
 	ret = parse_options(data, &opts);
 	if (ret)
-		return ret;
+		return ERR_PTR(ret);
 
 	opts.dev_name = dev_name;
-	return get_sb_nodev(type, flags, &opts, exofs_fill_super, mnt);
+	return mount_nodev(type, flags, &opts, exofs_fill_super);
 }
 
 /*
@@ -809,7 +809,7 @@ static const struct export_operations exofs_export_ops = {
 static struct file_system_type exofs_type = {
 	.owner          = THIS_MODULE,
 	.name           = "exofs",
-	.get_sb         = exofs_get_sb,
+	.mount          = exofs_mount,
 	.kill_sb        = generic_shutdown_super,
 };
 
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index e9e175949a6..51b304056f1 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -74,21 +74,20 @@ static struct dentry *
 find_disconnected_root(struct dentry *dentry)
 {
 	dget(dentry);
-	spin_lock(&dentry->d_lock);
-	while (!IS_ROOT(dentry) &&
-	       (dentry->d_parent->d_flags & DCACHE_DISCONNECTED)) {
-		struct dentry *parent = dentry->d_parent;
-		dget(parent);
-		spin_unlock(&dentry->d_lock);
+	while (!IS_ROOT(dentry)) {
+		struct dentry *parent = dget_parent(dentry);
+
+		if (!(parent->d_flags & DCACHE_DISCONNECTED)) {
+			dput(parent);
+			break;
+		}
+
 		dput(dentry);
 		dentry = parent;
-		spin_lock(&dentry->d_lock);
 	}
-	spin_unlock(&dentry->d_lock);
 	return dentry;
 }
 
-
 /*
  * Make sure target_dir is fully connected to the dentry tree.
  *
diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c
index c6c684b44ea..0d06f4e7569 100644
--- a/fs/ext2/balloc.c
+++ b/fs/ext2/balloc.c
@@ -646,10 +646,9 @@ find_next_usable_block(int start, struct buffer_head *bh, int maxblocks)
 	return here;
 }
 
-/*
+/**
  * ext2_try_to_allocate()
  * @sb:			superblock
- * @handle:		handle to this transaction
  * @group:		given allocation block group
  * @bitmap_bh:		bufferhead holds the block bitmap
  * @grp_goal:		given target block within the group
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 764109886ec..2709b34206a 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -98,7 +98,7 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len)
 	if (IS_DIRSYNC(dir)) {
 		err = write_one_page(page, 1);
 		if (!err)
-			err = ext2_sync_inode(dir);
+			err = sync_inode_metadata(dir, 1);
 	} else {
 		unlock_page(page);
 	}
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 416daa62242..6346a2acf32 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -120,7 +120,6 @@ extern unsigned long ext2_count_free (struct buffer_head *, unsigned);
 extern struct inode *ext2_iget (struct super_block *, unsigned long);
 extern int ext2_write_inode (struct inode *, struct writeback_control *);
 extern void ext2_evict_inode(struct inode *);
-extern int ext2_sync_inode (struct inode *);
 extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
 extern int ext2_setattr (struct dentry *, struct iattr *);
 extern void ext2_set_inode_flags(struct inode *inode);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 533699c1604..40ad210a504 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1203,7 +1203,7 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
 	inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
 	if (inode_needs_sync(inode)) {
 		sync_mapping_buffers(inode->i_mapping);
-		ext2_sync_inode (inode);
+		sync_inode_metadata(inode, 1);
 	} else {
 		mark_inode_dirty(inode);
 	}
@@ -1523,15 +1523,6 @@ int ext2_write_inode(struct inode *inode, struct writeback_control *wbc)
 	return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
 }
 
-int ext2_sync_inode(struct inode *inode)
-{
-	struct writeback_control wbc = {
-		.sync_mode = WB_SYNC_ALL,
-		.nr_to_write = 0,	/* sys_fsync did this */
-	};
-	return sync_inode(inode, &wbc);
-}
-
 int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
 {
 	struct inode *inode = dentry->d_inode;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 71efb0e9a3f..f8aecd2e329 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -206,7 +206,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
 
 	inode->i_ctime = CURRENT_TIME_SEC;
 	inode_inc_link_count(inode);
-	atomic_inc(&inode->i_count);
+	ihold(inode);
 
 	err = ext2_add_link(dentry, inode);
 	if (!err) {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 85df87d0f7b..d89e0b6a2d7 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1221,9 +1221,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
 	}
 
 	es = sbi->s_es;
-	if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) !=
-	    (old_mount_opt & EXT2_MOUNT_XIP)) &&
-	    invalidate_inodes(sb)) {
+	if ((sbi->s_mount_opt ^ old_mount_opt) & EXT2_MOUNT_XIP) {
 		ext2_msg(sb, KERN_WARNING, "warning: refusing change of "
 			 "xip flag with busy inodes while remounting");
 		sbi->s_mount_opt &= ~EXT2_MOUNT_XIP;
@@ -1358,10 +1356,10 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
 	return 0;
 }
 
-static int ext2_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *ext2_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, ext2_fill_super, mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, ext2_fill_super);
 }
 
 #ifdef CONFIG_QUOTA
@@ -1475,7 +1473,7 @@ out:
 static struct file_system_type ext2_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "ext2",
-	.get_sb		= ext2_get_sb,
+	.mount		= ext2_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 8c29ae15129..f84700be327 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -699,7 +699,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
 	EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
 	inode->i_ctime = CURRENT_TIME_SEC;
 	if (IS_SYNC(inode)) {
-		error = ext2_sync_inode (inode);
+		error = sync_inode_metadata(inode, 1);
 		/* In case sync failed due to ENOSPC the inode was actually
 		 * written (only some dirty data were not) so we just proceed
 		 * as if nothing happened and cleanup the unused block */
diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c
index 4a32511f4de..b3db2264942 100644
--- a/fs/ext3/balloc.c
+++ b/fs/ext3/balloc.c
@@ -792,9 +792,9 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
 	if (here < 0)
 		here = 0;
 
-	p = ((char *)bh->b_data) + (here >> 3);
+	p = bh->b_data + (here >> 3);
 	r = memscan(p, 0, ((maxblocks + 7) >> 3) - (here >> 3));
-	next = (r - ((char *)bh->b_data)) << 3;
+	next = (r - bh->b_data) << 3;
 
 	if (next < maxblocks && next >= start && ext3_test_allocatable(next, bh))
 		return next;
@@ -810,8 +810,9 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh,
 
 /**
  * claim_block()
+ * @lock:		the spin lock for this block group
  * @block:		the free block (group relative) to allocate
- * @bh:			the bufferhead containts the block group bitmap
+ * @bh:			the buffer_head contains the block group bitmap
  *
  * We think we can allocate this block in this bitmap.  Try to set the bit.
  * If that succeeds then check that nobody has allocated and then freed the
@@ -956,9 +957,11 @@ fail_access:
  *		but we will shift to the place where start_block is,
  *		then start from there, when looking for a reservable space.
  *
- *	@size: the target new reservation window size
+ *	@my_rsv: the reservation window
  *
- *	@group_first_block: the first block we consider to start
+ *	@sb: the super block
+ *
+ *	@start_block: the first block we consider to start
  *			the real search from
  *
  *	@last_block:
@@ -1084,7 +1087,7 @@ static int find_next_reservable_window(
  *
  *	failed: we failed to find a reservation window in this group
  *
- *	@rsv: the reservation
+ *	@my_rsv: the reservation window
  *
  *	@grp_goal: The goal (group-relative).  It is where the search for a
  *		free reservable space should start from.
@@ -1273,8 +1276,8 @@ static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
  * @group:		given allocation block group
  * @bitmap_bh:		bufferhead holds the block bitmap
  * @grp_goal:		given target block within the group
- * @count:		target number of blocks to allocate
  * @my_rsv:		reservation window
+ * @count:		target number of blocks to allocate
  * @errp:		pointer to store the error code
  *
  * This is the main function used to allocate a new block and its reservation
diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c
index 4ab72db3559..9724aef2246 100644
--- a/fs/ext3/ialloc.c
+++ b/fs/ext3/ialloc.c
@@ -570,9 +570,14 @@ got:
 	ei->i_state_flags = 0;
 	ext3_set_inode_state(inode, EXT3_STATE_NEW);
 
-	ei->i_extra_isize =
-		(EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ?
-		sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0;
+	/* See comment in ext3_iget for explanation */
+	if (ino >= EXT3_FIRST_INO(sb) + 1 &&
+	    EXT3_INODE_SIZE(sb) > EXT3_GOOD_OLD_INODE_SIZE) {
+		ei->i_extra_isize =
+			sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE;
+	} else {
+		ei->i_extra_isize = 0;
+	}
 
 	ret = inode;
 	dquot_initialize(inode);
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 5e0faf4cda7..a9580617edd 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -498,7 +498,7 @@ static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block,
 }
 
 /**
- *	ext3_blks_to_allocate: Look up the block map and count the number
+ *	ext3_blks_to_allocate - Look up the block map and count the number
  *	of direct blocks need to be allocated for the given branch.
  *
  *	@branch: chain of indirect blocks
@@ -536,14 +536,18 @@ static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks,
 }
 
 /**
- *	ext3_alloc_blocks: multiple allocate blocks needed for a branch
+ *	ext3_alloc_blocks - multiple allocate blocks needed for a branch
+ *	@handle: handle for this transaction
+ *	@inode: owner
+ *	@goal: preferred place for allocation
  *	@indirect_blks: the number of blocks need to allocate for indirect
  *			blocks
- *
+ *	@blks:	number of blocks need to allocated for direct blocks
  *	@new_blocks: on return it will store the new block numbers for
  *	the indirect blocks(if needed) and the first direct block,
- *	@blks:	on return it will store the total number of allocated
- *		direct blocks
+ *	@err: here we store the error value
+ *
+ *	return the number of direct blocks allocated
  */
 static int ext3_alloc_blocks(handle_t *handle, struct inode *inode,
 			ext3_fsblk_t goal, int indirect_blks, int blks,
@@ -598,9 +602,11 @@ failed_out:
 
 /**
  *	ext3_alloc_branch - allocate and set up a chain of blocks.
+ *	@handle: handle for this transaction
  *	@inode: owner
  *	@indirect_blks: number of allocated indirect blocks
  *	@blks: number of allocated direct blocks
+ *	@goal: preferred place for allocation
  *	@offsets: offsets (in the blocks) to store the pointers to next.
  *	@branch: place to store the chain in.
  *
@@ -700,10 +706,9 @@ failed:
 
 /**
  * ext3_splice_branch - splice the allocated branch onto inode.
+ * @handle: handle for this transaction
  * @inode: owner
  * @block: (logical) number of block we are adding
- * @chain: chain of indirect blocks (with a missing link - see
- *	ext3_alloc_branch)
  * @where: location of missing link
  * @num:   number of indirect blocks we are adding
  * @blks:  number of direct blocks we are adding
@@ -1696,8 +1701,8 @@ static int ext3_journalled_writepage(struct page *page,
 		 * doesn't seem much point in redirtying the page here.
 		 */
 		ClearPageChecked(page);
-		ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
-					ext3_get_block);
+		ret = __block_write_begin(page, 0, PAGE_CACHE_SIZE,
+					  ext3_get_block);
 		if (ret != 0) {
 			ext3_journal_stop(handle);
 			goto out_unlock;
@@ -2530,7 +2535,6 @@ void ext3_truncate(struct inode *inode)
 			 */
 		} else {
 			/* Shared branch grows from an indirect block */
-			BUFFER_TRACE(partial->bh, "get_write_access");
 			ext3_free_branches(handle, inode, partial->bh,
 					partial->p,
 					partial->p+1, (chain+n-1) - partial);
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index 2b35ddb70d6..bce9dce639b 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2260,7 +2260,7 @@ retry:
 
 	inode->i_ctime = CURRENT_TIME_SEC;
 	inc_nlink(inode);
-	atomic_inc(&inode->i_count);
+	ihold(inode);
 
 	err = ext3_add_entry(handle, dentry, inode);
 	if (!err) {
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 0ccd7b12b73..e746d30b123 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -977,7 +977,8 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 	o_blocks_count = le32_to_cpu(es->s_blocks_count);
 
 	if (test_opt(sb, DEBUG))
-		printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK" uto "E3FSBLK" blocks\n",
+		printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK
+		       " upto "E3FSBLK" blocks\n",
 		       o_blocks_count, n_blocks_count);
 
 	if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
@@ -985,7 +986,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 
 	if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
 		printk(KERN_ERR "EXT3-fs: filesystem on %s:"
-			" too large to resize to %lu blocks safely\n",
+			" too large to resize to "E3FSBLK" blocks safely\n",
 			sb->s_id, n_blocks_count);
 		if (sizeof(sector_t) < 8)
 			ext3_warning(sb, __func__,
@@ -1065,11 +1066,11 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
 	es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
 	ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
 	mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
-	ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count,
-		   o_blocks_count + add);
+	ext3_debug("freeing blocks "E3FSBLK" through "E3FSBLK"\n",
+		   o_blocks_count, o_blocks_count + add);
 	ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
-	ext3_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n", o_blocks_count,
-		   o_blocks_count + add);
+	ext3_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n",
+		   o_blocks_count, o_blocks_count + add);
 	if ((err = ext3_journal_stop(handle)))
 		goto exit_put;
 	if (test_opt(sb, DEBUG))
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 37776800910..acf8695fa8f 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -27,7 +27,6 @@
 #include <linux/init.h>
 #include <linux/blkdev.h>
 #include <linux/parser.h>
-#include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
 #include <linux/exportfs.h>
 #include <linux/vfs.h>
@@ -1301,9 +1300,9 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
 		ext3_msg(sb, KERN_WARNING,
 			"warning: mounting fs with errors, "
 			"running e2fsck is recommended");
-	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
+	else if ((__s16) le16_to_cpu(es->s_max_mnt_count) > 0 &&
 		 le16_to_cpu(es->s_mnt_count) >=
-		 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
+			le16_to_cpu(es->s_max_mnt_count))
 		ext3_msg(sb, KERN_WARNING,
 			"warning: maximal mount count reached, "
 			"running e2fsck is recommended");
@@ -1320,7 +1319,7 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
                    valid forever! :) */
 	es->s_state &= cpu_to_le16(~EXT3_VALID_FS);
 #endif
-	if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
+	if (!le16_to_cpu(es->s_max_mnt_count))
 		es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT);
 	le16_add_cpu(&es->s_mnt_count, 1);
 	es->s_mtime = cpu_to_le32(get_seconds());
@@ -1647,7 +1646,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	 * Note: s_es must be initialized as soon as possible because
 	 *       some ext3 macro-instructions depend on its value
 	 */
-	es = (struct ext3_super_block *) (((char *)bh->b_data) + offset);
+	es = (struct ext3_super_block *) (bh->b_data + offset);
 	sbi->s_es = es;
 	sb->s_magic = le16_to_cpu(es->s_magic);
 	if (sb->s_magic != EXT3_SUPER_MAGIC)
@@ -1758,7 +1757,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 			       "error: can't read superblock on 2nd try");
 			goto failed_mount;
 		}
-		es = (struct ext3_super_block *)(((char *)bh->b_data) + offset);
+		es = (struct ext3_super_block *)(bh->b_data + offset);
 		sbi->s_es = es;
 		if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
 			ext3_msg(sb, KERN_ERR,
@@ -1857,13 +1856,13 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
 			       le32_to_cpu(es->s_first_data_block) - 1)
 				       / EXT3_BLOCKS_PER_GROUP(sb)) + 1;
-	db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) /
-		   EXT3_DESC_PER_BLOCK(sb);
+	db_count = DIV_ROUND_UP(sbi->s_groups_count, EXT3_DESC_PER_BLOCK(sb));
 	sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
 				    GFP_KERNEL);
 	if (sbi->s_group_desc == NULL) {
 		ext3_msg(sb, KERN_ERR,
 			"error: not enough memory");
+		ret = -ENOMEM;
 		goto failed_mount;
 	}
 
@@ -1951,6 +1950,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
 	}
 	if (err) {
 		ext3_msg(sb, KERN_ERR, "error: insufficient memory");
+		ret = err;
 		goto failed_mount3;
 	}
 
@@ -2159,7 +2159,7 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb,
 		goto out_bdev;
 	}
 
-	es = (struct ext3_super_block *) (((char *)bh->b_data) + offset);
+	es = (struct ext3_super_block *) (bh->b_data + offset);
 	if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) ||
 	    !(le32_to_cpu(es->s_feature_incompat) &
 	      EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) {
@@ -2352,6 +2352,21 @@ static int ext3_commit_super(struct super_block *sb,
 
 	if (!sbh)
 		return error;
+
+	if (buffer_write_io_error(sbh)) {
+		/*
+		 * Oh, dear.  A previous attempt to write the
+		 * superblock failed.  This could happen because the
+		 * USB device was yanked out.  Or it could happen to
+		 * be a transient write error and maybe the block will
+		 * be remapped.  Nothing we can do but to retry the
+		 * write and hope for the best.
+		 */
+		ext3_msg(sb, KERN_ERR, "previous I/O error to "
+		       "superblock detected");
+		clear_buffer_write_io_error(sbh);
+		set_buffer_uptodate(sbh);
+	}
 	/*
 	 * If the file system is mounted read-only, don't update the
 	 * superblock write time.  This avoids updating the superblock
@@ -2368,8 +2383,15 @@ static int ext3_commit_super(struct super_block *sb,
 	es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb));
 	BUFFER_TRACE(sbh, "marking dirty");
 	mark_buffer_dirty(sbh);
-	if (sync)
+	if (sync) {
 		error = sync_dirty_buffer(sbh);
+		if (buffer_write_io_error(sbh)) {
+			ext3_msg(sb, KERN_ERR, "I/O error while writing "
+			       "superblock");
+			clear_buffer_write_io_error(sbh);
+			set_buffer_uptodate(sbh);
+		}
+	}
 	return error;
 }
 
@@ -2997,16 +3019,16 @@ out:
 
 #endif
 
-static int ext3_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *ext3_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, ext3_fill_super);
 }
 
 static struct file_system_type ext3_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "ext3",
-	.get_sb		= ext3_get_sb,
+	.mount		= ext3_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
index 8867b2a1e5f..c947e36eda6 100644
--- a/fs/ext4/Makefile
+++ b/fs/ext4/Makefile
@@ -4,7 +4,7 @@
 
 obj-$(CONFIG_EXT4_FS) += ext4.o
 
-ext4-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+ext4-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o page-io.o \
 		ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o \
 		ext4_jbd2.o migrate.o mballoc.o block_validity.o move_extent.o
 
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index bd30799a43e..14c3af26c67 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -171,7 +171,8 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
 		 * less than the blocksize * 8 ( which is the size
 		 * of bitmap ), set rest of the block bitmap to 1
 		 */
-		mark_bitmap_end(group_blocks, sb->s_blocksize * 8, bh->b_data);
+		ext4_mark_bitmap_end(group_blocks, sb->s_blocksize * 8,
+				     bh->b_data);
 	}
 	return free_blocks - ext4_group_used_meta_blocks(sb, block_group, gdp);
 }
@@ -489,7 +490,7 @@ error_return:
  * Check if filesystem has nblocks free & available for allocation.
  * On success return 1, return 0 on failure.
  */
-int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
+static int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks)
 {
 	s64 free_blocks, dirty_blocks, root_blocks;
 	struct percpu_counter *fbc = &sbi->s_freeblocks_counter;
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 3db5084db9b..fac90f3fba8 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -29,16 +29,15 @@ struct ext4_system_zone {
 
 static struct kmem_cache *ext4_system_zone_cachep;
 
-int __init init_ext4_system_zone(void)
+int __init ext4_init_system_zone(void)
 {
-	ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone,
-					     SLAB_RECLAIM_ACCOUNT);
+	ext4_system_zone_cachep = KMEM_CACHE(ext4_system_zone, 0);
 	if (ext4_system_zone_cachep == NULL)
 		return -ENOMEM;
 	return 0;
 }
 
-void exit_ext4_system_zone(void)
+void ext4_exit_system_zone(void)
 {
 	kmem_cache_destroy(ext4_system_zone_cachep);
 }
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 374510f72ba..ece76fb6a40 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -39,7 +39,7 @@ static int ext4_release_dir(struct inode *inode,
 				struct file *filp);
 
 const struct file_operations ext4_dir_operations = {
-	.llseek		= generic_file_llseek,
+	.llseek		= ext4_llseek,
 	.read		= generic_read_dir,
 	.readdir	= ext4_readdir,		/* we take BKL. needed?*/
 	.unlocked_ioctl = ext4_ioctl,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 889ec9d5e6a..94ce3d7a1c4 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -168,7 +168,20 @@ struct mpage_da_data {
 	int pages_written;
 	int retval;
 };
-#define	EXT4_IO_UNWRITTEN	0x1
+
+/*
+ * Flags for ext4_io_end->flags
+ */
+#define	EXT4_IO_END_UNWRITTEN	0x0001
+#define EXT4_IO_END_ERROR	0x0002
+
+struct ext4_io_page {
+	struct page	*p_page;
+	atomic_t	p_count;
+};
+
+#define MAX_IO_PAGES 128
+
 typedef struct ext4_io_end {
 	struct list_head	list;		/* per-file finished IO list */
 	struct inode		*inode;		/* file being written to */
@@ -179,8 +192,18 @@ typedef struct ext4_io_end {
 	struct work_struct	work;		/* data work queue */
 	struct kiocb		*iocb;		/* iocb struct for AIO */
 	int			result;		/* error value for AIO */
+	int			num_io_pages;
+	struct ext4_io_page	*pages[MAX_IO_PAGES];
 } ext4_io_end_t;
 
+struct ext4_io_submit {
+	int			io_op;
+	struct bio		*io_bio;
+	ext4_io_end_t		*io_end;
+	struct ext4_io_page	*io_page;
+	sector_t		io_next_block;
+};
+
 /*
  * Special inodes numbers
  */
@@ -205,6 +228,7 @@ typedef struct ext4_io_end {
 #define EXT4_MIN_BLOCK_SIZE		1024
 #define	EXT4_MAX_BLOCK_SIZE		65536
 #define EXT4_MIN_BLOCK_LOG_SIZE		10
+#define EXT4_MAX_BLOCK_LOG_SIZE		16
 #ifdef __KERNEL__
 # define EXT4_BLOCK_SIZE(s)		((s)->s_blocksize)
 #else
@@ -834,6 +858,7 @@ struct ext4_inode_info {
 	spinlock_t i_completed_io_lock;
 	/* current io_end structure for async DIO write*/
 	ext4_io_end_t *cur_aio_dio;
+	atomic_t i_ioend_count;	/* Number of outstanding io_end structs */
 
 	/*
 	 * Transactions that contain inode's metadata needed to complete
@@ -885,10 +910,12 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_JOURNAL_CHECKSUM	0x800000 /* Journal checksums */
 #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT	0x1000000 /* Journal Async Commit */
 #define EXT4_MOUNT_I_VERSION            0x2000000 /* i_version support */
+#define EXT4_MOUNT_MBLK_IO_SUBMIT	0x4000000 /* multi-block io submits */
 #define EXT4_MOUNT_DELALLOC		0x8000000 /* Delalloc support */
 #define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
 #define EXT4_MOUNT_BLOCK_VALIDITY	0x20000000 /* Block validity checking */
 #define EXT4_MOUNT_DISCARD		0x40000000 /* Issue DISCARD requests */
+#define EXT4_MOUNT_INIT_INODE_TABLE	0x80000000 /* Initialize uninitialized itables */
 
 #define clear_opt(o, opt)		o &= ~EXT4_MOUNT_##opt
 #define set_opt(o, opt)			o |= EXT4_MOUNT_##opt
@@ -1087,7 +1114,6 @@ struct ext4_sb_info {
 	struct completion s_kobj_unregister;
 
 	/* Journaling */
-	struct inode *s_journal_inode;
 	struct journal_s *s_journal;
 	struct list_head s_orphan;
 	struct mutex s_orphan_lock;
@@ -1120,10 +1146,7 @@ struct ext4_sb_info {
 	/* for buddy allocator */
 	struct ext4_group_info ***s_group_info;
 	struct inode *s_buddy_cache;
-	long s_blocks_reserved;
-	spinlock_t s_reserve_lock;
 	spinlock_t s_md_lock;
-	tid_t s_last_transaction;
 	unsigned short *s_mb_offsets;
 	unsigned int *s_mb_maxs;
 
@@ -1141,7 +1164,6 @@ struct ext4_sb_info {
 	unsigned long s_mb_last_start;
 
 	/* stats for buddy allocator */
-	spinlock_t s_mb_pa_lock;
 	atomic_t s_bal_reqs;	/* number of reqs with len > 1 */
 	atomic_t s_bal_success;	/* we found long enough chunks */
 	atomic_t s_bal_allocated;	/* in blocks */
@@ -1172,6 +1194,11 @@ struct ext4_sb_info {
 
 	/* timer for periodic error stats printing */
 	struct timer_list s_err_report;
+
+	/* Lazy inode table initialization info */
+	struct ext4_li_request *s_li_request;
+	/* Wait multiplier for lazy initialization thread */
+	unsigned int s_li_wait_mult;
 };
 
 static inline struct ext4_sb_info *EXT4_SB(struct super_block *sb)
@@ -1533,7 +1560,42 @@ ext4_group_first_block_no(struct super_block *sb, ext4_group_t group_no)
 void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr,
 			ext4_group_t *blockgrpp, ext4_grpblk_t *offsetp);
 
-extern struct proc_dir_entry *ext4_proc_root;
+/*
+ * Timeout and state flag for lazy initialization inode thread.
+ */
+#define EXT4_DEF_LI_WAIT_MULT			10
+#define EXT4_DEF_LI_MAX_START_DELAY		5
+#define EXT4_LAZYINIT_QUIT			0x0001
+#define EXT4_LAZYINIT_RUNNING			0x0002
+
+/*
+ * Lazy inode table initialization info
+ */
+struct ext4_lazy_init {
+	unsigned long		li_state;
+
+	wait_queue_head_t	li_wait_daemon;
+	wait_queue_head_t	li_wait_task;
+	struct timer_list	li_timer;
+	struct task_struct	*li_task;
+
+	struct list_head	li_request_list;
+	struct mutex		li_list_mtx;
+};
+
+struct ext4_li_request {
+	struct super_block	*lr_super;
+	struct ext4_sb_info	*lr_sbi;
+	ext4_group_t		lr_next_group;
+	struct list_head	lr_request;
+	unsigned long		lr_next_sched;
+	unsigned long		lr_timeout;
+};
+
+struct ext4_features {
+	struct kobject f_kobj;
+	struct completion f_kobj_unregister;
+};
 
 /*
  * Function prototypes
@@ -1561,7 +1623,6 @@ extern unsigned long ext4_bg_num_gdb(struct super_block *sb,
 extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
 			ext4_fsblk_t goal, unsigned long *count, int *errp);
 extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
-extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
 extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
 				ext4_fsblk_t block, unsigned long count);
 extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
@@ -1605,11 +1666,9 @@ extern struct inode * ext4_orphan_get(struct super_block *, unsigned long);
 extern unsigned long ext4_count_free_inodes(struct super_block *);
 extern unsigned long ext4_count_dirs(struct super_block *);
 extern void ext4_check_inodes_bitmap(struct super_block *);
-extern unsigned ext4_init_inode_bitmap(struct super_block *sb,
-				       struct buffer_head *bh,
-				       ext4_group_t group,
-				       struct ext4_group_desc *desc);
-extern void mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
+extern void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap);
+extern int ext4_init_inode_table(struct super_block *sb,
+				 ext4_group_t group, int barrier);
 
 /* mballoc.c */
 extern long ext4_mb_stats;
@@ -1620,16 +1679,15 @@ extern ext4_fsblk_t ext4_mb_new_blocks(handle_t *,
 				struct ext4_allocation_request *, int *);
 extern int ext4_mb_reserve_blocks(struct super_block *, int);
 extern void ext4_discard_preallocations(struct inode *);
-extern int __init init_ext4_mballoc(void);
-extern void exit_ext4_mballoc(void);
+extern int __init ext4_init_mballoc(void);
+extern void ext4_exit_mballoc(void);
 extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
 			     struct buffer_head *bh, ext4_fsblk_t block,
 			     unsigned long count, int flags);
 extern int ext4_mb_add_groupinfo(struct super_block *sb,
 		ext4_group_t i, struct ext4_group_desc *desc);
-extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
-extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
-						ext4_group_t, int);
+extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
+
 /* inode.c */
 struct buffer_head *ext4_getblk(handle_t *, struct inode *,
 						ext4_lblk_t, int, int *);
@@ -1657,13 +1715,11 @@ extern void ext4_get_inode_flags(struct ext4_inode_info *);
 extern int ext4_alloc_da_blocks(struct inode *inode);
 extern void ext4_set_aops(struct inode *inode);
 extern int ext4_writepage_trans_blocks(struct inode *);
-extern int ext4_meta_trans_blocks(struct inode *, int nrblocks, int idxblocks);
 extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
 extern int ext4_block_truncate_page(handle_t *handle,
 		struct address_space *mapping, loff_t from);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 extern qsize_t *ext4_get_reserved_space(struct inode *inode);
-extern int flush_completed_IO(struct inode *inode);
 extern void ext4_da_update_reserve_space(struct inode *inode,
 					int used, int quota_claim);
 /* ioctl.c */
@@ -1960,6 +2016,7 @@ extern const struct file_operations ext4_dir_operations;
 /* file.c */
 extern const struct inode_operations ext4_file_inode_operations;
 extern const struct file_operations ext4_file_operations;
+extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
 
 /* namei.c */
 extern const struct inode_operations ext4_dir_inode_operations;
@@ -1973,8 +2030,8 @@ extern const struct inode_operations ext4_fast_symlink_inode_operations;
 /* block_validity */
 extern void ext4_release_system_zone(struct super_block *sb);
 extern int ext4_setup_system_zone(struct super_block *sb);
-extern int __init init_ext4_system_zone(void);
-extern void exit_ext4_system_zone(void);
+extern int __init ext4_init_system_zone(void);
+extern void ext4_exit_system_zone(void);
 extern int ext4_data_block_valid(struct ext4_sb_info *sbi,
 				 ext4_fsblk_t start_blk,
 				 unsigned int count);
@@ -2002,6 +2059,18 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
 			     __u64 start_orig, __u64 start_donor,
 			     __u64 len, __u64 *moved_len);
 
+/* page-io.c */
+extern int __init ext4_init_pageio(void);
+extern void ext4_exit_pageio(void);
+extern void ext4_ioend_wait(struct inode *);
+extern void ext4_free_io_end(ext4_io_end_t *io);
+extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
+extern int ext4_end_io_nolock(ext4_io_end_t *io);
+extern void ext4_io_submit(struct ext4_io_submit *io);
+extern int ext4_bio_write_page(struct ext4_io_submit *io,
+			       struct page *page,
+			       int len,
+			       struct writeback_control *wbc);
 
 /* BH_Uninit flag: blocks are allocated but uninitialized on disk */
 enum ext4_state_bits {
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index bdb6ce7e2eb..28ce70fd9cd 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -225,11 +225,60 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext)
 	ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
 }
 
+/*
+ * ext4_ext_pblock:
+ * combine low and high parts of physical block number into ext4_fsblk_t
+ */
+static inline ext4_fsblk_t ext4_ext_pblock(struct ext4_extent *ex)
+{
+	ext4_fsblk_t block;
+
+	block = le32_to_cpu(ex->ee_start_lo);
+	block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
+	return block;
+}
+
+/*
+ * ext4_idx_pblock:
+ * combine low and high parts of a leaf physical block number into ext4_fsblk_t
+ */
+static inline ext4_fsblk_t ext4_idx_pblock(struct ext4_extent_idx *ix)
+{
+	ext4_fsblk_t block;
+
+	block = le32_to_cpu(ix->ei_leaf_lo);
+	block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
+	return block;
+}
+
+/*
+ * ext4_ext_store_pblock:
+ * stores a large physical block number into an extent struct,
+ * breaking it into parts
+ */
+static inline void ext4_ext_store_pblock(struct ext4_extent *ex,
+					 ext4_fsblk_t pb)
+{
+	ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
+	ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) &
+				      0xffff);
+}
+
+/*
+ * ext4_idx_store_pblock:
+ * stores a large physical block number into an index struct,
+ * breaking it into parts
+ */
+static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix,
+					 ext4_fsblk_t pb)
+{
+	ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
+	ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) &
+				     0xffff);
+}
+
 extern int ext4_ext_calc_metadata_amount(struct inode *inode,
 					 sector_t lblocks);
-extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
-extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
-extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
 extern int ext4_extent_tree_init(handle_t *, struct inode *);
 extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
 						   int num,
@@ -237,19 +286,9 @@ extern int ext4_ext_calc_credits_for_single_extent(struct inode *inode,
 extern int ext4_can_extents_be_merged(struct inode *inode,
 				      struct ext4_extent *ex1,
 				      struct ext4_extent *ex2);
-extern int ext4_ext_try_to_merge(struct inode *inode,
-				 struct ext4_ext_path *path,
-				 struct ext4_extent *);
-extern unsigned int ext4_ext_check_overlap(struct inode *, struct ext4_extent *, struct ext4_ext_path *);
 extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *, int);
-extern int ext4_ext_walk_space(struct inode *, ext4_lblk_t, ext4_lblk_t,
-							ext_prepare_callback, void *);
 extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t,
 							struct ext4_ext_path *);
-extern int ext4_ext_search_left(struct inode *, struct ext4_ext_path *,
-						ext4_lblk_t *, ext4_fsblk_t *);
-extern int ext4_ext_search_right(struct inode *, struct ext4_ext_path *,
-						ext4_lblk_t *, ext4_fsblk_t *);
 extern void ext4_ext_drop_refs(struct ext4_ext_path *);
 extern int ext4_ext_check_inode(struct inode *inode);
 #endif /* _EXT4_EXTENTS */
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 06328d3e571..0554c48cb1f 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -44,55 +44,6 @@
 #include "ext4_jbd2.h"
 #include "ext4_extents.h"
 
-
-/*
- * ext_pblock:
- * combine low and high parts of physical block number into ext4_fsblk_t
- */
-ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
-{
-	ext4_fsblk_t block;
-
-	block = le32_to_cpu(ex->ee_start_lo);
-	block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
-	return block;
-}
-
-/*
- * idx_pblock:
- * combine low and high parts of a leaf physical block number into ext4_fsblk_t
- */
-ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
-{
-	ext4_fsblk_t block;
-
-	block = le32_to_cpu(ix->ei_leaf_lo);
-	block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
-	return block;
-}
-
-/*
- * ext4_ext_store_pblock:
- * stores a large physical block number into an extent struct,
- * breaking it into parts
- */
-void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
-{
-	ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
-	ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
-}
-
-/*
- * ext4_idx_store_pblock:
- * stores a large physical block number into an index struct,
- * breaking it into parts
- */
-static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
-{
-	ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
-	ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
-}
-
 static int ext4_ext_truncate_extend_restart(handle_t *handle,
 					    struct inode *inode,
 					    int needed)
@@ -169,7 +120,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode,
 		/* try to predict block placement */
 		ex = path[depth].p_ext;
 		if (ex)
-			return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block));
+			return (ext4_ext_pblock(ex) +
+				(block - le32_to_cpu(ex->ee_block)));
 
 		/* it looks like index is empty;
 		 * try to find starting block from index itself */
@@ -354,7 +306,7 @@ ext4_ext_max_entries(struct inode *inode, int depth)
 
 static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
 {
-	ext4_fsblk_t block = ext_pblock(ext);
+	ext4_fsblk_t block = ext4_ext_pblock(ext);
 	int len = ext4_ext_get_actual_len(ext);
 
 	return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
@@ -363,7 +315,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
 static int ext4_valid_extent_idx(struct inode *inode,
 				struct ext4_extent_idx *ext_idx)
 {
-	ext4_fsblk_t block = idx_pblock(ext_idx);
+	ext4_fsblk_t block = ext4_idx_pblock(ext_idx);
 
 	return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
 }
@@ -463,13 +415,13 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
 	for (k = 0; k <= l; k++, path++) {
 		if (path->p_idx) {
 		  ext_debug("  %d->%llu", le32_to_cpu(path->p_idx->ei_block),
-			    idx_pblock(path->p_idx));
+			    ext4_idx_pblock(path->p_idx));
 		} else if (path->p_ext) {
 			ext_debug("  %d:[%d]%d:%llu ",
 				  le32_to_cpu(path->p_ext->ee_block),
 				  ext4_ext_is_uninitialized(path->p_ext),
 				  ext4_ext_get_actual_len(path->p_ext),
-				  ext_pblock(path->p_ext));
+				  ext4_ext_pblock(path->p_ext));
 		} else
 			ext_debug("  []");
 	}
@@ -494,7 +446,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
 	for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
 		ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
 			  ext4_ext_is_uninitialized(ex),
-			  ext4_ext_get_actual_len(ex), ext_pblock(ex));
+			  ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
 	}
 	ext_debug("\n");
 }
@@ -545,7 +497,7 @@ ext4_ext_binsearch_idx(struct inode *inode,
 
 	path->p_idx = l - 1;
 	ext_debug("  -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block),
-		  idx_pblock(path->p_idx));
+		  ext4_idx_pblock(path->p_idx));
 
 #ifdef CHECK_BINSEARCH
 	{
@@ -614,7 +566,7 @@ ext4_ext_binsearch(struct inode *inode,
 	path->p_ext = l - 1;
 	ext_debug("  -> %d:%llu:[%d]%d ",
 			le32_to_cpu(path->p_ext->ee_block),
-			ext_pblock(path->p_ext),
+			ext4_ext_pblock(path->p_ext),
 			ext4_ext_is_uninitialized(path->p_ext),
 			ext4_ext_get_actual_len(path->p_ext));
 
@@ -682,7 +634,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
 			  ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
 
 		ext4_ext_binsearch_idx(inode, path + ppos, block);
-		path[ppos].p_block = idx_pblock(path[ppos].p_idx);
+		path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
 		path[ppos].p_depth = i;
 		path[ppos].p_ext = NULL;
 
@@ -721,7 +673,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
 	ext4_ext_binsearch(inode, path + ppos, block);
 	/* if not an empty leaf */
 	if (path[ppos].p_ext)
-		path[ppos].p_block = ext_pblock(path[ppos].p_ext);
+		path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
 
 	ext4_ext_show_path(inode, path);
 
@@ -739,9 +691,9 @@ err:
  * insert new index [@logical;@ptr] into the block at @curp;
  * check where to insert: before @curp or after @curp
  */
-int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
-				struct ext4_ext_path *curp,
-				int logical, ext4_fsblk_t ptr)
+static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
+				 struct ext4_ext_path *curp,
+				 int logical, ext4_fsblk_t ptr)
 {
 	struct ext4_extent_idx *ix;
 	int len, err;
@@ -917,7 +869,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 			EXT_MAX_EXTENT(path[depth].p_hdr)) {
 		ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
 				le32_to_cpu(path[depth].p_ext->ee_block),
-				ext_pblock(path[depth].p_ext),
+				ext4_ext_pblock(path[depth].p_ext),
 				ext4_ext_is_uninitialized(path[depth].p_ext),
 				ext4_ext_get_actual_len(path[depth].p_ext),
 				newblock);
@@ -1007,7 +959,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode,
 		while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
 			ext_debug("%d: move %d:%llu in new index %llu\n", i,
 					le32_to_cpu(path[i].p_idx->ei_block),
-					idx_pblock(path[i].p_idx),
+					ext4_idx_pblock(path[i].p_idx),
 					newblock);
 			/*memmove(++fidx, path[i].p_idx++,
 					sizeof(struct ext4_extent_idx));
@@ -1146,7 +1098,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
 	ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
 		  le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
 		  le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
-		  idx_pblock(EXT_FIRST_INDEX(neh)));
+		  ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
 
 	neh->eh_depth = cpu_to_le16(path->p_depth + 1);
 	err = ext4_ext_dirty(handle, inode, curp);
@@ -1232,9 +1184,9 @@ out:
  * returns 0 at @phys
  * return value contains 0 (success) or error code
  */
-int
-ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
-			ext4_lblk_t *logical, ext4_fsblk_t *phys)
+static int ext4_ext_search_left(struct inode *inode,
+				struct ext4_ext_path *path,
+				ext4_lblk_t *logical, ext4_fsblk_t *phys)
 {
 	struct ext4_extent_idx *ix;
 	struct ext4_extent *ex;
@@ -1286,7 +1238,7 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
 	}
 
 	*logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
-	*phys = ext_pblock(ex) + ee_len - 1;
+	*phys = ext4_ext_pblock(ex) + ee_len - 1;
 	return 0;
 }
 
@@ -1297,9 +1249,9 @@ ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
  * returns 0 at @phys
  * return value contains 0 (success) or error code
  */
-int
-ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
-			ext4_lblk_t *logical, ext4_fsblk_t *phys)
+static int ext4_ext_search_right(struct inode *inode,
+				 struct ext4_ext_path *path,
+				 ext4_lblk_t *logical, ext4_fsblk_t *phys)
 {
 	struct buffer_head *bh = NULL;
 	struct ext4_extent_header *eh;
@@ -1342,7 +1294,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
 			}
 		}
 		*logical = le32_to_cpu(ex->ee_block);
-		*phys = ext_pblock(ex);
+		*phys = ext4_ext_pblock(ex);
 		return 0;
 	}
 
@@ -1357,7 +1309,7 @@ ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
 		/* next allocated block in this leaf */
 		ex++;
 		*logical = le32_to_cpu(ex->ee_block);
-		*phys = ext_pblock(ex);
+		*phys = ext4_ext_pblock(ex);
 		return 0;
 	}
 
@@ -1376,7 +1328,7 @@ got_index:
 	 * follow it and find the closest allocated
 	 * block to the right */
 	ix++;
-	block = idx_pblock(ix);
+	block = ext4_idx_pblock(ix);
 	while (++depth < path->p_depth) {
 		bh = sb_bread(inode->i_sb, block);
 		if (bh == NULL)
@@ -1388,7 +1340,7 @@ got_index:
 			return -EIO;
 		}
 		ix = EXT_FIRST_INDEX(eh);
-		block = idx_pblock(ix);
+		block = ext4_idx_pblock(ix);
 		put_bh(bh);
 	}
 
@@ -1402,7 +1354,7 @@ got_index:
 	}
 	ex = EXT_FIRST_EXTENT(eh);
 	*logical = le32_to_cpu(ex->ee_block);
-	*phys = ext_pblock(ex);
+	*phys = ext4_ext_pblock(ex);
 	put_bh(bh);
 	return 0;
 }
@@ -1573,7 +1525,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 		return 0;
 #endif
 
-	if (ext_pblock(ex1) + ext1_ee_len == ext_pblock(ex2))
+	if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
 		return 1;
 	return 0;
 }
@@ -1585,9 +1537,9 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
  * Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
  * 1 if they got merged.
  */
-int ext4_ext_try_to_merge(struct inode *inode,
-			  struct ext4_ext_path *path,
-			  struct ext4_extent *ex)
+static int ext4_ext_try_to_merge(struct inode *inode,
+				 struct ext4_ext_path *path,
+				 struct ext4_extent *ex)
 {
 	struct ext4_extent_header *eh;
 	unsigned int depth, len;
@@ -1632,9 +1584,9 @@ int ext4_ext_try_to_merge(struct inode *inode,
  * such that there will be no overlap, and then returns 1.
  * If there is no overlap found, it returns 0.
  */
-unsigned int ext4_ext_check_overlap(struct inode *inode,
-				    struct ext4_extent *newext,
-				    struct ext4_ext_path *path)
+static unsigned int ext4_ext_check_overlap(struct inode *inode,
+					   struct ext4_extent *newext,
+					   struct ext4_ext_path *path)
 {
 	ext4_lblk_t b1, b2;
 	unsigned int depth, len1;
@@ -1706,11 +1658,12 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 	if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
 		&& ext4_can_extents_be_merged(inode, ex, newext)) {
 		ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
-				ext4_ext_is_uninitialized(newext),
-				ext4_ext_get_actual_len(newext),
-				le32_to_cpu(ex->ee_block),
-				ext4_ext_is_uninitialized(ex),
-				ext4_ext_get_actual_len(ex), ext_pblock(ex));
+			  ext4_ext_is_uninitialized(newext),
+			  ext4_ext_get_actual_len(newext),
+			  le32_to_cpu(ex->ee_block),
+			  ext4_ext_is_uninitialized(ex),
+			  ext4_ext_get_actual_len(ex),
+			  ext4_ext_pblock(ex));
 		err = ext4_ext_get_access(handle, inode, path + depth);
 		if (err)
 			return err;
@@ -1780,7 +1733,7 @@ has_space:
 		/* there is no extent in this leaf, create first one */
 		ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n",
 				le32_to_cpu(newext->ee_block),
-				ext_pblock(newext),
+				ext4_ext_pblock(newext),
 				ext4_ext_is_uninitialized(newext),
 				ext4_ext_get_actual_len(newext));
 		path[depth].p_ext = EXT_FIRST_EXTENT(eh);
@@ -1794,7 +1747,7 @@ has_space:
 			ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, "
 					"move %d from 0x%p to 0x%p\n",
 					le32_to_cpu(newext->ee_block),
-					ext_pblock(newext),
+					ext4_ext_pblock(newext),
 					ext4_ext_is_uninitialized(newext),
 					ext4_ext_get_actual_len(newext),
 					nearex, len, nearex + 1, nearex + 2);
@@ -1808,7 +1761,7 @@ has_space:
 		ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, "
 				"move %d from 0x%p to 0x%p\n",
 				le32_to_cpu(newext->ee_block),
-				ext_pblock(newext),
+				ext4_ext_pblock(newext),
 				ext4_ext_is_uninitialized(newext),
 				ext4_ext_get_actual_len(newext),
 				nearex, len, nearex + 1, nearex + 2);
@@ -1819,7 +1772,7 @@ has_space:
 	le16_add_cpu(&eh->eh_entries, 1);
 	nearex = path[depth].p_ext;
 	nearex->ee_block = newext->ee_block;
-	ext4_ext_store_pblock(nearex, ext_pblock(newext));
+	ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
 	nearex->ee_len = newext->ee_len;
 
 merge:
@@ -1845,9 +1798,9 @@ cleanup:
 	return err;
 }
 
-int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
-			ext4_lblk_t num, ext_prepare_callback func,
-			void *cbdata)
+static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
+			       ext4_lblk_t num, ext_prepare_callback func,
+			       void *cbdata)
 {
 	struct ext4_ext_path *path = NULL;
 	struct ext4_ext_cache cbex;
@@ -1923,7 +1876,7 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
 		} else {
 			cbex.ec_block = le32_to_cpu(ex->ee_block);
 			cbex.ec_len = ext4_ext_get_actual_len(ex);
-			cbex.ec_start = ext_pblock(ex);
+			cbex.ec_start = ext4_ext_pblock(ex);
 			cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
 		}
 
@@ -2073,7 +2026,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
 
 	/* free index block */
 	path--;
-	leaf = idx_pblock(path->p_idx);
+	leaf = ext4_idx_pblock(path->p_idx);
 	if (unlikely(path->p_hdr->eh_entries == 0)) {
 		EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
 		return -EIO;
@@ -2181,7 +2134,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 		ext4_fsblk_t start;
 
 		num = le32_to_cpu(ex->ee_block) + ee_len - from;
-		start = ext_pblock(ex) + ee_len - num;
+		start = ext4_ext_pblock(ex) + ee_len - num;
 		ext_debug("free last %u blocks starting %llu\n", num, start);
 		ext4_free_blocks(handle, inode, 0, start, num, flags);
 	} else if (from == le32_to_cpu(ex->ee_block)
@@ -2310,7 +2263,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 			goto out;
 
 		ext_debug("new extent: %u:%u:%llu\n", block, num,
-				ext_pblock(ex));
+				ext4_ext_pblock(ex));
 		ex--;
 		ex_ee_block = le32_to_cpu(ex->ee_block);
 		ex_ee_len = ext4_ext_get_actual_len(ex);
@@ -2421,9 +2374,9 @@ again:
 			struct buffer_head *bh;
 			/* go to the next level */
 			ext_debug("move to level %d (block %llu)\n",
-				  i + 1, idx_pblock(path[i].p_idx));
+				  i + 1, ext4_idx_pblock(path[i].p_idx));
 			memset(path + i + 1, 0, sizeof(*path));
-			bh = sb_bread(sb, idx_pblock(path[i].p_idx));
+			bh = sb_bread(sb, ext4_idx_pblock(path[i].p_idx));
 			if (!bh) {
 				/* should we reset i_size? */
 				err = -EIO;
@@ -2535,77 +2488,21 @@ void ext4_ext_release(struct super_block *sb)
 #endif
 }
 
-static void bi_complete(struct bio *bio, int error)
-{
-	complete((struct completion *)bio->bi_private);
-}
-
 /* FIXME!! we need to try to merge to left or right after zero-out  */
 static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
 {
+	ext4_fsblk_t ee_pblock;
+	unsigned int ee_len;
 	int ret;
-	struct bio *bio;
-	int blkbits, blocksize;
-	sector_t ee_pblock;
-	struct completion event;
-	unsigned int ee_len, len, done, offset;
 
-
-	blkbits   = inode->i_blkbits;
-	blocksize = inode->i_sb->s_blocksize;
 	ee_len    = ext4_ext_get_actual_len(ex);
-	ee_pblock = ext_pblock(ex);
-
-	/* convert ee_pblock to 512 byte sectors */
-	ee_pblock = ee_pblock << (blkbits - 9);
-
-	while (ee_len > 0) {
-
-		if (ee_len > BIO_MAX_PAGES)
-			len = BIO_MAX_PAGES;
-		else
-			len = ee_len;
-
-		bio = bio_alloc(GFP_NOIO, len);
-		if (!bio)
-			return -ENOMEM;
-
-		bio->bi_sector = ee_pblock;
-		bio->bi_bdev   = inode->i_sb->s_bdev;
-
-		done = 0;
-		offset = 0;
-		while (done < len) {
-			ret = bio_add_page(bio, ZERO_PAGE(0),
-							blocksize, offset);
-			if (ret != blocksize) {
-				/*
-				 * We can't add any more pages because of
-				 * hardware limitations.  Start a new bio.
-				 */
-				break;
-			}
-			done++;
-			offset += blocksize;
-			if (offset >= PAGE_CACHE_SIZE)
-				offset = 0;
-		}
+	ee_pblock = ext4_ext_pblock(ex);
 
-		init_completion(&event);
-		bio->bi_private = &event;
-		bio->bi_end_io = bi_complete;
-		submit_bio(WRITE, bio);
-		wait_for_completion(&event);
+	ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
+	if (ret > 0)
+		ret = 0;
 
-		if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
-			bio_put(bio);
-			return -EIO;
-		}
-		bio_put(bio);
-		ee_len    -= done;
-		ee_pblock += done  << (blkbits - 9);
-	}
-	return 0;
+	return ret;
 }
 
 #define EXT4_EXT_ZERO_LEN 7
@@ -2651,12 +2548,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	ee_block = le32_to_cpu(ex->ee_block);
 	ee_len = ext4_ext_get_actual_len(ex);
 	allocated = ee_len - (map->m_lblk - ee_block);
-	newblock = map->m_lblk - ee_block + ext_pblock(ex);
+	newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
 
 	ex2 = ex;
 	orig_ex.ee_block = ex->ee_block;
 	orig_ex.ee_len   = cpu_to_le16(ee_len);
-	ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
+	ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
 
 	/*
 	 * It is safe to convert extent to initialized via explicit
@@ -2675,7 +2572,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 		/* update the extent length and mark as initialized */
 		ex->ee_block = orig_ex.ee_block;
 		ex->ee_len   = orig_ex.ee_len;
-		ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+		ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
 		ext4_ext_dirty(handle, inode, path + depth);
 		/* zeroed the full extent */
 		return allocated;
@@ -2710,7 +2607,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 			ex->ee_block = orig_ex.ee_block;
 			ex->ee_len   = cpu_to_le16(ee_len - allocated);
 			ext4_ext_mark_uninitialized(ex);
-			ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+			ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
 			ext4_ext_dirty(handle, inode, path + depth);
 
 			ex3 = &newex;
@@ -2725,7 +2622,8 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 					goto fix_extent_len;
 				ex->ee_block = orig_ex.ee_block;
 				ex->ee_len   = orig_ex.ee_len;
-				ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+				ext4_ext_store_pblock(ex,
+					ext4_ext_pblock(&orig_ex));
 				ext4_ext_dirty(handle, inode, path + depth);
 				/* blocks available from map->m_lblk */
 				return allocated;
@@ -2782,7 +2680,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 			/* update the extent length and mark as initialized */
 			ex->ee_block = orig_ex.ee_block;
 			ex->ee_len   = orig_ex.ee_len;
-			ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+			ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
 			ext4_ext_dirty(handle, inode, path + depth);
 			/* zeroed the full extent */
 			/* blocks available from map->m_lblk */
@@ -2833,7 +2731,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 			/* update the extent length and mark as initialized */
 			ex->ee_block = orig_ex.ee_block;
 			ex->ee_len   = orig_ex.ee_len;
-			ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+			ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
 			ext4_ext_dirty(handle, inode, path + depth);
 			/* zero out the first half */
 			/* blocks available from map->m_lblk */
@@ -2902,7 +2800,7 @@ insert:
 		/* update the extent length and mark as initialized */
 		ex->ee_block = orig_ex.ee_block;
 		ex->ee_len   = orig_ex.ee_len;
-		ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+		ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
 		ext4_ext_dirty(handle, inode, path + depth);
 		/* zero out the first half */
 		return allocated;
@@ -2915,7 +2813,7 @@ out:
 fix_extent_len:
 	ex->ee_block = orig_ex.ee_block;
 	ex->ee_len   = orig_ex.ee_len;
-	ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+	ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
 	ext4_ext_mark_uninitialized(ex);
 	ext4_ext_dirty(handle, inode, path + depth);
 	return err;
@@ -2973,12 +2871,12 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 	ee_block = le32_to_cpu(ex->ee_block);
 	ee_len = ext4_ext_get_actual_len(ex);
 	allocated = ee_len - (map->m_lblk - ee_block);
-	newblock = map->m_lblk - ee_block + ext_pblock(ex);
+	newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
 
 	ex2 = ex;
 	orig_ex.ee_block = ex->ee_block;
 	orig_ex.ee_len   = cpu_to_le16(ee_len);
-	ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
+	ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
 
 	/*
 	 * It is safe to convert extent to initialized via explicit
@@ -3027,7 +2925,7 @@ static int ext4_split_unwritten_extents(handle_t *handle,
 			/* update the extent length and mark as initialized */
 			ex->ee_block = orig_ex.ee_block;
 			ex->ee_len   = orig_ex.ee_len;
-			ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+			ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
 			ext4_ext_dirty(handle, inode, path + depth);
 			/* zeroed the full extent */
 			/* blocks available from map->m_lblk */
@@ -3099,7 +2997,7 @@ insert:
 		/* update the extent length and mark as initialized */
 		ex->ee_block = orig_ex.ee_block;
 		ex->ee_len   = orig_ex.ee_len;
-		ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+		ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
 		ext4_ext_dirty(handle, inode, path + depth);
 		/* zero out the first half */
 		return allocated;
@@ -3112,7 +3010,7 @@ out:
 fix_extent_len:
 	ex->ee_block = orig_ex.ee_block;
 	ex->ee_len   = orig_ex.ee_len;
-	ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+	ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
 	ext4_ext_mark_uninitialized(ex);
 	ext4_ext_dirty(handle, inode, path + depth);
 	return err;
@@ -3180,6 +3078,57 @@ static void unmap_underlying_metadata_blocks(struct block_device *bdev,
                 unmap_underlying_metadata(bdev, block + i);
 }
 
+/*
+ * Handle EOFBLOCKS_FL flag, clearing it if necessary
+ */
+static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
+			      struct ext4_map_blocks *map,
+			      struct ext4_ext_path *path,
+			      unsigned int len)
+{
+	int i, depth;
+	struct ext4_extent_header *eh;
+	struct ext4_extent *ex, *last_ex;
+
+	if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
+		return 0;
+
+	depth = ext_depth(inode);
+	eh = path[depth].p_hdr;
+	ex = path[depth].p_ext;
+
+	if (unlikely(!eh->eh_entries)) {
+		EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and "
+				 "EOFBLOCKS_FL set");
+		return -EIO;
+	}
+	last_ex = EXT_LAST_EXTENT(eh);
+	/*
+	 * We should clear the EOFBLOCKS_FL flag if we are writing the
+	 * last block in the last extent in the file.  We test this by
+	 * first checking to see if the caller to
+	 * ext4_ext_get_blocks() was interested in the last block (or
+	 * a block beyond the last block) in the current extent.  If
+	 * this turns out to be false, we can bail out from this
+	 * function immediately.
+	 */
+	if (map->m_lblk + len < le32_to_cpu(last_ex->ee_block) +
+	    ext4_ext_get_actual_len(last_ex))
+		return 0;
+	/*
+	 * If the caller does appear to be planning to write at or
+	 * beyond the end of the current extent, we then test to see
+	 * if the current extent is the last extent in the file, by
+	 * checking to make sure it was reached via the rightmost node
+	 * at each level of the tree.
+	 */
+	for (i = depth-1; i >= 0; i--)
+		if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
+			return 0;
+	ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
+	return ext4_mark_inode_dirty(handle, inode);
+}
+
 static int
 ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 			struct ext4_map_blocks *map,
@@ -3206,7 +3155,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 		 * completed
 		 */
 		if (io)
-			io->flag = EXT4_IO_UNWRITTEN;
+			io->flag = EXT4_IO_END_UNWRITTEN;
 		else
 			ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
 		if (ext4_should_dioread_nolock(inode))
@@ -3217,8 +3166,12 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 	if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
 		ret = ext4_convert_unwritten_extents_endio(handle, inode,
 							path);
-		if (ret >= 0)
+		if (ret >= 0) {
 			ext4_update_inode_fsync_trans(handle, inode, 1);
+			err = check_eofblocks_fl(handle, inode, map, path,
+						 map->m_len);
+		} else
+			err = ret;
 		goto out2;
 	}
 	/* buffered IO case */
@@ -3244,8 +3197,13 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
 
 	/* buffered write, writepage time, convert*/
 	ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
-	if (ret >= 0)
+	if (ret >= 0) {
 		ext4_update_inode_fsync_trans(handle, inode, 1);
+		err = check_eofblocks_fl(handle, inode, map, path, map->m_len);
+		if (err < 0)
+			goto out2;
+	}
+
 out:
 	if (ret <= 0) {
 		err = ret;
@@ -3292,6 +3250,7 @@ out2:
 	}
 	return err ? err : allocated;
 }
+
 /*
  * Block allocation/map/preallocation routine for extents based files
  *
@@ -3315,9 +3274,9 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 {
 	struct ext4_ext_path *path = NULL;
 	struct ext4_extent_header *eh;
-	struct ext4_extent newex, *ex, *last_ex;
+	struct ext4_extent newex, *ex;
 	ext4_fsblk_t newblock;
-	int i, err = 0, depth, ret, cache_type;
+	int err = 0, depth, ret, cache_type;
 	unsigned int allocated = 0;
 	struct ext4_allocation_request ar;
 	ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
@@ -3341,7 +3300,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 			/* block is already allocated */
 			newblock = map->m_lblk
 				   - le32_to_cpu(newex.ee_block)
-				   + ext_pblock(&newex);
+				   + ext4_ext_pblock(&newex);
 			/* number of remaining blocks in the extent */
 			allocated = ext4_ext_get_actual_len(&newex) -
 				(map->m_lblk - le32_to_cpu(newex.ee_block));
@@ -3379,7 +3338,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 	ex = path[depth].p_ext;
 	if (ex) {
 		ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
-		ext4_fsblk_t ee_start = ext_pblock(ex);
+		ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
 		unsigned short ee_len;
 
 		/*
@@ -3488,7 +3447,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		 */
 		if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
 			if (io)
-				io->flag = EXT4_IO_UNWRITTEN;
+				io->flag = EXT4_IO_END_UNWRITTEN;
 			else
 				ext4_set_inode_state(inode,
 						     EXT4_STATE_DIO_UNWRITTEN);
@@ -3497,44 +3456,23 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 			map->m_flags |= EXT4_MAP_UNINIT;
 	}
 
-	if (unlikely(ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) {
-		if (unlikely(!eh->eh_entries)) {
-			EXT4_ERROR_INODE(inode,
-					 "eh->eh_entries == 0 and "
-					 "EOFBLOCKS_FL set");
-			err = -EIO;
-			goto out2;
-		}
-		last_ex = EXT_LAST_EXTENT(eh);
-		/*
-		 * If the current leaf block was reached by looking at
-		 * the last index block all the way down the tree, and
-		 * we are extending the inode beyond the last extent
-		 * in the current leaf block, then clear the
-		 * EOFBLOCKS_FL flag.
-		 */
-		for (i = depth-1; i >= 0; i--) {
-			if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
-				break;
-		}
-		if ((i < 0) &&
-		    (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) +
-		     ext4_ext_get_actual_len(last_ex)))
-			ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
-	}
+	err = check_eofblocks_fl(handle, inode, map, path, ar.len);
+	if (err)
+		goto out2;
+
 	err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
 	if (err) {
 		/* free data blocks we just allocated */
 		/* not a good idea to call discard here directly,
 		 * but otherwise we'd need to call it every free() */
 		ext4_discard_preallocations(inode);
-		ext4_free_blocks(handle, inode, 0, ext_pblock(&newex),
+		ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex),
 				 ext4_ext_get_actual_len(&newex), 0);
 		goto out2;
 	}
 
 	/* previous routine could use block we allocated */
-	newblock = ext_pblock(&newex);
+	newblock = ext4_ext_pblock(&newex);
 	allocated = ext4_ext_get_actual_len(&newex);
 	if (allocated > map->m_len)
 		allocated = map->m_len;
@@ -3729,7 +3667,7 @@ retry:
 			printk(KERN_ERR "%s: ext4_ext_map_blocks "
 				    "returned error inode#%lu, block=%u, "
 				    "max_blocks=%u", __func__,
-				    inode->i_ino, block, max_blocks);
+				    inode->i_ino, map.m_lblk, max_blocks);
 #endif
 			ext4_mark_inode_dirty(handle, inode);
 			ret2 = ext4_journal_stop(handle);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ee92b66d455..5a5c55ddcee 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -130,8 +130,50 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
 	return dquot_file_open(inode, filp);
 }
 
+/*
+ * ext4_llseek() copied from generic_file_llseek() to handle both
+ * block-mapped and extent-mapped maxbytes values. This should
+ * otherwise be identical with generic_file_llseek().
+ */
+loff_t ext4_llseek(struct file *file, loff_t offset, int origin)
+{
+	struct inode *inode = file->f_mapping->host;
+	loff_t maxbytes;
+
+	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
+		maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
+	else
+		maxbytes = inode->i_sb->s_maxbytes;
+	mutex_lock(&inode->i_mutex);
+	switch (origin) {
+	case SEEK_END:
+		offset += inode->i_size;
+		break;
+	case SEEK_CUR:
+		if (offset == 0) {
+			mutex_unlock(&inode->i_mutex);
+			return file->f_pos;
+		}
+		offset += file->f_pos;
+		break;
+	}
+
+	if (offset < 0 || offset > maxbytes) {
+		mutex_unlock(&inode->i_mutex);
+		return -EINVAL;
+	}
+
+	if (offset != file->f_pos) {
+		file->f_pos = offset;
+		file->f_version = 0;
+	}
+	mutex_unlock(&inode->i_mutex);
+
+	return offset;
+}
+
 const struct file_operations ext4_file_operations = {
-	.llseek		= generic_file_llseek,
+	.llseek		= ext4_llseek,
 	.read		= do_sync_read,
 	.write		= do_sync_write,
 	.aio_read	= generic_file_aio_read,
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 3f3ff5ee8f9..c1a7bc923cf 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -34,6 +34,89 @@
 
 #include <trace/events/ext4.h>
 
+static void dump_completed_IO(struct inode * inode)
+{
+#ifdef	EXT4_DEBUG
+	struct list_head *cur, *before, *after;
+	ext4_io_end_t *io, *io0, *io1;
+	unsigned long flags;
+
+	if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
+		ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
+		return;
+	}
+
+	ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
+	spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
+	list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
+		cur = &io->list;
+		before = cur->prev;
+		io0 = container_of(before, ext4_io_end_t, list);
+		after = cur->next;
+		io1 = container_of(after, ext4_io_end_t, list);
+
+		ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
+			    io, inode->i_ino, io0, io1);
+	}
+	spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
+#endif
+}
+
+/*
+ * This function is called from ext4_sync_file().
+ *
+ * When IO is completed, the work to convert unwritten extents to
+ * written is queued on workqueue but may not get immediately
+ * scheduled. When fsync is called, we need to ensure the
+ * conversion is complete before fsync returns.
+ * The inode keeps track of a list of pending/completed IO that
+ * might needs to do the conversion. This function walks through
+ * the list and convert the related unwritten extents for completed IO
+ * to written.
+ * The function return the number of pending IOs on success.
+ */
+static int flush_completed_IO(struct inode *inode)
+{
+	ext4_io_end_t *io;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	unsigned long flags;
+	int ret = 0;
+	int ret2 = 0;
+
+	if (list_empty(&ei->i_completed_io_list))
+		return ret;
+
+	dump_completed_IO(inode);
+	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
+	while (!list_empty(&ei->i_completed_io_list)){
+		io = list_entry(ei->i_completed_io_list.next,
+				ext4_io_end_t, list);
+		/*
+		 * Calling ext4_end_io_nolock() to convert completed
+		 * IO to written.
+		 *
+		 * When ext4_sync_file() is called, run_queue() may already
+		 * about to flush the work corresponding to this io structure.
+		 * It will be upset if it founds the io structure related
+		 * to the work-to-be schedule is freed.
+		 *
+		 * Thus we need to keep the io structure still valid here after
+		 * convertion finished. The io structure has a flag to
+		 * avoid double converting from both fsync and background work
+		 * queue work.
+		 */
+		spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+		ret = ext4_end_io_nolock(io);
+		spin_lock_irqsave(&ei->i_completed_io_lock, flags);
+		if (ret < 0)
+			ret2 = ret;
+		else
+			list_del_init(&io->list);
+	}
+	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+	return (ret2 < 0) ? ret2 : 0;
+}
+
 /*
  * If we're not journaling and this is a just-created file, we have to
  * sync our parent directory (if it was freshly created) since
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 45853e0d1f2..1ce240a23eb 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -50,7 +50,7 @@
  * need to use it within a single byte (to ensure we get endianness right).
  * We can use memset for the rest of the bitmap as there are no other users.
  */
-void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
+void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
 {
 	int i;
 
@@ -65,9 +65,10 @@ void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
 }
 
 /* Initializes an uninitialized inode bitmap */
-unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
-				ext4_group_t block_group,
-				struct ext4_group_desc *gdp)
+static unsigned ext4_init_inode_bitmap(struct super_block *sb,
+				       struct buffer_head *bh,
+				       ext4_group_t block_group,
+				       struct ext4_group_desc *gdp)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 
@@ -85,7 +86,7 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
 	}
 
 	memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
-	mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
+	ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
 			bh->b_data);
 
 	return EXT4_INODES_PER_GROUP(sb);
@@ -107,6 +108,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 	desc = ext4_get_group_desc(sb, block_group, NULL);
 	if (!desc)
 		return NULL;
+
 	bitmap_blk = ext4_inode_bitmap(sb, desc);
 	bh = sb_getblk(sb, bitmap_blk);
 	if (unlikely(!bh)) {
@@ -123,6 +125,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 		unlock_buffer(bh);
 		return bh;
 	}
+
 	ext4_lock_group(sb, block_group);
 	if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
 		ext4_init_inode_bitmap(sb, bh, block_group, desc);
@@ -133,6 +136,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
 		return bh;
 	}
 	ext4_unlock_group(sb, block_group);
+
 	if (buffer_uptodate(bh)) {
 		/*
 		 * if not uninit if bh is uptodate,
@@ -411,8 +415,8 @@ struct orlov_stats {
  * for a particular block group or flex_bg.  If flex_size is 1, then g
  * is a block group number; otherwise it is flex_bg number.
  */
-void get_orlov_stats(struct super_block *sb, ext4_group_t g,
-		       int flex_size, struct orlov_stats *stats)
+static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
+			    int flex_size, struct orlov_stats *stats)
 {
 	struct ext4_group_desc *desc;
 	struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups;
@@ -712,8 +716,17 @@ static int ext4_claim_inode(struct super_block *sb,
 {
 	int free = 0, retval = 0, count;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
 	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
 
+	/*
+	 * We have to be sure that new inode allocation does not race with
+	 * inode table initialization, because otherwise we may end up
+	 * allocating and writing new inode right before sb_issue_zeroout
+	 * takes place and overwriting our new inode with zeroes. So we
+	 * take alloc_sem to prevent it.
+	 */
+	down_read(&grp->alloc_sem);
 	ext4_lock_group(sb, group);
 	if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
 		/* not a free inode */
@@ -724,6 +737,7 @@ static int ext4_claim_inode(struct super_block *sb,
 	if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
 			ino > EXT4_INODES_PER_GROUP(sb)) {
 		ext4_unlock_group(sb, group);
+		up_read(&grp->alloc_sem);
 		ext4_error(sb, "reserved inode or inode > inodes count - "
 			   "block_group = %u, inode=%lu", group,
 			   ino + group * EXT4_INODES_PER_GROUP(sb));
@@ -772,6 +786,7 @@ static int ext4_claim_inode(struct super_block *sb,
 	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
 err_ret:
 	ext4_unlock_group(sb, group);
+	up_read(&grp->alloc_sem);
 	return retval;
 }
 
@@ -1205,3 +1220,109 @@ unsigned long ext4_count_dirs(struct super_block * sb)
 	}
 	return count;
 }
+
+/*
+ * Zeroes not yet zeroed inode table - just write zeroes through the whole
+ * inode table. Must be called without any spinlock held. The only place
+ * where it is called from on active part of filesystem is ext4lazyinit
+ * thread, so we do not need any special locks, however we have to prevent
+ * inode allocation from the current group, so we take alloc_sem lock, to
+ * block ext4_claim_inode until we are finished.
+ */
+extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
+				 int barrier)
+{
+	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_group_desc *gdp = NULL;
+	struct buffer_head *group_desc_bh;
+	handle_t *handle;
+	ext4_fsblk_t blk;
+	int num, ret = 0, used_blks = 0;
+
+	/* This should not happen, but just to be sure check this */
+	if (sb->s_flags & MS_RDONLY) {
+		ret = 1;
+		goto out;
+	}
+
+	gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
+	if (!gdp)
+		goto out;
+
+	/*
+	 * We do not need to lock this, because we are the only one
+	 * handling this flag.
+	 */
+	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
+		goto out;
+
+	handle = ext4_journal_start_sb(sb, 1);
+	if (IS_ERR(handle)) {
+		ret = PTR_ERR(handle);
+		goto out;
+	}
+
+	down_write(&grp->alloc_sem);
+	/*
+	 * If inode bitmap was already initialized there may be some
+	 * used inodes so we need to skip blocks with used inodes in
+	 * inode table.
+	 */
+	if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
+		used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) -
+			    ext4_itable_unused_count(sb, gdp)),
+			    sbi->s_inodes_per_block);
+
+	if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
+		ext4_error(sb, "Something is wrong with group %u\n"
+			   "Used itable blocks: %d"
+			   "itable unused count: %u\n",
+			   group, used_blks,
+			   ext4_itable_unused_count(sb, gdp));
+		ret = 1;
+		goto out;
+	}
+
+	blk = ext4_inode_table(sb, gdp) + used_blks;
+	num = sbi->s_itb_per_group - used_blks;
+
+	BUFFER_TRACE(group_desc_bh, "get_write_access");
+	ret = ext4_journal_get_write_access(handle,
+					    group_desc_bh);
+	if (ret)
+		goto err_out;
+
+	/*
+	 * Skip zeroout if the inode table is full. But we set the ZEROED
+	 * flag anyway, because obviously, when it is full it does not need
+	 * further zeroing.
+	 */
+	if (unlikely(num == 0))
+		goto skip_zeroout;
+
+	ext4_debug("going to zero out inode table in group %d\n",
+		   group);
+	ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS);
+	if (ret < 0)
+		goto err_out;
+	if (barrier)
+		blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL);
+
+skip_zeroout:
+	ext4_lock_group(sb, group);
+	gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED);
+	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
+	ext4_unlock_group(sb, group);
+
+	BUFFER_TRACE(group_desc_bh,
+		     "call ext4_handle_dirty_metadata");
+	ret = ext4_handle_dirty_metadata(handle, NULL,
+					 group_desc_bh);
+
+err_out:
+	up_write(&grp->alloc_sem);
+	ext4_journal_stop(handle);
+out:
+	return ret;
+}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4b8debeb396..e659597b690 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -53,6 +53,7 @@
 static inline int ext4_begin_ordered_truncate(struct inode *inode,
 					      loff_t new_size)
 {
+	trace_ext4_begin_ordered_truncate(inode, new_size);
 	return jbd2_journal_begin_ordered_truncate(
 					EXT4_SB(inode->i_sb)->s_journal,
 					&EXT4_I(inode)->jinode,
@@ -60,6 +61,12 @@ static inline int ext4_begin_ordered_truncate(struct inode *inode,
 }
 
 static void ext4_invalidatepage(struct page *page, unsigned long offset);
+static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
+				   struct buffer_head *bh_result, int create);
+static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
+static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
+static int __ext4_journalled_writepage(struct page *page, unsigned int len);
+static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
 
 /*
  * Test whether an inode is a fast symlink.
@@ -172,6 +179,7 @@ void ext4_evict_inode(struct inode *inode)
 	handle_t *handle;
 	int err;
 
+	trace_ext4_evict_inode(inode);
 	if (inode->i_nlink) {
 		truncate_inode_pages(&inode->i_data, 0);
 		goto no_delete;
@@ -755,6 +763,11 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 		 * parent to disk.
 		 */
 		bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
+		if (unlikely(!bh)) {
+			err = -EIO;
+			goto failed;
+		}
+
 		branch[n].bh = bh;
 		lock_buffer(bh);
 		BUFFER_TRACE(bh, "call get_create_access");
@@ -1207,8 +1220,10 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
 				break;
 			idx++;
 			num++;
-			if (num >= max_pages)
+			if (num >= max_pages) {
+				done = 1;
 				break;
+			}
 		}
 		pagevec_release(&pvec);
 	}
@@ -1538,10 +1553,10 @@ static int do_journal_get_write_access(handle_t *handle,
 	if (!buffer_mapped(bh) || buffer_freed(bh))
 		return 0;
 	/*
-	 * __block_prepare_write() could have dirtied some buffers. Clean
+	 * __block_write_begin() could have dirtied some buffers. Clean
 	 * the dirty bit as jbd2_journal_get_write_access() could complain
 	 * otherwise about fs integrity issues. Setting of the dirty bit
-	 * by __block_prepare_write() isn't a real problem here as we clear
+	 * by __block_write_begin() isn't a real problem here as we clear
 	 * the bit before releasing a page lock and thus writeback cannot
 	 * ever write the buffer.
 	 */
@@ -1995,16 +2010,23 @@ static void ext4_da_page_release_reservation(struct page *page,
  *
  * As pages are already locked by write_cache_pages(), we can't use it
  */
-static int mpage_da_submit_io(struct mpage_da_data *mpd)
+static int mpage_da_submit_io(struct mpage_da_data *mpd,
+			      struct ext4_map_blocks *map)
 {
-	long pages_skipped;
 	struct pagevec pvec;
 	unsigned long index, end;
 	int ret = 0, err, nr_pages, i;
 	struct inode *inode = mpd->inode;
 	struct address_space *mapping = inode->i_mapping;
+	loff_t size = i_size_read(inode);
+	unsigned int len, block_start;
+	struct buffer_head *bh, *page_bufs = NULL;
+	int journal_data = ext4_should_journal_data(inode);
+	sector_t pblock = 0, cur_logical = 0;
+	struct ext4_io_submit io_submit;
 
 	BUG_ON(mpd->next_page <= mpd->first_page);
+	memset(&io_submit, 0, sizeof(io_submit));
 	/*
 	 * We need to start from the first_page to the next_page - 1
 	 * to make sure we also write the mapped dirty buffer_heads.
@@ -2020,122 +2042,111 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd)
 		if (nr_pages == 0)
 			break;
 		for (i = 0; i < nr_pages; i++) {
+			int commit_write = 0, redirty_page = 0;
 			struct page *page = pvec.pages[i];
 
 			index = page->index;
 			if (index > end)
 				break;
+
+			if (index == size >> PAGE_CACHE_SHIFT)
+				len = size & ~PAGE_CACHE_MASK;
+			else
+				len = PAGE_CACHE_SIZE;
+			if (map) {
+				cur_logical = index << (PAGE_CACHE_SHIFT -
+							inode->i_blkbits);
+				pblock = map->m_pblk + (cur_logical -
+							map->m_lblk);
+			}
 			index++;
 
 			BUG_ON(!PageLocked(page));
 			BUG_ON(PageWriteback(page));
 
-			pages_skipped = mpd->wbc->pages_skipped;
-			err = mapping->a_ops->writepage(page, mpd->wbc);
-			if (!err && (pages_skipped == mpd->wbc->pages_skipped))
-				/*
-				 * have successfully written the page
-				 * without skipping the same
-				 */
-				mpd->pages_written++;
 			/*
-			 * In error case, we have to continue because
-			 * remaining pages are still locked
-			 * XXX: unlock and re-dirty them?
+			 * If the page does not have buffers (for
+			 * whatever reason), try to create them using
+			 * __block_write_begin.  If this fails,
+			 * redirty the page and move on.
 			 */
-			if (ret == 0)
-				ret = err;
-		}
-		pagevec_release(&pvec);
-	}
-	return ret;
-}
-
-/*
- * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers
- *
- * the function goes through all passed space and put actual disk
- * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
- */
-static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd,
-				 struct ext4_map_blocks *map)
-{
-	struct inode *inode = mpd->inode;
-	struct address_space *mapping = inode->i_mapping;
-	int blocks = map->m_len;
-	sector_t pblock = map->m_pblk, cur_logical;
-	struct buffer_head *head, *bh;
-	pgoff_t index, end;
-	struct pagevec pvec;
-	int nr_pages, i;
-
-	index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-
-	pagevec_init(&pvec, 0);
-
-	while (index <= end) {
-		/* XXX: optimize tail */
-		nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
-		if (nr_pages == 0)
-			break;
-		for (i = 0; i < nr_pages; i++) {
-			struct page *page = pvec.pages[i];
-
-			index = page->index;
-			if (index > end)
-				break;
-			index++;
-
-			BUG_ON(!PageLocked(page));
-			BUG_ON(PageWriteback(page));
-			BUG_ON(!page_has_buffers(page));
-
-			bh = page_buffers(page);
-			head = bh;
-
-			/* skip blocks out of the range */
-			do {
-				if (cur_logical >= map->m_lblk)
-					break;
-				cur_logical++;
-			} while ((bh = bh->b_this_page) != head);
+			if (!page_has_buffers(page)) {
+				if (__block_write_begin(page, 0, len,
+						noalloc_get_block_write)) {
+				redirty_page:
+					redirty_page_for_writepage(mpd->wbc,
+								   page);
+					unlock_page(page);
+					continue;
+				}
+				commit_write = 1;
+			}
 
+			bh = page_bufs = page_buffers(page);
+			block_start = 0;
 			do {
-				if (cur_logical >= map->m_lblk + blocks)
-					break;
-
-				if (buffer_delay(bh) || buffer_unwritten(bh)) {
-
-					BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
-
+				if (!bh)
+					goto redirty_page;
+				if (map && (cur_logical >= map->m_lblk) &&
+				    (cur_logical <= (map->m_lblk +
+						     (map->m_len - 1)))) {
 					if (buffer_delay(bh)) {
 						clear_buffer_delay(bh);
 						bh->b_blocknr = pblock;
-					} else {
-						/*
-						 * unwritten already should have
-						 * blocknr assigned. Verify that
-						 */
-						clear_buffer_unwritten(bh);
-						BUG_ON(bh->b_blocknr != pblock);
 					}
+					if (buffer_unwritten(bh) ||
+					    buffer_mapped(bh))
+						BUG_ON(bh->b_blocknr != pblock);
+					if (map->m_flags & EXT4_MAP_UNINIT)
+						set_buffer_uninit(bh);
+					clear_buffer_unwritten(bh);
+				}
 
-				} else if (buffer_mapped(bh))
-					BUG_ON(bh->b_blocknr != pblock);
-
-				if (map->m_flags & EXT4_MAP_UNINIT)
-					set_buffer_uninit(bh);
+				/* redirty page if block allocation undone */
+				if (buffer_delay(bh) || buffer_unwritten(bh))
+					redirty_page = 1;
+				bh = bh->b_this_page;
+				block_start += bh->b_size;
 				cur_logical++;
 				pblock++;
-			} while ((bh = bh->b_this_page) != head);
+			} while (bh != page_bufs);
+
+			if (redirty_page)
+				goto redirty_page;
+
+			if (commit_write)
+				/* mark the buffer_heads as dirty & uptodate */
+				block_commit_write(page, 0, len);
+
+			/*
+			 * Delalloc doesn't support data journalling,
+			 * but eventually maybe we'll lift this
+			 * restriction.
+			 */
+			if (unlikely(journal_data && PageChecked(page)))
+				err = __ext4_journalled_writepage(page, len);
+			else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT))
+				err = ext4_bio_write_page(&io_submit, page,
+							  len, mpd->wbc);
+			else
+				err = block_write_full_page(page,
+					noalloc_get_block_write, mpd->wbc);
+
+			if (!err)
+				mpd->pages_written++;
+			/*
+			 * In error case, we have to continue because
+			 * remaining pages are still locked
+			 */
+			if (ret == 0)
+				ret = err;
 		}
 		pagevec_release(&pvec);
 	}
+	ext4_io_submit(&io_submit);
+	return ret;
 }
 
-
 static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
 					sector_t logical, long blk_cnt)
 {
@@ -2187,35 +2198,32 @@ static void ext4_print_free_blocks(struct inode *inode)
 }
 
 /*
- * mpage_da_map_blocks - go through given space
+ * mpage_da_map_and_submit - go through given space, map them
+ *       if necessary, and then submit them for I/O
  *
  * @mpd - bh describing space
  *
  * The function skips space we know is already mapped to disk blocks.
  *
  */
-static int mpage_da_map_blocks(struct mpage_da_data *mpd)
+static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
 {
 	int err, blks, get_blocks_flags;
-	struct ext4_map_blocks map;
+	struct ext4_map_blocks map, *mapp = NULL;
 	sector_t next = mpd->b_blocknr;
 	unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
 	loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
 	handle_t *handle = NULL;
 
 	/*
-	 * We consider only non-mapped and non-allocated blocks
+	 * If the blocks are mapped already, or we couldn't accumulate
+	 * any blocks, then proceed immediately to the submission stage.
 	 */
-	if ((mpd->b_state  & (1 << BH_Mapped)) &&
-		!(mpd->b_state & (1 << BH_Delay)) &&
-		!(mpd->b_state & (1 << BH_Unwritten)))
-		return 0;
-
-	/*
-	 * If we didn't accumulate anything to write simply return
-	 */
-	if (!mpd->b_size)
-		return 0;
+	if ((mpd->b_size == 0) ||
+	    ((mpd->b_state  & (1 << BH_Mapped)) &&
+	     !(mpd->b_state & (1 << BH_Delay)) &&
+	     !(mpd->b_state & (1 << BH_Unwritten))))
+		goto submit_io;
 
 	handle = ext4_journal_current_handle();
 	BUG_ON(!handle);
@@ -2252,17 +2260,18 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 
 		err = blks;
 		/*
-		 * If get block returns with error we simply
-		 * return. Later writepage will redirty the page and
-		 * writepages will find the dirty page again
+		 * If get block returns EAGAIN or ENOSPC and there
+		 * appears to be free blocks we will call
+		 * ext4_writepage() for all of the pages which will
+		 * just redirty the pages.
 		 */
 		if (err == -EAGAIN)
-			return 0;
+			goto submit_io;
 
 		if (err == -ENOSPC &&
 		    ext4_count_free_blocks(sb)) {
 			mpd->retval = err;
-			return 0;
+			goto submit_io;
 		}
 
 		/*
@@ -2287,10 +2296,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 		/* invalidate all the pages */
 		ext4_da_block_invalidatepages(mpd, next,
 				mpd->b_size >> mpd->inode->i_blkbits);
-		return err;
+		return;
 	}
 	BUG_ON(blks == 0);
 
+	mapp = &map;
 	if (map.m_flags & EXT4_MAP_NEW) {
 		struct block_device *bdev = mpd->inode->i_sb->s_bdev;
 		int i;
@@ -2299,18 +2309,11 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 			unmap_underlying_metadata(bdev, map.m_pblk + i);
 	}
 
-	/*
-	 * If blocks are delayed marked, we need to
-	 * put actual blocknr and drop delayed bit
-	 */
-	if ((mpd->b_state & (1 << BH_Delay)) ||
-	    (mpd->b_state & (1 << BH_Unwritten)))
-		mpage_put_bnr_to_bhs(mpd, &map);
-
 	if (ext4_should_order_data(mpd->inode)) {
 		err = ext4_jbd2_file_inode(handle, mpd->inode);
 		if (err)
-			return err;
+			/* This only happens if the journal is aborted */
+			return;
 	}
 
 	/*
@@ -2321,10 +2324,16 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
 		disksize = i_size_read(mpd->inode);
 	if (disksize > EXT4_I(mpd->inode)->i_disksize) {
 		ext4_update_i_disksize(mpd->inode, disksize);
-		return ext4_mark_inode_dirty(handle, mpd->inode);
+		err = ext4_mark_inode_dirty(handle, mpd->inode);
+		if (err)
+			ext4_error(mpd->inode->i_sb,
+				   "Failed to mark inode %lu dirty",
+				   mpd->inode->i_ino);
 	}
 
-	return 0;
+submit_io:
+	mpage_da_submit_io(mpd, mapp);
+	mpd->io_done = 1;
 }
 
 #define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
@@ -2401,9 +2410,7 @@ flush_it:
 	 * We couldn't merge the block to our extent, so we
 	 * need to flush current  extent and start new one
 	 */
-	if (mpage_da_map_blocks(mpd) == 0)
-		mpage_da_submit_io(mpd);
-	mpd->io_done = 1;
+	mpage_da_map_and_submit(mpd);
 	return;
 }
 
@@ -2422,9 +2429,9 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
  * The function finds extents of pages and scan them for all blocks.
  */
 static int __mpage_da_writepage(struct page *page,
-				struct writeback_control *wbc, void *data)
+				struct writeback_control *wbc,
+				struct mpage_da_data *mpd)
 {
-	struct mpage_da_data *mpd = data;
 	struct inode *inode = mpd->inode;
 	struct buffer_head *bh, *head;
 	sector_t logical;
@@ -2435,15 +2442,13 @@ static int __mpage_da_writepage(struct page *page,
 	if (mpd->next_page != page->index) {
 		/*
 		 * Nope, we can't. So, we map non-allocated blocks
-		 * and start IO on them using writepage()
+		 * and start IO on them
 		 */
 		if (mpd->next_page != mpd->first_page) {
-			if (mpage_da_map_blocks(mpd) == 0)
-				mpage_da_submit_io(mpd);
+			mpage_da_map_and_submit(mpd);
 			/*
 			 * skip rest of the page in the page_vec
 			 */
-			mpd->io_done = 1;
 			redirty_page_for_writepage(wbc, page);
 			unlock_page(page);
 			return MPAGE_DA_EXTENT_TAIL;
@@ -2550,8 +2555,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 		if (buffer_delay(bh))
 			return 0; /* Not sure this could or should happen */
 		/*
-		 * XXX: __block_prepare_write() unmaps passed block,
-		 * is it OK?
+		 * XXX: __block_write_begin() unmaps passed block, is it OK?
 		 */
 		ret = ext4_da_reserve_space(inode, iblock);
 		if (ret)
@@ -2583,7 +2587,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
 /*
  * This function is used as a standard get_block_t calback function
  * when there is no desire to allocate any blocks.  It is used as a
- * callback function for block_prepare_write() and block_write_full_page().
+ * callback function for block_write_begin() and block_write_full_page().
  * These functions should only try to map a single block at a time.
  *
  * Since this function doesn't do block allocations even if the caller
@@ -2623,6 +2627,7 @@ static int __ext4_journalled_writepage(struct page *page,
 	int ret = 0;
 	int err;
 
+	ClearPageChecked(page);
 	page_bufs = page_buffers(page);
 	BUG_ON(!page_bufs);
 	walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one);
@@ -2700,7 +2705,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
 static int ext4_writepage(struct page *page,
 			  struct writeback_control *wbc)
 {
-	int ret = 0;
+	int ret = 0, commit_write = 0;
 	loff_t size;
 	unsigned int len;
 	struct buffer_head *page_bufs = NULL;
@@ -2713,71 +2718,44 @@ static int ext4_writepage(struct page *page,
 	else
 		len = PAGE_CACHE_SIZE;
 
-	if (page_has_buffers(page)) {
-		page_bufs = page_buffers(page);
-		if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
-					ext4_bh_delay_or_unwritten)) {
-			/*
-			 * We don't want to do  block allocation
-			 * So redirty the page and return
-			 * We may reach here when we do a journal commit
-			 * via journal_submit_inode_data_buffers.
-			 * If we don't have mapping block we just ignore
-			 * them. We can also reach here via shrink_page_list
-			 */
+	/*
+	 * If the page does not have buffers (for whatever reason),
+	 * try to create them using __block_write_begin.  If this
+	 * fails, redirty the page and move on.
+	 */
+	if (!page_has_buffers(page)) {
+		if (__block_write_begin(page, 0, len,
+					noalloc_get_block_write)) {
+		redirty_page:
 			redirty_page_for_writepage(wbc, page);
 			unlock_page(page);
 			return 0;
 		}
-	} else {
+		commit_write = 1;
+	}
+	page_bufs = page_buffers(page);
+	if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+			      ext4_bh_delay_or_unwritten)) {
 		/*
-		 * The test for page_has_buffers() is subtle:
-		 * We know the page is dirty but it lost buffers. That means
-		 * that at some moment in time after write_begin()/write_end()
-		 * has been called all buffers have been clean and thus they
-		 * must have been written at least once. So they are all
-		 * mapped and we can happily proceed with mapping them
-		 * and writing the page.
-		 *
-		 * Try to initialize the buffer_heads and check whether
-		 * all are mapped and non delay. We don't want to
-		 * do block allocation here.
+		 * We don't want to do block allocation, so redirty
+		 * the page and return.  We may reach here when we do
+		 * a journal commit via journal_submit_inode_data_buffers.
+		 * We can also reach here via shrink_page_list
 		 */
-		ret = block_prepare_write(page, 0, len,
-					  noalloc_get_block_write);
-		if (!ret) {
-			page_bufs = page_buffers(page);
-			/* check whether all are mapped and non delay */
-			if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
-						ext4_bh_delay_or_unwritten)) {
-				redirty_page_for_writepage(wbc, page);
-				unlock_page(page);
-				return 0;
-			}
-		} else {
-			/*
-			 * We can't do block allocation here
-			 * so just redity the page and unlock
-			 * and return
-			 */
-			redirty_page_for_writepage(wbc, page);
-			unlock_page(page);
-			return 0;
-		}
+		goto redirty_page;
+	}
+	if (commit_write)
 		/* now mark the buffer_heads as dirty and uptodate */
 		block_commit_write(page, 0, len);
-	}
 
-	if (PageChecked(page) && ext4_should_journal_data(inode)) {
+	if (PageChecked(page) && ext4_should_journal_data(inode))
 		/*
 		 * It's mmapped pagecache.  Add buffers and journal it.  There
 		 * doesn't seem much point in redirtying the page here.
 		 */
-		ClearPageChecked(page);
 		return __ext4_journalled_writepage(page, len);
-	}
 
-	if (page_bufs && buffer_uninit(page_bufs)) {
+	if (buffer_uninit(page_bufs)) {
 		ext4_set_bh_endio(page_bufs, inode);
 		ret = block_write_full_page_endio(page, noalloc_get_block_write,
 					    wbc, ext4_end_io_buffer_write);
@@ -2824,25 +2802,32 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
  */
 static int write_cache_pages_da(struct address_space *mapping,
 				struct writeback_control *wbc,
-				struct mpage_da_data *mpd)
+				struct mpage_da_data *mpd,
+				pgoff_t *done_index)
 {
 	int ret = 0;
 	int done = 0;
 	struct pagevec pvec;
-	int nr_pages;
+	unsigned nr_pages;
 	pgoff_t index;
 	pgoff_t end;		/* Inclusive */
 	long nr_to_write = wbc->nr_to_write;
+	int tag;
 
 	pagevec_init(&pvec, 0);
 	index = wbc->range_start >> PAGE_CACHE_SHIFT;
 	end = wbc->range_end >> PAGE_CACHE_SHIFT;
 
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		tag = PAGECACHE_TAG_TOWRITE;
+	else
+		tag = PAGECACHE_TAG_DIRTY;
+
+	*done_index = index;
 	while (!done && (index <= end)) {
 		int i;
 
-		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-			      PAGECACHE_TAG_DIRTY,
+		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
 			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
 		if (nr_pages == 0)
 			break;
@@ -2862,6 +2847,8 @@ static int write_cache_pages_da(struct address_space *mapping,
 				break;
 			}
 
+			*done_index = page->index + 1;
+
 			lock_page(page);
 
 			/*
@@ -2947,6 +2934,8 @@ static int ext4_da_writepages(struct address_space *mapping,
 	long desired_nr_to_write, nr_to_writebump = 0;
 	loff_t range_start = wbc->range_start;
 	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
+	pgoff_t done_index = 0;
+	pgoff_t end;
 
 	trace_ext4_da_writepages(inode, wbc);
 
@@ -2982,8 +2971,11 @@ static int ext4_da_writepages(struct address_space *mapping,
 		wbc->range_start = index << PAGE_CACHE_SHIFT;
 		wbc->range_end  = LLONG_MAX;
 		wbc->range_cyclic = 0;
-	} else
+		end = -1;
+	} else {
 		index = wbc->range_start >> PAGE_CACHE_SHIFT;
+		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+	}
 
 	/*
 	 * This works around two forms of stupidity.  The first is in
@@ -3002,9 +2994,12 @@ static int ext4_da_writepages(struct address_space *mapping,
 	 * sbi->max_writeback_mb_bump whichever is smaller.
 	 */
 	max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT);
-	if (!range_cyclic && range_whole)
-		desired_nr_to_write = wbc->nr_to_write * 8;
-	else
+	if (!range_cyclic && range_whole) {
+		if (wbc->nr_to_write == LONG_MAX)
+			desired_nr_to_write = wbc->nr_to_write;
+		else
+			desired_nr_to_write = wbc->nr_to_write * 8;
+	} else
 		desired_nr_to_write = ext4_num_dirty_pages(inode, index,
 							   max_pages);
 	if (desired_nr_to_write > max_pages)
@@ -3021,6 +3016,9 @@ static int ext4_da_writepages(struct address_space *mapping,
 	pages_skipped = wbc->pages_skipped;
 
 retry:
+	if (wbc->sync_mode == WB_SYNC_ALL)
+		tag_pages_for_writeback(mapping, index, end);
+
 	while (!ret && wbc->nr_to_write > 0) {
 
 		/*
@@ -3059,16 +3057,14 @@ retry:
 		mpd.io_done = 0;
 		mpd.pages_written = 0;
 		mpd.retval = 0;
-		ret = write_cache_pages_da(mapping, wbc, &mpd);
+		ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
 		/*
 		 * If we have a contiguous extent of pages and we
 		 * haven't done the I/O yet, map the blocks and submit
 		 * them for I/O.
 		 */
 		if (!mpd.io_done && mpd.next_page != mpd.first_page) {
-			if (mpage_da_map_blocks(&mpd) == 0)
-				mpage_da_submit_io(&mpd);
-			mpd.io_done = 1;
+			mpage_da_map_and_submit(&mpd);
 			ret = MPAGE_DA_EXTENT_TAIL;
 		}
 		trace_ext4_da_write_pages(inode, &mpd);
@@ -3115,14 +3111,13 @@ retry:
 			 __func__, wbc->nr_to_write, ret);
 
 	/* Update index */
-	index += pages_written;
 	wbc->range_cyclic = range_cyclic;
 	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
 		/*
 		 * set the writeback_index so that range_cyclic
 		 * mode will write it back later
 		 */
-		mapping->writeback_index = index;
+		mapping->writeback_index = done_index;
 
 out_writepages:
 	wbc->nr_to_write -= nr_to_writebump;
@@ -3457,15 +3452,6 @@ ext4_readpages(struct file *file, struct address_space *mapping,
 	return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
 }
 
-static void ext4_free_io_end(ext4_io_end_t *io)
-{
-	BUG_ON(!io);
-	if (io->page)
-		put_page(io->page);
-	iput(io->inode);
-	kfree(io);
-}
-
 static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
 {
 	struct buffer_head *head, *bh;
@@ -3642,173 +3628,6 @@ static int ext4_get_block_write(struct inode *inode, sector_t iblock,
 			       EXT4_GET_BLOCKS_IO_CREATE_EXT);
 }
 
-static void dump_completed_IO(struct inode * inode)
-{
-#ifdef	EXT4_DEBUG
-	struct list_head *cur, *before, *after;
-	ext4_io_end_t *io, *io0, *io1;
-	unsigned long flags;
-
-	if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
-		ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
-		return;
-	}
-
-	ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
-	spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
-	list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
-		cur = &io->list;
-		before = cur->prev;
-		io0 = container_of(before, ext4_io_end_t, list);
-		after = cur->next;
-		io1 = container_of(after, ext4_io_end_t, list);
-
-		ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
-			    io, inode->i_ino, io0, io1);
-	}
-	spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
-#endif
-}
-
-/*
- * check a range of space and convert unwritten extents to written.
- */
-static int ext4_end_io_nolock(ext4_io_end_t *io)
-{
-	struct inode *inode = io->inode;
-	loff_t offset = io->offset;
-	ssize_t size = io->size;
-	int ret = 0;
-
-	ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
-		   "list->prev 0x%p\n",
-	           io, inode->i_ino, io->list.next, io->list.prev);
-
-	if (list_empty(&io->list))
-		return ret;
-
-	if (io->flag != EXT4_IO_UNWRITTEN)
-		return ret;
-
-	ret = ext4_convert_unwritten_extents(inode, offset, size);
-	if (ret < 0) {
-		printk(KERN_EMERG "%s: failed to convert unwritten"
-			"extents to written extents, error is %d"
-			" io is still on inode %lu aio dio list\n",
-                       __func__, ret, inode->i_ino);
-		return ret;
-	}
-
-	if (io->iocb)
-		aio_complete(io->iocb, io->result, 0);
-	/* clear the DIO AIO unwritten flag */
-	io->flag = 0;
-	return ret;
-}
-
-/*
- * work on completed aio dio IO, to convert unwritten extents to extents
- */
-static void ext4_end_io_work(struct work_struct *work)
-{
-	ext4_io_end_t		*io = container_of(work, ext4_io_end_t, work);
-	struct inode		*inode = io->inode;
-	struct ext4_inode_info	*ei = EXT4_I(inode);
-	unsigned long		flags;
-	int			ret;
-
-	mutex_lock(&inode->i_mutex);
-	ret = ext4_end_io_nolock(io);
-	if (ret < 0) {
-		mutex_unlock(&inode->i_mutex);
-		return;
-	}
-
-	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
-	if (!list_empty(&io->list))
-		list_del_init(&io->list);
-	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
-	mutex_unlock(&inode->i_mutex);
-	ext4_free_io_end(io);
-}
-
-/*
- * This function is called from ext4_sync_file().
- *
- * When IO is completed, the work to convert unwritten extents to
- * written is queued on workqueue but may not get immediately
- * scheduled. When fsync is called, we need to ensure the
- * conversion is complete before fsync returns.
- * The inode keeps track of a list of pending/completed IO that
- * might needs to do the conversion. This function walks through
- * the list and convert the related unwritten extents for completed IO
- * to written.
- * The function return the number of pending IOs on success.
- */
-int flush_completed_IO(struct inode *inode)
-{
-	ext4_io_end_t *io;
-	struct ext4_inode_info *ei = EXT4_I(inode);
-	unsigned long flags;
-	int ret = 0;
-	int ret2 = 0;
-
-	if (list_empty(&ei->i_completed_io_list))
-		return ret;
-
-	dump_completed_IO(inode);
-	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
-	while (!list_empty(&ei->i_completed_io_list)){
-		io = list_entry(ei->i_completed_io_list.next,
-				ext4_io_end_t, list);
-		/*
-		 * Calling ext4_end_io_nolock() to convert completed
-		 * IO to written.
-		 *
-		 * When ext4_sync_file() is called, run_queue() may already
-		 * about to flush the work corresponding to this io structure.
-		 * It will be upset if it founds the io structure related
-		 * to the work-to-be schedule is freed.
-		 *
-		 * Thus we need to keep the io structure still valid here after
-		 * convertion finished. The io structure has a flag to
-		 * avoid double converting from both fsync and background work
-		 * queue work.
-		 */
-		spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
-		ret = ext4_end_io_nolock(io);
-		spin_lock_irqsave(&ei->i_completed_io_lock, flags);
-		if (ret < 0)
-			ret2 = ret;
-		else
-			list_del_init(&io->list);
-	}
-	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
-	return (ret2 < 0) ? ret2 : 0;
-}
-
-static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
-{
-	ext4_io_end_t *io = NULL;
-
-	io = kmalloc(sizeof(*io), flags);
-
-	if (io) {
-		igrab(inode);
-		io->inode = inode;
-		io->flag = 0;
-		io->offset = 0;
-		io->size = 0;
-		io->page = NULL;
-		io->iocb = NULL;
-		io->result = 0;
-		INIT_WORK(&io->work, ext4_end_io_work);
-		INIT_LIST_HEAD(&io->list);
-	}
-
-	return io;
-}
-
 static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 			    ssize_t size, void *private, int ret,
 			    bool is_async)
@@ -3828,7 +3647,7 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 		  size);
 
 	/* if not aio dio with unwritten extents, just free io and return */
-	if (io_end->flag != EXT4_IO_UNWRITTEN){
+	if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
 		ext4_free_io_end(io_end);
 		iocb->private = NULL;
 out:
@@ -3845,14 +3664,14 @@ out:
 	}
 	wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
 
-	/* queue the work to convert unwritten extents to written */
-	queue_work(wq, &io_end->work);
-
 	/* Add the io_end to per-inode completed aio dio list*/
 	ei = EXT4_I(io_end->inode);
 	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
 	list_add_tail(&io_end->list, &ei->i_completed_io_list);
 	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+
+	/* queue the work to convert unwritten extents to written */
+	queue_work(wq, &io_end->work);
 	iocb->private = NULL;
 }
 
@@ -3873,7 +3692,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate)
 		goto out;
 	}
 
-	io_end->flag = EXT4_IO_UNWRITTEN;
+	io_end->flag = EXT4_IO_END_UNWRITTEN;
 	inode = io_end->inode;
 
 	/* Add the io_end to per-inode completed io list*/
@@ -5464,6 +5283,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 {
 	struct inode *inode = dentry->d_inode;
 	int error, rc = 0;
+	int orphan = 0;
 	const unsigned int ia_valid = attr->ia_valid;
 
 	error = inode_change_ok(inode, attr);
@@ -5519,8 +5339,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 			error = PTR_ERR(handle);
 			goto err_out;
 		}
-
-		error = ext4_orphan_add(handle, inode);
+		if (ext4_handle_valid(handle)) {
+			error = ext4_orphan_add(handle, inode);
+			orphan = 1;
+		}
 		EXT4_I(inode)->i_disksize = attr->ia_size;
 		rc = ext4_mark_inode_dirty(handle, inode);
 		if (!error)
@@ -5538,6 +5360,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 					goto err_out;
 				}
 				ext4_orphan_del(handle, inode);
+				orphan = 0;
 				ext4_journal_stop(handle);
 				goto err_out;
 			}
@@ -5560,7 +5383,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 	 * If the call to ext4_truncate failed to get a transaction handle at
 	 * all, we need to clean up the in-core orphan list manually.
 	 */
-	if (inode->i_nlink)
+	if (orphan && inode->i_nlink)
 		ext4_orphan_del(NULL, inode);
 
 	if (!rc && (ia_valid & ATTR_MODE))
@@ -5592,9 +5415,7 @@ int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
 	 * will return the blocks that include the delayed allocation
 	 * blocks for this file.
 	 */
-	spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
 	delalloc_blocks = EXT4_I(inode)->i_reserved_data_blocks;
-	spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
 
 	stat->blocks += (delalloc_blocks << inode->i_sb->s_blocksize_bits)>>9;
 	return 0;
@@ -5643,7 +5464,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
  *
  * Also account for superblock, inode, quota and xattr blocks
  */
-int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
 {
 	ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
 	int gdpblocks;
@@ -5831,6 +5652,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
 	int err, ret;
 
 	might_sleep();
+	trace_ext4_mark_inode_dirty(inode, _RET_IP_);
 	err = ext4_reserve_inode_write(handle, inode, &iloc);
 	if (ext4_handle_valid(handle) &&
 	    EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index bf5ae883b1b..eb3bc2fe647 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -331,6 +331,30 @@ mext_out:
 		return err;
 	}
 
+	case FITRIM:
+	{
+		struct super_block *sb = inode->i_sb;
+		struct fstrim_range range;
+		int ret = 0;
+
+		if (!capable(CAP_SYS_ADMIN))
+			return -EPERM;
+
+		if (copy_from_user(&range, (struct fstrim_range *)arg,
+		    sizeof(range)))
+			return -EFAULT;
+
+		ret = ext4_trim_fs(sb, &range);
+		if (ret < 0)
+			return ret;
+
+		if (copy_to_user((struct fstrim_range *)arg, &range,
+		    sizeof(range)))
+			return -EFAULT;
+
+		return 0;
+	}
+
 	default:
 		return -ENOTTY;
 	}
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 19aa0d44d82..5b4d4e3a4d5 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -338,6 +338,14 @@
 static struct kmem_cache *ext4_pspace_cachep;
 static struct kmem_cache *ext4_ac_cachep;
 static struct kmem_cache *ext4_free_ext_cachep;
+
+/* We create slab caches for groupinfo data structures based on the
+ * superblock block size.  There will be one per mounted filesystem for
+ * each unique s_blocksize_bits */
+#define NR_GRPINFO_CACHES	\
+	(EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1)
+static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
+
 static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
 					ext4_group_t group);
 static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
@@ -939,6 +947,85 @@ out:
 }
 
 /*
+ * lock the group_info alloc_sem of all the groups
+ * belonging to the same buddy cache page. This
+ * make sure other parallel operation on the buddy
+ * cache doesn't happen  whild holding the buddy cache
+ * lock
+ */
+static int ext4_mb_get_buddy_cache_lock(struct super_block *sb,
+					ext4_group_t group)
+{
+	int i;
+	int block, pnum;
+	int blocks_per_page;
+	int groups_per_page;
+	ext4_group_t ngroups = ext4_get_groups_count(sb);
+	ext4_group_t first_group;
+	struct ext4_group_info *grp;
+
+	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+	/*
+	 * the buddy cache inode stores the block bitmap
+	 * and buddy information in consecutive blocks.
+	 * So for each group we need two blocks.
+	 */
+	block = group * 2;
+	pnum = block / blocks_per_page;
+	first_group = pnum * blocks_per_page / 2;
+
+	groups_per_page = blocks_per_page >> 1;
+	if (groups_per_page == 0)
+		groups_per_page = 1;
+	/* read all groups the page covers into the cache */
+	for (i = 0; i < groups_per_page; i++) {
+
+		if ((first_group + i) >= ngroups)
+			break;
+		grp = ext4_get_group_info(sb, first_group + i);
+		/* take all groups write allocation
+		 * semaphore. This make sure there is
+		 * no block allocation going on in any
+		 * of that groups
+		 */
+		down_write_nested(&grp->alloc_sem, i);
+	}
+	return i;
+}
+
+static void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
+					 ext4_group_t group, int locked_group)
+{
+	int i;
+	int block, pnum;
+	int blocks_per_page;
+	ext4_group_t first_group;
+	struct ext4_group_info *grp;
+
+	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+	/*
+	 * the buddy cache inode stores the block bitmap
+	 * and buddy information in consecutive blocks.
+	 * So for each group we need two blocks.
+	 */
+	block = group * 2;
+	pnum = block / blocks_per_page;
+	first_group = pnum * blocks_per_page / 2;
+	/* release locks on all the groups */
+	for (i = 0; i < locked_group; i++) {
+
+		grp = ext4_get_group_info(sb, first_group + i);
+		/* take all groups write allocation
+		 * semaphore. This make sure there is
+		 * no block allocation going on in any
+		 * of that groups
+		 */
+		up_write(&grp->alloc_sem);
+	}
+
+}
+
+/*
  * Locking note:  This routine calls ext4_mb_init_cache(), which takes the
  * block group lock of all groups for this page; do not hold the BG lock when
  * calling this routine!
@@ -1915,84 +2002,6 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
 	return 0;
 }
 
-/*
- * lock the group_info alloc_sem of all the groups
- * belonging to the same buddy cache page. This
- * make sure other parallel operation on the buddy
- * cache doesn't happen  whild holding the buddy cache
- * lock
- */
-int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
-{
-	int i;
-	int block, pnum;
-	int blocks_per_page;
-	int groups_per_page;
-	ext4_group_t ngroups = ext4_get_groups_count(sb);
-	ext4_group_t first_group;
-	struct ext4_group_info *grp;
-
-	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
-	/*
-	 * the buddy cache inode stores the block bitmap
-	 * and buddy information in consecutive blocks.
-	 * So for each group we need two blocks.
-	 */
-	block = group * 2;
-	pnum = block / blocks_per_page;
-	first_group = pnum * blocks_per_page / 2;
-
-	groups_per_page = blocks_per_page >> 1;
-	if (groups_per_page == 0)
-		groups_per_page = 1;
-	/* read all groups the page covers into the cache */
-	for (i = 0; i < groups_per_page; i++) {
-
-		if ((first_group + i) >= ngroups)
-			break;
-		grp = ext4_get_group_info(sb, first_group + i);
-		/* take all groups write allocation
-		 * semaphore. This make sure there is
-		 * no block allocation going on in any
-		 * of that groups
-		 */
-		down_write_nested(&grp->alloc_sem, i);
-	}
-	return i;
-}
-
-void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
-					ext4_group_t group, int locked_group)
-{
-	int i;
-	int block, pnum;
-	int blocks_per_page;
-	ext4_group_t first_group;
-	struct ext4_group_info *grp;
-
-	blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
-	/*
-	 * the buddy cache inode stores the block bitmap
-	 * and buddy information in consecutive blocks.
-	 * So for each group we need two blocks.
-	 */
-	block = group * 2;
-	pnum = block / blocks_per_page;
-	first_group = pnum * blocks_per_page / 2;
-	/* release locks on all the groups */
-	for (i = 0; i < locked_group; i++) {
-
-		grp = ext4_get_group_info(sb, first_group + i);
-		/* take all groups write allocation
-		 * semaphore. This make sure there is
-		 * no block allocation going on in any
-		 * of that groups
-		 */
-		up_write(&grp->alloc_sem);
-	}
-
-}
-
 static noinline_for_stack int
 ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 {
@@ -2233,15 +2242,24 @@ static const struct file_operations ext4_mb_seq_groups_fops = {
 	.release	= seq_release,
 };
 
+static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
+{
+	int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
+	struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
+
+	BUG_ON(!cachep);
+	return cachep;
+}
 
 /* Create and initialize ext4_group_info data for the given group. */
 int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 			  struct ext4_group_desc *desc)
 {
-	int i, len;
+	int i;
 	int metalen = 0;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	struct ext4_group_info **meta_group_info;
+	struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
 
 	/*
 	 * First check if this group is the first of a reserved block.
@@ -2261,22 +2279,16 @@ int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
 			meta_group_info;
 	}
 
-	/*
-	 * calculate needed size. if change bb_counters size,
-	 * don't forget about ext4_mb_generate_buddy()
-	 */
-	len = offsetof(typeof(**meta_group_info),
-		       bb_counters[sb->s_blocksize_bits + 2]);
-
 	meta_group_info =
 		sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
 	i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
 
-	meta_group_info[i] = kzalloc(len, GFP_KERNEL);
+	meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
 	if (meta_group_info[i] == NULL) {
 		printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
 		goto exit_group_info;
 	}
+	memset(meta_group_info[i], 0, kmem_cache_size(cachep));
 	set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
 		&(meta_group_info[i]->bb_state));
 
@@ -2331,6 +2343,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
 	int num_meta_group_infos_max;
 	int array_size;
 	struct ext4_group_desc *desc;
+	struct kmem_cache *cachep;
 
 	/* This is the number of blocks used by GDT */
 	num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
@@ -2373,6 +2386,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
 		printk(KERN_ERR "EXT4-fs: can't get new inode\n");
 		goto err_freesgi;
 	}
+	sbi->s_buddy_cache->i_ino = get_next_ino();
 	EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
 	for (i = 0; i < ngroups; i++) {
 		desc = ext4_get_group_desc(sb, i, NULL);
@@ -2388,8 +2402,9 @@ static int ext4_mb_init_backend(struct super_block *sb)
 	return 0;
 
 err_freebuddy:
+	cachep = get_groupinfo_cache(sb->s_blocksize_bits);
 	while (i-- > 0)
-		kfree(ext4_get_group_info(sb, i));
+		kmem_cache_free(cachep, ext4_get_group_info(sb, i));
 	i = num_meta_group_infos;
 	while (i-- > 0)
 		kfree(sbi->s_group_info[i]);
@@ -2406,19 +2421,48 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 	unsigned offset;
 	unsigned max;
 	int ret;
+	int cache_index;
+	struct kmem_cache *cachep;
+	char *namep = NULL;
 
 	i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
 
 	sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
 	if (sbi->s_mb_offsets == NULL) {
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto out;
 	}
 
 	i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
 	sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
 	if (sbi->s_mb_maxs == NULL) {
-		kfree(sbi->s_mb_offsets);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
+	cachep = ext4_groupinfo_caches[cache_index];
+	if (!cachep) {
+		char name[32];
+		int len = offsetof(struct ext4_group_info,
+					bb_counters[sb->s_blocksize_bits + 2]);
+
+		sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits);
+		namep = kstrdup(name, GFP_KERNEL);
+		if (!namep) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		/* Need to free the kmem_cache_name() when we
+		 * destroy the slab */
+		cachep = kmem_cache_create(namep, len, 0,
+					     SLAB_RECLAIM_ACCOUNT, NULL);
+		if (!cachep) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		ext4_groupinfo_caches[cache_index] = cachep;
 	}
 
 	/* order 0 is regular bitmap */
@@ -2439,9 +2483,7 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 	/* init file for buddy data */
 	ret = ext4_mb_init_backend(sb);
 	if (ret != 0) {
-		kfree(sbi->s_mb_offsets);
-		kfree(sbi->s_mb_maxs);
-		return ret;
+		goto out;
 	}
 
 	spin_lock_init(&sbi->s_md_lock);
@@ -2456,9 +2498,8 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 
 	sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
 	if (sbi->s_locality_groups == NULL) {
-		kfree(sbi->s_mb_offsets);
-		kfree(sbi->s_mb_maxs);
-		return -ENOMEM;
+		ret = -ENOMEM;
+		goto out;
 	}
 	for_each_possible_cpu(i) {
 		struct ext4_locality_group *lg;
@@ -2475,7 +2516,13 @@ int ext4_mb_init(struct super_block *sb, int needs_recovery)
 
 	if (sbi->s_journal)
 		sbi->s_journal->j_commit_callback = release_blocks_on_commit;
-	return 0;
+out:
+	if (ret) {
+		kfree(sbi->s_mb_offsets);
+		kfree(sbi->s_mb_maxs);
+		kfree(namep);
+	}
+	return ret;
 }
 
 /* need to called with the ext4 group lock held */
@@ -2503,6 +2550,7 @@ int ext4_mb_release(struct super_block *sb)
 	int num_meta_group_infos;
 	struct ext4_group_info *grinfo;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
 
 	if (sbi->s_group_info) {
 		for (i = 0; i < ngroups; i++) {
@@ -2513,7 +2561,7 @@ int ext4_mb_release(struct super_block *sb)
 			ext4_lock_group(sb, i);
 			ext4_mb_cleanup_pa(grinfo);
 			ext4_unlock_group(sb, i);
-			kfree(grinfo);
+			kmem_cache_free(cachep, grinfo);
 		}
 		num_meta_group_infos = (ngroups +
 				EXT4_DESC_PER_BLOCK(sb) - 1) >>
@@ -2557,7 +2605,7 @@ int ext4_mb_release(struct super_block *sb)
 	return 0;
 }
 
-static inline void ext4_issue_discard(struct super_block *sb,
+static inline int ext4_issue_discard(struct super_block *sb,
 		ext4_group_t block_group, ext4_grpblk_t block, int count)
 {
 	int ret;
@@ -2567,10 +2615,11 @@ static inline void ext4_issue_discard(struct super_block *sb,
 	trace_ext4_discard_blocks(sb,
 			(unsigned long long) discard_block, count);
 	ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
-	if (ret == EOPNOTSUPP) {
+	if (ret == -EOPNOTSUPP) {
 		ext4_warning(sb, "discard not supported, disabling");
 		clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
 	}
+	return ret;
 }
 
 /*
@@ -2658,28 +2707,22 @@ static void ext4_remove_debugfs_entry(void)
 
 #endif
 
-int __init init_ext4_mballoc(void)
+int __init ext4_init_mballoc(void)
 {
-	ext4_pspace_cachep =
-		kmem_cache_create("ext4_prealloc_space",
-				     sizeof(struct ext4_prealloc_space),
-				     0, SLAB_RECLAIM_ACCOUNT, NULL);
+	ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
+					SLAB_RECLAIM_ACCOUNT);
 	if (ext4_pspace_cachep == NULL)
 		return -ENOMEM;
 
-	ext4_ac_cachep =
-		kmem_cache_create("ext4_alloc_context",
-				     sizeof(struct ext4_allocation_context),
-				     0, SLAB_RECLAIM_ACCOUNT, NULL);
+	ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
+				    SLAB_RECLAIM_ACCOUNT);
 	if (ext4_ac_cachep == NULL) {
 		kmem_cache_destroy(ext4_pspace_cachep);
 		return -ENOMEM;
 	}
 
-	ext4_free_ext_cachep =
-		kmem_cache_create("ext4_free_block_extents",
-				     sizeof(struct ext4_free_data),
-				     0, SLAB_RECLAIM_ACCOUNT, NULL);
+	ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data,
+					  SLAB_RECLAIM_ACCOUNT);
 	if (ext4_free_ext_cachep == NULL) {
 		kmem_cache_destroy(ext4_pspace_cachep);
 		kmem_cache_destroy(ext4_ac_cachep);
@@ -2689,8 +2732,9 @@ int __init init_ext4_mballoc(void)
 	return 0;
 }
 
-void exit_ext4_mballoc(void)
+void ext4_exit_mballoc(void)
 {
+	int i;
 	/*
 	 * Wait for completion of call_rcu()'s on ext4_pspace_cachep
 	 * before destroying the slab cache.
@@ -2699,6 +2743,15 @@ void exit_ext4_mballoc(void)
 	kmem_cache_destroy(ext4_pspace_cachep);
 	kmem_cache_destroy(ext4_ac_cachep);
 	kmem_cache_destroy(ext4_free_ext_cachep);
+
+	for (i = 0; i < NR_GRPINFO_CACHES; i++) {
+		struct kmem_cache *cachep = ext4_groupinfo_caches[i];
+		if (cachep) {
+			char *name = (char *)kmem_cache_name(cachep);
+			kmem_cache_destroy(cachep);
+			kfree(name);
+		}
+	}
 	ext4_remove_debugfs_entry();
 }
 
@@ -3535,8 +3588,7 @@ static int ext4_mb_new_preallocation(struct ext4_allocation_context *ac)
  */
 static noinline_for_stack int
 ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
-			struct ext4_prealloc_space *pa,
-			struct ext4_allocation_context *ac)
+			struct ext4_prealloc_space *pa)
 {
 	struct super_block *sb = e4b->bd_sb;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -3554,11 +3606,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
 	BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
 	end = bit + pa->pa_len;
 
-	if (ac) {
-		ac->ac_sb = sb;
-		ac->ac_inode = pa->pa_inode;
-	}
-
 	while (bit < end) {
 		bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
 		if (bit >= end)
@@ -3569,16 +3616,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
 			 (unsigned) next - bit, (unsigned) group);
 		free += next - bit;
 
-		if (ac) {
-			ac->ac_b_ex.fe_group = group;
-			ac->ac_b_ex.fe_start = bit;
-			ac->ac_b_ex.fe_len = next - bit;
-			ac->ac_b_ex.fe_logical = 0;
-			trace_ext4_mballoc_discard(ac);
-		}
-
-		trace_ext4_mb_release_inode_pa(sb, ac, pa, grp_blk_start + bit,
-					       next - bit);
+		trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
+		trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa,
+					       grp_blk_start + bit, next - bit);
 		mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
 		bit = next + 1;
 	}
@@ -3601,29 +3641,19 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
 
 static noinline_for_stack int
 ext4_mb_release_group_pa(struct ext4_buddy *e4b,
-				struct ext4_prealloc_space *pa,
-				struct ext4_allocation_context *ac)
+				struct ext4_prealloc_space *pa)
 {
 	struct super_block *sb = e4b->bd_sb;
 	ext4_group_t group;
 	ext4_grpblk_t bit;
 
-	trace_ext4_mb_release_group_pa(sb, ac, pa);
+	trace_ext4_mb_release_group_pa(sb, pa);
 	BUG_ON(pa->pa_deleted == 0);
 	ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
 	BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
 	mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
 	atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
-
-	if (ac) {
-		ac->ac_sb = sb;
-		ac->ac_inode = NULL;
-		ac->ac_b_ex.fe_group = group;
-		ac->ac_b_ex.fe_start = bit;
-		ac->ac_b_ex.fe_len = pa->pa_len;
-		ac->ac_b_ex.fe_logical = 0;
-		trace_ext4_mballoc_discard(ac);
-	}
+	trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
 
 	return 0;
 }
@@ -3644,7 +3674,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
 	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
 	struct buffer_head *bitmap_bh = NULL;
 	struct ext4_prealloc_space *pa, *tmp;
-	struct ext4_allocation_context *ac;
 	struct list_head list;
 	struct ext4_buddy e4b;
 	int err;
@@ -3673,9 +3702,6 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
 		needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
 
 	INIT_LIST_HEAD(&list);
-	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
-	if (ac)
-		ac->ac_sb = sb;
 repeat:
 	ext4_lock_group(sb, group);
 	list_for_each_entry_safe(pa, tmp,
@@ -3730,9 +3756,9 @@ repeat:
 		spin_unlock(pa->pa_obj_lock);
 
 		if (pa->pa_type == MB_GROUP_PA)
-			ext4_mb_release_group_pa(&e4b, pa, ac);
+			ext4_mb_release_group_pa(&e4b, pa);
 		else
-			ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
+			ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
 
 		list_del(&pa->u.pa_tmp_list);
 		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
@@ -3740,8 +3766,6 @@ repeat:
 
 out:
 	ext4_unlock_group(sb, group);
-	if (ac)
-		kmem_cache_free(ext4_ac_cachep, ac);
 	ext4_mb_unload_buddy(&e4b);
 	put_bh(bitmap_bh);
 	return free;
@@ -3762,7 +3786,6 @@ void ext4_discard_preallocations(struct inode *inode)
 	struct super_block *sb = inode->i_sb;
 	struct buffer_head *bitmap_bh = NULL;
 	struct ext4_prealloc_space *pa, *tmp;
-	struct ext4_allocation_context *ac;
 	ext4_group_t group = 0;
 	struct list_head list;
 	struct ext4_buddy e4b;
@@ -3778,11 +3801,6 @@ void ext4_discard_preallocations(struct inode *inode)
 
 	INIT_LIST_HEAD(&list);
 
-	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
-	if (ac) {
-		ac->ac_sb = sb;
-		ac->ac_inode = inode;
-	}
 repeat:
 	/* first, collect all pa's in the inode */
 	spin_lock(&ei->i_prealloc_lock);
@@ -3852,7 +3870,7 @@ repeat:
 
 		ext4_lock_group(sb, group);
 		list_del(&pa->pa_group_list);
-		ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
+		ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
 		ext4_unlock_group(sb, group);
 
 		ext4_mb_unload_buddy(&e4b);
@@ -3861,8 +3879,6 @@ repeat:
 		list_del(&pa->u.pa_tmp_list);
 		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
 	}
-	if (ac)
-		kmem_cache_free(ext4_ac_cachep, ac);
 }
 
 /*
@@ -4060,14 +4076,10 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
 	struct ext4_buddy e4b;
 	struct list_head discard_list;
 	struct ext4_prealloc_space *pa, *tmp;
-	struct ext4_allocation_context *ac;
 
 	mb_debug(1, "discard locality group preallocation\n");
 
 	INIT_LIST_HEAD(&discard_list);
-	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
-	if (ac)
-		ac->ac_sb = sb;
 
 	spin_lock(&lg->lg_prealloc_lock);
 	list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
@@ -4119,15 +4131,13 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
 		}
 		ext4_lock_group(sb, group);
 		list_del(&pa->pa_group_list);
-		ext4_mb_release_group_pa(&e4b, pa, ac);
+		ext4_mb_release_group_pa(&e4b, pa);
 		ext4_unlock_group(sb, group);
 
 		ext4_mb_unload_buddy(&e4b);
 		list_del(&pa->u.pa_tmp_list);
 		call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
 	}
-	if (ac)
-		kmem_cache_free(ext4_ac_cachep, ac);
 }
 
 /*
@@ -4491,7 +4501,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
 {
 	struct buffer_head *bitmap_bh = NULL;
 	struct super_block *sb = inode->i_sb;
-	struct ext4_allocation_context *ac = NULL;
 	struct ext4_group_desc *gdp;
 	unsigned long freed = 0;
 	unsigned int overflow;
@@ -4531,6 +4540,8 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
 			if (!bh)
 				tbh = sb_find_get_block(inode->i_sb,
 							block + i);
+			if (unlikely(!tbh))
+				continue;
 			ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
 				    inode, tbh, block + i);
 		}
@@ -4546,12 +4557,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
 	if (!ext4_should_writeback_data(inode))
 		flags |= EXT4_FREE_BLOCKS_METADATA;
 
-	ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
-	if (ac) {
-		ac->ac_inode = inode;
-		ac->ac_sb = sb;
-	}
-
 do_more:
 	overflow = 0;
 	ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
@@ -4609,12 +4614,7 @@ do_more:
 			BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
 	}
 #endif
-	if (ac) {
-		ac->ac_b_ex.fe_group = block_group;
-		ac->ac_b_ex.fe_start = bit;
-		ac->ac_b_ex.fe_len = count;
-		trace_ext4_mballoc_free(ac);
-	}
+	trace_ext4_mballoc_free(sb, inode, block_group, bit, count);
 
 	err = ext4_mb_load_buddy(sb, block_group, &e4b);
 	if (err)
@@ -4644,8 +4644,6 @@ do_more:
 		mb_clear_bits(bitmap_bh->b_data, bit, count);
 		mb_free_blocks(inode, &e4b, bit, count);
 		ext4_mb_return_to_preallocation(inode, &e4b, block, count);
-		if (test_opt(sb, DISCARD))
-			ext4_issue_discard(sb, block_group, bit, count);
 	}
 
 	ret = ext4_free_blks_count(sb, gdp) + count;
@@ -4685,7 +4683,190 @@ error_return:
 		dquot_free_block(inode, freed);
 	brelse(bitmap_bh);
 	ext4_std_error(sb, err);
-	if (ac)
-		kmem_cache_free(ext4_ac_cachep, ac);
 	return;
 }
+
+/**
+ * ext4_trim_extent -- function to TRIM one single free extent in the group
+ * @sb:		super block for the file system
+ * @start:	starting block of the free extent in the alloc. group
+ * @count:	number of blocks to TRIM
+ * @group:	alloc. group we are working with
+ * @e4b:	ext4 buddy for the group
+ *
+ * Trim "count" blocks starting at "start" in the "group". To assure that no
+ * one will allocate those blocks, mark it as used in buddy bitmap. This must
+ * be called with under the group lock.
+ */
+static int ext4_trim_extent(struct super_block *sb, int start, int count,
+		ext4_group_t group, struct ext4_buddy *e4b)
+{
+	struct ext4_free_extent ex;
+	int ret = 0;
+
+	assert_spin_locked(ext4_group_lock_ptr(sb, group));
+
+	ex.fe_start = start;
+	ex.fe_group = group;
+	ex.fe_len = count;
+
+	/*
+	 * Mark blocks used, so no one can reuse them while
+	 * being trimmed.
+	 */
+	mb_mark_used(e4b, &ex);
+	ext4_unlock_group(sb, group);
+
+	ret = ext4_issue_discard(sb, group, start, count);
+	if (ret)
+		ext4_std_error(sb, ret);
+
+	ext4_lock_group(sb, group);
+	mb_free_blocks(NULL, e4b, start, ex.fe_len);
+	return ret;
+}
+
+/**
+ * ext4_trim_all_free -- function to trim all free space in alloc. group
+ * @sb:			super block for file system
+ * @e4b:		ext4 buddy
+ * @start:		first group block to examine
+ * @max:		last group block to examine
+ * @minblocks:		minimum extent block count
+ *
+ * ext4_trim_all_free walks through group's buddy bitmap searching for free
+ * extents. When the free block is found, ext4_trim_extent is called to TRIM
+ * the extent.
+ *
+ *
+ * ext4_trim_all_free walks through group's block bitmap searching for free
+ * extents. When the free extent is found, mark it as used in group buddy
+ * bitmap. Then issue a TRIM command on this extent and free the extent in
+ * the group buddy bitmap. This is done until whole group is scanned.
+ */
+ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
+		ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks)
+{
+	void *bitmap;
+	ext4_grpblk_t next, count = 0;
+	ext4_group_t group;
+	int ret = 0;
+
+	BUG_ON(e4b == NULL);
+
+	bitmap = e4b->bd_bitmap;
+	group = e4b->bd_group;
+	start = (e4b->bd_info->bb_first_free > start) ?
+		e4b->bd_info->bb_first_free : start;
+	ext4_lock_group(sb, group);
+
+	while (start < max) {
+		start = mb_find_next_zero_bit(bitmap, max, start);
+		if (start >= max)
+			break;
+		next = mb_find_next_bit(bitmap, max, start);
+
+		if ((next - start) >= minblocks) {
+			ret = ext4_trim_extent(sb, start,
+				next - start, group, e4b);
+			if (ret < 0)
+				break;
+			count += next - start;
+		}
+		start = next + 1;
+
+		if (fatal_signal_pending(current)) {
+			count = -ERESTARTSYS;
+			break;
+		}
+
+		if (need_resched()) {
+			ext4_unlock_group(sb, group);
+			cond_resched();
+			ext4_lock_group(sb, group);
+		}
+
+		if ((e4b->bd_info->bb_free - count) < minblocks)
+			break;
+	}
+	ext4_unlock_group(sb, group);
+
+	ext4_debug("trimmed %d blocks in the group %d\n",
+		count, group);
+
+	if (ret < 0)
+		count = ret;
+
+	return count;
+}
+
+/**
+ * ext4_trim_fs() -- trim ioctl handle function
+ * @sb:			superblock for filesystem
+ * @range:		fstrim_range structure
+ *
+ * start:	First Byte to trim
+ * len:		number of Bytes to trim from start
+ * minlen:	minimum extent length in Bytes
+ * ext4_trim_fs goes through all allocation groups containing Bytes from
+ * start to start+len. For each such a group ext4_trim_all_free function
+ * is invoked to trim all free space.
+ */
+int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+{
+	struct ext4_buddy e4b;
+	ext4_group_t first_group, last_group;
+	ext4_group_t group, ngroups = ext4_get_groups_count(sb);
+	ext4_grpblk_t cnt = 0, first_block, last_block;
+	uint64_t start, len, minlen, trimmed;
+	int ret = 0;
+
+	start = range->start >> sb->s_blocksize_bits;
+	len = range->len >> sb->s_blocksize_bits;
+	minlen = range->minlen >> sb->s_blocksize_bits;
+	trimmed = 0;
+
+	if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
+		return -EINVAL;
+
+	/* Determine first and last group to examine based on start and len */
+	ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
+				     &first_group, &first_block);
+	ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len),
+				     &last_group, &last_block);
+	last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
+	last_block = EXT4_BLOCKS_PER_GROUP(sb);
+
+	if (first_group > last_group)
+		return -EINVAL;
+
+	for (group = first_group; group <= last_group; group++) {
+		ret = ext4_mb_load_buddy(sb, group, &e4b);
+		if (ret) {
+			ext4_error(sb, "Error in loading buddy "
+					"information for %u", group);
+			break;
+		}
+
+		if (len >= EXT4_BLOCKS_PER_GROUP(sb))
+			len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block);
+		else
+			last_block = len;
+
+		if (e4b.bd_info->bb_free >= minlen) {
+			cnt = ext4_trim_all_free(sb, &e4b, first_block,
+						last_block, minlen);
+			if (cnt < 0) {
+				ret = cnt;
+				ext4_mb_unload_buddy(&e4b);
+				break;
+			}
+		}
+		ext4_mb_unload_buddy(&e4b);
+		trimmed += cnt;
+		first_block = 0;
+	}
+	range->len = trimmed * sb->s_blocksize;
+
+	return ret;
+}
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 1765c2c50a9..25f3a974b72 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -412,7 +412,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
 	struct buffer_head *bh;
 	struct ext4_extent_header *eh;
 
-	block = idx_pblock(ix);
+	block = ext4_idx_pblock(ix);
 	bh = sb_bread(inode->i_sb, block);
 	if (!bh)
 		return -EIO;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 5f1ed9fc913..b9f3e7862f1 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -85,7 +85,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
 	if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
 		/* leaf block */
 		*extent = ++path[ppos].p_ext;
-		path[ppos].p_block = ext_pblock(path[ppos].p_ext);
+		path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
 		return 0;
 	}
 
@@ -96,7 +96,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
 
 			/* index block */
 			path[ppos].p_idx++;
-			path[ppos].p_block = idx_pblock(path[ppos].p_idx);
+			path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
 			if (path[ppos+1].p_bh)
 				brelse(path[ppos+1].p_bh);
 			path[ppos+1].p_bh =
@@ -111,7 +111,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
 				path[cur_ppos].p_idx =
 					EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
 				path[cur_ppos].p_block =
-					idx_pblock(path[cur_ppos].p_idx);
+					ext4_idx_pblock(path[cur_ppos].p_idx);
 				if (path[cur_ppos+1].p_bh)
 					brelse(path[cur_ppos+1].p_bh);
 				path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
@@ -133,7 +133,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
 			path[leaf_ppos].p_ext = *extent =
 				EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
 			path[leaf_ppos].p_block =
-					ext_pblock(path[leaf_ppos].p_ext);
+					ext4_ext_pblock(path[leaf_ppos].p_ext);
 			return 0;
 		}
 	}
@@ -249,7 +249,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
 			 */
 			o_end->ee_block = end_ext->ee_block;
 			o_end->ee_len = end_ext->ee_len;
-			ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
+			ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
 		}
 
 		o_start->ee_len = start_ext->ee_len;
@@ -276,7 +276,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
 		 */
 		o_end->ee_block = end_ext->ee_block;
 		o_end->ee_len = end_ext->ee_len;
-		ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
+		ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
 
 		/*
 		 * Set 0 to the extent block if new_ext was
@@ -361,7 +361,7 @@ mext_insert_inside_block(struct ext4_extent *o_start,
 	/* Insert new entry */
 	if (new_ext->ee_len) {
 		o_start[i] = *new_ext;
-		ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext));
+		ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext));
 	}
 
 	/* Insert end entry */
@@ -488,7 +488,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
 	start_ext.ee_len = end_ext.ee_len = 0;
 
 	new_ext.ee_block = cpu_to_le32(*from);
-	ext4_ext_store_pblock(&new_ext, ext_pblock(dext));
+	ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext));
 	new_ext.ee_len = dext->ee_len;
 	new_ext_alen = ext4_ext_get_actual_len(&new_ext);
 	new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
@@ -553,7 +553,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
 		copy_extent_status(oext, &end_ext);
 		end_ext_alen = ext4_ext_get_actual_len(&end_ext);
 		ext4_ext_store_pblock(&end_ext,
-			(ext_pblock(o_end) + oext_alen - end_ext_alen));
+			(ext4_ext_pblock(o_end) + oext_alen - end_ext_alen));
 		end_ext.ee_block =
 			cpu_to_le32(le32_to_cpu(o_end->ee_block) +
 			oext_alen - end_ext_alen);
@@ -604,7 +604,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
 	/* When tmp_dext is too large, pick up the target range. */
 	diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
 
-	ext4_ext_store_pblock(tmp_dext, ext_pblock(tmp_dext) + diff);
+	ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff);
 	tmp_dext->ee_block =
 			cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff);
 	tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff);
@@ -613,7 +613,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
 		tmp_dext->ee_len = cpu_to_le16(max_count);
 
 	orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block);
-	ext4_ext_store_pblock(tmp_oext, ext_pblock(tmp_oext) + orig_diff);
+	ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff);
 
 	/* Adjust extent length if donor extent is larger than orig */
 	if (ext4_ext_get_actual_len(tmp_dext) >
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 314c0d3b3fa..dc40e75cba8 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -856,6 +856,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
 	struct buffer_head *bh_use[NAMEI_RA_SIZE];
 	struct buffer_head *bh, *ret = NULL;
 	ext4_lblk_t start, block, b;
+	const u8 *name = d_name->name;
 	int ra_max = 0;		/* Number of bh's in the readahead
 				   buffer, bh_use[] */
 	int ra_ptr = 0;		/* Current index into readahead
@@ -870,6 +871,16 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
 	namelen = d_name->len;
 	if (namelen > EXT4_NAME_LEN)
 		return NULL;
+	if ((namelen <= 2) && (name[0] == '.') &&
+	    (name[1] == '.' || name[1] == '\0')) {
+		/*
+		 * "." or ".." will only be in the first block
+		 * NFS may look up ".."; "." should be handled by the VFS
+		 */
+		block = start = 0;
+		nblocks = 1;
+		goto restart;
+	}
 	if (is_dx(dir)) {
 		bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
 		/*
@@ -960,55 +971,35 @@ cleanup_and_exit:
 static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
 		       struct ext4_dir_entry_2 **res_dir, int *err)
 {
-	struct super_block * sb;
+	struct super_block * sb = dir->i_sb;
 	struct dx_hash_info	hinfo;
-	u32 hash;
 	struct dx_frame frames[2], *frame;
-	struct ext4_dir_entry_2 *de, *top;
 	struct buffer_head *bh;
 	ext4_lblk_t block;
 	int retval;
-	int namelen = d_name->len;
-	const u8 *name = d_name->name;
 
-	sb = dir->i_sb;
-	/* NFS may look up ".." - look at dx_root directory block */
-	if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
-		if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
-			return NULL;
-	} else {
-		frame = frames;
-		frame->bh = NULL;			/* for dx_release() */
-		frame->at = (struct dx_entry *)frames;	/* hack for zero entry*/
-		dx_set_block(frame->at, 0);		/* dx_root block is 0 */
-	}
-	hash = hinfo.hash;
+	if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
+		return NULL;
 	do {
 		block = dx_get_block(frame->at);
-		if (!(bh = ext4_bread (NULL,dir, block, 0, err)))
+		if (!(bh = ext4_bread(NULL, dir, block, 0, err)))
 			goto errout;
-		de = (struct ext4_dir_entry_2 *) bh->b_data;
-		top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize -
-				       EXT4_DIR_REC_LEN(0));
-		for (; de < top; de = ext4_next_entry(de, sb->s_blocksize)) {
-			int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
-				  + ((char *) de - bh->b_data);
-
-			if (!ext4_check_dir_entry(dir, de, bh, off)) {
-				brelse(bh);
-				*err = ERR_BAD_DX_DIR;
-				goto errout;
-			}
 
-			if (ext4_match(namelen, name, de)) {
-				*res_dir = de;
-				dx_release(frames);
-				return bh;
-			}
+		retval = search_dirblock(bh, dir, d_name,
+					 block << EXT4_BLOCK_SIZE_BITS(sb),
+					 res_dir);
+		if (retval == 1) { 	/* Success! */
+			dx_release(frames);
+			return bh;
 		}
 		brelse(bh);
+		if (retval == -1) {
+			*err = ERR_BAD_DX_DIR;
+			goto errout;
+		}
+
 		/* Check to see if we should continue to search */
-		retval = ext4_htree_next_block(dir, hash, frame,
+		retval = ext4_htree_next_block(dir, hinfo.hash, frame,
 					       frames, NULL);
 		if (retval < 0) {
 			ext4_warning(sb,
@@ -2312,7 +2303,7 @@ retry:
 
 	inode->i_ctime = ext4_current_time(inode);
 	ext4_inc_count(handle, inode);
-	atomic_inc(&inode->i_count);
+	ihold(inode);
 
 	err = ext4_add_entry(handle, dentry, inode);
 	if (!err) {
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
new file mode 100644
index 00000000000..beacce11ac5
--- /dev/null
+++ b/fs/ext4/page-io.c
@@ -0,0 +1,431 @@
+/*
+ * linux/fs/ext4/page-io.c
+ *
+ * This contains the new page_io functions for ext4
+ *
+ * Written by Theodore Ts'o, 2010.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/time.h>
+#include <linux/jbd2.h>
+#include <linux/highuid.h>
+#include <linux/pagemap.h>
+#include <linux/quotaops.h>
+#include <linux/string.h>
+#include <linux/buffer_head.h>
+#include <linux/writeback.h>
+#include <linux/pagevec.h>
+#include <linux/mpage.h>
+#include <linux/namei.h>
+#include <linux/uio.h>
+#include <linux/bio.h>
+#include <linux/workqueue.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include "ext4_jbd2.h"
+#include "xattr.h"
+#include "acl.h"
+#include "ext4_extents.h"
+
+static struct kmem_cache *io_page_cachep, *io_end_cachep;
+
+#define WQ_HASH_SZ		37
+#define to_ioend_wq(v)	(&ioend_wq[((unsigned long)v) % WQ_HASH_SZ])
+static wait_queue_head_t ioend_wq[WQ_HASH_SZ];
+
+int __init ext4_init_pageio(void)
+{
+	int i;
+
+	io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
+	if (io_page_cachep == NULL)
+		return -ENOMEM;
+	io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
+	if (io_page_cachep == NULL) {
+		kmem_cache_destroy(io_page_cachep);
+		return -ENOMEM;
+	}
+	for (i = 0; i < WQ_HASH_SZ; i++)
+		init_waitqueue_head(&ioend_wq[i]);
+
+	return 0;
+}
+
+void ext4_exit_pageio(void)
+{
+	kmem_cache_destroy(io_end_cachep);
+	kmem_cache_destroy(io_page_cachep);
+}
+
+void ext4_ioend_wait(struct inode *inode)
+{
+	wait_queue_head_t *wq = to_ioend_wq(inode);
+
+	wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
+}
+
+static void put_io_page(struct ext4_io_page *io_page)
+{
+	if (atomic_dec_and_test(&io_page->p_count)) {
+		end_page_writeback(io_page->p_page);
+		put_page(io_page->p_page);
+		kmem_cache_free(io_page_cachep, io_page);
+	}
+}
+
+void ext4_free_io_end(ext4_io_end_t *io)
+{
+	int i;
+	wait_queue_head_t *wq;
+
+	BUG_ON(!io);
+	if (io->page)
+		put_page(io->page);
+	for (i = 0; i < io->num_io_pages; i++)
+		put_io_page(io->pages[i]);
+	io->num_io_pages = 0;
+	wq = to_ioend_wq(io->inode);
+	if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) &&
+	    waitqueue_active(wq))
+		wake_up_all(wq);
+	kmem_cache_free(io_end_cachep, io);
+}
+
+/*
+ * check a range of space and convert unwritten extents to written.
+ */
+int ext4_end_io_nolock(ext4_io_end_t *io)
+{
+	struct inode *inode = io->inode;
+	loff_t offset = io->offset;
+	ssize_t size = io->size;
+	int ret = 0;
+
+	ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
+		   "list->prev 0x%p\n",
+		   io, inode->i_ino, io->list.next, io->list.prev);
+
+	if (list_empty(&io->list))
+		return ret;
+
+	if (!(io->flag & EXT4_IO_END_UNWRITTEN))
+		return ret;
+
+	ret = ext4_convert_unwritten_extents(inode, offset, size);
+	if (ret < 0) {
+		printk(KERN_EMERG "%s: failed to convert unwritten "
+			"extents to written extents, error is %d "
+			"io is still on inode %lu aio dio list\n",
+		       __func__, ret, inode->i_ino);
+		return ret;
+	}
+
+	if (io->iocb)
+		aio_complete(io->iocb, io->result, 0);
+	/* clear the DIO AIO unwritten flag */
+	io->flag &= ~EXT4_IO_END_UNWRITTEN;
+	return ret;
+}
+
+/*
+ * work on completed aio dio IO, to convert unwritten extents to extents
+ */
+static void ext4_end_io_work(struct work_struct *work)
+{
+	ext4_io_end_t		*io = container_of(work, ext4_io_end_t, work);
+	struct inode		*inode = io->inode;
+	struct ext4_inode_info	*ei = EXT4_I(inode);
+	unsigned long		flags;
+	int			ret;
+
+	mutex_lock(&inode->i_mutex);
+	ret = ext4_end_io_nolock(io);
+	if (ret < 0) {
+		mutex_unlock(&inode->i_mutex);
+		return;
+	}
+
+	spin_lock_irqsave(&ei->i_completed_io_lock, flags);
+	if (!list_empty(&io->list))
+		list_del_init(&io->list);
+	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+	mutex_unlock(&inode->i_mutex);
+	ext4_free_io_end(io);
+}
+
+ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
+{
+	ext4_io_end_t *io = NULL;
+
+	io = kmem_cache_alloc(io_end_cachep, flags);
+	if (io) {
+		memset(io, 0, sizeof(*io));
+		atomic_inc(&EXT4_I(inode)->i_ioend_count);
+		io->inode = inode;
+		INIT_WORK(&io->work, ext4_end_io_work);
+		INIT_LIST_HEAD(&io->list);
+	}
+	return io;
+}
+
+/*
+ * Print an buffer I/O error compatible with the fs/buffer.c.  This
+ * provides compatibility with dmesg scrapers that look for a specific
+ * buffer I/O error message.  We really need a unified error reporting
+ * structure to userspace ala Digital Unix's uerf system, but it's
+ * probably not going to happen in my lifetime, due to LKML politics...
+ */
+static void buffer_io_error(struct buffer_head *bh)
+{
+	char b[BDEVNAME_SIZE];
+	printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n",
+			bdevname(bh->b_bdev, b),
+			(unsigned long long)bh->b_blocknr);
+}
+
+static void ext4_end_bio(struct bio *bio, int error)
+{
+	ext4_io_end_t *io_end = bio->bi_private;
+	struct workqueue_struct *wq;
+	struct inode *inode;
+	unsigned long flags;
+	int i;
+
+	BUG_ON(!io_end);
+	bio->bi_private = NULL;
+	bio->bi_end_io = NULL;
+	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+		error = 0;
+	bio_put(bio);
+
+	for (i = 0; i < io_end->num_io_pages; i++) {
+		struct page *page = io_end->pages[i]->p_page;
+		struct buffer_head *bh, *head;
+		int partial_write = 0;
+
+		head = page_buffers(page);
+		if (error)
+			SetPageError(page);
+		BUG_ON(!head);
+		if (head->b_size == PAGE_CACHE_SIZE)
+			clear_buffer_dirty(head);
+		else {
+			loff_t offset;
+			loff_t io_end_offset = io_end->offset + io_end->size;
+
+			offset = (sector_t) page->index << PAGE_CACHE_SHIFT;
+			bh = head;
+			do {
+				if ((offset >= io_end->offset) &&
+				    (offset+bh->b_size <= io_end_offset)) {
+					if (error)
+						buffer_io_error(bh);
+
+					clear_buffer_dirty(bh);
+				}
+				if (buffer_delay(bh))
+					partial_write = 1;
+				else if (!buffer_mapped(bh))
+					clear_buffer_dirty(bh);
+				else if (buffer_dirty(bh))
+					partial_write = 1;
+				offset += bh->b_size;
+				bh = bh->b_this_page;
+			} while (bh != head);
+		}
+
+		/*
+		 * If this is a partial write which happened to make
+		 * all buffers uptodate then we can optimize away a
+		 * bogus readpage() for the next read(). Here we
+		 * 'discover' whether the page went uptodate as a
+		 * result of this (potentially partial) write.
+		 */
+		if (!partial_write)
+			SetPageUptodate(page);
+
+		put_io_page(io_end->pages[i]);
+	}
+	io_end->num_io_pages = 0;
+	inode = io_end->inode;
+
+	if (error) {
+		io_end->flag |= EXT4_IO_END_ERROR;
+		ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
+			     "(offset %llu size %ld starting block %llu)",
+			     inode->i_ino,
+			     (unsigned long long) io_end->offset,
+			     (long) io_end->size,
+			     (unsigned long long)
+			     bio->bi_sector >> (inode->i_blkbits - 9));
+	}
+
+	/* Add the io_end to per-inode completed io list*/
+	spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
+	list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
+	spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
+
+	wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
+	/* queue the work to convert unwritten extents to written */
+	queue_work(wq, &io_end->work);
+}
+
+void ext4_io_submit(struct ext4_io_submit *io)
+{
+	struct bio *bio = io->io_bio;
+
+	if (bio) {
+		bio_get(io->io_bio);
+		submit_bio(io->io_op, io->io_bio);
+		BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP));
+		bio_put(io->io_bio);
+	}
+	io->io_bio = 0;
+	io->io_op = 0;
+	io->io_end = 0;
+}
+
+static int io_submit_init(struct ext4_io_submit *io,
+			  struct inode *inode,
+			  struct writeback_control *wbc,
+			  struct buffer_head *bh)
+{
+	ext4_io_end_t *io_end;
+	struct page *page = bh->b_page;
+	int nvecs = bio_get_nr_vecs(bh->b_bdev);
+	struct bio *bio;
+
+	io_end = ext4_init_io_end(inode, GFP_NOFS);
+	if (!io_end)
+		return -ENOMEM;
+	do {
+		bio = bio_alloc(GFP_NOIO, nvecs);
+		nvecs >>= 1;
+	} while (bio == NULL);
+
+	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+	bio->bi_bdev = bh->b_bdev;
+	bio->bi_private = io->io_end = io_end;
+	bio->bi_end_io = ext4_end_bio;
+
+	io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
+
+	io->io_bio = bio;
+	io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?
+			WRITE_SYNC_PLUG : WRITE);
+	io->io_next_block = bh->b_blocknr;
+	return 0;
+}
+
+static int io_submit_add_bh(struct ext4_io_submit *io,
+			    struct ext4_io_page *io_page,
+			    struct inode *inode,
+			    struct writeback_control *wbc,
+			    struct buffer_head *bh)
+{
+	ext4_io_end_t *io_end;
+	int ret;
+
+	if (buffer_new(bh)) {
+		clear_buffer_new(bh);
+		unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+	}
+
+	if (!buffer_mapped(bh) || buffer_delay(bh)) {
+		if (!buffer_mapped(bh))
+			clear_buffer_dirty(bh);
+		if (io->io_bio)
+			ext4_io_submit(io);
+		return 0;
+	}
+
+	if (io->io_bio && bh->b_blocknr != io->io_next_block) {
+submit_and_retry:
+		ext4_io_submit(io);
+	}
+	if (io->io_bio == NULL) {
+		ret = io_submit_init(io, inode, wbc, bh);
+		if (ret)
+			return ret;
+	}
+	io_end = io->io_end;
+	if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
+	    (io_end->pages[io_end->num_io_pages-1] != io_page))
+		goto submit_and_retry;
+	if (buffer_uninit(bh))
+		io->io_end->flag |= EXT4_IO_END_UNWRITTEN;
+	io->io_end->size += bh->b_size;
+	io->io_next_block++;
+	ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
+	if (ret != bh->b_size)
+		goto submit_and_retry;
+	if ((io_end->num_io_pages == 0) ||
+	    (io_end->pages[io_end->num_io_pages-1] != io_page)) {
+		io_end->pages[io_end->num_io_pages++] = io_page;
+		atomic_inc(&io_page->p_count);
+	}
+	return 0;
+}
+
+int ext4_bio_write_page(struct ext4_io_submit *io,
+			struct page *page,
+			int len,
+			struct writeback_control *wbc)
+{
+	struct inode *inode = page->mapping->host;
+	unsigned block_start, block_end, blocksize;
+	struct ext4_io_page *io_page;
+	struct buffer_head *bh, *head;
+	int ret = 0;
+
+	blocksize = 1 << inode->i_blkbits;
+
+	BUG_ON(PageWriteback(page));
+	set_page_writeback(page);
+	ClearPageError(page);
+
+	io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
+	if (!io_page) {
+		set_page_dirty(page);
+		unlock_page(page);
+		return -ENOMEM;
+	}
+	io_page->p_page = page;
+	atomic_set(&io_page->p_count, 1);
+	get_page(page);
+
+	for (bh = head = page_buffers(page), block_start = 0;
+	     bh != head || !block_start;
+	     block_start = block_end, bh = bh->b_this_page) {
+		block_end = block_start + blocksize;
+		if (block_start >= len) {
+			clear_buffer_dirty(bh);
+			set_buffer_uptodate(bh);
+			continue;
+		}
+		ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
+		if (ret) {
+			/*
+			 * We only get here on ENOMEM.  Not much else
+			 * we can do but mark the page as dirty, and
+			 * better luck next time.
+			 */
+			set_page_dirty(page);
+			break;
+		}
+	}
+	unlock_page(page);
+	/*
+	 * If the page was truncated before we could do the writeback,
+	 * or we had a memory allocation error while trying to write
+	 * the first buffer head, we won't have submitted any pages for
+	 * I/O.  In that case we need to make sure we've cleared the
+	 * PageWriteback bit from the page to prevent the system from
+	 * wedging later on.
+	 */
+	put_io_page(io_page);
+	return ret;
+}
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index ca5c8aa00a2..981c8477ada 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -226,23 +226,15 @@ static int setup_new_group_blocks(struct super_block *sb,
 	}
 
 	/* Zero out all of the reserved backup group descriptor table blocks */
-	for (i = 0, bit = gdblocks + 1, block = start + bit;
-	     i < reserved_gdb; i++, block++, bit++) {
-		struct buffer_head *gdb;
-
-		ext4_debug("clear reserved block %#04llx (+%d)\n", block, bit);
-
-		if ((err = extend_or_restart_transaction(handle, 1, bh)))
-			goto exit_bh;
-
-		if (IS_ERR(gdb = bclean(handle, sb, block))) {
-			err = PTR_ERR(gdb);
-			goto exit_bh;
-		}
-		ext4_handle_dirty_metadata(handle, NULL, gdb);
+	ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
+			block, sbi->s_itb_per_group);
+	err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb,
+			       GFP_NOFS);
+	if (err)
+		goto exit_bh;
+	for (i = 0, bit = gdblocks + 1; i < reserved_gdb; i++, bit++)
 		ext4_set_bit(bit, bh->b_data);
-		brelse(gdb);
-	}
+
 	ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
 		   input->block_bitmap - start);
 	ext4_set_bit(input->block_bitmap - start, bh->b_data);
@@ -251,28 +243,21 @@ static int setup_new_group_blocks(struct super_block *sb,
 	ext4_set_bit(input->inode_bitmap - start, bh->b_data);
 
 	/* Zero out all of the inode table blocks */
-	for (i = 0, block = input->inode_table, bit = block - start;
-	     i < sbi->s_itb_per_group; i++, bit++, block++) {
-		struct buffer_head *it;
-
-		ext4_debug("clear inode block %#04llx (+%d)\n", block, bit);
-
-		if ((err = extend_or_restart_transaction(handle, 1, bh)))
-			goto exit_bh;
-
-		if (IS_ERR(it = bclean(handle, sb, block))) {
-			err = PTR_ERR(it);
-			goto exit_bh;
-		}
-		ext4_handle_dirty_metadata(handle, NULL, it);
-		brelse(it);
+	block = input->inode_table;
+	ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
+			block, sbi->s_itb_per_group);
+	err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
+	if (err)
+		goto exit_bh;
+	for (i = 0, bit = input->inode_table - start;
+	     i < sbi->s_itb_per_group; i++, bit++)
 		ext4_set_bit(bit, bh->b_data);
-	}
 
 	if ((err = extend_or_restart_transaction(handle, 2, bh)))
 		goto exit_bh;
 
-	mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data);
+	ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8,
+			     bh->b_data);
 	ext4_handle_dirty_metadata(handle, NULL, bh);
 	brelse(bh);
 	/* Mark unused entries in inode bitmap used */
@@ -283,8 +268,8 @@ static int setup_new_group_blocks(struct super_block *sb,
 		goto exit_journal;
 	}
 
-	mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
-			bh->b_data);
+	ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
+			     bh->b_data);
 	ext4_handle_dirty_metadata(handle, NULL, bh);
 exit_bh:
 	brelse(bh);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 8ecc1e59030..fb15c9c0be7 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -40,6 +40,9 @@
 #include <linux/crc16.h>
 #include <asm/uaccess.h>
 
+#include <linux/kthread.h>
+#include <linux/freezer.h>
+
 #include "ext4.h"
 #include "ext4_jbd2.h"
 #include "xattr.h"
@@ -49,8 +52,11 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/ext4.h>
 
-struct proc_dir_entry *ext4_proc_root;
+static struct proc_dir_entry *ext4_proc_root;
 static struct kset *ext4_kset;
+struct ext4_lazy_init *ext4_li_info;
+struct mutex ext4_li_mtx;
+struct ext4_features *ext4_feat;
 
 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
 			     unsigned long journal_devnum);
@@ -67,14 +73,16 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
 static int ext4_unfreeze(struct super_block *sb);
 static void ext4_write_super(struct super_block *sb);
 static int ext4_freeze(struct super_block *sb);
-static int ext4_get_sb(struct file_system_type *fs_type, int flags,
-		       const char *dev_name, void *data, struct vfsmount *mnt);
+static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
+		       const char *dev_name, void *data);
+static void ext4_destroy_lazyinit_thread(void);
+static void ext4_unregister_li_request(struct super_block *sb);
 
 #if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
 static struct file_system_type ext3_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "ext3",
-	.get_sb		= ext4_get_sb,
+	.mount		= ext4_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
@@ -701,6 +709,7 @@ static void ext4_put_super(struct super_block *sb)
 	struct ext4_super_block *es = sbi->s_es;
 	int i, err;
 
+	ext4_unregister_li_request(sb);
 	dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
 
 	flush_workqueue(sbi->dio_unwritten_wq);
@@ -717,6 +726,7 @@ static void ext4_put_super(struct super_block *sb)
 			ext4_abort(sb, "Couldn't clean up the journal");
 	}
 
+	del_timer(&sbi->s_err_report);
 	ext4_release_system_zone(sb);
 	ext4_mb_release(sb);
 	ext4_ext_release(sb);
@@ -818,12 +828,22 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
 	ei->cur_aio_dio = NULL;
 	ei->i_sync_tid = 0;
 	ei->i_datasync_tid = 0;
+	atomic_set(&ei->i_ioend_count, 0);
 
 	return &ei->vfs_inode;
 }
 
+static int ext4_drop_inode(struct inode *inode)
+{
+	int drop = generic_drop_inode(inode);
+
+	trace_ext4_drop_inode(inode, drop);
+	return drop;
+}
+
 static void ext4_destroy_inode(struct inode *inode)
 {
+	ext4_ioend_wait(inode);
 	if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
 		ext4_msg(inode->i_sb, KERN_ERR,
 			 "Inode %lu (%p): orphan list check failed!",
@@ -1006,6 +1026,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	    !(def_mount_opts & EXT4_DEFM_NODELALLOC))
 		seq_puts(seq, ",nodelalloc");
 
+	if (test_opt(sb, MBLK_IO_SUBMIT))
+		seq_puts(seq, ",mblk_io_submit");
 	if (sbi->s_stripe)
 		seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
 	/*
@@ -1042,6 +1064,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	    !(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY))
 		seq_puts(seq, ",block_validity");
 
+	if (!test_opt(sb, INIT_INODE_TABLE))
+		seq_puts(seq, ",noinit_inode_table");
+	else if (sbi->s_li_wait_mult)
+		seq_printf(seq, ",init_inode_table=%u",
+			   (unsigned) sbi->s_li_wait_mult);
+
 	ext4_show_quota_options(seq, sb);
 
 	return 0;
@@ -1157,6 +1185,7 @@ static const struct super_operations ext4_sops = {
 	.destroy_inode	= ext4_destroy_inode,
 	.write_inode	= ext4_write_inode,
 	.dirty_inode	= ext4_dirty_inode,
+	.drop_inode	= ext4_drop_inode,
 	.evict_inode	= ext4_evict_inode,
 	.put_super	= ext4_put_super,
 	.sync_fs	= ext4_sync_fs,
@@ -1177,6 +1206,7 @@ static const struct super_operations ext4_nojournal_sops = {
 	.destroy_inode	= ext4_destroy_inode,
 	.write_inode	= ext4_write_inode,
 	.dirty_inode	= ext4_dirty_inode,
+	.drop_inode	= ext4_drop_inode,
 	.evict_inode	= ext4_evict_inode,
 	.write_super	= ext4_write_super,
 	.put_super	= ext4_put_super,
@@ -1211,11 +1241,12 @@ enum {
 	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
 	Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
 	Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version,
-	Opt_stripe, Opt_delalloc, Opt_nodelalloc,
-	Opt_block_validity, Opt_noblock_validity,
+	Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
+	Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
 	Opt_inode_readahead_blks, Opt_journal_ioprio,
 	Opt_dioread_nolock, Opt_dioread_lock,
 	Opt_discard, Opt_nodiscard,
+	Opt_init_inode_table, Opt_noinit_inode_table,
 };
 
 static const match_table_t tokens = {
@@ -1275,6 +1306,8 @@ static const match_table_t tokens = {
 	{Opt_resize, "resize"},
 	{Opt_delalloc, "delalloc"},
 	{Opt_nodelalloc, "nodelalloc"},
+	{Opt_mblk_io_submit, "mblk_io_submit"},
+	{Opt_nomblk_io_submit, "nomblk_io_submit"},
 	{Opt_block_validity, "block_validity"},
 	{Opt_noblock_validity, "noblock_validity"},
 	{Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
@@ -1286,6 +1319,9 @@ static const match_table_t tokens = {
 	{Opt_dioread_lock, "dioread_lock"},
 	{Opt_discard, "discard"},
 	{Opt_nodiscard, "nodiscard"},
+	{Opt_init_inode_table, "init_itable=%u"},
+	{Opt_init_inode_table, "init_itable"},
+	{Opt_noinit_inode_table, "noinit_itable"},
 	{Opt_err, NULL},
 };
 
@@ -1693,6 +1729,12 @@ set_qf_format:
 		case Opt_nodelalloc:
 			clear_opt(sbi->s_mount_opt, DELALLOC);
 			break;
+		case Opt_mblk_io_submit:
+			set_opt(sbi->s_mount_opt, MBLK_IO_SUBMIT);
+			break;
+		case Opt_nomblk_io_submit:
+			clear_opt(sbi->s_mount_opt, MBLK_IO_SUBMIT);
+			break;
 		case Opt_stripe:
 			if (match_int(&args[0], &option))
 				return 0;
@@ -1756,6 +1798,20 @@ set_qf_format:
 		case Opt_dioread_lock:
 			clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
 			break;
+		case Opt_init_inode_table:
+			set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
+			if (args[0].from) {
+				if (match_int(&args[0], &option))
+					return 0;
+			} else
+				option = EXT4_DEF_LI_WAIT_MULT;
+			if (option < 0)
+				return 0;
+			sbi->s_li_wait_mult = option;
+			break;
+		case Opt_noinit_inode_table:
+			clear_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
+			break;
 		default:
 			ext4_msg(sb, KERN_ERR,
 			       "Unrecognized mount option \"%s\" "
@@ -1939,7 +1995,8 @@ int ext4_group_desc_csum_verify(struct ext4_sb_info *sbi, __u32 block_group,
 }
 
 /* Called at mount-time, super-block is locked */
-static int ext4_check_descriptors(struct super_block *sb)
+static int ext4_check_descriptors(struct super_block *sb,
+				  ext4_group_t *first_not_zeroed)
 {
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
@@ -1948,7 +2005,7 @@ static int ext4_check_descriptors(struct super_block *sb)
 	ext4_fsblk_t inode_bitmap;
 	ext4_fsblk_t inode_table;
 	int flexbg_flag = 0;
-	ext4_group_t i;
+	ext4_group_t i, grp = sbi->s_groups_count;
 
 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
 		flexbg_flag = 1;
@@ -1964,6 +2021,10 @@ static int ext4_check_descriptors(struct super_block *sb)
 			last_block = first_block +
 				(EXT4_BLOCKS_PER_GROUP(sb) - 1);
 
+		if ((grp == sbi->s_groups_count) &&
+		   !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
+			grp = i;
+
 		block_bitmap = ext4_block_bitmap(sb, gdp);
 		if (block_bitmap < first_block || block_bitmap > last_block) {
 			ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
@@ -2001,6 +2062,8 @@ static int ext4_check_descriptors(struct super_block *sb)
 		if (!flexbg_flag)
 			first_block += EXT4_BLOCKS_PER_GROUP(sb);
 	}
+	if (NULL != first_not_zeroed)
+		*first_not_zeroed = grp;
 
 	ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
 	sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
@@ -2373,6 +2436,7 @@ static struct ext4_attr ext4_attr_##_name = {			\
 #define EXT4_ATTR(name, mode, show, store) \
 static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
 
+#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL)
 #define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
 #define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
 #define EXT4_RW_ATTR_SBI_UI(name, elname)	\
@@ -2409,6 +2473,16 @@ static struct attribute *ext4_attrs[] = {
 	NULL,
 };
 
+/* Features this copy of ext4 supports */
+EXT4_INFO_ATTR(lazy_itable_init);
+EXT4_INFO_ATTR(batched_discard);
+
+static struct attribute *ext4_feat_attrs[] = {
+	ATTR_LIST(lazy_itable_init),
+	ATTR_LIST(batched_discard),
+	NULL,
+};
+
 static ssize_t ext4_attr_show(struct kobject *kobj,
 			      struct attribute *attr, char *buf)
 {
@@ -2437,7 +2511,6 @@ static void ext4_sb_release(struct kobject *kobj)
 	complete(&sbi->s_kobj_unregister);
 }
 
-
 static const struct sysfs_ops ext4_attr_ops = {
 	.show	= ext4_attr_show,
 	.store	= ext4_attr_store,
@@ -2449,6 +2522,17 @@ static struct kobj_type ext4_ktype = {
 	.release	= ext4_sb_release,
 };
 
+static void ext4_feat_release(struct kobject *kobj)
+{
+	complete(&ext4_feat->f_kobj_unregister);
+}
+
+static struct kobj_type ext4_feat_ktype = {
+	.default_attrs	= ext4_feat_attrs,
+	.sysfs_ops	= &ext4_attr_ops,
+	.release	= ext4_feat_release,
+};
+
 /*
  * Check whether this filesystem can be mounted based on
  * the features present and the RDONLY/RDWR mount requested.
@@ -2539,6 +2623,368 @@ static void print_daily_error_info(unsigned long arg)
 	mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ);  /* Once a day */
 }
 
+static void ext4_lazyinode_timeout(unsigned long data)
+{
+	struct task_struct *p = (struct task_struct *)data;
+	wake_up_process(p);
+}
+
+/* Find next suitable group and run ext4_init_inode_table */
+static int ext4_run_li_request(struct ext4_li_request *elr)
+{
+	struct ext4_group_desc *gdp = NULL;
+	ext4_group_t group, ngroups;
+	struct super_block *sb;
+	unsigned long timeout = 0;
+	int ret = 0;
+
+	sb = elr->lr_super;
+	ngroups = EXT4_SB(sb)->s_groups_count;
+
+	for (group = elr->lr_next_group; group < ngroups; group++) {
+		gdp = ext4_get_group_desc(sb, group, NULL);
+		if (!gdp) {
+			ret = 1;
+			break;
+		}
+
+		if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
+			break;
+	}
+
+	if (group == ngroups)
+		ret = 1;
+
+	if (!ret) {
+		timeout = jiffies;
+		ret = ext4_init_inode_table(sb, group,
+					    elr->lr_timeout ? 0 : 1);
+		if (elr->lr_timeout == 0) {
+			timeout = jiffies - timeout;
+			if (elr->lr_sbi->s_li_wait_mult)
+				timeout *= elr->lr_sbi->s_li_wait_mult;
+			else
+				timeout *= 20;
+			elr->lr_timeout = timeout;
+		}
+		elr->lr_next_sched = jiffies + elr->lr_timeout;
+		elr->lr_next_group = group + 1;
+	}
+
+	return ret;
+}
+
+/*
+ * Remove lr_request from the list_request and free the
+ * request tructure. Should be called with li_list_mtx held
+ */
+static void ext4_remove_li_request(struct ext4_li_request *elr)
+{
+	struct ext4_sb_info *sbi;
+
+	if (!elr)
+		return;
+
+	sbi = elr->lr_sbi;
+
+	list_del(&elr->lr_request);
+	sbi->s_li_request = NULL;
+	kfree(elr);
+}
+
+static void ext4_unregister_li_request(struct super_block *sb)
+{
+	struct ext4_li_request *elr = EXT4_SB(sb)->s_li_request;
+
+	if (!ext4_li_info)
+		return;
+
+	mutex_lock(&ext4_li_info->li_list_mtx);
+	ext4_remove_li_request(elr);
+	mutex_unlock(&ext4_li_info->li_list_mtx);
+}
+
+/*
+ * This is the function where ext4lazyinit thread lives. It walks
+ * through the request list searching for next scheduled filesystem.
+ * When such a fs is found, run the lazy initialization request
+ * (ext4_rn_li_request) and keep track of the time spend in this
+ * function. Based on that time we compute next schedule time of
+ * the request. When walking through the list is complete, compute
+ * next waking time and put itself into sleep.
+ */
+static int ext4_lazyinit_thread(void *arg)
+{
+	struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
+	struct list_head *pos, *n;
+	struct ext4_li_request *elr;
+	unsigned long next_wakeup;
+	DEFINE_WAIT(wait);
+
+	BUG_ON(NULL == eli);
+
+	eli->li_timer.data = (unsigned long)current;
+	eli->li_timer.function = ext4_lazyinode_timeout;
+
+	eli->li_task = current;
+	wake_up(&eli->li_wait_task);
+
+cont_thread:
+	while (true) {
+		next_wakeup = MAX_JIFFY_OFFSET;
+
+		mutex_lock(&eli->li_list_mtx);
+		if (list_empty(&eli->li_request_list)) {
+			mutex_unlock(&eli->li_list_mtx);
+			goto exit_thread;
+		}
+
+		list_for_each_safe(pos, n, &eli->li_request_list) {
+			elr = list_entry(pos, struct ext4_li_request,
+					 lr_request);
+
+			if (time_after_eq(jiffies, elr->lr_next_sched)) {
+				if (ext4_run_li_request(elr) != 0) {
+					/* error, remove the lazy_init job */
+					ext4_remove_li_request(elr);
+					continue;
+				}
+			}
+
+			if (time_before(elr->lr_next_sched, next_wakeup))
+				next_wakeup = elr->lr_next_sched;
+		}
+		mutex_unlock(&eli->li_list_mtx);
+
+		if (freezing(current))
+			refrigerator();
+
+		if ((time_after_eq(jiffies, next_wakeup)) ||
+		    (MAX_JIFFY_OFFSET == next_wakeup)) {
+			cond_resched();
+			continue;
+		}
+
+		eli->li_timer.expires = next_wakeup;
+		add_timer(&eli->li_timer);
+		prepare_to_wait(&eli->li_wait_daemon, &wait,
+				TASK_INTERRUPTIBLE);
+		if (time_before(jiffies, next_wakeup))
+			schedule();
+		finish_wait(&eli->li_wait_daemon, &wait);
+	}
+
+exit_thread:
+	/*
+	 * It looks like the request list is empty, but we need
+	 * to check it under the li_list_mtx lock, to prevent any
+	 * additions into it, and of course we should lock ext4_li_mtx
+	 * to atomically free the list and ext4_li_info, because at
+	 * this point another ext4 filesystem could be registering
+	 * new one.
+	 */
+	mutex_lock(&ext4_li_mtx);
+	mutex_lock(&eli->li_list_mtx);
+	if (!list_empty(&eli->li_request_list)) {
+		mutex_unlock(&eli->li_list_mtx);
+		mutex_unlock(&ext4_li_mtx);
+		goto cont_thread;
+	}
+	mutex_unlock(&eli->li_list_mtx);
+	del_timer_sync(&ext4_li_info->li_timer);
+	eli->li_task = NULL;
+	wake_up(&eli->li_wait_task);
+
+	kfree(ext4_li_info);
+	ext4_li_info = NULL;
+	mutex_unlock(&ext4_li_mtx);
+
+	return 0;
+}
+
+static void ext4_clear_request_list(void)
+{
+	struct list_head *pos, *n;
+	struct ext4_li_request *elr;
+
+	mutex_lock(&ext4_li_info->li_list_mtx);
+	list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
+		elr = list_entry(pos, struct ext4_li_request,
+				 lr_request);
+		ext4_remove_li_request(elr);
+	}
+	mutex_unlock(&ext4_li_info->li_list_mtx);
+}
+
+static int ext4_run_lazyinit_thread(void)
+{
+	struct task_struct *t;
+
+	t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit");
+	if (IS_ERR(t)) {
+		int err = PTR_ERR(t);
+		ext4_clear_request_list();
+		del_timer_sync(&ext4_li_info->li_timer);
+		kfree(ext4_li_info);
+		ext4_li_info = NULL;
+		printk(KERN_CRIT "EXT4: error %d creating inode table "
+				 "initialization thread\n",
+				 err);
+		return err;
+	}
+	ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
+
+	wait_event(ext4_li_info->li_wait_task, ext4_li_info->li_task != NULL);
+	return 0;
+}
+
+/*
+ * Check whether it make sense to run itable init. thread or not.
+ * If there is at least one uninitialized inode table, return
+ * corresponding group number, else the loop goes through all
+ * groups and return total number of groups.
+ */
+static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
+{
+	ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
+	struct ext4_group_desc *gdp = NULL;
+
+	for (group = 0; group < ngroups; group++) {
+		gdp = ext4_get_group_desc(sb, group, NULL);
+		if (!gdp)
+			continue;
+
+		if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
+			break;
+	}
+
+	return group;
+}
+
+static int ext4_li_info_new(void)
+{
+	struct ext4_lazy_init *eli = NULL;
+
+	eli = kzalloc(sizeof(*eli), GFP_KERNEL);
+	if (!eli)
+		return -ENOMEM;
+
+	eli->li_task = NULL;
+	INIT_LIST_HEAD(&eli->li_request_list);
+	mutex_init(&eli->li_list_mtx);
+
+	init_waitqueue_head(&eli->li_wait_daemon);
+	init_waitqueue_head(&eli->li_wait_task);
+	init_timer(&eli->li_timer);
+	eli->li_state |= EXT4_LAZYINIT_QUIT;
+
+	ext4_li_info = eli;
+
+	return 0;
+}
+
+static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
+					    ext4_group_t start)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_li_request *elr;
+	unsigned long rnd;
+
+	elr = kzalloc(sizeof(*elr), GFP_KERNEL);
+	if (!elr)
+		return NULL;
+
+	elr->lr_super = sb;
+	elr->lr_sbi = sbi;
+	elr->lr_next_group = start;
+
+	/*
+	 * Randomize first schedule time of the request to
+	 * spread the inode table initialization requests
+	 * better.
+	 */
+	get_random_bytes(&rnd, sizeof(rnd));
+	elr->lr_next_sched = jiffies + (unsigned long)rnd %
+			     (EXT4_DEF_LI_MAX_START_DELAY * HZ);
+
+	return elr;
+}
+
+static int ext4_register_li_request(struct super_block *sb,
+				    ext4_group_t first_not_zeroed)
+{
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
+	struct ext4_li_request *elr;
+	ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+	int ret;
+
+	if (sbi->s_li_request != NULL)
+		return 0;
+
+	if (first_not_zeroed == ngroups ||
+	    (sb->s_flags & MS_RDONLY) ||
+	    !test_opt(sb, INIT_INODE_TABLE)) {
+		sbi->s_li_request = NULL;
+		return 0;
+	}
+
+	if (first_not_zeroed == ngroups) {
+		sbi->s_li_request = NULL;
+		return 0;
+	}
+
+	elr = ext4_li_request_new(sb, first_not_zeroed);
+	if (!elr)
+		return -ENOMEM;
+
+	mutex_lock(&ext4_li_mtx);
+
+	if (NULL == ext4_li_info) {
+		ret = ext4_li_info_new();
+		if (ret)
+			goto out;
+	}
+
+	mutex_lock(&ext4_li_info->li_list_mtx);
+	list_add(&elr->lr_request, &ext4_li_info->li_request_list);
+	mutex_unlock(&ext4_li_info->li_list_mtx);
+
+	sbi->s_li_request = elr;
+
+	if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
+		ret = ext4_run_lazyinit_thread();
+		if (ret)
+			goto out;
+	}
+out:
+	mutex_unlock(&ext4_li_mtx);
+	if (ret)
+		kfree(elr);
+	return ret;
+}
+
+/*
+ * We do not need to lock anything since this is called on
+ * module unload.
+ */
+static void ext4_destroy_lazyinit_thread(void)
+{
+	/*
+	 * If thread exited earlier
+	 * there's nothing to be done.
+	 */
+	if (!ext4_li_info)
+		return;
+
+	ext4_clear_request_list();
+
+	while (ext4_li_info->li_task) {
+		wake_up(&ext4_li_info->li_wait_daemon);
+		wait_event(ext4_li_info->li_wait_task,
+			   ext4_li_info->li_task == NULL);
+	}
+}
+
 static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 				__releases(kernel_lock)
 				__acquires(kernel_lock)
@@ -2564,6 +3010,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	__u64 blocks_count;
 	int err;
 	unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
+	ext4_group_t first_not_zeroed;
 
 	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
 	if (!sbi)
@@ -2624,6 +3071,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 
 	/* Set defaults before we parse the mount options */
 	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
+	set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
 	if (def_mount_opts & EXT4_DEFM_DEBUG)
 		set_opt(sbi->s_mount_opt, DEBUG);
 	if (def_mount_opts & EXT4_DEFM_BSDGROUPS) {
@@ -2826,13 +3274,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	 * Test whether we have more sectors than will fit in sector_t,
 	 * and whether the max offset is addressable by the page cache.
 	 */
-	ret = generic_check_addressable(sb->s_blocksize_bits,
+	err = generic_check_addressable(sb->s_blocksize_bits,
 					ext4_blocks_count(es));
-	if (ret) {
+	if (err) {
 		ext4_msg(sb, KERN_ERR, "filesystem"
 			 " too large to mount safely on this system");
 		if (sizeof(sector_t) < 8)
 			ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
+		ret = err;
 		goto failed_mount;
 	}
 
@@ -2901,7 +3350,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 			goto failed_mount2;
 		}
 	}
-	if (!ext4_check_descriptors(sb)) {
+	if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
 		ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
 		goto failed_mount2;
 	}
@@ -2917,6 +3366,24 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	get_random_bytes(&sbi->s_next_generation, sizeof(u32));
 	spin_lock_init(&sbi->s_next_gen_lock);
 
+	err = percpu_counter_init(&sbi->s_freeblocks_counter,
+			ext4_count_free_blocks(sb));
+	if (!err) {
+		err = percpu_counter_init(&sbi->s_freeinodes_counter,
+				ext4_count_free_inodes(sb));
+	}
+	if (!err) {
+		err = percpu_counter_init(&sbi->s_dirs_counter,
+				ext4_count_dirs(sb));
+	}
+	if (!err) {
+		err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
+	}
+	if (err) {
+		ext4_msg(sb, KERN_ERR, "insufficient memory");
+		goto failed_mount3;
+	}
+
 	sbi->s_stripe = ext4_get_stripe_size(sbi);
 	sbi->s_max_writeback_mb_bump = 128;
 
@@ -3015,22 +3482,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	}
 	set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
 
-no_journal:
-	err = percpu_counter_init(&sbi->s_freeblocks_counter,
-				  ext4_count_free_blocks(sb));
-	if (!err)
-		err = percpu_counter_init(&sbi->s_freeinodes_counter,
-					  ext4_count_free_inodes(sb));
-	if (!err)
-		err = percpu_counter_init(&sbi->s_dirs_counter,
-					  ext4_count_dirs(sb));
-	if (!err)
-		err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
-	if (err) {
-		ext4_msg(sb, KERN_ERR, "insufficient memory");
-		goto failed_mount_wq;
-	}
+	/*
+	 * The journal may have updated the bg summary counts, so we
+	 * need to update the global counters.
+	 */
+	percpu_counter_set(&sbi->s_freeblocks_counter,
+			   ext4_count_free_blocks(sb));
+	percpu_counter_set(&sbi->s_freeinodes_counter,
+			   ext4_count_free_inodes(sb));
+	percpu_counter_set(&sbi->s_dirs_counter,
+			   ext4_count_dirs(sb));
+	percpu_counter_set(&sbi->s_dirtyblocks_counter, 0);
 
+no_journal:
 	EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten");
 	if (!EXT4_SB(sb)->dio_unwritten_wq) {
 		printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
@@ -3122,6 +3586,10 @@ no_journal:
 		goto failed_mount4;
 	}
 
+	err = ext4_register_li_request(sb, first_not_zeroed);
+	if (err)
+		goto failed_mount4;
+
 	sbi->s_kobj.kset = ext4_kset;
 	init_completion(&sbi->s_kobj_unregister);
 	err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
@@ -3176,10 +3644,6 @@ failed_mount_wq:
 		jbd2_journal_destroy(sbi->s_journal);
 		sbi->s_journal = NULL;
 	}
-	percpu_counter_destroy(&sbi->s_freeblocks_counter);
-	percpu_counter_destroy(&sbi->s_freeinodes_counter);
-	percpu_counter_destroy(&sbi->s_dirs_counter);
-	percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
 failed_mount3:
 	if (sbi->s_flex_groups) {
 		if (is_vmalloc_addr(sbi->s_flex_groups))
@@ -3187,6 +3651,10 @@ failed_mount3:
 		else
 			kfree(sbi->s_flex_groups);
 	}
+	percpu_counter_destroy(&sbi->s_freeblocks_counter);
+	percpu_counter_destroy(&sbi->s_freeinodes_counter);
+	percpu_counter_destroy(&sbi->s_dirs_counter);
+	percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
 failed_mount2:
 	for (i = 0; i < db_count; i++)
 		brelse(sbi->s_group_desc[i]);
@@ -3461,7 +3929,7 @@ static int ext4_load_journal(struct super_block *sb,
 	EXT4_SB(sb)->s_journal = journal;
 	ext4_clear_journal_err(sb, es);
 
-	if (journal_devnum &&
+	if (!really_read_only && journal_devnum &&
 	    journal_devnum != le32_to_cpu(es->s_journal_dev)) {
 		es->s_journal_dev = cpu_to_le32(journal_devnum);
 
@@ -3515,9 +3983,10 @@ static int ext4_commit_super(struct super_block *sb, int sync)
 		es->s_kbytes_written =
 			cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
 	ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
-					&EXT4_SB(sb)->s_freeblocks_counter));
-	es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
-					&EXT4_SB(sb)->s_freeinodes_counter));
+					   &EXT4_SB(sb)->s_freeblocks_counter));
+	es->s_free_inodes_count =
+		cpu_to_le32(percpu_counter_sum_positive(
+				&EXT4_SB(sb)->s_freeinodes_counter));
 	sb->s_dirt = 0;
 	BUFFER_TRACE(sbh, "marking dirty");
 	mark_buffer_dirty(sbh);
@@ -3835,6 +4304,19 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
 			enable_quota = 1;
 		}
 	}
+
+	/*
+	 * Reinitialize lazy itable initialization thread based on
+	 * current settings
+	 */
+	if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE))
+		ext4_unregister_li_request(sb);
+	else {
+		ext4_group_t first_not_zeroed;
+		first_not_zeroed = ext4_has_uninit_itable(sb);
+		ext4_register_li_request(sb, first_not_zeroed);
+	}
+
 	ext4_setup_system_zone(sb);
 	if (sbi->s_journal == NULL)
 		ext4_commit_super(sb, 1);
@@ -4105,12 +4587,10 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 
 static int ext4_quota_off(struct super_block *sb, int type)
 {
-	/* Force all delayed allocation blocks to be allocated */
-	if (test_opt(sb, DELALLOC)) {
-		down_read(&sb->s_umount);
+	/* Force all delayed allocation blocks to be allocated.
+	 * Caller already holds s_umount sem */
+	if (test_opt(sb, DELALLOC))
 		sync_filesystem(sb);
-		up_read(&sb->s_umount);
-	}
 
 	return dquot_quota_off(sb, type);
 }
@@ -4216,17 +4696,17 @@ out:
 
 #endif
 
-static int ext4_get_sb(struct file_system_type *fs_type, int flags,
-		       const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
+		       const char *dev_name, void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, ext4_fill_super);
 }
 
 #if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
 static struct file_system_type ext2_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "ext2",
-	.get_sb		= ext4_get_sb,
+	.mount		= ext4_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
@@ -4271,28 +4751,58 @@ static inline void unregister_as_ext3(void) { }
 static struct file_system_type ext4_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "ext4",
-	.get_sb		= ext4_get_sb,
+	.mount		= ext4_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
 
-static int __init init_ext4_fs(void)
+int __init ext4_init_feat_adverts(void)
+{
+	struct ext4_features *ef;
+	int ret = -ENOMEM;
+
+	ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL);
+	if (!ef)
+		goto out;
+
+	ef->f_kobj.kset = ext4_kset;
+	init_completion(&ef->f_kobj_unregister);
+	ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL,
+				   "features");
+	if (ret) {
+		kfree(ef);
+		goto out;
+	}
+
+	ext4_feat = ef;
+	ret = 0;
+out:
+	return ret;
+}
+
+static int __init ext4_init_fs(void)
 {
 	int err;
 
 	ext4_check_flag_values();
-	err = init_ext4_system_zone();
+	err = ext4_init_pageio();
 	if (err)
 		return err;
+	err = ext4_init_system_zone();
+	if (err)
+		goto out5;
 	ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
 	if (!ext4_kset)
 		goto out4;
 	ext4_proc_root = proc_mkdir("fs/ext4", NULL);
-	err = init_ext4_mballoc();
+
+	err = ext4_init_feat_adverts();
+
+	err = ext4_init_mballoc();
 	if (err)
 		goto out3;
 
-	err = init_ext4_xattr();
+	err = ext4_init_xattr();
 	if (err)
 		goto out2;
 	err = init_inodecache();
@@ -4303,38 +4813,46 @@ static int __init init_ext4_fs(void)
 	err = register_filesystem(&ext4_fs_type);
 	if (err)
 		goto out;
+
+	ext4_li_info = NULL;
+	mutex_init(&ext4_li_mtx);
 	return 0;
 out:
 	unregister_as_ext2();
 	unregister_as_ext3();
 	destroy_inodecache();
 out1:
-	exit_ext4_xattr();
+	ext4_exit_xattr();
 out2:
-	exit_ext4_mballoc();
+	ext4_exit_mballoc();
 out3:
+	kfree(ext4_feat);
 	remove_proc_entry("fs/ext4", NULL);
 	kset_unregister(ext4_kset);
 out4:
-	exit_ext4_system_zone();
+	ext4_exit_system_zone();
+out5:
+	ext4_exit_pageio();
 	return err;
 }
 
-static void __exit exit_ext4_fs(void)
+static void __exit ext4_exit_fs(void)
 {
+	ext4_destroy_lazyinit_thread();
 	unregister_as_ext2();
 	unregister_as_ext3();
 	unregister_filesystem(&ext4_fs_type);
 	destroy_inodecache();
-	exit_ext4_xattr();
-	exit_ext4_mballoc();
+	ext4_exit_xattr();
+	ext4_exit_mballoc();
 	remove_proc_entry("fs/ext4", NULL);
 	kset_unregister(ext4_kset);
-	exit_ext4_system_zone();
+	ext4_exit_system_zone();
+	ext4_exit_pageio();
 }
 
 MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
 MODULE_DESCRIPTION("Fourth Extended Filesystem");
 MODULE_LICENSE("GPL");
-module_init(init_ext4_fs)
-module_exit(exit_ext4_fs)
+module_init(ext4_init_fs)
+module_exit(ext4_exit_fs)
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 3a8cd8dff1a..fa4b899da4b 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1588,7 +1588,7 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header,
 #undef BLOCK_HASH_SHIFT
 
 int __init
-init_ext4_xattr(void)
+ext4_init_xattr(void)
 {
 	ext4_xattr_cache = mb_cache_create("ext4_xattr", 6);
 	if (!ext4_xattr_cache)
@@ -1597,7 +1597,7 @@ init_ext4_xattr(void)
 }
 
 void
-exit_ext4_xattr(void)
+ext4_exit_xattr(void)
 {
 	if (ext4_xattr_cache)
 		mb_cache_destroy(ext4_xattr_cache);
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index 518e96e4390..1ef16520b95 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -83,8 +83,8 @@ extern void ext4_xattr_put_super(struct super_block *);
 extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
 			    struct ext4_inode *raw_inode, handle_t *handle);
 
-extern int init_ext4_xattr(void);
-extern void exit_ext4_xattr(void);
+extern int __init ext4_init_xattr(void);
+extern void ext4_exit_xattr(void);
 
 extern const struct xattr_handler *ext4_xattr_handlers[];
 
@@ -121,14 +121,14 @@ ext4_xattr_put_super(struct super_block *sb)
 {
 }
 
-static inline int
-init_ext4_xattr(void)
+static __init inline int
+ext4_init_xattr(void)
 {
 	return 0;
 }
 
 static inline void
-exit_ext4_xattr(void)
+ext4_exit_xattr(void)
 {
 }
 
diff --git a/fs/fat/namei_msdos.c b/fs/fat/namei_msdos.c
index bbca5c186ae..3345aabd1dd 100644
--- a/fs/fat/namei_msdos.c
+++ b/fs/fat/namei_msdos.c
@@ -675,18 +675,17 @@ static int msdos_fill_super(struct super_block *sb, void *data, int silent)
 	return 0;
 }
 
-static int msdos_get_sb(struct file_system_type *fs_type,
+static struct dentry *msdos_mount(struct file_system_type *fs_type,
 			int flags, const char *dev_name,
-			void *data, struct vfsmount *mnt)
+			void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, msdos_fill_super,
-			   mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, msdos_fill_super);
 }
 
 static struct file_system_type msdos_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "msdos",
-	.get_sb		= msdos_get_sb,
+	.mount		= msdos_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
diff --git a/fs/fat/namei_vfat.c b/fs/fat/namei_vfat.c
index 6f0f6c9a015..b936703b892 100644
--- a/fs/fat/namei_vfat.c
+++ b/fs/fat/namei_vfat.c
@@ -1071,18 +1071,17 @@ static int vfat_fill_super(struct super_block *sb, void *data, int silent)
 	return 0;
 }
 
-static int vfat_get_sb(struct file_system_type *fs_type,
+static struct dentry *vfat_mount(struct file_system_type *fs_type,
 		       int flags, const char *dev_name,
-		       void *data, struct vfsmount *mnt)
+		       void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, vfat_fill_super,
-			   mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, vfat_fill_super);
 }
 
 static struct file_system_type vfat_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "vfat",
-	.get_sb		= vfat_get_sb,
+	.mount		= vfat_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
diff --git a/fs/fcntl.c b/fs/fcntl.c
index f8cc34f542c..ecc8b3954ed 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -640,7 +640,7 @@ static void fasync_free_rcu(struct rcu_head *head)
  * match the state "is the filp on a fasync list".
  *
  */
-static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
+int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
 {
 	struct fasync_struct *fa, **fp;
 	int result = 0;
@@ -666,21 +666,31 @@ static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
 	return result;
 }
 
+struct fasync_struct *fasync_alloc(void)
+{
+	return kmem_cache_alloc(fasync_cache, GFP_KERNEL);
+}
+
 /*
- * Add a fasync entry. Return negative on error, positive if
- * added, and zero if did nothing but change an existing one.
+ * NOTE! This can be used only for unused fasync entries:
+ * entries that actually got inserted on the fasync list
+ * need to be released by rcu - see fasync_remove_entry.
+ */
+void fasync_free(struct fasync_struct *new)
+{
+	kmem_cache_free(fasync_cache, new);
+}
+
+/*
+ * Insert a new entry into the fasync list.  Return the pointer to the
+ * old one if we didn't use the new one.
  *
  * NOTE! It is very important that the FASYNC flag always
  * match the state "is the filp on a fasync list".
  */
-static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
+struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new)
 {
-	struct fasync_struct *new, *fa, **fp;
-	int result = 0;
-
-	new = kmem_cache_alloc(fasync_cache, GFP_KERNEL);
-	if (!new)
-		return -ENOMEM;
+        struct fasync_struct *fa, **fp;
 
 	spin_lock(&filp->f_lock);
 	spin_lock(&fasync_lock);
@@ -691,8 +701,6 @@ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fa
 		spin_lock_irq(&fa->fa_lock);
 		fa->fa_fd = fd;
 		spin_unlock_irq(&fa->fa_lock);
-
-		kmem_cache_free(fasync_cache, new);
 		goto out;
 	}
 
@@ -702,13 +710,39 @@ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fa
 	new->fa_fd = fd;
 	new->fa_next = *fapp;
 	rcu_assign_pointer(*fapp, new);
-	result = 1;
 	filp->f_flags |= FASYNC;
 
 out:
 	spin_unlock(&fasync_lock);
 	spin_unlock(&filp->f_lock);
-	return result;
+	return fa;
+}
+
+/*
+ * Add a fasync entry. Return negative on error, positive if
+ * added, and zero if did nothing but change an existing one.
+ */
+static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
+{
+	struct fasync_struct *new;
+
+	new = fasync_alloc();
+	if (!new)
+		return -ENOMEM;
+
+	/*
+	 * fasync_insert_entry() returns the old (update) entry if
+	 * it existed.
+	 *
+	 * So free the (unused) new entry and return 0 to let the
+	 * caller know that we didn't add any new fasync entries.
+	 */
+	if (fasync_insert_entry(fd, filp, fapp, new)) {
+		fasync_free(new);
+		return 0;
+	}
+
+	return 1;
 }
 
 /*
diff --git a/fs/file_table.c b/fs/file_table.c
index a04bdd81c11..c3dee381f1b 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -60,7 +60,7 @@ static inline void file_free(struct file *f)
 /*
  * Return the total number of open files in the system
  */
-static int get_nr_files(void)
+static long get_nr_files(void)
 {
 	return percpu_counter_read_positive(&nr_files);
 }
@@ -68,7 +68,7 @@ static int get_nr_files(void)
 /*
  * Return the maximum number of open files in the system
  */
-int get_max_files(void)
+unsigned long get_max_files(void)
 {
 	return files_stat.max_files;
 }
@@ -82,7 +82,7 @@ int proc_nr_files(ctl_table *table, int write,
                      void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	files_stat.nr_files = get_nr_files();
-	return proc_dointvec(table, write, buffer, lenp, ppos);
+	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
 }
 #else
 int proc_nr_files(ctl_table *table, int write,
@@ -105,7 +105,7 @@ int proc_nr_files(ctl_table *table, int write,
 struct file *get_empty_filp(void)
 {
 	const struct cred *cred = current_cred();
-	static int old_max;
+	static long old_max;
 	struct file * f;
 
 	/*
@@ -140,8 +140,7 @@ struct file *get_empty_filp(void)
 over:
 	/* Ran out of filps - report that */
 	if (get_nr_files() > old_max) {
-		printk(KERN_INFO "VFS: file-max limit %d reached\n",
-					get_max_files());
+		pr_info("VFS: file-max limit %lu reached\n", get_max_files());
 		old_max = get_nr_files();
 	}
 	goto fail;
@@ -487,7 +486,7 @@ retry:
 
 void __init files_init(unsigned long mempages)
 { 
-	int n; 
+	unsigned long n;
 
 	filp_cachep = kmem_cache_create("filp", sizeof(struct file), 0,
 			SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
@@ -498,9 +497,7 @@ void __init files_init(unsigned long mempages)
 	 */ 
 
 	n = (mempages * (PAGE_SIZE / 1024)) / 10;
-	files_stat.max_files = n; 
-	if (files_stat.max_files < NR_FILE)
-		files_stat.max_files = NR_FILE;
+	files_stat.max_files = max_t(unsigned long, n, NR_FILE);
 	files_defer_init();
 	lg_lock_init(files_lglock);
 	percpu_counter_init(&nr_files, 0);
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c
index 79d1b4ea13e..8c04eac5079 100644
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -260,6 +260,7 @@ vxfs_get_fake_inode(struct super_block *sbp, struct vxfs_inode_info *vip)
 	struct inode			*ip = NULL;
 
 	if ((ip = new_inode(sbp))) {
+		ip->i_ino = get_next_ino();
 		vxfs_iinit(ip, vip);
 		ip->i_mapping->a_ops = &vxfs_aops;
 	}
diff --git a/fs/freevxfs/vxfs_super.c b/fs/freevxfs/vxfs_super.c
index 71b0148b878..9d1c9955838 100644
--- a/fs/freevxfs/vxfs_super.c
+++ b/fs/freevxfs/vxfs_super.c
@@ -246,17 +246,16 @@ out:
 /*
  * The usual module blurb.
  */
-static int vxfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *vxfs_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, vxfs_fill_super,
-			   mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, vxfs_fill_super);
 }
 
 static struct file_system_type vxfs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "vxfs",
-	.get_sb		= vxfs_get_sb,
+	.mount		= vxfs_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index ab38fef1c9a..3d06ccc953a 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -79,6 +79,11 @@ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
 	return sb->s_bdi;
 }
 
+static inline struct inode *wb_inode(struct list_head *head)
+{
+	return list_entry(head, struct inode, i_wb_list);
+}
+
 static void bdi_queue_work(struct backing_dev_info *bdi,
 		struct wb_writeback_work *work)
 {
@@ -172,11 +177,11 @@ static void redirty_tail(struct inode *inode)
 	if (!list_empty(&wb->b_dirty)) {
 		struct inode *tail;
 
-		tail = list_entry(wb->b_dirty.next, struct inode, i_list);
+		tail = wb_inode(wb->b_dirty.next);
 		if (time_before(inode->dirtied_when, tail->dirtied_when))
 			inode->dirtied_when = jiffies;
 	}
-	list_move(&inode->i_list, &wb->b_dirty);
+	list_move(&inode->i_wb_list, &wb->b_dirty);
 }
 
 /*
@@ -186,7 +191,7 @@ static void requeue_io(struct inode *inode)
 {
 	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
 
-	list_move(&inode->i_list, &wb->b_more_io);
+	list_move(&inode->i_wb_list, &wb->b_more_io);
 }
 
 static void inode_sync_complete(struct inode *inode)
@@ -227,14 +232,14 @@ static void move_expired_inodes(struct list_head *delaying_queue,
 	int do_sb_sort = 0;
 
 	while (!list_empty(delaying_queue)) {
-		inode = list_entry(delaying_queue->prev, struct inode, i_list);
+		inode = wb_inode(delaying_queue->prev);
 		if (older_than_this &&
 		    inode_dirtied_after(inode, *older_than_this))
 			break;
 		if (sb && sb != inode->i_sb)
 			do_sb_sort = 1;
 		sb = inode->i_sb;
-		list_move(&inode->i_list, &tmp);
+		list_move(&inode->i_wb_list, &tmp);
 	}
 
 	/* just one sb in list, splice to dispatch_queue and we're done */
@@ -245,12 +250,11 @@ static void move_expired_inodes(struct list_head *delaying_queue,
 
 	/* Move inodes from one superblock together */
 	while (!list_empty(&tmp)) {
-		inode = list_entry(tmp.prev, struct inode, i_list);
-		sb = inode->i_sb;
+		sb = wb_inode(tmp.prev)->i_sb;
 		list_for_each_prev_safe(pos, node, &tmp) {
-			inode = list_entry(pos, struct inode, i_list);
+			inode = wb_inode(pos);
 			if (inode->i_sb == sb)
-				list_move(&inode->i_list, dispatch_queue);
+				list_move(&inode->i_wb_list, dispatch_queue);
 		}
 	}
 }
@@ -408,16 +412,13 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
 			 * completion.
 			 */
 			redirty_tail(inode);
-		} else if (atomic_read(&inode->i_count)) {
-			/*
-			 * The inode is clean, inuse
-			 */
-			list_move(&inode->i_list, &inode_in_use);
 		} else {
 			/*
-			 * The inode is clean, unused
+			 * The inode is clean.  At this point we either have
+			 * a reference to the inode or it's on it's way out.
+			 * No need to add it back to the LRU.
 			 */
-			list_move(&inode->i_list, &inode_unused);
+			list_del_init(&inode->i_wb_list);
 		}
 	}
 	inode_sync_complete(inode);
@@ -465,8 +466,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
 {
 	while (!list_empty(&wb->b_io)) {
 		long pages_skipped;
-		struct inode *inode = list_entry(wb->b_io.prev,
-						 struct inode, i_list);
+		struct inode *inode = wb_inode(wb->b_io.prev);
 
 		if (inode->i_sb != sb) {
 			if (only_this_sb) {
@@ -487,10 +487,16 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
 			return 0;
 		}
 
-		if (inode->i_state & (I_NEW | I_WILL_FREE)) {
+		/*
+		 * Don't bother with new inodes or inodes beeing freed, first
+		 * kind does not need peridic writeout yet, and for the latter
+		 * kind writeout is handled by the freer.
+		 */
+		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
 			requeue_io(inode);
 			continue;
 		}
+
 		/*
 		 * Was this inode dirtied after sync_sb_inodes was called?
 		 * This keeps sync from extra jobs and livelock.
@@ -498,7 +504,6 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
 		if (inode_dirtied_after(inode, wbc->wb_start))
 			return 1;
 
-		BUG_ON(inode->i_state & I_FREEING);
 		__iget(inode);
 		pages_skipped = wbc->pages_skipped;
 		writeback_single_inode(inode, wbc);
@@ -536,8 +541,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
 		queue_io(wb, wbc->older_than_this);
 
 	while (!list_empty(&wb->b_io)) {
-		struct inode *inode = list_entry(wb->b_io.prev,
-						 struct inode, i_list);
+		struct inode *inode = wb_inode(wb->b_io.prev);
 		struct super_block *sb = inode->i_sb;
 
 		if (!pin_sb_for_writeback(sb)) {
@@ -582,7 +586,7 @@ static inline bool over_bground_thresh(void)
 	global_dirty_limits(&background_thresh, &dirty_thresh);
 
 	return (global_page_state(NR_FILE_DIRTY) +
-		global_page_state(NR_UNSTABLE_NFS) >= background_thresh);
+		global_page_state(NR_UNSTABLE_NFS) > background_thresh);
 }
 
 /*
@@ -675,8 +679,7 @@ static long wb_writeback(struct bdi_writeback *wb,
 		 */
 		spin_lock(&inode_lock);
 		if (!list_empty(&wb->b_more_io))  {
-			inode = list_entry(wb->b_more_io.prev,
-						struct inode, i_list);
+			inode = wb_inode(wb->b_more_io.prev);
 			trace_wbc_writeback_wait(&wbc, wb->bdi);
 			inode_wait_for_writeback(inode);
 		}
@@ -704,6 +707,17 @@ get_next_work_item(struct backing_dev_info *bdi)
 	return work;
 }
 
+/*
+ * Add in the number of potentially dirty inodes, because each inode
+ * write can dirty pagecache in the underlying blockdev.
+ */
+static unsigned long get_nr_dirty_pages(void)
+{
+	return global_page_state(NR_FILE_DIRTY) +
+		global_page_state(NR_UNSTABLE_NFS) +
+		get_nr_dirty_inodes();
+}
+
 static long wb_check_old_data_flush(struct bdi_writeback *wb)
 {
 	unsigned long expired;
@@ -721,9 +735,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
 		return 0;
 
 	wb->last_old_flush = jiffies;
-	nr_pages = global_page_state(NR_FILE_DIRTY) +
-			global_page_state(NR_UNSTABLE_NFS) +
-			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
+	nr_pages = get_nr_dirty_pages();
 
 	if (nr_pages) {
 		struct wb_writeback_work work = {
@@ -790,7 +802,7 @@ int bdi_writeback_thread(void *data)
 	struct backing_dev_info *bdi = wb->bdi;
 	long pages_written;
 
-	current->flags |= PF_FLUSHER | PF_SWAPWRITE;
+	current->flags |= PF_SWAPWRITE;
 	set_freezable();
 	wb->last_active = jiffies;
 
@@ -962,7 +974,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 		 * dirty list.  Add blockdev inodes as well.
 		 */
 		if (!S_ISBLK(inode->i_mode)) {
-			if (hlist_unhashed(&inode->i_hash))
+			if (inode_unhashed(inode))
 				goto out;
 		}
 		if (inode->i_state & I_FREEING)
@@ -990,7 +1002,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 			}
 
 			inode->dirtied_when = jiffies;
-			list_move(&inode->i_list, &bdi->wb.b_dirty);
+			list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
 		}
 	}
 out:
@@ -1069,33 +1081,42 @@ static void wait_sb_inodes(struct super_block *sb)
 }
 
 /**
- * writeback_inodes_sb	-	writeback dirty inodes from given super_block
+ * writeback_inodes_sb_nr -	writeback dirty inodes from given super_block
  * @sb: the superblock
+ * @nr: the number of pages to write
  *
  * Start writeback on some inodes on this super_block. No guarantees are made
  * on how many (if any) will be written, and this function does not wait
- * for IO completion of submitted IO. The number of pages submitted is
- * returned.
+ * for IO completion of submitted IO.
  */
-void writeback_inodes_sb(struct super_block *sb)
+void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr)
 {
-	unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
-	unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
 	DECLARE_COMPLETION_ONSTACK(done);
 	struct wb_writeback_work work = {
 		.sb		= sb,
 		.sync_mode	= WB_SYNC_NONE,
 		.done		= &done,
+		.nr_pages	= nr,
 	};
 
 	WARN_ON(!rwsem_is_locked(&sb->s_umount));
-
-	work.nr_pages = nr_dirty + nr_unstable +
-			(inodes_stat.nr_inodes - inodes_stat.nr_unused);
-
 	bdi_queue_work(sb->s_bdi, &work);
 	wait_for_completion(&done);
 }
+EXPORT_SYMBOL(writeback_inodes_sb_nr);
+
+/**
+ * writeback_inodes_sb	-	writeback dirty inodes from given super_block
+ * @sb: the superblock
+ *
+ * Start writeback on some inodes on this super_block. No guarantees are made
+ * on how many (if any) will be written, and this function does not wait
+ * for IO completion of submitted IO.
+ */
+void writeback_inodes_sb(struct super_block *sb)
+{
+	return writeback_inodes_sb_nr(sb, get_nr_dirty_pages());
+}
 EXPORT_SYMBOL(writeback_inodes_sb);
 
 /**
@@ -1118,6 +1139,27 @@ int writeback_inodes_sb_if_idle(struct super_block *sb)
 EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
 
 /**
+ * writeback_inodes_sb_if_idle	-	start writeback if none underway
+ * @sb: the superblock
+ * @nr: the number of pages to write
+ *
+ * Invoke writeback_inodes_sb if no writeback is currently underway.
+ * Returns 1 if writeback was started, 0 if not.
+ */
+int writeback_inodes_sb_nr_if_idle(struct super_block *sb,
+				   unsigned long nr)
+{
+	if (!writeback_in_progress(sb->s_bdi)) {
+		down_read(&sb->s_umount);
+		writeback_inodes_sb_nr(sb, nr);
+		up_read(&sb->s_umount);
+		return 1;
+	} else
+		return 0;
+}
+EXPORT_SYMBOL(writeback_inodes_sb_nr_if_idle);
+
+/**
  * sync_inodes_sb	-	sync sb inode pages
  * @sb: the superblock
  *
@@ -1198,3 +1240,23 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
 	return ret;
 }
 EXPORT_SYMBOL(sync_inode);
+
+/**
+ * sync_inode - write an inode to disk
+ * @inode: the inode to sync
+ * @wait: wait for I/O to complete.
+ *
+ * Write an inode to disk and adjust it's dirty state after completion.
+ *
+ * Note: only writes the actual inode, no associated data or other metadata.
+ */
+int sync_inode_metadata(struct inode *inode, int wait)
+{
+	struct writeback_control wbc = {
+		.sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
+		.nr_to_write = 0, /* metadata-only */
+	};
+
+	return sync_inode(inode, &wbc);
+}
+EXPORT_SYMBOL(sync_inode_metadata);
diff --git a/fs/fuse/control.c b/fs/fuse/control.c
index 7367e177186..85542a7daf4 100644
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -222,6 +222,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
 	if (!inode)
 		return NULL;
 
+	inode->i_ino = get_next_ino();
 	inode->i_mode = mode;
 	inode->i_uid = fc->user_id;
 	inode->i_gid = fc->group_id;
@@ -321,12 +322,10 @@ static int fuse_ctl_fill_super(struct super_block *sb, void *data, int silent)
 	return 0;
 }
 
-static int fuse_ctl_get_sb(struct file_system_type *fs_type, int flags,
-			const char *dev_name, void *raw_data,
-			struct vfsmount *mnt)
+static struct dentry *fuse_ctl_mount(struct file_system_type *fs_type,
+			int flags, const char *dev_name, void *raw_data)
 {
-	return get_sb_single(fs_type, flags, raw_data,
-				fuse_ctl_fill_super, mnt);
+	return mount_single(fs_type, flags, raw_data, fuse_ctl_fill_super);
 }
 
 static void fuse_ctl_kill_sb(struct super_block *sb)
@@ -345,7 +344,7 @@ static void fuse_ctl_kill_sb(struct super_block *sb)
 static struct file_system_type fuse_ctl_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "fusectl",
-	.get_sb		= fuse_ctl_get_sb,
+	.mount		= fuse_ctl_mount,
 	.kill_sb	= fuse_ctl_kill_sb,
 };
 
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index cde755cca56..6e07696308d 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -809,11 +809,9 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
 	int err;
 	struct page *page = *pagep;
 
-	if (page && zeroing && count < PAGE_SIZE) {
-		void *mapaddr = kmap_atomic(page, KM_USER1);
-		memset(mapaddr, 0, PAGE_SIZE);
-		kunmap_atomic(mapaddr, KM_USER1);
-	}
+	if (page && zeroing && count < PAGE_SIZE)
+		clear_highpage(page);
+
 	while (count) {
 		if (cs->write && cs->pipebufs && page) {
 			return fuse_ref_page(cs, page, offset, count);
@@ -830,10 +828,10 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
 			}
 		}
 		if (page) {
-			void *mapaddr = kmap_atomic(page, KM_USER1);
+			void *mapaddr = kmap_atomic(page, KM_USER0);
 			void *buf = mapaddr + offset;
 			offset += fuse_copy_do(cs, &buf, &count);
-			kunmap_atomic(mapaddr, KM_USER1);
+			kunmap_atomic(mapaddr, KM_USER0);
 		} else
 			offset += fuse_copy_do(cs, NULL, &count);
 	}
@@ -1336,12 +1334,7 @@ out_finish:
 
 static void fuse_retrieve_end(struct fuse_conn *fc, struct fuse_req *req)
 {
-	int i;
-
-	for (i = 0; i < req->num_pages; i++) {
-		struct page *page = req->pages[i];
-		page_cache_release(page);
-	}
+	release_pages(req->pages, req->num_pages, 0);
 }
 
 static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c8224587123..8b984a2cebb 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/module.h>
+#include <linux/compat.h>
 
 static const struct file_operations fuse_direct_io_file_operations;
 
@@ -134,6 +135,7 @@ EXPORT_SYMBOL_GPL(fuse_do_open);
 void fuse_finish_open(struct inode *inode, struct file *file)
 {
 	struct fuse_file *ff = file->private_data;
+	struct fuse_conn *fc = get_fuse_conn(inode);
 
 	if (ff->open_flags & FOPEN_DIRECT_IO)
 		file->f_op = &fuse_direct_io_file_operations;
@@ -141,6 +143,15 @@ void fuse_finish_open(struct inode *inode, struct file *file)
 		invalidate_inode_pages2(inode->i_mapping);
 	if (ff->open_flags & FOPEN_NONSEEKABLE)
 		nonseekable_open(inode, file);
+	if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC)) {
+		struct fuse_inode *fi = get_fuse_inode(inode);
+
+		spin_lock(&fc->lock);
+		fi->attr_version = ++fc->attr_version;
+		i_size_write(inode, 0);
+		spin_unlock(&fc->lock);
+		fuse_invalidate_attr(inode);
+	}
 }
 
 int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
@@ -1618,6 +1629,58 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
 }
 
 /*
+ * CUSE servers compiled on 32bit broke on 64bit kernels because the
+ * ABI was defined to be 'struct iovec' which is different on 32bit
+ * and 64bit.  Fortunately we can determine which structure the server
+ * used from the size of the reply.
+ */
+static int fuse_copy_ioctl_iovec(struct iovec *dst, void *src,
+				 size_t transferred, unsigned count,
+				 bool is_compat)
+{
+#ifdef CONFIG_COMPAT
+	if (count * sizeof(struct compat_iovec) == transferred) {
+		struct compat_iovec *ciov = src;
+		unsigned i;
+
+		/*
+		 * With this interface a 32bit server cannot support
+		 * non-compat (i.e. ones coming from 64bit apps) ioctl
+		 * requests
+		 */
+		if (!is_compat)
+			return -EINVAL;
+
+		for (i = 0; i < count; i++) {
+			dst[i].iov_base = compat_ptr(ciov[i].iov_base);
+			dst[i].iov_len = ciov[i].iov_len;
+		}
+		return 0;
+	}
+#endif
+
+	if (count * sizeof(struct iovec) != transferred)
+		return -EIO;
+
+	memcpy(dst, src, transferred);
+	return 0;
+}
+
+/* Make sure iov_length() won't overflow */
+static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count)
+{
+	size_t n;
+	u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT;
+
+	for (n = 0; n < count; n++) {
+		if (iov->iov_len > (size_t) max)
+			return -ENOMEM;
+		max -= iov->iov_len;
+	}
+	return 0;
+}
+
+/*
  * For ioctls, there is no generic way to determine how much memory
  * needs to be read and/or written.  Furthermore, ioctls are allowed
  * to dereference the passed pointer, so the parameter requires deep
@@ -1798,18 +1861,25 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
 		    in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
 			goto out;
 
-		err = -EIO;
-		if ((in_iovs + out_iovs) * sizeof(struct iovec) != transferred)
-			goto out;
-
-		/* okay, copy in iovs and retry */
 		vaddr = kmap_atomic(pages[0], KM_USER0);
-		memcpy(page_address(iov_page), vaddr, transferred);
+		err = fuse_copy_ioctl_iovec(page_address(iov_page), vaddr,
+					    transferred, in_iovs + out_iovs,
+					    (flags & FUSE_IOCTL_COMPAT) != 0);
 		kunmap_atomic(vaddr, KM_USER0);
+		if (err)
+			goto out;
 
 		in_iov = page_address(iov_page);
 		out_iov = in_iov + in_iovs;
 
+		err = fuse_verify_ioctl_iov(in_iov, in_iovs);
+		if (err)
+			goto out;
+
+		err = fuse_verify_ioctl_iov(out_iov, out_iovs);
+		if (err)
+			goto out;
+
 		goto retry;
 	}
 
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index da9e6e11374..cfce3ad86a9 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -1041,11 +1041,11 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	return err;
 }
 
-static int fuse_get_sb(struct file_system_type *fs_type,
+static struct dentry *fuse_mount(struct file_system_type *fs_type,
 		       int flags, const char *dev_name,
-		       void *raw_data, struct vfsmount *mnt)
+		       void *raw_data)
 {
-	return get_sb_nodev(fs_type, flags, raw_data, fuse_fill_super, mnt);
+	return mount_nodev(fs_type, flags, raw_data, fuse_fill_super);
 }
 
 static void fuse_kill_sb_anon(struct super_block *sb)
@@ -1065,17 +1065,16 @@ static struct file_system_type fuse_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "fuse",
 	.fs_flags	= FS_HAS_SUBTYPE,
-	.get_sb		= fuse_get_sb,
+	.mount		= fuse_mount,
 	.kill_sb	= fuse_kill_sb_anon,
 };
 
 #ifdef CONFIG_BLOCK
-static int fuse_get_sb_blk(struct file_system_type *fs_type,
+static struct dentry *fuse_mount_blk(struct file_system_type *fs_type,
 			   int flags, const char *dev_name,
-			   void *raw_data, struct vfsmount *mnt)
+			   void *raw_data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super,
-			   mnt);
+	return mount_bdev(fs_type, flags, dev_name, raw_data, fuse_fill_super);
 }
 
 static void fuse_kill_sb_blk(struct super_block *sb)
@@ -1094,7 +1093,7 @@ static void fuse_kill_sb_blk(struct super_block *sb)
 static struct file_system_type fuseblk_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "fuseblk",
-	.get_sb		= fuse_get_sb_blk,
+	.mount		= fuse_mount_blk,
 	.kill_sb	= fuse_kill_sb_blk,
 	.fs_flags	= FS_REQUIRES_DEV | FS_HAS_SUBTYPE,
 };
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 6b24afb96aa..4f36f8832b9 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -618,7 +618,6 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
 	struct gfs2_alloc *al = NULL;
 	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
 	unsigned from = pos & (PAGE_CACHE_SIZE - 1);
-	unsigned to = from + len;
 	struct page *page;
 
 	gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
@@ -691,7 +690,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
 	}
 
 prepare_write:
-	error = block_prepare_write(page, from, to, gfs2_block_map);
+	error = __block_write_begin(page, from, len, gfs2_block_map);
 out:
 	if (error == 0)
 		return 0;
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 06d582732d3..5ab3839dfcb 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -138,10 +138,8 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
 				      struct gfs2_inum_host *inum)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
-	struct gfs2_holder i_gh;
 	struct inode *inode;
 	struct dentry *dentry;
-	int error;
 
 	inode = gfs2_ilookup(sb, inum->no_addr);
 	if (inode) {
@@ -152,52 +150,16 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
 		goto out_inode;
 	}
 
-	error = gfs2_glock_nq_num(sdp, inum->no_addr, &gfs2_inode_glops,
-				  LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
-	if (error)
-		return ERR_PTR(error);
-
-	error = gfs2_check_blk_type(sdp, inum->no_addr, GFS2_BLKST_DINODE);
-	if (error)
-		goto fail;
-
-	inode = gfs2_inode_lookup(sb, DT_UNKNOWN, inum->no_addr, 0);
-	if (IS_ERR(inode)) {
-		error = PTR_ERR(inode);
-		goto fail;
-	}
-
-	error = gfs2_inode_refresh(GFS2_I(inode));
-	if (error) {
-		iput(inode);
-		goto fail;
-	}
-
-	/* Pick up the works we bypass in gfs2_inode_lookup */
-	if (inode->i_state & I_NEW) 
-		gfs2_set_iop(inode);
-
-	if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
-		iput(inode);
-		goto fail;
-	}
-
-	error = -EIO;
-	if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM) {
-		iput(inode);
-		goto fail;
-	}
-
-	gfs2_glock_dq_uninit(&i_gh);
+	inode = gfs2_lookup_by_inum(sdp, inum->no_addr, &inum->no_formal_ino,
+				    GFS2_BLKST_DINODE);
+	if (IS_ERR(inode))
+		return ERR_CAST(inode);
 
 out_inode:
 	dentry = d_obtain_alias(inode);
 	if (!IS_ERR(dentry))
 		dentry->d_op = &gfs2_dops;
 	return dentry;
-fail:
-	gfs2_glock_dq_uninit(&i_gh);
-	return ERR_PTR(error);
 }
 
 static struct dentry *gfs2_fh_to_dentry(struct super_block *sb, struct fid *fid,
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 87778857f09..f92c1770416 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -686,21 +686,20 @@ static void delete_work_func(struct work_struct *work)
 {
 	struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_delete);
 	struct gfs2_sbd *sdp = gl->gl_sbd;
-	struct gfs2_inode *ip = NULL;
+	struct gfs2_inode *ip;
 	struct inode *inode;
-	u64 no_addr = 0;
+	u64 no_addr = gl->gl_name.ln_number;
+
+	ip = gl->gl_object;
+	/* Note: Unsafe to dereference ip as we don't hold right refs/locks */
 
-	spin_lock(&gl->gl_spin);
-	ip = (struct gfs2_inode *)gl->gl_object;
 	if (ip)
-		no_addr = ip->i_no_addr;
-	spin_unlock(&gl->gl_spin);
-	if (ip) {
 		inode = gfs2_ilookup(sdp->sd_vfs, no_addr);
-		if (inode) {
-			d_prune_aliases(inode);
-			iput(inode);
-		}
+	else
+		inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
+	if (inode && !IS_ERR(inode)) {
+		d_prune_aliases(inode);
+		iput(inode);
 	}
 	gfs2_glock_put(gl);
 }
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 06370f8bd8c..e1213f7f921 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -73,49 +73,6 @@ static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr)
 	return iget5_locked(sb, hash, iget_test, iget_set, &no_addr);
 }
 
-struct gfs2_skip_data {
-	u64	no_addr;
-	int	skipped;
-};
-
-static int iget_skip_test(struct inode *inode, void *opaque)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_skip_data *data = opaque;
-
-	if (ip->i_no_addr == data->no_addr) {
-		if (inode->i_state & (I_FREEING|I_WILL_FREE)){
-			data->skipped = 1;
-			return 0;
-		}
-		return 1;
-	}
-	return 0;
-}
-
-static int iget_skip_set(struct inode *inode, void *opaque)
-{
-	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_skip_data *data = opaque;
-
-	if (data->skipped)
-		return 1;
-	inode->i_ino = (unsigned long)(data->no_addr);
-	ip->i_no_addr = data->no_addr;
-	return 0;
-}
-
-static struct inode *gfs2_iget_skip(struct super_block *sb,
-				    u64 no_addr)
-{
-	struct gfs2_skip_data data;
-	unsigned long hash = (unsigned long)no_addr;
-
-	data.no_addr = no_addr;
-	data.skipped = 0;
-	return iget5_locked(sb, hash, iget_skip_test, iget_skip_set, &data);
-}
-
 /**
  * GFS2 lookup code fills in vfs inode contents based on info obtained
  * from directory entry inside gfs2_inode_lookup(). This has caused issues
@@ -243,93 +200,54 @@ fail:
 	return ERR_PTR(error);
 }
 
-/**
- * gfs2_process_unlinked_inode - Lookup an unlinked inode for reclamation
- *                               and try to reclaim it by doing iput.
- *
- * This function assumes no rgrp locks are currently held.
- *
- * @sb: The super block
- * no_addr: The inode number
- *
- */
-
-void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr)
+struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
+				  u64 *no_formal_ino, unsigned int blktype)
 {
-	struct gfs2_sbd *sdp;
-	struct gfs2_inode *ip;
-	struct gfs2_glock *io_gl = NULL;
-	int error;
-	struct gfs2_holder gh;
+	struct super_block *sb = sdp->sd_vfs;
+	struct gfs2_holder i_gh;
 	struct inode *inode;
+	int error;
 
-	inode = gfs2_iget_skip(sb, no_addr);
-
-	if (!inode)
-		return;
-
-	/* If it's not a new inode, someone's using it, so leave it alone. */
-	if (!(inode->i_state & I_NEW)) {
-		iput(inode);
-		return;
-	}
-
-	ip = GFS2_I(inode);
-	sdp = GFS2_SB(inode);
-	ip->i_no_formal_ino = -1;
+	error = gfs2_glock_nq_num(sdp, no_addr, &gfs2_inode_glops,
+				  LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
+	if (error)
+		return ERR_PTR(error);
 
-	error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &ip->i_gl);
-	if (unlikely(error))
+	error = gfs2_check_blk_type(sdp, no_addr, blktype);
+	if (error)
 		goto fail;
-	ip->i_gl->gl_object = ip;
 
-	error = gfs2_glock_get(sdp, no_addr, &gfs2_iopen_glops, CREATE, &io_gl);
-	if (unlikely(error))
-		goto fail_put;
-
-	set_bit(GIF_INVALID, &ip->i_flags);
-	error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, LM_FLAG_TRY | GL_EXACT,
-				   &ip->i_iopen_gh);
-	if (unlikely(error))
-		goto fail_iopen;
+	inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0);
+	if (IS_ERR(inode))
+		goto fail;
 
-	ip->i_iopen_gh.gh_gl->gl_object = ip;
-	gfs2_glock_put(io_gl);
-	io_gl = NULL;
+	error = gfs2_inode_refresh(GFS2_I(inode));
+	if (error)
+		goto fail_iput;
 
-	inode->i_mode = DT2IF(DT_UNKNOWN);
+	/* Pick up the works we bypass in gfs2_inode_lookup */
+	if (inode->i_state & I_NEW) 
+		gfs2_set_iop(inode);
 
-	/*
-	 * We must read the inode in order to work out its type in
-	 * this case. Note that this doesn't happen often as we normally
-	 * know the type beforehand. This code path only occurs during
-	 * unlinked inode recovery (where it is safe to do this glock,
-	 * which is not true in the general case).
-	 */
-	error = gfs2_glock_nq_init(ip->i_gl, LM_ST_EXCLUSIVE, LM_FLAG_TRY,
-				   &gh);
-	if (unlikely(error))
-		goto fail_glock;
+	/* Two extra checks for NFS only */
+	if (no_formal_ino) {
+		error = -ESTALE;
+		if (GFS2_I(inode)->i_no_formal_ino != *no_formal_ino)
+			goto fail_iput;
 
-	/* Inode is now uptodate */
-	gfs2_glock_dq_uninit(&gh);
-	gfs2_set_iop(inode);
+		error = -EIO;
+		if (GFS2_I(inode)->i_diskflags & GFS2_DIF_SYSTEM)
+			goto fail_iput;
 
-	/* The iput will cause it to be deleted. */
-	iput(inode);
-	return;
+		error = 0;
+	}
 
-fail_glock:
-	gfs2_glock_dq(&ip->i_iopen_gh);
-fail_iopen:
-	if (io_gl)
-		gfs2_glock_put(io_gl);
-fail_put:
-	ip->i_gl->gl_object = NULL;
-	gfs2_glock_put(ip->i_gl);
 fail:
-	iget_failed(inode);
-	return;
+	gfs2_glock_dq_uninit(&i_gh);
+	return error ? ERR_PTR(error) : inode;
+fail_iput:
+	iput(inode);
+	goto fail;
 }
 
 static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index 6720d7d5fbc..d8499fadcc5 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -99,7 +99,9 @@ err:
 extern void gfs2_set_iop(struct inode *inode);
 extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type, 
 				       u64 no_addr, u64 no_formal_ino);
-extern void gfs2_process_unlinked_inode(struct super_block *sb, u64 no_addr);
+extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
+					 u64 *no_formal_ino,
+					 unsigned int blktype);
 extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
 
 extern int gfs2_inode_refresh(struct gfs2_inode *ip);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index f3b071f921a..939739c7b3f 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -55,7 +55,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb
 		 * activity, but those code paths have their own higher-level
 		 * throttling.
 		 */
-		if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
+		if (wbc->sync_mode != WB_SYNC_NONE) {
 			lock_buffer(bh);
 		} else if (!trylock_buffer(bh)) {
 			redirty_page_for_writepage(wbc, page);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index aeafc233dc8..3eb1393f7b8 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1219,7 +1219,6 @@ fail_sb:
 fail_locking:
 	init_locking(sdp, &mount_gh, UNDO);
 fail_lm:
-	invalidate_inodes(sb);
 	gfs2_gl_hash_clear(sdp);
 	gfs2_lm_unmount(sdp);
 fail_sys:
@@ -1251,12 +1250,11 @@ static int test_gfs2_super(struct super_block *s, void *ptr)
 }
 
 /**
- * gfs2_get_sb - Get the GFS2 superblock
+ * gfs2_mount - Get the GFS2 superblock
  * @fs_type: The GFS2 filesystem type
  * @flags: Mount flags
  * @dev_name: The name of the device
  * @data: The mount arguments
- * @mnt: The vfsmnt for this mount
  *
  * Q. Why not use get_sb_bdev() ?
  * A. We need to select one of two root directories to mount, independent
@@ -1265,8 +1263,8 @@ static int test_gfs2_super(struct super_block *s, void *ptr)
  * Returns: 0 or -ve on error
  */
 
-static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
-		       const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *gfs2_mount(struct file_system_type *fs_type, int flags,
+		       const char *dev_name, void *data)
 {
 	struct block_device *bdev;
 	struct super_block *s;
@@ -1280,7 +1278,7 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
 
 	bdev = open_bdev_exclusive(dev_name, mode, fs_type);
 	if (IS_ERR(bdev))
-		return PTR_ERR(bdev);
+		return ERR_CAST(bdev);
 
 	/*
 	 * once the super is inserted into the list by sget, s_umount
@@ -1299,6 +1297,9 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
 	if (IS_ERR(s))
 		goto error_bdev;
 
+	if (s->s_root)
+		close_bdev_exclusive(bdev, mode);
+
 	memset(&args, 0, sizeof(args));
 	args.ar_quota = GFS2_QUOTA_DEFAULT;
 	args.ar_data = GFS2_DATA_DEFAULT;
@@ -1310,17 +1311,13 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
 	error = gfs2_mount_args(&args, data);
 	if (error) {
 		printk(KERN_WARNING "GFS2: can't parse mount arguments\n");
-		if (s->s_root)
-			goto error_super;
-		deactivate_locked_super(s);
-		return error;
+		goto error_super;
 	}
 
 	if (s->s_root) {
 		error = -EBUSY;
 		if ((flags ^ s->s_flags) & MS_RDONLY)
 			goto error_super;
-		close_bdev_exclusive(bdev, mode);
 	} else {
 		char b[BDEVNAME_SIZE];
 
@@ -1329,27 +1326,24 @@ static int gfs2_get_sb(struct file_system_type *fs_type, int flags,
 		strlcpy(s->s_id, bdevname(bdev, b), sizeof(s->s_id));
 		sb_set_blocksize(s, block_size(bdev));
 		error = fill_super(s, &args, flags & MS_SILENT ? 1 : 0);
-		if (error) {
-			deactivate_locked_super(s);
-			return error;
-		}
+		if (error)
+			goto error_super;
 		s->s_flags |= MS_ACTIVE;
 		bdev->bd_super = s;
 	}
 
 	sdp = s->s_fs_info;
-	mnt->mnt_sb = s;
 	if (args.ar_meta)
-		mnt->mnt_root = dget(sdp->sd_master_dir);
+		return dget(sdp->sd_master_dir);
 	else
-		mnt->mnt_root = dget(sdp->sd_root_dir);
-	return 0;
+		return dget(sdp->sd_root_dir);
 
 error_super:
 	deactivate_locked_super(s);
+	return ERR_PTR(error);
 error_bdev:
 	close_bdev_exclusive(bdev, mode);
-	return error;
+	return ERR_PTR(error);
 }
 
 static int set_meta_super(struct super_block *s, void *ptr)
@@ -1357,8 +1351,8 @@ static int set_meta_super(struct super_block *s, void *ptr)
 	return -EINVAL;
 }
 
-static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
-			    const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type,
+			int flags, const char *dev_name, void *data)
 {
 	struct super_block *s;
 	struct gfs2_sbd *sdp;
@@ -1369,23 +1363,21 @@ static int gfs2_get_sb_meta(struct file_system_type *fs_type, int flags,
 	if (error) {
 		printk(KERN_WARNING "GFS2: path_lookup on %s returned error %d\n",
 		       dev_name, error);
-		return error;
+		return ERR_PTR(error);
 	}
 	s = sget(&gfs2_fs_type, test_gfs2_super, set_meta_super,
 		 path.dentry->d_inode->i_sb->s_bdev);
 	path_put(&path);
 	if (IS_ERR(s)) {
 		printk(KERN_WARNING "GFS2: gfs2 mount does not exist\n");
-		return PTR_ERR(s);
+		return ERR_CAST(s);
 	}
 	if ((flags ^ s->s_flags) & MS_RDONLY) {
 		deactivate_locked_super(s);
-		return -EBUSY;
+		return ERR_PTR(-EBUSY);
 	}
 	sdp = s->s_fs_info;
-	mnt->mnt_sb = s;
-	mnt->mnt_root = dget(sdp->sd_master_dir);
-	return 0;
+	return dget(sdp->sd_master_dir);
 }
 
 static void gfs2_kill_sb(struct super_block *sb)
@@ -1411,7 +1403,7 @@ static void gfs2_kill_sb(struct super_block *sb)
 struct file_system_type gfs2_fs_type = {
 	.name = "gfs2",
 	.fs_flags = FS_REQUIRES_DEV,
-	.get_sb = gfs2_get_sb,
+	.mount = gfs2_mount,
 	.kill_sb = gfs2_kill_sb,
 	.owner = THIS_MODULE,
 };
@@ -1419,7 +1411,7 @@ struct file_system_type gfs2_fs_type = {
 struct file_system_type gfs2meta_fs_type = {
 	.name = "gfs2meta",
 	.fs_flags = FS_REQUIRES_DEV,
-	.get_sb = gfs2_get_sb_meta,
+	.mount = gfs2_mount_meta,
 	.owner = THIS_MODULE,
 };
 
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 0534510200d..12cbea7502c 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -255,7 +255,7 @@ out_parent:
 	gfs2_holder_uninit(ghs);
 	gfs2_holder_uninit(ghs + 1);
 	if (!error) {
-		atomic_inc(&inode->i_count);
+		ihold(inode);
 		d_instantiate(dentry, inode);
 		mark_inode_dirty(inode);
 	}
@@ -1294,7 +1294,7 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
 	int error;
 
 	if (!page_has_buffers(page)) {
-		error = block_prepare_write(page, from, to, gfs2_block_map);
+		error = __block_write_begin(page, from, to - from, gfs2_block_map);
 		if (unlikely(error))
 			return error;
 
@@ -1313,7 +1313,7 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
 		next += bh->b_size;
 		if (buffer_mapped(bh)) {
 			if (end) {
-				error = block_prepare_write(page, start, end,
+				error = __block_write_begin(page, start, end - start,
 							    gfs2_block_map);
 				if (unlikely(error))
 					return error;
@@ -1328,7 +1328,7 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
 	} while (next < to);
 
 	if (end) {
-		error = block_prepare_write(page, start, end, gfs2_block_map);
+		error = __block_write_begin(page, start, end - start, gfs2_block_map);
 		if (unlikely(error))
 			return error;
 		empty_write_end(page, start, end);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 58a9b9998b4..f606baf9ba7 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -631,6 +631,7 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
 			     struct fs_disk_quota *fdq)
 {
 	struct inode *inode = &ip->i_inode;
+	struct gfs2_sbd *sdp = GFS2_SB(inode);
 	struct address_space *mapping = inode->i_mapping;
 	unsigned long index = loc >> PAGE_CACHE_SHIFT;
 	unsigned offset = loc & (PAGE_CACHE_SIZE - 1);
@@ -658,11 +659,11 @@ static int gfs2_adjust_quota(struct gfs2_inode *ip, loff_t loc,
 	qd->qd_qb.qb_value = qp->qu_value;
 	if (fdq) {
 		if (fdq->d_fieldmask & FS_DQ_BSOFT) {
-			qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit);
+			qp->qu_warn = cpu_to_be64(fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift);
 			qd->qd_qb.qb_warn = qp->qu_warn;
 		}
 		if (fdq->d_fieldmask & FS_DQ_BHARD) {
-			qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit);
+			qp->qu_limit = cpu_to_be64(fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift);
 			qd->qd_qb.qb_limit = qp->qu_limit;
 		}
 	}
@@ -1497,9 +1498,9 @@ static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id,
 	fdq->d_version = FS_DQUOT_VERSION;
 	fdq->d_flags = (type == QUOTA_USER) ? FS_USER_QUOTA : FS_GROUP_QUOTA;
 	fdq->d_id = id;
-	fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit);
-	fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn);
-	fdq->d_bcount = be64_to_cpu(qlvb->qb_value);
+	fdq->d_blk_hardlimit = be64_to_cpu(qlvb->qb_limit) << sdp->sd_fsb2bb_shift;
+	fdq->d_blk_softlimit = be64_to_cpu(qlvb->qb_warn) << sdp->sd_fsb2bb_shift;
+	fdq->d_bcount = be64_to_cpu(qlvb->qb_value) << sdp->sd_fsb2bb_shift;
 
 	gfs2_glock_dq_uninit(&q_gh);
 out:
@@ -1566,10 +1567,10 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
 
 	/* If nothing has changed, this is a no-op */
 	if ((fdq->d_fieldmask & FS_DQ_BSOFT) &&
-	    (fdq->d_blk_softlimit == be64_to_cpu(qd->qd_qb.qb_warn)))
+	    ((fdq->d_blk_softlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_warn)))
 		fdq->d_fieldmask ^= FS_DQ_BSOFT;
 	if ((fdq->d_fieldmask & FS_DQ_BHARD) &&
-	    (fdq->d_blk_hardlimit == be64_to_cpu(qd->qd_qb.qb_limit)))
+	    ((fdq->d_blk_hardlimit >> sdp->sd_fsb2bb_shift) == be64_to_cpu(qd->qd_qb.qb_limit)))
 		fdq->d_fieldmask ^= FS_DQ_BHARD;
 	if (fdq->d_fieldmask == 0)
 		goto out_i;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index bef3ab6cf5c..33c8407b876 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -963,17 +963,18 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al)
  *          The inode, if one has been found, in inode.
  */
 
-static u64 try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked,
-			   u64 skip)
+static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)
 {
 	u32 goal = 0, block;
 	u64 no_addr;
 	struct gfs2_sbd *sdp = rgd->rd_sbd;
 	unsigned int n;
+	struct gfs2_glock *gl;
+	struct gfs2_inode *ip;
+	int error;
+	int found = 0;
 
-	for(;;) {
-		if (goal >= rgd->rd_data)
-			break;
+	while (goal < rgd->rd_data) {
 		down_write(&sdp->sd_log_flush_lock);
 		n = 1;
 		block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED,
@@ -990,11 +991,32 @@ static u64 try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked,
 		if (no_addr == skip)
 			continue;
 		*last_unlinked = no_addr;
-		return no_addr;
+
+		error = gfs2_glock_get(sdp, no_addr, &gfs2_inode_glops, CREATE, &gl);
+		if (error)
+			continue;
+
+		/* If the inode is already in cache, we can ignore it here
+		 * because the existing inode disposal code will deal with
+		 * it when all refs have gone away. Accessing gl_object like
+		 * this is not safe in general. Here it is ok because we do
+		 * not dereference the pointer, and we only need an approx
+		 * answer to whether it is NULL or not.
+		 */
+		ip = gl->gl_object;
+
+		if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0)
+			gfs2_glock_put(gl);
+		else
+			found++;
+
+		/* Limit reclaim to sensible number of tasks */
+		if (found > 2*NR_CPUS)
+			return;
 	}
 
 	rgd->rd_flags &= ~GFS2_RDF_CHECK;
-	return 0;
+	return;
 }
 
 /**
@@ -1075,11 +1097,9 @@ static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd)
  * Try to acquire rgrp in way which avoids contending with others.
  *
  * Returns: errno
- *          unlinked: the block address of an unlinked block to be reclaimed
  */
 
-static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
-			  u64 *last_unlinked)
+static int get_local_rgrp(struct gfs2_inode *ip, u64 *last_unlinked)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_rgrpd *rgd, *begin = NULL;
@@ -1089,7 +1109,6 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
 	int loops = 0;
 	int error, rg_locked;
 
-	*unlinked = 0;
 	rgd = gfs2_blk2rgrpd(sdp, ip->i_goal);
 
 	while (rgd) {
@@ -1106,17 +1125,10 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
 		case 0:
 			if (try_rgrp_fit(rgd, al))
 				goto out;
-			/* If the rg came in already locked, there's no
-			   way we can recover from a failed try_rgrp_unlink
-			   because that would require an iput which can only
-			   happen after the rgrp is unlocked. */
-			if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK)
-				*unlinked = try_rgrp_unlink(rgd, last_unlinked,
-							   ip->i_no_addr);
+			if (rgd->rd_flags & GFS2_RDF_CHECK)
+				try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
 			if (!rg_locked)
 				gfs2_glock_dq_uninit(&al->al_rgd_gh);
-			if (*unlinked)
-				return -EAGAIN;
 			/* fall through */
 		case GLR_TRYFAILED:
 			rgd = recent_rgrp_next(rgd);
@@ -1145,13 +1157,10 @@ static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked,
 		case 0:
 			if (try_rgrp_fit(rgd, al))
 				goto out;
-			if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK)
-				*unlinked = try_rgrp_unlink(rgd, last_unlinked,
-							    ip->i_no_addr);
+			if (rgd->rd_flags & GFS2_RDF_CHECK)
+				try_rgrp_unlink(rgd, last_unlinked, ip->i_no_addr);
 			if (!rg_locked)
 				gfs2_glock_dq_uninit(&al->al_rgd_gh);
-			if (*unlinked)
-				return -EAGAIN;
 			break;
 
 		case GLR_TRYFAILED:
@@ -1204,12 +1213,12 @@ int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 	struct gfs2_alloc *al = ip->i_alloc;
 	int error = 0;
-	u64 last_unlinked = NO_BLOCK, unlinked;
+	u64 last_unlinked = NO_BLOCK;
+	int tries = 0;
 
 	if (gfs2_assert_warn(sdp, al->al_requested))
 		return -EINVAL;
 
-try_again:
 	if (hold_rindex) {
 		/* We need to hold the rindex unless the inode we're using is
 		   the rindex itself, in which case it's already held. */
@@ -1218,31 +1227,23 @@ try_again:
 		else if (!sdp->sd_rgrps) /* We may not have the rindex read
 					    in, so: */
 			error = gfs2_ri_update_special(ip);
+		if (error)
+			return error;
 	}
 
-	if (error)
-		return error;
+	do {
+		error = get_local_rgrp(ip, &last_unlinked);
+		/* If there is no space, flushing the log may release some */
+		if (error)
+			gfs2_log_flush(sdp, NULL);
+	} while (error && tries++ < 3);
 
-	/* Find an rgrp suitable for allocation.  If it encounters any unlinked
-	   dinodes along the way, error will equal -EAGAIN and unlinked will
-	   contains it block address. We then need to look up that inode and
-	   try to free it, and try the allocation again. */
-	error = get_local_rgrp(ip, &unlinked, &last_unlinked);
 	if (error) {
 		if (hold_rindex && ip != GFS2_I(sdp->sd_rindex))
 			gfs2_glock_dq_uninit(&al->al_ri_gh);
-		if (error != -EAGAIN)
-			return error;
-
-		gfs2_process_unlinked_inode(ip->i_inode.i_sb, unlinked);
-		/* regardless of whether or not gfs2_process_unlinked_inode
-		   was successful, we don't want to repeat it again. */
-		last_unlinked = unlinked;
-		gfs2_log_flush(sdp, NULL);
-		error = 0;
-
-		goto try_again;
+		return error;
 	}
+
 	/* no error, so we have the rgrp set in the inode's allocation. */
 	al->al_file = file;
 	al->al_line = line;
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 047d1176096..2b2c4997430 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -857,7 +857,6 @@ restart:
 	gfs2_clear_rgrpd(sdp);
 	gfs2_jindex_free(sdp);
 	/*  Take apart glock structures and buffer lists  */
-	invalidate_inodes(sdp->sd_vfs);
 	gfs2_gl_hash_clear(sdp);
 	/*  Unmount the locking protocol  */
 	gfs2_lm_unmount(sdp);
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h
index 4f55651aaa5..c8cffb81e84 100644
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -147,8 +147,6 @@ struct hfs_sb_info {
 	u16 blockoffset;
 
 	int fs_div;
-
-	struct hlist_head rsrc_inodes;
 };
 
 #define HFS_FLG_BITMAP_DIRTY	0
@@ -254,17 +252,6 @@ static inline void hfs_bitmap_dirty(struct super_block *sb)
 	sb->s_dirt = 1;
 }
 
-static inline void hfs_buffer_sync(struct buffer_head *bh)
-{
-	while (buffer_locked(bh)) {
-		wait_on_buffer(bh);
-	}
-	if (buffer_dirty(bh)) {
-		ll_rw_block(WRITE, 1, &bh);
-		wait_on_buffer(bh);
-	}
-}
-
 #define sb_bread512(sb, sec, data) ({			\
 	struct buffer_head *__bh;			\
 	sector_t __block;				\
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 397b7adc7ce..dffb4e99664 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -524,7 +524,7 @@ static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry,
 	HFS_I(inode)->rsrc_inode = dir;
 	HFS_I(dir)->rsrc_inode = inode;
 	igrab(dir);
-	hlist_add_head(&inode->i_hash, &HFS_SB(dir->i_sb)->rsrc_inodes);
+	hlist_add_fake(&inode->i_hash);
 	mark_inode_dirty(inode);
 out:
 	d_add(dentry, inode);
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 86428f5ac99..1563d5ce576 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -220,7 +220,7 @@ int hfs_mdb_get(struct super_block *sb)
 		mdb->drLsMod = hfs_mtime();
 
 		mark_buffer_dirty(HFS_SB(sb)->mdb_bh);
-		hfs_buffer_sync(HFS_SB(sb)->mdb_bh);
+		sync_dirty_buffer(HFS_SB(sb)->mdb_bh);
 	}
 
 	return 0;
@@ -287,7 +287,7 @@ void hfs_mdb_commit(struct super_block *sb)
 		HFS_SB(sb)->alt_mdb->drAtrb |= cpu_to_be16(HFS_SB_ATTRIB_UNMNT);
 		HFS_SB(sb)->alt_mdb->drAtrb &= cpu_to_be16(~HFS_SB_ATTRIB_INCNSTNT);
 		mark_buffer_dirty(HFS_SB(sb)->alt_mdb_bh);
-		hfs_buffer_sync(HFS_SB(sb)->alt_mdb_bh);
+		sync_dirty_buffer(HFS_SB(sb)->alt_mdb_bh);
 	}
 
 	if (test_and_clear_bit(HFS_FLG_BITMAP_DIRTY, &HFS_SB(sb)->flags)) {
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 33254160f65..4824c27cebb 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -382,7 +382,6 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
 		return -ENOMEM;
 
 	sb->s_fs_info = sbi;
-	INIT_HLIST_HEAD(&sbi->rsrc_inodes);
 
 	res = -EINVAL;
 	if (!parse_options((char *)data, sbi)) {
@@ -442,17 +441,16 @@ bail:
 	return res;
 }
 
-static int hfs_get_sb(struct file_system_type *fs_type,
-		      int flags, const char *dev_name, void *data,
-		      struct vfsmount *mnt)
+static struct dentry *hfs_mount(struct file_system_type *fs_type,
+		      int flags, const char *dev_name, void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, hfs_fill_super, mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, hfs_fill_super);
 }
 
 static struct file_system_type hfs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "hfs",
-	.get_sb		= hfs_get_sb,
+	.mount		= hfs_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c
index d236d85ec9d..9d59c0571f5 100644
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -286,7 +286,7 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir,
 
 	inc_nlink(inode);
 	hfsplus_instantiate(dst_dentry, inode, cnid);
-	atomic_inc(&inode->i_count);
+	ihold(inode);
 	inode->i_ctime = CURRENT_TIME_SEC;
 	mark_inode_dirty(inode);
 	sbi->file_count++;
@@ -317,8 +317,10 @@ static int hfsplus_unlink(struct inode *dir, struct dentry *dentry)
 		res = hfsplus_rename_cat(inode->i_ino,
 					 dir, &dentry->d_name,
 					 sbi->hidden_dir, &str);
-		if (!res)
+		if (!res) {
 			inode->i_flags |= S_DEAD;
+			drop_nlink(inode);
+		}
 		goto out;
 	}
 	res = hfsplus_delete_cat(cnid, dir, &dentry->d_name);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 78449280dae..8afd7e84f98 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -211,7 +211,7 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent
 	 * appear hashed, but do not put on any lists.  hlist_del()
 	 * will work fine and require no locking.
 	 */
-	inode->i_hash.pprev = &inode->i_hash.next;
+	hlist_add_fake(&inode->i_hash);
 
 	mark_inode_dirty(inode);
 out:
diff --git a/fs/hfsplus/ioctl.c b/fs/hfsplus/ioctl.c
index 5b4667e08ef..40a85a3ded6 100644
--- a/fs/hfsplus/ioctl.c
+++ b/fs/hfsplus/ioctl.c
@@ -92,7 +92,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
 	mark_inode_dirty(inode);
 
 out_unlock_inode:
-	mutex_lock(&inode->i_mutex);
+	mutex_unlock(&inode->i_mutex);
 out_drop_write:
 	mnt_drop_write(file->f_path.mnt);
 out:
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 9a88d753610..52cc746d3ba 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -495,18 +495,16 @@ static void hfsplus_destroy_inode(struct inode *inode)
 
 #define HFSPLUS_INODE_SIZE	sizeof(struct hfsplus_inode_info)
 
-static int hfsplus_get_sb(struct file_system_type *fs_type,
-			  int flags, const char *dev_name, void *data,
-			  struct vfsmount *mnt)
+static struct dentry *hfsplus_mount(struct file_system_type *fs_type,
+			  int flags, const char *dev_name, void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super,
-			   mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, hfsplus_fill_super);
 }
 
 static struct file_system_type hfsplus_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "hfsplus",
-	.get_sb		= hfsplus_get_sb,
+	.mount		= hfsplus_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
diff --git a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
index 7c232c1487e..bf15a43016b 100644
--- a/fs/hostfs/hostfs.h
+++ b/fs/hostfs/hostfs.h
@@ -91,7 +91,6 @@ extern int rename_file(char *from, char *to);
 extern int do_statfs(char *root, long *bsize_out, long long *blocks_out,
 		     long long *bfree_out, long long *bavail_out,
 		     long long *files_out, long long *ffree_out,
-		     void *fsid_out, int fsid_size, long *namelen_out,
-		     long *spare_out);
+		     void *fsid_out, int fsid_size, long *namelen_out);
 
 #endif
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index f7dc9b5f9ef..2c0f148a49e 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -217,7 +217,7 @@ int hostfs_statfs(struct dentry *dentry, struct kstatfs *sf)
 	err = do_statfs(dentry->d_sb->s_fs_info,
 			&sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files,
 			&f_ffree, &sf->f_fsid, sizeof(sf->f_fsid),
-			&sf->f_namelen, sf->f_spare);
+			&sf->f_namelen);
 	if (err)
 		return err;
 	sf->f_blocks = f_blocks;
@@ -962,11 +962,11 @@ out:
 	return err;
 }
 
-static int hostfs_read_sb(struct file_system_type *type,
+static struct dentry *hostfs_read_sb(struct file_system_type *type,
 			  int flags, const char *dev_name,
-			  void *data, struct vfsmount *mnt)
+			  void *data)
 {
-	return get_sb_nodev(type, flags, data, hostfs_fill_sb_common, mnt);
+	return mount_nodev(type, flags, data, hostfs_fill_sb_common);
 }
 
 static void hostfs_kill_sb(struct super_block *s)
@@ -978,7 +978,7 @@ static void hostfs_kill_sb(struct super_block *s)
 static struct file_system_type hostfs_type = {
 	.owner 		= THIS_MODULE,
 	.name 		= "hostfs",
-	.get_sb 	= hostfs_read_sb,
+	.mount	 	= hostfs_read_sb,
 	.kill_sb	= hostfs_kill_sb,
 	.fs_flags 	= 0,
 };
diff --git a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
index 6777aa06ce2..d51a98384bc 100644
--- a/fs/hostfs/hostfs_user.c
+++ b/fs/hostfs/hostfs_user.c
@@ -94,8 +94,7 @@ void *open_dir(char *path, int *err_out)
 
 	dir = opendir(path);
 	*err_out = errno;
-	if (dir == NULL)
-		return NULL;
+
 	return dir;
 }
 
@@ -205,7 +204,7 @@ int set_attr(const char *file, struct hostfs_iattr *attrs, int fd)
 	if (attrs->ia_valid & HOSTFS_ATTR_MODE) {
 		if (fd >= 0) {
 			if (fchmod(fd, attrs->ia_mode) != 0)
-				return (-errno);
+				return -errno;
 		} else if (chmod(file, attrs->ia_mode) != 0) {
 			return -errno;
 		}
@@ -364,8 +363,7 @@ int rename_file(char *from, char *to)
 int do_statfs(char *root, long *bsize_out, long long *blocks_out,
 	      long long *bfree_out, long long *bavail_out,
 	      long long *files_out, long long *ffree_out,
-	      void *fsid_out, int fsid_size, long *namelen_out,
-	      long *spare_out)
+	      void *fsid_out, int fsid_size, long *namelen_out)
 {
 	struct statfs64 buf;
 	int err;
@@ -384,10 +382,6 @@ int do_statfs(char *root, long *bsize_out, long long *blocks_out,
 	       sizeof(buf.f_fsid) > fsid_size ? fsid_size :
 	       sizeof(buf.f_fsid));
 	*namelen_out = buf.f_namelen;
-	spare_out[0] = buf.f_spare[0];
-	spare_out[1] = buf.f_spare[1];
-	spare_out[2] = buf.f_spare[2];
-	spare_out[3] = buf.f_spare[3];
-	spare_out[4] = buf.f_spare[4];
+
 	return 0;
 }
diff --git a/fs/hpfs/buffer.c b/fs/hpfs/buffer.c
index eac5f96323e..793cb9d943d 100644
--- a/fs/hpfs/buffer.c
+++ b/fs/hpfs/buffer.c
@@ -14,7 +14,7 @@ void hpfs_lock_creation(struct super_block *s)
 #ifdef DEBUG_LOCKS
 	printk("lock creation\n");
 #endif
-	down(&hpfs_sb(s)->hpfs_creation_de);
+	mutex_lock(&hpfs_sb(s)->hpfs_creation_de);
 }
 
 void hpfs_unlock_creation(struct super_block *s)
@@ -22,7 +22,7 @@ void hpfs_unlock_creation(struct super_block *s)
 #ifdef DEBUG_LOCKS
 	printk("unlock creation\n");
 #endif
-	up(&hpfs_sb(s)->hpfs_creation_de);
+	mutex_unlock(&hpfs_sb(s)->hpfs_creation_de);
 }
 
 /* Map a sector into a buffer and return pointers to it and to the buffer. */
diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h
index b59eac0232a..2fee17d0d9a 100644
--- a/fs/hpfs/hpfs_fn.h
+++ b/fs/hpfs/hpfs_fn.h
@@ -87,7 +87,7 @@ struct hpfs_sb_info {
 	unsigned *sb_bmp_dir;		/* main bitmap directory */
 	unsigned sb_c_bitmap;		/* current bitmap */
 	unsigned sb_max_fwd_alloc;	/* max forwad allocation */
-	struct semaphore hpfs_creation_de; /* when creating dirents, nobody else
+	struct mutex hpfs_creation_de;	/* when creating dirents, nobody else
 					   can alloc blocks */
 	/*unsigned sb_mounting : 1;*/
 	int sb_timeshift;
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index c969a1aa163..6c5f01597c3 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -491,7 +491,7 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
 	sbi->sb_bmp_dir = NULL;
 	sbi->sb_cp_table = NULL;
 
-	init_MUTEX(&sbi->hpfs_creation_de);
+	mutex_init(&sbi->hpfs_creation_de);
 
 	uid = current_uid();
 	gid = current_gid();
@@ -686,17 +686,16 @@ bail0:
 	return -EINVAL;
 }
 
-static int hpfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *hpfs_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, hpfs_fill_super,
-			   mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, hpfs_fill_super);
 }
 
 static struct file_system_type hpfs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "hpfs",
-	.get_sb		= hpfs_get_sb,
+	.mount		= hpfs_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index 4e2a45ea614..f702b5f713f 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -748,17 +748,17 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
 	return(err);
 }
 
-static int hppfs_read_super(struct file_system_type *type,
+static struct dentry *hppfs_read_super(struct file_system_type *type,
 			    int flags, const char *dev_name,
-			    void *data, struct vfsmount *mnt)
+			    void *data)
 {
-	return get_sb_nodev(type, flags, data, hppfs_fill_super, mnt);
+	return mount_nodev(type, flags, data, hppfs_fill_super);
 }
 
 static struct file_system_type hppfs_type = {
 	.owner 		= THIS_MODULE,
 	.name 		= "hppfs",
-	.get_sb 	= hppfs_read_super,
+	.mount 		= hppfs_read_super,
 	.kill_sb	= kill_anon_super,
 	.fs_flags 	= 0,
 };
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index a14328d270e..a5fe68189ee 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -456,6 +456,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
 	inode = new_inode(sb);
 	if (inode) {
 		struct hugetlbfs_inode_info *info;
+		inode->i_ino = get_next_ino();
 		inode->i_mode = mode;
 		inode->i_uid = uid;
 		inode->i_gid = gid;
@@ -895,15 +896,15 @@ void hugetlb_put_quota(struct address_space *mapping, long delta)
 	}
 }
 
-static int hugetlbfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *hugetlbfs_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_nodev(fs_type, flags, data, hugetlbfs_fill_super, mnt);
+	return mount_nodev(fs_type, flags, data, hugetlbfs_fill_super);
 }
 
 static struct file_system_type hugetlbfs_fs_type = {
 	.name		= "hugetlbfs",
-	.get_sb		= hugetlbfs_get_sb,
+	.mount		= hugetlbfs_mount,
 	.kill_sb	= kill_litter_super,
 };
 
@@ -931,8 +932,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
 	if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) {
 		*user = current_user();
 		if (user_shm_lock(size, *user)) {
-			WARN_ONCE(1,
-			  "Using mlock ulimits for SHM_HUGETLB deprecated\n");
+			printk_once(KERN_WARNING "Using mlock ulimits for SHM_HUGETLB is deprecated\n");
 		} else {
 			*user = NULL;
 			return ERR_PTR(-EPERM);
diff --git a/fs/inode.c b/fs/inode.c
index 86464332e59..ae2727ab0c3 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -24,11 +24,11 @@
 #include <linux/mount.h>
 #include <linux/async.h>
 #include <linux/posix_acl.h>
+#include <linux/ima.h>
 
 /*
  * This is needed for the following functions:
  *  - inode_has_buffers
- *  - invalidate_inode_buffers
  *  - invalidate_bdev
  *
  * FIXME: remove all knowledge of the buffer layer from this file
@@ -72,8 +72,7 @@ static unsigned int i_hash_shift __read_mostly;
  * allowing for low-overhead inode sync() operations.
  */
 
-LIST_HEAD(inode_in_use);
-LIST_HEAD(inode_unused);
+static LIST_HEAD(inode_lru);
 static struct hlist_head *inode_hashtable __read_mostly;
 
 /*
@@ -103,8 +102,41 @@ static DECLARE_RWSEM(iprune_sem);
  */
 struct inodes_stat_t inodes_stat;
 
+static struct percpu_counter nr_inodes __cacheline_aligned_in_smp;
+static struct percpu_counter nr_inodes_unused __cacheline_aligned_in_smp;
+
 static struct kmem_cache *inode_cachep __read_mostly;
 
+static inline int get_nr_inodes(void)
+{
+	return percpu_counter_sum_positive(&nr_inodes);
+}
+
+static inline int get_nr_inodes_unused(void)
+{
+	return percpu_counter_sum_positive(&nr_inodes_unused);
+}
+
+int get_nr_dirty_inodes(void)
+{
+	int nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
+	return nr_dirty > 0 ? nr_dirty : 0;
+
+}
+
+/*
+ * Handle nr_inode sysctl
+ */
+#ifdef CONFIG_SYSCTL
+int proc_nr_inodes(ctl_table *table, int write,
+		   void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	inodes_stat.nr_inodes = get_nr_inodes();
+	inodes_stat.nr_unused = get_nr_inodes_unused();
+	return proc_dointvec(table, write, buffer, lenp, ppos);
+}
+#endif
+
 static void wake_up_inode(struct inode *inode)
 {
 	/*
@@ -192,6 +224,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
 	inode->i_fsnotify_mask = 0;
 #endif
 
+	percpu_counter_inc(&nr_inodes);
+
 	return 0;
 out:
 	return -ENOMEM;
@@ -232,11 +266,13 @@ void __destroy_inode(struct inode *inode)
 	if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
 		posix_acl_release(inode->i_default_acl);
 #endif
+	percpu_counter_dec(&nr_inodes);
 }
 EXPORT_SYMBOL(__destroy_inode);
 
-void destroy_inode(struct inode *inode)
+static void destroy_inode(struct inode *inode)
 {
+	BUG_ON(!list_empty(&inode->i_lru));
 	__destroy_inode(inode);
 	if (inode->i_sb->s_op->destroy_inode)
 		inode->i_sb->s_op->destroy_inode(inode);
@@ -255,6 +291,8 @@ void inode_init_once(struct inode *inode)
 	INIT_HLIST_NODE(&inode->i_hash);
 	INIT_LIST_HEAD(&inode->i_dentry);
 	INIT_LIST_HEAD(&inode->i_devices);
+	INIT_LIST_HEAD(&inode->i_wb_list);
+	INIT_LIST_HEAD(&inode->i_lru);
 	INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
 	spin_lock_init(&inode->i_data.tree_lock);
 	spin_lock_init(&inode->i_data.i_mmap_lock);
@@ -281,14 +319,109 @@ static void init_once(void *foo)
  */
 void __iget(struct inode *inode)
 {
-	if (atomic_inc_return(&inode->i_count) != 1)
-		return;
+	atomic_inc(&inode->i_count);
+}
+
+/*
+ * get additional reference to inode; caller must already hold one.
+ */
+void ihold(struct inode *inode)
+{
+	WARN_ON(atomic_inc_return(&inode->i_count) < 2);
+}
+EXPORT_SYMBOL(ihold);
+
+static void inode_lru_list_add(struct inode *inode)
+{
+	if (list_empty(&inode->i_lru)) {
+		list_add(&inode->i_lru, &inode_lru);
+		percpu_counter_inc(&nr_inodes_unused);
+	}
+}
 
-	if (!(inode->i_state & (I_DIRTY|I_SYNC)))
-		list_move(&inode->i_list, &inode_in_use);
-	inodes_stat.nr_unused--;
+static void inode_lru_list_del(struct inode *inode)
+{
+	if (!list_empty(&inode->i_lru)) {
+		list_del_init(&inode->i_lru);
+		percpu_counter_dec(&nr_inodes_unused);
+	}
+}
+
+static inline void __inode_sb_list_add(struct inode *inode)
+{
+	list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
 }
 
+/**
+ * inode_sb_list_add - add inode to the superblock list of inodes
+ * @inode: inode to add
+ */
+void inode_sb_list_add(struct inode *inode)
+{
+	spin_lock(&inode_lock);
+	__inode_sb_list_add(inode);
+	spin_unlock(&inode_lock);
+}
+EXPORT_SYMBOL_GPL(inode_sb_list_add);
+
+static inline void __inode_sb_list_del(struct inode *inode)
+{
+	list_del_init(&inode->i_sb_list);
+}
+
+static unsigned long hash(struct super_block *sb, unsigned long hashval)
+{
+	unsigned long tmp;
+
+	tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
+			L1_CACHE_BYTES;
+	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
+	return tmp & I_HASHMASK;
+}
+
+/**
+ *	__insert_inode_hash - hash an inode
+ *	@inode: unhashed inode
+ *	@hashval: unsigned long value used to locate this object in the
+ *		inode_hashtable.
+ *
+ *	Add an inode to the inode hash for this superblock.
+ */
+void __insert_inode_hash(struct inode *inode, unsigned long hashval)
+{
+	struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);
+
+	spin_lock(&inode_lock);
+	hlist_add_head(&inode->i_hash, b);
+	spin_unlock(&inode_lock);
+}
+EXPORT_SYMBOL(__insert_inode_hash);
+
+/**
+ *	__remove_inode_hash - remove an inode from the hash
+ *	@inode: inode to unhash
+ *
+ *	Remove an inode from the superblock.
+ */
+static void __remove_inode_hash(struct inode *inode)
+{
+	hlist_del_init(&inode->i_hash);
+}
+
+/**
+ *	remove_inode_hash - remove an inode from the hash
+ *	@inode: inode to unhash
+ *
+ *	Remove an inode from the superblock.
+ */
+void remove_inode_hash(struct inode *inode)
+{
+	spin_lock(&inode_lock);
+	hlist_del_init(&inode->i_hash);
+	spin_unlock(&inode_lock);
+}
+EXPORT_SYMBOL(remove_inode_hash);
+
 void end_writeback(struct inode *inode)
 {
 	might_sleep();
@@ -327,101 +460,113 @@ static void evict(struct inode *inode)
  */
 static void dispose_list(struct list_head *head)
 {
-	int nr_disposed = 0;
-
 	while (!list_empty(head)) {
 		struct inode *inode;
 
-		inode = list_first_entry(head, struct inode, i_list);
-		list_del(&inode->i_list);
+		inode = list_first_entry(head, struct inode, i_lru);
+		list_del_init(&inode->i_lru);
 
 		evict(inode);
 
 		spin_lock(&inode_lock);
-		hlist_del_init(&inode->i_hash);
-		list_del_init(&inode->i_sb_list);
+		__remove_inode_hash(inode);
+		__inode_sb_list_del(inode);
 		spin_unlock(&inode_lock);
 
 		wake_up_inode(inode);
 		destroy_inode(inode);
-		nr_disposed++;
 	}
-	spin_lock(&inode_lock);
-	inodes_stat.nr_inodes -= nr_disposed;
-	spin_unlock(&inode_lock);
 }
 
-/*
- * Invalidate all inodes for a device.
+/**
+ * evict_inodes	- evict all evictable inodes for a superblock
+ * @sb:		superblock to operate on
+ *
+ * Make sure that no inodes with zero refcount are retained.  This is
+ * called by superblock shutdown after having MS_ACTIVE flag removed,
+ * so any inode reaching zero refcount during or after that call will
+ * be immediately evicted.
  */
-static int invalidate_list(struct list_head *head, struct list_head *dispose)
+void evict_inodes(struct super_block *sb)
 {
-	struct list_head *next;
-	int busy = 0, count = 0;
-
-	next = head->next;
-	for (;;) {
-		struct list_head *tmp = next;
-		struct inode *inode;
+	struct inode *inode, *next;
+	LIST_HEAD(dispose);
 
-		/*
-		 * We can reschedule here without worrying about the list's
-		 * consistency because the per-sb list of inodes must not
-		 * change during umount anymore, and because iprune_sem keeps
-		 * shrink_icache_memory() away.
-		 */
-		cond_resched_lock(&inode_lock);
+	down_write(&iprune_sem);
 
-		next = next->next;
-		if (tmp == head)
-			break;
-		inode = list_entry(tmp, struct inode, i_sb_list);
-		if (inode->i_state & I_NEW)
+	spin_lock(&inode_lock);
+	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
+		if (atomic_read(&inode->i_count))
 			continue;
-		invalidate_inode_buffers(inode);
-		if (!atomic_read(&inode->i_count)) {
-			list_move(&inode->i_list, dispose);
-			WARN_ON(inode->i_state & I_NEW);
-			inode->i_state |= I_FREEING;
-			count++;
+
+		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
+			WARN_ON(1);
 			continue;
 		}
-		busy = 1;
+
+		inode->i_state |= I_FREEING;
+
+		/*
+		 * Move the inode off the IO lists and LRU once I_FREEING is
+		 * set so that it won't get moved back on there if it is dirty.
+		 */
+		list_move(&inode->i_lru, &dispose);
+		list_del_init(&inode->i_wb_list);
+		if (!(inode->i_state & (I_DIRTY | I_SYNC)))
+			percpu_counter_dec(&nr_inodes_unused);
 	}
-	/* only unused inodes may be cached with i_count zero */
-	inodes_stat.nr_unused -= count;
-	return busy;
+	spin_unlock(&inode_lock);
+
+	dispose_list(&dispose);
+	up_write(&iprune_sem);
 }
 
 /**
- *	invalidate_inodes	- discard the inodes on a device
- *	@sb: superblock
+ * invalidate_inodes	- attempt to free all inodes on a superblock
+ * @sb:		superblock to operate on
  *
- *	Discard all of the inodes for a given superblock. If the discard
- *	fails because there are busy inodes then a non zero value is returned.
- *	If the discard is successful all the inodes have been discarded.
+ * Attempts to free all inodes for a given superblock.  If there were any
+ * busy inodes return a non-zero value, else zero.
  */
 int invalidate_inodes(struct super_block *sb)
 {
-	int busy;
-	LIST_HEAD(throw_away);
+	int busy = 0;
+	struct inode *inode, *next;
+	LIST_HEAD(dispose);
 
 	down_write(&iprune_sem);
+
 	spin_lock(&inode_lock);
-	fsnotify_unmount_inodes(&sb->s_inodes);
-	busy = invalidate_list(&sb->s_inodes, &throw_away);
+	list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
+		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
+			continue;
+		if (atomic_read(&inode->i_count)) {
+			busy = 1;
+			continue;
+		}
+
+		inode->i_state |= I_FREEING;
+
+		/*
+		 * Move the inode off the IO lists and LRU once I_FREEING is
+		 * set so that it won't get moved back on there if it is dirty.
+		 */
+		list_move(&inode->i_lru, &dispose);
+		list_del_init(&inode->i_wb_list);
+		if (!(inode->i_state & (I_DIRTY | I_SYNC)))
+			percpu_counter_dec(&nr_inodes_unused);
+	}
 	spin_unlock(&inode_lock);
 
-	dispose_list(&throw_away);
+	dispose_list(&dispose);
 	up_write(&iprune_sem);
 
 	return busy;
 }
-EXPORT_SYMBOL(invalidate_inodes);
 
 static int can_unuse(struct inode *inode)
 {
-	if (inode->i_state)
+	if (inode->i_state & ~I_REFERENCED)
 		return 0;
 	if (inode_has_buffers(inode))
 		return 0;
@@ -433,22 +578,24 @@ static int can_unuse(struct inode *inode)
 }
 
 /*
- * Scan `goal' inodes on the unused list for freeable ones. They are moved to
- * a temporary list and then are freed outside inode_lock by dispose_list().
+ * Scan `goal' inodes on the unused list for freeable ones. They are moved to a
+ * temporary list and then are freed outside inode_lock by dispose_list().
  *
  * Any inodes which are pinned purely because of attached pagecache have their
- * pagecache removed.  We expect the final iput() on that inode to add it to
- * the front of the inode_unused list.  So look for it there and if the
- * inode is still freeable, proceed.  The right inode is found 99.9% of the
- * time in testing on a 4-way.
+ * pagecache removed.  If the inode has metadata buffers attached to
+ * mapping->private_list then try to remove them.
  *
- * If the inode has metadata buffers attached to mapping->private_list then
- * try to remove them.
+ * If the inode has the I_REFERENCED flag set, then it means that it has been
+ * used recently - the flag is set in iput_final(). When we encounter such an
+ * inode, clear the flag and move it to the back of the LRU so it gets another
+ * pass through the LRU before it gets reclaimed. This is necessary because of
+ * the fact we are doing lazy LRU updates to minimise lock contention so the
+ * LRU does not have strict ordering. Hence we don't want to reclaim inodes
+ * with this flag set because they are the inodes that are out of order.
  */
 static void prune_icache(int nr_to_scan)
 {
 	LIST_HEAD(freeable);
-	int nr_pruned = 0;
 	int nr_scanned;
 	unsigned long reap = 0;
 
@@ -457,13 +604,26 @@ static void prune_icache(int nr_to_scan)
 	for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
 		struct inode *inode;
 
-		if (list_empty(&inode_unused))
+		if (list_empty(&inode_lru))
 			break;
 
-		inode = list_entry(inode_unused.prev, struct inode, i_list);
+		inode = list_entry(inode_lru.prev, struct inode, i_lru);
 
-		if (inode->i_state || atomic_read(&inode->i_count)) {
-			list_move(&inode->i_list, &inode_unused);
+		/*
+		 * Referenced or dirty inodes are still in use. Give them
+		 * another pass through the LRU as we canot reclaim them now.
+		 */
+		if (atomic_read(&inode->i_count) ||
+		    (inode->i_state & ~I_REFERENCED)) {
+			list_del_init(&inode->i_lru);
+			percpu_counter_dec(&nr_inodes_unused);
+			continue;
+		}
+
+		/* recently referenced inodes get one more pass */
+		if (inode->i_state & I_REFERENCED) {
+			list_move(&inode->i_lru, &inode_lru);
+			inode->i_state &= ~I_REFERENCED;
 			continue;
 		}
 		if (inode_has_buffers(inode) || inode->i_data.nrpages) {
@@ -475,18 +635,23 @@ static void prune_icache(int nr_to_scan)
 			iput(inode);
 			spin_lock(&inode_lock);
 
-			if (inode != list_entry(inode_unused.next,
-						struct inode, i_list))
+			if (inode != list_entry(inode_lru.next,
+						struct inode, i_lru))
 				continue;	/* wrong inode or list_empty */
 			if (!can_unuse(inode))
 				continue;
 		}
-		list_move(&inode->i_list, &freeable);
 		WARN_ON(inode->i_state & I_NEW);
 		inode->i_state |= I_FREEING;
-		nr_pruned++;
+
+		/*
+		 * Move the inode off the IO lists and LRU once I_FREEING is
+		 * set so that it won't get moved back on there if it is dirty.
+		 */
+		list_move(&inode->i_lru, &freeable);
+		list_del_init(&inode->i_wb_list);
+		percpu_counter_dec(&nr_inodes_unused);
 	}
-	inodes_stat.nr_unused -= nr_pruned;
 	if (current_is_kswapd())
 		__count_vm_events(KSWAPD_INODESTEAL, reap);
 	else
@@ -518,7 +683,7 @@ static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
 			return -1;
 		prune_icache(nr);
 	}
-	return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
+	return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure;
 }
 
 static struct shrinker icache_shrinker = {
@@ -529,9 +694,6 @@ static struct shrinker icache_shrinker = {
 static void __wait_on_freeing_inode(struct inode *inode);
 /*
  * Called with the inode lock held.
- * NOTE: we are not increasing the inode-refcount, you must call __iget()
- * by hand after calling find_inode now! This simplifies iunique and won't
- * add any additional branch in the common code.
  */
 static struct inode *find_inode(struct super_block *sb,
 				struct hlist_head *head,
@@ -551,9 +713,10 @@ repeat:
 			__wait_on_freeing_inode(inode);
 			goto repeat;
 		}
-		break;
+		__iget(inode);
+		return inode;
 	}
-	return node ? inode : NULL;
+	return NULL;
 }
 
 /*
@@ -576,53 +739,49 @@ repeat:
 			__wait_on_freeing_inode(inode);
 			goto repeat;
 		}
-		break;
+		__iget(inode);
+		return inode;
 	}
-	return node ? inode : NULL;
-}
-
-static unsigned long hash(struct super_block *sb, unsigned long hashval)
-{
-	unsigned long tmp;
-
-	tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
-			L1_CACHE_BYTES;
-	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
-	return tmp & I_HASHMASK;
-}
-
-static inline void
-__inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
-			struct inode *inode)
-{
-	inodes_stat.nr_inodes++;
-	list_add(&inode->i_list, &inode_in_use);
-	list_add(&inode->i_sb_list, &sb->s_inodes);
-	if (head)
-		hlist_add_head(&inode->i_hash, head);
+	return NULL;
 }
 
-/**
- * inode_add_to_lists - add a new inode to relevant lists
- * @sb: superblock inode belongs to
- * @inode: inode to mark in use
+/*
+ * Each cpu owns a range of LAST_INO_BATCH numbers.
+ * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
+ * to renew the exhausted range.
  *
- * When an inode is allocated it needs to be accounted for, added to the in use
- * list, the owning superblock and the inode hash. This needs to be done under
- * the inode_lock, so export a function to do this rather than the inode lock
- * itself. We calculate the hash list to add to here so it is all internal
- * which requires the caller to have already set up the inode number in the
- * inode to add.
+ * This does not significantly increase overflow rate because every CPU can
+ * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is
+ * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the
+ * 2^32 range, and is a worst-case. Even a 50% wastage would only increase
+ * overflow rate by 2x, which does not seem too significant.
+ *
+ * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
+ * error if st_ino won't fit in target struct field. Use 32bit counter
+ * here to attempt to avoid that.
  */
-void inode_add_to_lists(struct super_block *sb, struct inode *inode)
+#define LAST_INO_BATCH 1024
+static DEFINE_PER_CPU(unsigned int, last_ino);
+
+unsigned int get_next_ino(void)
 {
-	struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino);
+	unsigned int *p = &get_cpu_var(last_ino);
+	unsigned int res = *p;
 
-	spin_lock(&inode_lock);
-	__inode_add_to_lists(sb, head, inode);
-	spin_unlock(&inode_lock);
+#ifdef CONFIG_SMP
+	if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) {
+		static atomic_t shared_last_ino;
+		int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino);
+
+		res = next - LAST_INO_BATCH;
+	}
+#endif
+
+	*p = ++res;
+	put_cpu_var(last_ino);
+	return res;
 }
-EXPORT_SYMBOL_GPL(inode_add_to_lists);
+EXPORT_SYMBOL(get_next_ino);
 
 /**
  *	new_inode 	- obtain an inode
@@ -638,12 +797,6 @@ EXPORT_SYMBOL_GPL(inode_add_to_lists);
  */
 struct inode *new_inode(struct super_block *sb)
 {
-	/*
-	 * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
-	 * error if st_ino won't fit in target struct field. Use 32bit counter
-	 * here to attempt to avoid that.
-	 */
-	static unsigned int last_ino;
 	struct inode *inode;
 
 	spin_lock_prefetch(&inode_lock);
@@ -651,8 +804,7 @@ struct inode *new_inode(struct super_block *sb)
 	inode = alloc_inode(sb);
 	if (inode) {
 		spin_lock(&inode_lock);
-		__inode_add_to_lists(sb, NULL, inode);
-		inode->i_ino = ++last_ino;
+		__inode_sb_list_add(inode);
 		inode->i_state = 0;
 		spin_unlock(&inode_lock);
 	}
@@ -663,7 +815,7 @@ EXPORT_SYMBOL(new_inode);
 void unlock_new_inode(struct inode *inode)
 {
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
-	if (inode->i_mode & S_IFDIR) {
+	if (S_ISDIR(inode->i_mode)) {
 		struct file_system_type *type = inode->i_sb->s_type;
 
 		/* Set new key only if filesystem hasn't already changed it */
@@ -720,7 +872,8 @@ static struct inode *get_new_inode(struct super_block *sb,
 			if (set(inode, data))
 				goto set_failed;
 
-			__inode_add_to_lists(sb, head, inode);
+			hlist_add_head(&inode->i_hash, head);
+			__inode_sb_list_add(inode);
 			inode->i_state = I_NEW;
 			spin_unlock(&inode_lock);
 
@@ -735,7 +888,6 @@ static struct inode *get_new_inode(struct super_block *sb,
 		 * us. Use the old inode instead of the one we just
 		 * allocated.
 		 */
-		__iget(old);
 		spin_unlock(&inode_lock);
 		destroy_inode(inode);
 		inode = old;
@@ -767,7 +919,8 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
 		old = find_inode_fast(sb, head, ino);
 		if (!old) {
 			inode->i_ino = ino;
-			__inode_add_to_lists(sb, head, inode);
+			hlist_add_head(&inode->i_hash, head);
+			__inode_sb_list_add(inode);
 			inode->i_state = I_NEW;
 			spin_unlock(&inode_lock);
 
@@ -782,7 +935,6 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
 		 * us. Use the old inode instead of the one we just
 		 * allocated.
 		 */
-		__iget(old);
 		spin_unlock(&inode_lock);
 		destroy_inode(inode);
 		inode = old;
@@ -791,6 +943,27 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
 	return inode;
 }
 
+/*
+ * search the inode cache for a matching inode number.
+ * If we find one, then the inode number we are trying to
+ * allocate is not unique and so we should not use it.
+ *
+ * Returns 1 if the inode number is unique, 0 if it is not.
+ */
+static int test_inode_iunique(struct super_block *sb, unsigned long ino)
+{
+	struct hlist_head *b = inode_hashtable + hash(sb, ino);
+	struct hlist_node *node;
+	struct inode *inode;
+
+	hlist_for_each_entry(inode, node, b, i_hash) {
+		if (inode->i_ino == ino && inode->i_sb == sb)
+			return 0;
+	}
+
+	return 1;
+}
+
 /**
  *	iunique - get a unique inode number
  *	@sb: superblock
@@ -812,19 +985,18 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved)
 	 * error if st_ino won't fit in target struct field. Use 32bit counter
 	 * here to attempt to avoid that.
 	 */
+	static DEFINE_SPINLOCK(iunique_lock);
 	static unsigned int counter;
-	struct inode *inode;
-	struct hlist_head *head;
 	ino_t res;
 
 	spin_lock(&inode_lock);
+	spin_lock(&iunique_lock);
 	do {
 		if (counter <= max_reserved)
 			counter = max_reserved + 1;
 		res = counter++;
-		head = inode_hashtable + hash(sb, res);
-		inode = find_inode_fast(sb, head, res);
-	} while (inode != NULL);
+	} while (!test_inode_iunique(sb, res));
+	spin_unlock(&iunique_lock);
 	spin_unlock(&inode_lock);
 
 	return res;
@@ -876,7 +1048,6 @@ static struct inode *ifind(struct super_block *sb,
 	spin_lock(&inode_lock);
 	inode = find_inode(sb, head, test, data);
 	if (inode) {
-		__iget(inode);
 		spin_unlock(&inode_lock);
 		if (likely(wait))
 			wait_on_inode(inode);
@@ -909,7 +1080,6 @@ static struct inode *ifind_fast(struct super_block *sb,
 	spin_lock(&inode_lock);
 	inode = find_inode_fast(sb, head, ino);
 	if (inode) {
-		__iget(inode);
 		spin_unlock(&inode_lock);
 		wait_on_inode(inode);
 		return inode;
@@ -1095,7 +1265,7 @@ int insert_inode_locked(struct inode *inode)
 		__iget(old);
 		spin_unlock(&inode_lock);
 		wait_on_inode(old);
-		if (unlikely(!hlist_unhashed(&old->i_hash))) {
+		if (unlikely(!inode_unhashed(old))) {
 			iput(old);
 			return -EBUSY;
 		}
@@ -1134,7 +1304,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
 		__iget(old);
 		spin_unlock(&inode_lock);
 		wait_on_inode(old);
-		if (unlikely(!hlist_unhashed(&old->i_hash))) {
+		if (unlikely(!inode_unhashed(old))) {
 			iput(old);
 			return -EBUSY;
 		}
@@ -1143,36 +1313,6 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
 }
 EXPORT_SYMBOL(insert_inode_locked4);
 
-/**
- *	__insert_inode_hash - hash an inode
- *	@inode: unhashed inode
- *	@hashval: unsigned long value used to locate this object in the
- *		inode_hashtable.
- *
- *	Add an inode to the inode hash for this superblock.
- */
-void __insert_inode_hash(struct inode *inode, unsigned long hashval)
-{
-	struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
-	spin_lock(&inode_lock);
-	hlist_add_head(&inode->i_hash, head);
-	spin_unlock(&inode_lock);
-}
-EXPORT_SYMBOL(__insert_inode_hash);
-
-/**
- *	remove_inode_hash - remove an inode from the hash
- *	@inode: inode to unhash
- *
- *	Remove an inode from the superblock.
- */
-void remove_inode_hash(struct inode *inode)
-{
-	spin_lock(&inode_lock);
-	hlist_del_init(&inode->i_hash);
-	spin_unlock(&inode_lock);
-}
-EXPORT_SYMBOL(remove_inode_hash);
 
 int generic_delete_inode(struct inode *inode)
 {
@@ -1187,7 +1327,7 @@ EXPORT_SYMBOL(generic_delete_inode);
  */
 int generic_drop_inode(struct inode *inode)
 {
-	return !inode->i_nlink || hlist_unhashed(&inode->i_hash);
+	return !inode->i_nlink || inode_unhashed(inode);
 }
 EXPORT_SYMBOL_GPL(generic_drop_inode);
 
@@ -1213,10 +1353,11 @@ static void iput_final(struct inode *inode)
 		drop = generic_drop_inode(inode);
 
 	if (!drop) {
-		if (!(inode->i_state & (I_DIRTY|I_SYNC)))
-			list_move(&inode->i_list, &inode_unused);
-		inodes_stat.nr_unused++;
 		if (sb->s_flags & MS_ACTIVE) {
+			inode->i_state |= I_REFERENCED;
+			if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
+				inode_lru_list_add(inode);
+			}
 			spin_unlock(&inode_lock);
 			return;
 		}
@@ -1227,19 +1368,23 @@ static void iput_final(struct inode *inode)
 		spin_lock(&inode_lock);
 		WARN_ON(inode->i_state & I_NEW);
 		inode->i_state &= ~I_WILL_FREE;
-		inodes_stat.nr_unused--;
-		hlist_del_init(&inode->i_hash);
+		__remove_inode_hash(inode);
 	}
-	list_del_init(&inode->i_list);
-	list_del_init(&inode->i_sb_list);
+
 	WARN_ON(inode->i_state & I_NEW);
 	inode->i_state |= I_FREEING;
-	inodes_stat.nr_inodes--;
+
+	/*
+	 * Move the inode off the IO lists and LRU once I_FREEING is
+	 * set so that it won't get moved back on there if it is dirty.
+	 */
+	inode_lru_list_del(inode);
+	list_del_init(&inode->i_wb_list);
+
+	__inode_sb_list_del(inode);
 	spin_unlock(&inode_lock);
 	evict(inode);
-	spin_lock(&inode_lock);
-	hlist_del_init(&inode->i_hash);
-	spin_unlock(&inode_lock);
+	remove_inode_hash(inode);
 	wake_up_inode(inode);
 	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
 	destroy_inode(inode);
@@ -1503,6 +1648,8 @@ void __init inode_init(void)
 					 SLAB_MEM_SPREAD),
 					 init_once);
 	register_shrinker(&icache_shrinker);
+	percpu_counter_init(&nr_inodes, 0);
+	percpu_counter_init(&nr_inodes_unused, 0);
 
 	/* Hash may have been set up in inode_init_early */
 	if (!hashdist)
diff --git a/fs/internal.h b/fs/internal.h
index a6910e91cee..e43b9a4dbf4 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -101,3 +101,10 @@ extern void put_super(struct super_block *sb);
 struct nameidata;
 extern struct file *nameidata_to_filp(struct nameidata *);
 extern void release_open_intent(struct nameidata *);
+
+/*
+ * inode.c
+ */
+extern int get_nr_dirty_inodes(void);
+extern void evict_inodes(struct super_block *);
+extern int invalidate_inodes(struct super_block *);
diff --git a/fs/ioctl.c b/fs/ioctl.c
index f855ea4fc88..d6cc1647662 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -6,7 +6,6 @@
 
 #include <linux/syscalls.h>
 #include <linux/mm.h>
-#include <linux/smp_lock.h>
 #include <linux/capability.h>
 #include <linux/file.h>
 #include <linux/fs.h>
diff --git a/fs/ioprio.c b/fs/ioprio.c
index 748cfb92dcc..7da2a06508e 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -103,12 +103,7 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
 	}
 
 	ret = -ESRCH;
-	/*
-	 * We want IOPRIO_WHO_PGRP/IOPRIO_WHO_USER to be "atomic",
-	 * so we can't use rcu_read_lock(). See re-copy of ->ioprio
-	 * in copy_process().
-	 */
-	read_lock(&tasklist_lock);
+	rcu_read_lock();
 	switch (which) {
 		case IOPRIO_WHO_PROCESS:
 			if (!who)
@@ -153,7 +148,7 @@ free_uid:
 			ret = -EINVAL;
 	}
 
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 	return ret;
 }
 
@@ -197,7 +192,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
 	int ret = -ESRCH;
 	int tmpio;
 
-	read_lock(&tasklist_lock);
+	rcu_read_lock();
 	switch (which) {
 		case IOPRIO_WHO_PROCESS:
 			if (!who)
@@ -250,6 +245,6 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
 			ret = -EINVAL;
 	}
 
-	read_unlock(&tasklist_lock);
+	rcu_read_unlock();
 	return ret;
 }
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index 09ff41a752a..bfdeb82a53b 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -544,6 +544,34 @@ static unsigned int isofs_get_last_session(struct super_block *sb, s32 session)
 }
 
 /*
+ * Check if root directory is empty (has less than 3 files).
+ *
+ * Used to detect broken CDs where ISO root directory is empty but Joliet root
+ * directory is OK. If such CD has Rock Ridge extensions, they will be disabled
+ * (and Joliet used instead) or else no files would be visible.
+ */
+static bool rootdir_empty(struct super_block *sb, unsigned long block)
+{
+	int offset = 0, files = 0, de_len;
+	struct iso_directory_record *de;
+	struct buffer_head *bh;
+
+	bh = sb_bread(sb, block);
+	if (!bh)
+		return true;
+	while (files < 3) {
+		de = (struct iso_directory_record *) (bh->b_data + offset);
+		de_len = *(unsigned char *) de;
+		if (de_len == 0)
+			break;
+		files++;
+		offset += de_len;
+	}
+	brelse(bh);
+	return files < 3;
+}
+
+/*
  * Initialize the superblock and read the root inode.
  *
  * Note: a check_disk_change() has been done immediately prior
@@ -843,6 +871,18 @@ root_found:
 		goto out_no_root;
 
 	/*
+	 * Fix for broken CDs with Rock Ridge and empty ISO root directory but
+	 * correct Joliet root directory.
+	 */
+	if (sbi->s_rock == 1 && joliet_level &&
+				rootdir_empty(s, sbi->s_firstdatazone)) {
+		printk(KERN_NOTICE
+			"ISOFS: primary root directory is empty. "
+			"Disabling Rock Ridge and switching to Joliet.");
+		sbi->s_rock = 0;
+	}
+
+	/*
 	 * If this disk has both Rock Ridge and Joliet on it, then we
 	 * want to use Rock Ridge by default.  This can be overridden
 	 * by using the norock mount option.  There is still one other
@@ -962,25 +1002,23 @@ static int isofs_statfs (struct dentry *dentry, struct kstatfs *buf)
  * or getblk() if they are not.  Returns the number of blocks inserted
  * (-ve == error.)
  */
-int isofs_get_blocks(struct inode *inode, sector_t iblock_s,
+int isofs_get_blocks(struct inode *inode, sector_t iblock,
 		     struct buffer_head **bh, unsigned long nblocks)
 {
-	unsigned long b_off;
+	unsigned long b_off = iblock;
 	unsigned offset, sect_size;
 	unsigned int firstext;
 	unsigned long nextblk, nextoff;
-	long iblock = (long)iblock_s;
 	int section, rv, error;
 	struct iso_inode_info *ei = ISOFS_I(inode);
 
 	error = -EIO;
 	rv = 0;
-	if (iblock < 0 || iblock != iblock_s) {
+	if (iblock != b_off) {
 		printk(KERN_DEBUG "%s: block number too large\n", __func__);
 		goto abort;
 	}
 
-	b_off = iblock;
 
 	offset = 0;
 	firstext = ei->i_first_extent;
@@ -998,8 +1036,9 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s,
 		 * I/O errors.
 		 */
 		if (b_off > ((inode->i_size + PAGE_CACHE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) {
-			printk(KERN_DEBUG "%s: block >= EOF (%ld, %ld)\n",
-				__func__, iblock, (unsigned long) inode->i_size);
+			printk(KERN_DEBUG "%s: block >= EOF (%lu, %llu)\n",
+				__func__, b_off,
+				(unsigned long long)inode->i_size);
 			goto abort;
 		}
 
@@ -1025,9 +1064,9 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s,
 			if (++section > 100) {
 				printk(KERN_DEBUG "%s: More than 100 file sections ?!?"
 					" aborting...\n", __func__);
-				printk(KERN_DEBUG "%s: block=%ld firstext=%u sect_size=%u "
+				printk(KERN_DEBUG "%s: block=%lu firstext=%u sect_size=%u "
 					"nextblk=%lu nextoff=%lu\n", __func__,
-					iblock, firstext, (unsigned) sect_size,
+					b_off, firstext, (unsigned) sect_size,
 					nextblk, nextoff);
 				goto abort;
 			}
@@ -1468,17 +1507,16 @@ struct inode *isofs_iget(struct super_block *sb,
 	return inode;
 }
 
-static int isofs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *isofs_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, isofs_fill_super,
-				mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, isofs_fill_super);
 }
 
 static struct file_system_type iso9660_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "iso9660",
-	.get_sb		= isofs_get_sb,
+	.mount		= isofs_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 05a38b9c4c0..e4b87bc1fa5 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -221,7 +221,7 @@ restart:
 			goto restart;
 		}
 		if (buffer_locked(bh)) {
-			atomic_inc(&bh->b_count);
+			get_bh(bh);
 			spin_unlock(&journal->j_list_lock);
 			jbd_unlock_bh_state(bh);
 			wait_on_buffer(bh);
@@ -283,7 +283,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
 	int ret = 0;
 
 	if (buffer_locked(bh)) {
-		atomic_inc(&bh->b_count);
+		get_bh(bh);
 		spin_unlock(&journal->j_list_lock);
 		jbd_unlock_bh_state(bh);
 		wait_on_buffer(bh);
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 85a6883c0ac..34a4861c14b 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -587,13 +587,13 @@ void journal_commit_transaction(journal_t *journal)
 		/* Bump b_count to prevent truncate from stumbling over
                    the shadowed buffer!  @@@ This can go if we ever get
                    rid of the BJ_IO/BJ_Shadow pairing of buffers. */
-		atomic_inc(&jh2bh(jh)->b_count);
+		get_bh(jh2bh(jh));
 
 		/* Make a temporary IO buffer with which to write it out
                    (this will requeue both the metadata buffer and the
                    temporary IO buffer). new_bh goes on BJ_IO*/
 
-		set_bit(BH_JWrite, &jh2bh(jh)->b_state);
+		set_buffer_jwrite(jh2bh(jh));
 		/*
 		 * akpm: journal_write_metadata_buffer() sets
 		 * new_bh->b_transaction to commit_transaction.
@@ -603,7 +603,7 @@ void journal_commit_transaction(journal_t *journal)
 		JBUFFER_TRACE(jh, "ph3: write metadata");
 		flags = journal_write_metadata_buffer(commit_transaction,
 						      jh, &new_jh, blocknr);
-		set_bit(BH_JWrite, &jh2bh(new_jh)->b_state);
+		set_buffer_jwrite(jh2bh(new_jh));
 		wbuf[bufs++] = jh2bh(new_jh);
 
 		/* Record the new block's tag in the current descriptor
@@ -713,7 +713,7 @@ wait_for_iobuf:
                    shadowed buffer */
 		jh = commit_transaction->t_shadow_list->b_tprev;
 		bh = jh2bh(jh);
-		clear_bit(BH_JWrite, &bh->b_state);
+		clear_buffer_jwrite(bh);
 		J_ASSERT_BH(bh, buffer_jbddirty(bh));
 
 		/* The metadata is now released for reuse, but we need
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 2c4b1f109da..da1b5e4ffce 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -36,6 +36,7 @@
 #include <linux/poison.h>
 #include <linux/proc_fs.h>
 #include <linux/debugfs.h>
+#include <linux/ratelimit.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
@@ -84,6 +85,7 @@ EXPORT_SYMBOL(journal_force_commit);
 
 static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *);
 static void __journal_abort_soft (journal_t *journal, int errno);
+static const char *journal_dev_name(journal_t *journal, char *buffer);
 
 /*
  * Helper function used to manage commit timeouts
@@ -439,7 +441,7 @@ int __log_start_commit(journal_t *journal, tid_t target)
 	 */
 	if (!tid_geq(journal->j_commit_request, target)) {
 		/*
-		 * We want a new commit: OK, mark the request and wakup the
+		 * We want a new commit: OK, mark the request and wakeup the
 		 * commit thread.  We do _not_ do the commit ourselves.
 		 */
 
@@ -950,6 +952,8 @@ int journal_create(journal_t *journal)
 		if (err)
 			return err;
 		bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
+		if (unlikely(!bh))
+			return -ENOMEM;
 		lock_buffer(bh);
 		memset (bh->b_data, 0, journal->j_blocksize);
 		BUFFER_TRACE(bh, "marking dirty");
@@ -1010,6 +1014,23 @@ void journal_update_superblock(journal_t *journal, int wait)
 		goto out;
 	}
 
+	if (buffer_write_io_error(bh)) {
+		char b[BDEVNAME_SIZE];
+		/*
+		 * Oh, dear.  A previous attempt to write the journal
+		 * superblock failed.  This could happen because the
+		 * USB device was yanked out.  Or it could happen to
+		 * be a transient write error and maybe the block will
+		 * be remapped.  Nothing we can do but to retry the
+		 * write and hope for the best.
+		 */
+		printk(KERN_ERR "JBD: previous I/O error detected "
+		       "for journal superblock update for %s.\n",
+		       journal_dev_name(journal, b));
+		clear_buffer_write_io_error(bh);
+		set_buffer_uptodate(bh);
+	}
+
 	spin_lock(&journal->j_state_lock);
 	jbd_debug(1,"JBD: updating superblock (start %u, seq %d, errno %d)\n",
 		  journal->j_tail, journal->j_tail_sequence, journal->j_errno);
@@ -1021,9 +1042,17 @@ void journal_update_superblock(journal_t *journal, int wait)
 
 	BUFFER_TRACE(bh, "marking dirty");
 	mark_buffer_dirty(bh);
-	if (wait)
+	if (wait) {
 		sync_dirty_buffer(bh);
-	else
+		if (buffer_write_io_error(bh)) {
+			char b[BDEVNAME_SIZE];
+			printk(KERN_ERR "JBD: I/O error detected "
+			       "when updating journal superblock for %s.\n",
+			       journal_dev_name(journal, b));
+			clear_buffer_write_io_error(bh);
+			set_buffer_uptodate(bh);
+		}
+	} else
 		write_dirty_buffer(bh, WRITE);
 
 out:
@@ -1719,7 +1748,6 @@ static void journal_destroy_journal_head_cache(void)
 static struct journal_head *journal_alloc_journal_head(void)
 {
 	struct journal_head *ret;
-	static unsigned long last_warning;
 
 #ifdef CONFIG_JBD_DEBUG
 	atomic_inc(&nr_journal_heads);
@@ -1727,11 +1755,9 @@ static struct journal_head *journal_alloc_journal_head(void)
 	ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
 	if (ret == NULL) {
 		jbd_debug(1, "out of memory for journal_head\n");
-		if (time_after(jiffies, last_warning + 5*HZ)) {
-			printk(KERN_NOTICE "ENOMEM in %s, retrying.\n",
-			       __func__);
-			last_warning = jiffies;
-		}
+		printk_ratelimited(KERN_NOTICE "ENOMEM in %s, retrying.\n",
+				   __func__);
+
 		while (ret == NULL) {
 			yield();
 			ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS);
diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c
index 81051dafebf..5b43e96788e 100644
--- a/fs/jbd/recovery.c
+++ b/fs/jbd/recovery.c
@@ -296,10 +296,10 @@ int journal_skip_recovery(journal_t *journal)
 #ifdef CONFIG_JBD_DEBUG
 		int dropped = info.end_transaction -
 			      be32_to_cpu(journal->j_superblock->s_sequence);
-#endif
 		jbd_debug(1,
 			  "JBD: ignoring %d transaction%s from the journal.\n",
 			  dropped, (dropped == 1) ? "" : "s");
+#endif
 		journal->j_transaction_sequence = ++info.end_transaction;
 	}
 
diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c
index 5ae71e75a49..846a3f31411 100644
--- a/fs/jbd/transaction.c
+++ b/fs/jbd/transaction.c
@@ -293,9 +293,7 @@ handle_t *journal_start(journal_t *journal, int nblocks)
 		jbd_free_handle(handle);
 		current->journal_info = NULL;
 		handle = ERR_PTR(err);
-		goto out;
 	}
-out:
 	return handle;
 }
 
@@ -528,7 +526,7 @@ do_get_write_access(handle_t *handle, struct journal_head *jh,
 	transaction = handle->h_transaction;
 	journal = transaction->t_journal;
 
-	jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy);
+	jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy);
 
 	JBUFFER_TRACE(jh, "entry");
 repeat:
@@ -713,7 +711,7 @@ done:
 		J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)),
 			    "Possible IO failure.\n");
 		page = jh2bh(jh)->b_page;
-		offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
+		offset = offset_in_page(jh2bh(jh)->b_data);
 		source = kmap_atomic(page, KM_USER0);
 		memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
 		kunmap_atomic(source, KM_USER0);
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 6571a056e55..6a79fd0a1a3 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -299,6 +299,16 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
 		transaction->t_chp_stats.cs_forced_to_close++;
 		spin_unlock(&journal->j_list_lock);
 		jbd_unlock_bh_state(bh);
+		if (unlikely(journal->j_flags & JBD2_UNMOUNT))
+			/*
+			 * The journal thread is dead; so starting and
+			 * waiting for a commit to finish will cause
+			 * us to wait for a _very_ long time.
+			 */
+			printk(KERN_ERR "JBD2: %s: "
+			       "Waiting for Godot: block %llu\n",
+			       journal->j_devname,
+			       (unsigned long long) bh->b_blocknr);
 		jbd2_log_start_commit(journal, tid);
 		jbd2_log_wait_commit(journal, tid);
 		ret = 1;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index bc6be8bda1c..f3ad1598b20 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -26,7 +26,9 @@
 #include <linux/backing-dev.h>
 #include <linux/bio.h>
 #include <linux/blkdev.h>
+#include <linux/bitops.h>
 #include <trace/events/jbd2.h>
+#include <asm/system.h>
 
 /*
  * Default IO end handler for temporary BJ_IO buffer_heads.
@@ -201,7 +203,7 @@ static int journal_submit_data_buffers(journal_t *journal,
 	spin_lock(&journal->j_list_lock);
 	list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
 		mapping = jinode->i_vfs_inode->i_mapping;
-		jinode->i_flags |= JI_COMMIT_RUNNING;
+		set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
 		spin_unlock(&journal->j_list_lock);
 		/*
 		 * submit the inode data buffers. We use writepage
@@ -216,7 +218,8 @@ static int journal_submit_data_buffers(journal_t *journal,
 		spin_lock(&journal->j_list_lock);
 		J_ASSERT(jinode->i_transaction == commit_transaction);
 		commit_transaction->t_flushed_data_blocks = 1;
-		jinode->i_flags &= ~JI_COMMIT_RUNNING;
+		clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
+		smp_mb__after_clear_bit();
 		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
 	}
 	spin_unlock(&journal->j_list_lock);
@@ -237,7 +240,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
 	/* For locking, see the comment in journal_submit_data_buffers() */
 	spin_lock(&journal->j_list_lock);
 	list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
-		jinode->i_flags |= JI_COMMIT_RUNNING;
+		set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
 		spin_unlock(&journal->j_list_lock);
 		err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
 		if (err) {
@@ -253,7 +256,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
 				ret = err;
 		}
 		spin_lock(&journal->j_list_lock);
-		jinode->i_flags &= ~JI_COMMIT_RUNNING;
+		clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
+		smp_mb__after_clear_bit();
 		wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
 	}
 
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 262419f83d8..f837ba95352 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -42,12 +42,14 @@
 #include <linux/log2.h>
 #include <linux/vmalloc.h>
 #include <linux/backing-dev.h>
+#include <linux/bitops.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/jbd2.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
+#include <asm/system.h>
 
 EXPORT_SYMBOL(jbd2_journal_extend);
 EXPORT_SYMBOL(jbd2_journal_stop);
@@ -478,7 +480,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
 	 */
 	if (!tid_geq(journal->j_commit_request, target)) {
 		/*
-		 * We want a new commit: OK, mark the request and wakup the
+		 * We want a new commit: OK, mark the request and wakeup the
 		 * commit thread.  We do _not_ do the commit ourselves.
 		 */
 
@@ -897,6 +899,14 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
 
 	/* journal descriptor can store up to n blocks -bzzz */
 	journal->j_blocksize = blocksize;
+	journal->j_dev = bdev;
+	journal->j_fs_dev = fs_dev;
+	journal->j_blk_offset = start;
+	journal->j_maxlen = len;
+	bdevname(journal->j_dev, journal->j_devname);
+	p = journal->j_devname;
+	while ((p = strchr(p, '/')))
+		*p = '!';
 	jbd2_stats_proc_init(journal);
 	n = journal->j_blocksize / sizeof(journal_block_tag_t);
 	journal->j_wbufsize = n;
@@ -906,14 +916,6 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev,
 			__func__);
 		goto out_err;
 	}
-	journal->j_dev = bdev;
-	journal->j_fs_dev = fs_dev;
-	journal->j_blk_offset = start;
-	journal->j_maxlen = len;
-	bdevname(journal->j_dev, journal->j_devname);
-	p = journal->j_devname;
-	while ((p = strchr(p, '/')))
-		*p = '!';
 
 	bh = __getblk(journal->j_dev, start, journal->j_blocksize);
 	if (!bh) {
@@ -1836,7 +1838,6 @@ size_t journal_tag_bytes(journal_t *journal)
  */
 #define JBD2_MAX_SLABS 8
 static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS];
-static DECLARE_MUTEX(jbd2_slab_create_sem);
 
 static const char *jbd2_slab_names[JBD2_MAX_SLABS] = {
 	"jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k",
@@ -1857,6 +1858,7 @@ static void jbd2_journal_destroy_slabs(void)
 
 static int jbd2_journal_create_slab(size_t size)
 {
+	static DEFINE_MUTEX(jbd2_slab_create_mutex);
 	int i = order_base_2(size) - 10;
 	size_t slab_size;
 
@@ -1868,16 +1870,16 @@ static int jbd2_journal_create_slab(size_t size)
 
 	if (unlikely(i < 0))
 		i = 0;
-	down(&jbd2_slab_create_sem);
+	mutex_lock(&jbd2_slab_create_mutex);
 	if (jbd2_slab[i]) {
-		up(&jbd2_slab_create_sem);
+		mutex_unlock(&jbd2_slab_create_mutex);
 		return 0;	/* Already created */
 	}
 
 	slab_size = 1 << (i+10);
 	jbd2_slab[i] = kmem_cache_create(jbd2_slab_names[i], slab_size,
 					 slab_size, 0, NULL);
-	up(&jbd2_slab_create_sem);
+	mutex_unlock(&jbd2_slab_create_mutex);
 	if (!jbd2_slab[i]) {
 		printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n");
 		return -ENOMEM;
@@ -2210,7 +2212,7 @@ void jbd2_journal_release_jbd_inode(journal_t *journal,
 restart:
 	spin_lock(&journal->j_list_lock);
 	/* Is commit writing out inode - we have to wait */
-	if (jinode->i_flags & JI_COMMIT_RUNNING) {
+	if (test_bit(__JI_COMMIT_RUNNING, &jinode->i_flags)) {
 		wait_queue_head_t *wq;
 		DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
 		wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index f3479d6e0a8..6bf0a242613 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -156,6 +156,7 @@ alloc_transaction:
 	 */
 repeat:
 	read_lock(&journal->j_state_lock);
+	BUG_ON(journal->j_flags & JBD2_UNMOUNT);
 	if (is_journal_aborted(journal) ||
 	    (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
 		read_unlock(&journal->j_state_lock);
diff --git a/fs/jffs2/build.c b/fs/jffs2/build.c
index a906f538d11..85c6be2db02 100644
--- a/fs/jffs2/build.c
+++ b/fs/jffs2/build.c
@@ -23,7 +23,7 @@ static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *,
 static inline struct jffs2_inode_cache *
 first_inode_chain(int *i, struct jffs2_sb_info *c)
 {
-	for (; *i < INOCACHE_HASHSIZE; (*i)++) {
+	for (; *i < c->inocache_hashsize; (*i)++) {
 		if (c->inocache_list[*i])
 			return c->inocache_list[*i];
 	}
diff --git a/fs/jffs2/compr.c b/fs/jffs2/compr.c
index 617a1e5694c..de4247021d2 100644
--- a/fs/jffs2/compr.c
+++ b/fs/jffs2/compr.c
@@ -103,7 +103,7 @@ uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
 			spin_unlock(&jffs2_compressor_list_lock);
 			*datalen  = orig_slen;
 			*cdatalen = orig_dlen;
-			compr_ret = this->compress(data_in, output_buf, datalen, cdatalen, NULL);
+			compr_ret = this->compress(data_in, output_buf, datalen, cdatalen);
 			spin_lock(&jffs2_compressor_list_lock);
 			this->usecount--;
 			if (!compr_ret) {
@@ -152,7 +152,7 @@ uint16_t jffs2_compress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
 			spin_unlock(&jffs2_compressor_list_lock);
 			*datalen  = orig_slen;
 			*cdatalen = orig_dlen;
-			compr_ret = this->compress(data_in, this->compr_buf, datalen, cdatalen, NULL);
+			compr_ret = this->compress(data_in, this->compr_buf, datalen, cdatalen);
 			spin_lock(&jffs2_compressor_list_lock);
 			this->usecount--;
 			if (!compr_ret) {
@@ -220,7 +220,7 @@ int jffs2_decompress(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
 			if (comprtype == this->compr) {
 				this->usecount++;
 				spin_unlock(&jffs2_compressor_list_lock);
-				ret = this->decompress(cdata_in, data_out, cdatalen, datalen, NULL);
+				ret = this->decompress(cdata_in, data_out, cdatalen, datalen);
 				spin_lock(&jffs2_compressor_list_lock);
 				if (ret) {
 					printk(KERN_WARNING "Decompressor \"%s\" returned %d\n", this->name, ret);
diff --git a/fs/jffs2/compr.h b/fs/jffs2/compr.h
index e471a9106fd..13bb7597ab3 100644
--- a/fs/jffs2/compr.h
+++ b/fs/jffs2/compr.h
@@ -49,9 +49,9 @@ struct jffs2_compressor {
 	char *name;
 	char compr;			/* JFFS2_COMPR_XXX */
 	int (*compress)(unsigned char *data_in, unsigned char *cpage_out,
-			uint32_t *srclen, uint32_t *destlen, void *model);
+			uint32_t *srclen, uint32_t *destlen);
 	int (*decompress)(unsigned char *cdata_in, unsigned char *data_out,
-			  uint32_t cdatalen, uint32_t datalen, void *model);
+			  uint32_t cdatalen, uint32_t datalen);
 	int usecount;
 	int disabled;		/* if set the compressor won't compress */
 	unsigned char *compr_buf;	/* used by size compr. mode */
diff --git a/fs/jffs2/compr_lzo.c b/fs/jffs2/compr_lzo.c
index ed25ae7c98e..af186ee674d 100644
--- a/fs/jffs2/compr_lzo.c
+++ b/fs/jffs2/compr_lzo.c
@@ -42,7 +42,7 @@ static int __init alloc_workspace(void)
 }
 
 static int jffs2_lzo_compress(unsigned char *data_in, unsigned char *cpage_out,
-			      uint32_t *sourcelen, uint32_t *dstlen, void *model)
+			      uint32_t *sourcelen, uint32_t *dstlen)
 {
 	size_t compress_size;
 	int ret;
@@ -67,7 +67,7 @@ static int jffs2_lzo_compress(unsigned char *data_in, unsigned char *cpage_out,
 }
 
 static int jffs2_lzo_decompress(unsigned char *data_in, unsigned char *cpage_out,
-				 uint32_t srclen, uint32_t destlen, void *model)
+				 uint32_t srclen, uint32_t destlen)
 {
 	size_t dl = destlen;
 	int ret;
diff --git a/fs/jffs2/compr_rtime.c b/fs/jffs2/compr_rtime.c
index 9696ad9ef5f..16a5047903a 100644
--- a/fs/jffs2/compr_rtime.c
+++ b/fs/jffs2/compr_rtime.c
@@ -31,8 +31,7 @@
 /* _compress returns the compressed size, -1 if bigger */
 static int jffs2_rtime_compress(unsigned char *data_in,
 				unsigned char *cpage_out,
-				uint32_t *sourcelen, uint32_t *dstlen,
-				void *model)
+				uint32_t *sourcelen, uint32_t *dstlen)
 {
 	short positions[256];
 	int outpos = 0;
@@ -73,8 +72,7 @@ static int jffs2_rtime_compress(unsigned char *data_in,
 
 static int jffs2_rtime_decompress(unsigned char *data_in,
 				  unsigned char *cpage_out,
-				  uint32_t srclen, uint32_t destlen,
-				  void *model)
+				  uint32_t srclen, uint32_t destlen)
 {
 	short positions[256];
 	int outpos = 0;
diff --git a/fs/jffs2/compr_rubin.c b/fs/jffs2/compr_rubin.c
index a12b4f76337..9e7cec808c4 100644
--- a/fs/jffs2/compr_rubin.c
+++ b/fs/jffs2/compr_rubin.c
@@ -298,7 +298,7 @@ static int rubin_do_compress(int bit_divider, int *bits, unsigned char *data_in,
 #if 0
 /* _compress returns the compressed size, -1 if bigger */
 int jffs2_rubinmips_compress(unsigned char *data_in, unsigned char *cpage_out,
-		   uint32_t *sourcelen, uint32_t *dstlen, void *model)
+		   uint32_t *sourcelen, uint32_t *dstlen)
 {
 	return rubin_do_compress(BIT_DIVIDER_MIPS, bits_mips, data_in,
 				 cpage_out, sourcelen, dstlen);
@@ -306,8 +306,7 @@ int jffs2_rubinmips_compress(unsigned char *data_in, unsigned char *cpage_out,
 #endif
 static int jffs2_dynrubin_compress(unsigned char *data_in,
 				   unsigned char *cpage_out,
-				   uint32_t *sourcelen, uint32_t *dstlen,
-				   void *model)
+				   uint32_t *sourcelen, uint32_t *dstlen)
 {
 	int bits[8];
 	unsigned char histo[256];
@@ -387,8 +386,7 @@ static void rubin_do_decompress(int bit_divider, int *bits,
 
 static int jffs2_rubinmips_decompress(unsigned char *data_in,
 				      unsigned char *cpage_out,
-				      uint32_t sourcelen, uint32_t dstlen,
-				      void *model)
+				      uint32_t sourcelen, uint32_t dstlen)
 {
 	rubin_do_decompress(BIT_DIVIDER_MIPS, bits_mips, data_in,
 			    cpage_out, sourcelen, dstlen);
@@ -397,8 +395,7 @@ static int jffs2_rubinmips_decompress(unsigned char *data_in,
 
 static int jffs2_dynrubin_decompress(unsigned char *data_in,
 				     unsigned char *cpage_out,
-				     uint32_t sourcelen, uint32_t dstlen,
-				     void *model)
+				     uint32_t sourcelen, uint32_t dstlen)
 {
 	int bits[8];
 	int c;
diff --git a/fs/jffs2/compr_zlib.c b/fs/jffs2/compr_zlib.c
index 97fc45de6f8..fd05a0b9431 100644
--- a/fs/jffs2/compr_zlib.c
+++ b/fs/jffs2/compr_zlib.c
@@ -68,8 +68,7 @@ static void free_workspaces(void)
 
 static int jffs2_zlib_compress(unsigned char *data_in,
 			       unsigned char *cpage_out,
-			       uint32_t *sourcelen, uint32_t *dstlen,
-			       void *model)
+			       uint32_t *sourcelen, uint32_t *dstlen)
 {
 	int ret;
 
@@ -136,8 +135,7 @@ static int jffs2_zlib_compress(unsigned char *data_in,
 
 static int jffs2_zlib_decompress(unsigned char *data_in,
 				 unsigned char *cpage_out,
-				 uint32_t srclen, uint32_t destlen,
-				 void *model)
+				 uint32_t srclen, uint32_t destlen)
 {
 	int ret;
 	int wbits = MAX_WBITS;
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c
index ed78a3cf3cb..92978658ed1 100644
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -289,7 +289,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
 		mutex_unlock(&f->sem);
 		d_instantiate(dentry, old_dentry->d_inode);
 		dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
-		atomic_inc(&old_dentry->d_inode->i_count);
+		ihold(old_dentry->d_inode);
 	}
 	return ret;
 }
@@ -367,7 +367,7 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
 	}
 
 	/* We use f->target field to store the target path. */
-	f->target = kmalloc(targetlen + 1, GFP_KERNEL);
+	f->target = kmemdup(target, targetlen + 1, GFP_KERNEL);
 	if (!f->target) {
 		printk(KERN_WARNING "Can't allocate %d bytes of memory\n", targetlen + 1);
 		mutex_unlock(&f->sem);
@@ -376,7 +376,6 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
 		goto fail;
 	}
 
-	memcpy(f->target, target, targetlen + 1);
 	D1(printk(KERN_DEBUG "jffs2_symlink: symlink's target '%s' cached\n", (char *)f->target));
 
 	/* No data here. Only a metadata node, which will be
@@ -864,7 +863,7 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
 		printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret);
 		/* Might as well let the VFS know */
 		d_instantiate(new_dentry, old_dentry->d_inode);
-		atomic_inc(&old_dentry->d_inode->i_count);
+		ihold(old_dentry->d_inode);
 		new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now);
 		return ret;
 	}
diff --git a/fs/jffs2/erase.c b/fs/jffs2/erase.c
index abac961f617..e513f1913c1 100644
--- a/fs/jffs2/erase.c
+++ b/fs/jffs2/erase.c
@@ -151,7 +151,7 @@ int jffs2_erase_pending_blocks(struct jffs2_sb_info *c, int count)
 		}
 
 		/* Be nice */
-		yield();
+		cond_resched();
 		mutex_lock(&c->erase_free_sem);
 		spin_lock(&c->erase_completion_lock);
 	}
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index d9beb06e6fc..e896e67767e 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -474,6 +474,25 @@ struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_i
 	return inode;
 }
 
+static int calculate_inocache_hashsize(uint32_t flash_size)
+{
+	/*
+	 * Pick a inocache hash size based on the size of the medium.
+	 * Count how many megabytes we're dealing with, apply a hashsize twice
+	 * that size, but rounding down to the usual big powers of 2. And keep
+	 * to sensible bounds.
+	 */
+
+	int size_mb = flash_size / 1024 / 1024;
+	int hashsize = (size_mb * 2) & ~0x3f;
+
+	if (hashsize < INOCACHE_HASHSIZE_MIN)
+		return INOCACHE_HASHSIZE_MIN;
+	if (hashsize > INOCACHE_HASHSIZE_MAX)
+		return INOCACHE_HASHSIZE_MAX;
+
+	return hashsize;
+}
 
 int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
 {
@@ -520,7 +539,8 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
 	if (ret)
 		return ret;
 
-	c->inocache_list = kcalloc(INOCACHE_HASHSIZE, sizeof(struct jffs2_inode_cache *), GFP_KERNEL);
+	c->inocache_hashsize = calculate_inocache_hashsize(c->flash_size);
+	c->inocache_list = kcalloc(c->inocache_hashsize, sizeof(struct jffs2_inode_cache *), GFP_KERNEL);
 	if (!c->inocache_list) {
 		ret = -ENOMEM;
 		goto out_wbuf;
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 846a7945249..31dce611337 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -219,13 +219,14 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
 	if (!list_empty(&c->erase_complete_list) ||
 	    !list_empty(&c->erase_pending_list)) {
 		spin_unlock(&c->erase_completion_lock);
+		mutex_unlock(&c->alloc_sem);
 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() erasing pending blocks\n"));
-		if (jffs2_erase_pending_blocks(c, 1)) {
-			mutex_unlock(&c->alloc_sem);
+		if (jffs2_erase_pending_blocks(c, 1))
 			return 0;
-		}
+
 		D1(printk(KERN_DEBUG "No progress from erasing blocks; doing GC anyway\n"));
 		spin_lock(&c->erase_completion_lock);
+		mutex_lock(&c->alloc_sem);
 	}
 
 	/* First, work out which block we're garbage-collecting */
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index 6784bc89add..f864005de64 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -100,6 +100,7 @@ struct jffs2_sb_info {
 	wait_queue_head_t erase_wait;		/* For waiting for erases to complete */
 
 	wait_queue_head_t inocache_wq;
+	int inocache_hashsize;
 	struct jffs2_inode_cache **inocache_list;
 	spinlock_t inocache_lock;
 
diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index af02bd13846..5e03233c236 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -420,7 +420,7 @@ struct jffs2_inode_cache *jffs2_get_ino_cache(struct jffs2_sb_info *c, uint32_t
 {
 	struct jffs2_inode_cache *ret;
 
-	ret = c->inocache_list[ino % INOCACHE_HASHSIZE];
+	ret = c->inocache_list[ino % c->inocache_hashsize];
 	while (ret && ret->ino < ino) {
 		ret = ret->next;
 	}
@@ -441,7 +441,7 @@ void jffs2_add_ino_cache (struct jffs2_sb_info *c, struct jffs2_inode_cache *new
 
 	dbg_inocache("add %p (ino #%u)\n", new, new->ino);
 
-	prev = &c->inocache_list[new->ino % INOCACHE_HASHSIZE];
+	prev = &c->inocache_list[new->ino % c->inocache_hashsize];
 
 	while ((*prev) && (*prev)->ino < new->ino) {
 		prev = &(*prev)->next;
@@ -462,7 +462,7 @@ void jffs2_del_ino_cache(struct jffs2_sb_info *c, struct jffs2_inode_cache *old)
 	dbg_inocache("del %p (ino #%u)\n", old, old->ino);
 	spin_lock(&c->inocache_lock);
 
-	prev = &c->inocache_list[old->ino % INOCACHE_HASHSIZE];
+	prev = &c->inocache_list[old->ino % c->inocache_hashsize];
 
 	while ((*prev) && (*prev)->ino < old->ino) {
 		prev = &(*prev)->next;
@@ -487,7 +487,7 @@ void jffs2_free_ino_caches(struct jffs2_sb_info *c)
 	int i;
 	struct jffs2_inode_cache *this, *next;
 
-	for (i=0; i<INOCACHE_HASHSIZE; i++) {
+	for (i=0; i < c->inocache_hashsize; i++) {
 		this = c->inocache_list[i];
 		while (this) {
 			next = this->next;
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index 523a9169105..5a53d9bdb2b 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -199,7 +199,8 @@ struct jffs2_inode_cache {
 #define RAWNODE_CLASS_XATTR_DATUM	1
 #define RAWNODE_CLASS_XATTR_REF		2
 
-#define INOCACHE_HASHSIZE 128
+#define INOCACHE_HASHSIZE_MIN 128
+#define INOCACHE_HASHSIZE_MAX 1024
 
 #define write_ofs(c) ((c)->nextblock->offset + (c)->sector_size - (c)->nextblock->free_size)
 
diff --git a/fs/jffs2/scan.c b/fs/jffs2/scan.c
index 46f870d1cc3..b632dddcb48 100644
--- a/fs/jffs2/scan.c
+++ b/fs/jffs2/scan.c
@@ -20,7 +20,7 @@
 #include "summary.h"
 #include "debug.h"
 
-#define DEFAULT_EMPTY_SCAN_SIZE 1024
+#define DEFAULT_EMPTY_SCAN_SIZE 256
 
 #define noisy_printk(noise, args...) do { \
 	if (*(noise)) { \
@@ -435,7 +435,7 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
 				  unsigned char *buf, uint32_t buf_size, struct jffs2_summary *s) {
 	struct jffs2_unknown_node *node;
 	struct jffs2_unknown_node crcnode;
-	uint32_t ofs, prevofs;
+	uint32_t ofs, prevofs, max_ofs;
 	uint32_t hdr_crc, buf_ofs, buf_len;
 	int err;
 	int noise = 0;
@@ -550,12 +550,12 @@ static int jffs2_scan_eraseblock (struct jffs2_sb_info *c, struct jffs2_eraseblo
 
 	/* We temporarily use 'ofs' as a pointer into the buffer/jeb */
 	ofs = 0;
-
-	/* Scan only 4KiB of 0xFF before declaring it's empty */
-	while(ofs < EMPTY_SCAN_SIZE(c->sector_size) && *(uint32_t *)(&buf[ofs]) == 0xFFFFFFFF)
+	max_ofs = EMPTY_SCAN_SIZE(c->sector_size);
+	/* Scan only EMPTY_SCAN_SIZE of 0xFF before declaring it's empty */
+	while(ofs < max_ofs && *(uint32_t *)(&buf[ofs]) == 0xFFFFFFFF)
 		ofs += 4;
 
-	if (ofs == EMPTY_SCAN_SIZE(c->sector_size)) {
+	if (ofs == max_ofs) {
 #ifdef CONFIG_JFFS2_FS_WRITEBUFFER
 		if (jffs2_cleanmarker_oob(c)) {
 			/* scan oob, take care of cleanmarker */
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index d1ae5dfc22b..c86041b866a 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -179,12 +179,11 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent)
 	return ret;
 }
 
-static int jffs2_get_sb(struct file_system_type *fs_type,
+static struct dentry *jffs2_mount(struct file_system_type *fs_type,
 			int flags, const char *dev_name,
-			void *data, struct vfsmount *mnt)
+			void *data)
 {
-	return get_sb_mtd(fs_type, flags, dev_name, data, jffs2_fill_super,
-			  mnt);
+	return mount_mtd(fs_type, flags, dev_name, data, jffs2_fill_super);
 }
 
 static void jffs2_put_super (struct super_block *sb)
@@ -229,7 +228,7 @@ static void jffs2_kill_sb(struct super_block *sb)
 static struct file_system_type jffs2_fs_type = {
 	.owner =	THIS_MODULE,
 	.name =		"jffs2",
-	.get_sb =	jffs2_get_sb,
+	.mount =	jffs2_mount,
 	.kill_sb =	jffs2_kill_sb,
 };
 
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index f8332dc8eeb..3a09423b6c2 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -497,7 +497,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
 	 * appear hashed, but do not put on any lists.  hlist_del()
 	 * will work fine and require no locking.
 	 */
-	ip->i_hash.pprev = &ip->i_hash.next;
+	hlist_add_fake(&ip->i_hash);
 
 	return (ip);
 }
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index d945ea76b44..9466957ec84 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1279,7 +1279,7 @@ int txCommit(tid_t tid,		/* transaction identifier */
 	 * lazy commit thread finishes processing
 	 */
 	if (tblk->xflag & COMMIT_DELETE) {
-		atomic_inc(&tblk->u.ip->i_count);
+		ihold(tblk->u.ip);
 		/*
 		 * Avoid a rare deadlock
 		 *
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
index a9cf8e8675b..231ca4af9bc 100644
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -839,7 +839,7 @@ static int jfs_link(struct dentry *old_dentry,
 	ip->i_ctime = CURRENT_TIME;
 	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
 	mark_inode_dirty(dir);
-	atomic_inc(&ip->i_count);
+	ihold(ip);
 
 	iplist[0] = ip;
 	iplist[1] = dir;
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 68eee2bf629..0669fc1cc3b 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -583,11 +583,10 @@ static int jfs_unfreeze(struct super_block *sb)
 	return 0;
 }
 
-static int jfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *jfs_do_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, jfs_fill_super,
-			   mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, jfs_fill_super);
 }
 
 static int jfs_sync_fs(struct super_block *sb, int wait)
@@ -770,7 +769,7 @@ static const struct export_operations jfs_export_operations = {
 static struct file_system_type jfs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "jfs",
-	.get_sb		= jfs_get_sb,
+	.mount		= jfs_do_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
diff --git a/fs/libfs.c b/fs/libfs.c
index 62baa0387d6..a3accdf528a 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -201,9 +201,8 @@ static const struct super_operations simple_super_operations = {
  * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
  * will never be mountable)
  */
-int get_sb_pseudo(struct file_system_type *fs_type, char *name,
-	const struct super_operations *ops, unsigned long magic,
-	struct vfsmount *mnt)
+struct dentry *mount_pseudo(struct file_system_type *fs_type, char *name,
+	const struct super_operations *ops, unsigned long magic)
 {
 	struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
 	struct dentry *dentry;
@@ -211,7 +210,7 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
 	struct qstr d_name = {.name = name, .len = strlen(name)};
 
 	if (IS_ERR(s))
-		return PTR_ERR(s);
+		return ERR_CAST(s);
 
 	s->s_flags = MS_NOUSER;
 	s->s_maxbytes = MAX_LFS_FILESIZE;
@@ -241,12 +240,11 @@ int get_sb_pseudo(struct file_system_type *fs_type, char *name,
 	d_instantiate(dentry, root);
 	s->s_root = dentry;
 	s->s_flags |= MS_ACTIVE;
-	simple_set_mnt(mnt, s);
-	return 0;
+	return dget(s->s_root);
 
 Enomem:
 	deactivate_locked_super(s);
-	return -ENOMEM;
+	return ERR_PTR(-ENOMEM);
 }
 
 int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
@@ -255,7 +253,7 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den
 
 	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
 	inc_nlink(inode);
-	atomic_inc(&inode->i_count);
+	ihold(inode);
 	dget(dentry);
 	d_instantiate(dentry, inode);
 	return 0;
@@ -892,10 +890,6 @@ EXPORT_SYMBOL_GPL(generic_fh_to_parent);
  */
 int generic_file_fsync(struct file *file, int datasync)
 {
-	struct writeback_control wbc = {
-		.sync_mode = WB_SYNC_ALL,
-		.nr_to_write = 0, /* metadata-only; caller takes care of data */
-	};
 	struct inode *inode = file->f_mapping->host;
 	int err;
 	int ret;
@@ -906,7 +900,7 @@ int generic_file_fsync(struct file *file, int datasync)
 	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
 		return ret;
 
-	err = sync_inode(inode, &wbc);
+	err = sync_inode_metadata(inode, 1);
 	if (ret == 0)
 		ret = err;
 	return ret;
@@ -955,7 +949,7 @@ EXPORT_SYMBOL(dcache_dir_lseek);
 EXPORT_SYMBOL(dcache_dir_open);
 EXPORT_SYMBOL(dcache_readdir);
 EXPORT_SYMBOL(generic_read_dir);
-EXPORT_SYMBOL(get_sb_pseudo);
+EXPORT_SYMBOL(mount_pseudo);
 EXPORT_SYMBOL(simple_write_begin);
 EXPORT_SYMBOL(simple_write_end);
 EXPORT_SYMBOL(simple_dir_inode_operations);
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index d5bb86866e6..25509eb28fd 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -14,7 +14,6 @@
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/lockd.h>
-#include <linux/smp_lock.h>
 #include <linux/kthread.h>
 
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 47ea1e1925b..332c54cf75e 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -7,7 +7,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/smp_lock.h>
 #include <linux/slab.h>
 #include <linux/types.h>
 #include <linux/errno.h>
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 25e21e4023b..ed0c59fe23c 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -124,7 +124,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
 			continue;
 		if (host->h_server != ni->server)
 			continue;
-		if (ni->server &&
+		if (ni->server && ni->src_len != 0 &&
 		    !rpc_cmp_addr(nlm_srcaddr(host), ni->src_sap))
 			continue;
 
@@ -167,6 +167,7 @@ static struct nlm_host *nlm_lookup_host(struct nlm_lookup_host_info *ni)
 	host->h_addrlen = ni->salen;
 	rpc_set_port(nlm_addr(host), 0);
 	memcpy(nlm_srcaddr(host), ni->src_sap, ni->src_len);
+	host->h_srcaddrlen = ni->src_len;
 	host->h_version    = ni->version;
 	host->h_proto      = ni->protocol;
 	host->h_rpcclnt    = NULL;
@@ -238,9 +239,6 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
 				     const char *hostname,
 				     int noresvport)
 {
-	const struct sockaddr source = {
-		.sa_family	= AF_UNSPEC,
-	};
 	struct nlm_lookup_host_info ni = {
 		.server		= 0,
 		.sap		= sap,
@@ -249,8 +247,6 @@ struct nlm_host *nlmclnt_lookup_host(const struct sockaddr *sap,
 		.version	= version,
 		.hostname	= hostname,
 		.hostname_len	= strlen(hostname),
-		.src_sap	= &source,
-		.src_len	= sizeof(source),
 		.noresvport	= noresvport,
 	};
 
@@ -357,7 +353,6 @@ nlm_bind_host(struct nlm_host *host)
 			.protocol	= host->h_proto,
 			.address	= nlm_addr(host),
 			.addrsize	= host->h_addrlen,
-			.saddress	= nlm_srcaddr(host),
 			.timeout	= &timeparms,
 			.servername	= host->h_name,
 			.program	= &nlm_program,
@@ -376,6 +371,8 @@ nlm_bind_host(struct nlm_host *host)
 			args.flags |= RPC_CLNT_CREATE_HARDRTRY;
 		if (host->h_noresvport)
 			args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
+		if (host->h_srcaddrlen)
+			args.saddress = nlm_srcaddr(host);
 
 		clnt = rpc_create(&args);
 		if (!IS_ERR(clnt))
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index b13aabc1229..abfff9d7979 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -22,7 +22,6 @@
 #include <linux/in.h>
 #include <linux/uio.h>
 #include <linux/smp.h>
-#include <linux/smp_lock.h>
 #include <linux/mutex.h>
 #include <linux/kthread.h>
 #include <linux/freezer.h>
@@ -130,15 +129,6 @@ lockd(void *vrqstp)
 
 	dprintk("NFS locking service started (ver " LOCKD_VERSION ").\n");
 
-	/*
-	 * FIXME: it would be nice if lockd didn't spend its entire life
-	 * running under the BKL. At the very least, it would be good to
-	 * have someone clarify what it's intended to protect here. I've
-	 * seen some handwavy posts about posix locking needing to be
-	 * done under the BKL, but it's far from clear.
-	 */
-	lock_kernel();
-
 	if (!nlm_timeout)
 		nlm_timeout = LOCKD_DFLT_TIMEO;
 	nlmsvc_timeout = nlm_timeout * HZ;
@@ -195,7 +185,6 @@ lockd(void *vrqstp)
 	if (nlmsvc_ops)
 		nlmsvc_invalidate_all();
 	nlm_shutdown_hosts();
-	unlock_kernel();
 	return 0;
 }
 
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index a336e832475..38d26119245 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -9,7 +9,6 @@
 
 #include <linux/types.h>
 #include <linux/time.h>
-#include <linux/smp_lock.h>
 #include <linux/lockd/lockd.h>
 #include <linux/lockd/share.h>
 
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 6f1ef000975..ef5659b211e 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -25,7 +25,6 @@
 #include <linux/errno.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
-#include <linux/smp_lock.h>
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/lockd/nlm.h>
@@ -700,14 +699,16 @@ nlmsvc_notify_blocked(struct file_lock *fl)
 	struct nlm_block	*block;
 
 	dprintk("lockd: VFS unblock notification for block %p\n", fl);
+	spin_lock(&nlm_blocked_lock);
 	list_for_each_entry(block, &nlm_blocked, b_list) {
 		if (nlm_compare_locks(&block->b_call->a_args.lock.fl, fl)) {
-			nlmsvc_insert_block(block, 0);
+			nlmsvc_insert_block_locked(block, 0);
+			spin_unlock(&nlm_blocked_lock);
 			svc_wake_up(block->b_daemon);
 			return;
 		}
 	}
-
+	spin_unlock(&nlm_blocked_lock);
 	printk(KERN_WARNING "lockd: notification for unknown block!\n");
 }
 
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index c3069f38d60..0caea5310ac 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -9,7 +9,6 @@
 
 #include <linux/types.h>
 #include <linux/time.h>
-#include <linux/smp_lock.h>
 #include <linux/lockd/lockd.h>
 #include <linux/lockd/share.h>
 
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index d0ef94cfb3d..1ca0679c80b 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -170,6 +170,7 @@ nlm_traverse_locks(struct nlm_host *host, struct nlm_file *file,
 
 again:
 	file->f_locks = 0;
+	lock_flocks(); /* protects i_flock list */
 	for (fl = inode->i_flock; fl; fl = fl->fl_next) {
 		if (fl->fl_lmops != &nlmsvc_lock_operations)
 			continue;
@@ -181,6 +182,7 @@ again:
 		if (match(lockhost, host)) {
 			struct file_lock lock = *fl;
 
+			unlock_flocks();
 			lock.fl_type  = F_UNLCK;
 			lock.fl_start = 0;
 			lock.fl_end   = OFFSET_MAX;
@@ -192,6 +194,7 @@ again:
 			goto again;
 		}
 	}
+	unlock_flocks();
 
 	return 0;
 }
@@ -226,10 +229,14 @@ nlm_file_inuse(struct nlm_file *file)
 	if (file->f_count || !list_empty(&file->f_blocks) || file->f_shares)
 		return 1;
 
+	lock_flocks();
 	for (fl = inode->i_flock; fl; fl = fl->fl_next) {
-		if (fl->fl_lmops == &nlmsvc_lock_operations)
+		if (fl->fl_lmops == &nlmsvc_lock_operations) {
+			unlock_flocks();
 			return 1;
+		}
 	}
+	unlock_flocks();
 	file->f_locks = 0;
 	return 0;
 }
diff --git a/fs/locks.c b/fs/locks.c
index 8b2b6ad56a0..8729347bcd1 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -122,7 +122,6 @@
 #include <linux/module.h>
 #include <linux/security.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/syscalls.h>
 #include <linux/time.h>
 #include <linux/rcupdate.h>
@@ -142,6 +141,7 @@ int lease_break_time = 45;
 
 static LIST_HEAD(file_lock_list);
 static LIST_HEAD(blocked_list);
+static DEFINE_SPINLOCK(file_lock_lock);
 
 /*
  * Protects the two list heads above, plus the inode->i_flock list
@@ -149,23 +149,24 @@ static LIST_HEAD(blocked_list);
  */
 void lock_flocks(void)
 {
-	lock_kernel();
+	spin_lock(&file_lock_lock);
 }
 EXPORT_SYMBOL_GPL(lock_flocks);
 
 void unlock_flocks(void)
 {
-	unlock_kernel();
+	spin_unlock(&file_lock_lock);
 }
 EXPORT_SYMBOL_GPL(unlock_flocks);
 
 static struct kmem_cache *filelock_cache __read_mostly;
 
 /* Allocate an empty lock structure. */
-static struct file_lock *locks_alloc_lock(void)
+struct file_lock *locks_alloc_lock(void)
 {
 	return kmem_cache_alloc(filelock_cache, GFP_KERNEL);
 }
+EXPORT_SYMBOL_GPL(locks_alloc_lock);
 
 void locks_release_private(struct file_lock *fl)
 {
@@ -184,7 +185,7 @@ void locks_release_private(struct file_lock *fl)
 EXPORT_SYMBOL_GPL(locks_release_private);
 
 /* Free a lock which is not in use. */
-static void locks_free_lock(struct file_lock *fl)
+void locks_free_lock(struct file_lock *fl)
 {
 	BUG_ON(waitqueue_active(&fl->fl_wait));
 	BUG_ON(!list_empty(&fl->fl_block));
@@ -193,6 +194,7 @@ static void locks_free_lock(struct file_lock *fl)
 	locks_release_private(fl);
 	kmem_cache_free(filelock_cache, fl);
 }
+EXPORT_SYMBOL(locks_free_lock);
 
 void locks_init_lock(struct file_lock *fl)
 {
@@ -232,11 +234,8 @@ static void locks_copy_private(struct file_lock *new, struct file_lock *fl)
 			fl->fl_ops->fl_copy_lock(new, fl);
 		new->fl_ops = fl->fl_ops;
 	}
-	if (fl->fl_lmops) {
-		if (fl->fl_lmops->fl_copy_lock)
-			fl->fl_lmops->fl_copy_lock(new, fl);
+	if (fl->fl_lmops)
 		new->fl_lmops = fl->fl_lmops;
-	}
 }
 
 /*
@@ -1365,31 +1364,27 @@ int fcntl_getlease(struct file *filp)
 int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
 {
 	struct file_lock *fl, **before, **my_before = NULL, *lease;
-	struct file_lock *new_fl = NULL;
 	struct dentry *dentry = filp->f_path.dentry;
 	struct inode *inode = dentry->d_inode;
 	int error, rdlease_count = 0, wrlease_count = 0;
 
+	lease = *flp;
+
+	error = -EACCES;
 	if ((current_fsuid() != inode->i_uid) && !capable(CAP_LEASE))
-		return -EACCES;
+		goto out;
+	error = -EINVAL;
 	if (!S_ISREG(inode->i_mode))
-		return -EINVAL;
+		goto out;
 	error = security_file_lock(filp, arg);
 	if (error)
-		return error;
+		goto out;
 
 	time_out_leases(inode);
 
 	BUG_ON(!(*flp)->fl_lmops->fl_break);
 
-	lease = *flp;
-
 	if (arg != F_UNLCK) {
-		error = -ENOMEM;
-		new_fl = locks_alloc_lock();
-		if (new_fl == NULL)
-			goto out;
-
 		error = -EAGAIN;
 		if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0))
 			goto out;
@@ -1429,12 +1424,12 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
 		goto out;
 
 	if (my_before != NULL) {
-		*flp = *my_before;
 		error = lease->fl_lmops->fl_change(my_before, arg);
+		if (!error)
+			*flp = *my_before;
 		goto out;
 	}
 
-	error = 0;
 	if (arg == F_UNLCK)
 		goto out;
 
@@ -1442,15 +1437,10 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp)
 	if (!leases_enable)
 		goto out;
 
-	locks_copy_lock(new_fl, lease);
-	locks_insert_lock(before, new_fl);
-
-	*flp = new_fl;
+	locks_insert_lock(before, lease);
 	return 0;
 
 out:
-	if (new_fl != NULL)
-		locks_free_lock(new_fl);
 	return error;
 }
 EXPORT_SYMBOL(generic_setlease);
@@ -1502,6 +1492,59 @@ int vfs_setlease(struct file *filp, long arg, struct file_lock **lease)
 }
 EXPORT_SYMBOL_GPL(vfs_setlease);
 
+static int do_fcntl_delete_lease(struct file *filp)
+{
+	struct file_lock fl, *flp = &fl;
+
+	lease_init(filp, F_UNLCK, flp);
+
+	return vfs_setlease(filp, F_UNLCK, &flp);
+}
+
+static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
+{
+	struct file_lock *fl, *ret;
+	struct fasync_struct *new;
+	int error;
+
+	fl = lease_alloc(filp, arg);
+	if (IS_ERR(fl))
+		return PTR_ERR(fl);
+
+	new = fasync_alloc();
+	if (!new) {
+		locks_free_lock(fl);
+		return -ENOMEM;
+	}
+	ret = fl;
+	lock_flocks();
+	error = __vfs_setlease(filp, arg, &ret);
+	if (error) {
+		unlock_flocks();
+		locks_free_lock(fl);
+		goto out_free_fasync;
+	}
+	if (ret != fl)
+		locks_free_lock(fl);
+
+	/*
+	 * fasync_insert_entry() returns the old entry if any.
+	 * If there was no old entry, then it used 'new' and
+	 * inserted it into the fasync list. Clear new so that
+	 * we don't release it here.
+	 */
+	if (!fasync_insert_entry(fd, filp, &ret->fl_fasync, new))
+		new = NULL;
+
+	error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
+	unlock_flocks();
+
+out_free_fasync:
+	if (new)
+		fasync_free(new);
+	return error;
+}
+
 /**
  *	fcntl_setlease	-	sets a lease on an open file
  *	@fd: open file descriptor
@@ -1514,34 +1557,9 @@ EXPORT_SYMBOL_GPL(vfs_setlease);
  */
 int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
 {
-	struct file_lock fl, *flp = &fl;
-	struct inode *inode = filp->f_path.dentry->d_inode;
-	int error;
-
-	locks_init_lock(&fl);
-	error = lease_init(filp, arg, &fl);
-	if (error)
-		return error;
-
-	lock_flocks();
-
-	error = __vfs_setlease(filp, arg, &flp);
-	if (error || arg == F_UNLCK)
-		goto out_unlock;
-
-	error = fasync_helper(fd, filp, 1, &flp->fl_fasync);
-	if (error < 0) {
-		/* remove lease just inserted by setlease */
-		flp->fl_type = F_UNLCK | F_INPROGRESS;
-		flp->fl_break_time = jiffies - 10;
-		time_out_leases(inode);
-		goto out_unlock;
-	}
-
-	error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
-out_unlock:
-	unlock_flocks();
-	return error;
+	if (arg == F_UNLCK)
+		return do_fcntl_delete_lease(filp);
+	return do_fcntl_add_lease(fd, filp, arg);
 }
 
 /**
@@ -2109,7 +2127,7 @@ EXPORT_SYMBOL_GPL(vfs_cancel_lock);
 #include <linux/seq_file.h>
 
 static void lock_get_status(struct seq_file *f, struct file_lock *fl,
-							int id, char *pfx)
+			    loff_t id, char *pfx)
 {
 	struct inode *inode = NULL;
 	unsigned int fl_pid;
@@ -2122,7 +2140,7 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl,
 	if (fl->fl_file != NULL)
 		inode = fl->fl_file->f_path.dentry->d_inode;
 
-	seq_printf(f, "%d:%s ", id, pfx);
+	seq_printf(f, "%lld:%s ", id, pfx);
 	if (IS_POSIX(fl)) {
 		seq_printf(f, "%6s %s ",
 			     (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ",
@@ -2185,24 +2203,27 @@ static int locks_show(struct seq_file *f, void *v)
 
 	fl = list_entry(v, struct file_lock, fl_link);
 
-	lock_get_status(f, fl, (long)f->private, "");
+	lock_get_status(f, fl, *((loff_t *)f->private), "");
 
 	list_for_each_entry(bfl, &fl->fl_block, fl_block)
-		lock_get_status(f, bfl, (long)f->private, " ->");
+		lock_get_status(f, bfl, *((loff_t *)f->private), " ->");
 
-	f->private++;
 	return 0;
 }
 
 static void *locks_start(struct seq_file *f, loff_t *pos)
 {
+	loff_t *p = f->private;
+
 	lock_flocks();
-	f->private = (void *)1;
+	*p = (*pos + 1);
 	return seq_list_start(&file_lock_list, *pos);
 }
 
 static void *locks_next(struct seq_file *f, void *v, loff_t *pos)
 {
+	loff_t *p = f->private;
+	++*p;
 	return seq_list_next(v, &file_lock_list, pos);
 }
 
@@ -2220,14 +2241,14 @@ static const struct seq_operations locks_seq_operations = {
 
 static int locks_open(struct inode *inode, struct file *filp)
 {
-	return seq_open(filp, &locks_seq_operations);
+	return seq_open_private(filp, &locks_seq_operations, sizeof(loff_t));
 }
 
 static const struct file_operations proc_locks_operations = {
 	.open		= locks_open,
 	.read		= seq_read,
 	.llseek		= seq_lseek,
-	.release	= seq_release,
+	.release	= seq_release_private,
 };
 
 static int __init proc_locks_init(void)
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index 9bd2ce2a304..92ca6fbe09b 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -298,9 +298,9 @@ static int bdev_write_sb(struct super_block *sb, struct page *page)
 	return sync_request(page, bdev, WRITE);
 }
 
-static void bdev_put_device(struct super_block *sb)
+static void bdev_put_device(struct logfs_super *s)
 {
-	close_bdev_exclusive(logfs_super(sb)->s_bdev, FMODE_READ|FMODE_WRITE);
+	close_bdev_exclusive(s->s_bdev, FMODE_READ|FMODE_WRITE);
 }
 
 static int bdev_can_write_buf(struct super_block *sb, u64 ofs)
@@ -320,8 +320,8 @@ static const struct logfs_device_ops bd_devops = {
 	.put_device	= bdev_put_device,
 };
 
-int logfs_get_sb_bdev(struct file_system_type *type, int flags,
-		const char *devname, struct vfsmount *mnt)
+int logfs_get_sb_bdev(struct logfs_super *p, struct file_system_type *type,
+		const char *devname)
 {
 	struct block_device *bdev;
 
@@ -332,8 +332,11 @@ int logfs_get_sb_bdev(struct file_system_type *type, int flags,
 	if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) {
 		int mtdnr = MINOR(bdev->bd_dev);
 		close_bdev_exclusive(bdev, FMODE_READ|FMODE_WRITE);
-		return logfs_get_sb_mtd(type, flags, mtdnr, mnt);
+		return logfs_get_sb_mtd(p, mtdnr);
 	}
 
-	return logfs_get_sb_device(type, flags, NULL, bdev, &bd_devops, mnt);
+	p->s_bdev = bdev;
+	p->s_mtd = NULL;
+	p->s_devops = &bd_devops;
+	return 0;
 }
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index a85d47d13e4..7466e9dcc8c 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -230,9 +230,9 @@ static void mtd_writeseg(struct super_block *sb, u64 ofs, size_t len)
 	__mtd_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT);
 }
 
-static void mtd_put_device(struct super_block *sb)
+static void mtd_put_device(struct logfs_super *s)
 {
-	put_mtd_device(logfs_super(sb)->s_mtd);
+	put_mtd_device(s->s_mtd);
 }
 
 static int mtd_can_write_buf(struct super_block *sb, u64 ofs)
@@ -265,14 +265,14 @@ static const struct logfs_device_ops mtd_devops = {
 	.put_device	= mtd_put_device,
 };
 
-int logfs_get_sb_mtd(struct file_system_type *type, int flags,
-		int mtdnr, struct vfsmount *mnt)
+int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
 {
-	struct mtd_info *mtd;
-	const struct logfs_device_ops *devops = &mtd_devops;
-
-	mtd = get_mtd_device(NULL, mtdnr);
+	struct mtd_info *mtd = get_mtd_device(NULL, mtdnr);
 	if (IS_ERR(mtd))
 		return PTR_ERR(mtd);
-	return logfs_get_sb_device(type, flags, mtd, NULL, devops, mnt);
+
+	s->s_bdev = NULL;
+	s->s_mtd = mtd;
+	s->s_devops = &mtd_devops;
+	return 0;
 }
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 1eb4e89e045..409dfd65e9a 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -569,7 +569,7 @@ static int logfs_link(struct dentry *old_dentry, struct inode *dir,
 		return -EMLINK;
 
 	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
-	atomic_inc(&inode->i_count);
+	ihold(inode);
 	inode->i_nlink++;
 	mark_inode_dirty_sync(inode);
 
diff --git a/fs/logfs/journal.c b/fs/logfs/journal.c
index f46ee8b0e13..9da29706f91 100644
--- a/fs/logfs/journal.c
+++ b/fs/logfs/journal.c
@@ -828,7 +828,7 @@ void do_logfs_journal_wl_pass(struct super_block *sb)
 		super->s_journal_seg[i] = segno;
 		super->s_journal_ec[i] = ec;
 		logfs_set_segment_reserved(sb, segno);
-		err = btree_insert32(head, segno, (void *)1, GFP_KERNEL);
+		err = btree_insert32(head, segno, (void *)1, GFP_NOFS);
 		BUG_ON(err); /* mempool should prevent this */
 		err = logfs_erase_segment(sb, segno, 1);
 		BUG_ON(err); /* FIXME: remount-ro would be nicer */
diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h
index b8786264d24..57afd4a6fab 100644
--- a/fs/logfs/logfs.h
+++ b/fs/logfs/logfs.h
@@ -136,6 +136,7 @@ struct logfs_area_ops {
 	int	(*erase_segment)(struct logfs_area *area);
 };
 
+struct logfs_super;	/* forward */
 /**
  * struct logfs_device_ops - device access operations
  *
@@ -156,7 +157,7 @@ struct logfs_device_ops {
 			int ensure_write);
 	int (*can_write_buf)(struct super_block *sb, u64 ofs);
 	void (*sync)(struct super_block *sb);
-	void (*put_device)(struct super_block *sb);
+	void (*put_device)(struct logfs_super *s);
 };
 
 /**
@@ -471,11 +472,13 @@ void logfs_compr_exit(void);
 
 /* dev_bdev.c */
 #ifdef CONFIG_BLOCK
-int logfs_get_sb_bdev(struct file_system_type *type, int flags,
-		const char *devname, struct vfsmount *mnt);
+int logfs_get_sb_bdev(struct logfs_super *s,
+		struct file_system_type *type,
+		const char *devname);
 #else
-static inline int logfs_get_sb_bdev(struct file_system_type *type, int flags,
-		const char *devname, struct vfsmount *mnt)
+static inline int logfs_get_sb_bdev(struct logfs_super *s,
+		struct file_system_type *type,
+		const char *devname)
 {
 	return -ENODEV;
 }
@@ -483,11 +486,9 @@ static inline int logfs_get_sb_bdev(struct file_system_type *type, int flags,
 
 /* dev_mtd.c */
 #ifdef CONFIG_MTD
-int logfs_get_sb_mtd(struct file_system_type *type, int flags,
-		int mtdnr, struct vfsmount *mnt);
+int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr);
 #else
-static inline int logfs_get_sb_mtd(struct file_system_type *type, int flags,
-		int mtdnr, struct vfsmount *mnt)
+static inline int logfs_get_sb_mtd(struct logfs_super *s, int mtdnr)
 {
 	return -ENODEV;
 }
@@ -619,9 +620,6 @@ void emergency_read_end(struct page *page);
 void logfs_crash_dump(struct super_block *sb);
 void *memchr_inv(const void *s, int c, size_t n);
 int logfs_statfs(struct dentry *dentry, struct kstatfs *stats);
-int logfs_get_sb_device(struct file_system_type *type, int flags,
-		struct mtd_info *mtd, struct block_device *bdev,
-		const struct logfs_device_ops *devops, struct vfsmount *mnt);
 int logfs_check_ds(struct logfs_disk_super *ds);
 int logfs_write_sb(struct super_block *sb);
 
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 6127baf0e18..ee99a9f5dfd 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -1994,6 +1994,9 @@ static int do_write_inode(struct inode *inode)
 
 	/* FIXME: transaction is part of logfs_block now.  Is that enough? */
 	err = logfs_write_buf(master_inode, page, 0);
+	if (err)
+		move_page_to_inode(inode, page);
+
 	logfs_put_write_page(page);
 	return err;
 }
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 5336155c5d8..33435e4b14d 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -325,7 +325,7 @@ static int logfs_make_writeable(struct super_block *sb)
 	return 0;
 }
 
-static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
+static int logfs_get_sb_final(struct super_block *sb)
 {
 	struct logfs_super *super = logfs_super(sb);
 	struct inode *rootdir;
@@ -356,7 +356,6 @@ static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
 	}
 
 	log_super("LogFS: Finished mounting\n");
-	simple_set_mnt(mnt, sb);
 	return 0;
 
 fail:
@@ -529,43 +528,37 @@ static void logfs_kill_sb(struct super_block *sb)
 	logfs_cleanup_rw(sb);
 	if (super->s_erase_page)
 		__free_page(super->s_erase_page);
-	super->s_devops->put_device(sb);
+	super->s_devops->put_device(super);
 	logfs_mempool_destroy(super->s_btree_pool);
 	logfs_mempool_destroy(super->s_alias_pool);
 	kfree(super);
 	log_super("LogFS: Finished unmounting\n");
 }
 
-int logfs_get_sb_device(struct file_system_type *type, int flags,
-		struct mtd_info *mtd, struct block_device *bdev,
-		const struct logfs_device_ops *devops, struct vfsmount *mnt)
+static struct dentry *logfs_get_sb_device(struct logfs_super *super,
+		struct file_system_type *type, int flags)
 {
-	struct logfs_super *super;
 	struct super_block *sb;
 	int err = -ENOMEM;
 	static int mount_count;
 
 	log_super("LogFS: Start mount %x\n", mount_count++);
-	super = kzalloc(sizeof(*super), GFP_KERNEL);
-	if (!super)
-		goto err0;
 
-	super->s_mtd	= mtd;
-	super->s_bdev	= bdev;
 	err = -EINVAL;
 	sb = sget(type, logfs_sb_test, logfs_sb_set, super);
-	if (IS_ERR(sb))
-		goto err0;
+	if (IS_ERR(sb)) {
+		super->s_devops->put_device(super);
+		kfree(super);
+		return ERR_CAST(sb);
+	}
 
 	if (sb->s_root) {
 		/* Device is already in use */
-		err = 0;
-		simple_set_mnt(mnt, sb);
-		goto err0;
+		super->s_devops->put_device(super);
+		kfree(super);
+		return dget(sb->s_root);
 	}
 
-	super->s_devops = devops;
-
 	/*
 	 * sb->s_maxbytes is limited to 8TB.  On 32bit systems, the page cache
 	 * only covers 16TB and the upper 8TB are used for indirect blocks.
@@ -581,10 +574,12 @@ int logfs_get_sb_device(struct file_system_type *type, int flags,
 		goto err1;
 
 	sb->s_flags |= MS_ACTIVE;
-	err = logfs_get_sb_final(sb, mnt);
-	if (err)
+	err = logfs_get_sb_final(sb);
+	if (err) {
 		deactivate_locked_super(sb);
-	return err;
+		return ERR_PTR(err);
+	}
+	return dget(sb->s_root);
 
 err1:
 	/* no ->s_root, no ->put_super() */
@@ -592,37 +587,45 @@ err1:
 	iput(super->s_segfile_inode);
 	iput(super->s_mapping_inode);
 	deactivate_locked_super(sb);
-	return err;
-err0:
-	kfree(super);
-	//devops->put_device(sb);
-	return err;
+	return ERR_PTR(err);
 }
 
-static int logfs_get_sb(struct file_system_type *type, int flags,
-		const char *devname, void *data, struct vfsmount *mnt)
+static struct dentry *logfs_mount(struct file_system_type *type, int flags,
+		const char *devname, void *data)
 {
 	ulong mtdnr;
+	struct logfs_super *super;
+	int err;
 
-	if (!devname)
-		return logfs_get_sb_bdev(type, flags, devname, mnt);
-	if (strncmp(devname, "mtd", 3))
-		return logfs_get_sb_bdev(type, flags, devname, mnt);
+	super = kzalloc(sizeof(*super), GFP_KERNEL);
+	if (!super)
+		return ERR_PTR(-ENOMEM);
 
-	{
+	if (!devname)
+		err = logfs_get_sb_bdev(super, type, devname);
+	else if (strncmp(devname, "mtd", 3))
+		err = logfs_get_sb_bdev(super, type, devname);
+	else {
 		char *garbage;
 		mtdnr = simple_strtoul(devname+3, &garbage, 0);
 		if (*garbage)
-			return -EINVAL;
+			err = -EINVAL;
+		else
+			err = logfs_get_sb_mtd(super, mtdnr);
+	}
+
+	if (err) {
+		kfree(super);
+		return ERR_PTR(err);
 	}
 
-	return logfs_get_sb_mtd(type, flags, mtdnr, mnt);
+	return logfs_get_sb_device(super, type, flags);
 }
 
 static struct file_system_type logfs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "logfs",
-	.get_sb		= logfs_get_sb,
+	.mount		= logfs_mount,
 	.kill_sb	= logfs_kill_sb,
 	.fs_flags	= FS_REQUIRES_DEV,
 
diff --git a/fs/minix/inode.c b/fs/minix/inode.c
index e39d6bf2e8f..fb2020858a3 100644
--- a/fs/minix/inode.c
+++ b/fs/minix/inode.c
@@ -614,17 +614,16 @@ void minix_truncate(struct inode * inode)
 		V2_minix_truncate(inode);
 }
 
-static int minix_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *minix_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, minix_fill_super,
-			   mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, minix_fill_super);
 }
 
 static struct file_system_type minix_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "minix",
-	.get_sb		= minix_get_sb,
+	.mount		= minix_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index f3f3578393a..c0d35a3acce 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -101,7 +101,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
 
 	inode->i_ctime = CURRENT_TIME_SEC;
 	inode_inc_link_count(inode);
-	atomic_inc(&inode->i_count);
+	ihold(inode);
 	return add_nondir(dentry, inode);
 }
 
diff --git a/fs/namei.c b/fs/namei.c
index 24896e83356..4ff7ca53053 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1121,11 +1121,13 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
 static struct dentry *__lookup_hash(struct qstr *name,
 		struct dentry *base, struct nameidata *nd)
 {
+	struct inode *inode = base->d_inode;
 	struct dentry *dentry;
-	struct inode *inode;
 	int err;
 
-	inode = base->d_inode;
+	err = exec_permission(inode);
+	if (err)
+		return ERR_PTR(err);
 
 	/*
 	 * See if the low-level filesystem might want
@@ -1161,11 +1163,6 @@ out:
  */
 static struct dentry *lookup_hash(struct nameidata *nd)
 {
-	int err;
-
-	err = exec_permission(nd->path.dentry->d_inode);
-	if (err)
-		return ERR_PTR(err);
 	return __lookup_hash(&nd->last, nd->path.dentry, nd);
 }
 
@@ -1213,9 +1210,6 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
 	if (err)
 		return ERR_PTR(err);
 
-	err = exec_permission(base->d_inode);
-	if (err)
-		return ERR_PTR(err);
 	return __lookup_hash(&this, base, NULL);
 }
 
@@ -1580,6 +1574,7 @@ static struct file *finish_open(struct nameidata *nd,
 	 */
 	if (will_truncate)
 		mnt_drop_write(nd->path.mnt);
+	path_put(&nd->path);
 	return filp;
 
 exit:
@@ -1681,6 +1676,7 @@ static struct file *do_last(struct nameidata *nd, struct path *path,
 		}
 		filp = nameidata_to_filp(nd);
 		mnt_drop_write(nd->path.mnt);
+		path_put(&nd->path);
 		if (!IS_ERR(filp)) {
 			error = ima_file_check(filp, acc_mode);
 			if (error) {
@@ -1752,6 +1748,9 @@ struct file *do_filp_open(int dfd, const char *pathname,
 	if (!(open_flag & O_CREAT))
 		mode = 0;
 
+	/* Must never be set by userspace */
+	open_flag &= ~FMODE_NONOTIFY;
+
 	/*
 	 * O_SYNC is implemented as __O_SYNC|O_DSYNC.  As many places only
 	 * check for O_DSYNC if the need any syncing at all we enforce it's
@@ -2291,7 +2290,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
 			goto slashes;
 		inode = dentry->d_inode;
 		if (inode)
-			atomic_inc(&inode->i_count);
+			ihold(inode);
 		error = mnt_want_write(nd.path.mnt);
 		if (error)
 			goto exit2;
diff --git a/fs/namespace.c b/fs/namespace.c
index 7ca5182c0be..3dbfc072ec7 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -13,7 +13,6 @@
 #include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/percpu.h>
-#include <linux/smp_lock.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/acct.h>
@@ -595,7 +594,7 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
 				goto out_free;
 		}
 
-		mnt->mnt_flags = old->mnt_flags;
+		mnt->mnt_flags = old->mnt_flags & ~MNT_WRITE_HOLD;
 		atomic_inc(&sb->s_active);
 		mnt->mnt_sb = sb;
 		mnt->mnt_root = dget(root);
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index aac8832e919..f22b12e7d33 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -19,7 +19,6 @@
 #include <linux/mm.h>
 #include <asm/uaccess.h>
 #include <asm/byteorder.h>
-#include <linux/smp_lock.h>
 
 #include <linux/ncp_fs.h>
 
diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c
index 6c754f70c52..cb50aaf981d 100644
--- a/fs/ncpfs/file.c
+++ b/fs/ncpfs/file.c
@@ -17,7 +17,6 @@
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/sched.h>
-#include <linux/smp_lock.h>
 
 #include <linux/ncp_fs.h>
 #include "ncplib_kernel.h"
diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c
index 985fabb26ac..8fb93b604e7 100644
--- a/fs/ncpfs/inode.c
+++ b/fs/ncpfs/inode.c
@@ -26,7 +26,6 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/init.h>
-#include <linux/smp_lock.h>
 #include <linux/vfs.h>
 #include <linux/mount.h>
 #include <linux/seq_file.h>
@@ -1020,16 +1019,16 @@ out:
 	return result;
 }
 
-static int ncp_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *ncp_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_nodev(fs_type, flags, data, ncp_fill_super, mnt);
+	return mount_nodev(fs_type, flags, data, ncp_fill_super);
 }
 
 static struct file_system_type ncp_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "ncpfs",
-	.get_sb		= ncp_get_sb,
+	.mount		= ncp_mount,
 	.kill_sb	= kill_anon_super,
 	.fs_flags	= FS_BINARY_MOUNTDATA,
 };
diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c
index c2a1f9a155c..d40a547e337 100644
--- a/fs/ncpfs/ioctl.c
+++ b/fs/ncpfs/ioctl.c
@@ -17,7 +17,6 @@
 #include <linux/mount.h>
 #include <linux/slab.h>
 #include <linux/highuid.h>
-#include <linux/smp_lock.h>
 #include <linux/vmalloc.h>
 #include <linux/sched.h>
 
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index fd667652c50..ba306658a6d 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -1,7 +1,6 @@
 config NFS_FS
 	tristate "NFS client support"
 	depends on INET && FILE_LOCKING
-	depends on BKL # fix as soon as lockd is done
 	select LOCKD
 	select SUNRPC
 	select NFS_ACL_SUPPORT if NFS_V3_ACL
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index aeec017fe81..93a8b3bd69e 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -9,7 +9,6 @@
 #include <linux/completion.h>
 #include <linux/ip.h>
 #include <linux/module.h>
-#include <linux/smp_lock.h>
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/svcsock.h>
 #include <linux/nfs_fs.h>
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 232a7eead33..1fd62fc49be 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -11,7 +11,6 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
-#include <linux/smp_lock.h>
 #include <linux/spinlock.h>
 
 #include <linux/nfs4.h>
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 257e4052492..996dd8989a9 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -34,6 +34,7 @@
 #include <linux/mount.h>
 #include <linux/sched.h>
 #include <linux/vmalloc.h>
+#include <linux/kmemleak.h>
 
 #include "delegation.h"
 #include "iostat.h"
@@ -56,7 +57,7 @@ static int nfs_rename(struct inode *, struct dentry *,
 		      struct inode *, struct dentry *);
 static int nfs_fsync_dir(struct file *, int);
 static loff_t nfs_llseek_dir(struct file *, loff_t, int);
-static int nfs_readdir_clear_array(struct page*, gfp_t);
+static void nfs_readdir_clear_array(struct page*);
 
 const struct file_operations nfs_dir_operations = {
 	.llseek		= nfs_llseek_dir,
@@ -82,8 +83,8 @@ const struct inode_operations nfs_dir_inode_operations = {
 	.setattr	= nfs_setattr,
 };
 
-const struct address_space_operations nfs_dir_addr_space_ops = {
-	.releasepage = nfs_readdir_clear_array,
+const struct address_space_operations nfs_dir_aops = {
+	.freepage = nfs_readdir_clear_array,
 };
 
 #ifdef CONFIG_NFS_V3
@@ -161,6 +162,7 @@ struct nfs_cache_array_entry {
 	u64 cookie;
 	u64 ino;
 	struct qstr string;
+	unsigned char d_type;
 };
 
 struct nfs_cache_array {
@@ -170,14 +172,13 @@ struct nfs_cache_array {
 	struct nfs_cache_array_entry array[0];
 };
 
-#define MAX_READDIR_ARRAY ((PAGE_SIZE - sizeof(struct nfs_cache_array)) / sizeof(struct nfs_cache_array_entry))
-
 typedef __be32 * (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, struct nfs_server *, int);
 typedef struct {
 	struct file	*file;
 	struct page	*page;
 	unsigned long	page_index;
 	u64		*dir_cookie;
+	u64		last_cookie;
 	loff_t		current_index;
 	decode_dirent_t	decode;
 
@@ -194,9 +195,13 @@ typedef struct {
 static
 struct nfs_cache_array *nfs_readdir_get_array(struct page *page)
 {
+	void *ptr;
 	if (page == NULL)
 		return ERR_PTR(-EIO);
-	return (struct nfs_cache_array *)kmap(page);
+	ptr = kmap(page);
+	if (ptr == NULL)
+		return ERR_PTR(-ENOMEM);
+	return ptr;
 }
 
 static
@@ -209,14 +214,15 @@ void nfs_readdir_release_array(struct page *page)
  * we are freeing strings created by nfs_add_to_readdir_array()
  */
 static
-int nfs_readdir_clear_array(struct page *page, gfp_t mask)
+void nfs_readdir_clear_array(struct page *page)
 {
-	struct nfs_cache_array *array = nfs_readdir_get_array(page);
+	struct nfs_cache_array *array;
 	int i;
+
+	array = kmap_atomic(page, KM_USER0);
 	for (i = 0; i < array->size; i++)
 		kfree(array->array[i].string.name);
-	nfs_readdir_release_array(page);
-	return 0;
+	kunmap_atomic(array, KM_USER0);
 }
 
 /*
@@ -231,6 +237,11 @@ int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int le
 	string->name = kmemdup(name, len, GFP_KERNEL);
 	if (string->name == NULL)
 		return -ENOMEM;
+	/*
+	 * Avoid a kmemleak false positive. The pointer to the name is stored
+	 * in a page cache page which kmemleak does not scan.
+	 */
+	kmemleak_not_leak(string->name);
 	string->hash = full_name_hash(name, len);
 	return 0;
 }
@@ -244,20 +255,24 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
 
 	if (IS_ERR(array))
 		return PTR_ERR(array);
-	ret = -EIO;
-	if (array->size >= MAX_READDIR_ARRAY)
-		goto out;
 
 	cache_entry = &array->array[array->size];
+
+	/* Check that this entry lies within the page bounds */
+	ret = -ENOSPC;
+	if ((char *)&cache_entry[1] - (char *)page_address(page) > PAGE_SIZE)
+		goto out;
+
 	cache_entry->cookie = entry->prev_cookie;
 	cache_entry->ino = entry->ino;
+	cache_entry->d_type = entry->d_type;
 	ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len);
 	if (ret)
 		goto out;
 	array->last_cookie = entry->cookie;
-	if (entry->eof == 1)
-		array->eof_index = array->size;
 	array->size++;
+	if (entry->eof != 0)
+		array->eof_index = array->size;
 out:
 	nfs_readdir_release_array(page);
 	return ret;
@@ -272,7 +287,7 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri
 	if (diff < 0)
 		goto out_eof;
 	if (diff >= array->size) {
-		if (array->eof_index > 0)
+		if (array->eof_index >= 0)
 			goto out_eof;
 		desc->current_index += array->size;
 		return -EAGAIN;
@@ -281,8 +296,6 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri
 	index = (unsigned int)diff;
 	*desc->dir_cookie = array->array[index].cookie;
 	desc->cache_entry_index = index;
-	if (index == array->eof_index)
-		desc->eof = 1;
 	return 0;
 out_eof:
 	desc->eof = 1;
@@ -296,17 +309,16 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
 	int status = -EAGAIN;
 
 	for (i = 0; i < array->size; i++) {
-		if (i == array->eof_index) {
-			desc->eof = 1;
-			status = -EBADCOOKIE;
-		}
 		if (array->array[i].cookie == *desc->dir_cookie) {
 			desc->cache_entry_index = i;
-			status = 0;
-			break;
+			return 0;
 		}
 	}
-
+	if (array->eof_index >= 0) {
+		status = -EBADCOOKIE;
+		if (*desc->dir_cookie == array->last_cookie)
+			desc->eof = 1;
+	}
 	return status;
 }
 
@@ -314,10 +326,7 @@ static
 int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
 {
 	struct nfs_cache_array *array;
-	int status = -EBADCOOKIE;
-
-	if (desc->dir_cookie == NULL)
-		goto out;
+	int status;
 
 	array = nfs_readdir_get_array(desc->page);
 	if (IS_ERR(array)) {
@@ -330,6 +339,10 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
 	else
 		status = nfs_readdir_search_for_cookie(array, desc);
 
+	if (status == -EAGAIN) {
+		desc->last_cookie = array->last_cookie;
+		desc->page_index++;
+	}
 	nfs_readdir_release_array(desc->page);
 out:
 	return status;
@@ -381,13 +394,9 @@ int xdr_decode(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry, struct x
 static
 int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
 {
-	struct nfs_inode *node;
 	if (dentry->d_inode == NULL)
 		goto different;
-	node = NFS_I(dentry->d_inode);
-	if (node->fh.size != entry->fh->size)
-		goto different;
-	if (strncmp(node->fh.data, entry->fh->data, node->fh.size) != 0)
+	if (nfs_compare_fh(entry->fh, NFS_FH(dentry->d_inode)) != 0)
 		goto different;
 	return 1;
 different:
@@ -449,14 +458,15 @@ out:
 
 /* Perform conversion from xdr to cache array */
 static
-void nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
+int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
 				void *xdr_page, struct page *page, unsigned int buflen)
 {
 	struct xdr_stream stream;
 	struct xdr_buf buf;
 	__be32 *ptr = xdr_page;
-	int status;
 	struct nfs_cache_array *array;
+	unsigned int count = 0;
+	int status;
 
 	buf.head->iov_base = xdr_page;
 	buf.head->iov_len = buflen;
@@ -471,21 +481,32 @@ void nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *e
 
 	do {
 		status = xdr_decode(desc, entry, &stream);
-		if (status != 0)
+		if (status != 0) {
+			if (status == -EAGAIN)
+				status = 0;
 			break;
+		}
 
-		if (nfs_readdir_add_to_array(entry, page) == -1)
-			break;
-		if (desc->plus == 1)
+		count++;
+
+		if (desc->plus != 0)
 			nfs_prime_dcache(desc->file->f_path.dentry, entry);
+
+		status = nfs_readdir_add_to_array(entry, page);
+		if (status != 0)
+			break;
 	} while (!entry->eof);
 
-	if (status == -EBADCOOKIE && entry->eof) {
+	if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) {
 		array = nfs_readdir_get_array(page);
-		array->eof_index = array->size - 1;
-		status = 0;
-		nfs_readdir_release_array(page);
+		if (!IS_ERR(array)) {
+			array->eof_index = array->size;
+			status = 0;
+			nfs_readdir_release_array(page);
+		} else
+			status = PTR_ERR(array);
 	}
+	return status;
 }
 
 static
@@ -537,11 +558,11 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 	struct nfs_entry entry;
 	struct file	*file = desc->file;
 	struct nfs_cache_array *array;
-	int status = 0;
+	int status = -ENOMEM;
 	unsigned int array_size = ARRAY_SIZE(pages);
 
 	entry.prev_cookie = 0;
-	entry.cookie = *desc->dir_cookie;
+	entry.cookie = desc->last_cookie;
 	entry.eof = 0;
 	entry.fh = nfs_alloc_fhandle();
 	entry.fattr = nfs_alloc_fattr();
@@ -549,6 +570,10 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 		goto out;
 
 	array = nfs_readdir_get_array(page);
+	if (IS_ERR(array)) {
+		status = PTR_ERR(array);
+		goto out;
+	}
 	memset(array, 0, sizeof(struct nfs_cache_array));
 	array->eof_index = -1;
 
@@ -556,12 +581,19 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 	if (!pages_ptr)
 		goto out_release_array;
 	do {
+		unsigned int pglen;
 		status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode);
 
 		if (status < 0)
 			break;
-		nfs_readdir_page_filler(desc, &entry, pages_ptr, page, array_size * PAGE_SIZE);
-	} while (array->eof_index < 0 && array->size < MAX_READDIR_ARRAY);
+		pglen = status;
+		status = nfs_readdir_page_filler(desc, &entry, pages_ptr, page, pglen);
+		if (status < 0) {
+			if (status == -ENOSPC)
+				status = 0;
+			break;
+		}
+	} while (array->eof_index < 0);
 
 	nfs_readdir_free_large_page(pages_ptr, pages, array_size);
 out_release_array:
@@ -582,8 +614,10 @@ static
 int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
 {
 	struct inode	*inode = desc->file->f_path.dentry->d_inode;
+	int ret;
 
-	if (nfs_readdir_xdr_to_array(desc, page, inode) < 0)
+	ret = nfs_readdir_xdr_to_array(desc, page, inode);
+	if (ret < 0)
 		goto error;
 	SetPageUptodate(page);
 
@@ -595,12 +629,14 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
 	return 0;
  error:
 	unlock_page(page);
-	return -EIO;
+	return ret;
 }
 
 static
 void cache_page_release(nfs_readdir_descriptor_t *desc)
 {
+	if (!desc->page->mapping)
+		nfs_readdir_clear_array(desc->page);
 	page_cache_release(desc->page);
 	desc->page = NULL;
 }
@@ -608,12 +644,8 @@ void cache_page_release(nfs_readdir_descriptor_t *desc)
 static
 struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
 {
-	struct page *page;
-	page = read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping,
+	return read_cache_page(desc->file->f_path.dentry->d_inode->i_mapping,
 			desc->page_index, (filler_t *)nfs_readdir_filler, desc);
-	if (IS_ERR(page))
-		desc->eof = 1;
-	return page;
 }
 
 /*
@@ -629,9 +661,8 @@ int find_cache_page(nfs_readdir_descriptor_t *desc)
 		return PTR_ERR(desc->page);
 
 	res = nfs_readdir_search_array(desc);
-	if (res == 0)
-		return 0;
-	cache_page_release(desc);
+	if (res != 0)
+		cache_page_release(desc);
 	return res;
 }
 
@@ -639,22 +670,18 @@ int find_cache_page(nfs_readdir_descriptor_t *desc)
 static inline
 int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
 {
-	int res = -EAGAIN;
+	int res;
 
-	while (1) {
-		res = find_cache_page(desc);
-		if (res != -EAGAIN)
-			break;
-		desc->page_index++;
+	if (desc->page_index == 0) {
+		desc->current_index = 0;
+		desc->last_cookie = 0;
 	}
+	do {
+		res = find_cache_page(desc);
+	} while (res == -EAGAIN);
 	return res;
 }
 
-static inline unsigned int dt_type(struct inode *inode)
-{
-	return (inode->i_mode >> 12) & 15;
-}
-
 /*
  * Once we've found the start of the dirent within a page: fill 'er up...
  */
@@ -666,35 +693,35 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
 	int i = 0;
 	int res = 0;
 	struct nfs_cache_array *array = NULL;
-	unsigned int d_type = DT_UNKNOWN;
-	struct dentry *dentry = NULL;
 
 	array = nfs_readdir_get_array(desc->page);
+	if (IS_ERR(array)) {
+		res = PTR_ERR(array);
+		goto out;
+	}
 
 	for (i = desc->cache_entry_index; i < array->size; i++) {
-		d_type = DT_UNKNOWN;
+		struct nfs_cache_array_entry *ent;
 
-		res = filldir(dirent, array->array[i].string.name,
-			array->array[i].string.len, file->f_pos,
-			nfs_compat_user_ino64(array->array[i].ino), d_type);
-		if (res < 0)
+		ent = &array->array[i];
+		if (filldir(dirent, ent->string.name, ent->string.len,
+		    file->f_pos, nfs_compat_user_ino64(ent->ino),
+		    ent->d_type) < 0) {
+			desc->eof = 1;
 			break;
+		}
 		file->f_pos++;
-		desc->cache_entry_index = i;
 		if (i < (array->size-1))
 			*desc->dir_cookie = array->array[i+1].cookie;
 		else
 			*desc->dir_cookie = array->last_cookie;
-		if (i == array->eof_index) {
-			desc->eof = 1;
-			break;
-		}
 	}
+	if (array->eof_index >= 0)
+		desc->eof = 1;
 
 	nfs_readdir_release_array(desc->page);
+out:
 	cache_page_release(desc);
-	if (dentry != NULL)
-		dput(dentry);
 	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
 			(unsigned long long)*desc->dir_cookie, res);
 	return res;
@@ -729,13 +756,14 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 		goto out;
 	}
 
-	if (nfs_readdir_xdr_to_array(desc, page, inode) == -1) {
-		status = -EIO;
-		goto out_release;
-	}
-
 	desc->page_index = 0;
+	desc->last_cookie = *desc->dir_cookie;
 	desc->page = page;
+
+	status = nfs_readdir_xdr_to_array(desc, page, inode);
+	if (status < 0)
+		goto out_release;
+
 	status = nfs_do_filldir(desc, dirent, filldir);
 
  out:
@@ -757,7 +785,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	struct inode	*inode = dentry->d_inode;
 	nfs_readdir_descriptor_t my_desc,
 			*desc = &my_desc;
-	int res = -ENOMEM;
+	int res;
 
 	dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
 			dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -782,18 +810,18 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	if (res < 0)
 		goto out;
 
-	while (desc->eof != 1) {
+	do {
 		res = readdir_search_pagecache(desc);
 
 		if (res == -EBADCOOKIE) {
+			res = 0;
 			/* This means either end of directory */
 			if (*desc->dir_cookie && desc->eof == 0) {
 				/* Or that the server has 'lost' a cookie */
 				res = uncached_readdir(desc, dirent, filldir);
-				if (res >= 0)
+				if (res == 0)
 					continue;
 			}
-			res = 0;
 			break;
 		}
 		if (res == -ETOOSMALL && desc->plus) {
@@ -808,11 +836,9 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 			break;
 
 		res = nfs_do_filldir(desc, dirent, filldir);
-		if (res < 0) {
-			res = 0;
+		if (res < 0)
 			break;
-		}
-	}
+	} while (!desc->eof);
 out:
 	nfs_unblock_sillyrename(dentry);
 	if (res > 0)
@@ -1345,12 +1371,12 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry
 				res = NULL;
 				goto out;
 			/* This turned out not to be a regular file */
-			case -EISDIR:
 			case -ENOTDIR:
 				goto no_open;
 			case -ELOOP:
 				if (!(nd->intent.open.flags & O_NOFOLLOW))
 					goto no_open;
+			/* case -EISDIR: */
 			/* case -EINVAL: */
 			default:
 				res = ERR_CAST(inode);
@@ -1801,7 +1827,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
 	d_drop(dentry);
 	error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
 	if (error == 0) {
-		atomic_inc(&inode->i_count);
+		ihold(inode);
 		d_add(dentry, inode);
 	}
 	return error;
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 064a8096167..e6ace0d93c7 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -867,13 +867,13 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
 		goto out;
 	nfs_alloc_commit_data(dreq);
 
-	if (dreq->commit_data == NULL || count < wsize)
+	if (dreq->commit_data == NULL || count <= wsize)
 		sync = NFS_FILE_SYNC;
 
 	dreq->inode = inode;
 	dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
 	dreq->l_ctx = nfs_get_lock_context(dreq->ctx);
-	if (dreq->l_ctx != NULL)
+	if (dreq->l_ctx == NULL)
 		goto out_release;
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index e756075637b..7bf029ef408 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -693,6 +693,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 {
 	struct inode *inode = filp->f_mapping->host;
 	int status = 0;
+	unsigned int saved_type = fl->fl_type;
 
 	/* Try local locking first */
 	posix_test_lock(filp, fl);
@@ -700,6 +701,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 		/* found a conflict */
 		goto out;
 	}
+	fl->fl_type = saved_type;
 
 	if (nfs_have_delegation(inode, FMODE_READ))
 		goto out_noconflict;
@@ -884,6 +886,5 @@ static int nfs_setlease(struct file *file, long arg, struct file_lock **fl)
 	dprintk("NFS: setlease(%s/%s, arg=%ld)\n",
 			file->f_path.dentry->d_parent->d_name.name,
 			file->f_path.dentry->d_name.name, arg);
-
 	return -EINVAL;
 }
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index a70e446e160..ac7b814ce16 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -54,8 +54,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
 			iput(inode);
 			return -ENOMEM;
 		}
-		/* Circumvent igrab(): we know the inode is not being freed */
-		atomic_inc(&inode->i_count);
+		ihold(inode);
 		/*
 		 * Ensure that this dentry is invisible to d_find_alias().
 		 * Otherwise, it may be spliced into the tree by
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index dec47ed8b6b..4e2d9b6b138 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -123,7 +123,7 @@ static ssize_t nfs_idmap_get_desc(const char *name, size_t namelen,
 	size_t desclen = typelen + namelen + 2;
 
 	*desc = kmalloc(desclen, GFP_KERNEL);
-	if (!desc)
+	if (!*desc)
 		return -ENOMEM;
 
 	cp = *desc;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 314f5716460..e67e31c7341 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -289,6 +289,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 		} else if (S_ISDIR(inode->i_mode)) {
 			inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
 			inode->i_fop = &nfs_dir_operations;
+			inode->i_data.a_ops = &nfs_dir_aops;
 			if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS))
 				set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
 			/* Deal with crossing mountpoints */
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index db08ff3ff45..e6356b750b7 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -362,6 +362,15 @@ unsigned int nfs_page_length(struct page *page)
 }
 
 /*
+ * Convert a umode to a dirent->d_type
+ */
+static inline
+unsigned char nfs_umode_to_dtype(umode_t mode)
+{
+	return (mode >> 12) & 15;
+}
+
+/*
  * Determine the number of pages in an array of length 'len' and
  * with a base offset of 'base'
  */
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index eceafe74f47..4f981f1f668 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -505,13 +505,13 @@ static struct rpc_procinfo mnt3_procedures[] = {
 
 static struct rpc_version mnt_version1 = {
 	.number		= 1,
-	.nrprocs	= 2,
+	.nrprocs	= ARRAY_SIZE(mnt_procedures),
 	.procs		= mnt_procedures,
 };
 
 static struct rpc_version mnt_version3 = {
 	.number		= 3,
-	.nrprocs	= 2,
+	.nrprocs	= ARRAY_SIZE(mnt3_procedures),
 	.procs		= mnt3_procedures,
 };
 
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index e6bf45710cc..5914a1911c9 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -423,7 +423,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
 	struct page **page;
 	size_t hdrlen;
 	unsigned int pglen, recvd;
-	int status, nr = 0;
+	int status;
 
 	if ((status = ntohl(*p++)))
 		return nfs_stat_to_errno(status);
@@ -443,7 +443,7 @@ nfs_xdr_readdirres(struct rpc_rqst *req, __be32 *p, void *dummy)
 	if (pglen > recvd)
 		pglen = recvd;
 	page = rcvbuf->pages;
-	return nr;
+	return pglen;
 }
 
 static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
@@ -485,6 +485,8 @@ nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_se
 	entry->prev_cookie	  = entry->cookie;
 	entry->cookie	  = ntohl(*p++);
 
+	entry->d_type = DT_UNKNOWN;
+
 	p = xdr_inline_peek(xdr, 8);
 	if (p != NULL)
 		entry->eof = !p[0] && p[1];
@@ -495,7 +497,7 @@ nfs_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_se
 
 out_overflow:
 	print_overflow_msg(__func__, xdr);
-	return ERR_PTR(-EIO);
+	return ERR_PTR(-EAGAIN);
 }
 
 /*
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index d9a5e832c25..f6cc60f06da 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -555,7 +555,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
 	struct page **page;
 	size_t hdrlen;
 	u32 recvd, pglen;
-	int status, nr = 0;
+	int status;
 
 	status = ntohl(*p++);
 	/* Decode post_op_attrs */
@@ -586,7 +586,7 @@ nfs3_xdr_readdirres(struct rpc_rqst *req, __be32 *p, struct nfs3_readdirres *res
 		pglen = recvd;
 	page = rcvbuf->pages;
 
-	return nr;
+	return pglen;
 }
 
 __be32 *
@@ -622,11 +622,13 @@ nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_s
 	entry->prev_cookie = entry->cookie;
 	p = xdr_decode_hyper(p, &entry->cookie);
 
+	entry->d_type = DT_UNKNOWN;
 	if (plus) {
 		entry->fattr->valid = 0;
 		p = xdr_decode_post_op_attr_stream(xdr, entry->fattr);
 		if (IS_ERR(p))
 			goto out_overflow_exit;
+		entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
 		/* In fact, a post_op_fh3: */
 		p = xdr_inline_decode(xdr, 4);
 		if (unlikely(!p))
@@ -656,7 +658,7 @@ nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, struct nfs_s
 out_overflow:
 	print_overflow_msg(__func__, xdr);
 out_overflow_exit:
-	return ERR_PTR(-EIO);
+	return ERR_PTR(-EAGAIN);
 }
 
 /*
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 32c8758c99f..4435e5e1f90 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -429,7 +429,7 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
 		 * returned NFS4ERR_DELAY as per Section 2.10.6.2
 		 * of RFC5661.
 		 */
-		dprintk("%s: slot=%ld seq=%d: Operation in progress\n",
+		dprintk("%s: slot=%td seq=%d: Operation in progress\n",
 			__func__,
 			res->sr_slot - res->sr_session->fc_slot_table.slots,
 			res->sr_slot->seq_nr);
@@ -573,7 +573,7 @@ int nfs4_setup_sequence(const struct nfs_server *server,
 		goto out;
 	}
 
-	dprintk("--> %s clp %p session %p sr_slot %ld\n",
+	dprintk("--> %s clp %p session %p sr_slot %td\n",
 		__func__, session->clp, session, res->sr_slot ?
 			res->sr_slot - session->fc_slot_table.slots : -1);
 
@@ -2852,8 +2852,10 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
 	nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
 	res.pgbase = args.pgbase;
 	status = nfs4_call_sync(NFS_SERVER(dir), &msg, &args, &res, 0);
-	if (status == 0)
+	if (status >= 0) {
 		memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
+		status += args.pgbase;
+	}
 
 	nfs_invalidate_atime(dir);
 
@@ -3359,6 +3361,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
 	ret = nfs_revalidate_inode(server, inode);
 	if (ret < 0)
 		return ret;
+	if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
+		nfs_zap_acl_cache(inode);
 	ret = nfs4_read_cached_acl(inode, buf, buflen);
 	if (ret != -ENOENT)
 		return ret;
@@ -3387,6 +3391,13 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
 	nfs_inode_return_delegation(inode);
 	buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
 	ret = nfs4_call_sync(server, &msg, &arg, &res, 1);
+	/*
+	 * Acl update can result in inode attribute update.
+	 * so mark the attribute cache invalid.
+	 */
+	spin_lock(&inode->i_lock);
+	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR;
+	spin_unlock(&inode->i_lock);
 	nfs_access_zap_cache(inode);
 	nfs_zap_acl_cache(inode);
 	return ret;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index f313c4cce7e..9f1826b012e 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4518,7 +4518,7 @@ static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct n
 	xdr_read_pages(xdr, pglen);
 
 
-	return 0;
+	return pglen;
 }
 
 static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
@@ -6208,6 +6208,10 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 	if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID)
 		entry->ino = entry->fattr->fileid;
 
+	entry->d_type = DT_UNKNOWN;
+	if (entry->fattr->valid & NFS_ATTR_FATTR_TYPE)
+		entry->d_type = nfs_umode_to_dtype(entry->fattr->mode);
+
 	if (verify_attr_len(xdr, p, len) < 0)
 		goto out_overflow;
 
@@ -6221,7 +6225,7 @@ __be32 *nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
 
 out_overflow:
 	print_overflow_msg(__func__, xdr);
-	return ERR_PTR(-EIO);
+	return ERR_PTR(-EAGAIN);
 }
 
 /*
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 460df365288..903908a2002 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -101,6 +101,7 @@ static char nfs_export_path[NFS_MAXPATHLEN + 1] __initdata = "";
 /* server:export path string passed to super.c */
 static char nfs_root_device[NFS_MAXPATHLEN + 1] __initdata = "";
 
+#ifdef RPC_DEBUG
 /*
  * When the "nfsrootdebug" kernel command line option is specified,
  * enable debugging messages for NFSROOT.
@@ -112,6 +113,7 @@ static int __init nfs_root_debug(char *__unused)
 }
 
 __setup("nfsrootdebug", nfs_root_debug);
+#endif
 
 /*
  *  Parse NFS server and directory information passed on the kernel
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 919490232e1..b68536cc904 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -65,6 +65,13 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
 	if (req == NULL)
 		return ERR_PTR(-ENOMEM);
 
+	/* get lock context early so we can deal with alloc failures */
+	req->wb_lock_context = nfs_get_lock_context(ctx);
+	if (req->wb_lock_context == NULL) {
+		nfs_page_free(req);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	/* Initialize the request struct. Initially, we assume a
 	 * long write-back delay. This will be adjusted in
 	 * update_nfs_request below if the region is not locked. */
@@ -79,7 +86,6 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
 	req->wb_pgbase	= offset;
 	req->wb_bytes   = count;
 	req->wb_context = get_nfs_open_context(ctx);
-	req->wb_lock_context = nfs_get_lock_context(ctx);
 	kref_init(&req->wb_kref);
 	return req;
 }
@@ -109,7 +115,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
 {
 	if (!nfs_lock_request_dontget(req))
 		return 0;
-	if (req->wb_page != NULL)
+	if (test_bit(PG_MAPPED, &req->wb_flags))
 		radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
 	return 1;
 }
@@ -119,7 +125,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
  */
 void nfs_clear_page_tag_locked(struct nfs_page *req)
 {
-	if (req->wb_page != NULL) {
+	if (test_bit(PG_MAPPED, &req->wb_flags)) {
 		struct inode *inode = req->wb_context->path.dentry->d_inode;
 		struct nfs_inode *nfsi = NFS_I(inode);
 
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index e4b62c6f5a6..aedcaa7f291 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -152,7 +152,6 @@ static void nfs_readpage_release(struct nfs_page *req)
 			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
 			req->wb_bytes,
 			(long long)req_offset(req));
-	nfs_clear_request(req);
 	nfs_release_request(req);
 }
 
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 3600ec700d5..4100630c9a5 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -39,7 +39,6 @@
 #include <linux/nfs_mount.h>
 #include <linux/nfs4_mount.h>
 #include <linux/lockd/bind.h>
-#include <linux/smp_lock.h>
 #include <linux/seq_file.h>
 #include <linux/mount.h>
 #include <linux/mnt_namespace.h>
@@ -67,6 +66,12 @@
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
 
+#ifdef CONFIG_NFS_V3
+#define NFS_DEFAULT_VERSION 3
+#else
+#define NFS_DEFAULT_VERSION 2
+#endif
+
 enum {
 	/* Mount options that take no arguments */
 	Opt_soft, Opt_hard,
@@ -260,8 +265,8 @@ static int  nfs_statfs(struct dentry *, struct kstatfs *);
 static int  nfs_show_options(struct seq_file *, struct vfsmount *);
 static int  nfs_show_stats(struct seq_file *, struct vfsmount *);
 static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *);
-static int nfs_xdev_get_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
+		int flags, const char *dev_name, void *raw_data);
 static void nfs_put_super(struct super_block *);
 static void nfs_kill_super(struct super_block *);
 static int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
@@ -277,7 +282,7 @@ static struct file_system_type nfs_fs_type = {
 struct file_system_type nfs_xdev_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "nfs",
-	.get_sb		= nfs_xdev_get_sb,
+	.mount		= nfs_xdev_mount,
 	.kill_sb	= nfs_kill_super,
 	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
@@ -302,14 +307,14 @@ static int nfs4_try_mount(int flags, const char *dev_name,
 	struct nfs_parsed_mount_data *data, struct vfsmount *mnt);
 static int nfs4_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
-static int nfs4_remote_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
-static int nfs4_xdev_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *raw_data);
+static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *raw_data);
 static int nfs4_referral_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
-static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *raw_data);
 static void nfs4_kill_super(struct super_block *sb);
 
 static struct file_system_type nfs4_fs_type = {
@@ -323,7 +328,7 @@ static struct file_system_type nfs4_fs_type = {
 static struct file_system_type nfs4_remote_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "nfs4",
-	.get_sb		= nfs4_remote_get_sb,
+	.mount		= nfs4_remote_mount,
 	.kill_sb	= nfs4_kill_super,
 	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
@@ -331,7 +336,7 @@ static struct file_system_type nfs4_remote_fs_type = {
 struct file_system_type nfs4_xdev_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "nfs4",
-	.get_sb		= nfs4_xdev_get_sb,
+	.mount		= nfs4_xdev_mount,
 	.kill_sb	= nfs4_kill_super,
 	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
@@ -339,7 +344,7 @@ struct file_system_type nfs4_xdev_fs_type = {
 static struct file_system_type nfs4_remote_referral_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "nfs4",
-	.get_sb		= nfs4_remote_referral_get_sb,
+	.mount		= nfs4_remote_referral_mount,
 	.kill_sb	= nfs4_kill_super,
 	.fs_flags	= FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
@@ -1064,12 +1069,10 @@ static int nfs_parse_mount_options(char *raw,
 			mnt->flags |= NFS_MOUNT_VER3;
 			mnt->version = 3;
 			break;
-#ifdef CONFIG_NFS_V4
 		case Opt_v4:
 			mnt->flags &= ~NFS_MOUNT_VER3;
 			mnt->version = 4;
 			break;
-#endif
 		case Opt_udp:
 			mnt->flags &= ~NFS_MOUNT_TCP;
 			mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
@@ -1281,12 +1284,10 @@ static int nfs_parse_mount_options(char *raw,
 				mnt->flags |= NFS_MOUNT_VER3;
 				mnt->version = 3;
 				break;
-#ifdef CONFIG_NFS_V4
 			case NFS4_VERSION:
 				mnt->flags &= ~NFS_MOUNT_VER3;
 				mnt->version = 4;
 				break;
-#endif
 			default:
 				goto out_invalid_value;
 			}
@@ -2277,7 +2278,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
 	};
 	int error = -ENOMEM;
 
-	data = nfs_alloc_parsed_mount_data(3);
+	data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION);
 	mntfh = nfs_alloc_fhandle();
 	if (data == NULL || mntfh == NULL)
 		goto out_free_fh;
@@ -2397,9 +2398,9 @@ static void nfs_kill_super(struct super_block *s)
 /*
  * Clone an NFS2/3 server record on xdev traversal (FSID-change)
  */
-static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
-			   const char *dev_name, void *raw_data,
-			   struct vfsmount *mnt)
+static struct dentry *
+nfs_xdev_mount(struct file_system_type *fs_type, int flags,
+		const char *dev_name, void *raw_data)
 {
 	struct nfs_clone_mount *data = raw_data;
 	struct super_block *s;
@@ -2411,7 +2412,7 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
 	};
 	int error;
 
-	dprintk("--> nfs_xdev_get_sb()\n");
+	dprintk("--> nfs_xdev_mount()\n");
 
 	/* create a new volume representation */
 	server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
@@ -2458,28 +2459,26 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
 	}
 
 	s->s_flags |= MS_ACTIVE;
-	mnt->mnt_sb = s;
-	mnt->mnt_root = mntroot;
 
 	/* clone any lsm security options from the parent to the new sb */
 	security_sb_clone_mnt_opts(data->sb, s);
 
-	dprintk("<-- nfs_xdev_get_sb() = 0\n");
-	return 0;
+	dprintk("<-- nfs_xdev_mount() = 0\n");
+	return mntroot;
 
 out_err_nosb:
 	nfs_free_server(server);
 out_err_noserver:
-	dprintk("<-- nfs_xdev_get_sb() = %d [error]\n", error);
-	return error;
+	dprintk("<-- nfs_xdev_mount() = %d [error]\n", error);
+	return ERR_PTR(error);
 
 error_splat_super:
 	if (server && !s->s_root)
 		bdi_unregister(&server->backing_dev_info);
 error_splat_bdi:
 	deactivate_locked_super(s);
-	dprintk("<-- nfs_xdev_get_sb() = %d [splat]\n", error);
-	return error;
+	dprintk("<-- nfs_xdev_mount() = %d [splat]\n", error);
+	return ERR_PTR(error);
 }
 
 #ifdef CONFIG_NFS_V4
@@ -2649,8 +2648,9 @@ out_no_address:
 /*
  * Get the superblock for the NFS4 root partition
  */
-static int nfs4_remote_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt)
+static struct dentry *
+nfs4_remote_mount(struct file_system_type *fs_type, int flags,
+		  const char *dev_name, void *raw_data)
 {
 	struct nfs_parsed_mount_data *data = raw_data;
 	struct super_block *s;
@@ -2714,15 +2714,16 @@ static int nfs4_remote_get_sb(struct file_system_type *fs_type,
 		goto error_splat_root;
 
 	s->s_flags |= MS_ACTIVE;
-	mnt->mnt_sb = s;
-	mnt->mnt_root = mntroot;
-	error = 0;
+
+	security_free_mnt_opts(&data->lsm_opts);
+	nfs_free_fhandle(mntfh);
+	return mntroot;
 
 out:
 	security_free_mnt_opts(&data->lsm_opts);
 out_free_fh:
 	nfs_free_fhandle(mntfh);
-	return error;
+	return ERR_PTR(error);
 
 out_free:
 	nfs_free_server(server);
@@ -2968,9 +2969,9 @@ static void nfs4_kill_super(struct super_block *sb)
 /*
  * Clone an NFS4 server record on xdev traversal (FSID-change)
  */
-static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
-			    const char *dev_name, void *raw_data,
-			    struct vfsmount *mnt)
+static struct dentry *
+nfs4_xdev_mount(struct file_system_type *fs_type, int flags,
+		 const char *dev_name, void *raw_data)
 {
 	struct nfs_clone_mount *data = raw_data;
 	struct super_block *s;
@@ -2982,7 +2983,7 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
 	};
 	int error;
 
-	dprintk("--> nfs4_xdev_get_sb()\n");
+	dprintk("--> nfs4_xdev_mount()\n");
 
 	/* create a new volume representation */
 	server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr);
@@ -3029,32 +3030,30 @@ static int nfs4_xdev_get_sb(struct file_system_type *fs_type, int flags,
 	}
 
 	s->s_flags |= MS_ACTIVE;
-	mnt->mnt_sb = s;
-	mnt->mnt_root = mntroot;
 
 	security_sb_clone_mnt_opts(data->sb, s);
 
-	dprintk("<-- nfs4_xdev_get_sb() = 0\n");
-	return 0;
+	dprintk("<-- nfs4_xdev_mount() = 0\n");
+	return mntroot;
 
 out_err_nosb:
 	nfs_free_server(server);
 out_err_noserver:
-	dprintk("<-- nfs4_xdev_get_sb() = %d [error]\n", error);
-	return error;
+	dprintk("<-- nfs4_xdev_mount() = %d [error]\n", error);
+	return ERR_PTR(error);
 
 error_splat_super:
 	if (server && !s->s_root)
 		bdi_unregister(&server->backing_dev_info);
 error_splat_bdi:
 	deactivate_locked_super(s);
-	dprintk("<-- nfs4_xdev_get_sb() = %d [splat]\n", error);
-	return error;
+	dprintk("<-- nfs4_xdev_mount() = %d [splat]\n", error);
+	return ERR_PTR(error);
 }
 
-static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
-		int flags, const char *dev_name, void *raw_data,
-		struct vfsmount *mnt)
+static struct dentry *
+nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
+			   const char *dev_name, void *raw_data)
 {
 	struct nfs_clone_mount *data = raw_data;
 	struct super_block *s;
@@ -3118,14 +3117,12 @@ static int nfs4_remote_referral_get_sb(struct file_system_type *fs_type,
 	}
 
 	s->s_flags |= MS_ACTIVE;
-	mnt->mnt_sb = s;
-	mnt->mnt_root = mntroot;
 
 	security_sb_clone_mnt_opts(data->sb, s);
 
 	nfs_free_fhandle(mntfh);
 	dprintk("<-- nfs4_referral_get_sb() = 0\n");
-	return 0;
+	return mntroot;
 
 out_err_nosb:
 	nfs_free_server(server);
@@ -3133,7 +3130,7 @@ out_err_noserver:
 	nfs_free_fhandle(mntfh);
 out_err_nofh:
 	dprintk("<-- nfs4_referral_get_sb() = %d [error]\n", error);
-	return error;
+	return ERR_PTR(error);
 
 error_splat_super:
 	if (server && !s->s_root)
@@ -3142,7 +3139,7 @@ error_splat_bdi:
 	deactivate_locked_super(s);
 	nfs_free_fhandle(mntfh);
 	dprintk("<-- nfs4_referral_get_sb() = %d [splat]\n", error);
-	return error;
+	return ERR_PTR(error);
 }
 
 /*
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 9a16bad5d2e..7bdec853140 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -444,9 +444,9 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
 
 	/* set up nfs_renamedata */
 	data->old_dir = old_dir;
-	atomic_inc(&old_dir->i_count);
+	ihold(old_dir);
 	data->new_dir = new_dir;
-	atomic_inc(&new_dir->i_count);
+	ihold(new_dir);
 	data->old_dentry = dget(old_dentry);
 	data->new_dentry = dget(new_dentry);
 	nfs_fattr_init(&data->old_fattr);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 605e292501f..10d648ea128 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -290,9 +290,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st
 	nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
 
 	nfs_pageio_cond_complete(pgio, page->index);
-	ret = nfs_page_async_flush(pgio, page,
-			wbc->sync_mode == WB_SYNC_NONE ||
-			wbc->nonblocking != 0);
+	ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
 	if (ret == -EAGAIN) {
 		redirty_page_for_writepage(wbc, page);
 		ret = 0;
@@ -392,6 +390,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 		if (nfs_have_delegation(inode, FMODE_WRITE))
 			nfsi->change_attr++;
 	}
+	set_bit(PG_MAPPED, &req->wb_flags);
 	SetPagePrivate(req->wb_page);
 	set_page_private(req->wb_page, (unsigned long)req);
 	nfsi->npages++;
@@ -417,6 +416,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 	spin_lock(&inode->i_lock);
 	set_page_private(req->wb_page, 0);
 	ClearPagePrivate(req->wb_page);
+	clear_bit(PG_MAPPED, &req->wb_flags);
 	radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
 	nfsi->npages--;
 	if (!nfsi->npages) {
@@ -424,7 +424,6 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 		iput(inode);
 	} else
 		spin_unlock(&inode->i_lock);
-	nfs_clear_request(req);
 	nfs_release_request(req);
 }
 
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 31a78fce473..18b3e8975fe 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -2,7 +2,6 @@ config NFSD
 	tristate "NFS server support"
 	depends on INET
 	depends on FILE_LOCKING
-	depends on BKL # fix as soon as lockd is done
 	select LOCKD
 	select SUNRPC
 	select EXPORTFS
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 2a533a0af2a..7e84a852cda 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -260,9 +260,11 @@ void fill_post_wcc(struct svc_fh *fhp)
 	err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry,
 			&fhp->fh_post_attr);
 	fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version;
-	if (err)
+	if (err) {
 		fhp->fh_post_saved = 0;
-	else
+		/* Grab the ctime anyway - set_change_info might use it */
+		fhp->fh_post_attr.ctime = fhp->fh_dentry->d_inode->i_ctime;
+	} else
 		fhp->fh_post_saved = 1;
 }
 
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9019e8ec9dc..116cab970e0 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -673,16 +673,17 @@ static void nfsd4_hash_conn(struct nfsd4_conn *conn, struct nfsd4_session *ses)
 	spin_unlock(&clp->cl_lock);
 }
 
-static void nfsd4_register_conn(struct nfsd4_conn *conn)
+static int nfsd4_register_conn(struct nfsd4_conn *conn)
 {
 	conn->cn_xpt_user.callback = nfsd4_conn_lost;
-	register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user);
+	return register_xpt_user(conn->cn_xprt, &conn->cn_xpt_user);
 }
 
 static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses)
 {
 	struct nfsd4_conn *conn;
 	u32 flags = NFS4_CDFC4_FORE;
+	int ret;
 
 	if (ses->se_flags & SESSION4_BACK_CHAN)
 		flags |= NFS4_CDFC4_BACK;
@@ -690,7 +691,10 @@ static __be32 nfsd4_new_conn(struct svc_rqst *rqstp, struct nfsd4_session *ses)
 	if (!conn)
 		return nfserr_jukebox;
 	nfsd4_hash_conn(conn, ses);
-	nfsd4_register_conn(conn);
+	ret = nfsd4_register_conn(conn);
+	if (ret)
+		/* oops; xprt is already down: */
+		nfsd4_conn_lost(&conn->cn_xpt_user);
 	return nfs_ok;
 }
 
@@ -1644,6 +1648,7 @@ static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_sessi
 {
 	struct nfs4_client *clp = ses->se_client;
 	struct nfsd4_conn *c;
+	int ret;
 
 	spin_lock(&clp->cl_lock);
 	c = __nfsd4_find_conn(new->cn_xprt, ses);
@@ -1654,7 +1659,10 @@ static void nfsd4_sequence_check_conn(struct nfsd4_conn *new, struct nfsd4_sessi
 	}
 	__nfsd4_hash_conn(new, ses);
 	spin_unlock(&clp->cl_lock);
-	nfsd4_register_conn(new);
+	ret = nfsd4_register_conn(new);
+	if (ret)
+		/* oops; xprt is already down: */
+		nfsd4_conn_lost(&new->cn_xpt_user);
 	return;
 }
 
@@ -2254,7 +2262,7 @@ nfs4_file_downgrade(struct nfs4_file *fp, unsigned int share_access)
  * Spawn a thread to perform a recall on the delegation represented
  * by the lease (file_lock)
  *
- * Called from break_lease() with lock_kernel() held.
+ * Called from break_lease() with lock_flocks() held.
  * Note: we assume break_lease will only call this *once* for any given
  * lease.
  */
@@ -2278,7 +2286,7 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
 	list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
 	spin_unlock(&recall_lock);
 
-	/* only place dl_time is set. protected by lock_kernel*/
+	/* only place dl_time is set. protected by lock_flocks*/
 	dp->dl_time = get_seconds();
 
 	/*
@@ -2295,7 +2303,7 @@ void nfsd_break_deleg_cb(struct file_lock *fl)
 /*
  * The file_lock is being reapd.
  *
- * Called by locks_free_lock() with lock_kernel() held.
+ * Called by locks_free_lock() with lock_flocks() held.
  */
 static
 void nfsd_release_deleg_cb(struct file_lock *fl)
@@ -2310,23 +2318,7 @@ void nfsd_release_deleg_cb(struct file_lock *fl)
 }
 
 /*
- * Set the delegation file_lock back pointer.
- *
- * Called from setlease() with lock_kernel() held.
- */
-static
-void nfsd_copy_lock_deleg_cb(struct file_lock *new, struct file_lock *fl)
-{
-	struct nfs4_delegation *dp = (struct nfs4_delegation *)new->fl_owner;
-
-	dprintk("NFSD: nfsd_copy_lock_deleg_cb: new fl %p dp %p\n", new, dp);
-	if (!dp)
-		return;
-	dp->dl_flock = new;
-}
-
-/*
- * Called from setlease() with lock_kernel() held
+ * Called from setlease() with lock_flocks() held
  */
 static
 int nfsd_same_client_deleg_cb(struct file_lock *onlist, struct file_lock *try)
@@ -2355,7 +2347,6 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg)
 static const struct lock_manager_operations nfsd_lease_mng_ops = {
 	.fl_break = nfsd_break_deleg_cb,
 	.fl_release_private = nfsd_release_deleg_cb,
-	.fl_copy_lock = nfsd_copy_lock_deleg_cb,
 	.fl_mylease = nfsd_same_client_deleg_cb,
 	.fl_change = nfsd_change_deleg_cb,
 };
@@ -2614,7 +2605,7 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
 	struct nfs4_delegation *dp;
 	struct nfs4_stateowner *sop = stp->st_stateowner;
 	int cb_up = atomic_read(&sop->so_client->cl_cb_set);
-	struct file_lock fl, *flp = &fl;
+	struct file_lock *fl;
 	int status, flag = 0;
 
 	flag = NFS4_OPEN_DELEGATE_NONE;
@@ -2648,21 +2639,28 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
 		flag = NFS4_OPEN_DELEGATE_NONE;
 		goto out;
 	}
-	locks_init_lock(&fl);
-	fl.fl_lmops = &nfsd_lease_mng_ops;
-	fl.fl_flags = FL_LEASE;
-	fl.fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
-	fl.fl_end = OFFSET_MAX;
-	fl.fl_owner =  (fl_owner_t)dp;
-	fl.fl_file = find_readable_file(stp->st_file);
-	BUG_ON(!fl.fl_file);
-	fl.fl_pid = current->tgid;
+	status = -ENOMEM;
+	fl = locks_alloc_lock();
+	if (!fl)
+		goto out;
+	locks_init_lock(fl);
+	fl->fl_lmops = &nfsd_lease_mng_ops;
+	fl->fl_flags = FL_LEASE;
+	fl->fl_type = flag == NFS4_OPEN_DELEGATE_READ? F_RDLCK: F_WRLCK;
+	fl->fl_end = OFFSET_MAX;
+	fl->fl_owner =  (fl_owner_t)dp;
+	fl->fl_file = find_readable_file(stp->st_file);
+	BUG_ON(!fl->fl_file);
+	fl->fl_pid = current->tgid;
+	dp->dl_flock = fl;
 
 	/* vfs_setlease checks to see if delegation should be handed out.
 	 * the lock_manager callbacks fl_mylease and fl_change are used
 	 */
-	if ((status = vfs_setlease(fl.fl_file, fl.fl_type, &flp))) {
+	if ((status = vfs_setlease(fl->fl_file, fl->fl_type, &fl))) {
 		dprintk("NFSD: setlease failed [%d], no delegation\n", status);
+		dp->dl_flock = NULL;
+		locks_free_lock(fl);
 		unhash_delegation(dp);
 		flag = NFS4_OPEN_DELEGATE_NONE;
 		goto out;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index d6dc3f61f8b..4514ebbee4d 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1405,16 +1405,16 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
 	return simple_fill_super(sb, 0x6e667364, nfsd_files);
 }
 
-static int nfsd_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *nfsd_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_single(fs_type, flags, data, nfsd_fill_super, mnt);
+	return mount_single(fs_type, flags, data, nfsd_fill_super);
 }
 
 static struct file_system_type nfsd_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "nfsd",
-	.get_sb		= nfsd_get_sb,
+	.mount		= nfsd_mount,
 	.kill_sb	= kill_litter_super,
 };
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 661a6cf8e82..184938fcff0 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -281,23 +281,13 @@ commit_metadata(struct svc_fh *fhp)
 {
 	struct inode *inode = fhp->fh_dentry->d_inode;
 	const struct export_operations *export_ops = inode->i_sb->s_export_op;
-	int error = 0;
 
 	if (!EX_ISSYNC(fhp->fh_export))
 		return 0;
 
-	if (export_ops->commit_metadata) {
-		error = export_ops->commit_metadata(inode);
-	} else {
-		struct writeback_control wbc = {
-			.sync_mode = WB_SYNC_ALL,
-			.nr_to_write = 0, /* metadata only */
-		};
-
-		error = sync_inode(inode, &wbc);
-	}
-
-	return error;
+	if (export_ops->commit_metadata)
+		return export_ops->commit_metadata(inode);
+	return sync_inode_metadata(inode, 1);
 }
 
 /*
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 4d476ff08ae..60fce3dc5cb 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -484,18 +484,17 @@ static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
 static inline void
 set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
 {
-	BUG_ON(!fhp->fh_pre_saved || !fhp->fh_post_saved);
-	cinfo->atomic = 1;
+	BUG_ON(!fhp->fh_pre_saved);
+	cinfo->atomic = fhp->fh_post_saved;
 	cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode);
-	if (cinfo->change_supported) {
-		cinfo->before_change = fhp->fh_pre_change;
-		cinfo->after_change = fhp->fh_post_change;
-	} else {
-		cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
-		cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
-		cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
-		cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
-	}
+
+	cinfo->before_change = fhp->fh_pre_change;
+	cinfo->after_change = fhp->fh_post_change;
+	cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
+	cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
+	cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
+	cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
+
 }
 
 int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
diff --git a/fs/nilfs2/dat.c b/fs/nilfs2/dat.c
index 49c844dab33..59e5fe742f7 100644
--- a/fs/nilfs2/dat.c
+++ b/fs/nilfs2/dat.c
@@ -335,7 +335,7 @@ int nilfs_dat_move(struct inode *dat, __u64 vblocknr, sector_t blocknr)
 	 * the device at this point.
 	 *
 	 * To prevent nilfs_dat_translate() from returning the
-	 * uncommited block number, this makes a copy of the entry
+	 * uncommitted block number, this makes a copy of the entry
 	 * buffer and redirects nilfs_dat_translate() to the copy.
 	 */
 	if (!buffer_nilfs_redirected(entry_bh)) {
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 33ad25ddd5c..caf9a6a3fb5 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -176,7 +176,6 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
 int nilfs_init_gcinode(struct inode *inode)
 {
 	struct nilfs_inode_info *ii = NILFS_I(inode);
-	struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs;
 
 	inode->i_mode = S_IFREG;
 	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
@@ -186,14 +185,6 @@ int nilfs_init_gcinode(struct inode *inode)
 	ii->i_flags = 0;
 	nilfs_bmap_init_gc(ii->i_bmap);
 
-	/*
-	 * Add the inode to GC inode list. Garbage Collection
-	 * is serialized and no two processes manipulate the
-	 * list simultaneously.
-	 */
-	igrab(inode);
-	list_add(&NILFS_I(inode)->i_dirty, &nilfs->ns_gc_inodes);
-
 	return 0;
 }
 
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index 3e90f86d5bf..b185e937a33 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -337,6 +337,7 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb,
 				   struct nilfs_argv *argv, void *buf)
 {
 	size_t nmembs = argv->v_nmembs;
+	struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs;
 	struct inode *inode;
 	struct nilfs_vdesc *vdesc;
 	struct buffer_head *bh, *n;
@@ -349,10 +350,21 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb,
 		ino = vdesc->vd_ino;
 		cno = vdesc->vd_cno;
 		inode = nilfs_iget_for_gc(sb, ino, cno);
-		if (unlikely(inode == NULL)) {
-			ret = -ENOMEM;
+		if (IS_ERR(inode)) {
+			ret = PTR_ERR(inode);
 			goto failed;
 		}
+		if (list_empty(&NILFS_I(inode)->i_dirty)) {
+			/*
+			 * Add the inode to GC inode list. Garbage Collection
+			 * is serialized and no two processes manipulate the
+			 * list simultaneously.
+			 */
+			igrab(inode);
+			list_add(&NILFS_I(inode)->i_dirty,
+				 &nilfs->ns_gc_inodes);
+		}
+
 		do {
 			ret = nilfs_ioctl_move_inode_block(inode, vdesc,
 							   &buffers);
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 185d1607cb0..6e9557ecf16 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -207,7 +207,7 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
 
 	inode->i_ctime = CURRENT_TIME;
 	inode_inc_link_count(inode);
-	atomic_inc(&inode->i_count);
+	ihold(inode);
 
 	err = nilfs_add_nondir(dentry, inode);
 	if (!err)
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index d926af62617..687d090cea3 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -1609,7 +1609,7 @@ nilfs_copy_replace_page_buffers(struct page *page, struct list_head *out)
 	kunmap_atomic(kaddr, KM_USER0);
 
 	if (!TestSetPageWriteback(clone_page))
-		inc_zone_page_state(clone_page, NR_WRITEBACK);
+		account_page_writeback(clone_page);
 	unlock_page(clone_page);
 
 	return 0;
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 35ae03c0db8..f804d41ec9d 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1141,9 +1141,9 @@ static int nilfs_test_bdev_super(struct super_block *s, void *data)
 	return (void *)s->s_bdev == data;
 }
 
-static int
-nilfs_get_sb(struct file_system_type *fs_type, int flags,
-	     const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *
+nilfs_mount(struct file_system_type *fs_type, int flags,
+	     const char *dev_name, void *data)
 {
 	struct nilfs_super_data sd;
 	struct super_block *s;
@@ -1156,7 +1156,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
 
 	sd.bdev = open_bdev_exclusive(dev_name, mode, fs_type);
 	if (IS_ERR(sd.bdev))
-		return PTR_ERR(sd.bdev);
+		return ERR_CAST(sd.bdev);
 
 	sd.cno = 0;
 	sd.flags = flags;
@@ -1235,9 +1235,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
 	if (!s_new)
 		close_bdev_exclusive(sd.bdev, mode);
 
-	mnt->mnt_sb = s;
-	mnt->mnt_root = root_dentry;
-	return 0;
+	return root_dentry;
 
  failed_super:
 	deactivate_locked_super(s);
@@ -1245,13 +1243,13 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
  failed:
 	if (!s_new)
 		close_bdev_exclusive(sd.bdev, mode);
-	return err;
+	return ERR_PTR(err);
 }
 
 struct file_system_type nilfs_fs_type = {
 	.owner    = THIS_MODULE,
 	.name     = "nilfs2",
-	.get_sb   = nilfs_get_sb,
+	.mount    = nilfs_mount,
 	.kill_sb  = kill_block_super,
 	.fs_flags = FS_REQUIRES_DEV,
 };
diff --git a/fs/notify/Kconfig b/fs/notify/Kconfig
index b388443c3a0..22c629eedd8 100644
--- a/fs/notify/Kconfig
+++ b/fs/notify/Kconfig
@@ -3,4 +3,4 @@ config FSNOTIFY
 
 source "fs/notify/dnotify/Kconfig"
 source "fs/notify/inotify/Kconfig"
-#source "fs/notify/fanotify/Kconfig"
+source "fs/notify/fanotify/Kconfig"
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 85366c78cc3..f35794b97e8 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -92,7 +92,11 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-	wait_event(group->fanotify_data.access_waitq, event->response);
+	wait_event(group->fanotify_data.access_waitq, event->response ||
+				atomic_read(&group->fanotify_data.bypass_perm));
+
+	if (!event->response) /* bypass_perm set */
+		return 0;
 
 	/* userspace responded, convert to something usable */
 	spin_lock(&event->lock);
@@ -131,6 +135,7 @@ static int fanotify_handle_event(struct fsnotify_group *group,
 	BUILD_BUG_ON(FAN_Q_OVERFLOW != FS_Q_OVERFLOW);
 	BUILD_BUG_ON(FAN_OPEN_PERM != FS_OPEN_PERM);
 	BUILD_BUG_ON(FAN_ACCESS_PERM != FS_ACCESS_PERM);
+	BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
@@ -160,20 +165,21 @@ static bool fanotify_should_send_event(struct fsnotify_group *group,
 				       __u32 event_mask, void *data, int data_type)
 {
 	__u32 marks_mask, marks_ignored_mask;
+	struct path *path = data;
 
 	pr_debug("%s: group=%p to_tell=%p inode_mark=%p vfsmnt_mark=%p "
 		 "mask=%x data=%p data_type=%d\n", __func__, group, to_tell,
 		 inode_mark, vfsmnt_mark, event_mask, data, data_type);
 
-	/* sorry, fanotify only gives a damn about files and dirs */
-	if (!S_ISREG(to_tell->i_mode) &&
-	    !S_ISDIR(to_tell->i_mode))
-		return false;
-
 	/* if we don't have enough info to send an event to userspace say no */
 	if (data_type != FSNOTIFY_EVENT_PATH)
 		return false;
 
+	/* sorry, fanotify only gives a damn about files and dirs */
+	if (!S_ISREG(path->dentry->d_inode->i_mode) &&
+	    !S_ISDIR(path->dentry->d_inode->i_mode))
+		return false;
+
 	if (inode_mark && vfsmnt_mark) {
 		marks_mask = (vfsmnt_mark->mask | inode_mark->mask);
 		marks_ignored_mask = (vfsmnt_mark->ignored_mask | inode_mark->ignored_mask);
@@ -194,16 +200,29 @@ static bool fanotify_should_send_event(struct fsnotify_group *group,
 		BUG();
 	}
 
+	if (S_ISDIR(path->dentry->d_inode->i_mode) &&
+	    (marks_ignored_mask & FS_ISDIR))
+		return false;
+
 	if (event_mask & marks_mask & ~marks_ignored_mask)
 		return true;
 
 	return false;
 }
 
+static void fanotify_free_group_priv(struct fsnotify_group *group)
+{
+	struct user_struct *user;
+
+	user = group->fanotify_data.user;
+	atomic_dec(&user->fanotify_listeners);
+	free_uid(user);
+}
+
 const struct fsnotify_ops fanotify_fsnotify_ops = {
 	.handle_event = fanotify_handle_event,
 	.should_send_event = fanotify_should_send_event,
-	.free_group_priv = NULL,
+	.free_group_priv = fanotify_free_group_priv,
 	.free_event_priv = NULL,
 	.freeing_mark = NULL,
 };
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index bbcb98e7fcc..8b61220cffc 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -16,6 +16,10 @@
 
 #include <asm/ioctls.h>
 
+#define FANOTIFY_DEFAULT_MAX_EVENTS	16384
+#define FANOTIFY_DEFAULT_MAX_MARKS	8192
+#define FANOTIFY_DEFAULT_MAX_LISTENERS	128
+
 extern const struct fsnotify_ops fanotify_fsnotify_ops;
 
 static struct kmem_cache *fanotify_mark_cache __read_mostly;
@@ -102,20 +106,29 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
 	return client_fd;
 }
 
-static ssize_t fill_event_metadata(struct fsnotify_group *group,
+static int fill_event_metadata(struct fsnotify_group *group,
 				   struct fanotify_event_metadata *metadata,
 				   struct fsnotify_event *event)
 {
+	int ret = 0;
+
 	pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
 		 group, metadata, event);
 
 	metadata->event_len = FAN_EVENT_METADATA_LEN;
+	metadata->metadata_len = FAN_EVENT_METADATA_LEN;
 	metadata->vers = FANOTIFY_METADATA_VERSION;
 	metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS;
 	metadata->pid = pid_vnr(event->tgid);
-	metadata->fd = create_fd(group, event);
+	if (unlikely(event->mask & FAN_Q_OVERFLOW))
+		metadata->fd = FAN_NOFD;
+	else {
+		metadata->fd = create_fd(group, event);
+		if (metadata->fd < 0)
+			ret = metadata->fd;
+	}
 
-	return metadata->fd;
+	return ret;
 }
 
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
@@ -196,7 +209,7 @@ static int prepare_for_access_response(struct fsnotify_group *group,
 
 	mutex_lock(&group->fanotify_data.access_mutex);
 
-	if (group->fanotify_data.bypass_perm) {
+	if (atomic_read(&group->fanotify_data.bypass_perm)) {
 		mutex_unlock(&group->fanotify_data.access_mutex);
 		kmem_cache_free(fanotify_response_event_cache, re);
 		event->response = FAN_ALLOW;
@@ -253,24 +266,34 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-	fd = fill_event_metadata(group, &fanotify_event_metadata, event);
-	if (fd < 0)
-		return fd;
+	ret = fill_event_metadata(group, &fanotify_event_metadata, event);
+	if (ret < 0)
+		goto out;
 
+	fd = fanotify_event_metadata.fd;
 	ret = prepare_for_access_response(group, event, fd);
 	if (ret)
 		goto out_close_fd;
 
 	ret = -EFAULT;
-	if (copy_to_user(buf, &fanotify_event_metadata, FAN_EVENT_METADATA_LEN))
+	if (copy_to_user(buf, &fanotify_event_metadata,
+			 fanotify_event_metadata.event_len))
 		goto out_kill_access_response;
 
-	return FAN_EVENT_METADATA_LEN;
+	return fanotify_event_metadata.event_len;
 
 out_kill_access_response:
 	remove_access_response(group, event, fd);
 out_close_fd:
-	sys_close(fd);
+	if (fd != FAN_NOFD)
+		sys_close(fd);
+out:
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+	if (event->mask & FAN_ALL_PERM_EVENTS) {
+		event->response = FAN_DENY;
+		wake_up(&group->fanotify_data.access_waitq);
+	}
+#endif
 	return ret;
 }
 
@@ -326,7 +349,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
 		ret = -EAGAIN;
 		if (file->f_flags & O_NONBLOCK)
 			break;
-		ret = -EINTR;
+		ret = -ERESTARTSYS;
 		if (signal_pending(current))
 			break;
 
@@ -372,14 +395,13 @@ static ssize_t fanotify_write(struct file *file, const char __user *buf, size_t
 static int fanotify_release(struct inode *ignored, struct file *file)
 {
 	struct fsnotify_group *group = file->private_data;
-	struct fanotify_response_event *re, *lre;
-
-	pr_debug("%s: file=%p group=%p\n", __func__, file, group);
 
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+	struct fanotify_response_event *re, *lre;
+
 	mutex_lock(&group->fanotify_data.access_mutex);
 
-	group->fanotify_data.bypass_perm = true;
+	atomic_inc(&group->fanotify_data.bypass_perm);
 
 	list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) {
 		pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group,
@@ -554,18 +576,24 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
 				       __u32 mask,
 				       unsigned int flags)
 {
-	__u32 oldmask;
+	__u32 oldmask = -1;
 
 	spin_lock(&fsn_mark->lock);
 	if (!(flags & FAN_MARK_IGNORED_MASK)) {
 		oldmask = fsn_mark->mask;
 		fsnotify_set_mark_mask_locked(fsn_mark, (oldmask | mask));
 	} else {
-		oldmask = fsn_mark->ignored_mask;
-		fsnotify_set_mark_ignored_mask_locked(fsn_mark, (oldmask | mask));
+		__u32 tmask = fsn_mark->ignored_mask | mask;
+		fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
 		if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
 			fsn_mark->flags |= FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY;
 	}
+
+	if (!(flags & FAN_MARK_ONDIR)) {
+		__u32 tmask = fsn_mark->ignored_mask | FAN_ONDIR;
+		fsnotify_set_mark_ignored_mask_locked(fsn_mark, tmask);
+	}
+
 	spin_unlock(&fsn_mark->lock);
 
 	return mask & ~oldmask;
@@ -577,10 +605,12 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
 {
 	struct fsnotify_mark *fsn_mark;
 	__u32 added;
+	int ret = 0;
 
 	fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
 	if (!fsn_mark) {
-		int ret;
+		if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
+			return -ENOSPC;
 
 		fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
 		if (!fsn_mark)
@@ -588,17 +618,16 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
 
 		fsnotify_init_mark(fsn_mark, fanotify_free_mark);
 		ret = fsnotify_add_mark(fsn_mark, group, NULL, mnt, 0);
-		if (ret) {
-			fanotify_free_mark(fsn_mark);
-			return ret;
-		}
+		if (ret)
+			goto err;
 	}
 	added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
-	fsnotify_put_mark(fsn_mark);
+
 	if (added & ~mnt->mnt_fsnotify_mask)
 		fsnotify_recalc_vfsmount_mask(mnt);
-
-	return 0;
+err:
+	fsnotify_put_mark(fsn_mark);
+	return ret;
 }
 
 static int fanotify_add_inode_mark(struct fsnotify_group *group,
@@ -607,12 +636,24 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
 {
 	struct fsnotify_mark *fsn_mark;
 	__u32 added;
+	int ret = 0;
 
 	pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
 
+	/*
+	 * If some other task has this inode open for write we should not add
+	 * an ignored mark, unless that ignored mark is supposed to survive
+	 * modification changes anyway.
+	 */
+	if ((flags & FAN_MARK_IGNORED_MASK) &&
+	    !(flags & FAN_MARK_IGNORED_SURV_MODIFY) &&
+	    (atomic_read(&inode->i_writecount) > 0))
+		return 0;
+
 	fsn_mark = fsnotify_find_inode_mark(group, inode);
 	if (!fsn_mark) {
-		int ret;
+		if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
+			return -ENOSPC;
 
 		fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
 		if (!fsn_mark)
@@ -620,16 +661,16 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
 
 		fsnotify_init_mark(fsn_mark, fanotify_free_mark);
 		ret = fsnotify_add_mark(fsn_mark, group, inode, NULL, 0);
-		if (ret) {
-			fanotify_free_mark(fsn_mark);
-			return ret;
-		}
+		if (ret)
+			goto err;
 	}
 	added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
-	fsnotify_put_mark(fsn_mark);
+
 	if (added & ~inode->i_fsnotify_mask)
 		fsnotify_recalc_inode_mask(inode);
-	return 0;
+err:
+	fsnotify_put_mark(fsn_mark);
+	return ret;
 }
 
 /* fanotify syscalls */
@@ -637,6 +678,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 {
 	struct fsnotify_group *group;
 	int f_flags, fd;
+	struct user_struct *user;
 
 	pr_debug("%s: flags=%d event_f_flags=%d\n",
 		__func__, flags, event_f_flags);
@@ -647,6 +689,12 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 	if (flags & ~FAN_ALL_INIT_FLAGS)
 		return -EINVAL;
 
+	user = get_current_user();
+	if (atomic_read(&user->fanotify_listeners) > FANOTIFY_DEFAULT_MAX_LISTENERS) {
+		free_uid(user);
+		return -EMFILE;
+	}
+
 	f_flags = O_RDWR | FMODE_NONOTIFY;
 	if (flags & FAN_CLOEXEC)
 		f_flags |= O_CLOEXEC;
@@ -655,15 +703,53 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 
 	/* fsnotify_alloc_group takes a ref.  Dropped in fanotify_release */
 	group = fsnotify_alloc_group(&fanotify_fsnotify_ops);
-	if (IS_ERR(group))
+	if (IS_ERR(group)) {
+		free_uid(user);
 		return PTR_ERR(group);
+	}
+
+	group->fanotify_data.user = user;
+	atomic_inc(&user->fanotify_listeners);
 
 	group->fanotify_data.f_flags = event_f_flags;
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
 	mutex_init(&group->fanotify_data.access_mutex);
 	init_waitqueue_head(&group->fanotify_data.access_waitq);
 	INIT_LIST_HEAD(&group->fanotify_data.access_list);
+	atomic_set(&group->fanotify_data.bypass_perm, 0);
 #endif
+	switch (flags & FAN_ALL_CLASS_BITS) {
+	case FAN_CLASS_NOTIF:
+		group->priority = FS_PRIO_0;
+		break;
+	case FAN_CLASS_CONTENT:
+		group->priority = FS_PRIO_1;
+		break;
+	case FAN_CLASS_PRE_CONTENT:
+		group->priority = FS_PRIO_2;
+		break;
+	default:
+		fd = -EINVAL;
+		goto out_put_group;
+	}
+
+	if (flags & FAN_UNLIMITED_QUEUE) {
+		fd = -EPERM;
+		if (!capable(CAP_SYS_ADMIN))
+			goto out_put_group;
+		group->max_events = UINT_MAX;
+	} else {
+		group->max_events = FANOTIFY_DEFAULT_MAX_EVENTS;
+	}
+
+	if (flags & FAN_UNLIMITED_MARKS) {
+		fd = -EPERM;
+		if (!capable(CAP_SYS_ADMIN))
+			goto out_put_group;
+		group->fanotify_data.max_marks = UINT_MAX;
+	} else {
+		group->fanotify_data.max_marks = FANOTIFY_DEFAULT_MAX_MARKS;
+	}
 
 	fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
 	if (fd < 0)
@@ -697,13 +783,21 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
 	if (flags & ~FAN_ALL_MARK_FLAGS)
 		return -EINVAL;
 	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
-	case FAN_MARK_ADD:
+	case FAN_MARK_ADD:		/* fallthrough */
 	case FAN_MARK_REMOVE:
+		if (!mask)
+			return -EINVAL;
 	case FAN_MARK_FLUSH:
 		break;
 	default:
 		return -EINVAL;
 	}
+
+	if (mask & FAN_ONDIR) {
+		flags |= FAN_MARK_ONDIR;
+		mask &= ~FAN_ONDIR;
+	}
+
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
 	if (mask & ~(FAN_ALL_EVENTS | FAN_ALL_PERM_EVENTS | FAN_EVENT_ON_CHILD))
 #else
@@ -719,6 +813,16 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
 	ret = -EINVAL;
 	if (unlikely(filp->f_op != &fanotify_fops))
 		goto fput_and_out;
+	group = filp->private_data;
+
+	/*
+	 * group->priority == FS_PRIO_0 == FAN_CLASS_NOTIF.  These are not
+	 * allowed to set permissions events.
+	 */
+	ret = -EINVAL;
+	if (mask & FAN_ALL_PERM_EVENTS &&
+	    group->priority == FS_PRIO_0)
+		goto fput_and_out;
 
 	ret = fanotify_find_path(dfd, pathname, &path, flags);
 	if (ret)
@@ -729,7 +833,6 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
 		inode = path.dentry->d_inode;
 	else
 		mnt = path.mnt;
-	group = filp->private_data;
 
 	/* create/update an inode mark */
 	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 36802420d69..20dc218707c 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -84,59 +84,39 @@ void __fsnotify_update_child_dentry_flags(struct inode *inode)
 }
 
 /* Notify this dentry's parent about a child's events. */
-void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
+int __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
 {
 	struct dentry *parent;
 	struct inode *p_inode;
-	bool send = false;
-	bool should_update_children = false;
+	int ret = 0;
 
 	if (!dentry)
 		dentry = path->dentry;
 
 	if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
-		return;
+		return 0;
 
-	spin_lock(&dentry->d_lock);
-	parent = dentry->d_parent;
+	parent = dget_parent(dentry);
 	p_inode = parent->d_inode;
 
-	if (fsnotify_inode_watches_children(p_inode)) {
-		if (p_inode->i_fsnotify_mask & mask) {
-			dget(parent);
-			send = true;
-		}
-	} else {
-		/*
-		 * The parent doesn't care about events on it's children but
-		 * at least one child thought it did.  We need to run all the
-		 * children and update their d_flags to let them know p_inode
-		 * doesn't care about them any more.
-		 */
-		dget(parent);
-		should_update_children = true;
-	}
-
-	spin_unlock(&dentry->d_lock);
-
-	if (send) {
+	if (unlikely(!fsnotify_inode_watches_children(p_inode)))
+		__fsnotify_update_child_dentry_flags(p_inode);
+	else if (p_inode->i_fsnotify_mask & mask) {
 		/* we are notifying a parent so come up with the new mask which
 		 * specifies these are events which came from a child. */
 		mask |= FS_EVENT_ON_CHILD;
 
 		if (path)
-			fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH,
-				 dentry->d_name.name, 0);
+			ret = fsnotify(p_inode, mask, path, FSNOTIFY_EVENT_PATH,
+				       dentry->d_name.name, 0);
 		else
-			fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
-				 dentry->d_name.name, 0);
-		dput(parent);
+			ret = fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
+				       dentry->d_name.name, 0);
 	}
 
-	if (unlikely(should_update_children)) {
-		__fsnotify_update_child_dentry_flags(p_inode);
-		dput(parent);
-	}
+	dput(parent);
+
+	return ret;
 }
 EXPORT_SYMBOL_GPL(__fsnotify_parent);
 
@@ -275,20 +255,23 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 
 		if (inode_group > vfsmount_group) {
 			/* handle inode */
-			send_to_group(to_tell, NULL, inode_mark, NULL, mask, data,
-				      data_is, cookie, file_name, &event);
+			ret = send_to_group(to_tell, NULL, inode_mark, NULL, mask, data,
+					    data_is, cookie, file_name, &event);
 			/* we didn't use the vfsmount_mark */
 			vfsmount_group = NULL;
 		} else if (vfsmount_group > inode_group) {
-			send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data,
-				      data_is, cookie, file_name, &event);
+			ret = send_to_group(to_tell, mnt, NULL, vfsmount_mark, mask, data,
+					    data_is, cookie, file_name, &event);
 			inode_group = NULL;
 		} else {
-			send_to_group(to_tell, mnt, inode_mark, vfsmount_mark,
-				      mask, data, data_is, cookie, file_name,
-				      &event);
+			ret = send_to_group(to_tell, mnt, inode_mark, vfsmount_mark,
+					    mask, data, data_is, cookie, file_name,
+					    &event);
 		}
 
+		if (ret && (mask & ALL_FSNOTIFY_PERM_EVENTS))
+			goto out;
+
 		if (inode_group)
 			inode_node = srcu_dereference(inode_node->next,
 						      &fsnotify_mark_srcu);
@@ -296,7 +279,8 @@ int fsnotify(struct inode *to_tell, __u32 mask, void *data, int data_is,
 			vfsmount_node = srcu_dereference(vfsmount_node->next,
 							 &fsnotify_mark_srcu);
 	}
-
+	ret = 0;
+out:
 	srcu_read_unlock(&fsnotify_mark_srcu, idx);
 	/*
 	 * fsnotify_create_event() took a reference so the event can't be cleaned
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c
index 33297c00506..4c29fcf557d 100644
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -177,7 +177,8 @@ void fsnotify_set_inode_mark_mask_locked(struct fsnotify_mark *mark,
  * Attach an initialized mark to a given inode.
  * These marks may be used for the fsnotify backend to determine which
  * event types should be delivered to which group and for which inodes.  These
- * marks are ordered according to the group's location in memory.
+ * marks are ordered according to priority, highest number first, and then by
+ * the group's location in memory.
  */
 int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 			    struct fsnotify_group *group, struct inode *inode,
@@ -211,7 +212,11 @@ int fsnotify_add_inode_mark(struct fsnotify_mark *mark,
 			goto out;
 		}
 
-		if (mark->group < lmark->group)
+		if (mark->group->priority < lmark->group->priority)
+			continue;
+
+		if ((mark->group->priority == lmark->group->priority) &&
+		    (mark->group < lmark->group))
 			continue;
 
 		hlist_add_before_rcu(&mark->i.i_list, &lmark->i.i_list);
@@ -240,6 +245,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
 {
 	struct inode *inode, *next_i, *need_iput = NULL;
 
+	spin_lock(&inode_lock);
 	list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
 		struct inode *need_iput_tmp;
 
@@ -297,4 +303,5 @@ void fsnotify_unmount_inodes(struct list_head *list)
 
 		spin_lock(&inode_lock);
 	}
+	spin_unlock(&inode_lock);
 }
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 24edc1185d5..4cd5d5d78f9 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -752,6 +752,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
 	if (ret >= 0)
 		return ret;
 
+	fsnotify_put_group(group);
 	atomic_dec(&user->inotify_devs);
 out_free_uid:
 	free_uid(user);
@@ -862,7 +863,7 @@ static int __init inotify_user_setup(void)
 	BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW);
 	BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED);
 	BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK);
-	BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR);
+	BUILD_BUG_ON(IN_ISDIR != FS_ISDIR);
 	BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT);
 
 	BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21);
diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c
index 56772b578fb..85eebff6d0d 100644
--- a/fs/notify/vfsmount_mark.c
+++ b/fs/notify/vfsmount_mark.c
@@ -169,7 +169,11 @@ int fsnotify_add_vfsmount_mark(struct fsnotify_mark *mark,
 			goto out;
 		}
 
-		if (mark->group < lmark->group)
+		if (mark->group->priority < lmark->group->priority)
+			continue;
+
+		if ((mark->group->priority == lmark->group->priority) &&
+		    (mark->group < lmark->group))
 			continue;
 
 		hlist_add_before_rcu(&mark->m.m_list, &lmark->m.m_list);
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 19c5180f8a2..a30ecacc01f 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2911,8 +2911,8 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
 		goto unl_upcase_iput_tmp_ino_err_out_now;
 	}
 	if ((sb->s_root = d_alloc_root(vol->root_ino))) {
-		/* We increment i_count simulating an ntfs_iget(). */
-		atomic_inc(&vol->root_ino->i_count);
+		/* We grab a reference, simulating an ntfs_iget(). */
+		ihold(vol->root_ino);
 		ntfs_debug("Exiting, status successful.");
 		/* Release the default upcase if it has no users. */
 		mutex_lock(&ntfs_lock);
@@ -3021,21 +3021,6 @@ iput_tmp_ino_err_out_now:
 	if (vol->mft_ino && vol->mft_ino != tmp_ino)
 		iput(vol->mft_ino);
 	vol->mft_ino = NULL;
-	/*
-	 * This is needed to get ntfs_clear_extent_inode() called for each
-	 * inode we have ever called ntfs_iget()/iput() on, otherwise we A)
-	 * leak resources and B) a subsequent mount fails automatically due to
-	 * ntfs_iget() never calling down into our ntfs_read_locked_inode()
-	 * method again... FIXME: Do we need to do this twice now because of
-	 * attribute inodes? I think not, so leave as is for now... (AIA)
-	 */
-	if (invalidate_inodes(sb)) {
-		ntfs_error(sb, "Busy inodes left. This is most likely a NTFS "
-				"driver bug.");
-		/* Copied from fs/super.c. I just love this message. (-; */
-		printk("NTFS: Busy inodes after umount. Self-destruct in 5 "
-				"seconds.  Have a nice day...\n");
-	}
 	/* Errors at this stage are irrelevant. */
 err_out_now:
 	sb->s_fs_info = NULL;
@@ -3074,17 +3059,16 @@ struct kmem_cache *ntfs_index_ctx_cache;
 /* Driver wide mutex. */
 DEFINE_MUTEX(ntfs_lock);
 
-static int ntfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *ntfs_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, ntfs_fill_super,
-			   mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, ntfs_fill_super);
 }
 
 static struct file_system_type ntfs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "ntfs",
-	.get_sb		= ntfs_get_sb,
+	.mount		= ntfs_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 5cfeee11815..0d7c5540ad6 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -165,7 +165,7 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
 	 * ocfs2 never allocates in this function - the only time we
 	 * need to use BH_New is when we're extending i_size on a file
 	 * system which doesn't support holes, in which case BH_New
-	 * allows block_prepare_write() to zero.
+	 * allows __block_write_begin() to zero.
 	 *
 	 * If we see this on a sparse file system, then a truncate has
 	 * raced us and removed the cluster. In this case, we clear
@@ -407,21 +407,6 @@ static int ocfs2_writepage(struct page *page, struct writeback_control *wbc)
 	return ret;
 }
 
-/*
- * This is called from ocfs2_write_zero_page() which has handled it's
- * own cluster locking and has ensured allocation exists for those
- * blocks to be written.
- */
-int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
-			       unsigned from, unsigned to)
-{
-	int ret;
-
-	ret = block_prepare_write(page, from, to, ocfs2_get_block);
-
-	return ret;
-}
-
 /* Taken from ext3. We don't necessarily need the full blown
  * functionality yet, but IMHO it's better to cut and paste the whole
  * thing so we can avoid introducing our own bugs (and easily pick up
@@ -588,11 +573,14 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
 	/* this io's submitter should not have unlocked this before we could */
 	BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
 
+	if (ocfs2_iocb_is_sem_locked(iocb)) {
+		up_read(&inode->i_alloc_sem);
+		ocfs2_iocb_clear_sem_locked(iocb);
+	}
+
 	ocfs2_iocb_clear_rw_locked(iocb);
 
 	level = ocfs2_iocb_rw_locked_level(iocb);
-	if (!level)
-		up_read(&inode->i_alloc_sem);
 	ocfs2_rw_unlock(inode, level);
 
 	if (is_async)
@@ -732,7 +720,7 @@ static int ocfs2_should_read_blk(struct inode *inode, struct page *page,
 }
 
 /*
- * Some of this taken from block_prepare_write(). We already have our
+ * Some of this taken from __block_write_begin(). We already have our
  * mapping by now though, and the entire write will be allocating or
  * it won't, so not much need to use BH_New.
  *
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 7606f663da6..eceb456037c 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -22,9 +22,6 @@
 #ifndef OCFS2_AOPS_H
 #define OCFS2_AOPS_H
 
-int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
-			       unsigned from, unsigned to);
-
 handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
 							 struct page *page,
 							 unsigned from,
@@ -71,8 +68,27 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level)
 	else
 		clear_bit(1, (unsigned long *)&iocb->private);
 }
+
+/*
+ * Using a named enum representing lock types in terms of #N bit stored in
+ * iocb->private, which is going to be used for communication bewteen
+ * ocfs2_dio_end_io() and ocfs2_file_aio_write/read().
+ */
+enum ocfs2_iocb_lock_bits {
+	OCFS2_IOCB_RW_LOCK = 0,
+	OCFS2_IOCB_RW_LOCK_LEVEL,
+	OCFS2_IOCB_SEM,
+	OCFS2_IOCB_NUM_LOCKS
+};
+
 #define ocfs2_iocb_clear_rw_locked(iocb) \
-	clear_bit(0, (unsigned long *)&iocb->private)
+	clear_bit(OCFS2_IOCB_RW_LOCK, (unsigned long *)&iocb->private)
 #define ocfs2_iocb_rw_locked_level(iocb) \
-	test_bit(1, (unsigned long *)&iocb->private)
+	test_bit(OCFS2_IOCB_RW_LOCK_LEVEL, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_set_sem_locked(iocb) \
+	set_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_clear_sem_locked(iocb) \
+	clear_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
+#define ocfs2_iocb_is_sem_locked(iocb) \
+	test_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
 #endif /* OCFS2_FILE_H */
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index 52c7557f3e2..9f26ac9be2a 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -1964,8 +1964,10 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
 	if (reg == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	if (strlen(name) > O2HB_MAX_REGION_NAME_LEN)
-		return ERR_PTR(-ENAMETOOLONG);
+	if (strlen(name) > O2HB_MAX_REGION_NAME_LEN) {
+		ret = -ENAMETOOLONG;
+		goto free;
+	}
 
 	spin_lock(&o2hb_live_lock);
 	reg->hr_region_num = 0;
@@ -1974,7 +1976,8 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
 							 O2NM_MAX_REGIONS);
 		if (reg->hr_region_num >= O2NM_MAX_REGIONS) {
 			spin_unlock(&o2hb_live_lock);
-			return ERR_PTR(-EFBIG);
+			ret = -EFBIG;
+			goto free;
 		}
 		set_bit(reg->hr_region_num, o2hb_region_bitmap);
 	}
@@ -1986,10 +1989,13 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
 	ret = o2hb_debug_region_init(reg, o2hb_debug_dir);
 	if (ret) {
 		config_item_put(&reg->hr_item);
-		return ERR_PTR(ret);
+		goto free;
 	}
 
 	return &reg->hr_item;
+free:
+	kfree(reg);
+	return ERR_PTR(ret);
 }
 
 static void o2hb_heartbeat_group_drop_item(struct config_group *group,
diff --git a/fs/ocfs2/cluster/masklog.c b/fs/ocfs2/cluster/masklog.c
index c7fba396392..6c61771469a 100644
--- a/fs/ocfs2/cluster/masklog.c
+++ b/fs/ocfs2/cluster/masklog.c
@@ -113,10 +113,11 @@ static struct mlog_attribute mlog_attrs[MLOG_MAX_BITS] = {
 	define_mask(QUOTA),
 	define_mask(REFCOUNT),
 	define_mask(BASTS),
+	define_mask(RESERVATIONS),
+	define_mask(CLUSTER),
 	define_mask(ERROR),
 	define_mask(NOTICE),
 	define_mask(KTHREAD),
-	define_mask(RESERVATIONS),
 };
 
 static struct attribute *mlog_attr_ptrs[MLOG_MAX_BITS] = {NULL, };
diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h
index ea2ed9f56c9..34d6544357d 100644
--- a/fs/ocfs2/cluster/masklog.h
+++ b/fs/ocfs2/cluster/masklog.h
@@ -81,7 +81,7 @@
 #include <linux/sched.h>
 
 /* bits that are frequently given and infrequently matched in the low word */
-/* NOTE: If you add a flag, you need to also update mlog.c! */
+/* NOTE: If you add a flag, you need to also update masklog.c! */
 #define ML_ENTRY	0x0000000000000001ULL /* func call entry */
 #define ML_EXIT		0x0000000000000002ULL /* func call exit */
 #define ML_TCP		0x0000000000000004ULL /* net cluster/tcp.c */
@@ -114,13 +114,14 @@
 #define ML_XATTR	0x0000000020000000ULL /* ocfs2 extended attributes */
 #define ML_QUOTA	0x0000000040000000ULL /* ocfs2 quota operations */
 #define ML_REFCOUNT	0x0000000080000000ULL /* refcount tree operations */
-#define ML_BASTS	0x0000001000000000ULL /* dlmglue asts and basts */
+#define ML_BASTS	0x0000000100000000ULL /* dlmglue asts and basts */
+#define ML_RESERVATIONS	0x0000000200000000ULL /* ocfs2 alloc reservations */
+#define ML_CLUSTER	0x0000000400000000ULL /* cluster stack */
+
 /* bits that are infrequently given and frequently matched in the high word */
-#define ML_ERROR	0x0000000100000000ULL /* sent to KERN_ERR */
-#define ML_NOTICE	0x0000000200000000ULL /* setn to KERN_NOTICE */
-#define ML_KTHREAD	0x0000000400000000ULL /* kernel thread activity */
-#define ML_RESERVATIONS	0x0000000800000000ULL /* ocfs2 alloc reservations */
-#define ML_CLUSTER	0x0000001000000000ULL /* cluster stack */
+#define ML_ERROR	0x1000000000000000ULL /* sent to KERN_ERR */
+#define ML_NOTICE	0x2000000000000000ULL /* setn to KERN_NOTICE */
+#define ML_KTHREAD	0x4000000000000000ULL /* kernel thread activity */
 
 #define MLOG_INITIAL_AND_MASK (ML_ERROR|ML_NOTICE)
 #define MLOG_INITIAL_NOT_MASK (ML_ENTRY|ML_EXIT)
diff --git a/fs/ocfs2/dcache.c b/fs/ocfs2/dcache.c
index edaded48e7e..895532ac4d9 100644
--- a/fs/ocfs2/dcache.c
+++ b/fs/ocfs2/dcache.c
@@ -476,7 +476,6 @@ static void ocfs2_dentry_iput(struct dentry *dentry, struct inode *inode)
 
 out:
 	iput(inode);
-	ocfs2_dentry_attach_gen(dentry);
 }
 
 /*
diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index c49f6de0e7a..d417b3f9b0c 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -2461,8 +2461,10 @@ static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
 
 	di->i_dx_root = cpu_to_le64(dr_blkno);
 
+	spin_lock(&OCFS2_I(dir)->ip_lock);
 	OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL;
 	di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
+	spin_unlock(&OCFS2_I(dir)->ip_lock);
 
 	ocfs2_journal_dirty(handle, di_bh);
 
@@ -4466,8 +4468,10 @@ static int ocfs2_dx_dir_remove_index(struct inode *dir,
 		goto out_commit;
 	}
 
+	spin_lock(&OCFS2_I(dir)->ip_lock);
 	OCFS2_I(dir)->ip_dyn_features &= ~OCFS2_INDEXED_DIR_FL;
 	di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
+	spin_unlock(&OCFS2_I(dir)->ip_lock);
 	di->i_dx_root = cpu_to_le64(0ULL);
 
 	ocfs2_journal_dirty(handle, di_bh);
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 58a93b95373..cc2aaa96cfe 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -959,7 +959,7 @@ static int dlm_match_regions(struct dlm_ctxt *dlm,
 		r += O2HB_MAX_REGION_NAME_LEN;
 	}
 
-	local = kmalloc(sizeof(qr->qr_regions), GFP_KERNEL);
+	local = kmalloc(sizeof(qr->qr_regions), GFP_ATOMIC);
 	if (!local) {
 		status = -ENOMEM;
 		goto bail;
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index f564b0e5f80..59f0f6bdfc6 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2346,7 +2346,8 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
  */
 static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
 				      struct dlm_lock_resource *res,
-				      int *numlocks)
+				      int *numlocks,
+				      int *hasrefs)
 {
 	int ret;
 	int i;
@@ -2356,6 +2357,9 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
 
 	assert_spin_locked(&res->spinlock);
 
+	*numlocks = 0;
+	*hasrefs = 0;
+
 	ret = -EINVAL;
 	if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
 		mlog(0, "cannot migrate lockres with unknown owner!\n");
@@ -2386,7 +2390,13 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
 	}
 
 	*numlocks = count;
-	mlog(0, "migrateable lockres having %d locks\n", *numlocks);
+
+	count = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
+	if (count < O2NM_MAX_NODES)
+		*hasrefs = 1;
+
+	mlog(0, "%s: res %.*s, Migrateable, locks %d, refs %d\n", dlm->name,
+	     res->lockname.len, res->lockname.name, *numlocks, *hasrefs);
 
 leave:
 	return ret;
@@ -2408,7 +2418,7 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
 	const char *name;
 	unsigned int namelen;
 	int mle_added = 0;
-	int numlocks;
+	int numlocks, hasrefs;
 	int wake = 0;
 
 	if (!dlm_grab(dlm))
@@ -2417,13 +2427,13 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
 	name = res->lockname.name;
 	namelen = res->lockname.len;
 
-	mlog(0, "migrating %.*s to %u\n", namelen, name, target);
+	mlog(0, "%s: Migrating %.*s to %u\n", dlm->name, namelen, name, target);
 
 	/*
 	 * ensure this lockres is a proper candidate for migration
 	 */
 	spin_lock(&res->spinlock);
-	ret = dlm_is_lockres_migrateable(dlm, res, &numlocks);
+	ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs);
 	if (ret < 0) {
 		spin_unlock(&res->spinlock);
 		goto leave;
@@ -2431,10 +2441,8 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
 	spin_unlock(&res->spinlock);
 
 	/* no work to do */
-	if (numlocks == 0) {
-		mlog(0, "no locks were found on this lockres! done!\n");
+	if (numlocks == 0 && !hasrefs)
 		goto leave;
-	}
 
 	/*
 	 * preallocate up front
@@ -2459,14 +2467,14 @@ static int dlm_migrate_lockres(struct dlm_ctxt *dlm,
 	 * find a node to migrate the lockres to
 	 */
 
-	mlog(0, "picking a migration node\n");
 	spin_lock(&dlm->spinlock);
 	/* pick a new node */
 	if (!test_bit(target, dlm->domain_map) ||
 	    target >= O2NM_MAX_NODES) {
 		target = dlm_pick_migration_target(dlm, res);
 	}
-	mlog(0, "node %u chosen for migration\n", target);
+	mlog(0, "%s: res %.*s, Node %u chosen for migration\n", dlm->name,
+	     namelen, name, target);
 
 	if (target >= O2NM_MAX_NODES ||
 	    !test_bit(target, dlm->domain_map)) {
@@ -2667,7 +2675,7 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
 {
 	int ret;
 	int lock_dropped = 0;
-	int numlocks;
+	int numlocks, hasrefs;
 
 	spin_lock(&res->spinlock);
 	if (res->owner != dlm->node_num) {
@@ -2681,8 +2689,8 @@ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
 	}
 
 	/* No need to migrate a lockres having no locks */
-	ret = dlm_is_lockres_migrateable(dlm, res, &numlocks);
-	if (ret >= 0 && numlocks == 0) {
+	ret = dlm_is_lockres_migrateable(dlm, res, &numlocks, &hasrefs);
+	if (ret >= 0 && numlocks == 0 && !hasrefs) {
 		spin_unlock(&res->spinlock);
 		goto leave;
 	}
@@ -2915,6 +2923,12 @@ static u8 dlm_pick_migration_target(struct dlm_ctxt *dlm,
 		}
 		queue++;
 	}
+
+	nodenum = find_next_bit(res->refmap, O2NM_MAX_NODES, 0);
+	if (nodenum < O2NM_MAX_NODES) {
+		spin_unlock(&res->spinlock);
+		return nodenum;
+	}
 	spin_unlock(&res->spinlock);
 	mlog(0, "have not found a suitable target yet! checking domain map\n");
 
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index a7ebd9d42dc..b2df490a19e 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -400,6 +400,7 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
 	if (inode) {
 		ip = DLMFS_I(inode);
 
+		inode->i_ino = get_next_ino();
 		inode->i_mode = mode;
 		inode->i_uid = current_fsuid();
 		inode->i_gid = current_fsgid();
@@ -425,6 +426,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
 	if (!inode)
 		return NULL;
 
+	inode->i_ino = get_next_ino();
 	inode->i_mode = mode;
 	inode->i_uid = current_fsuid();
 	inode->i_gid = current_fsgid();
@@ -641,16 +643,16 @@ static const struct inode_operations dlmfs_file_inode_operations = {
 	.setattr	= dlmfs_file_setattr,
 };
 
-static int dlmfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *dlmfs_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_nodev(fs_type, flags, data, dlmfs_fill_super, mnt);
+	return mount_nodev(fs_type, flags, data, dlmfs_fill_super);
 }
 
 static struct file_system_type dlmfs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "ocfs2_dlmfs",
-	.get_sb		= dlmfs_get_sb,
+	.mount		= dlmfs_mount,
 	.kill_sb	= kill_litter_super,
 };
 
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 1ca6867935b..f6cba566429 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -796,13 +796,12 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
 		block_end = block_start + (1 << inode->i_blkbits);
 
 		/*
-		 * block_start is block-aligned.  Bump it by one to
-		 * force ocfs2_{prepare,commit}_write() to zero the
+		 * block_start is block-aligned.  Bump it by one to force
+		 * __block_write_begin and block_commit_write to zero the
 		 * whole block.
 		 */
-		ret = ocfs2_prepare_write_nolock(inode, page,
-						 block_start + 1,
-						 block_start + 1);
+		ret = __block_write_begin(page, block_start + 1, 0,
+					  ocfs2_get_block);
 		if (ret < 0) {
 			mlog_errno(ret);
 			goto out_unlock;
@@ -2242,11 +2241,15 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
 
 	mutex_lock(&inode->i_mutex);
 
+	ocfs2_iocb_clear_sem_locked(iocb);
+
 relock:
 	/* to match setattr's i_mutex -> i_alloc_sem -> rw_lock ordering */
 	if (direct_io) {
 		down_read(&inode->i_alloc_sem);
 		have_alloc_sem = 1;
+		/* communicate with ocfs2_dio_end_io */
+		ocfs2_iocb_set_sem_locked(iocb);
 	}
 
 	/*
@@ -2383,8 +2386,10 @@ out:
 		ocfs2_rw_unlock(inode, rw_level);
 
 out_sems:
-	if (have_alloc_sem)
+	if (have_alloc_sem) {
 		up_read(&inode->i_alloc_sem);
+		ocfs2_iocb_clear_sem_locked(iocb);
+	}
 
 	mutex_unlock(&inode->i_mutex);
 
@@ -2528,6 +2533,8 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 		goto bail;
 	}
 
+	ocfs2_iocb_clear_sem_locked(iocb);
+
 	/*
 	 * buffered reads protect themselves in ->readpage().  O_DIRECT reads
 	 * need locks to protect pending reads from racing with truncate.
@@ -2535,6 +2542,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 	if (filp->f_flags & O_DIRECT) {
 		down_read(&inode->i_alloc_sem);
 		have_alloc_sem = 1;
+		ocfs2_iocb_set_sem_locked(iocb);
 
 		ret = ocfs2_rw_lock(inode, 0);
 		if (ret < 0) {
@@ -2576,8 +2584,10 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
 	}
 
 bail:
-	if (have_alloc_sem)
+	if (have_alloc_sem) {
 		up_read(&inode->i_alloc_sem);
+		ocfs2_iocb_clear_sem_locked(iocb);
+	}
 	if (rw_level != -1)
 		ocfs2_rw_unlock(inode, rw_level);
 	mlog_exit(ret);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index e7bde21149a..ff5744e1e36 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -742,7 +742,7 @@ static int ocfs2_link(struct dentry *old_dentry,
 		goto out_commit;
 	}
 
-	atomic_inc(&inode->i_count);
+	ihold(inode);
 	dentry->d_op = &ocfs2_dentry_ops;
 	d_instantiate(dentry, inode);
 
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index d8408217e3b..70dd3b1798f 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -159,7 +159,9 @@ struct ocfs2_lock_res {
 	char                     l_name[OCFS2_LOCK_ID_MAX_LEN];
 	unsigned int             l_ro_holders;
 	unsigned int             l_ex_holders;
-	unsigned char            l_level;
+	signed char		 l_level;
+	signed char		 l_requested;
+	signed char		 l_blocking;
 
 	/* Data packed - type enum ocfs2_lock_type */
 	unsigned char            l_type;
@@ -169,8 +171,6 @@ struct ocfs2_lock_res {
 	unsigned char            l_action;
 	/* Data packed - enum type ocfs2_unlock_action */
 	unsigned char            l_unlock_action;
-	unsigned char            l_requested;
-	unsigned char            l_blocking;
 	unsigned int             l_pending_gen;
 
 	spinlock_t               l_lock;
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index c2e4f8222e2..bf2e7764920 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -350,7 +350,7 @@ enum {
 #define OCFS2_LAST_LOCAL_SYSTEM_INODE LOCAL_GROUP_QUOTA_SYSTEM_INODE
 	NUM_SYSTEM_INODES
 };
-#define NUM_GLOBAL_SYSTEM_INODES OCFS2_LAST_GLOBAL_SYSTEM_INODE
+#define NUM_GLOBAL_SYSTEM_INODES OCFS2_FIRST_LOCAL_SYSTEM_INODE
 #define NUM_LOCAL_SYSTEM_INODES	\
 		(NUM_SYSTEM_INODES - OCFS2_FIRST_LOCAL_SYSTEM_INODE)
 
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index 252e7c82f92..a5ebe421195 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -190,7 +190,7 @@ static struct ocfs2_live_connection *ocfs2_connection_find(const char *name)
 			return c;
 	}
 
-	return c;
+	return NULL;
 }
 
 /*
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 56f0cb39582..cfeab7ce369 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -41,7 +41,6 @@
 #include <linux/mount.h>
 #include <linux/seq_file.h>
 #include <linux/quotaops.h>
-#include <linux/smp_lock.h>
 
 #define MLOG_MASK_PREFIX ML_SUPER
 #include <cluster/masklog.h>
@@ -1236,14 +1235,12 @@ read_super_error:
 	return status;
 }
 
-static int ocfs2_get_sb(struct file_system_type *fs_type,
+static struct dentry *ocfs2_mount(struct file_system_type *fs_type,
 			int flags,
 			const char *dev_name,
-			void *data,
-			struct vfsmount *mnt)
+			void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super,
-			   mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, ocfs2_fill_super);
 }
 
 static void ocfs2_kill_sb(struct super_block *sb)
@@ -1267,8 +1264,7 @@ out:
 static struct file_system_type ocfs2_fs_type = {
 	.owner          = THIS_MODULE,
 	.name           = "ocfs2",
-	.get_sb         = ocfs2_get_sb, /* is this called when we mount
-					* the fs? */
+	.mount          = ocfs2_mount,
 	.kill_sb        = ocfs2_kill_sb,
 
 	.fs_flags       = FS_REQUIRES_DEV|FS_RENAME_DOES_D_MOVE,
diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c
index 14a22863291..e043c4cb9a9 100644
--- a/fs/omfs/inode.c
+++ b/fs/omfs/inode.c
@@ -557,17 +557,16 @@ end:
 	return ret;
 }
 
-static int omfs_get_sb(struct file_system_type *fs_type,
-			int flags, const char *dev_name,
-			void *data, struct vfsmount *m)
+static struct dentry *omfs_mount(struct file_system_type *fs_type,
+			int flags, const char *dev_name, void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, omfs_fill_super, m);
+	return mount_bdev(fs_type, flags, dev_name, data, omfs_fill_super);
 }
 
 static struct file_system_type omfs_fs_type = {
 	.owner = THIS_MODULE,
 	.name = "omfs",
-	.get_sb = omfs_get_sb,
+	.mount = omfs_mount,
 	.kill_sb = kill_block_super,
 	.fs_flags = FS_REQUIRES_DEV,
 };
diff --git a/fs/open.c b/fs/open.c
index d74e1983e8d..4197b9ed023 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -786,11 +786,11 @@ struct file *nameidata_to_filp(struct nameidata *nd)
 	/* Pick up the filp from the open intent */
 	filp = nd->intent.open.file;
 	/* Has the filesystem initialised the file for us? */
-	if (filp->f_path.dentry == NULL)
+	if (filp->f_path.dentry == NULL) {
+		path_get(&nd->path);
 		filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp,
 				     NULL, cred);
-	else
-		path_put(&nd->path);
+	}
 	return filp;
 }
 
diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c
index ffcd04f0012..911e61f348f 100644
--- a/fs/openpromfs/inode.c
+++ b/fs/openpromfs/inode.c
@@ -415,16 +415,16 @@ out_no_root:
 	return ret;
 }
 
-static int openprom_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *openprom_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_single(fs_type, flags, data, openprom_fill_super, mnt);
+	return mount_single(fs_type, flags, data, openprom_fill_super);
 }
 
 static struct file_system_type openprom_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "openpromfs",
-	.get_sb		= openprom_get_sb,
+	.mount		= openprom_mount,
 	.kill_sb	= kill_anon_super,
 };
 
diff --git a/fs/pipe.c b/fs/pipe.c
index 37eb1ebeaa9..04629f36e39 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -954,6 +954,8 @@ static struct inode * get_pipe_inode(void)
 	if (!inode)
 		goto fail_inode;
 
+	inode->i_ino = get_next_ino();
+
 	pipe = alloc_pipe_info(inode);
 	if (!pipe)
 		goto fail_iput;
@@ -1197,12 +1199,24 @@ int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf,
 	return ret;
 }
 
+/*
+ * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
+ * location, so checking ->i_pipe is not enough to verify that this is a
+ * pipe.
+ */
+struct pipe_inode_info *get_pipe_info(struct file *file)
+{
+	struct inode *i = file->f_path.dentry->d_inode;
+
+	return S_ISFIFO(i->i_mode) ? i->i_pipe : NULL;
+}
+
 long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	struct pipe_inode_info *pipe;
 	long ret;
 
-	pipe = file->f_path.dentry->d_inode->i_pipe;
+	pipe = get_pipe_info(file);
 	if (!pipe)
 		return -EBADF;
 
@@ -1245,16 +1259,15 @@ out:
  * any operations on the root directory. However, we need a non-trivial
  * d_name - pipe: will go nicely and kill the special-casing in procfs.
  */
-static int pipefs_get_sb(struct file_system_type *fs_type,
-			 int flags, const char *dev_name, void *data,
-			 struct vfsmount *mnt)
+static struct dentry *pipefs_mount(struct file_system_type *fs_type,
+			 int flags, const char *dev_name, void *data)
 {
-	return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt);
+	return mount_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC);
 }
 
 static struct file_system_type pipe_fs_type = {
 	.name		= "pipefs",
-	.get_sb		= pipefs_get_sb,
+	.mount		= pipefs_mount,
 	.kill_sb	= kill_anon_super,
 };
 
diff --git a/fs/proc/Kconfig b/fs/proc/Kconfig
index 50f8f0600f0..6a0068841d9 100644
--- a/fs/proc/Kconfig
+++ b/fs/proc/Kconfig
@@ -33,8 +33,8 @@ config PROC_KCORE
 	depends on PROC_FS && MMU
 
 config PROC_VMCORE
-        bool "/proc/vmcore support (EXPERIMENTAL)"
-        depends on PROC_FS && CRASH_DUMP
+	bool "/proc/vmcore support"
+	depends on PROC_FS && CRASH_DUMP
 	default y
         help
         Exports the dump image of crashed kernel in ELF format.
diff --git a/fs/proc/base.c b/fs/proc/base.c
index dc5d5f51f3f..182845147fe 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -226,7 +226,7 @@ struct mm_struct *mm_for_maps(struct task_struct *task)
 {
 	struct mm_struct *mm;
 
-	if (mutex_lock_killable(&task->cred_guard_mutex))
+	if (mutex_lock_killable(&task->signal->cred_guard_mutex))
 		return NULL;
 
 	mm = get_task_mm(task);
@@ -235,7 +235,7 @@ struct mm_struct *mm_for_maps(struct task_struct *task)
 		mmput(mm);
 		mm = NULL;
 	}
-	mutex_unlock(&task->cred_guard_mutex);
+	mutex_unlock(&task->signal->cred_guard_mutex);
 
 	return mm;
 }
@@ -771,6 +771,8 @@ static const struct file_operations proc_single_file_operations = {
 static int mem_open(struct inode* inode, struct file* file)
 {
 	file->private_data = (void*)((long)current->self_exec_id);
+	/* OK to pass negative loff_t, we can catch out-of-range */
+	file->f_mode |= FMODE_UNSIGNED_OFFSET;
 	return 0;
 }
 
@@ -1023,28 +1025,47 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
 	memset(buffer, 0, sizeof(buffer));
 	if (count > sizeof(buffer) - 1)
 		count = sizeof(buffer) - 1;
-	if (copy_from_user(buffer, buf, count))
-		return -EFAULT;
+	if (copy_from_user(buffer, buf, count)) {
+		err = -EFAULT;
+		goto out;
+	}
 
 	err = strict_strtol(strstrip(buffer), 0, &oom_adjust);
 	if (err)
-		return -EINVAL;
+		goto out;
 	if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) &&
-	     oom_adjust != OOM_DISABLE)
-		return -EINVAL;
+	     oom_adjust != OOM_DISABLE) {
+		err = -EINVAL;
+		goto out;
+	}
 
 	task = get_proc_task(file->f_path.dentry->d_inode);
-	if (!task)
-		return -ESRCH;
+	if (!task) {
+		err = -ESRCH;
+		goto out;
+	}
+
+	task_lock(task);
+	if (!task->mm) {
+		err = -EINVAL;
+		goto err_task_lock;
+	}
+
 	if (!lock_task_sighand(task, &flags)) {
-		put_task_struct(task);
-		return -ESRCH;
+		err = -ESRCH;
+		goto err_task_lock;
 	}
 
 	if (oom_adjust < task->signal->oom_adj && !capable(CAP_SYS_RESOURCE)) {
-		unlock_task_sighand(task, &flags);
-		put_task_struct(task);
-		return -EACCES;
+		err = -EACCES;
+		goto err_sighand;
+	}
+
+	if (oom_adjust != task->signal->oom_adj) {
+		if (oom_adjust == OOM_DISABLE)
+			atomic_inc(&task->mm->oom_disable_count);
+		if (task->signal->oom_adj == OOM_DISABLE)
+			atomic_dec(&task->mm->oom_disable_count);
 	}
 
 	/*
@@ -1065,10 +1086,13 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
 	else
 		task->signal->oom_score_adj = (oom_adjust * OOM_SCORE_ADJ_MAX) /
 								-OOM_DISABLE;
+err_sighand:
 	unlock_task_sighand(task, &flags);
+err_task_lock:
+	task_unlock(task);
 	put_task_struct(task);
-
-	return count;
+out:
+	return err < 0 ? err : count;
 }
 
 static const struct file_operations proc_oom_adjust_operations = {
@@ -1109,30 +1133,49 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
 	memset(buffer, 0, sizeof(buffer));
 	if (count > sizeof(buffer) - 1)
 		count = sizeof(buffer) - 1;
-	if (copy_from_user(buffer, buf, count))
-		return -EFAULT;
+	if (copy_from_user(buffer, buf, count)) {
+		err = -EFAULT;
+		goto out;
+	}
 
 	err = strict_strtol(strstrip(buffer), 0, &oom_score_adj);
 	if (err)
-		return -EINVAL;
+		goto out;
 	if (oom_score_adj < OOM_SCORE_ADJ_MIN ||
-			oom_score_adj > OOM_SCORE_ADJ_MAX)
-		return -EINVAL;
+			oom_score_adj > OOM_SCORE_ADJ_MAX) {
+		err = -EINVAL;
+		goto out;
+	}
 
 	task = get_proc_task(file->f_path.dentry->d_inode);
-	if (!task)
-		return -ESRCH;
+	if (!task) {
+		err = -ESRCH;
+		goto out;
+	}
+
+	task_lock(task);
+	if (!task->mm) {
+		err = -EINVAL;
+		goto err_task_lock;
+	}
+
 	if (!lock_task_sighand(task, &flags)) {
-		put_task_struct(task);
-		return -ESRCH;
+		err = -ESRCH;
+		goto err_task_lock;
 	}
+
 	if (oom_score_adj < task->signal->oom_score_adj &&
 			!capable(CAP_SYS_RESOURCE)) {
-		unlock_task_sighand(task, &flags);
-		put_task_struct(task);
-		return -EACCES;
+		err = -EACCES;
+		goto err_sighand;
 	}
 
+	if (oom_score_adj != task->signal->oom_score_adj) {
+		if (oom_score_adj == OOM_SCORE_ADJ_MIN)
+			atomic_inc(&task->mm->oom_disable_count);
+		if (task->signal->oom_score_adj == OOM_SCORE_ADJ_MIN)
+			atomic_dec(&task->mm->oom_disable_count);
+	}
 	task->signal->oom_score_adj = oom_score_adj;
 	/*
 	 * Scale /proc/pid/oom_adj appropriately ensuring that OOM_DISABLE is
@@ -1143,9 +1186,13 @@ static ssize_t oom_score_adj_write(struct file *file, const char __user *buf,
 	else
 		task->signal->oom_adj = (oom_score_adj * OOM_ADJUST_MAX) /
 							OOM_SCORE_ADJ_MAX;
+err_sighand:
 	unlock_task_sighand(task, &flags);
+err_task_lock:
+	task_unlock(task);
 	put_task_struct(task);
-	return count;
+out:
+	return err < 0 ? err : count;
 }
 
 static const struct file_operations proc_oom_score_adj_operations = {
@@ -1527,7 +1574,7 @@ static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
 	if (!tmp)
 		return -ENOMEM;
 
-	pathname = d_path_with_unreachable(path, tmp, PAGE_SIZE);
+	pathname = d_path(path, tmp, PAGE_SIZE);
 	len = PTR_ERR(pathname);
 	if (IS_ERR(pathname))
 		goto out;
@@ -1601,6 +1648,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
 
 	/* Common stuff */
 	ei = PROC_I(inode);
+	inode->i_ino = get_next_ino();
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 	inode->i_op = &proc_def_inode_operations;
 
@@ -2306,14 +2354,14 @@ static ssize_t proc_pid_attr_write(struct file * file, const char __user * buf,
 		goto out_free;
 
 	/* Guard against adverse ptrace interaction */
-	length = mutex_lock_interruptible(&task->cred_guard_mutex);
+	length = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
 	if (length < 0)
 		goto out_free;
 
 	length = security_setprocattr(task,
 				      (char*)file->f_path.dentry->d_name.name,
 				      (void*)page, count);
-	mutex_unlock(&task->cred_guard_mutex);
+	mutex_unlock(&task->signal->cred_guard_mutex);
 out_free:
 	free_page((unsigned long) page);
 out:
@@ -2547,6 +2595,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
 
 	/* Initialize the inode */
 	ei = PROC_I(inode);
+	inode->i_ino = get_next_ino();
 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
 
 	/*
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index 9c2b5f48487..3ddb6068177 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -16,7 +16,6 @@
 #include <linux/limits.h>
 #include <linux/init.h>
 #include <linux/module.h>
-#include <linux/smp_lock.h>
 #include <linux/sysctl.h>
 #include <linux/slab.h>
 
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 2fc52552271..b652cb00906 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -23,6 +23,8 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
 	if (!inode)
 		goto out;
 
+	inode->i_ino = get_next_ino();
+
 	sysctl_head_get(head);
 	ei = PROC_I(inode);
 	ei->sysctl = head;
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 93d99b31632..ef9fa8e24ad 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -35,8 +35,8 @@ static int proc_set_super(struct super_block *sb, void *data)
 	return set_anon_super(sb, NULL);
 }
 
-static int proc_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *proc_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
 	int err;
 	struct super_block *sb;
@@ -61,14 +61,14 @@ static int proc_get_sb(struct file_system_type *fs_type,
 
 	sb = sget(fs_type, proc_test_super, proc_set_super, ns);
 	if (IS_ERR(sb))
-		return PTR_ERR(sb);
+		return ERR_CAST(sb);
 
 	if (!sb->s_root) {
 		sb->s_flags = flags;
 		err = proc_fill_super(sb);
 		if (err) {
 			deactivate_locked_super(sb);
-			return err;
+			return ERR_PTR(err);
 		}
 
 		ei = PROC_I(sb->s_root->d_inode);
@@ -79,11 +79,9 @@ static int proc_get_sb(struct file_system_type *fs_type,
 		}
 
 		sb->s_flags |= MS_ACTIVE;
-		ns->proc_mnt = mnt;
 	}
 
-	simple_set_mnt(mnt, sb);
-	return 0;
+	return dget(sb->s_root);
 }
 
 static void proc_kill_sb(struct super_block *sb)
@@ -97,7 +95,7 @@ static void proc_kill_sb(struct super_block *sb)
 
 static struct file_system_type proc_fs_type = {
 	.name		= "proc",
-	.get_sb		= proc_get_sb,
+	.mount		= proc_mount,
 	.kill_sb	= proc_kill_sb,
 };
 
@@ -115,6 +113,7 @@ void __init proc_root_init(void)
 		return;
 	}
 
+	init_pid_ns.proc_mnt = proc_mnt;
 	proc_symlink("mounts", NULL, "self/mounts");
 
 	proc_net_init();
@@ -213,6 +212,7 @@ int pid_ns_prepare_proc(struct pid_namespace *ns)
 	if (IS_ERR(mnt))
 		return PTR_ERR(mnt);
 
+	ns->proc_mnt = mnt;
 	return 0;
 }
 
diff --git a/fs/proc/softirqs.c b/fs/proc/softirqs.c
index 1807c2419f1..37994737c98 100644
--- a/fs/proc/softirqs.c
+++ b/fs/proc/softirqs.c
@@ -10,13 +10,13 @@ static int show_softirqs(struct seq_file *p, void *v)
 {
 	int i, j;
 
-	seq_printf(p, "                ");
+	seq_printf(p, "                    ");
 	for_each_possible_cpu(i)
 		seq_printf(p, "CPU%-8d", i);
 	seq_printf(p, "\n");
 
 	for (i = 0; i < NR_SOFTIRQS; i++) {
-		seq_printf(p, "%8s:", softirq_to_name[i]);
+		seq_printf(p, "%12s:", softirq_to_name[i]);
 		for_each_possible_cpu(j)
 			seq_printf(p, " %10u", kstat_softirqs_cpu(i, j));
 		seq_printf(p, "\n");
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index bf31b03fc27..e15a19c93ba 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -31,7 +31,6 @@ static int show_stat(struct seq_file *p, void *v)
 	u64 sum_softirq = 0;
 	unsigned int per_softirq_sums[NR_SOFTIRQS] = {0};
 	struct timespec boottime;
-	unsigned int per_irq_sum;
 
 	user = nice = system = idle = iowait =
 		irq = softirq = steal = cputime64_zero;
@@ -52,9 +51,7 @@ static int show_stat(struct seq_file *p, void *v)
 		guest = cputime64_add(guest, kstat_cpu(i).cpustat.guest);
 		guest_nice = cputime64_add(guest_nice,
 			kstat_cpu(i).cpustat.guest_nice);
-		for_each_irq_nr(j) {
-			sum += kstat_irqs_cpu(j, i);
-		}
+		sum += kstat_cpu_irqs_sum(i);
 		sum += arch_irq_stat_cpu(i);
 
 		for (j = 0; j < NR_SOFTIRQS; j++) {
@@ -110,13 +107,8 @@ static int show_stat(struct seq_file *p, void *v)
 	seq_printf(p, "intr %llu", (unsigned long long)sum);
 
 	/* sum again ? it could be updated? */
-	for_each_irq_nr(j) {
-		per_irq_sum = 0;
-		for_each_possible_cpu(i)
-			per_irq_sum += kstat_irqs_cpu(j, i);
-
-		seq_printf(p, " %u", per_irq_sum);
-	}
+	for_each_irq_nr(j)
+		seq_printf(p, " %u", kstat_irqs(j));
 
 	seq_printf(p,
 		"\nctxt %llu\n"
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 871e25ed006..c126c83b9a4 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -327,6 +327,7 @@ struct mem_size_stats {
 	unsigned long private_clean;
 	unsigned long private_dirty;
 	unsigned long referenced;
+	unsigned long anonymous;
 	unsigned long swap;
 	u64 pss;
 };
@@ -357,6 +358,9 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 		if (!page)
 			continue;
 
+		if (PageAnon(page))
+			mss->anonymous += PAGE_SIZE;
+
 		mss->resident += PAGE_SIZE;
 		/* Accumulate the size in pages that have been accessed. */
 		if (pte_young(ptent) || PageReferenced(page))
@@ -410,6 +414,7 @@ static int show_smap(struct seq_file *m, void *v)
 		   "Private_Clean:  %8lu kB\n"
 		   "Private_Dirty:  %8lu kB\n"
 		   "Referenced:     %8lu kB\n"
+		   "Anonymous:      %8lu kB\n"
 		   "Swap:           %8lu kB\n"
 		   "KernelPageSize: %8lu kB\n"
 		   "MMUPageSize:    %8lu kB\n",
@@ -421,6 +426,7 @@ static int show_smap(struct seq_file *m, void *v)
 		   mss.private_clean >> 10,
 		   mss.private_dirty >> 10,
 		   mss.referenced >> 10,
+		   mss.anonymous >> 10,
 		   mss.swap >> 10,
 		   vma_kernel_pagesize(vma) >> 10,
 		   vma_mmu_pagesize(vma) >> 10);
@@ -700,6 +706,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
  * skip over unmapped regions.
  */
 #define PAGEMAP_WALK_SIZE	(PMD_SIZE)
+#define PAGEMAP_WALK_MASK	(PMD_MASK)
 static ssize_t pagemap_read(struct file *file, char __user *buf,
 			    size_t count, loff_t *ppos)
 {
@@ -770,7 +777,7 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
 		unsigned long end;
 
 		pm.pos = 0;
-		end = start_vaddr + PAGEMAP_WALK_SIZE;
+		end = (start_vaddr + PAGEMAP_WALK_SIZE) & PAGEMAP_WALK_MASK;
 		/* overflow ? */
 		if (end < start_vaddr || end > end_vaddr)
 			end = end_vaddr;
diff --git a/fs/qnx4/inode.c b/fs/qnx4/inode.c
index 01bad30026f..fcada42f1aa 100644
--- a/fs/qnx4/inode.c
+++ b/fs/qnx4/inode.c
@@ -454,17 +454,16 @@ static void destroy_inodecache(void)
 	kmem_cache_destroy(qnx4_inode_cachep);
 }
 
-static int qnx4_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *qnx4_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, qnx4_fill_super,
-			   mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, qnx4_fill_super);
 }
 
 static struct file_system_type qnx4_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "qnx4",
-	.get_sb		= qnx4_get_sb,
+	.mount		= qnx4_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
diff --git a/fs/quota/Kconfig b/fs/quota/Kconfig
index 3e21b1e2ad3..880fd988436 100644
--- a/fs/quota/Kconfig
+++ b/fs/quota/Kconfig
@@ -4,6 +4,7 @@
 
 config QUOTA
 	bool "Quota support"
+	select QUOTACTL
 	help
 	  If you say Y here, you will be able to set per user limits for disk
 	  usage (also called disk quotas). Currently, it works for the
@@ -65,8 +66,7 @@ config QFMT_V2
 
 config QUOTACTL
 	bool
-	depends on XFS_QUOTA || QUOTA
-	default y
+	default n
 
 config QUOTACTL_COMPAT
 	bool
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index aad1316a977..0fed41e6efc 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1386,6 +1386,9 @@ static void __dquot_initialize(struct inode *inode, int type)
 		/* Avoid races with quotaoff() */
 		if (!sb_has_quota_active(sb, cnt))
 			continue;
+		/* We could race with quotaon or dqget() could have failed */
+		if (!got[cnt])
+			continue;
 		if (!inode->i_dquot[cnt]) {
 			inode->i_dquot[cnt] = got[cnt];
 			got[cnt] = NULL;
@@ -1736,6 +1739,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 	qsize_t rsv_space = 0;
 	struct dquot *transfer_from[MAXQUOTAS] = {};
 	int cnt, ret = 0;
+	char is_valid[MAXQUOTAS] = {};
 	char warntype_to[MAXQUOTAS];
 	char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
 
@@ -1757,8 +1761,15 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 	space = cur_space + rsv_space;
 	/* Build the transfer_from list and check the limits */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		/*
+		 * Skip changes for same uid or gid or for turned off quota-type.
+		 */
 		if (!transfer_to[cnt])
 			continue;
+		/* Avoid races with quotaoff() */
+		if (!sb_has_quota_active(inode->i_sb, cnt))
+			continue;
+		is_valid[cnt] = 1;
 		transfer_from[cnt] = inode->i_dquot[cnt];
 		ret = check_idq(transfer_to[cnt], 1, warntype_to + cnt);
 		if (ret)
@@ -1772,12 +1783,8 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 	 * Finally perform the needed transfer from transfer_from to transfer_to
 	 */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		/*
-		 * Skip changes for same uid or gid or for turned off quota-type.
-		 */
-		if (!transfer_to[cnt])
+		if (!is_valid[cnt])
 			continue;
-
 		/* Due to IO error we might not have transfer_from[] structure */
 		if (transfer_from[cnt]) {
 			warntype_from_inodes[cnt] =
@@ -1801,18 +1808,19 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 
 	mark_all_dquot_dirty(transfer_from);
 	mark_all_dquot_dirty(transfer_to);
-	/* Pass back references to put */
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		transfer_to[cnt] = transfer_from[cnt];
-warn:
 	flush_warnings(transfer_to, warntype_to);
 	flush_warnings(transfer_from, warntype_from_inodes);
 	flush_warnings(transfer_from, warntype_from_space);
-	return ret;
+	/* Pass back references to put */
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+		if (is_valid[cnt])
+			transfer_to[cnt] = transfer_from[cnt];
+	return 0;
 over_quota:
 	spin_unlock(&dq_data_lock);
 	up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
-	goto warn;
+	flush_warnings(transfer_to, warntype_to);
+	return ret;
 }
 EXPORT_SYMBOL(__dquot_transfer);
 
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index a5ebae70dc6..eacb166fb25 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -58,6 +58,7 @@ struct inode *ramfs_get_inode(struct super_block *sb,
 	struct inode * inode = new_inode(sb);
 
 	if (inode) {
+		inode->i_ino = get_next_ino();
 		inode_init_owner(inode, dir, mode);
 		inode->i_mapping->a_ops = &ramfs_aops;
 		inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
@@ -254,17 +255,16 @@ fail:
 	return err;
 }
 
-int ramfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+struct dentry *ramfs_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_nodev(fs_type, flags, data, ramfs_fill_super, mnt);
+	return mount_nodev(fs_type, flags, data, ramfs_fill_super);
 }
 
-static int rootfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *rootfs_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super,
-			    mnt);
+	return mount_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super);
 }
 
 static void ramfs_kill_sb(struct super_block *sb)
@@ -275,12 +275,12 @@ static void ramfs_kill_sb(struct super_block *sb)
 
 static struct file_system_type ramfs_fs_type = {
 	.name		= "ramfs",
-	.get_sb		= ramfs_get_sb,
+	.mount		= ramfs_mount,
 	.kill_sb	= ramfs_kill_sb,
 };
 static struct file_system_type rootfs_fs_type = {
 	.name		= "rootfs",
-	.get_sb		= rootfs_get_sb,
+	.mount		= rootfs_mount,
 	.kill_sb	= kill_litter_super,
 };
 
diff --git a/fs/read_write.c b/fs/read_write.c
index e757ef26e4c..5d431bacbea 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -9,7 +9,6 @@
 #include <linux/fcntl.h>
 #include <linux/file.h>
 #include <linux/uio.h>
-#include <linux/smp_lock.h>
 #include <linux/fsnotify.h>
 #include <linux/security.h>
 #include <linux/module.h>
@@ -31,6 +30,20 @@ const struct file_operations generic_ro_fops = {
 
 EXPORT_SYMBOL(generic_ro_fops);
 
+static int
+__negative_fpos_check(struct file *file, loff_t pos, size_t count)
+{
+	/*
+	 * pos or pos+count is negative here, check overflow.
+	 * too big "count" will be caught in rw_verify_area().
+	 */
+	if ((pos < 0) && (pos + count < pos))
+		return -EOVERFLOW;
+	if (file->f_mode & FMODE_UNSIGNED_OFFSET)
+		return 0;
+	return -EINVAL;
+}
+
 /**
  * generic_file_llseek_unlocked - lockless generic llseek implementation
  * @file:	file structure to seek on
@@ -62,7 +75,9 @@ generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
 		break;
 	}
 
-	if (offset < 0 || offset > inode->i_sb->s_maxbytes)
+	if (offset < 0 && __negative_fpos_check(file, offset, 0))
+		return -EINVAL;
+	if (offset > inode->i_sb->s_maxbytes)
 		return -EINVAL;
 
 	/* Special lock needed here? */
@@ -137,7 +152,7 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin)
 			offset += file->f_pos;
 	}
 	retval = -EINVAL;
-	if (offset >= 0) {
+	if (offset >= 0 || !__negative_fpos_check(file, offset, 0)) {
 		if (offset != file->f_pos) {
 			file->f_pos = offset;
 			file->f_version = 0;
@@ -221,13 +236,12 @@ bad:
 }
 #endif
 
+
 /*
  * rw_verify_area doesn't like huge counts. We limit
  * them to something that fits in "int" so that others
  * won't have to do range checks all the time.
  */
-#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK)
-
 int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
 {
 	struct inode *inode;
@@ -238,8 +252,11 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count
 	if (unlikely((ssize_t) count < 0))
 		return retval;
 	pos = *ppos;
-	if (unlikely((pos < 0) || (loff_t) (pos + count) < 0))
-		return retval;
+	if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) {
+		retval = __negative_fpos_check(file, pos, count);
+		if (retval)
+			return retval;
+	}
 
 	if (unlikely(inode->i_flock && mandatory_lock(inode))) {
 		retval = locks_mandatory_area(
@@ -564,65 +581,71 @@ ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
 			      unsigned long nr_segs, unsigned long fast_segs,
 			      struct iovec *fast_pointer,
 			      struct iovec **ret_pointer)
-  {
+{
 	unsigned long seg;
-  	ssize_t ret;
+	ssize_t ret;
 	struct iovec *iov = fast_pointer;
 
-  	/*
-  	 * SuS says "The readv() function *may* fail if the iovcnt argument
-  	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
-  	 * traditionally returned zero for zero segments, so...
-  	 */
+	/*
+	 * SuS says "The readv() function *may* fail if the iovcnt argument
+	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
+	 * traditionally returned zero for zero segments, so...
+	 */
 	if (nr_segs == 0) {
 		ret = 0;
-  		goto out;
+		goto out;
 	}
 
-  	/*
-  	 * First get the "struct iovec" from user memory and
-  	 * verify all the pointers
-  	 */
+	/*
+	 * First get the "struct iovec" from user memory and
+	 * verify all the pointers
+	 */
 	if (nr_segs > UIO_MAXIOV) {
 		ret = -EINVAL;
-  		goto out;
+		goto out;
 	}
 	if (nr_segs > fast_segs) {
-  		iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
+		iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
 		if (iov == NULL) {
 			ret = -ENOMEM;
-  			goto out;
+			goto out;
 		}
-  	}
+	}
 	if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
 		ret = -EFAULT;
-  		goto out;
+		goto out;
 	}
 
-  	/*
+	/*
 	 * According to the Single Unix Specification we should return EINVAL
 	 * if an element length is < 0 when cast to ssize_t or if the
 	 * total length would overflow the ssize_t return value of the
 	 * system call.
-  	 */
+	 *
+	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
+	 * overflow case.
+	 */
 	ret = 0;
-  	for (seg = 0; seg < nr_segs; seg++) {
-  		void __user *buf = iov[seg].iov_base;
-  		ssize_t len = (ssize_t)iov[seg].iov_len;
+	for (seg = 0; seg < nr_segs; seg++) {
+		void __user *buf = iov[seg].iov_base;
+		ssize_t len = (ssize_t)iov[seg].iov_len;
 
 		/* see if we we're about to use an invalid len or if
 		 * it's about to overflow ssize_t */
-		if (len < 0 || (ret + len < ret)) {
+		if (len < 0) {
 			ret = -EINVAL;
-  			goto out;
+			goto out;
 		}
 		if (unlikely(!access_ok(vrfy_dir(type), buf, len))) {
 			ret = -EFAULT;
-  			goto out;
+			goto out;
+		}
+		if (len > MAX_RW_COUNT - ret) {
+			len = MAX_RW_COUNT - ret;
+			iov[seg].iov_len = len;
 		}
-
 		ret += len;
-  	}
+	}
 out:
 	*ret_pointer = iov;
 	return ret;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index caa758377d6..0bae036831e 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -8,7 +8,6 @@
 #include <linux/reiserfs_acl.h>
 #include <linux/reiserfs_xattr.h>
 #include <linux/exportfs.h>
-#include <linux/smp_lock.h>
 #include <linux/pagemap.h>
 #include <linux/highmem.h>
 #include <linux/slab.h>
@@ -22,8 +21,6 @@
 
 int reiserfs_commit_write(struct file *f, struct page *page,
 			  unsigned from, unsigned to);
-int reiserfs_prepare_write(struct file *f, struct page *page,
-			   unsigned from, unsigned to);
 
 void reiserfs_evict_inode(struct inode *inode)
 {
@@ -165,7 +162,7 @@ inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
 ** but tail is still sitting in a direct item, and we can't write to
 ** it.  So, look through this page, and check all the mapped buffers
 ** to make sure they have valid block numbers.  Any that don't need
-** to be unmapped, so that block_prepare_write will correctly call
+** to be unmapped, so that __block_write_begin will correctly call
 ** reiserfs_get_block to convert the tail into an unformatted node
 */
 static inline void fix_tail_page_for_writing(struct page *page)
@@ -439,13 +436,13 @@ static int reiserfs_bmap(struct inode *inode, sector_t block,
 }
 
 /* special version of get_block that is only used by grab_tail_page right
-** now.  It is sent to block_prepare_write, and when you try to get a
+** now.  It is sent to __block_write_begin, and when you try to get a
 ** block past the end of the file (or a block from a hole) it returns
-** -ENOENT instead of a valid buffer.  block_prepare_write expects to
+** -ENOENT instead of a valid buffer.  __block_write_begin expects to
 ** be able to do i/o on the buffers returned, unless an error value
 ** is also returned.
 **
-** So, this allows block_prepare_write to be used for reading a single block
+** So, this allows __block_write_begin to be used for reading a single block
 ** in a page.  Where it does not produce a valid page for holes, or past the
 ** end of the file.  This turns out to be exactly what we need for reading
 ** tails for conversion.
@@ -558,11 +555,12 @@ static int convert_tail_for_hole(struct inode *inode,
 	 **
 	 ** We must fix the tail page for writing because it might have buffers
 	 ** that are mapped, but have a block number of 0.  This indicates tail
-	 ** data that has been read directly into the page, and block_prepare_write
-	 ** won't trigger a get_block in this case.
+	 ** data that has been read directly into the page, and
+	 ** __block_write_begin won't trigger a get_block in this case.
 	 */
 	fix_tail_page_for_writing(tail_page);
-	retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_end);
+	retval = __reiserfs_write_begin(tail_page, tail_start,
+				      tail_end - tail_start);
 	if (retval)
 		goto unlock;
 
@@ -2033,7 +2031,7 @@ static int grab_tail_page(struct inode *inode,
 	/* start within the page of the last block in the file */
 	start = (offset / blocksize) * blocksize;
 
-	error = block_prepare_write(page, start, offset,
+	error = __block_write_begin(page, start, offset - start,
 				    reiserfs_get_block_create_0);
 	if (error)
 		goto unlock;
@@ -2438,7 +2436,7 @@ static int reiserfs_write_full_page(struct page *page,
 		/* from this point on, we know the buffer is mapped to a
 		 * real block and not a direct item
 		 */
-		if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
+		if (wbc->sync_mode != WB_SYNC_NONE) {
 			lock_buffer(bh);
 		} else {
 			if (!trylock_buffer(bh)) {
@@ -2628,8 +2626,7 @@ static int reiserfs_write_begin(struct file *file,
 	return ret;
 }
 
-int reiserfs_prepare_write(struct file *f, struct page *page,
-			   unsigned from, unsigned to)
+int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len)
 {
 	struct inode *inode = page->mapping->host;
 	int ret;
@@ -2650,7 +2647,7 @@ int reiserfs_prepare_write(struct file *f, struct page *page,
 		th->t_refcount++;
 	}
 
-	ret = block_prepare_write(page, from, to, reiserfs_get_block);
+	ret = __block_write_begin(page, from, len, reiserfs_get_block);
 	if (ret && reiserfs_transaction_running(inode->i_sb)) {
 		struct reiserfs_transaction_handle *th = current->journal_info;
 		/* this gets a little ugly.  If reiserfs_get_block returned an
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 5cbb81e134a..79265fdc317 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -9,7 +9,6 @@
 #include <linux/time.h>
 #include <asm/uaccess.h>
 #include <linux/pagemap.h>
-#include <linux/smp_lock.h>
 #include <linux/compat.h>
 
 /*
@@ -160,8 +159,6 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
 
 int reiserfs_commit_write(struct file *f, struct page *page,
 			  unsigned from, unsigned to);
-int reiserfs_prepare_write(struct file *f, struct page *page,
-			   unsigned from, unsigned to);
 /*
 ** reiserfs_unpack
 ** Function try to convert tail from direct item into indirect.
@@ -186,12 +183,11 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
 		return 0;
 	}
 
-	/* we need to make sure nobody is changing the file size beneath
-	 ** us
-	 */
-	reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb);
 	depth = reiserfs_write_lock_once(inode->i_sb);
 
+	/* we need to make sure nobody is changing the file size beneath us */
+	reiserfs_mutex_lock_safe(&inode->i_mutex, inode->i_sb);
+
 	write_from = inode->i_size & (blocksize - 1);
 	/* if we are on a block boundary, we are already unpacked.  */
 	if (write_from == 0) {
@@ -200,7 +196,7 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
 	}
 
 	/* we unpack by finding the page with the tail, and calling
-	 ** reiserfs_prepare_write on that page.  This will force a
+	 ** __reiserfs_write_begin on that page.  This will force a
 	 ** reiserfs_get_block to unpack the tail for us.
 	 */
 	index = inode->i_size >> PAGE_CACHE_SHIFT;
@@ -210,7 +206,7 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
 	if (!page) {
 		goto out;
 	}
-	retval = reiserfs_prepare_write(NULL, page, write_from, write_from);
+	retval = __reiserfs_write_begin(page, write_from, 0);
 	if (retval)
 		goto out_unlock;
 
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 076c8b19468..d31bce1a9f9 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -43,7 +43,6 @@
 #include <linux/fcntl.h>
 #include <linux/stat.h>
 #include <linux/string.h>
-#include <linux/smp_lock.h>
 #include <linux/buffer_head.h>
 #include <linux/workqueue.h>
 #include <linux/writeback.h>
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index ee78d4a0086..ba5f51ec345 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -1156,7 +1156,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
 	inode->i_ctime = CURRENT_TIME_SEC;
 	reiserfs_update_sd(&th, inode);
 
-	atomic_inc(&inode->i_count);
+	ihold(inode);
 	d_instantiate(dentry, inode);
 	retval = journal_end(&th, dir->i_sb, jbegin_count);
 	reiserfs_write_unlock(dir->i_sb);
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index e15ff612002..b243117b875 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -28,7 +28,6 @@
 #include <linux/mount.h>
 #include <linux/namei.h>
 #include <linux/crc32.h>
-#include <linux/smp_lock.h>
 
 struct file_system_type reiserfs_fs_type;
 
@@ -2213,12 +2212,11 @@ out:
 
 #endif
 
-static int get_super_block(struct file_system_type *fs_type,
+static struct dentry *get_super_block(struct file_system_type *fs_type,
 			   int flags, const char *dev_name,
-			   void *data, struct vfsmount *mnt)
+			   void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super,
-			   mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, reiserfs_fill_super);
 }
 
 static int __init init_reiserfs_fs(void)
@@ -2253,7 +2251,7 @@ static void __exit exit_reiserfs_fs(void)
 struct file_system_type reiserfs_fs_type = {
 	.owner = THIS_MODULE,
 	.name = "reiserfs",
-	.get_sb = get_super_block,
+	.mount = get_super_block,
 	.kill_sb = reiserfs_kill_sb,
 	.fs_flags = FS_REQUIRES_DEV,
 };
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 8c4cf273c67..5d04a7828e7 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -418,13 +418,11 @@ static inline __u32 xattr_hash(const char *msg, int len)
 
 int reiserfs_commit_write(struct file *f, struct page *page,
 			  unsigned from, unsigned to);
-int reiserfs_prepare_write(struct file *f, struct page *page,
-			   unsigned from, unsigned to);
 
 static void update_ctime(struct inode *inode)
 {
 	struct timespec now = current_fs_time(inode->i_sb);
-	if (hlist_unhashed(&inode->i_hash) || !inode->i_nlink ||
+	if (inode_unhashed(inode) || !inode->i_nlink ||
 	    timespec_equal(&inode->i_ctime, &now))
 		return;
 
@@ -532,8 +530,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
 			rxh->h_hash = cpu_to_le32(xahash);
 		}
 
-		err = reiserfs_prepare_write(NULL, page, page_offset,
-					    page_offset + chunk + skip);
+		err = __reiserfs_write_begin(page, page_offset, chunk + skip);
 		if (!err) {
 			if (buffer)
 				memcpy(data + skip, buffer + buffer_pos, chunk);
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 536d697a8a2..90d2fcb67a3 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -472,7 +472,9 @@ int reiserfs_acl_chmod(struct inode *inode)
 		struct reiserfs_transaction_handle th;
 		size_t size = reiserfs_xattr_nblocks(inode,
 					     reiserfs_acl_size(clone->a_count));
-		reiserfs_write_lock(inode->i_sb);
+		int depth;
+
+		depth = reiserfs_write_lock_once(inode->i_sb);
 		error = journal_begin(&th, inode->i_sb, size * 2);
 		if (!error) {
 			int error2;
@@ -482,7 +484,7 @@ int reiserfs_acl_chmod(struct inode *inode)
 			if (error2)
 				error = error2;
 		}
-		reiserfs_write_unlock(inode->i_sb);
+		reiserfs_write_unlock_once(inode->i_sb, depth);
 	}
 	posix_acl_release(clone);
 	return error;
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index 268580535c9..6647f90e55c 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -552,20 +552,19 @@ error_rsb:
 /*
  * get a superblock for mounting
  */
-static int romfs_get_sb(struct file_system_type *fs_type,
+static struct dentry *romfs_mount(struct file_system_type *fs_type,
 			int flags, const char *dev_name,
-			void *data, struct vfsmount *mnt)
+			void *data)
 {
-	int ret = -EINVAL;
+	struct dentry *ret = ERR_PTR(-EINVAL);
 
 #ifdef CONFIG_ROMFS_ON_MTD
-	ret = get_sb_mtd(fs_type, flags, dev_name, data, romfs_fill_super,
-			 mnt);
+	ret = mount_mtd(fs_type, flags, dev_name, data, romfs_fill_super);
 #endif
 #ifdef CONFIG_ROMFS_ON_BLOCK
-	if (ret == -EINVAL)
-		ret = get_sb_bdev(fs_type, flags, dev_name, data,
-				  romfs_fill_super, mnt);
+	if (ret == ERR_PTR(-EINVAL))
+		ret = mount_bdev(fs_type, flags, dev_name, data,
+				  romfs_fill_super);
 #endif
 	return ret;
 }
@@ -592,7 +591,7 @@ static void romfs_kill_sb(struct super_block *sb)
 static struct file_system_type romfs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "romfs",
-	.get_sb		= romfs_get_sb,
+	.mount		= romfs_mount,
 	.kill_sb	= romfs_kill_sb,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
diff --git a/fs/select.c b/fs/select.c
index 500a669f779..b7b10aa3086 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -67,7 +67,7 @@ static long __estimate_accuracy(struct timespec *tv)
 	return slack;
 }
 
-static long estimate_accuracy(struct timespec *tv)
+long select_estimate_accuracy(struct timespec *tv)
 {
 	unsigned long ret;
 	struct timespec now;
@@ -417,7 +417,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
 	}
 
 	if (end_time && !timed_out)
-		slack = estimate_accuracy(end_time);
+		slack = select_estimate_accuracy(end_time);
 
 	retval = 0;
 	for (;;) {
@@ -769,7 +769,7 @@ static int do_poll(unsigned int nfds,  struct poll_list *list,
 	}
 
 	if (end_time && !timed_out)
-		slack = estimate_accuracy(end_time);
+		slack = select_estimate_accuracy(end_time);
 
 	for (;;) {
 		struct poll_list *walk;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 0e7cb1395a9..05d6b0e78c9 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -462,9 +462,7 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
 	if (size) {
 		char *p;
 
-		spin_lock(&dcache_lock);
 		p = __d_path(path, root, buf, size);
-		spin_unlock(&dcache_lock);
 		res = PTR_ERR(p);
 		if (!IS_ERR(p)) {
 			char *end = mangle_path(buf, p, esc);
diff --git a/fs/smbfs/Kconfig b/fs/smbfs/Kconfig
deleted file mode 100644
index 2bc24a8c403..00000000000
--- a/fs/smbfs/Kconfig
+++ /dev/null
@@ -1,56 +0,0 @@
-config SMB_FS
-	tristate "SMB file system support (OBSOLETE, please use CIFS)"
-	depends on BKL # probably unfixable
-	depends on INET
-	select NLS
-	help
-	  SMB (Server Message Block) is the protocol Windows for Workgroups
-	  (WfW), Windows 95/98, Windows NT and OS/2 Lan Manager use to share
-	  files and printers over local networks.  Saying Y here allows you to
-	  mount their file systems (often called "shares" in this context) and
-	  access them just like any other Unix directory.  Currently, this
-	  works only if the Windows machines use TCP/IP as the underlying
-	  transport protocol, and not NetBEUI.  For details, read
-	  <file:Documentation/filesystems/smbfs.txt> and the SMB-HOWTO,
-	  available from <http://www.tldp.org/docs.html#howto>.
-
-	  Note: if you just want your box to act as an SMB *server* and make
-	  files and printing services available to Windows clients (which need
-	  to have a TCP/IP stack), you don't need to say Y here; you can use
-	  the program SAMBA (available from <ftp://ftp.samba.org/pub/samba/>)
-	  for that.
-
-	  General information about how to connect Linux, Windows machines and
-	  Macs is on the WWW at <http://www.eats.com/linux_mac_win.html>.
-
-	  To compile the SMB support as a module, choose M here:
-	  the module will be called smbfs.  Most people say N, however.
-
-config SMB_NLS_DEFAULT
-	bool "Use a default NLS"
-	depends on SMB_FS
-	help
-	  Enabling this will make smbfs use nls translations by default. You
-	  need to specify the local charset (CONFIG_NLS_DEFAULT) in the nls
-	  settings and you need to give the default nls for the SMB server as
-	  CONFIG_SMB_NLS_REMOTE.
-
-	  The nls settings can be changed at mount time, if your smbmount
-	  supports that, using the codepage and iocharset parameters.
-
-	  smbmount from samba 2.2.0 or later supports this.
-
-config SMB_NLS_REMOTE
-	string "Default Remote NLS Option"
-	depends on SMB_NLS_DEFAULT
-	default "cp437"
-	help
-	  This setting allows you to specify a default value for which
-	  codepage the server uses. If this field is left blank no
-	  translations will be done by default. The local codepage/charset
-	  default to CONFIG_NLS_DEFAULT.
-
-	  The nls settings can be changed at mount time, if your smbmount
-	  supports that, using the codepage and iocharset parameters.
-
-	  smbmount from samba 2.2.0 or later supports this.
diff --git a/fs/smbfs/Makefile b/fs/smbfs/Makefile
deleted file mode 100644
index 4faf8c4722c..00000000000
--- a/fs/smbfs/Makefile
+++ /dev/null
@@ -1,18 +0,0 @@
-#
-# Makefile for the linux smb-filesystem routines.
-#
-
-obj-$(CONFIG_SMB_FS) += smbfs.o
-
-smbfs-objs := proc.o dir.o cache.o sock.o inode.o file.o ioctl.o getopt.o \
-		symlink.o smbiod.o request.o
-
-# If you want debugging output, you may add these flags to the EXTRA_CFLAGS
-# SMBFS_PARANOIA should normally be enabled.
-
-EXTRA_CFLAGS += -DSMBFS_PARANOIA
-#EXTRA_CFLAGS += -DSMBFS_DEBUG
-#EXTRA_CFLAGS += -DSMBFS_DEBUG_VERBOSE
-#EXTRA_CFLAGS += -DDEBUG_SMB_TIMESTAMP
-#EXTRA_CFLAGS += -Werror
-
diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c
deleted file mode 100644
index 8c177eb7e34..00000000000
--- a/fs/smbfs/cache.c
+++ /dev/null
@@ -1,208 +0,0 @@
-/*
- *  cache.c
- *
- * Copyright (C) 1997 by Bill Hawes
- *
- * Routines to support directory cacheing using the page cache.
- * This cache code is almost directly taken from ncpfs.
- *
- * Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/time.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/smb_fs.h>
-#include <linux/pagemap.h>
-#include <linux/net.h>
-
-#include <asm/page.h>
-
-#include "smb_debug.h"
-#include "proto.h"
-
-/*
- * Force the next attempt to use the cache to be a timeout.
- * If we can't find the page that's fine, it will cause a refresh.
- */
-void
-smb_invalid_dir_cache(struct inode * dir)
-{
-	struct smb_sb_info *server = server_from_inode(dir);
-	union  smb_dir_cache *cache = NULL;
-	struct page *page = NULL;
-
-	page = grab_cache_page(&dir->i_data, 0);
-	if (!page)
-		goto out;
-
-	if (!PageUptodate(page))
-		goto out_unlock;
-
-	cache = kmap(page);
-	cache->head.time = jiffies - SMB_MAX_AGE(server);
-
-	kunmap(page);
-	SetPageUptodate(page);
-out_unlock:
-	unlock_page(page);
-	page_cache_release(page);
-out:
-	return;
-}
-
-/*
- * Mark all dentries for 'parent' as invalid, forcing them to be re-read
- */
-void
-smb_invalidate_dircache_entries(struct dentry *parent)
-{
-	struct smb_sb_info *server = server_from_dentry(parent);
-	struct list_head *next;
-	struct dentry *dentry;
-
-	spin_lock(&dcache_lock);
-	next = parent->d_subdirs.next;
-	while (next != &parent->d_subdirs) {
-		dentry = list_entry(next, struct dentry, d_u.d_child);
-		dentry->d_fsdata = NULL;
-		smb_age_dentry(server, dentry);
-		next = next->next;
-	}
-	spin_unlock(&dcache_lock);
-}
-
-/*
- * dget, but require that fpos and parent matches what the dentry contains.
- * dentry is not known to be a valid pointer at entry.
- */
-struct dentry *
-smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos)
-{
-	struct dentry *dent = dentry;
-	struct list_head *next;
-
-	if (d_validate(dent, parent)) {
-		if (dent->d_name.len <= SMB_MAXNAMELEN &&
-		    (unsigned long)dent->d_fsdata == fpos) {
-			if (!dent->d_inode) {
-				dput(dent);
-				dent = NULL;
-			}
-			return dent;
-		}
-		dput(dent);
-	}
-
-	/* If a pointer is invalid, we search the dentry. */
-	spin_lock(&dcache_lock);
-	next = parent->d_subdirs.next;
-	while (next != &parent->d_subdirs) {
-		dent = list_entry(next, struct dentry, d_u.d_child);
-		if ((unsigned long)dent->d_fsdata == fpos) {
-			if (dent->d_inode)
-				dget_locked(dent);
-			else
-				dent = NULL;
-			goto out_unlock;
-		}
-		next = next->next;
-	}
-	dent = NULL;
-out_unlock:
-	spin_unlock(&dcache_lock);
-	return dent;
-}
-
-
-/*
- * Create dentry/inode for this file and add it to the dircache.
- */
-int
-smb_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
-	       struct smb_cache_control *ctrl, struct qstr *qname,
-	       struct smb_fattr *entry)
-{
-	struct dentry *newdent, *dentry = filp->f_path.dentry;
-	struct inode *newino, *inode = dentry->d_inode;
-	struct smb_cache_control ctl = *ctrl;
-	int valid = 0;
-	int hashed = 0;
-	ino_t ino = 0;
-
-	qname->hash = full_name_hash(qname->name, qname->len);
-
-	if (dentry->d_op && dentry->d_op->d_hash)
-		if (dentry->d_op->d_hash(dentry, qname) != 0)
-			goto end_advance;
-
-	newdent = d_lookup(dentry, qname);
-
-	if (!newdent) {
-		newdent = d_alloc(dentry, qname);
-		if (!newdent)
-			goto end_advance;
-	} else {
-		hashed = 1;
-		memcpy((char *) newdent->d_name.name, qname->name,
-		       newdent->d_name.len);
-	}
-
-	if (!newdent->d_inode) {
-		smb_renew_times(newdent);
-		entry->f_ino = iunique(inode->i_sb, 2);
-		newino = smb_iget(inode->i_sb, entry);
-		if (newino) {
-			smb_new_dentry(newdent);
-			d_instantiate(newdent, newino);
-			if (!hashed)
-				d_rehash(newdent);
-		}
-	} else
-		smb_set_inode_attr(newdent->d_inode, entry);
-
-        if (newdent->d_inode) {
-		ino = newdent->d_inode->i_ino;
-		newdent->d_fsdata = (void *) ctl.fpos;
-		smb_new_dentry(newdent);
-	}
-
-	if (ctl.idx >= SMB_DIRCACHE_SIZE) {
-		if (ctl.page) {
-			kunmap(ctl.page);
-			SetPageUptodate(ctl.page);
-			unlock_page(ctl.page);
-			page_cache_release(ctl.page);
-		}
-		ctl.cache = NULL;
-		ctl.idx  -= SMB_DIRCACHE_SIZE;
-		ctl.ofs  += 1;
-		ctl.page  = grab_cache_page(&inode->i_data, ctl.ofs);
-		if (ctl.page)
-			ctl.cache = kmap(ctl.page);
-	}
-	if (ctl.cache) {
-		ctl.cache->dentry[ctl.idx] = newdent;
-		valid = 1;
-	}
-	dput(newdent);
-
-end_advance:
-	if (!valid)
-		ctl.valid = 0;
-	if (!ctl.filled && (ctl.fpos == filp->f_pos)) {
-		if (!ino)
-			ino = find_inode_number(dentry, qname);
-		if (!ino)
-			ino = iunique(inode->i_sb, 2);
-		ctl.filled = filldir(dirent, qname->name, qname->len,
-				     filp->f_pos, ino, DT_UNKNOWN);
-		if (!ctl.filled)
-			filp->f_pos += 1;
-	}
-	ctl.fpos += 1;
-	ctl.idx  += 1;
-	*ctrl = ctl;
-	return (ctl.valid || !ctl.filled);
-}
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c
deleted file mode 100644
index 00a70cab1f3..00000000000
--- a/fs/smbfs/dir.c
+++ /dev/null
@@ -1,702 +0,0 @@
-/*
- *  dir.c
- *
- *  Copyright (C) 1995, 1996 by Paal-Kr. Engstad and Volker Lendecke
- *  Copyright (C) 1997 by Volker Lendecke
- *
- *  Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/time.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/smp_lock.h>
-#include <linux/ctype.h>
-#include <linux/net.h>
-#include <linux/sched.h>
-
-#include <linux/smb_fs.h>
-#include <linux/smb_mount.h>
-#include <linux/smbno.h>
-
-#include "smb_debug.h"
-#include "proto.h"
-
-static int smb_readdir(struct file *, void *, filldir_t);
-static int smb_dir_open(struct inode *, struct file *);
-
-static struct dentry *smb_lookup(struct inode *, struct dentry *, struct nameidata *);
-static int smb_create(struct inode *, struct dentry *, int, struct nameidata *);
-static int smb_mkdir(struct inode *, struct dentry *, int);
-static int smb_rmdir(struct inode *, struct dentry *);
-static int smb_unlink(struct inode *, struct dentry *);
-static int smb_rename(struct inode *, struct dentry *,
-		      struct inode *, struct dentry *);
-static int smb_make_node(struct inode *,struct dentry *,int,dev_t);
-static int smb_link(struct dentry *, struct inode *, struct dentry *);
-
-const struct file_operations smb_dir_operations =
-{
-	.llseek		= generic_file_llseek,
-	.read		= generic_read_dir,
-	.readdir	= smb_readdir,
-	.unlocked_ioctl	= smb_ioctl,
-	.open		= smb_dir_open,
-};
-
-const struct inode_operations smb_dir_inode_operations =
-{
-	.create		= smb_create,
-	.lookup		= smb_lookup,
-	.unlink		= smb_unlink,
-	.mkdir		= smb_mkdir,
-	.rmdir		= smb_rmdir,
-	.rename		= smb_rename,
-	.getattr	= smb_getattr,
-	.setattr	= smb_notify_change,
-};
-
-const struct inode_operations smb_dir_inode_operations_unix =
-{
-	.create		= smb_create,
-	.lookup		= smb_lookup,
-	.unlink		= smb_unlink,
-	.mkdir		= smb_mkdir,
-	.rmdir		= smb_rmdir,
-	.rename		= smb_rename,
-	.getattr	= smb_getattr,
-	.setattr	= smb_notify_change,
-	.symlink	= smb_symlink,
-	.mknod		= smb_make_node,
-	.link		= smb_link,
-};
-
-/*
- * Read a directory, using filldir to fill the dirent memory.
- * smb_proc_readdir does the actual reading from the smb server.
- *
- * The cache code is almost directly taken from ncpfs
- */
-static int 
-smb_readdir(struct file *filp, void *dirent, filldir_t filldir)
-{
-	struct dentry *dentry = filp->f_path.dentry;
-	struct inode *dir = dentry->d_inode;
-	struct smb_sb_info *server = server_from_dentry(dentry);
-	union  smb_dir_cache *cache = NULL;
-	struct smb_cache_control ctl;
-	struct page *page = NULL;
-	int result;
-
-	ctl.page  = NULL;
-	ctl.cache = NULL;
-
-	VERBOSE("reading %s/%s, f_pos=%d\n",
-		DENTRY_PATH(dentry),  (int) filp->f_pos);
-
-	result = 0;
-
-	lock_kernel();
-
-	switch ((unsigned int) filp->f_pos) {
-	case 0:
-		if (filldir(dirent, ".", 1, 0, dir->i_ino, DT_DIR) < 0)
-			goto out;
-		filp->f_pos = 1;
-		/* fallthrough */
-	case 1:
-		if (filldir(dirent, "..", 2, 1, parent_ino(dentry), DT_DIR) < 0)
-			goto out;
-		filp->f_pos = 2;
-	}
-
-	/*
-	 * Make sure our inode is up-to-date.
-	 */
-	result = smb_revalidate_inode(dentry);
-	if (result)
-		goto out;
-
-
-	page = grab_cache_page(&dir->i_data, 0);
-	if (!page)
-		goto read_really;
-
-	ctl.cache = cache = kmap(page);
-	ctl.head  = cache->head;
-
-	if (!PageUptodate(page) || !ctl.head.eof) {
-		VERBOSE("%s/%s, page uptodate=%d, eof=%d\n",
-			 DENTRY_PATH(dentry), PageUptodate(page),ctl.head.eof);
-		goto init_cache;
-	}
-
-	if (filp->f_pos == 2) {
-		if (jiffies - ctl.head.time >= SMB_MAX_AGE(server))
-			goto init_cache;
-
-		/*
-		 * N.B. ncpfs checks mtime of dentry too here, we don't.
-		 *   1. common smb servers do not update mtime on dir changes
-		 *   2. it requires an extra smb request
-		 *      (revalidate has the same timeout as ctl.head.time)
-		 *
-		 * Instead smbfs invalidates its own cache on local changes
-		 * and remote changes are not seen until timeout.
-		 */
-	}
-
-	if (filp->f_pos > ctl.head.end)
-		goto finished;
-
-	ctl.fpos = filp->f_pos + (SMB_DIRCACHE_START - 2);
-	ctl.ofs  = ctl.fpos / SMB_DIRCACHE_SIZE;
-	ctl.idx  = ctl.fpos % SMB_DIRCACHE_SIZE;
-
-	for (;;) {
-		if (ctl.ofs != 0) {
-			ctl.page = find_lock_page(&dir->i_data, ctl.ofs);
-			if (!ctl.page)
-				goto invalid_cache;
-			ctl.cache = kmap(ctl.page);
-			if (!PageUptodate(ctl.page))
-				goto invalid_cache;
-		}
-		while (ctl.idx < SMB_DIRCACHE_SIZE) {
-			struct dentry *dent;
-			int res;
-
-			dent = smb_dget_fpos(ctl.cache->dentry[ctl.idx],
-					     dentry, filp->f_pos);
-			if (!dent)
-				goto invalid_cache;
-
-			res = filldir(dirent, dent->d_name.name,
-				      dent->d_name.len, filp->f_pos,
-				      dent->d_inode->i_ino, DT_UNKNOWN);
-			dput(dent);
-			if (res)
-				goto finished;
-			filp->f_pos += 1;
-			ctl.idx += 1;
-			if (filp->f_pos > ctl.head.end)
-				goto finished;
-		}
-		if (ctl.page) {
-			kunmap(ctl.page);
-			SetPageUptodate(ctl.page);
-			unlock_page(ctl.page);
-			page_cache_release(ctl.page);
-			ctl.page = NULL;
-		}
-		ctl.idx  = 0;
-		ctl.ofs += 1;
-	}
-invalid_cache:
-	if (ctl.page) {
-		kunmap(ctl.page);
-		unlock_page(ctl.page);
-		page_cache_release(ctl.page);
-		ctl.page = NULL;
-	}
-	ctl.cache = cache;
-init_cache:
-	smb_invalidate_dircache_entries(dentry);
-	ctl.head.time = jiffies;
-	ctl.head.eof = 0;
-	ctl.fpos = 2;
-	ctl.ofs = 0;
-	ctl.idx = SMB_DIRCACHE_START;
-	ctl.filled = 0;
-	ctl.valid  = 1;
-read_really:
-	result = server->ops->readdir(filp, dirent, filldir, &ctl);
-	if (result == -ERESTARTSYS && page)
-		ClearPageUptodate(page);
-	if (ctl.idx == -1)
-		goto invalid_cache;	/* retry */
-	ctl.head.end = ctl.fpos - 1;
-	ctl.head.eof = ctl.valid;
-finished:
-	if (page) {
-		cache->head = ctl.head;
-		kunmap(page);
-		if (result != -ERESTARTSYS)
-			SetPageUptodate(page);
-		unlock_page(page);
-		page_cache_release(page);
-	}
-	if (ctl.page) {
-		kunmap(ctl.page);
-		SetPageUptodate(ctl.page);
-		unlock_page(ctl.page);
-		page_cache_release(ctl.page);
-	}
-out:
-	unlock_kernel();
-	return result;
-}
-
-static int
-smb_dir_open(struct inode *dir, struct file *file)
-{
-	struct dentry *dentry = file->f_path.dentry;
-	struct smb_sb_info *server;
-	int error = 0;
-
-	VERBOSE("(%s/%s)\n", dentry->d_parent->d_name.name,
-		file->f_path.dentry->d_name.name);
-
-	/*
-	 * Directory timestamps in the core protocol aren't updated
-	 * when a file is added, so we give them a very short TTL.
-	 */
-	lock_kernel();
-	server = server_from_dentry(dentry);
-	if (server->opt.protocol < SMB_PROTOCOL_LANMAN2) {
-		unsigned long age = jiffies - SMB_I(dir)->oldmtime;
-		if (age > 2*HZ)
-			smb_invalid_dir_cache(dir);
-	}
-
-	/*
-	 * Note: in order to allow the smbmount process to open the
-	 * mount point, we only revalidate if the connection is valid or
-	 * if the process is trying to access something other than the root.
-	 */
-	if (server->state == CONN_VALID || !IS_ROOT(dentry))
-		error = smb_revalidate_inode(dentry);
-	unlock_kernel();
-	return error;
-}
-
-/*
- * Dentry operations routines
- */
-static int smb_lookup_validate(struct dentry *, struct nameidata *);
-static int smb_hash_dentry(struct dentry *, struct qstr *);
-static int smb_compare_dentry(struct dentry *, struct qstr *, struct qstr *);
-static int smb_delete_dentry(struct dentry *);
-
-static const struct dentry_operations smbfs_dentry_operations =
-{
-	.d_revalidate	= smb_lookup_validate,
-	.d_hash		= smb_hash_dentry,
-	.d_compare	= smb_compare_dentry,
-	.d_delete	= smb_delete_dentry,
-};
-
-static const struct dentry_operations smbfs_dentry_operations_case =
-{
-	.d_revalidate	= smb_lookup_validate,
-	.d_delete	= smb_delete_dentry,
-};
-
-
-/*
- * This is the callback when the dcache has a lookup hit.
- */
-static int
-smb_lookup_validate(struct dentry * dentry, struct nameidata *nd)
-{
-	struct smb_sb_info *server = server_from_dentry(dentry);
-	struct inode * inode = dentry->d_inode;
-	unsigned long age = jiffies - dentry->d_time;
-	int valid;
-
-	/*
-	 * The default validation is based on dentry age:
-	 * we believe in dentries for a few seconds.  (But each
-	 * successful server lookup renews the timestamp.)
-	 */
-	valid = (age <= SMB_MAX_AGE(server));
-#ifdef SMBFS_DEBUG_VERBOSE
-	if (!valid)
-		VERBOSE("%s/%s not valid, age=%lu\n", 
-			DENTRY_PATH(dentry), age);
-#endif
-
-	if (inode) {
-		lock_kernel();
-		if (is_bad_inode(inode)) {
-			PARANOIA("%s/%s has dud inode\n", DENTRY_PATH(dentry));
-			valid = 0;
-		} else if (!valid)
-			valid = (smb_revalidate_inode(dentry) == 0);
-		unlock_kernel();
-	} else {
-		/*
-		 * What should we do for negative dentries?
-		 */
-	}
-	return valid;
-}
-
-static int 
-smb_hash_dentry(struct dentry *dir, struct qstr *this)
-{
-	unsigned long hash;
-	int i;
-
-	hash = init_name_hash();
-	for (i=0; i < this->len ; i++)
-		hash = partial_name_hash(tolower(this->name[i]), hash);
-	this->hash = end_name_hash(hash);
-  
-	return 0;
-}
-
-static int
-smb_compare_dentry(struct dentry *dir, struct qstr *a, struct qstr *b)
-{
-	int i, result = 1;
-
-	if (a->len != b->len)
-		goto out;
-	for (i=0; i < a->len; i++) {
-		if (tolower(a->name[i]) != tolower(b->name[i]))
-			goto out;
-	}
-	result = 0;
-out:
-	return result;
-}
-
-/*
- * This is the callback from dput() when d_count is going to 0.
- * We use this to unhash dentries with bad inodes.
- */
-static int
-smb_delete_dentry(struct dentry * dentry)
-{
-	if (dentry->d_inode) {
-		if (is_bad_inode(dentry->d_inode)) {
-			PARANOIA("bad inode, unhashing %s/%s\n",
-				 DENTRY_PATH(dentry));
-			return 1;
-		}
-	} else {
-		/* N.B. Unhash negative dentries? */
-	}
-	return 0;
-}
-
-/*
- * Initialize a new dentry
- */
-void
-smb_new_dentry(struct dentry *dentry)
-{
-	struct smb_sb_info *server = server_from_dentry(dentry);
-
-	if (server->mnt->flags & SMB_MOUNT_CASE)
-		dentry->d_op = &smbfs_dentry_operations_case;
-	else
-		dentry->d_op = &smbfs_dentry_operations;
-	dentry->d_time = jiffies;
-}
-
-
-/*
- * Whenever a lookup succeeds, we know the parent directories
- * are all valid, so we want to update the dentry timestamps.
- * N.B. Move this to dcache?
- */
-void
-smb_renew_times(struct dentry * dentry)
-{
-	dget(dentry);
-	spin_lock(&dentry->d_lock);
-	for (;;) {
-		struct dentry *parent;
-
-		dentry->d_time = jiffies;
-		if (IS_ROOT(dentry))
-			break;
-		parent = dentry->d_parent;
-		dget(parent);
-		spin_unlock(&dentry->d_lock);
-		dput(dentry);
-		dentry = parent;
-		spin_lock(&dentry->d_lock);
-	}
-	spin_unlock(&dentry->d_lock);
-	dput(dentry);
-}
-
-static struct dentry *
-smb_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
-{
-	struct smb_fattr finfo;
-	struct inode *inode;
-	int error;
-	struct smb_sb_info *server;
-
-	error = -ENAMETOOLONG;
-	if (dentry->d_name.len > SMB_MAXNAMELEN)
-		goto out;
-
-	/* Do not allow lookup of names with backslashes in */
-	error = -EINVAL;
-	if (memchr(dentry->d_name.name, '\\', dentry->d_name.len))
-		goto out;
-
-	lock_kernel();
-	error = smb_proc_getattr(dentry, &finfo);
-#ifdef SMBFS_PARANOIA
-	if (error && error != -ENOENT)
-		PARANOIA("find %s/%s failed, error=%d\n",
-			 DENTRY_PATH(dentry), error);
-#endif
-
-	inode = NULL;
-	if (error == -ENOENT)
-		goto add_entry;
-	if (!error) {
-		error = -EACCES;
-		finfo.f_ino = iunique(dentry->d_sb, 2);
-		inode = smb_iget(dir->i_sb, &finfo);
-		if (inode) {
-	add_entry:
-			server = server_from_dentry(dentry);
-			if (server->mnt->flags & SMB_MOUNT_CASE)
-				dentry->d_op = &smbfs_dentry_operations_case;
-			else
-				dentry->d_op = &smbfs_dentry_operations;
-
-			d_add(dentry, inode);
-			smb_renew_times(dentry);
-			error = 0;
-		}
-	}
-	unlock_kernel();
-out:
-	return ERR_PTR(error);
-}
-
-/*
- * This code is common to all routines creating a new inode.
- */
-static int
-smb_instantiate(struct dentry *dentry, __u16 fileid, int have_id)
-{
-	struct smb_sb_info *server = server_from_dentry(dentry);
-	struct inode *inode;
-	int error;
-	struct smb_fattr fattr;
-
-	VERBOSE("file %s/%s, fileid=%u\n", DENTRY_PATH(dentry), fileid);
-
-	error = smb_proc_getattr(dentry, &fattr);
-	if (error)
-		goto out_close;
-
-	smb_renew_times(dentry);
-	fattr.f_ino = iunique(dentry->d_sb, 2);
-	inode = smb_iget(dentry->d_sb, &fattr);
-	if (!inode)
-		goto out_no_inode;
-
-	if (have_id) {
-		struct smb_inode_info *ei = SMB_I(inode);
-		ei->fileid = fileid;
-		ei->access = SMB_O_RDWR;
-		ei->open = server->generation;
-	}
-	d_instantiate(dentry, inode);
-out:
-	return error;
-
-out_no_inode:
-	error = -EACCES;
-out_close:
-	if (have_id) {
-		PARANOIA("%s/%s failed, error=%d, closing %u\n",
-			 DENTRY_PATH(dentry), error, fileid);
-		smb_close_fileid(dentry, fileid);
-	}
-	goto out;
-}
-
-/* N.B. How should the mode argument be used? */
-static int
-smb_create(struct inode *dir, struct dentry *dentry, int mode,
-		struct nameidata *nd)
-{
-	struct smb_sb_info *server = server_from_dentry(dentry);
-	__u16 fileid;
-	int error;
-	struct iattr attr;
-
-	VERBOSE("creating %s/%s, mode=%d\n", DENTRY_PATH(dentry), mode);
-
-	lock_kernel();
-	smb_invalid_dir_cache(dir);
-	error = smb_proc_create(dentry, 0, get_seconds(), &fileid);
-	if (!error) {
-		if (server->opt.capabilities & SMB_CAP_UNIX) {
-			/* Set attributes for new file */
-			attr.ia_valid = ATTR_MODE;
-			attr.ia_mode = mode;
-			error = smb_proc_setattr_unix(dentry, &attr, 0, 0);
-		}
-		error = smb_instantiate(dentry, fileid, 1);
-	} else {
-		PARANOIA("%s/%s failed, error=%d\n",
-			 DENTRY_PATH(dentry), error);
-	}
-	unlock_kernel();
-	return error;
-}
-
-/* N.B. How should the mode argument be used? */
-static int
-smb_mkdir(struct inode *dir, struct dentry *dentry, int mode)
-{
-	struct smb_sb_info *server = server_from_dentry(dentry);
-	int error;
-	struct iattr attr;
-
-	lock_kernel();
-	smb_invalid_dir_cache(dir);
-	error = smb_proc_mkdir(dentry);
-	if (!error) {
-		if (server->opt.capabilities & SMB_CAP_UNIX) {
-			/* Set attributes for new directory */
-			attr.ia_valid = ATTR_MODE;
-			attr.ia_mode = mode;
-			error = smb_proc_setattr_unix(dentry, &attr, 0, 0);
-		}
-		error = smb_instantiate(dentry, 0, 0);
-	}
-	unlock_kernel();
-	return error;
-}
-
-static int
-smb_rmdir(struct inode *dir, struct dentry *dentry)
-{
-	struct inode *inode = dentry->d_inode;
-	int error;
-
-	/*
-	 * Close the directory if it's open.
-	 */
-	lock_kernel();
-	smb_close(inode);
-
-	/*
-	 * Check that nobody else is using the directory..
-	 */
-	error = -EBUSY;
-	if (!d_unhashed(dentry))
-		goto out;
-
-	smb_invalid_dir_cache(dir);
-	error = smb_proc_rmdir(dentry);
-
-out:
-	unlock_kernel();
-	return error;
-}
-
-static int
-smb_unlink(struct inode *dir, struct dentry *dentry)
-{
-	int error;
-
-	/*
-	 * Close the file if it's open.
-	 */
-	lock_kernel();
-	smb_close(dentry->d_inode);
-
-	smb_invalid_dir_cache(dir);
-	error = smb_proc_unlink(dentry);
-	if (!error)
-		smb_renew_times(dentry);
-	unlock_kernel();
-	return error;
-}
-
-static int
-smb_rename(struct inode *old_dir, struct dentry *old_dentry,
-	   struct inode *new_dir, struct dentry *new_dentry)
-{
-	int error;
-
-	/*
-	 * Close any open files, and check whether to delete the
-	 * target before attempting the rename.
-	 */
-	lock_kernel();
-	if (old_dentry->d_inode)
-		smb_close(old_dentry->d_inode);
-	if (new_dentry->d_inode) {
-		smb_close(new_dentry->d_inode);
-		error = smb_proc_unlink(new_dentry);
-		if (error) {
-			VERBOSE("unlink %s/%s, error=%d\n",
-				DENTRY_PATH(new_dentry), error);
-			goto out;
-		}
-		/* FIXME */
-		d_delete(new_dentry);
-	}
-
-	smb_invalid_dir_cache(old_dir);
-	smb_invalid_dir_cache(new_dir);
-	error = smb_proc_mv(old_dentry, new_dentry);
-	if (!error) {
-		smb_renew_times(old_dentry);
-		smb_renew_times(new_dentry);
-	}
-out:
-	unlock_kernel();
-	return error;
-}
-
-/*
- * FIXME: samba servers won't let you create device nodes unless uid/gid
- * matches the connection credentials (and we don't know which those are ...)
- */
-static int
-smb_make_node(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
-{
-	int error;
-	struct iattr attr;
-
-	attr.ia_valid = ATTR_MODE | ATTR_UID | ATTR_GID;
-	attr.ia_mode = mode;
-	current_euid_egid(&attr.ia_uid, &attr.ia_gid);
-
-	if (!new_valid_dev(dev))
-		return -EINVAL;
-
-	smb_invalid_dir_cache(dir);
-	error = smb_proc_setattr_unix(dentry, &attr, MAJOR(dev), MINOR(dev));
-	if (!error) {
-		error = smb_instantiate(dentry, 0, 0);
-	}
-	return error;
-}
-
-/*
- * dentry = existing file
- * new_dentry = new file
- */
-static int
-smb_link(struct dentry *dentry, struct inode *dir, struct dentry *new_dentry)
-{
-	int error;
-
-	DEBUG1("smb_link old=%s/%s new=%s/%s\n",
-	       DENTRY_PATH(dentry), DENTRY_PATH(new_dentry));
-	smb_invalid_dir_cache(dir);
-	error = smb_proc_link(server_from_dentry(dentry), dentry, new_dentry);
-	if (!error) {
-		smb_renew_times(dentry);
-		error = smb_instantiate(new_dentry, 0, 0);
-	}
-	return error;
-}
diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c
deleted file mode 100644
index 8e187a0f94b..00000000000
--- a/fs/smbfs/file.c
+++ /dev/null
@@ -1,454 +0,0 @@
-/*
- *  file.c
- *
- *  Copyright (C) 1995, 1996, 1997 by Paal-Kr. Engstad and Volker Lendecke
- *  Copyright (C) 1997 by Volker Lendecke
- *
- *  Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/time.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/fcntl.h>
-#include <linux/stat.h>
-#include <linux/mm.h>
-#include <linux/pagemap.h>
-#include <linux/smp_lock.h>
-#include <linux/net.h>
-#include <linux/aio.h>
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-
-#include <linux/smbno.h>
-#include <linux/smb_fs.h>
-
-#include "smb_debug.h"
-#include "proto.h"
-
-static int
-smb_fsync(struct file *file, int datasync)
-{
-	struct dentry *dentry = file->f_path.dentry;
-	struct smb_sb_info *server = server_from_dentry(dentry);
-	int result;
-
-	VERBOSE("sync file %s/%s\n", DENTRY_PATH(dentry));
-
-	/*
-	 * The VFS will writepage() all dirty pages for us, but we
-	 * should send a SMBflush to the server, letting it know that
-	 * we want things synchronized with actual storage.
-	 *
-	 * Note: this function requires all pages to have been written already
-	 *       (should be ok with writepage_sync)
-	 */
-	result = smb_proc_flush(server, SMB_I(dentry->d_inode)->fileid);
-	return result;
-}
-
-/*
- * Read a page synchronously.
- */
-static int
-smb_readpage_sync(struct dentry *dentry, struct page *page)
-{
-	char *buffer = kmap(page);
-	loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
-	struct smb_sb_info *server = server_from_dentry(dentry);
-	unsigned int rsize = smb_get_rsize(server);
-	int count = PAGE_SIZE;
-	int result;
-
-	VERBOSE("file %s/%s, count=%d@%Ld, rsize=%d\n",
-		DENTRY_PATH(dentry), count, offset, rsize);
-
-	result = smb_open(dentry, SMB_O_RDONLY);
-	if (result < 0)
-		goto io_error;
-
-	do {
-		if (count < rsize)
-			rsize = count;
-
-		result = server->ops->read(dentry->d_inode,offset,rsize,buffer);
-		if (result < 0)
-			goto io_error;
-
-		count -= result;
-		offset += result;
-		buffer += result;
-		dentry->d_inode->i_atime =
-			current_fs_time(dentry->d_inode->i_sb);
-		if (result < rsize)
-			break;
-	} while (count);
-
-	memset(buffer, 0, count);
-	flush_dcache_page(page);
-	SetPageUptodate(page);
-	result = 0;
-
-io_error:
-	kunmap(page);
-	unlock_page(page);
-	return result;
-}
-
-/*
- * We are called with the page locked and we unlock it when done.
- */
-static int
-smb_readpage(struct file *file, struct page *page)
-{
-	int		error;
-	struct dentry  *dentry = file->f_path.dentry;
-
-	page_cache_get(page);
-	error = smb_readpage_sync(dentry, page);
-	page_cache_release(page);
-	return error;
-}
-
-/*
- * Write a page synchronously.
- * Offset is the data offset within the page.
- */
-static int
-smb_writepage_sync(struct inode *inode, struct page *page,
-		   unsigned long pageoffset, unsigned int count)
-{
-	loff_t offset;
-	char *buffer = kmap(page) + pageoffset;
-	struct smb_sb_info *server = server_from_inode(inode);
-	unsigned int wsize = smb_get_wsize(server);
-	int ret = 0;
-
-	offset = ((loff_t)page->index << PAGE_CACHE_SHIFT) + pageoffset;
-	VERBOSE("file ino=%ld, fileid=%d, count=%d@%Ld, wsize=%d\n",
-		inode->i_ino, SMB_I(inode)->fileid, count, offset, wsize);
-
-	do {
-		int write_ret;
-
-		if (count < wsize)
-			wsize = count;
-
-		write_ret = server->ops->write(inode, offset, wsize, buffer);
-		if (write_ret < 0) {
-			PARANOIA("failed write, wsize=%d, write_ret=%d\n",
-				 wsize, write_ret);
-			ret = write_ret;
-			break;
-		}
-		/* N.B. what if result < wsize?? */
-#ifdef SMBFS_PARANOIA
-		if (write_ret < wsize)
-			PARANOIA("short write, wsize=%d, write_ret=%d\n",
-				 wsize, write_ret);
-#endif
-		buffer += wsize;
-		offset += wsize;
-		count -= wsize;
-		/*
-		 * Update the inode now rather than waiting for a refresh.
-		 */
-		inode->i_mtime = inode->i_atime = current_fs_time(inode->i_sb);
-		SMB_I(inode)->flags |= SMB_F_LOCALWRITE;
-		if (offset > inode->i_size)
-			inode->i_size = offset;
-	} while (count);
-
-	kunmap(page);
-	return ret;
-}
-
-/*
- * Write a page to the server. This will be used for NFS swapping only
- * (for now), and we currently do this synchronously only.
- *
- * We are called with the page locked and we unlock it when done.
- */
-static int
-smb_writepage(struct page *page, struct writeback_control *wbc)
-{
-	struct address_space *mapping = page->mapping;
-	struct inode *inode;
-	unsigned long end_index;
-	unsigned offset = PAGE_CACHE_SIZE;
-	int err;
-
-	BUG_ON(!mapping);
-	inode = mapping->host;
-	BUG_ON(!inode);
-
-	end_index = inode->i_size >> PAGE_CACHE_SHIFT;
-
-	/* easy case */
-	if (page->index < end_index)
-		goto do_it;
-	/* things got complicated... */
-	offset = inode->i_size & (PAGE_CACHE_SIZE-1);
-	/* OK, are we completely out? */
-	if (page->index >= end_index+1 || !offset)
-		return 0; /* truncated - don't care */
-do_it:
-	page_cache_get(page);
-	err = smb_writepage_sync(inode, page, 0, offset);
-	SetPageUptodate(page);
-	unlock_page(page);
-	page_cache_release(page);
-	return err;
-}
-
-static int
-smb_updatepage(struct file *file, struct page *page, unsigned long offset,
-	       unsigned int count)
-{
-	struct dentry *dentry = file->f_path.dentry;
-
-	DEBUG1("(%s/%s %d@%lld)\n", DENTRY_PATH(dentry), count,
-		((unsigned long long)page->index << PAGE_CACHE_SHIFT) + offset);
-
-	return smb_writepage_sync(dentry->d_inode, page, offset, count);
-}
-
-static ssize_t
-smb_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
-			unsigned long nr_segs, loff_t pos)
-{
-	struct file * file = iocb->ki_filp;
-	struct dentry * dentry = file->f_path.dentry;
-	ssize_t	status;
-
-	VERBOSE("file %s/%s, count=%lu@%lu\n", DENTRY_PATH(dentry),
-		(unsigned long) iocb->ki_left, (unsigned long) pos);
-
-	status = smb_revalidate_inode(dentry);
-	if (status) {
-		PARANOIA("%s/%s validation failed, error=%Zd\n",
-			 DENTRY_PATH(dentry), status);
-		goto out;
-	}
-
-	VERBOSE("before read, size=%ld, flags=%x, atime=%ld\n",
-		(long)dentry->d_inode->i_size,
-		dentry->d_inode->i_flags, dentry->d_inode->i_atime.tv_sec);
-
-	status = generic_file_aio_read(iocb, iov, nr_segs, pos);
-out:
-	return status;
-}
-
-static int
-smb_file_mmap(struct file * file, struct vm_area_struct * vma)
-{
-	struct dentry * dentry = file->f_path.dentry;
-	int	status;
-
-	VERBOSE("file %s/%s, address %lu - %lu\n",
-		DENTRY_PATH(dentry), vma->vm_start, vma->vm_end);
-
-	status = smb_revalidate_inode(dentry);
-	if (status) {
-		PARANOIA("%s/%s validation failed, error=%d\n",
-			 DENTRY_PATH(dentry), status);
-		goto out;
-	}
-	status = generic_file_mmap(file, vma);
-out:
-	return status;
-}
-
-static ssize_t
-smb_file_splice_read(struct file *file, loff_t *ppos,
-		     struct pipe_inode_info *pipe, size_t count,
-		     unsigned int flags)
-{
-	struct dentry *dentry = file->f_path.dentry;
-	ssize_t status;
-
-	VERBOSE("file %s/%s, pos=%Ld, count=%lu\n",
-		DENTRY_PATH(dentry), *ppos, count);
-
-	status = smb_revalidate_inode(dentry);
-	if (status) {
-		PARANOIA("%s/%s validation failed, error=%Zd\n",
-			 DENTRY_PATH(dentry), status);
-		goto out;
-	}
-	status = generic_file_splice_read(file, ppos, pipe, count, flags);
-out:
-	return status;
-}
-
-/*
- * This does the "real" work of the write. The generic routine has
- * allocated the page, locked it, done all the page alignment stuff
- * calculations etc. Now we should just copy the data from user
- * space and write it back to the real medium..
- *
- * If the writer ends up delaying the write, the writer needs to
- * increment the page use counts until he is done with the page.
- */
-static int smb_write_begin(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned flags,
-			struct page **pagep, void **fsdata)
-{
-	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
-	*pagep = grab_cache_page_write_begin(mapping, index, flags);
-	if (!*pagep)
-		return -ENOMEM;
-	return 0;
-}
-
-static int smb_write_end(struct file *file, struct address_space *mapping,
-			loff_t pos, unsigned len, unsigned copied,
-			struct page *page, void *fsdata)
-{
-	int status;
-	unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
-
-	lock_kernel();
-	status = smb_updatepage(file, page, offset, copied);
-	unlock_kernel();
-
-	if (!status) {
-		if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
-			SetPageUptodate(page);
-		status = copied;
-	}
-
-	unlock_page(page);
-	page_cache_release(page);
-
-	return status;
-}
-
-const struct address_space_operations smb_file_aops = {
-	.readpage = smb_readpage,
-	.writepage = smb_writepage,
-	.write_begin = smb_write_begin,
-	.write_end = smb_write_end,
-};
-
-/* 
- * Write to a file (through the page cache).
- */
-static ssize_t
-smb_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
-			       unsigned long nr_segs, loff_t pos)
-{
-	struct file * file = iocb->ki_filp;
-	struct dentry * dentry = file->f_path.dentry;
-	ssize_t	result;
-
-	VERBOSE("file %s/%s, count=%lu@%lu\n",
-		DENTRY_PATH(dentry),
-		(unsigned long) iocb->ki_left, (unsigned long) pos);
-
-	result = smb_revalidate_inode(dentry);
-	if (result) {
-		PARANOIA("%s/%s validation failed, error=%Zd\n",
-			 DENTRY_PATH(dentry), result);
-		goto out;
-	}
-
-	result = smb_open(dentry, SMB_O_WRONLY);
-	if (result)
-		goto out;
-
-	if (iocb->ki_left > 0) {
-		result = generic_file_aio_write(iocb, iov, nr_segs, pos);
-		VERBOSE("pos=%ld, size=%ld, mtime=%ld, atime=%ld\n",
-			(long) file->f_pos, (long) dentry->d_inode->i_size,
-			dentry->d_inode->i_mtime.tv_sec,
-			dentry->d_inode->i_atime.tv_sec);
-	}
-out:
-	return result;
-}
-
-static int
-smb_file_open(struct inode *inode, struct file * file)
-{
-	int result;
-	struct dentry *dentry = file->f_path.dentry;
-	int smb_mode = (file->f_mode & O_ACCMODE) - 1;
-
-	lock_kernel();
-	result = smb_open(dentry, smb_mode);
-	if (result)
-		goto out;
-	SMB_I(inode)->openers++;
-out:
-	unlock_kernel();
-	return result;
-}
-
-static int
-smb_file_release(struct inode *inode, struct file * file)
-{
-	lock_kernel();
-	if (!--SMB_I(inode)->openers) {
-		/* We must flush any dirty pages now as we won't be able to
-		   write anything after close. mmap can trigger this.
-		   "openers" should perhaps include mmap'ers ... */
-		filemap_write_and_wait(inode->i_mapping);
-		smb_close(inode);
-	}
-	unlock_kernel();
-	return 0;
-}
-
-/*
- * Check whether the required access is compatible with
- * an inode's permission. SMB doesn't recognize superuser
- * privileges, so we need our own check for this.
- */
-static int
-smb_file_permission(struct inode *inode, int mask)
-{
-	int mode = inode->i_mode;
-	int error = 0;
-
-	VERBOSE("mode=%x, mask=%x\n", mode, mask);
-
-	/* Look at user permissions */
-	mode >>= 6;
-	if (mask & ~mode & (MAY_READ | MAY_WRITE | MAY_EXEC))
-		error = -EACCES;
-	return error;
-}
-
-static loff_t smb_remote_llseek(struct file *file, loff_t offset, int origin)
-{
-	loff_t ret;
-	lock_kernel();
-	ret = generic_file_llseek_unlocked(file, offset, origin);
-	unlock_kernel();
-	return ret;
-}
-
-const struct file_operations smb_file_operations =
-{
-	.llseek 	= smb_remote_llseek,
-	.read		= do_sync_read,
-	.aio_read	= smb_file_aio_read,
-	.write		= do_sync_write,
-	.aio_write	= smb_file_aio_write,
-	.unlocked_ioctl	= smb_ioctl,
-	.mmap		= smb_file_mmap,
-	.open		= smb_file_open,
-	.release	= smb_file_release,
-	.fsync		= smb_fsync,
-	.splice_read	= smb_file_splice_read,
-};
-
-const struct inode_operations smb_file_inode_operations =
-{
-	.permission	= smb_file_permission,
-	.getattr	= smb_getattr,
-	.setattr	= smb_notify_change,
-};
diff --git a/fs/smbfs/getopt.c b/fs/smbfs/getopt.c
deleted file mode 100644
index 7ae0f5273ab..00000000000
--- a/fs/smbfs/getopt.c
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * getopt.c
- */
-
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/net.h>
-
-#include "getopt.h"
-
-/**
- *	smb_getopt - option parser
- *	@caller: name of the caller, for error messages
- *	@options: the options string
- *	@opts: an array of &struct option entries controlling parser operations
- *	@optopt: output; will contain the current option
- *	@optarg: output; will contain the value (if one exists)
- *	@flag: output; may be NULL; should point to a long for or'ing flags
- *	@value: output; may be NULL; will be overwritten with the integer value
- *		of the current argument.
- *
- *	Helper to parse options on the format used by mount ("a=b,c=d,e,f").
- *	Returns opts->val if a matching entry in the 'opts' array is found,
- *	0 when no more tokens are found, -1 if an error is encountered.
- */
-int smb_getopt(char *caller, char **options, struct option *opts,
-	       char **optopt, char **optarg, unsigned long *flag,
-	       unsigned long *value)
-{
-	char *token;
-	char *val;
-	int i;
-
-	do {
-		if ((token = strsep(options, ",")) == NULL)
-			return 0;
-	} while (*token == '\0');
-	*optopt = token;
-
-	*optarg = NULL;
-	if ((val = strchr (token, '=')) != NULL) {
-		*val++ = 0;
-		if (value)
-			*value = simple_strtoul(val, NULL, 0);
-		*optarg = val;
-	}
-
-	for (i = 0; opts[i].name != NULL; i++) {
-		if (!strcmp(opts[i].name, token)) {
-			if (!opts[i].flag && (!val || !*val)) {
-				printk("%s: the %s option requires an argument\n",
-				       caller, token);
-				return -1;
-			}
-
-			if (flag && opts[i].flag)
-				*flag |= opts[i].flag;
-
-			return opts[i].val;
-		}
-	}
-	printk("%s: Unrecognized mount option %s\n", caller, token);
-	return -1;
-}
diff --git a/fs/smbfs/getopt.h b/fs/smbfs/getopt.h
deleted file mode 100644
index 146219ac7c4..00000000000
--- a/fs/smbfs/getopt.h
+++ /dev/null
@@ -1,14 +0,0 @@
-#ifndef _LINUX_GETOPT_H
-#define _LINUX_GETOPT_H
-
-struct option {
-	const char *name;
-	unsigned long flag;
-	int val;
-};
-
-extern int smb_getopt(char *caller, char **options, struct option *opts,
-		      char **optopt, char **optarg, unsigned long *flag,
-		      unsigned long *value);
-
-#endif /* _LINUX_GETOPT_H */
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c
deleted file mode 100644
index 8fc5e50e142..00000000000
--- a/fs/smbfs/inode.c
+++ /dev/null
@@ -1,844 +0,0 @@
-/*
- *  inode.c
- *
- *  Copyright (C) 1995, 1996 by Paal-Kr. Engstad and Volker Lendecke
- *  Copyright (C) 1997 by Volker Lendecke
- *
- *  Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/module.h>
-#include <linux/time.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/file.h>
-#include <linux/dcache.h>
-#include <linux/smp_lock.h>
-#include <linux/nls.h>
-#include <linux/seq_file.h>
-#include <linux/mount.h>
-#include <linux/net.h>
-#include <linux/vfs.h>
-#include <linux/highuid.h>
-#include <linux/sched.h>
-#include <linux/smb_fs.h>
-#include <linux/smbno.h>
-#include <linux/smb_mount.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#include "smb_debug.h"
-#include "getopt.h"
-#include "proto.h"
-
-/* Always pick a default string */
-#ifdef CONFIG_SMB_NLS_REMOTE
-#define SMB_NLS_REMOTE CONFIG_SMB_NLS_REMOTE
-#else
-#define SMB_NLS_REMOTE ""
-#endif
-
-#define SMB_TTL_DEFAULT 1000
-
-static void smb_evict_inode(struct inode *);
-static void smb_put_super(struct super_block *);
-static int  smb_statfs(struct dentry *, struct kstatfs *);
-static int  smb_show_options(struct seq_file *, struct vfsmount *);
-
-static struct kmem_cache *smb_inode_cachep;
-
-static struct inode *smb_alloc_inode(struct super_block *sb)
-{
-	struct smb_inode_info *ei;
-	ei = (struct smb_inode_info *)kmem_cache_alloc(smb_inode_cachep, GFP_KERNEL);
-	if (!ei)
-		return NULL;
-	return &ei->vfs_inode;
-}
-
-static void smb_destroy_inode(struct inode *inode)
-{
-	kmem_cache_free(smb_inode_cachep, SMB_I(inode));
-}
-
-static void init_once(void *foo)
-{
-	struct smb_inode_info *ei = (struct smb_inode_info *) foo;
-
-	inode_init_once(&ei->vfs_inode);
-}
-
-static int init_inodecache(void)
-{
-	smb_inode_cachep = kmem_cache_create("smb_inode_cache",
-					     sizeof(struct smb_inode_info),
-					     0, (SLAB_RECLAIM_ACCOUNT|
-						SLAB_MEM_SPREAD),
-					     init_once);
-	if (smb_inode_cachep == NULL)
-		return -ENOMEM;
-	return 0;
-}
-
-static void destroy_inodecache(void)
-{
-	kmem_cache_destroy(smb_inode_cachep);
-}
-
-static int smb_remount(struct super_block *sb, int *flags, char *data)
-{
-	*flags |= MS_NODIRATIME;
-	return 0;
-}
-
-static const struct super_operations smb_sops =
-{
-	.alloc_inode	= smb_alloc_inode,
-	.destroy_inode	= smb_destroy_inode,
-	.drop_inode	= generic_delete_inode,
-	.evict_inode	= smb_evict_inode,
-	.put_super	= smb_put_super,
-	.statfs		= smb_statfs,
-	.show_options	= smb_show_options,
-	.remount_fs	= smb_remount,
-};
-
-
-/* We are always generating a new inode here */
-struct inode *
-smb_iget(struct super_block *sb, struct smb_fattr *fattr)
-{
-	struct smb_sb_info *server = SMB_SB(sb);
-	struct inode *result;
-
-	DEBUG1("smb_iget: %p\n", fattr);
-
-	result = new_inode(sb);
-	if (!result)
-		return result;
-	result->i_ino = fattr->f_ino;
-	SMB_I(result)->open = 0;
-	SMB_I(result)->fileid = 0;
-	SMB_I(result)->access = 0;
-	SMB_I(result)->flags = 0;
-	SMB_I(result)->closed = 0;
-	SMB_I(result)->openers = 0;
-	smb_set_inode_attr(result, fattr);
-	if (S_ISREG(result->i_mode)) {
-		result->i_op = &smb_file_inode_operations;
-		result->i_fop = &smb_file_operations;
-		result->i_data.a_ops = &smb_file_aops;
-	} else if (S_ISDIR(result->i_mode)) {
-		if (server->opt.capabilities & SMB_CAP_UNIX)
-			result->i_op = &smb_dir_inode_operations_unix;
-		else
-			result->i_op = &smb_dir_inode_operations;
-		result->i_fop = &smb_dir_operations;
-	} else if (S_ISLNK(result->i_mode)) {
-		result->i_op = &smb_link_inode_operations;
-	} else {
-		init_special_inode(result, result->i_mode, fattr->f_rdev);
-	}
-	insert_inode_hash(result);
-	return result;
-}
-
-/*
- * Copy the inode data to a smb_fattr structure.
- */
-void
-smb_get_inode_attr(struct inode *inode, struct smb_fattr *fattr)
-{
-	memset(fattr, 0, sizeof(struct smb_fattr));
-	fattr->f_mode	= inode->i_mode;
-	fattr->f_nlink	= inode->i_nlink;
-	fattr->f_ino	= inode->i_ino;
-	fattr->f_uid	= inode->i_uid;
-	fattr->f_gid	= inode->i_gid;
-	fattr->f_size	= inode->i_size;
-	fattr->f_mtime	= inode->i_mtime;
-	fattr->f_ctime	= inode->i_ctime;
-	fattr->f_atime	= inode->i_atime;
-	fattr->f_blocks	= inode->i_blocks;
-
-	fattr->attr	= SMB_I(inode)->attr;
-	/*
-	 * Keep the attributes in sync with the inode permissions.
-	 */
-	if (fattr->f_mode & S_IWUSR)
-		fattr->attr &= ~aRONLY;
-	else
-		fattr->attr |= aRONLY;
-}
-
-/*
- * Update the inode, possibly causing it to invalidate its pages if mtime/size
- * is different from last time.
- */
-void
-smb_set_inode_attr(struct inode *inode, struct smb_fattr *fattr)
-{
-	struct smb_inode_info *ei = SMB_I(inode);
-
-	/*
-	 * A size change should have a different mtime, or same mtime
-	 * but different size.
-	 */
-	time_t last_time = inode->i_mtime.tv_sec;
-	loff_t last_sz = inode->i_size;
-
-	inode->i_mode	= fattr->f_mode;
-	inode->i_nlink	= fattr->f_nlink;
-	inode->i_uid	= fattr->f_uid;
-	inode->i_gid	= fattr->f_gid;
-	inode->i_ctime	= fattr->f_ctime;
-	inode->i_blocks = fattr->f_blocks;
-	inode->i_size	= fattr->f_size;
-	inode->i_mtime	= fattr->f_mtime;
-	inode->i_atime	= fattr->f_atime;
-	ei->attr = fattr->attr;
-
-	/*
-	 * Update the "last time refreshed" field for revalidation.
-	 */
-	ei->oldmtime = jiffies;
-
-	if (inode->i_mtime.tv_sec != last_time || inode->i_size != last_sz) {
-		VERBOSE("%ld changed, old=%ld, new=%ld, oz=%ld, nz=%ld\n",
-			inode->i_ino,
-			(long) last_time, (long) inode->i_mtime.tv_sec,
-			(long) last_sz, (long) inode->i_size);
-
-		if (!S_ISDIR(inode->i_mode))
-			invalidate_remote_inode(inode);
-	}
-}
-
-/*
- * This is called if the connection has gone bad ...
- * try to kill off all the current inodes.
- */
-void
-smb_invalidate_inodes(struct smb_sb_info *server)
-{
-	VERBOSE("\n");
-	shrink_dcache_sb(SB_of(server));
-	invalidate_inodes(SB_of(server));
-}
-
-/*
- * This is called to update the inode attributes after
- * we've made changes to a file or directory.
- */
-static int
-smb_refresh_inode(struct dentry *dentry)
-{
-	struct inode *inode = dentry->d_inode;
-	int error;
-	struct smb_fattr fattr;
-
-	error = smb_proc_getattr(dentry, &fattr);
-	if (!error) {
-		smb_renew_times(dentry);
-		/*
-		 * Check whether the type part of the mode changed,
-		 * and don't update the attributes if it did.
-		 *
-		 * And don't dick with the root inode
-		 */
-		if (inode->i_ino == 2)
-			return error;
-		if (S_ISLNK(inode->i_mode))
-			return error;	/* VFS will deal with it */
-
-		if ((inode->i_mode & S_IFMT) == (fattr.f_mode & S_IFMT)) {
-			smb_set_inode_attr(inode, &fattr);
-		} else {
-			/*
-			 * Big trouble! The inode has become a new object,
-			 * so any operations attempted on it are invalid.
-			 *
-			 * To limit damage, mark the inode as bad so that
-			 * subsequent lookup validations will fail.
-			 */
-			PARANOIA("%s/%s changed mode, %07o to %07o\n",
-				 DENTRY_PATH(dentry),
-				 inode->i_mode, fattr.f_mode);
-
-			fattr.f_mode = inode->i_mode; /* save mode */
-			make_bad_inode(inode);
-			inode->i_mode = fattr.f_mode; /* restore mode */
-			/*
-			 * No need to worry about unhashing the dentry: the
-			 * lookup validation will see that the inode is bad.
-			 * But we do want to invalidate the caches ...
-			 */
-			if (!S_ISDIR(inode->i_mode))
-				invalidate_remote_inode(inode);
-			else
-				smb_invalid_dir_cache(inode);
-			error = -EIO;
-		}
-	}
-	return error;
-}
-
-/*
- * This is called when we want to check whether the inode
- * has changed on the server.  If it has changed, we must
- * invalidate our local caches.
- */
-int
-smb_revalidate_inode(struct dentry *dentry)
-{
-	struct smb_sb_info *s = server_from_dentry(dentry);
-	struct inode *inode = dentry->d_inode;
-	int error = 0;
-
-	DEBUG1("smb_revalidate_inode\n");
-	lock_kernel();
-
-	/*
-	 * Check whether we've recently refreshed the inode.
-	 */
-	if (time_before(jiffies, SMB_I(inode)->oldmtime + SMB_MAX_AGE(s))) {
-		VERBOSE("up-to-date, ino=%ld, jiffies=%lu, oldtime=%lu\n",
-			inode->i_ino, jiffies, SMB_I(inode)->oldmtime);
-		goto out;
-	}
-
-	error = smb_refresh_inode(dentry);
-out:
-	unlock_kernel();
-	return error;
-}
-
-/*
- * This routine is called when i_nlink == 0 and i_count goes to 0.
- * All blocking cleanup operations need to go here to avoid races.
- */
-static void
-smb_evict_inode(struct inode *ino)
-{
-	DEBUG1("ino=%ld\n", ino->i_ino);
-	truncate_inode_pages(&ino->i_data, 0);
-	end_writeback(ino);
-	lock_kernel();
-	if (smb_close(ino))
-		PARANOIA("could not close inode %ld\n", ino->i_ino);
-	unlock_kernel();
-}
-
-static struct option opts[] = {
-	{ "version",	0, 'v' },
-	{ "win95",	SMB_MOUNT_WIN95, 1 },
-	{ "oldattr",	SMB_MOUNT_OLDATTR, 1 },
-	{ "dirattr",	SMB_MOUNT_DIRATTR, 1 },
-	{ "case",	SMB_MOUNT_CASE, 1 },
-	{ "uid",	0, 'u' },
-	{ "gid",	0, 'g' },
-	{ "file_mode",	0, 'f' },
-	{ "dir_mode",	0, 'd' },
-	{ "iocharset",	0, 'i' },
-	{ "codepage",	0, 'c' },
-	{ "ttl",	0, 't' },
-	{ NULL,		0, 0}
-};
-
-static int
-parse_options(struct smb_mount_data_kernel *mnt, char *options)
-{
-	int c;
-	unsigned long flags;
-	unsigned long value;
-	char *optarg;
-	char *optopt;
-
-	flags = 0;
-	while ( (c = smb_getopt("smbfs", &options, opts,
-				&optopt, &optarg, &flags, &value)) > 0) {
-
-		VERBOSE("'%s' -> '%s'\n", optopt, optarg ? optarg : "<none>");
-		switch (c) {
-		case 1:
-			/* got a "flag" option */
-			break;
-		case 'v':
-			if (value != SMB_MOUNT_VERSION) {
-			printk ("smbfs: Bad mount version %ld, expected %d\n",
-				value, SMB_MOUNT_VERSION);
-				return 0;
-			}
-			mnt->version = value;
-			break;
-		case 'u':
-			mnt->uid = value;
-			flags |= SMB_MOUNT_UID;
-			break;
-		case 'g':
-			mnt->gid = value;
-			flags |= SMB_MOUNT_GID;
-			break;
-		case 'f':
-			mnt->file_mode = (value & S_IRWXUGO) | S_IFREG;
-			flags |= SMB_MOUNT_FMODE;
-			break;
-		case 'd':
-			mnt->dir_mode = (value & S_IRWXUGO) | S_IFDIR;
-			flags |= SMB_MOUNT_DMODE;
-			break;
-		case 'i':
-			strlcpy(mnt->codepage.local_name, optarg, 
-				SMB_NLS_MAXNAMELEN);
-			break;
-		case 'c':
-			strlcpy(mnt->codepage.remote_name, optarg,
-				SMB_NLS_MAXNAMELEN);
-			break;
-		case 't':
-			mnt->ttl = value;
-			break;
-		default:
-			printk ("smbfs: Unrecognized mount option %s\n",
-				optopt);
-			return -1;
-		}
-	}
-	mnt->flags = flags;
-	return c;
-}
-
-/*
- * smb_show_options() is for displaying mount options in /proc/mounts.
- * It tries to avoid showing settings that were not changed from their
- * defaults.
- */
-static int
-smb_show_options(struct seq_file *s, struct vfsmount *m)
-{
-	struct smb_mount_data_kernel *mnt = SMB_SB(m->mnt_sb)->mnt;
-	int i;
-
-	for (i = 0; opts[i].name != NULL; i++)
-		if (mnt->flags & opts[i].flag)
-			seq_printf(s, ",%s", opts[i].name);
-
-	if (mnt->flags & SMB_MOUNT_UID)
-		seq_printf(s, ",uid=%d", mnt->uid);
-	if (mnt->flags & SMB_MOUNT_GID)
-		seq_printf(s, ",gid=%d", mnt->gid);
-	if (mnt->mounted_uid != 0)
-		seq_printf(s, ",mounted_uid=%d", mnt->mounted_uid);
-
-	/* 
-	 * Defaults for file_mode and dir_mode are unknown to us; they
-	 * depend on the current umask of the user doing the mount.
-	 */
-	if (mnt->flags & SMB_MOUNT_FMODE)
-		seq_printf(s, ",file_mode=%04o", mnt->file_mode & S_IRWXUGO);
-	if (mnt->flags & SMB_MOUNT_DMODE)
-		seq_printf(s, ",dir_mode=%04o", mnt->dir_mode & S_IRWXUGO);
-
-	if (strcmp(mnt->codepage.local_name, CONFIG_NLS_DEFAULT))
-		seq_printf(s, ",iocharset=%s", mnt->codepage.local_name);
-	if (strcmp(mnt->codepage.remote_name, SMB_NLS_REMOTE))
-		seq_printf(s, ",codepage=%s", mnt->codepage.remote_name);
-
-	if (mnt->ttl != SMB_TTL_DEFAULT)
-		seq_printf(s, ",ttl=%d", mnt->ttl);
-
-	return 0;
-}
-
-static void
-smb_unload_nls(struct smb_sb_info *server)
-{
-	unload_nls(server->remote_nls);
-	unload_nls(server->local_nls);
-}
-
-static void
-smb_put_super(struct super_block *sb)
-{
-	struct smb_sb_info *server = SMB_SB(sb);
-
-	lock_kernel();
-
-	smb_lock_server(server);
-	server->state = CONN_INVALID;
-	smbiod_unregister_server(server);
-
-	smb_close_socket(server);
-
-	if (server->conn_pid)
-		kill_pid(server->conn_pid, SIGTERM, 1);
-
-	bdi_destroy(&server->bdi);
-	kfree(server->ops);
-	smb_unload_nls(server);
-	sb->s_fs_info = NULL;
-	smb_unlock_server(server);
-	put_pid(server->conn_pid);
-	kfree(server);
-
-	unlock_kernel();
-}
-
-static int smb_fill_super(struct super_block *sb, void *raw_data, int silent)
-{
-	struct smb_sb_info *server;
-	struct smb_mount_data_kernel *mnt;
-	struct smb_mount_data *oldmnt;
-	struct inode *root_inode;
-	struct smb_fattr root;
-	int ver;
-	void *mem;
-	static int warn_count;
-
-	lock_kernel();
-
-	if (warn_count < 5) {
-		warn_count++;
-		printk(KERN_EMERG "smbfs is deprecated and will be removed"
-			" from the 2.6.27 kernel. Please migrate to cifs\n");
-	}
-
-	if (!raw_data)
-		goto out_no_data;
-
-	oldmnt = (struct smb_mount_data *) raw_data;
-	ver = oldmnt->version;
-	if (ver != SMB_MOUNT_OLDVERSION && cpu_to_be32(ver) != SMB_MOUNT_ASCII)
-		goto out_wrong_data;
-
-	sb->s_flags |= MS_NODIRATIME;
-	sb->s_blocksize = 1024;	/* Eh...  Is this correct? */
-	sb->s_blocksize_bits = 10;
-	sb->s_magic = SMB_SUPER_MAGIC;
-	sb->s_op = &smb_sops;
-	sb->s_time_gran = 100;
-
-	server = kzalloc(sizeof(struct smb_sb_info), GFP_KERNEL);
-	if (!server)
-		goto out_no_server;
-	sb->s_fs_info = server;
-	
-	if (bdi_setup_and_register(&server->bdi, "smbfs", BDI_CAP_MAP_COPY))
-		goto out_bdi;
-
-	sb->s_bdi = &server->bdi;
-
-	server->super_block = sb;
-	server->mnt = NULL;
-	server->sock_file = NULL;
-	init_waitqueue_head(&server->conn_wq);
-	init_MUTEX(&server->sem);
-	INIT_LIST_HEAD(&server->entry);
-	INIT_LIST_HEAD(&server->xmitq);
-	INIT_LIST_HEAD(&server->recvq);
-	server->conn_error = 0;
-	server->conn_pid = NULL;
-	server->state = CONN_INVALID; /* no connection yet */
-	server->generation = 0;
-
-	/* Allocate the global temp buffer and some superblock helper structs */
-	/* FIXME: move these to the smb_sb_info struct */
-	VERBOSE("alloc chunk = %lu\n", sizeof(struct smb_ops) +
-		sizeof(struct smb_mount_data_kernel));
-	mem = kmalloc(sizeof(struct smb_ops) +
-		      sizeof(struct smb_mount_data_kernel), GFP_KERNEL);
-	if (!mem)
-		goto out_no_mem;
-
-	server->ops = mem;
-	smb_install_null_ops(server->ops);
-	server->mnt = mem + sizeof(struct smb_ops);
-
-	/* Setup NLS stuff */
-	server->remote_nls = NULL;
-	server->local_nls = NULL;
-
-	mnt = server->mnt;
-
-	memset(mnt, 0, sizeof(struct smb_mount_data_kernel));
-	strlcpy(mnt->codepage.local_name, CONFIG_NLS_DEFAULT,
-		SMB_NLS_MAXNAMELEN);
-	strlcpy(mnt->codepage.remote_name, SMB_NLS_REMOTE,
-		SMB_NLS_MAXNAMELEN);
-
-	mnt->ttl = SMB_TTL_DEFAULT;
-	if (ver == SMB_MOUNT_OLDVERSION) {
-		mnt->version = oldmnt->version;
-
-		SET_UID(mnt->uid, oldmnt->uid);
-		SET_GID(mnt->gid, oldmnt->gid);
-
-		mnt->file_mode = (oldmnt->file_mode & S_IRWXUGO) | S_IFREG;
-		mnt->dir_mode = (oldmnt->dir_mode & S_IRWXUGO) | S_IFDIR;
-
-		mnt->flags = (oldmnt->file_mode >> 9) | SMB_MOUNT_UID |
-			SMB_MOUNT_GID | SMB_MOUNT_FMODE | SMB_MOUNT_DMODE;
-	} else {
-		mnt->file_mode = S_IRWXU | S_IRGRP | S_IXGRP |
-				S_IROTH | S_IXOTH | S_IFREG;
-		mnt->dir_mode = S_IRWXU | S_IRGRP | S_IXGRP |
-				S_IROTH | S_IXOTH | S_IFDIR;
-		if (parse_options(mnt, raw_data))
-			goto out_bad_option;
-	}
-	mnt->mounted_uid = current_uid();
-	smb_setcodepage(server, &mnt->codepage);
-
-	/*
-	 * Display the enabled options
-	 * Note: smb_proc_getattr uses these in 2.4 (but was changed in 2.2)
-	 */
-	if (mnt->flags & SMB_MOUNT_OLDATTR)
-		printk("SMBFS: Using core getattr (Win 95 speedup)\n");
-	else if (mnt->flags & SMB_MOUNT_DIRATTR)
-		printk("SMBFS: Using dir ff getattr\n");
-
-	if (smbiod_register_server(server) < 0) {
-		printk(KERN_ERR "smbfs: failed to start smbiod\n");
-		goto out_no_smbiod;
-	}
-
-	/*
-	 * Keep the super block locked while we get the root inode.
-	 */
-	smb_init_root_dirent(server, &root, sb);
-	root_inode = smb_iget(sb, &root);
-	if (!root_inode)
-		goto out_no_root;
-
-	sb->s_root = d_alloc_root(root_inode);
-	if (!sb->s_root)
-		goto out_no_root;
-
-	smb_new_dentry(sb->s_root);
-
-	unlock_kernel();
-	return 0;
-
-out_no_root:
-	iput(root_inode);
-out_no_smbiod:
-	smb_unload_nls(server);
-out_bad_option:
-	kfree(mem);
-out_no_mem:
-	bdi_destroy(&server->bdi);
-out_bdi:
-	if (!server->mnt)
-		printk(KERN_ERR "smb_fill_super: allocation failure\n");
-	sb->s_fs_info = NULL;
-	kfree(server);
-	goto out_fail;
-out_wrong_data:
-	printk(KERN_ERR "smbfs: mount_data version %d is not supported\n", ver);
-	goto out_fail;
-out_no_data:
-	printk(KERN_ERR "smb_fill_super: missing data argument\n");
-out_fail:
-	unlock_kernel();
-	return -EINVAL;
-out_no_server:
-	printk(KERN_ERR "smb_fill_super: cannot allocate struct smb_sb_info\n");
-	unlock_kernel();
-	return -ENOMEM;
-}
-
-static int
-smb_statfs(struct dentry *dentry, struct kstatfs *buf)
-{
-	int result;
-	
-	lock_kernel();
-
-	result = smb_proc_dskattr(dentry, buf);
-
-	unlock_kernel();
-
-	buf->f_type = SMB_SUPER_MAGIC;
-	buf->f_namelen = SMB_MAXPATHLEN;
-	return result;
-}
-
-int smb_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
-{
-	int err = smb_revalidate_inode(dentry);
-	if (!err)
-		generic_fillattr(dentry->d_inode, stat);
-	return err;
-}
-
-int
-smb_notify_change(struct dentry *dentry, struct iattr *attr)
-{
-	struct inode *inode = dentry->d_inode;
-	struct smb_sb_info *server = server_from_dentry(dentry);
-	unsigned int mask = (S_IFREG | S_IFDIR | S_IRWXUGO);
-	int error, changed, refresh = 0;
-	struct smb_fattr fattr;
-
-	lock_kernel();
-
-	error = smb_revalidate_inode(dentry);
-	if (error)
-		goto out;
-
-	if ((error = inode_change_ok(inode, attr)) < 0)
-		goto out;
-
-	error = -EPERM;
-	if ((attr->ia_valid & ATTR_UID) && (attr->ia_uid != server->mnt->uid))
-		goto out;
-
-	if ((attr->ia_valid & ATTR_GID) && (attr->ia_uid != server->mnt->gid))
-		goto out;
-
-	if ((attr->ia_valid & ATTR_MODE) && (attr->ia_mode & ~mask))
-		goto out;
-
-	if ((attr->ia_valid & ATTR_SIZE) != 0) {
-		VERBOSE("changing %s/%s, old size=%ld, new size=%ld\n",
-			DENTRY_PATH(dentry),
-			(long) inode->i_size, (long) attr->ia_size);
-
-		filemap_write_and_wait(inode->i_mapping);
-
-		error = smb_open(dentry, O_WRONLY);
-		if (error)
-			goto out;
-		error = server->ops->truncate(inode, attr->ia_size);
-		if (error)
-			goto out;
-		truncate_setsize(inode, attr->ia_size);
-		refresh = 1;
-	}
-
-	if (server->opt.capabilities & SMB_CAP_UNIX) {
-		/* For now we don't want to set the size with setattr_unix */
-		attr->ia_valid &= ~ATTR_SIZE;
-		/* FIXME: only call if we actually want to set something? */
-		error = smb_proc_setattr_unix(dentry, attr, 0, 0);
-		if (!error)
-			refresh = 1;
-
-		goto out;
-	}
-
-	/*
-	 * Initialize the fattr and check for changed fields.
-	 * Note: CTIME under SMB is creation time rather than
-	 * change time, so we don't attempt to change it.
-	 */
-	smb_get_inode_attr(inode, &fattr);
-
-	changed = 0;
-	if ((attr->ia_valid & ATTR_MTIME) != 0) {
-		fattr.f_mtime = attr->ia_mtime;
-		changed = 1;
-	}
-	if ((attr->ia_valid & ATTR_ATIME) != 0) {
-		fattr.f_atime = attr->ia_atime;
-		/* Earlier protocols don't have an access time */
-		if (server->opt.protocol >= SMB_PROTOCOL_LANMAN2)
-			changed = 1;
-	}
-	if (changed) {
-		error = smb_proc_settime(dentry, &fattr);
-		if (error)
-			goto out;
-		refresh = 1;
-	}
-
-	/*
-	 * Check for mode changes ... we're extremely limited in
-	 * what can be set for SMB servers: just the read-only bit.
-	 */
-	if ((attr->ia_valid & ATTR_MODE) != 0) {
-		VERBOSE("%s/%s mode change, old=%x, new=%x\n",
-			DENTRY_PATH(dentry), fattr.f_mode, attr->ia_mode);
-		changed = 0;
-		if (attr->ia_mode & S_IWUSR) {
-			if (fattr.attr & aRONLY) {
-				fattr.attr &= ~aRONLY;
-				changed = 1;
-			}
-		} else {
-			if (!(fattr.attr & aRONLY)) {
-				fattr.attr |= aRONLY;
-				changed = 1;
-			}
-		}
-		if (changed) {
-			error = smb_proc_setattr(dentry, &fattr);
-			if (error)
-				goto out;
-			refresh = 1;
-		}
-	}
-	error = 0;
-
-out:
-	if (refresh)
-		smb_refresh_inode(dentry);
-	unlock_kernel();
-	return error;
-}
-
-static int smb_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
-{
-	return get_sb_nodev(fs_type, flags, data, smb_fill_super, mnt);
-}
-
-static struct file_system_type smb_fs_type = {
-	.owner		= THIS_MODULE,
-	.name		= "smbfs",
-	.get_sb		= smb_get_sb,
-	.kill_sb	= kill_anon_super,
-	.fs_flags	= FS_BINARY_MOUNTDATA,
-};
-
-static int __init init_smb_fs(void)
-{
-	int err;
-	DEBUG1("registering ...\n");
-
-	err = init_inodecache();
-	if (err)
-		goto out_inode;
-	err = smb_init_request_cache();
-	if (err)
-		goto out_request;
-	err = register_filesystem(&smb_fs_type);
-	if (err)
-		goto out;
-	return 0;
-out:
-	smb_destroy_request_cache();
-out_request:
-	destroy_inodecache();
-out_inode:
-	return err;
-}
-
-static void __exit exit_smb_fs(void)
-{
-	DEBUG1("unregistering ...\n");
-	unregister_filesystem(&smb_fs_type);
-	smb_destroy_request_cache();
-	destroy_inodecache();
-}
-
-module_init(init_smb_fs)
-module_exit(exit_smb_fs)
-MODULE_LICENSE("GPL");
diff --git a/fs/smbfs/ioctl.c b/fs/smbfs/ioctl.c
deleted file mode 100644
index 07215312ad3..00000000000
--- a/fs/smbfs/ioctl.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- *  ioctl.c
- *
- *  Copyright (C) 1995, 1996 by Volker Lendecke
- *  Copyright (C) 1997 by Volker Lendecke
- *
- *  Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/ioctl.h>
-#include <linux/time.h>
-#include <linux/mm.h>
-#include <linux/highuid.h>
-#include <linux/smp_lock.h>
-#include <linux/net.h>
-
-#include <linux/smb_fs.h>
-#include <linux/smb_mount.h>
-
-#include <asm/uaccess.h>
-
-#include "proto.h"
-
-long
-smb_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
-{
-	struct smb_sb_info *server = server_from_inode(filp->f_path.dentry->d_inode);
-	struct smb_conn_opt opt;
-	int result = -EINVAL;
-
-	lock_kernel();
-	switch (cmd) {
-		uid16_t uid16;
-		uid_t uid32;
-	case SMB_IOC_GETMOUNTUID:
-		SET_UID(uid16, server->mnt->mounted_uid);
-		result = put_user(uid16, (uid16_t __user *) arg);
-		break;
-	case SMB_IOC_GETMOUNTUID32:
-		SET_UID(uid32, server->mnt->mounted_uid);
-		result = put_user(uid32, (uid_t __user *) arg);
-		break;
-
-	case SMB_IOC_NEWCONN:
-		/* arg is smb_conn_opt, or NULL if no connection was made */
-		if (!arg) {
-			result = 0;
-			smb_lock_server(server);
-			server->state = CONN_RETRIED;
-			printk(KERN_ERR "Connection attempt failed!  [%d]\n",
-			       server->conn_error);
-			smbiod_flush(server);
-			smb_unlock_server(server);
-			break;
-		}
-
-		result = -EFAULT;
-		if (!copy_from_user(&opt, (void __user *)arg, sizeof(opt)))
-			result = smb_newconn(server, &opt);
-		break;
-	default:
-		break;
-	}
-	unlock_kernel();
-
-	return result;
-}
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c
deleted file mode 100644
index 71c29b6670b..00000000000
--- a/fs/smbfs/proc.c
+++ /dev/null
@@ -1,3507 +0,0 @@
-/*
- *  proc.c
- *
- *  Copyright (C) 1995, 1996 by Paal-Kr. Engstad and Volker Lendecke
- *  Copyright (C) 1997 by Volker Lendecke
- *
- *  Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/types.h>
-#include <linux/capability.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/fs.h>
-#include <linux/file.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include <linux/dcache.h>
-#include <linux/nls.h>
-#include <linux/smp_lock.h>
-#include <linux/net.h>
-#include <linux/vfs.h>
-#include <linux/smb_fs.h>
-#include <linux/smbno.h>
-#include <linux/smb_mount.h>
-
-#include <net/sock.h>
-
-#include <asm/string.h>
-#include <asm/div64.h>
-
-#include "smb_debug.h"
-#include "proto.h"
-#include "request.h"
-
-
-/* Features. Undefine if they cause problems, this should perhaps be a
-   config option. */
-#define SMBFS_POSIX_UNLINK 1
-
-/* Allow smb_retry to be interrupted. */
-#define SMB_RETRY_INTR
-
-#define SMB_VWV(packet)  ((packet) + SMB_HEADER_LEN)
-#define SMB_CMD(packet)  (*(packet+8))
-#define SMB_WCT(packet)  (*(packet+SMB_HEADER_LEN - 1))
-
-#define SMB_DIRINFO_SIZE 43
-#define SMB_STATUS_SIZE  21
-
-#define SMB_ST_BLKSIZE	(PAGE_SIZE)
-#define SMB_ST_BLKSHIFT	(PAGE_SHIFT)
-
-static struct smb_ops smb_ops_core;
-static struct smb_ops smb_ops_os2;
-static struct smb_ops smb_ops_win95;
-static struct smb_ops smb_ops_winNT;
-static struct smb_ops smb_ops_unix;
-static struct smb_ops smb_ops_null;
-
-static void
-smb_init_dirent(struct smb_sb_info *server, struct smb_fattr *fattr);
-static void
-smb_finish_dirent(struct smb_sb_info *server, struct smb_fattr *fattr);
-static int
-smb_proc_getattr_core(struct smb_sb_info *server, struct dentry *dir,
-		      struct smb_fattr *fattr);
-static int
-smb_proc_getattr_ff(struct smb_sb_info *server, struct dentry *dentry,
-		    struct smb_fattr *fattr);
-static int
-smb_proc_setattr_core(struct smb_sb_info *server, struct dentry *dentry,
-		      u16 attr);
-static int
-smb_proc_setattr_ext(struct smb_sb_info *server,
-		     struct inode *inode, struct smb_fattr *fattr);
-static int
-smb_proc_query_cifsunix(struct smb_sb_info *server);
-static void
-install_ops(struct smb_ops *dst, struct smb_ops *src);
-
-
-static void
-str_upper(char *name, int len)
-{
-	while (len--)
-	{
-		if (*name >= 'a' && *name <= 'z')
-			*name -= ('a' - 'A');
-		name++;
-	}
-}
-
-#if 0
-static void
-str_lower(char *name, int len)
-{
-	while (len--)
-	{
-		if (*name >= 'A' && *name <= 'Z')
-			*name += ('a' - 'A');
-		name++;
-	}
-}
-#endif
-
-/* reverse a string inline. This is used by the dircache walking routines */
-static void reverse_string(char *buf, int len)
-{
-	char c;
-	char *end = buf+len-1;
-
-	while(buf < end) {
-		c = *buf;
-		*(buf++) = *end;
-		*(end--) = c;
-	}
-}
-
-/* no conversion, just a wrapper for memcpy. */
-static int convert_memcpy(unsigned char *output, int olen,
-			  const unsigned char *input, int ilen,
-			  struct nls_table *nls_from,
-			  struct nls_table *nls_to)
-{
-	if (olen < ilen)
-		return -ENAMETOOLONG;
-	memcpy(output, input, ilen);
-	return ilen;
-}
-
-static inline int write_char(unsigned char ch, char *output, int olen)
-{
-	if (olen < 4)
-		return -ENAMETOOLONG;
-	sprintf(output, ":x%02x", ch);
-	return 4;
-}
-
-static inline int write_unichar(wchar_t ch, char *output, int olen)
-{
-	if (olen < 5)
-		return -ENAMETOOLONG;
-	sprintf(output, ":%04x", ch);
-	return 5;
-}
-
-/* convert from one "codepage" to another (possibly being utf8). */
-static int convert_cp(unsigned char *output, int olen,
-		      const unsigned char *input, int ilen,
-		      struct nls_table *nls_from,
-		      struct nls_table *nls_to)
-{
-	int len = 0;
-	int n;
-	wchar_t ch;
-
-	while (ilen > 0) {
-		/* convert by changing to unicode and back to the new cp */
-		n = nls_from->char2uni(input, ilen, &ch);
-		if (n == -EINVAL) {
-			ilen--;
-			n = write_char(*input++, output, olen);
-			if (n < 0)
-				goto fail;
-			output += n;
-			olen -= n;
-			len += n;
-			continue;
-		} else if (n < 0)
-			goto fail;
-		input += n;
-		ilen -= n;
-
-		n = nls_to->uni2char(ch, output, olen);
-		if (n == -EINVAL)
-			n = write_unichar(ch, output, olen);
-		if (n < 0)
-			goto fail;
-		output += n;
-		olen -= n;
-
-		len += n;
-	}
-	return len;
-fail:
-	return n;
-}
-
-/* ----------------------------------------------------------- */
-
-/*
- * nls_unicode
- *
- * This encodes/decodes little endian unicode format
- */
-
-static int uni2char(wchar_t uni, unsigned char *out, int boundlen)
-{
-	if (boundlen < 2)
-		return -EINVAL;
-	*out++ = uni & 0xff;
-	*out++ = uni >> 8;
-	return 2;
-}
-
-static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni)
-{
-	if (boundlen < 2)
-		return -EINVAL;
-	*uni = (rawstring[1] << 8) | rawstring[0];
-	return 2;
-}
-
-static struct nls_table unicode_table = {
-	.charset	= "unicode",
-	.uni2char	= uni2char,
-	.char2uni	= char2uni,
-};
-
-/* ----------------------------------------------------------- */
-
-static int setcodepage(struct nls_table **p, char *name)
-{
-	struct nls_table *nls;
-
-	if (!name || !*name) {
-		nls = NULL;
-	} else if ( (nls = load_nls(name)) == NULL) {
-		printk (KERN_ERR "smbfs: failed to load nls '%s'\n", name);
-		return -EINVAL;
-	}
-
-	/* if already set, unload the previous one. */
-	if (*p && *p != &unicode_table)
-		unload_nls(*p);
-	*p = nls;
-
-	return 0;
-}
-
-/* Handles all changes to codepage settings. */
-int smb_setcodepage(struct smb_sb_info *server, struct smb_nls_codepage *cp)
-{
-	int n = 0;
-
-	smb_lock_server(server);
-
-	/* Don't load any nls_* at all, if no remote is requested */
-	if (!*cp->remote_name)
-		goto out;
-
-	/* local */
-	n = setcodepage(&server->local_nls, cp->local_name);
-	if (n != 0)
-		goto out;
-
-	/* remote */
-	if (!strcmp(cp->remote_name, "unicode")) {
-		server->remote_nls = &unicode_table;
-	} else {
-		n = setcodepage(&server->remote_nls, cp->remote_name);
-		if (n != 0)
-			setcodepage(&server->local_nls, NULL);
-	}
-
-out:
-	if (server->local_nls != NULL && server->remote_nls != NULL)
-		server->ops->convert = convert_cp;
-	else
-		server->ops->convert = convert_memcpy;
-
-	smb_unlock_server(server);
-	return n;
-}
-
-
-/*****************************************************************************/
-/*                                                                           */
-/*  Encoding/Decoding section                                                */
-/*                                                                           */
-/*****************************************************************************/
-
-static __u8 *
-smb_encode_smb_length(__u8 * p, __u32 len)
-{
-	*p = 0;
-	*(p+1) = 0;
-	*(p+2) = (len & 0xFF00) >> 8;
-	*(p+3) = (len & 0xFF);
-	if (len > 0xFFFF)
-	{
-		*(p+1) = 1;
-	}
-	return p + 4;
-}
-
-/*
- * smb_build_path: build the path to entry and name storing it in buf.
- * The path returned will have the trailing '\0'.
- */
-static int smb_build_path(struct smb_sb_info *server, unsigned char *buf,
-			  int maxlen,
-			  struct dentry *entry, struct qstr *name)
-{
-	unsigned char *path = buf;
-	int len;
-	int unicode = (server->mnt->flags & SMB_MOUNT_UNICODE) != 0;
-
-	if (maxlen < (2<<unicode))
-		return -ENAMETOOLONG;
-
-	if (maxlen > SMB_MAXPATHLEN + 1)
-		maxlen = SMB_MAXPATHLEN + 1;
-
-	if (entry == NULL)
-		goto test_name_and_out;
-
-	/*
-	 * If IS_ROOT, we have to do no walking at all.
-	 */
-	if (IS_ROOT(entry) && !name) {
-		*path++ = '\\';
-		if (unicode) *path++ = '\0';
-		*path++ = '\0';
-		if (unicode) *path++ = '\0';
-		return path-buf;
-	}
-
-	/*
-	 * Build the path string walking the tree backward from end to ROOT
-	 * and store it in reversed order [see reverse_string()]
-	 */
-	dget(entry);
-	spin_lock(&entry->d_lock);
-	while (!IS_ROOT(entry)) {
-		struct dentry *parent;
-
-		if (maxlen < (3<<unicode)) {
-			spin_unlock(&entry->d_lock);
-			dput(entry);
-			return -ENAMETOOLONG;
-		}
-
-		len = server->ops->convert(path, maxlen-2, 
-				      entry->d_name.name, entry->d_name.len,
-				      server->local_nls, server->remote_nls);
-		if (len < 0) {
-			spin_unlock(&entry->d_lock);
-			dput(entry);
-			return len;
-		}
-		reverse_string(path, len);
-		path += len;
-		if (unicode) {
-			/* Note: reverse order */
-			*path++ = '\0';
-			maxlen--;
-		}
-		*path++ = '\\';
-		maxlen -= len+1;
-
-		parent = entry->d_parent;
-		dget(parent);
-		spin_unlock(&entry->d_lock);
-		dput(entry);
-		entry = parent;
-		spin_lock(&entry->d_lock);
-	}
-	spin_unlock(&entry->d_lock);
-	dput(entry);
-	reverse_string(buf, path-buf);
-
-	/* maxlen has space for at least one char */
-test_name_and_out:
-	if (name) {
-		if (maxlen < (3<<unicode))
-			return -ENAMETOOLONG;
-		*path++ = '\\';
-		if (unicode) {
-			*path++ = '\0';
-			maxlen--;
-		}
-		len = server->ops->convert(path, maxlen-2, 
-				      name->name, name->len,
-				      server->local_nls, server->remote_nls);
-		if (len < 0)
-			return len;
-		path += len;
-		maxlen -= len+1;
-	}
-	/* maxlen has space for at least one char */
-	*path++ = '\0';
-	if (unicode) *path++ = '\0';
-	return path-buf;
-}
-
-static int smb_encode_path(struct smb_sb_info *server, char *buf, int maxlen,
-			   struct dentry *dir, struct qstr *name)
-{
-	int result;
-
-	result = smb_build_path(server, buf, maxlen, dir, name);
-	if (result < 0)
-		goto out;
-	if (server->opt.protocol <= SMB_PROTOCOL_COREPLUS)
-		str_upper(buf, result);
-out:
-	return result;
-}
-
-/* encode_path for non-trans2 request SMBs */
-static int smb_simple_encode_path(struct smb_request *req, char **p,
-				  struct dentry * entry, struct qstr * name)
-{
-	struct smb_sb_info *server = req->rq_server;
-	char *s = *p;
-	int res;
-	int maxlen = ((char *)req->rq_buffer + req->rq_bufsize) - s;
-	int unicode = (server->mnt->flags & SMB_MOUNT_UNICODE);
-
-	if (!maxlen)
-		return -ENAMETOOLONG;
-	*s++ = 4;	/* ASCII data format */
-
-	/*
-	 * SMB Unicode strings must be 16bit aligned relative the start of the
-	 * packet. If they are not they must be padded with 0.
-	 */
-	if (unicode) {
-		int align = s - (char *)req->rq_buffer;
-		if (!(align & 1)) {
-			*s++ = '\0';
-			maxlen--;
-		}
-	}
-
-	res = smb_encode_path(server, s, maxlen-1, entry, name);
-	if (res < 0)
-		return res;
-	*p = s + res;
-	return 0;
-}
-
-/* The following are taken directly from msdos-fs */
-
-/* Linear day numbers of the respective 1sts in non-leap years. */
-
-static int day_n[] =
-{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 0, 0, 0, 0};
-		  /* JanFebMarApr May Jun Jul Aug Sep Oct Nov Dec */
-
-
-static time_t
-utc2local(struct smb_sb_info *server, time_t time)
-{
-	return time - server->opt.serverzone*60;
-}
-
-static time_t
-local2utc(struct smb_sb_info *server, time_t time)
-{
-	return time + server->opt.serverzone*60;
-}
-
-/* Convert a MS-DOS time/date pair to a UNIX date (seconds since 1 1 70). */
-
-static time_t
-date_dos2unix(struct smb_sb_info *server, __u16 date, __u16 time)
-{
-	int month, year;
-	time_t secs;
-
-	/* first subtract and mask after that... Otherwise, if
-	   date == 0, bad things happen */
-	month = ((date >> 5) - 1) & 15;
-	year = date >> 9;
-	secs = (time & 31) * 2 + 60 * ((time >> 5) & 63) + (time >> 11) * 3600 + 86400 *
-	    ((date & 31) - 1 + day_n[month] + (year / 4) + year * 365 - ((year & 3) == 0 &&
-						   month < 2 ? 1 : 0) + 3653);
-	/* days since 1.1.70 plus 80's leap day */
-	return local2utc(server, secs);
-}
-
-
-/* Convert linear UNIX date to a MS-DOS time/date pair. */
-
-static void
-date_unix2dos(struct smb_sb_info *server,
-	      int unix_date, __u16 *date, __u16 *time)
-{
-	int day, year, nl_day, month;
-
-	unix_date = utc2local(server, unix_date);
-	if (unix_date < 315532800)
-		unix_date = 315532800;
-
-	*time = (unix_date % 60) / 2 +
-		(((unix_date / 60) % 60) << 5) +
-		(((unix_date / 3600) % 24) << 11);
-
-	day = unix_date / 86400 - 3652;
-	year = day / 365;
-	if ((year + 3) / 4 + 365 * year > day)
-		year--;
-	day -= (year + 3) / 4 + 365 * year;
-	if (day == 59 && !(year & 3)) {
-		nl_day = day;
-		month = 2;
-	} else {
-		nl_day = (year & 3) || day <= 59 ? day : day - 1;
-		for (month = 1; month < 12; month++)
-			if (day_n[month] > nl_day)
-				break;
-	}
-	*date = nl_day - day_n[month - 1] + 1 + (month << 5) + (year << 9);
-}
-
-/* The following are taken from fs/ntfs/util.c */
-
-#define NTFS_TIME_OFFSET ((u64)(369*365 + 89) * 24 * 3600 * 10000000)
-
-/*
- * Convert the NT UTC (based 1601-01-01, in hundred nanosecond units)
- * into Unix UTC (based 1970-01-01, in seconds).
- */
-static struct timespec
-smb_ntutc2unixutc(u64 ntutc)
-{
-	struct timespec ts;
-	/* FIXME: what about the timezone difference? */
-	/* Subtract the NTFS time offset, then convert to 1s intervals. */
-	u64 t = ntutc - NTFS_TIME_OFFSET;
-	ts.tv_nsec = do_div(t, 10000000) * 100;
-	ts.tv_sec = t; 
-	return ts;
-}
-
-/* Convert the Unix UTC into NT time */
-static u64
-smb_unixutc2ntutc(struct timespec ts)
-{
-	/* Note: timezone conversion is probably wrong. */
-	/* return ((u64)utc2local(server, t)) * 10000000 + NTFS_TIME_OFFSET; */
-	return ((u64)ts.tv_sec) * 10000000 + ts.tv_nsec/100 + NTFS_TIME_OFFSET;
-}
-
-#define MAX_FILE_MODE	6
-static mode_t file_mode[] = {
-	S_IFREG, S_IFDIR, S_IFLNK, S_IFCHR, S_IFBLK, S_IFIFO, S_IFSOCK
-};
-
-static int smb_filetype_to_mode(u32 filetype)
-{
-	if (filetype > MAX_FILE_MODE) {
-		PARANOIA("Filetype out of range: %d\n", filetype);
-		return S_IFREG;
-	}
-	return file_mode[filetype];
-}
-
-static u32 smb_filetype_from_mode(int mode)
-{
-	if (S_ISREG(mode))
-		return UNIX_TYPE_FILE;
-	if (S_ISDIR(mode))
-		return UNIX_TYPE_DIR;
-	if (S_ISLNK(mode))
-		return UNIX_TYPE_SYMLINK;
-	if (S_ISCHR(mode))
-		return UNIX_TYPE_CHARDEV;
-	if (S_ISBLK(mode))
-		return UNIX_TYPE_BLKDEV;
-	if (S_ISFIFO(mode))
-		return UNIX_TYPE_FIFO;
-	if (S_ISSOCK(mode))
-		return UNIX_TYPE_SOCKET;
-	return UNIX_TYPE_UNKNOWN;
-}
-
-
-/*****************************************************************************/
-/*                                                                           */
-/*  Support section.                                                         */
-/*                                                                           */
-/*****************************************************************************/
-
-__u32
-smb_len(__u8 * p)
-{
-	return ((*(p+1) & 0x1) << 16L) | (*(p+2) << 8L) | *(p+3);
-}
-
-static __u16
-smb_bcc(__u8 * packet)
-{
-	int pos = SMB_HEADER_LEN + SMB_WCT(packet) * sizeof(__u16);
-	return WVAL(packet, pos);
-}
-
-/* smb_valid_packet: We check if packet fulfills the basic
-   requirements of a smb packet */
-
-static int
-smb_valid_packet(__u8 * packet)
-{
-	return (packet[4] == 0xff
-		&& packet[5] == 'S'
-		&& packet[6] == 'M'
-		&& packet[7] == 'B'
-		&& (smb_len(packet) + 4 == SMB_HEADER_LEN
-		    + SMB_WCT(packet) * 2 + smb_bcc(packet)));
-}
-
-/* smb_verify: We check if we got the answer we expected, and if we
-   got enough data. If bcc == -1, we don't care. */
-
-static int
-smb_verify(__u8 * packet, int command, int wct, int bcc)
-{
-	if (SMB_CMD(packet) != command)
-		goto bad_command;
-	if (SMB_WCT(packet) < wct)
-		goto bad_wct;
-	if (bcc != -1 && smb_bcc(packet) < bcc)
-		goto bad_bcc;
-	return 0;
-
-bad_command:
-	printk(KERN_ERR "smb_verify: command=%x, SMB_CMD=%x??\n",
-	       command, SMB_CMD(packet));
-	goto fail;
-bad_wct:
-	printk(KERN_ERR "smb_verify: command=%x, wct=%d, SMB_WCT=%d??\n",
-	       command, wct, SMB_WCT(packet));
-	goto fail;
-bad_bcc:
-	printk(KERN_ERR "smb_verify: command=%x, bcc=%d, SMB_BCC=%d??\n",
-	       command, bcc, smb_bcc(packet));
-fail:
-	return -EIO;
-}
-
-/*
- * Returns the maximum read or write size for the "payload". Making all of the
- * packet fit within the negotiated max_xmit size.
- *
- * N.B. Since this value is usually computed before locking the server,
- * the server's packet size must never be decreased!
- */
-static inline int
-smb_get_xmitsize(struct smb_sb_info *server, int overhead)
-{
-	return server->opt.max_xmit - overhead;
-}
-
-/*
- * Calculate the maximum read size
- */
-int
-smb_get_rsize(struct smb_sb_info *server)
-{
-	/* readX has 12 parameters, read has 5 */
-	int overhead = SMB_HEADER_LEN + 12 * sizeof(__u16) + 2 + 1 + 2;
-	int size = smb_get_xmitsize(server, overhead);
-
-	VERBOSE("xmit=%d, size=%d\n", server->opt.max_xmit, size);
-
-	return size;
-}
-
-/*
- * Calculate the maximum write size
- */
-int
-smb_get_wsize(struct smb_sb_info *server)
-{
-	/* writeX has 14 parameters, write has 5 */
-	int overhead = SMB_HEADER_LEN + 14 * sizeof(__u16) + 2 + 1 + 2;
-	int size = smb_get_xmitsize(server, overhead);
-
-	VERBOSE("xmit=%d, size=%d\n", server->opt.max_xmit, size);
-
-	return size;
-}
-
-/*
- * Convert SMB error codes to -E... errno values.
- */
-int
-smb_errno(struct smb_request *req)
-{
-	int errcls = req->rq_rcls;
-	int error  = req->rq_err;
-	char *class = "Unknown";
-
-	VERBOSE("errcls %d  code %d  from command 0x%x\n",
-		errcls, error, SMB_CMD(req->rq_header));
-
-	if (errcls == ERRDOS) {
-		switch (error) {
-		case ERRbadfunc:
-			return -EINVAL;
-		case ERRbadfile:
-		case ERRbadpath:
-			return -ENOENT;
-		case ERRnofids:
-			return -EMFILE;
-		case ERRnoaccess:
-			return -EACCES;
-		case ERRbadfid:
-			return -EBADF;
-		case ERRbadmcb:
-			return -EREMOTEIO;
-		case ERRnomem:
-			return -ENOMEM;
-		case ERRbadmem:
-			return -EFAULT;
-		case ERRbadenv:
-		case ERRbadformat:
-			return -EREMOTEIO;
-		case ERRbadaccess:
-			return -EACCES;
-		case ERRbaddata:
-			return -E2BIG;
-		case ERRbaddrive:
-			return -ENXIO;
-		case ERRremcd:
-			return -EREMOTEIO;
-		case ERRdiffdevice:
-			return -EXDEV;
-		case ERRnofiles:
-			return -ENOENT;
-		case ERRbadshare:
-			return -ETXTBSY;
-		case ERRlock:
-			return -EDEADLK;
-		case ERRfilexists:
-			return -EEXIST;
-		case ERROR_INVALID_PARAMETER:
-			return -EINVAL;
-		case ERROR_DISK_FULL:
-			return -ENOSPC;
-		case ERROR_INVALID_NAME:
-			return -ENOENT;
-		case ERROR_DIR_NOT_EMPTY:
-			return -ENOTEMPTY;
-		case ERROR_NOT_LOCKED:
-                       return -ENOLCK;
-		case ERROR_ALREADY_EXISTS:
-			return -EEXIST;
-		default:
-			class = "ERRDOS";
-			goto err_unknown;
-		}
-	} else if (errcls == ERRSRV) {
-		switch (error) {
-		/* N.B. This is wrong ... EIO ? */
-		case ERRerror:
-			return -ENFILE;
-		case ERRbadpw:
-			return -EINVAL;
-		case ERRbadtype:
-		case ERRtimeout:
-			return -EIO;
-		case ERRaccess:
-			return -EACCES;
-		/*
-		 * This is a fatal error, as it means the "tree ID"
-		 * for this connection is no longer valid. We map
-		 * to a special error code and get a new connection.
-		 */
-		case ERRinvnid:
-			return -EBADSLT;
-		default:
-			class = "ERRSRV";
-			goto err_unknown;
-		}
-	} else if (errcls == ERRHRD) {
-		switch (error) {
-		case ERRnowrite:
-			return -EROFS;
-		case ERRbadunit:
-			return -ENODEV;
-		case ERRnotready:
-			return -EUCLEAN;
-		case ERRbadcmd:
-		case ERRdata:
-			return -EIO;
-		case ERRbadreq:
-			return -ERANGE;
-		case ERRbadshare:
-			return -ETXTBSY;
-		case ERRlock:
-			return -EDEADLK;
-		case ERRdiskfull:
-			return -ENOSPC;
-		default:
-			class = "ERRHRD";
-			goto err_unknown;
-		}
-	} else if (errcls == ERRCMD) {
-		class = "ERRCMD";
-	} else if (errcls == SUCCESS) {
-		return 0;	/* This is the only valid 0 return */
-	}
-
-err_unknown:
-	printk(KERN_ERR "smb_errno: class %s, code %d from command 0x%x\n",
-	       class, error, SMB_CMD(req->rq_header));
-	return -EIO;
-}
-
-/* smb_request_ok: We expect the server to be locked. Then we do the
-   request and check the answer completely. When smb_request_ok
-   returns 0, you can be quite sure that everything went well. When
-   the answer is <=0, the returned number is a valid unix errno. */
-
-static int
-smb_request_ok(struct smb_request *req, int command, int wct, int bcc)
-{
-	int result;
-
-	req->rq_resp_wct = wct;
-	req->rq_resp_bcc = bcc;
-
-	result = smb_add_request(req);
-	if (result != 0) {
-		DEBUG1("smb_request failed\n");
-		goto out;
-	}
-
-	if (smb_valid_packet(req->rq_header) != 0) {
-		PARANOIA("invalid packet!\n");
-		goto out;
-	}
-
-	result = smb_verify(req->rq_header, command, wct, bcc);
-
-out:
-	return result;
-}
-
-/*
- * This implements the NEWCONN ioctl. It installs the server pid,
- * sets server->state to CONN_VALID, and wakes up the waiting process.
- */
-int
-smb_newconn(struct smb_sb_info *server, struct smb_conn_opt *opt)
-{
-	struct file *filp;
-	struct sock *sk;
-	int error;
-
-	VERBOSE("fd=%d, pid=%d\n", opt->fd, current->pid);
-
-	smb_lock_server(server);
-
-	/*
-	 * Make sure we don't already have a valid connection ...
-	 */
-	error = -EINVAL;
-	if (server->state == CONN_VALID)
-		goto out;
-
-	error = -EACCES;
-	if (current_uid() != server->mnt->mounted_uid &&
-	    !capable(CAP_SYS_ADMIN))
-		goto out;
-
-	error = -EBADF;
-	filp = fget(opt->fd);
-	if (!filp)
-		goto out;
-	if (!smb_valid_socket(filp->f_path.dentry->d_inode))
-		goto out_putf;
-
-	server->sock_file = filp;
-	server->conn_pid = get_pid(task_pid(current));
-	server->opt = *opt;
-	server->generation += 1;
-	server->state = CONN_VALID;
-	error = 0;
-
-	if (server->conn_error) {
-		/*
-		 * conn_error is the returncode we originally decided to
-		 * drop the old connection on. This message should be positive
-		 * and not make people ask questions on why smbfs is printing
-		 * error messages ...
-		 */
-		printk(KERN_INFO "SMB connection re-established (%d)\n",
-		       server->conn_error);
-		server->conn_error = 0;
-	}
-
-	/*
-	 * Store the server in sock user_data (Only used by sunrpc)
-	 */
-	sk = SOCKET_I(filp->f_path.dentry->d_inode)->sk;
-	sk->sk_user_data = server;
-
-	/* chain into the data_ready callback */
-	server->data_ready = xchg(&sk->sk_data_ready, smb_data_ready);
-
-	/* check if we have an old smbmount that uses seconds for the 
-	   serverzone */
-	if (server->opt.serverzone > 12*60 || server->opt.serverzone < -12*60)
-		server->opt.serverzone /= 60;
-
-	/* now that we have an established connection we can detect the server
-	   type and enable bug workarounds */
-	if (server->opt.protocol < SMB_PROTOCOL_LANMAN2)
-		install_ops(server->ops, &smb_ops_core);
-	else if (server->opt.protocol == SMB_PROTOCOL_LANMAN2)
-		install_ops(server->ops, &smb_ops_os2);
-	else if (server->opt.protocol == SMB_PROTOCOL_NT1 &&
-		 (server->opt.max_xmit < 0x1000) &&
-		 !(server->opt.capabilities & SMB_CAP_NT_SMBS)) {
-		/* FIXME: can we kill the WIN95 flag now? */
-		server->mnt->flags |= SMB_MOUNT_WIN95;
-		VERBOSE("detected WIN95 server\n");
-		install_ops(server->ops, &smb_ops_win95);
-	} else {
-		/*
-		 * Samba has max_xmit 65535
-		 * NT4spX has max_xmit 4536 (or something like that)
-		 * win2k has ...
-		 */
-		VERBOSE("detected NT1 (Samba, NT4/5) server\n");
-		install_ops(server->ops, &smb_ops_winNT);
-	}
-
-	/* FIXME: the win9x code wants to modify these ... (seek/trunc bug) */
-	if (server->mnt->flags & SMB_MOUNT_OLDATTR) {
-		server->ops->getattr = smb_proc_getattr_core;
-	} else if (server->mnt->flags & SMB_MOUNT_DIRATTR) {
-		server->ops->getattr = smb_proc_getattr_ff;
-	}
-
-	/* Decode server capabilities */
-	if (server->opt.capabilities & SMB_CAP_LARGE_FILES) {
-		/* Should be ok to set this now, as no one can access the
-		   mount until the connection has been established. */
-		SB_of(server)->s_maxbytes = ~0ULL >> 1;
-		VERBOSE("LFS enabled\n");
-	}
-	if (server->opt.capabilities & SMB_CAP_UNICODE) {
-		server->mnt->flags |= SMB_MOUNT_UNICODE;
-		VERBOSE("Unicode enabled\n");
-	} else {
-		server->mnt->flags &= ~SMB_MOUNT_UNICODE;
-	}
-#if 0
-	/* flags we may test for other patches ... */
-	if (server->opt.capabilities & SMB_CAP_LARGE_READX) {
-		VERBOSE("Large reads enabled\n");
-	}
-	if (server->opt.capabilities & SMB_CAP_LARGE_WRITEX) {
-		VERBOSE("Large writes enabled\n");
-	}
-#endif
-	if (server->opt.capabilities & SMB_CAP_UNIX) {
-		struct inode *inode;
-		VERBOSE("Using UNIX CIFS extensions\n");
-		install_ops(server->ops, &smb_ops_unix);
-		inode = SB_of(server)->s_root->d_inode;
-		if (inode)
-			inode->i_op = &smb_dir_inode_operations_unix;
-	}
-
-	VERBOSE("protocol=%d, max_xmit=%d, pid=%d capabilities=0x%x\n",
-		server->opt.protocol, server->opt.max_xmit,
-		pid_nr(server->conn_pid), server->opt.capabilities);
-
-	/* FIXME: this really should be done by smbmount. */
-	if (server->opt.max_xmit > SMB_MAX_PACKET_SIZE) {
-		server->opt.max_xmit = SMB_MAX_PACKET_SIZE;
-	}
-
-	smb_unlock_server(server);
-	smbiod_wake_up();
-	if (server->opt.capabilities & SMB_CAP_UNIX)
-		smb_proc_query_cifsunix(server);
-
-	server->conn_complete++;
-	wake_up_interruptible_all(&server->conn_wq);
-	return error;
-
-out:
-	smb_unlock_server(server);
-	smbiod_wake_up();
-	return error;
-
-out_putf:
-	fput(filp);
-	goto out;
-}
-
-/* smb_setup_header: We completely set up the packet. You only have to
-   insert the command-specific fields */
-
-__u8 *
-smb_setup_header(struct smb_request *req, __u8 command, __u16 wct, __u16 bcc)
-{
-	__u32 xmit_len = SMB_HEADER_LEN + wct * sizeof(__u16) + bcc + 2;
-	__u8 *p = req->rq_header;
-	struct smb_sb_info *server = req->rq_server;
-
-	p = smb_encode_smb_length(p, xmit_len - 4);
-
-	*p++ = 0xff;
-	*p++ = 'S';
-	*p++ = 'M';
-	*p++ = 'B';
-	*p++ = command;
-
-	memset(p, '\0', 19);
-	p += 19;
-	p += 8;
-
-	if (server->opt.protocol > SMB_PROTOCOL_CORE) {
-		int flags = SMB_FLAGS_CASELESS_PATHNAMES;
-		int flags2 = SMB_FLAGS2_LONG_PATH_COMPONENTS |
-			SMB_FLAGS2_EXTENDED_ATTRIBUTES;	/* EA? not really ... */
-
-		*(req->rq_header + smb_flg) = flags;
-		if (server->mnt->flags & SMB_MOUNT_UNICODE)
-			flags2 |= SMB_FLAGS2_UNICODE_STRINGS;
-		WSET(req->rq_header, smb_flg2, flags2);
-	}
-	*p++ = wct;		/* wct */
-	p += 2 * wct;
-	WSET(p, 0, bcc);
-
-	/* Include the header in the data to send */
-	req->rq_iovlen = 1;
-	req->rq_iov[0].iov_base = req->rq_header;
-	req->rq_iov[0].iov_len  = xmit_len - bcc;
-
-	return req->rq_buffer;
-}
-
-static void
-smb_setup_bcc(struct smb_request *req, __u8 *p)
-{
-	u16 bcc = p - req->rq_buffer;
-	u8 *pbcc = req->rq_header + SMB_HEADER_LEN + 2*SMB_WCT(req->rq_header);
-
-	WSET(pbcc, 0, bcc);
-
-	smb_encode_smb_length(req->rq_header, SMB_HEADER_LEN + 
-			      2*SMB_WCT(req->rq_header) - 2 + bcc);
-
-	/* Include the "bytes" in the data to send */
-	req->rq_iovlen = 2;
-	req->rq_iov[1].iov_base = req->rq_buffer;
-	req->rq_iov[1].iov_len  = bcc;
-}
-
-static int
-smb_proc_seek(struct smb_sb_info *server, __u16 fileid,
-	      __u16 mode, off_t offset)
-{
-	int result;
-	struct smb_request *req;
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, 0)))
-		goto out;
-
-	smb_setup_header(req, SMBlseek, 4, 0);
-	WSET(req->rq_header, smb_vwv0, fileid);
-	WSET(req->rq_header, smb_vwv1, mode);
-	DSET(req->rq_header, smb_vwv2, offset);
-	req->rq_flags |= SMB_REQ_NORETRY;
-
-	result = smb_request_ok(req, SMBlseek, 2, 0);
-	if (result < 0) {
-		result = 0;
-		goto out_free;
-	}
-
-	result = DVAL(req->rq_header, smb_vwv0);
-out_free:
-	smb_rput(req);
-out:
-	return result;
-}
-
-static int
-smb_proc_open(struct smb_sb_info *server, struct dentry *dentry, int wish)
-{
-	struct inode *ino = dentry->d_inode;
-	struct smb_inode_info *ei = SMB_I(ino);
-	int mode, read_write = 0x42, read_only = 0x40;
-	int res;
-	char *p;
-	struct smb_request *req;
-
-	/*
-	 * Attempt to open r/w, unless there are no write privileges.
-	 */
-	mode = read_write;
-	if (!(ino->i_mode & (S_IWUSR | S_IWGRP | S_IWOTH)))
-		mode = read_only;
-#if 0
-	/* FIXME: why is this code not in? below we fix it so that a caller
-	   wanting RO doesn't get RW. smb_revalidate_inode does some 
-	   optimization based on access mode. tail -f needs it to be correct.
-
-	   We must open rw since we don't do the open if called a second time
-	   with different 'wish'. Is that not supported by smb servers? */
-	if (!(wish & (O_WRONLY | O_RDWR)))
-		mode = read_only;
-#endif
-
-	res = -ENOMEM;
-	if (! (req = smb_alloc_request(server, PAGE_SIZE)))
-		goto out;
-
-      retry:
-	p = smb_setup_header(req, SMBopen, 2, 0);
-	WSET(req->rq_header, smb_vwv0, mode);
-	WSET(req->rq_header, smb_vwv1, aSYSTEM | aHIDDEN | aDIR);
-	res = smb_simple_encode_path(req, &p, dentry, NULL);
-	if (res < 0)
-		goto out_free;
-	smb_setup_bcc(req, p);
-
-	res = smb_request_ok(req, SMBopen, 7, 0);
-	if (res != 0) {
-		if (mode == read_write &&
-		    (res == -EACCES || res == -ETXTBSY || res == -EROFS))
-		{
-			VERBOSE("%s/%s R/W failed, error=%d, retrying R/O\n",
-				DENTRY_PATH(dentry), res);
-			mode = read_only;
-			req->rq_flags = 0;
-			goto retry;
-		}
-		goto out_free;
-	}
-	/* We should now have data in vwv[0..6]. */
-
-	ei->fileid = WVAL(req->rq_header, smb_vwv0);
-	ei->attr   = WVAL(req->rq_header, smb_vwv1);
-	/* smb_vwv2 has mtime */
-	/* smb_vwv4 has size  */
-	ei->access = (WVAL(req->rq_header, smb_vwv6) & SMB_ACCMASK);
-	ei->open = server->generation;
-
-out_free:
-	smb_rput(req);
-out:
-	return res;
-}
-
-/*
- * Make sure the file is open, and check that the access
- * is compatible with the desired access.
- */
-int
-smb_open(struct dentry *dentry, int wish)
-{
-	struct inode *inode = dentry->d_inode;
-	int result;
-	__u16 access;
-
-	result = -ENOENT;
-	if (!inode) {
-		printk(KERN_ERR "smb_open: no inode for dentry %s/%s\n",
-		       DENTRY_PATH(dentry));
-		goto out;
-	}
-
-	if (!smb_is_open(inode)) {
-		struct smb_sb_info *server = server_from_inode(inode);
-		result = 0;
-		if (!smb_is_open(inode))
-			result = smb_proc_open(server, dentry, wish);
-		if (result)
-			goto out;
-		/*
-		 * A successful open means the path is still valid ...
-		 */
-		smb_renew_times(dentry);
-	}
-
-	/*
-	 * Check whether the access is compatible with the desired mode.
-	 */
-	result = 0;
-	access = SMB_I(inode)->access;
-	if (access != wish && access != SMB_O_RDWR) {
-		PARANOIA("%s/%s access denied, access=%x, wish=%x\n",
-			 DENTRY_PATH(dentry), access, wish);
-		result = -EACCES;
-	}
-out:
-	return result;
-}
-
-static int 
-smb_proc_close(struct smb_sb_info *server, __u16 fileid, __u32 mtime)
-{
-	struct smb_request *req;
-	int result = -ENOMEM;
-
-	if (! (req = smb_alloc_request(server, 0)))
-		goto out;
-
-	smb_setup_header(req, SMBclose, 3, 0);
-	WSET(req->rq_header, smb_vwv0, fileid);
-	DSET(req->rq_header, smb_vwv1, utc2local(server, mtime));
-	req->rq_flags |= SMB_REQ_NORETRY;
-	result = smb_request_ok(req, SMBclose, 0, 0);
-
-	smb_rput(req);
-out:
-	return result;
-}
-
-/*
- * Win NT 4.0 has an apparent bug in that it fails to update the
- * modify time when writing to a file. As a workaround, we update
- * both modify and access time locally, and post the times to the
- * server when closing the file.
- */
-static int 
-smb_proc_close_inode(struct smb_sb_info *server, struct inode * ino)
-{
-	struct smb_inode_info *ei = SMB_I(ino);
-	int result = 0;
-	if (smb_is_open(ino))
-	{
-		/*
-		 * We clear the open flag in advance, in case another
- 		 * process observes the value while we block below.
-		 */
-		ei->open = 0;
-
-		/*
-		 * Kludge alert: SMB timestamps are accurate only to
-		 * two seconds ... round the times to avoid needless
-		 * cache invalidations!
-		 */
-		if (ino->i_mtime.tv_sec & 1) { 
-			ino->i_mtime.tv_sec--;
-			ino->i_mtime.tv_nsec = 0; 
-		}
-		if (ino->i_atime.tv_sec & 1) {
-			ino->i_atime.tv_sec--;
-			ino->i_atime.tv_nsec = 0;
-		}
-		/*
-		 * If the file is open with write permissions,
-		 * update the time stamps to sync mtime and atime.
-		 */
-		if ((server->opt.capabilities & SMB_CAP_UNIX) == 0 &&
-		    (server->opt.protocol >= SMB_PROTOCOL_LANMAN2) &&
-		    !(ei->access == SMB_O_RDONLY))
-		{
-			struct smb_fattr fattr;
-			smb_get_inode_attr(ino, &fattr);
-			smb_proc_setattr_ext(server, ino, &fattr);
-		}
-
-		result = smb_proc_close(server, ei->fileid, ino->i_mtime.tv_sec);
-		/*
-		 * Force a revalidation after closing ... some servers
-		 * don't post the size until the file has been closed.
-		 */
-		if (server->opt.protocol < SMB_PROTOCOL_NT1)
-			ei->oldmtime = 0;
-		ei->closed = jiffies;
-	}
-	return result;
-}
-
-int
-smb_close(struct inode *ino)
-{
-	int result = 0;
-
-	if (smb_is_open(ino)) {
-		struct smb_sb_info *server = server_from_inode(ino);
-		result = smb_proc_close_inode(server, ino);
-	}
-	return result;
-}
-
-/*
- * This is used to close a file following a failed instantiate.
- * Since we don't have an inode, we can't use any of the above.
- */
-int
-smb_close_fileid(struct dentry *dentry, __u16 fileid)
-{
-	struct smb_sb_info *server = server_from_dentry(dentry);
-	int result;
-
-	result = smb_proc_close(server, fileid, get_seconds());
-	return result;
-}
-
-/* In smb_proc_read and smb_proc_write we do not retry, because the
-   file-id would not be valid after a reconnection. */
-
-static void
-smb_proc_read_data(struct smb_request *req)
-{
-	req->rq_iov[0].iov_base = req->rq_buffer;
-	req->rq_iov[0].iov_len  = 3;
-
-	req->rq_iov[1].iov_base = req->rq_page;
-	req->rq_iov[1].iov_len  = req->rq_rsize;
-	req->rq_iovlen = 2;
-
-	req->rq_rlen = smb_len(req->rq_header) + 4 - req->rq_bytes_recvd;
-}
-
-static int
-smb_proc_read(struct inode *inode, loff_t offset, int count, char *data)
-{
-	struct smb_sb_info *server = server_from_inode(inode);
-	__u16 returned_count, data_len;
-	unsigned char *buf;
-	int result;
-	struct smb_request *req;
-	u8 rbuf[4];
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, 0)))
-		goto out;
-
-	smb_setup_header(req, SMBread, 5, 0);
-	buf = req->rq_header;
-	WSET(buf, smb_vwv0, SMB_I(inode)->fileid);
-	WSET(buf, smb_vwv1, count);
-	DSET(buf, smb_vwv2, offset);
-	WSET(buf, smb_vwv4, 0);
-
-	req->rq_page = data;
-	req->rq_rsize = count;
-	req->rq_callback = smb_proc_read_data;
-	req->rq_buffer = rbuf;
-	req->rq_flags |= SMB_REQ_NORETRY | SMB_REQ_STATIC;
-
-	result = smb_request_ok(req, SMBread, 5, -1);
-	if (result < 0)
-		goto out_free;
-	returned_count = WVAL(req->rq_header, smb_vwv0);
-
-	data_len = WVAL(rbuf, 1);
-
-	if (returned_count != data_len) {
-		printk(KERN_NOTICE "smb_proc_read: returned != data_len\n");
-		printk(KERN_NOTICE "smb_proc_read: ret_c=%d, data_len=%d\n",
-		       returned_count, data_len);
-	}
-	result = data_len;
-
-out_free:
-	smb_rput(req);
-out:
-	VERBOSE("ino=%ld, fileid=%d, count=%d, result=%d\n",
-		inode->i_ino, SMB_I(inode)->fileid, count, result);
-	return result;
-}
-
-static int
-smb_proc_write(struct inode *inode, loff_t offset, int count, const char *data)
-{
-	struct smb_sb_info *server = server_from_inode(inode);
-	int result;
-	u16 fileid = SMB_I(inode)->fileid;
-	u8 buf[4];
-	struct smb_request *req;
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, 0)))
-		goto out;
-
-	VERBOSE("ino=%ld, fileid=%d, count=%d@%Ld\n",
-		inode->i_ino, fileid, count, offset);
-
-	smb_setup_header(req, SMBwrite, 5, count + 3);
-	WSET(req->rq_header, smb_vwv0, fileid);
-	WSET(req->rq_header, smb_vwv1, count);
-	DSET(req->rq_header, smb_vwv2, offset);
-	WSET(req->rq_header, smb_vwv4, 0);
-
-	buf[0] = 1;
-	WSET(buf, 1, count);	/* yes, again ... */
-	req->rq_iov[1].iov_base = buf;
-	req->rq_iov[1].iov_len = 3;
-	req->rq_iov[2].iov_base = (char *) data;
-	req->rq_iov[2].iov_len = count;
-	req->rq_iovlen = 3;
-	req->rq_flags |= SMB_REQ_NORETRY;
-
-	result = smb_request_ok(req, SMBwrite, 1, 0);
-	if (result >= 0)
-		result = WVAL(req->rq_header, smb_vwv0);
-
-	smb_rput(req);
-out:
-	return result;
-}
-
-/*
- * In smb_proc_readX and smb_proc_writeX we do not retry, because the
- * file-id would not be valid after a reconnection.
- */
-
-#define SMB_READX_MAX_PAD      64
-static void
-smb_proc_readX_data(struct smb_request *req)
-{
-	/* header length, excluding the netbios length (-4) */
-	int hdrlen = SMB_HEADER_LEN + req->rq_resp_wct*2 - 2;
-	int data_off = WVAL(req->rq_header, smb_vwv6);
-
-	/*
-	 * Some genius made the padding to the data bytes arbitrary.
-	 * So we must first calculate the amount of padding used by the server.
-	 */
-	data_off -= hdrlen;
-	if (data_off > SMB_READX_MAX_PAD || data_off < 0) {
-		PARANOIA("offset is larger than SMB_READX_MAX_PAD or negative!\n");
-		PARANOIA("%d > %d || %d < 0\n", data_off, SMB_READX_MAX_PAD, data_off);
-		req->rq_rlen = req->rq_bufsize + 1;
-		return;
-	}
-	req->rq_iov[0].iov_base = req->rq_buffer;
-	req->rq_iov[0].iov_len  = data_off;
-
-	req->rq_iov[1].iov_base = req->rq_page;
-	req->rq_iov[1].iov_len  = req->rq_rsize;
-	req->rq_iovlen = 2;
-
-	req->rq_rlen = smb_len(req->rq_header) + 4 - req->rq_bytes_recvd;
-}
-
-static int
-smb_proc_readX(struct inode *inode, loff_t offset, int count, char *data)
-{
-	struct smb_sb_info *server = server_from_inode(inode);
-	unsigned char *buf;
-	int result;
-	struct smb_request *req;
-	static char pad[SMB_READX_MAX_PAD];
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, 0)))
-		goto out;
-
-	smb_setup_header(req, SMBreadX, 12, 0);
-	buf = req->rq_header;
-	WSET(buf, smb_vwv0, 0x00ff);
-	WSET(buf, smb_vwv1, 0);
-	WSET(buf, smb_vwv2, SMB_I(inode)->fileid);
-	DSET(buf, smb_vwv3, (u32)offset);               /* low 32 bits */
-	WSET(buf, smb_vwv5, count);
-	WSET(buf, smb_vwv6, 0);
-	DSET(buf, smb_vwv7, 0);
-	WSET(buf, smb_vwv9, 0);
-	DSET(buf, smb_vwv10, (u32)(offset >> 32));      /* high 32 bits */
-	WSET(buf, smb_vwv11, 0);
-
-	req->rq_page = data;
-	req->rq_rsize = count;
-	req->rq_callback = smb_proc_readX_data;
-	req->rq_buffer = pad;
-	req->rq_bufsize = SMB_READX_MAX_PAD;
-	req->rq_flags |= SMB_REQ_STATIC | SMB_REQ_NORETRY;
-
-	result = smb_request_ok(req, SMBreadX, 12, -1);
-	if (result < 0)
-		goto out_free;
-	result = WVAL(req->rq_header, smb_vwv5);
-
-out_free:
-	smb_rput(req);
-out:
-	VERBOSE("ino=%ld, fileid=%d, count=%d, result=%d\n",
-		inode->i_ino, SMB_I(inode)->fileid, count, result);
-	return result;
-}
-
-static int
-smb_proc_writeX(struct inode *inode, loff_t offset, int count, const char *data)
-{
-	struct smb_sb_info *server = server_from_inode(inode);
-	int result;
-	u8 *p;
-	static u8 pad[4];
-	struct smb_request *req;
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, 0)))
-		goto out;
-
-	VERBOSE("ino=%ld, fileid=%d, count=%d@%Ld\n",
-		inode->i_ino, SMB_I(inode)->fileid, count, offset);
-
-	p = smb_setup_header(req, SMBwriteX, 14, count + 1);
-	WSET(req->rq_header, smb_vwv0, 0x00ff);
-	WSET(req->rq_header, smb_vwv1, 0);
-	WSET(req->rq_header, smb_vwv2, SMB_I(inode)->fileid);
-	DSET(req->rq_header, smb_vwv3, (u32)offset);	/* low 32 bits */
-	DSET(req->rq_header, smb_vwv5, 0);
-	WSET(req->rq_header, smb_vwv7, 0);		/* write mode */
-	WSET(req->rq_header, smb_vwv8, 0);
-	WSET(req->rq_header, smb_vwv9, 0);
-	WSET(req->rq_header, smb_vwv10, count);		/* data length */
-	WSET(req->rq_header, smb_vwv11, smb_vwv12 + 2 + 1);
-	DSET(req->rq_header, smb_vwv12, (u32)(offset >> 32));
-
-	req->rq_iov[1].iov_base = pad;
-	req->rq_iov[1].iov_len = 1;
-	req->rq_iov[2].iov_base = (char *) data;
-	req->rq_iov[2].iov_len = count;
-	req->rq_iovlen = 3;
-	req->rq_flags |= SMB_REQ_NORETRY;
-
-	result = smb_request_ok(req, SMBwriteX, 6, 0);
- 	if (result >= 0)
-		result = WVAL(req->rq_header, smb_vwv2);
-
-	smb_rput(req);
-out:
-	return result;
-}
-
-int
-smb_proc_create(struct dentry *dentry, __u16 attr, time_t ctime, __u16 *fileid)
-{
-	struct smb_sb_info *server = server_from_dentry(dentry);
-	char *p;
-	int result;
-	struct smb_request *req;
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, PAGE_SIZE)))
-		goto out;
-
-	p = smb_setup_header(req, SMBcreate, 3, 0);
-	WSET(req->rq_header, smb_vwv0, attr);
-	DSET(req->rq_header, smb_vwv1, utc2local(server, ctime));
-	result = smb_simple_encode_path(req, &p, dentry, NULL);
-	if (result < 0)
-		goto out_free;
-	smb_setup_bcc(req, p);
-
-	result = smb_request_ok(req, SMBcreate, 1, 0);
-	if (result < 0)
-		goto out_free;
-
-	*fileid = WVAL(req->rq_header, smb_vwv0);
-	result = 0;
-
-out_free:
-	smb_rput(req);
-out:
-	return result;
-}
-
-int
-smb_proc_mv(struct dentry *old_dentry, struct dentry *new_dentry)
-{
-	struct smb_sb_info *server = server_from_dentry(old_dentry);
-	char *p;
-	int result;
-	struct smb_request *req;
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, PAGE_SIZE)))
-		goto out;
-
-	p = smb_setup_header(req, SMBmv, 1, 0);
-	WSET(req->rq_header, smb_vwv0, aSYSTEM | aHIDDEN | aDIR);
-	result = smb_simple_encode_path(req, &p, old_dentry, NULL);
-	if (result < 0)
-		goto out_free;
-	result = smb_simple_encode_path(req, &p, new_dentry, NULL);
-	if (result < 0)
-		goto out_free;
-	smb_setup_bcc(req, p);
-
-	if ((result = smb_request_ok(req, SMBmv, 0, 0)) < 0)
-		goto out_free;
-	result = 0;
-
-out_free:
-	smb_rput(req);
-out:
-	return result;
-}
-
-/*
- * Code common to mkdir and rmdir.
- */
-static int
-smb_proc_generic_command(struct dentry *dentry, __u8 command)
-{
-	struct smb_sb_info *server = server_from_dentry(dentry);
-	char *p;
-	int result;
-	struct smb_request *req;
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, PAGE_SIZE)))
-		goto out;
-
-	p = smb_setup_header(req, command, 0, 0);
-	result = smb_simple_encode_path(req, &p, dentry, NULL);
-	if (result < 0)
-		goto out_free;
-	smb_setup_bcc(req, p);
-
-	result = smb_request_ok(req, command, 0, 0);
-	if (result < 0)
-		goto out_free;
-	result = 0;
-
-out_free:
-	smb_rput(req);
-out:
-	return result;
-}
-
-int
-smb_proc_mkdir(struct dentry *dentry)
-{
-	return smb_proc_generic_command(dentry, SMBmkdir);
-}
-
-int
-smb_proc_rmdir(struct dentry *dentry)
-{
-	return smb_proc_generic_command(dentry, SMBrmdir);
-}
-
-#if SMBFS_POSIX_UNLINK
-/*
- * Removes readonly attribute from a file. Used by unlink to give posix
- * semantics.
- */
-static int
-smb_set_rw(struct dentry *dentry,struct smb_sb_info *server)
-{
-	int result;
-	struct smb_fattr fattr;
-
-	/* FIXME: cifsUE should allow removing a readonly file. */
-
-	/* first get current attribute */
-	smb_init_dirent(server, &fattr);
-	result = server->ops->getattr(server, dentry, &fattr);
-	smb_finish_dirent(server, &fattr);
-	if (result < 0)
-		return result;
-
-	/* if RONLY attribute is set, remove it */
-	if (fattr.attr & aRONLY) {  /* read only attribute is set */
-		fattr.attr &= ~aRONLY;
-		result = smb_proc_setattr_core(server, dentry, fattr.attr);
-	}
-	return result;
-}
-#endif
-
-int
-smb_proc_unlink(struct dentry *dentry)
-{
-	struct smb_sb_info *server = server_from_dentry(dentry);
-	int flag = 0;
-	char *p;
-	int result;
-	struct smb_request *req;
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, PAGE_SIZE)))
-		goto out;
-
-      retry:
-	p = smb_setup_header(req, SMBunlink, 1, 0);
-	WSET(req->rq_header, smb_vwv0, aSYSTEM | aHIDDEN);
-	result = smb_simple_encode_path(req, &p, dentry, NULL);
-	if (result < 0)
-		goto out_free;
-	smb_setup_bcc(req, p);
-
-	if ((result = smb_request_ok(req, SMBunlink, 0, 0)) < 0) {
-#if SMBFS_POSIX_UNLINK
-		if (result == -EACCES && !flag) {
-			/* Posix semantics is for the read-only state
-			   of a file to be ignored in unlink(). In the
-			   SMB world a unlink() is refused on a
-			   read-only file. To make things easier for
-			   unix users we try to override the files
-			   permission if the unlink fails with the
-			   right error.
-			   This introduces a race condition that could
-			   lead to a file being written by someone who
-			   shouldn't have access, but as far as I can
-			   tell that is unavoidable */
-
-			/* remove RONLY attribute and try again */
-			result = smb_set_rw(dentry,server);
-			if (result == 0) {
-				flag = 1;
-				req->rq_flags = 0;
-				goto retry;
-			}
-		}
-#endif
-		goto out_free;
-	}
-	result = 0;
-
-out_free:
-	smb_rput(req);
-out:
-	return result;
-}
-
-int
-smb_proc_flush(struct smb_sb_info *server, __u16 fileid)
-{
-	int result;
-	struct smb_request *req;
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, 0)))
-		goto out;
-
-	smb_setup_header(req, SMBflush, 1, 0);
-	WSET(req->rq_header, smb_vwv0, fileid);
-	req->rq_flags |= SMB_REQ_NORETRY;
-	result = smb_request_ok(req, SMBflush, 0, 0);
-
-	smb_rput(req);
-out:
-	return result;
-}
-
-static int
-smb_proc_trunc32(struct inode *inode, loff_t length)
-{
-	/*
-	 * Writing 0bytes is old-SMB magic for truncating files.
-	 * MAX_NON_LFS should prevent this from being called with a too
-	 * large offset.
-	 */
-	return smb_proc_write(inode, length, 0, NULL);
-}
-
-static int
-smb_proc_trunc64(struct inode *inode, loff_t length)
-{
-	struct smb_sb_info *server = server_from_inode(inode);
-	int result;
-	char *param;
-	char *data;
-	struct smb_request *req;
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, 14)))
-		goto out;
-
-	param = req->rq_buffer;
-	data = req->rq_buffer + 6;
-
-	/* FIXME: must we also set allocation size? winNT seems to do that */
-	WSET(param, 0, SMB_I(inode)->fileid);
-	WSET(param, 2, SMB_SET_FILE_END_OF_FILE_INFO);
-	WSET(param, 4, 0);
-	LSET(data, 0, length);
-
-	req->rq_trans2_command = TRANSACT2_SETFILEINFO;
-	req->rq_ldata = 8;
-	req->rq_data  = data;
-	req->rq_lparm = 6;
-	req->rq_parm  = param;
-	req->rq_flags |= SMB_REQ_NORETRY;
-	result = smb_add_request(req);
-	if (result < 0)
-		goto out_free;
-
-	result = 0;
-	if (req->rq_rcls != 0)
-		result = smb_errno(req);
-
-out_free:
-	smb_rput(req);
-out:
-	return result;
-}
-
-static int
-smb_proc_trunc95(struct inode *inode, loff_t length)
-{
-	struct smb_sb_info *server = server_from_inode(inode);
-	int result = smb_proc_trunc32(inode, length);
- 
-	/*
-	 * win9x doesn't appear to update the size immediately.
-	 * It will return the old file size after the truncate,
-	 * confusing smbfs. So we force an update.
-	 *
-	 * FIXME: is this still necessary?
-	 */
-	smb_proc_flush(server, SMB_I(inode)->fileid);
-	return result;
-}
-
-static void
-smb_init_dirent(struct smb_sb_info *server, struct smb_fattr *fattr)
-{
-	memset(fattr, 0, sizeof(*fattr));
-
-	fattr->f_nlink = 1;
-	fattr->f_uid = server->mnt->uid;
-	fattr->f_gid = server->mnt->gid;
-	fattr->f_unix = 0;
-}
-
-static void
-smb_finish_dirent(struct smb_sb_info *server, struct smb_fattr *fattr)
-{
-	if (fattr->f_unix)
-		return;
-
-	fattr->f_mode = server->mnt->file_mode;
-	if (fattr->attr & aDIR) {
-		fattr->f_mode = server->mnt->dir_mode;
-		fattr->f_size = SMB_ST_BLKSIZE;
-	}
-	/* Check the read-only flag */
-	if (fattr->attr & aRONLY)
-		fattr->f_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
-
-	/* How many 512 byte blocks do we need for this file? */
-	fattr->f_blocks = 0;
-	if (fattr->f_size != 0)
-		fattr->f_blocks = 1 + ((fattr->f_size-1) >> 9);
-	return;
-}
-
-void
-smb_init_root_dirent(struct smb_sb_info *server, struct smb_fattr *fattr,
-		     struct super_block *sb)
-{
-	smb_init_dirent(server, fattr);
-	fattr->attr = aDIR;
-	fattr->f_ino = 2; /* traditional root inode number */
-	fattr->f_mtime = current_fs_time(sb);
-	smb_finish_dirent(server, fattr);
-}
-
-/*
- * Decode a dirent for old protocols
- *
- * qname is filled with the decoded, and possibly translated, name.
- * fattr receives decoded attributes
- *
- * Bugs Noted:
- * (1) Pathworks servers may pad the name with extra spaces.
- */
-static char *
-smb_decode_short_dirent(struct smb_sb_info *server, char *p,
-			struct qstr *qname, struct smb_fattr *fattr,
-			unsigned char *name_buf)
-{
-	int len;
-
-	/*
-	 * SMB doesn't have a concept of inode numbers ...
-	 */
-	smb_init_dirent(server, fattr);
-	fattr->f_ino = 0;	/* FIXME: do we need this? */
-
-	p += SMB_STATUS_SIZE;	/* reserved (search_status) */
-	fattr->attr = *p;
-	fattr->f_mtime.tv_sec = date_dos2unix(server, WVAL(p, 3), WVAL(p, 1));
-	fattr->f_mtime.tv_nsec = 0;
-	fattr->f_size = DVAL(p, 5);
-	fattr->f_ctime = fattr->f_mtime;
-	fattr->f_atime = fattr->f_mtime;
-	qname->name = p + 9;
-	len = strnlen(qname->name, 12);
-
-	/*
-	 * Trim trailing blanks for Pathworks servers
-	 */
-	while (len > 2 && qname->name[len-1] == ' ')
-		len--;
-
-	smb_finish_dirent(server, fattr);
-
-#if 0
-	/* FIXME: These only work for ascii chars, and recent smbmount doesn't
-	   allow the flag to be set anyway. It kills const. Remove? */
-	switch (server->opt.case_handling) {
-	case SMB_CASE_UPPER:
-		str_upper(entry->name, len);
-		break;
-	case SMB_CASE_LOWER:
-		str_lower(entry->name, len);
-		break;
-	default:
-		break;
-	}
-#endif
-
-	qname->len = 0;
-	len = server->ops->convert(name_buf, SMB_MAXNAMELEN,
-				   qname->name, len,
-				   server->remote_nls, server->local_nls);
-	if (len > 0) {
-		qname->len = len;
-		qname->name = name_buf;
-		DEBUG1("len=%d, name=%.*s\n",qname->len,qname->len,qname->name);
-	}
-
-	return p + 22;
-}
-
-/*
- * This routine is used to read in directory entries from the network.
- * Note that it is for short directory name seeks, i.e.: protocol <
- * SMB_PROTOCOL_LANMAN2
- */
-static int
-smb_proc_readdir_short(struct file *filp, void *dirent, filldir_t filldir,
-		       struct smb_cache_control *ctl)
-{
-	struct dentry *dir = filp->f_path.dentry;
-	struct smb_sb_info *server = server_from_dentry(dir);
-	struct qstr qname;
-	struct smb_fattr fattr;
-	char *p;
-	int result;
-	int i, first, entries_seen, entries;
-	int entries_asked = (server->opt.max_xmit - 100) / SMB_DIRINFO_SIZE;
-	__u16 bcc;
-	__u16 count;
-	char status[SMB_STATUS_SIZE];
-	static struct qstr mask = {
-		.name	= "*.*",
-		.len	= 3,
-	};
-	unsigned char *last_status;
-	struct smb_request *req;
-	unsigned char *name_buf;
-
-	VERBOSE("%s/%s\n", DENTRY_PATH(dir));
-
-	lock_kernel();
-
-	result = -ENOMEM;
-	if (! (name_buf = kmalloc(SMB_MAXNAMELEN, GFP_KERNEL)))
-		goto out;
-
-	first = 1;
-	entries = 0;
-	entries_seen = 2; /* implicit . and .. */
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, server->opt.max_xmit)))
-		goto out_name;
-
-	while (1) {
-		p = smb_setup_header(req, SMBsearch, 2, 0);
-		WSET(req->rq_header, smb_vwv0, entries_asked);
-		WSET(req->rq_header, smb_vwv1, aDIR);
-		if (first == 1) {
-			result = smb_simple_encode_path(req, &p, dir, &mask);
-			if (result < 0)
-				goto out_free;
-			if (p + 3 > (char *)req->rq_buffer + req->rq_bufsize) {
-				result = -ENAMETOOLONG;
-				goto out_free;
-			}
-			*p++ = 5;
-			WSET(p, 0, 0);
-			p += 2;
-			first = 0;
-		} else {
-			if (p + 5 + SMB_STATUS_SIZE >
-			    (char *)req->rq_buffer + req->rq_bufsize) {
-				result = -ENAMETOOLONG;
-				goto out_free;
-			}
-				
-			*p++ = 4;
-			*p++ = 0;
-			*p++ = 5;
-			WSET(p, 0, SMB_STATUS_SIZE);
-			p += 2;
-			memcpy(p, status, SMB_STATUS_SIZE);
-			p += SMB_STATUS_SIZE;
-		}
-
-		smb_setup_bcc(req, p);
-
-		result = smb_request_ok(req, SMBsearch, 1, -1);
-		if (result < 0) {
-			if ((req->rq_rcls == ERRDOS) && 
-			    (req->rq_err  == ERRnofiles))
-				break;
-			goto out_free;
-		}
-		count = WVAL(req->rq_header, smb_vwv0);
-		if (count <= 0)
-			break;
-
-		result = -EIO;
-		bcc = smb_bcc(req->rq_header);
-		if (bcc != count * SMB_DIRINFO_SIZE + 3)
-			goto out_free;
-		p = req->rq_buffer + 3;
-
-
-		/* Make sure the response fits in the buffer. Fixed sized 
-		   entries means we don't have to check in the decode loop. */
-
-		last_status = req->rq_buffer + 3 + (count-1) * SMB_DIRINFO_SIZE;
-
-		if (last_status + SMB_DIRINFO_SIZE >=
-		    req->rq_buffer + req->rq_bufsize) {
-			printk(KERN_ERR "smb_proc_readdir_short: "
-			       "last dir entry outside buffer! "
-			       "%d@%p  %d@%p\n", SMB_DIRINFO_SIZE, last_status,
-			       req->rq_bufsize, req->rq_buffer);
-			goto out_free;
-		}
-
-		/* Read the last entry into the status field. */
-		memcpy(status, last_status, SMB_STATUS_SIZE);
-
-
-		/* Now we are ready to parse smb directory entries. */
-
-		for (i = 0; i < count; i++) {
-			p = smb_decode_short_dirent(server, p, 
-						    &qname, &fattr, name_buf);
-			if (qname.len == 0)
-				continue;
-
-			if (entries_seen == 2 && qname.name[0] == '.') {
-				if (qname.len == 1)
-					continue;
-				if (qname.name[1] == '.' && qname.len == 2)
-					continue;
-			}
-			if (!smb_fill_cache(filp, dirent, filldir, ctl, 
-					    &qname, &fattr))
-				;	/* stop reading? */
-			entries_seen++;
-		}
-	}
-	result = entries;
-
-out_free:
-	smb_rput(req);
-out_name:
-	kfree(name_buf);
-out:
-	unlock_kernel();
-	return result;
-}
-
-static void smb_decode_unix_basic(struct smb_fattr *fattr, struct smb_sb_info *server, char *p)
-{
-	u64 size, disk_bytes;
-
-	/* FIXME: verify nls support. all is sent as utf8? */
-
-	fattr->f_unix = 1;
-	fattr->f_mode = 0;
-
-	/* FIXME: use the uniqueID from the remote instead? */
-	/* 0 L file size in bytes */
-	/* 8 L file size on disk in bytes (block count) */
-	/* 40 L uid */
-	/* 48 L gid */
-	/* 56 W file type */
-	/* 60 L devmajor */
-	/* 68 L devminor */
-	/* 76 L unique ID (inode) */
-	/* 84 L permissions */
-	/* 92 L link count */
-
-	size = LVAL(p, 0);
-	disk_bytes = LVAL(p, 8);
-
-	/*
-	 * Some samba versions round up on-disk byte usage
-	 * to 1MB boundaries, making it useless. When seeing
-	 * that, use the size instead.
-	 */
-	if (!(disk_bytes & 0xfffff))
-		disk_bytes = size+511;
-
-	fattr->f_size = size;
-	fattr->f_blocks = disk_bytes >> 9;
-	fattr->f_ctime = smb_ntutc2unixutc(LVAL(p, 16));
-	fattr->f_atime = smb_ntutc2unixutc(LVAL(p, 24));
-	fattr->f_mtime = smb_ntutc2unixutc(LVAL(p, 32));
-
-	if (server->mnt->flags & SMB_MOUNT_UID)
-		fattr->f_uid = server->mnt->uid;
-	else
-		fattr->f_uid = LVAL(p, 40);
-
-	if (server->mnt->flags & SMB_MOUNT_GID)
-		fattr->f_gid = server->mnt->gid;
-	else
-		fattr->f_gid = LVAL(p, 48);
-
-	fattr->f_mode |= smb_filetype_to_mode(WVAL(p, 56));
-
-	if (S_ISBLK(fattr->f_mode) || S_ISCHR(fattr->f_mode)) {
-		__u64 major = LVAL(p, 60);
-		__u64 minor = LVAL(p, 68);
-
-		fattr->f_rdev = MKDEV(major & 0xffffffff, minor & 0xffffffff);
-		if (MAJOR(fattr->f_rdev) != (major & 0xffffffff) ||
-	    	MINOR(fattr->f_rdev) != (minor & 0xffffffff))
-			fattr->f_rdev = 0;
-	}
-
-	fattr->f_mode |= LVAL(p, 84);
-
-	if ( (server->mnt->flags & SMB_MOUNT_DMODE) &&
-	     (S_ISDIR(fattr->f_mode)) )
-		fattr->f_mode = (server->mnt->dir_mode & S_IRWXUGO) | S_IFDIR;
-	else if ( (server->mnt->flags & SMB_MOUNT_FMODE) &&
-	          !(S_ISDIR(fattr->f_mode)) )
-		fattr->f_mode = (server->mnt->file_mode & S_IRWXUGO) |
-				(fattr->f_mode & S_IFMT);
-
-}
-
-/*
- * Interpret a long filename structure using the specified info level:
- *   level 1 for anything below NT1 protocol
- *   level 260 for NT1 protocol
- *
- * qname is filled with the decoded, and possibly translated, name
- * fattr receives decoded attributes.
- *
- * Bugs Noted:
- * (1) Win NT 4.0 appends a null byte to names and counts it in the length!
- */
-static char *
-smb_decode_long_dirent(struct smb_sb_info *server, char *p, int level,
-		       struct qstr *qname, struct smb_fattr *fattr,
-		       unsigned char *name_buf)
-{
-	char *result;
-	unsigned int len = 0;
-	int n;
-	__u16 date, time;
-	int unicode = (server->mnt->flags & SMB_MOUNT_UNICODE);
-
-	/*
-	 * SMB doesn't have a concept of inode numbers ...
-	 */
-	smb_init_dirent(server, fattr);
-	fattr->f_ino = 0;	/* FIXME: do we need this? */
-
-	switch (level) {
-	case 1:
-		len = *((unsigned char *) p + 22);
-		qname->name = p + 23;
-		result = p + 24 + len;
-
-		date = WVAL(p, 0);
-		time = WVAL(p, 2);
-		fattr->f_ctime.tv_sec = date_dos2unix(server, date, time);
-		fattr->f_ctime.tv_nsec = 0;
-
-		date = WVAL(p, 4);
-		time = WVAL(p, 6);
-		fattr->f_atime.tv_sec = date_dos2unix(server, date, time);
-		fattr->f_atime.tv_nsec = 0;
-
-		date = WVAL(p, 8);
-		time = WVAL(p, 10);
-		fattr->f_mtime.tv_sec = date_dos2unix(server, date, time);
-		fattr->f_mtime.tv_nsec = 0;
-		fattr->f_size = DVAL(p, 12);
-		/* ULONG allocation size */
-		fattr->attr = WVAL(p, 20);
-
-		VERBOSE("info 1 at %p, len=%d, name=%.*s\n",
-			p, len, len, qname->name);
-		break;
-	case 260:
-		result = p + WVAL(p, 0);
-		len = DVAL(p, 60);
-		if (len > 255) len = 255;
-		/* NT4 null terminates, unless we are using unicode ... */
-		qname->name = p + 94;
-		if (!unicode && len && qname->name[len-1] == '\0')
-			len--;
-
-		fattr->f_ctime = smb_ntutc2unixutc(LVAL(p, 8));
-		fattr->f_atime = smb_ntutc2unixutc(LVAL(p, 16));
-		fattr->f_mtime = smb_ntutc2unixutc(LVAL(p, 24));
-		/* change time (32) */
-		fattr->f_size = LVAL(p, 40);
-		/* alloc size (48) */
-		fattr->attr = DVAL(p, 56);
-
-		VERBOSE("info 260 at %p, len=%d, name=%.*s\n",
-			p, len, len, qname->name);
-		break;
-	case SMB_FIND_FILE_UNIX:
-		result = p + WVAL(p, 0);
-		qname->name = p + 108;
-
-		len = strlen(qname->name);
-		/* FIXME: should we check the length?? */
-
-		p += 8;
-		smb_decode_unix_basic(fattr, server, p);
-		VERBOSE("info SMB_FIND_FILE_UNIX at %p, len=%d, name=%.*s\n",
-			p, len, len, qname->name);
-		break;
-	default:
-		PARANOIA("Unknown info level %d\n", level);
-		result = p + WVAL(p, 0);
-		goto out;
-	}
-
-	smb_finish_dirent(server, fattr);
-
-#if 0
-	/* FIXME: These only work for ascii chars, and recent smbmount doesn't
-	   allow the flag to be set anyway. Remove? */
-	switch (server->opt.case_handling) {
-	case SMB_CASE_UPPER:
-		str_upper(qname->name, len);
-		break;
-	case SMB_CASE_LOWER:
-		str_lower(qname->name, len);
-		break;
-	default:
-		break;
-	}
-#endif
-
-	qname->len = 0;
-	n = server->ops->convert(name_buf, SMB_MAXNAMELEN,
-				 qname->name, len,
-				 server->remote_nls, server->local_nls);
-	if (n > 0) {
-		qname->len = n;
-		qname->name = name_buf;
-	}
-
-out:
-	return result;
-}
-
-/* findfirst/findnext flags */
-#define SMB_CLOSE_AFTER_FIRST (1<<0)
-#define SMB_CLOSE_IF_END (1<<1)
-#define SMB_REQUIRE_RESUME_KEY (1<<2)
-#define SMB_CONTINUE_BIT (1<<3)
-
-/*
- * Note: samba-2.0.7 (at least) has a very similar routine, cli_list, in
- * source/libsmb/clilist.c. When looking for smb bugs in the readdir code,
- * go there for advise.
- *
- * Bugs Noted:
- * (1) When using Info Level 1 Win NT 4.0 truncates directory listings 
- * for certain patterns of names and/or lengths. The breakage pattern
- * is completely reproducible and can be toggled by the creation of a
- * single file. (E.g. echo hi >foo breaks, rm -f foo works.)
- */
-static int
-smb_proc_readdir_long(struct file *filp, void *dirent, filldir_t filldir,
-		      struct smb_cache_control *ctl)
-{
-	struct dentry *dir = filp->f_path.dentry;
-	struct smb_sb_info *server = server_from_dentry(dir);
-	struct qstr qname;
-	struct smb_fattr fattr;
-
-	unsigned char *p, *lastname;
-	char *mask, *param;
-	__u16 command;
-	int first, entries_seen;
-
-	/* Both NT and OS/2 accept info level 1 (but see note below). */
-	int info_level = 260;
-	const int max_matches = 512;
-
-	unsigned int ff_searchcount = 0;
-	unsigned int ff_eos = 0;
-	unsigned int ff_lastname = 0;
-	unsigned int ff_dir_handle = 0;
-	unsigned int loop_count = 0;
-	unsigned int mask_len, i;
-	int result;
-	struct smb_request *req;
-	unsigned char *name_buf;
-	static struct qstr star = {
-		.name	= "*",
-		.len	= 1,
-	};
-
-	lock_kernel();
-
-	/*
-	 * We always prefer unix style. Use info level 1 for older
-	 * servers that don't do 260.
-	 */
-	if (server->opt.capabilities & SMB_CAP_UNIX)
-		info_level = SMB_FIND_FILE_UNIX;
-	else if (server->opt.protocol < SMB_PROTOCOL_NT1)
-		info_level = 1;
-
-	result = -ENOMEM;
-	if (! (name_buf = kmalloc(SMB_MAXNAMELEN+2, GFP_KERNEL)))
-		goto out;
-	if (! (req = smb_alloc_request(server, server->opt.max_xmit)))
-		goto out_name;
-	param = req->rq_buffer;
-
-	/*
-	 * Encode the initial path
-	 */
-	mask = param + 12;
-
-	result = smb_encode_path(server, mask, SMB_MAXPATHLEN+1, dir, &star);
-	if (result <= 0)
-		goto out_free;
-	mask_len = result - 1;	/* mask_len is strlen, not #bytes */
-	result = 0;
-	first = 1;
-	VERBOSE("starting mask_len=%d, mask=%s\n", mask_len, mask);
-
-	entries_seen = 2;
-	ff_eos = 0;
-
-	while (ff_eos == 0) {
-		loop_count += 1;
-		if (loop_count > 10) {
-			printk(KERN_WARNING "smb_proc_readdir_long: "
-			       "Looping in FIND_NEXT??\n");
-			result = -EIO;
-			break;
-		}
-
-		if (first != 0) {
-			command = TRANSACT2_FINDFIRST;
-			WSET(param, 0, aSYSTEM | aHIDDEN | aDIR);
-			WSET(param, 2, max_matches);	/* max count */
-			WSET(param, 4, SMB_CLOSE_IF_END);
-			WSET(param, 6, info_level);
-			DSET(param, 8, 0);
-		} else {
-			command = TRANSACT2_FINDNEXT;
-
-			VERBOSE("handle=0x%X, lastname=%d, mask=%.*s\n",
-				ff_dir_handle, ff_lastname, mask_len, mask);
-
-			WSET(param, 0, ff_dir_handle);	/* search handle */
-			WSET(param, 2, max_matches);	/* max count */
-			WSET(param, 4, info_level);
-			DSET(param, 6, 0);
-			WSET(param, 10, SMB_CONTINUE_BIT|SMB_CLOSE_IF_END);
-		}
-
-		req->rq_trans2_command = command;
-		req->rq_ldata = 0;
-		req->rq_data  = NULL;
-		req->rq_lparm = 12 + mask_len + 1;
-		req->rq_parm  = param;
-		req->rq_flags = 0;
-		result = smb_add_request(req);
-		if (result < 0) {
-			PARANOIA("error=%d, breaking\n", result);
-			break;
-		}
-
-		if (req->rq_rcls == ERRSRV && req->rq_err == ERRerror) {
-			/* a damn Win95 bug - sometimes it clags if you 
-			   ask it too fast */
-			schedule_timeout_interruptible(msecs_to_jiffies(200));
-			continue;
-                }
-
-		if (req->rq_rcls != 0) {
-			result = smb_errno(req);
-			PARANOIA("name=%s, result=%d, rcls=%d, err=%d\n",
-				 mask, result, req->rq_rcls, req->rq_err);
-			break;
-		}
-
-		/* parse out some important return info */
-		if (first != 0) {
-			ff_dir_handle = WVAL(req->rq_parm, 0);
-			ff_searchcount = WVAL(req->rq_parm, 2);
-			ff_eos = WVAL(req->rq_parm, 4);
-			ff_lastname = WVAL(req->rq_parm, 8);
-		} else {
-			ff_searchcount = WVAL(req->rq_parm, 0);
-			ff_eos = WVAL(req->rq_parm, 2);
-			ff_lastname = WVAL(req->rq_parm, 6);
-		}
-
-		if (ff_searchcount == 0)
-			break;
-
-		/* Now we are ready to parse smb directory entries. */
-
-		/* point to the data bytes */
-		p = req->rq_data;
-		for (i = 0; i < ff_searchcount; i++) {
-			/* make sure we stay within the buffer */
-			if (p >= req->rq_data + req->rq_ldata) {
-				printk(KERN_ERR "smb_proc_readdir_long: "
-				       "dirent pointer outside buffer! "
-				       "%p  %d@%p\n",
-				       p, req->rq_ldata, req->rq_data);
-				result = -EIO; /* always a comm. error? */
-				goto out_free;
-			}
-
-			p = smb_decode_long_dirent(server, p, info_level,
-						   &qname, &fattr, name_buf);
-
-			/* ignore . and .. from the server */
-			if (entries_seen == 2 && qname.name[0] == '.') {
-				if (qname.len == 1)
-					continue;
-				if (qname.name[1] == '.' && qname.len == 2)
-					continue;
-			}
-
-			if (!smb_fill_cache(filp, dirent, filldir, ctl, 
-					    &qname, &fattr))
-				;	/* stop reading? */
-			entries_seen++;
-		}
-
-		VERBOSE("received %d entries, eos=%d\n", ff_searchcount,ff_eos);
-
-		/*
-		 * We might need the lastname for continuations.
-		 *
-		 * Note that some servers (win95?) point to the filename and
-		 * others (NT4, Samba using NT1) to the dir entry. We assume
-		 * here that those who do not point to a filename do not need
-		 * this info to continue the listing.
-		 *
-		 * OS/2 needs this and talks infolevel 1.
-		 * NetApps want lastname with infolevel 260.
-		 * win2k want lastname with infolevel 260, and points to
-		 *       the record not to the name.
-		 * Samba+CifsUnixExt doesn't need lastname.
-		 *
-		 * Both are happy if we return the data they point to. So we do.
-		 * (FIXME: above is not true with win2k)
-		 */
-		mask_len = 0;
-		if (info_level != SMB_FIND_FILE_UNIX &&
-		    ff_lastname > 0 && ff_lastname < req->rq_ldata) {
-			lastname = req->rq_data + ff_lastname;
-
-			switch (info_level) {
-			case 260:
-				mask_len = req->rq_ldata - ff_lastname;
-				break;
-			case 1:
-				/* lastname points to a length byte */
-				mask_len = *lastname++;
-				if (ff_lastname + 1 + mask_len > req->rq_ldata)
-					mask_len = req->rq_ldata - ff_lastname - 1;
-				break;
-			}
-
-			/*
-			 * Update the mask string for the next message.
-			 */
-			if (mask_len > 255)
-				mask_len = 255;
-			if (mask_len)
-				strncpy(mask, lastname, mask_len);
-		}
-		mask_len = strnlen(mask, mask_len);
-		VERBOSE("new mask, len=%d@%d of %d, mask=%.*s\n",
-			mask_len, ff_lastname, req->rq_ldata, mask_len, mask);
-
-		first = 0;
-		loop_count = 0;
-	}
-
-out_free:
-	smb_rput(req);
-out_name:
-	kfree(name_buf);
-out:
-	unlock_kernel();
-	return result;
-}
-
-/*
- * This version uses the trans2 TRANSACT2_FINDFIRST message 
- * to get the attribute data.
- *
- * Bugs Noted:
- */
-static int
-smb_proc_getattr_ff(struct smb_sb_info *server, struct dentry *dentry,
-			struct smb_fattr *fattr)
-{
-	char *param, *mask;
-	__u16 date, time;
-	int mask_len, result;
-	struct smb_request *req;
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, PAGE_SIZE)))
-		goto out;
-	param = req->rq_buffer;
-	mask = param + 12;
-
-	mask_len = smb_encode_path(server, mask, SMB_MAXPATHLEN+1, dentry,NULL);
-	if (mask_len < 0) {
-		result = mask_len;
-		goto out_free;
-	}
-	VERBOSE("name=%s, len=%d\n", mask, mask_len);
-	WSET(param, 0, aSYSTEM | aHIDDEN | aDIR);
-	WSET(param, 2, 1);	/* max count */
-	WSET(param, 4, 1);	/* close after this call */
-	WSET(param, 6, 1);	/* info_level */
-	DSET(param, 8, 0);
-
-	req->rq_trans2_command = TRANSACT2_FINDFIRST;
-	req->rq_ldata = 0;
-	req->rq_data  = NULL;
-	req->rq_lparm = 12 + mask_len;
-	req->rq_parm  = param;
-	req->rq_flags = 0;
-	result = smb_add_request(req);
-	if (result < 0)
-		goto out_free;
-	if (req->rq_rcls != 0) {
-		result = smb_errno(req);
-#ifdef SMBFS_PARANOIA
-		if (result != -ENOENT)
-			PARANOIA("error for %s, rcls=%d, err=%d\n",
-				 mask, req->rq_rcls, req->rq_err);
-#endif
-		goto out_free;
-	}
-	/* Make sure we got enough data ... */
-	result = -EINVAL;
-	if (req->rq_ldata < 22 || WVAL(req->rq_parm, 2) != 1) {
-		PARANOIA("bad result for %s, len=%d, count=%d\n",
-			 mask, req->rq_ldata, WVAL(req->rq_parm, 2));
-		goto out_free;
-	}
-
-	/*
-	 * Decode the response into the fattr ...
-	 */
-	date = WVAL(req->rq_data, 0);
-	time = WVAL(req->rq_data, 2);
-	fattr->f_ctime.tv_sec = date_dos2unix(server, date, time);
-	fattr->f_ctime.tv_nsec = 0;
-
-	date = WVAL(req->rq_data, 4);
-	time = WVAL(req->rq_data, 6);
-	fattr->f_atime.tv_sec = date_dos2unix(server, date, time);
-	fattr->f_atime.tv_nsec = 0;
-
-	date = WVAL(req->rq_data, 8);
-	time = WVAL(req->rq_data, 10);
-	fattr->f_mtime.tv_sec = date_dos2unix(server, date, time);
-	fattr->f_mtime.tv_nsec = 0;
-	VERBOSE("name=%s, date=%x, time=%x, mtime=%ld\n",
-		mask, date, time, fattr->f_mtime.tv_sec);
-	fattr->f_size = DVAL(req->rq_data, 12);
-	/* ULONG allocation size */
-	fattr->attr = WVAL(req->rq_data, 20);
-	result = 0;
-
-out_free:
-	smb_rput(req);
-out:
-	return result;
-}
-
-static int
-smb_proc_getattr_core(struct smb_sb_info *server, struct dentry *dir,
-		      struct smb_fattr *fattr)
-{
-	int result;
-	char *p;
-	struct smb_request *req;
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, PAGE_SIZE)))
-		goto out;
-
-	p = smb_setup_header(req, SMBgetatr, 0, 0);
-	result = smb_simple_encode_path(req, &p, dir, NULL);
-	if (result < 0)
- 		goto out_free;
-	smb_setup_bcc(req, p);
-
-	if ((result = smb_request_ok(req, SMBgetatr, 10, 0)) < 0)
-		goto out_free;
-	fattr->attr    = WVAL(req->rq_header, smb_vwv0);
-	fattr->f_mtime.tv_sec = local2utc(server, DVAL(req->rq_header, smb_vwv1));
-	fattr->f_mtime.tv_nsec = 0;
-	fattr->f_size  = DVAL(req->rq_header, smb_vwv3);
-	fattr->f_ctime = fattr->f_mtime; 
-	fattr->f_atime = fattr->f_mtime; 
-#ifdef SMBFS_DEBUG_TIMESTAMP
-	printk("getattr_core: %s/%s, mtime=%ld\n",
-	       DENTRY_PATH(dir), fattr->f_mtime);
-#endif
-	result = 0;
-
-out_free:
-	smb_rput(req);
-out:
-	return result;
-}
-
-/*
- * Bugs Noted:
- * (1) Win 95 swaps the date and time fields in the standard info level.
- */
-static int
-smb_proc_getattr_trans2(struct smb_sb_info *server, struct dentry *dir,
-			struct smb_request *req, int infolevel)
-{
-	char *p, *param;
-	int result;
-
-	param = req->rq_buffer;
-	WSET(param, 0, infolevel);
-	DSET(param, 2, 0);
-	result = smb_encode_path(server, param+6, SMB_MAXPATHLEN+1, dir, NULL);
-	if (result < 0)
-		goto out;
-	p = param + 6 + result;
-
-	req->rq_trans2_command = TRANSACT2_QPATHINFO;
-	req->rq_ldata = 0;
-	req->rq_data  = NULL;
-	req->rq_lparm = p - param;
-	req->rq_parm  = param;
-	req->rq_flags = 0;
-	result = smb_add_request(req);
-	if (result < 0)
-		goto out;
-	if (req->rq_rcls != 0) {
-		VERBOSE("for %s: result=%d, rcls=%d, err=%d\n",
-			&param[6], result, req->rq_rcls, req->rq_err);
-		result = smb_errno(req);
-		goto out;
-	}
-	result = -ENOENT;
-	if (req->rq_ldata < 22) {
-		PARANOIA("not enough data for %s, len=%d\n",
-			 &param[6], req->rq_ldata);
-		goto out;
-	}
-
-	result = 0;
-out:
-	return result;
-}
-
-static int
-smb_proc_getattr_trans2_std(struct smb_sb_info *server, struct dentry *dir,
-			    struct smb_fattr *attr)
-{
-	u16 date, time;
-	int off_date = 0, off_time = 2;
-	int result;
-	struct smb_request *req;
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, PAGE_SIZE)))
-		goto out;
-
-	result = smb_proc_getattr_trans2(server, dir, req, SMB_INFO_STANDARD);
-	if (result < 0)
-		goto out_free;
-
-	/*
-	 * Kludge alert: Win 95 swaps the date and time field,
-	 * contrary to the CIFS docs and Win NT practice.
-	 */
-	if (server->mnt->flags & SMB_MOUNT_WIN95) {
-		off_date = 2;
-		off_time = 0;
-	}
-	date = WVAL(req->rq_data, off_date);
-	time = WVAL(req->rq_data, off_time);
-	attr->f_ctime.tv_sec = date_dos2unix(server, date, time);
-	attr->f_ctime.tv_nsec = 0;
-
-	date = WVAL(req->rq_data, 4 + off_date);
-	time = WVAL(req->rq_data, 4 + off_time);
-	attr->f_atime.tv_sec = date_dos2unix(server, date, time);
-	attr->f_atime.tv_nsec = 0;
-
-	date = WVAL(req->rq_data, 8 + off_date);
-	time = WVAL(req->rq_data, 8 + off_time);
-	attr->f_mtime.tv_sec = date_dos2unix(server, date, time);
-	attr->f_mtime.tv_nsec = 0;
-#ifdef SMBFS_DEBUG_TIMESTAMP
-	printk(KERN_DEBUG "getattr_trans2: %s/%s, date=%x, time=%x, mtime=%ld\n",
-	       DENTRY_PATH(dir), date, time, attr->f_mtime);
-#endif
-	attr->f_size = DVAL(req->rq_data, 12);
-	attr->attr = WVAL(req->rq_data, 20);
-
-out_free:
-	smb_rput(req);
-out:
-	return result;
-}
-
-static int
-smb_proc_getattr_trans2_all(struct smb_sb_info *server, struct dentry *dir,
-			    struct smb_fattr *attr)
-{
-	struct smb_request *req;
-	int result;
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, PAGE_SIZE)))
-		goto out;
-
-	result = smb_proc_getattr_trans2(server, dir, req,
-					 SMB_QUERY_FILE_ALL_INFO);
-	if (result < 0)
-		goto out_free;
-
-	attr->f_ctime = smb_ntutc2unixutc(LVAL(req->rq_data, 0));
-	attr->f_atime = smb_ntutc2unixutc(LVAL(req->rq_data, 8));
-	attr->f_mtime = smb_ntutc2unixutc(LVAL(req->rq_data, 16));
-	/* change (24) */
-	attr->attr = WVAL(req->rq_data, 32);
-	/* pad? (34) */
-	/* allocated size (40) */
-	attr->f_size = LVAL(req->rq_data, 48);
-
-out_free:
-	smb_rput(req);
-out:
-	return result;
-}
-
-static int
-smb_proc_getattr_unix(struct smb_sb_info *server, struct dentry *dir,
-		      struct smb_fattr *attr)
-{
-	struct smb_request *req;
-	int result;
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, PAGE_SIZE)))
-		goto out;
-
-	result = smb_proc_getattr_trans2(server, dir, req,
-					 SMB_QUERY_FILE_UNIX_BASIC);
-	if (result < 0)
-		goto out_free;
-
-	smb_decode_unix_basic(attr, server, req->rq_data);
-
-out_free:
-	smb_rput(req);
-out:
-	return result;
-}
-
-static int
-smb_proc_getattr_95(struct smb_sb_info *server, struct dentry *dir,
-		    struct smb_fattr *attr)
-{
-	struct inode *inode = dir->d_inode;
-	int result;
-
-	/* FIXME: why not use the "all" version? */
-	result = smb_proc_getattr_trans2_std(server, dir, attr);
-	if (result < 0)
-		goto out;
-
-	/*
-	 * None of the getattr versions here can make win9x return the right
-	 * filesize if there are changes made to an open file.
-	 * A seek-to-end does return the right size, but we only need to do
-	 * that on files we have written.
-	 */
-	if (inode && SMB_I(inode)->flags & SMB_F_LOCALWRITE &&
-	    smb_is_open(inode))
-	{
-		__u16 fileid = SMB_I(inode)->fileid;
-		attr->f_size = smb_proc_seek(server, fileid, 2, 0);
-	}
-
-out:
-	return result;
-}
-
-static int
-smb_proc_ops_wait(struct smb_sb_info *server)
-{
-	int result;
-
-	result = wait_event_interruptible_timeout(server->conn_wq,
-				server->conn_complete, 30*HZ);
-
-	if (!result || signal_pending(current))
-		return -EIO;
-
-	return 0;
-}
-
-static int
-smb_proc_getattr_null(struct smb_sb_info *server, struct dentry *dir,
-			  struct smb_fattr *fattr)
-{
-	int result;
-
-	if (smb_proc_ops_wait(server) < 0)
-		return -EIO;
-
-	smb_init_dirent(server, fattr);
-	result = server->ops->getattr(server, dir, fattr);
-	smb_finish_dirent(server, fattr);
-
-	return result;
-}
-
-static int
-smb_proc_readdir_null(struct file *filp, void *dirent, filldir_t filldir,
-		      struct smb_cache_control *ctl)
-{
-	struct smb_sb_info *server = server_from_dentry(filp->f_path.dentry);
-
-	if (smb_proc_ops_wait(server) < 0)
-		return -EIO;
-
-	return server->ops->readdir(filp, dirent, filldir, ctl);
-}
-
-int
-smb_proc_getattr(struct dentry *dir, struct smb_fattr *fattr)
-{
-	struct smb_sb_info *server = server_from_dentry(dir);
-	int result;
-
-	smb_init_dirent(server, fattr);
-	result = server->ops->getattr(server, dir, fattr);
-	smb_finish_dirent(server, fattr);
-
-	return result;
-}
-
-
-/*
- * Because of bugs in the core protocol, we use this only to set
- * attributes. See smb_proc_settime() below for timestamp handling.
- *
- * Bugs Noted:
- * (1) If mtime is non-zero, both Win 3.1 and Win 95 fail
- * with an undocumented error (ERRDOS code 50). Setting
- * mtime to 0 allows the attributes to be set.
- * (2) The extra parameters following the name string aren't
- * in the CIFS docs, but seem to be necessary for operation.
- */
-static int
-smb_proc_setattr_core(struct smb_sb_info *server, struct dentry *dentry,
-		      __u16 attr)
-{
-	char *p;
-	int result;
-	struct smb_request *req;
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, PAGE_SIZE)))
-		goto out;
-
-	p = smb_setup_header(req, SMBsetatr, 8, 0);
-	WSET(req->rq_header, smb_vwv0, attr);
-	DSET(req->rq_header, smb_vwv1, 0); /* mtime */
-	WSET(req->rq_header, smb_vwv3, 0); /* reserved values */
-	WSET(req->rq_header, smb_vwv4, 0);
-	WSET(req->rq_header, smb_vwv5, 0);
-	WSET(req->rq_header, smb_vwv6, 0);
-	WSET(req->rq_header, smb_vwv7, 0);
-	result = smb_simple_encode_path(req, &p, dentry, NULL);
-	if (result < 0)
-		goto out_free;
-	if (p + 2 > (char *)req->rq_buffer + req->rq_bufsize) {
-		result = -ENAMETOOLONG;
-		goto out_free;
-	}
-	*p++ = 4;
-	*p++ = 0;
-	smb_setup_bcc(req, p);
-
-	result = smb_request_ok(req, SMBsetatr, 0, 0);
-	if (result < 0)
-		goto out_free;
-	result = 0;
-
-out_free:
-	smb_rput(req);
-out:
-	return result;
-}
-
-/*
- * Because of bugs in the trans2 setattr messages, we must set
- * attributes and timestamps separately. The core SMBsetatr
- * message seems to be the only reliable way to set attributes.
- */
-int
-smb_proc_setattr(struct dentry *dir, struct smb_fattr *fattr)
-{
-	struct smb_sb_info *server = server_from_dentry(dir);
-	int result;
-
-	VERBOSE("setting %s/%s, open=%d\n", 
-		DENTRY_PATH(dir), smb_is_open(dir->d_inode));
-	result = smb_proc_setattr_core(server, dir, fattr->attr);
-	return result;
-}
-
-/*
- * Sets the timestamps for an file open with write permissions.
- */
-static int
-smb_proc_setattr_ext(struct smb_sb_info *server,
-		      struct inode *inode, struct smb_fattr *fattr)
-{
-	__u16 date, time;
-	int result;
-	struct smb_request *req;
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, 0)))
-		goto out;
-
-	smb_setup_header(req, SMBsetattrE, 7, 0);
-	WSET(req->rq_header, smb_vwv0, SMB_I(inode)->fileid);
-	/* We don't change the creation time */
-	WSET(req->rq_header, smb_vwv1, 0);
-	WSET(req->rq_header, smb_vwv2, 0);
-	date_unix2dos(server, fattr->f_atime.tv_sec, &date, &time);
-	WSET(req->rq_header, smb_vwv3, date);
-	WSET(req->rq_header, smb_vwv4, time);
-	date_unix2dos(server, fattr->f_mtime.tv_sec, &date, &time);
-	WSET(req->rq_header, smb_vwv5, date);
-	WSET(req->rq_header, smb_vwv6, time);
-#ifdef SMBFS_DEBUG_TIMESTAMP
-	printk(KERN_DEBUG "smb_proc_setattr_ext: date=%d, time=%d, mtime=%ld\n",
-	       date, time, fattr->f_mtime);
-#endif
-
-	req->rq_flags |= SMB_REQ_NORETRY;
-	result = smb_request_ok(req, SMBsetattrE, 0, 0);
-	if (result < 0)
-		goto out_free;
-	result = 0;
-out_free:
-	smb_rput(req);
-out:
-	return result;
-}
-
-/*
- * Bugs Noted:
- * (1) The TRANSACT2_SETPATHINFO message under Win NT 4.0 doesn't
- * set the file's attribute flags.
- */
-static int
-smb_proc_setattr_trans2(struct smb_sb_info *server,
-			struct dentry *dir, struct smb_fattr *fattr)
-{
-	__u16 date, time;
-	char *p, *param;
-	int result;
-	char data[26];
-	struct smb_request *req;
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, PAGE_SIZE)))
-		goto out;
-	param = req->rq_buffer;
-
-	WSET(param, 0, 1);	/* Info level SMB_INFO_STANDARD */
-	DSET(param, 2, 0);
-	result = smb_encode_path(server, param+6, SMB_MAXPATHLEN+1, dir, NULL);
-	if (result < 0)
-		goto out_free;
-	p = param + 6 + result;
-
-	WSET(data, 0, 0); /* creation time */
-	WSET(data, 2, 0);
-	date_unix2dos(server, fattr->f_atime.tv_sec, &date, &time);
-	WSET(data, 4, date);
-	WSET(data, 6, time);
-	date_unix2dos(server, fattr->f_mtime.tv_sec, &date, &time);
-	WSET(data, 8, date);
-	WSET(data, 10, time);
-#ifdef SMBFS_DEBUG_TIMESTAMP
-	printk(KERN_DEBUG "setattr_trans2: %s/%s, date=%x, time=%x, mtime=%ld\n", 
-	       DENTRY_PATH(dir), date, time, fattr->f_mtime);
-#endif
-	DSET(data, 12, 0); /* size */
-	DSET(data, 16, 0); /* blksize */
-	WSET(data, 20, 0); /* attr */
-	DSET(data, 22, 0); /* ULONG EA size */
-
-	req->rq_trans2_command = TRANSACT2_SETPATHINFO;
-	req->rq_ldata = 26;
-	req->rq_data  = data;
-	req->rq_lparm = p - param;
-	req->rq_parm  = param;
-	req->rq_flags = 0;
-	result = smb_add_request(req);
-	if (result < 0)
-		goto out_free;
-	result = 0;
-	if (req->rq_rcls != 0)
-		result = smb_errno(req);
-
-out_free:
-	smb_rput(req);
-out:
-	return result;
-}
-
-/*
- * ATTR_MODE      0x001
- * ATTR_UID       0x002
- * ATTR_GID       0x004
- * ATTR_SIZE      0x008
- * ATTR_ATIME     0x010
- * ATTR_MTIME     0x020
- * ATTR_CTIME     0x040
- * ATTR_ATIME_SET 0x080
- * ATTR_MTIME_SET 0x100
- * ATTR_FORCE     0x200	
- * ATTR_ATTR_FLAG 0x400
- *
- * major/minor should only be set by mknod.
- */
-int
-smb_proc_setattr_unix(struct dentry *d, struct iattr *attr,
-		      unsigned int major, unsigned int minor)
-{
-	struct smb_sb_info *server = server_from_dentry(d);
-	u64 nttime;
-	char *p, *param;
-	int result;
-	char data[100];
-	struct smb_request *req;
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, PAGE_SIZE)))
-		goto out;
-	param = req->rq_buffer;
-
-	DEBUG1("valid flags = 0x%04x\n", attr->ia_valid);
-
-	WSET(param, 0, SMB_SET_FILE_UNIX_BASIC);
-	DSET(param, 2, 0);
-	result = smb_encode_path(server, param+6, SMB_MAXPATHLEN+1, d, NULL);
-	if (result < 0)
-		goto out_free;
-	p = param + 6 + result;
-
-	/* 0 L file size in bytes */
-	/* 8 L file size on disk in bytes (block count) */
-	/* 40 L uid */
-	/* 48 L gid */
-	/* 56 W file type enum */
-	/* 60 L devmajor */
-	/* 68 L devminor */
-	/* 76 L unique ID (inode) */
-	/* 84 L permissions */
-	/* 92 L link count */
-	LSET(data, 0, SMB_SIZE_NO_CHANGE);
-	LSET(data, 8, SMB_SIZE_NO_CHANGE);
-	LSET(data, 16, SMB_TIME_NO_CHANGE);
-	LSET(data, 24, SMB_TIME_NO_CHANGE);
-	LSET(data, 32, SMB_TIME_NO_CHANGE);
-	LSET(data, 40, SMB_UID_NO_CHANGE);
-	LSET(data, 48, SMB_GID_NO_CHANGE);
-	DSET(data, 56, smb_filetype_from_mode(attr->ia_mode));
-	LSET(data, 60, major);
-	LSET(data, 68, minor);
-	LSET(data, 76, 0);
-	LSET(data, 84, SMB_MODE_NO_CHANGE);
-	LSET(data, 92, 0);
-
-	if (attr->ia_valid & ATTR_SIZE) {
-		LSET(data, 0, attr->ia_size);
-		LSET(data, 8, 0); /* can't set anyway */
-	}
-
-	/*
-	 * FIXME: check the conversion function it the correct one
-	 *
-	 * we can't set ctime but we might as well pass this to the server
-	 * and let it ignore it.
-	 */
-	if (attr->ia_valid & ATTR_CTIME) {
-		nttime = smb_unixutc2ntutc(attr->ia_ctime);
-		LSET(data, 16, nttime);
-	}
-	if (attr->ia_valid & ATTR_ATIME) {
-		nttime = smb_unixutc2ntutc(attr->ia_atime);
-		LSET(data, 24, nttime);
-	}
-	if (attr->ia_valid & ATTR_MTIME) {
-		nttime = smb_unixutc2ntutc(attr->ia_mtime);
-		LSET(data, 32, nttime);
-	}
-	
-	if (attr->ia_valid & ATTR_UID) {
-		LSET(data, 40, attr->ia_uid);
-	}
-	if (attr->ia_valid & ATTR_GID) {
-		LSET(data, 48, attr->ia_gid); 
-	}
-	
-	if (attr->ia_valid & ATTR_MODE) {
-		LSET(data, 84, attr->ia_mode);
-	}
-
-	req->rq_trans2_command = TRANSACT2_SETPATHINFO;
-	req->rq_ldata = 100;
-	req->rq_data  = data;
-	req->rq_lparm = p - param;
-	req->rq_parm  = param;
-	req->rq_flags = 0;
-	result = smb_add_request(req);
-
-out_free:
-	smb_rput(req);
-out:
-	return result;
-}
-
-
-/*
- * Set the modify and access timestamps for a file.
- *
- * Incredibly enough, in all of SMB there is no message to allow
- * setting both attributes and timestamps at once. 
- *
- * Bugs Noted:
- * (1) Win 95 doesn't support the TRANSACT2_SETFILEINFO message 
- * with info level 1 (INFO_STANDARD).
- * (2) Win 95 seems not to support setting directory timestamps.
- * (3) Under the core protocol apparently the only way to set the
- * timestamp is to open and close the file.
- */
-int
-smb_proc_settime(struct dentry *dentry, struct smb_fattr *fattr)
-{
-	struct smb_sb_info *server = server_from_dentry(dentry);
-	struct inode *inode = dentry->d_inode;
-	int result;
-
-	VERBOSE("setting %s/%s, open=%d\n",
-		DENTRY_PATH(dentry), smb_is_open(inode));
-
-	/* setting the time on a Win95 server fails (tridge) */
-	if (server->opt.protocol >= SMB_PROTOCOL_LANMAN2 && 
-	    !(server->mnt->flags & SMB_MOUNT_WIN95)) {
-		if (smb_is_open(inode) && SMB_I(inode)->access != SMB_O_RDONLY)
-			result = smb_proc_setattr_ext(server, inode, fattr);
-		else
-			result = smb_proc_setattr_trans2(server, dentry, fattr);
-	} else {
-		/*
-		 * Fail silently on directories ... timestamp can't be set?
-		 */
-		result = 0;
-		if (S_ISREG(inode->i_mode)) {
-			/*
-			 * Set the mtime by opening and closing the file.
-			 * Note that the file is opened read-only, but this
-			 * still allows us to set the date (tridge)
-			 */
-			result = -EACCES;
-			if (!smb_is_open(inode))
-				smb_proc_open(server, dentry, SMB_O_RDONLY);
-			if (smb_is_open(inode)) {
-				inode->i_mtime = fattr->f_mtime;
-				result = smb_proc_close_inode(server, inode);
-			}
-		}
-	}
-
-	return result;
-}
-
-int
-smb_proc_dskattr(struct dentry *dentry, struct kstatfs *attr)
-{
-	struct smb_sb_info *server = SMB_SB(dentry->d_sb);
-	int result;
-	char *p;
-	long unit;
-	struct smb_request *req;
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, 0)))
-		goto out;
-
-	smb_setup_header(req, SMBdskattr, 0, 0);
-	if ((result = smb_request_ok(req, SMBdskattr, 5, 0)) < 0)
-		goto out_free;
-	p = SMB_VWV(req->rq_header);
-	unit = (WVAL(p, 2) * WVAL(p, 4)) >> SMB_ST_BLKSHIFT;
-	attr->f_blocks = WVAL(p, 0) * unit;
-	attr->f_bsize  = SMB_ST_BLKSIZE;
-	attr->f_bavail = attr->f_bfree = WVAL(p, 6) * unit;
-	result = 0;
-
-out_free:
-	smb_rput(req);
-out:
-	return result;
-}
-
-int
-smb_proc_read_link(struct smb_sb_info *server, struct dentry *d,
-		   char *buffer, int len)
-{
-	char *p, *param;
-	int result;
-	struct smb_request *req;
-
-	DEBUG1("readlink of %s/%s\n", DENTRY_PATH(d));
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, PAGE_SIZE)))
-		goto out;
-	param = req->rq_buffer;
-
-	WSET(param, 0, SMB_QUERY_FILE_UNIX_LINK);
-	DSET(param, 2, 0);
-	result = smb_encode_path(server, param+6, SMB_MAXPATHLEN+1, d, NULL);
-	if (result < 0)
-		goto out_free;
-	p = param + 6 + result;
-
-	req->rq_trans2_command = TRANSACT2_QPATHINFO;
-	req->rq_ldata = 0;
-	req->rq_data  = NULL;
-	req->rq_lparm = p - param;
-	req->rq_parm  = param;
-	req->rq_flags = 0;
-	result = smb_add_request(req);
-	if (result < 0)
-		goto out_free;
-	DEBUG1("for %s: result=%d, rcls=%d, err=%d\n",
-		&param[6], result, req->rq_rcls, req->rq_err);
-
-	/* copy data up to the \0 or buffer length */
-	result = len;
-	if (req->rq_ldata < len)
-		result = req->rq_ldata;
-	strncpy(buffer, req->rq_data, result);
-
-out_free:
-	smb_rput(req);
-out:
-	return result;
-}
-
-
-/*
- * Create a symlink object called dentry which points to oldpath.
- * Samba does not permit dangling links but returns a suitable error message.
- */
-int
-smb_proc_symlink(struct smb_sb_info *server, struct dentry *d,
-		 const char *oldpath)
-{
-	char *p, *param;
-	int result;
-	struct smb_request *req;
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, PAGE_SIZE)))
-		goto out;
-	param = req->rq_buffer;
-
-	WSET(param, 0, SMB_SET_FILE_UNIX_LINK);
-	DSET(param, 2, 0);
-	result = smb_encode_path(server, param + 6, SMB_MAXPATHLEN+1, d, NULL);
-	if (result < 0)
-		goto out_free;
-	p = param + 6 + result;
-
-	req->rq_trans2_command = TRANSACT2_SETPATHINFO;
-	req->rq_ldata = strlen(oldpath) + 1;
-	req->rq_data  = (char *) oldpath;
-	req->rq_lparm = p - param;
-	req->rq_parm  = param;
-	req->rq_flags = 0;
-	result = smb_add_request(req);
-	if (result < 0)
-		goto out_free;
-
-	DEBUG1("for %s: result=%d, rcls=%d, err=%d\n",
-		&param[6], result, req->rq_rcls, req->rq_err);
-	result = 0;
-
-out_free:
-	smb_rput(req);
-out:
-	return result;
-}
-
-/*
- * Create a hard link object called new_dentry which points to dentry.
- */
-int
-smb_proc_link(struct smb_sb_info *server, struct dentry *dentry,
-	      struct dentry *new_dentry)
-{
-	char *p, *param;
-	int result;
-	struct smb_request *req;
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, PAGE_SIZE)))
-		goto out;
-	param = req->rq_buffer;
-
-	WSET(param, 0, SMB_SET_FILE_UNIX_HLINK);
-	DSET(param, 2, 0);
-	result = smb_encode_path(server, param + 6, SMB_MAXPATHLEN+1,
-				 new_dentry, NULL);
-	if (result < 0)
-		goto out_free;
-	p = param + 6 + result;
-
-	/* Grr, pointless separation of parameters and data ... */
-	req->rq_data = p;
-	req->rq_ldata = smb_encode_path(server, p, SMB_MAXPATHLEN+1,
-					dentry, NULL);
-
-	req->rq_trans2_command = TRANSACT2_SETPATHINFO;
-	req->rq_lparm = p - param;
-	req->rq_parm  = param;
-	req->rq_flags = 0;
-	result = smb_add_request(req);
-	if (result < 0)
-		goto out_free;
-
-	DEBUG1("for %s: result=%d, rcls=%d, err=%d\n",
-	       &param[6], result, req->rq_rcls, req->rq_err);
-	result = 0;
-
-out_free:
-	smb_rput(req);
-out:
-	return result;
-}
-
-static int
-smb_proc_query_cifsunix(struct smb_sb_info *server)
-{
-	int result;
-	int major, minor;
-	u64 caps;
-	char param[2];
-	struct smb_request *req;
-
-	result = -ENOMEM;
-	if (! (req = smb_alloc_request(server, 100)))
-		goto out;
-
-	WSET(param, 0, SMB_QUERY_CIFS_UNIX_INFO);
-
-	req->rq_trans2_command = TRANSACT2_QFSINFO;
-	req->rq_ldata = 0;
-	req->rq_data  = NULL;
-	req->rq_lparm = 2;
-	req->rq_parm  = param;
-	req->rq_flags = 0;
-	result = smb_add_request(req);
-	if (result < 0)
-		goto out_free;
-
-	if (req->rq_ldata < 12) {
-		PARANOIA("Not enough data\n");
-		goto out_free;
-	}
-	major = WVAL(req->rq_data, 0);
-	minor = WVAL(req->rq_data, 2);
-
-	DEBUG1("Server implements CIFS Extensions for UNIX systems v%d.%d\n",
-	       major, minor);
-	/* FIXME: verify that we are ok with this major/minor? */
-
-	caps = LVAL(req->rq_data, 4);
-	DEBUG1("Server capabilities 0x%016llx\n", caps);
-
-out_free:
-	smb_rput(req);
-out:
-	return result;
-}
-
-
-static void
-install_ops(struct smb_ops *dst, struct smb_ops *src)
-{
-	memcpy(dst, src, sizeof(void *) * SMB_OPS_NUM_STATIC);
-}
-
-/* < LANMAN2 */
-static struct smb_ops smb_ops_core =
-{
-	.read		= smb_proc_read,
-	.write		= smb_proc_write,
-	.readdir	= smb_proc_readdir_short,
-	.getattr	= smb_proc_getattr_core,
-	.truncate	= smb_proc_trunc32,
-};
-
-/* LANMAN2, OS/2, others? */
-static struct smb_ops smb_ops_os2 =
-{
-	.read		= smb_proc_read,
-	.write		= smb_proc_write,
-	.readdir	= smb_proc_readdir_long,
-	.getattr	= smb_proc_getattr_trans2_std,
-	.truncate	= smb_proc_trunc32,
-};
-
-/* Win95, and possibly some NetApp versions too */
-static struct smb_ops smb_ops_win95 =
-{
-	.read		= smb_proc_read,    /* does not support 12word readX */
-	.write		= smb_proc_write,
-	.readdir	= smb_proc_readdir_long,
-	.getattr	= smb_proc_getattr_95,
-	.truncate	= smb_proc_trunc95,
-};
-
-/* Samba, NT4 and NT5 */
-static struct smb_ops smb_ops_winNT =
-{
-	.read		= smb_proc_readX,
-	.write		= smb_proc_writeX,
-	.readdir	= smb_proc_readdir_long,
-	.getattr	= smb_proc_getattr_trans2_all,
-	.truncate	= smb_proc_trunc64,
-};
-
-/* Samba w/ unix extensions. Others? */
-static struct smb_ops smb_ops_unix =
-{
-	.read		= smb_proc_readX,
-	.write		= smb_proc_writeX,
-	.readdir	= smb_proc_readdir_long,
-	.getattr	= smb_proc_getattr_unix,
-	/* FIXME: core/ext/time setattr needs to be cleaned up! */
-	/* .setattr	= smb_proc_setattr_unix, */
-	.truncate	= smb_proc_trunc64,
-};
-
-/* Place holder until real ops are in place */
-static struct smb_ops smb_ops_null =
-{
-	.readdir	= smb_proc_readdir_null,
-	.getattr	= smb_proc_getattr_null,
-};
-
-void smb_install_null_ops(struct smb_ops *ops)
-{
-	install_ops(ops, &smb_ops_null);
-}
diff --git a/fs/smbfs/proto.h b/fs/smbfs/proto.h
deleted file mode 100644
index 05939a6f43e..00000000000
--- a/fs/smbfs/proto.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- *  Autogenerated with cproto on:  Sat Sep 13 17:18:51 CEST 2003
- */
-
-struct smb_request;
-struct sock;
-struct statfs;
-
-/* proc.c */
-extern int smb_setcodepage(struct smb_sb_info *server, struct smb_nls_codepage *cp);
-extern __u32 smb_len(__u8 *p);
-extern int smb_get_rsize(struct smb_sb_info *server);
-extern int smb_get_wsize(struct smb_sb_info *server);
-extern int smb_errno(struct smb_request *req);
-extern int smb_newconn(struct smb_sb_info *server, struct smb_conn_opt *opt);
-extern __u8 *smb_setup_header(struct smb_request *req, __u8 command, __u16 wct, __u16 bcc);
-extern int smb_open(struct dentry *dentry, int wish);
-extern int smb_close(struct inode *ino);
-extern int smb_close_fileid(struct dentry *dentry, __u16 fileid);
-extern int smb_proc_create(struct dentry *dentry, __u16 attr, time_t ctime, __u16 *fileid);
-extern int smb_proc_mv(struct dentry *old_dentry, struct dentry *new_dentry);
-extern int smb_proc_mkdir(struct dentry *dentry);
-extern int smb_proc_rmdir(struct dentry *dentry);
-extern int smb_proc_unlink(struct dentry *dentry);
-extern int smb_proc_flush(struct smb_sb_info *server, __u16 fileid);
-extern void smb_init_root_dirent(struct smb_sb_info *server, struct smb_fattr *fattr,
-				 struct super_block *sb);
-extern int smb_proc_getattr(struct dentry *dir, struct smb_fattr *fattr);
-extern int smb_proc_setattr(struct dentry *dir, struct smb_fattr *fattr);
-extern int smb_proc_setattr_unix(struct dentry *d, struct iattr *attr, unsigned int major, unsigned int minor);
-extern int smb_proc_settime(struct dentry *dentry, struct smb_fattr *fattr);
-extern int smb_proc_dskattr(struct dentry *dentry, struct kstatfs *attr);
-extern int smb_proc_read_link(struct smb_sb_info *server, struct dentry *d, char *buffer, int len);
-extern int smb_proc_symlink(struct smb_sb_info *server, struct dentry *d, const char *oldpath);
-extern int smb_proc_link(struct smb_sb_info *server, struct dentry *dentry, struct dentry *new_dentry);
-extern void smb_install_null_ops(struct smb_ops *ops);
-/* dir.c */
-extern const struct file_operations smb_dir_operations;
-extern const struct inode_operations smb_dir_inode_operations;
-extern const struct inode_operations smb_dir_inode_operations_unix;
-extern void smb_new_dentry(struct dentry *dentry);
-extern void smb_renew_times(struct dentry *dentry);
-/* cache.c */
-extern void smb_invalid_dir_cache(struct inode *dir);
-extern void smb_invalidate_dircache_entries(struct dentry *parent);
-extern struct dentry *smb_dget_fpos(struct dentry *dentry, struct dentry *parent, unsigned long fpos);
-extern int smb_fill_cache(struct file *filp, void *dirent, filldir_t filldir, struct smb_cache_control *ctrl, struct qstr *qname, struct smb_fattr *entry);
-/* sock.c */
-extern void smb_data_ready(struct sock *sk, int len);
-extern int smb_valid_socket(struct inode *inode);
-extern void smb_close_socket(struct smb_sb_info *server);
-extern int smb_recv_available(struct smb_sb_info *server);
-extern int smb_receive_header(struct smb_sb_info *server);
-extern int smb_receive_drop(struct smb_sb_info *server);
-extern int smb_receive(struct smb_sb_info *server, struct smb_request *req);
-extern int smb_send_request(struct smb_request *req);
-/* inode.c */
-extern struct inode *smb_iget(struct super_block *sb, struct smb_fattr *fattr);
-extern void smb_get_inode_attr(struct inode *inode, struct smb_fattr *fattr);
-extern void smb_set_inode_attr(struct inode *inode, struct smb_fattr *fattr);
-extern void smb_invalidate_inodes(struct smb_sb_info *server);
-extern int smb_revalidate_inode(struct dentry *dentry);
-extern int smb_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
-extern int smb_notify_change(struct dentry *dentry, struct iattr *attr);
-/* file.c */
-extern const struct address_space_operations smb_file_aops;
-extern const struct file_operations smb_file_operations;
-extern const struct inode_operations smb_file_inode_operations;
-/* ioctl.c */
-extern long smb_ioctl(struct file *filp, unsigned int cmd, unsigned long arg);
-/* smbiod.c */
-extern void smbiod_wake_up(void);
-extern int smbiod_register_server(struct smb_sb_info *server);
-extern void smbiod_unregister_server(struct smb_sb_info *server);
-extern void smbiod_flush(struct smb_sb_info *server);
-extern int smbiod_retry(struct smb_sb_info *server);
-/* request.c */
-extern int smb_init_request_cache(void);
-extern void smb_destroy_request_cache(void);
-extern struct smb_request *smb_alloc_request(struct smb_sb_info *server, int bufsize);
-extern void smb_rput(struct smb_request *req);
-extern int smb_add_request(struct smb_request *req);
-extern int smb_request_send_server(struct smb_sb_info *server);
-extern int smb_request_recv(struct smb_sb_info *server);
-/* symlink.c */
-extern int smb_symlink(struct inode *inode, struct dentry *dentry, const char *oldname);
-extern const struct inode_operations smb_link_inode_operations;
diff --git a/fs/smbfs/request.c b/fs/smbfs/request.c
deleted file mode 100644
index 45f45933e86..00000000000
--- a/fs/smbfs/request.c
+++ /dev/null
@@ -1,818 +0,0 @@
-/*
- *  request.c
- *
- *  Copyright (C) 2001 by Urban Widmark
- *
- *  Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/fs.h>
-#include <linux/slab.h>
-#include <linux/net.h>
-#include <linux/sched.h>
-
-#include <linux/smb_fs.h>
-#include <linux/smbno.h>
-#include <linux/smb_mount.h>
-
-#include "smb_debug.h"
-#include "request.h"
-#include "proto.h"
-
-/* #define SMB_SLAB_DEBUG	(SLAB_RED_ZONE | SLAB_POISON) */
-#define SMB_SLAB_DEBUG	0
-
-/* cache for request structures */
-static struct kmem_cache *req_cachep;
-
-static int smb_request_send_req(struct smb_request *req);
-
-/*
-  /proc/slabinfo:
-  name, active, num, objsize, active_slabs, num_slaps, #pages
-*/
-
-
-int smb_init_request_cache(void)
-{
-	req_cachep = kmem_cache_create("smb_request",
-				       sizeof(struct smb_request), 0,
-				       SMB_SLAB_DEBUG | SLAB_HWCACHE_ALIGN,
-				       NULL);
-	if (req_cachep == NULL)
-		return -ENOMEM;
-
-	return 0;
-}
-
-void smb_destroy_request_cache(void)
-{
-	kmem_cache_destroy(req_cachep);
-}
-
-/*
- * Allocate and initialise a request structure
- */
-static struct smb_request *smb_do_alloc_request(struct smb_sb_info *server,
-						int bufsize)
-{
-	struct smb_request *req;
-	unsigned char *buf = NULL;
-
-	req = kmem_cache_zalloc(req_cachep, GFP_KERNEL);
-	VERBOSE("allocating request: %p\n", req);
-	if (!req)
-		goto out;
-
-	if (bufsize > 0) {
-		buf = kmalloc(bufsize, GFP_NOFS);
-		if (!buf) {
-			kmem_cache_free(req_cachep, req);
-			return NULL;
-		}
-	}
-
-	req->rq_buffer = buf;
-	req->rq_bufsize = bufsize;
-	req->rq_server = server;
-	init_waitqueue_head(&req->rq_wait);
-	INIT_LIST_HEAD(&req->rq_queue);
-	atomic_set(&req->rq_count, 1);
-
-out:
-	return req;
-}
-
-struct smb_request *smb_alloc_request(struct smb_sb_info *server, int bufsize)
-{
-	struct smb_request *req = NULL;
-
-	for (;;) {
-		atomic_inc(&server->nr_requests);
-		if (atomic_read(&server->nr_requests) <= MAX_REQUEST_HARD) {
-			req = smb_do_alloc_request(server, bufsize);
-			if (req != NULL)
-				break;
-		}
-
-#if 0
-		/*
-		 * Try to free up at least one request in order to stay
-		 * below the hard limit
-		 */
-                if (nfs_try_to_free_pages(server))
-			continue;
-
-		if (fatal_signal_pending(current))
-			return ERR_PTR(-ERESTARTSYS);
-		current->policy = SCHED_YIELD;
-		schedule();
-#else
-		/* FIXME: we want something like nfs does above, but that
-		   requires changes to all callers and can wait. */
-		break;
-#endif
-	}
-	return req;
-}
-
-static void smb_free_request(struct smb_request *req)
-{
-	atomic_dec(&req->rq_server->nr_requests);
-	if (req->rq_buffer && !(req->rq_flags & SMB_REQ_STATIC))
-		kfree(req->rq_buffer);
-	kfree(req->rq_trans2buffer);
-	kmem_cache_free(req_cachep, req);
-}
-
-/*
- * What prevents a rget to race with a rput? The count must never drop to zero
- * while it is in use. Only rput if it is ok that it is free'd.
- */
-static void smb_rget(struct smb_request *req)
-{
-	atomic_inc(&req->rq_count);
-}
-void smb_rput(struct smb_request *req)
-{
-	if (atomic_dec_and_test(&req->rq_count)) {
-		list_del_init(&req->rq_queue);
-		smb_free_request(req);
-	}
-}
-
-/* setup to receive the data part of the SMB */
-static int smb_setup_bcc(struct smb_request *req)
-{
-	int result = 0;
-	req->rq_rlen = smb_len(req->rq_header) + 4 - req->rq_bytes_recvd;
-
-	if (req->rq_rlen > req->rq_bufsize) {
-		PARANOIA("Packet too large %d > %d\n",
-			 req->rq_rlen, req->rq_bufsize);
-		return -ENOBUFS;
-	}
-
-	req->rq_iov[0].iov_base = req->rq_buffer;
-	req->rq_iov[0].iov_len  = req->rq_rlen;
-	req->rq_iovlen = 1;
-
-	return result;
-}
-
-/*
- * Prepare a "normal" request structure.
- */
-static int smb_setup_request(struct smb_request *req)
-{
-	int len = smb_len(req->rq_header) + 4;
-	req->rq_slen = len;
-
-	/* if we expect a data part in the reply we set the iov's to read it */
-	if (req->rq_resp_bcc)
-		req->rq_setup_read = smb_setup_bcc;
-
-	/* This tries to support re-using the same request */
-	req->rq_bytes_sent = 0;
-	req->rq_rcls = 0;
-	req->rq_err = 0;
-	req->rq_errno = 0;
-	req->rq_fragment = 0;
-	kfree(req->rq_trans2buffer);
-	req->rq_trans2buffer = NULL;
-
-	return 0;
-}
-
-/*
- * Prepare a transaction2 request structure
- */
-static int smb_setup_trans2request(struct smb_request *req)
-{
-	struct smb_sb_info *server = req->rq_server;
-	int mparam, mdata;
-	static unsigned char padding[4];
-
-	/* I know the following is very ugly, but I want to build the
-	   smb packet as efficiently as possible. */
-
-	const int smb_parameters = 15;
-	const int header = SMB_HEADER_LEN + 2 * smb_parameters + 2;
-	const int oparam = ALIGN(header + 3, sizeof(u32));
-	const int odata  = ALIGN(oparam + req->rq_lparm, sizeof(u32));
-	const int bcc = (req->rq_data ? odata + req->rq_ldata :
-					oparam + req->rq_lparm) - header;
-
-	if ((bcc + oparam) > server->opt.max_xmit)
-		return -ENOMEM;
-	smb_setup_header(req, SMBtrans2, smb_parameters, bcc);
-
-	/*
-	 * max parameters + max data + max setup == bufsize to make NT4 happy
-	 * and not abort the transfer or split into multiple responses. It also
-	 * makes smbfs happy as handling packets larger than the buffer size
-	 * is extra work.
-	 *
-	 * OS/2 is probably going to hate me for this ...
-	 */
-	mparam = SMB_TRANS2_MAX_PARAM;
-	mdata = req->rq_bufsize - mparam;
-
-	mdata = server->opt.max_xmit - mparam - 100;
-	if (mdata < 1024) {
-		mdata = 1024;
-		mparam = 20;
-	}
-
-#if 0
-	/* NT/win2k has ~4k max_xmit, so with this we request more than it wants
-	   to return as one SMB. Useful for testing the fragmented trans2
-	   handling. */
-	mdata = 8192;
-#endif
-
-	WSET(req->rq_header, smb_tpscnt, req->rq_lparm);
-	WSET(req->rq_header, smb_tdscnt, req->rq_ldata);
-	WSET(req->rq_header, smb_mprcnt, mparam);
-	WSET(req->rq_header, smb_mdrcnt, mdata);
-	WSET(req->rq_header, smb_msrcnt, 0);    /* max setup always 0 ? */
-	WSET(req->rq_header, smb_flags, 0);
-	DSET(req->rq_header, smb_timeout, 0);
-	WSET(req->rq_header, smb_pscnt, req->rq_lparm);
-	WSET(req->rq_header, smb_psoff, oparam - 4);
-	WSET(req->rq_header, smb_dscnt, req->rq_ldata);
-	WSET(req->rq_header, smb_dsoff, req->rq_data ? odata - 4 : 0);
-	*(req->rq_header + smb_suwcnt) = 0x01;          /* setup count */
-	*(req->rq_header + smb_suwcnt + 1) = 0x00;      /* reserved */
-	WSET(req->rq_header, smb_setup0, req->rq_trans2_command);
-
-	req->rq_iovlen = 2;
-	req->rq_iov[0].iov_base = (void *) req->rq_header;
-	req->rq_iov[0].iov_len = oparam;
-	req->rq_iov[1].iov_base = (req->rq_parm==NULL) ? padding : req->rq_parm;
-	req->rq_iov[1].iov_len = req->rq_lparm;
-	req->rq_slen = oparam + req->rq_lparm;
-
-	if (req->rq_data) {
-		req->rq_iovlen += 2;
-		req->rq_iov[2].iov_base = padding;
-		req->rq_iov[2].iov_len = odata - oparam - req->rq_lparm;
-		req->rq_iov[3].iov_base = req->rq_data;
-		req->rq_iov[3].iov_len = req->rq_ldata;
-		req->rq_slen = odata + req->rq_ldata;
-	}
-
-	/* always a data part for trans2 replies */
-	req->rq_setup_read = smb_setup_bcc;
-
-	return 0;
-}
-
-/*
- * Add a request and tell smbiod to process it
- */
-int smb_add_request(struct smb_request *req)
-{
-	long timeleft;
-	struct smb_sb_info *server = req->rq_server;
-	int result = 0;
-
-	smb_setup_request(req);
-	if (req->rq_trans2_command) {
-		if (req->rq_buffer == NULL) {
-			PARANOIA("trans2 attempted without response buffer!\n");
-			return -EIO;
-		}
-		result = smb_setup_trans2request(req);
-	}
-	if (result < 0)
-		return result;
-
-#ifdef SMB_DEBUG_PACKET_SIZE
-	add_xmit_stats(req);
-#endif
-
-	/* add 'req' to the queue of requests */
-	if (smb_lock_server_interruptible(server))
-		return -EINTR;
-
-	/*
-	 * Try to send the request as the process. If that fails we queue the
-	 * request and let smbiod send it later.
-	 */
-
-	/* FIXME: each server has a number on the maximum number of parallel
-	   requests. 10, 50 or so. We should not allow more requests to be
-	   active. */
-	if (server->mid > 0xf000)
-		server->mid = 0;
-	req->rq_mid = server->mid++;
-	WSET(req->rq_header, smb_mid, req->rq_mid);
-
-	result = 0;
-	if (server->state == CONN_VALID) {
-		if (list_empty(&server->xmitq))
-			result = smb_request_send_req(req);
-		if (result < 0) {
-			/* Connection lost? */
-			server->conn_error = result;
-			server->state = CONN_INVALID;
-		}
-	}
-	if (result != 1)
-		list_add_tail(&req->rq_queue, &server->xmitq);
-	smb_rget(req);
-
-	if (server->state != CONN_VALID)
-		smbiod_retry(server);
-
-	smb_unlock_server(server);
-
-	smbiod_wake_up();
-
-	timeleft = wait_event_interruptible_timeout(req->rq_wait,
-				    req->rq_flags & SMB_REQ_RECEIVED, 30*HZ);
-	if (!timeleft || signal_pending(current)) {
-		/*
-		 * On timeout or on interrupt we want to try and remove the
-		 * request from the recvq/xmitq.
-		 * First check if the request is still part of a queue. (May
-		 * have been removed by some error condition)
-		 */
-		smb_lock_server(server);
-		if (!list_empty(&req->rq_queue)) {
-			list_del_init(&req->rq_queue);
-			smb_rput(req);
-		}
-		smb_unlock_server(server);
-	}
-
-	if (!timeleft) {
-		PARANOIA("request [%p, mid=%d] timed out!\n",
-			 req, req->rq_mid);
-		VERBOSE("smb_com:  %02x\n", *(req->rq_header + smb_com));
-		VERBOSE("smb_rcls: %02x\n", *(req->rq_header + smb_rcls));
-		VERBOSE("smb_flg:  %02x\n", *(req->rq_header + smb_flg));
-		VERBOSE("smb_tid:  %04x\n", WVAL(req->rq_header, smb_tid));
-		VERBOSE("smb_pid:  %04x\n", WVAL(req->rq_header, smb_pid));
-		VERBOSE("smb_uid:  %04x\n", WVAL(req->rq_header, smb_uid));
-		VERBOSE("smb_mid:  %04x\n", WVAL(req->rq_header, smb_mid));
-		VERBOSE("smb_wct:  %02x\n", *(req->rq_header + smb_wct));
-
-		req->rq_rcls = ERRSRV;
-		req->rq_err  = ERRtimeout;
-
-		/* Just in case it was "stuck" */
-		smbiod_wake_up();
-	}
-	VERBOSE("woke up, rcls=%d\n", req->rq_rcls);
-
-	if (req->rq_rcls != 0)
-		req->rq_errno = smb_errno(req);
-	if (signal_pending(current))
-		req->rq_errno = -ERESTARTSYS;
-	return req->rq_errno;
-}
-
-/*
- * Send a request and place it on the recvq if successfully sent.
- * Must be called with the server lock held.
- */
-static int smb_request_send_req(struct smb_request *req)
-{
-	struct smb_sb_info *server = req->rq_server;
-	int result;
-
-	if (req->rq_bytes_sent == 0) {
-		WSET(req->rq_header, smb_tid, server->opt.tid);
-		WSET(req->rq_header, smb_pid, 1);
-		WSET(req->rq_header, smb_uid, server->opt.server_uid);
-	}
-
-	result = smb_send_request(req);
-	if (result < 0 && result != -EAGAIN)
-		goto out;
-
-	result = 0;
-	if (!(req->rq_flags & SMB_REQ_TRANSMITTED))
-		goto out;
-
-	list_move_tail(&req->rq_queue, &server->recvq);
-	result = 1;
-out:
-	return result;
-}
-
-/*
- * Sends one request for this server. (smbiod)
- * Must be called with the server lock held.
- * Returns: <0 on error
- *           0 if no request could be completely sent
- *           1 if all data for one request was sent
- */
-int smb_request_send_server(struct smb_sb_info *server)
-{
-	struct list_head *head;
-	struct smb_request *req;
-	int result;
-
-	if (server->state != CONN_VALID)
-		return 0;
-
-	/* dequeue first request, if any */
-	req = NULL;
-	head = server->xmitq.next;
-	if (head != &server->xmitq) {
-		req = list_entry(head, struct smb_request, rq_queue);
-	}
-	if (!req)
-		return 0;
-
-	result = smb_request_send_req(req);
-	if (result < 0) {
-		server->conn_error = result;
-		list_move(&req->rq_queue, &server->xmitq);
-		result = -EIO;
-		goto out;
-	}
-
-out:
-	return result;
-}
-
-/*
- * Try to find a request matching this "mid". Typically the first entry will
- * be the matching one.
- */
-static struct smb_request *find_request(struct smb_sb_info *server, int mid)
-{
-	struct list_head *tmp;
-	struct smb_request *req = NULL;
-
-	list_for_each(tmp, &server->recvq) {
-		req = list_entry(tmp, struct smb_request, rq_queue);
-		if (req->rq_mid == mid) {
-			break;
-		}
-		req = NULL;
-	}
-
-	if (!req) {
-		VERBOSE("received reply with mid %d but no request!\n",
-			WVAL(server->header, smb_mid));
-		server->rstate = SMB_RECV_DROP;
-	}
-
-	return req;
-}
-
-/*
- * Called when we have read the smb header and believe this is a response.
- */
-static int smb_init_request(struct smb_sb_info *server, struct smb_request *req)
-{
-	int hdrlen, wct;
-
-	memcpy(req->rq_header, server->header, SMB_HEADER_LEN);
-
-	wct = *(req->rq_header + smb_wct);
-	if (wct > 20) {	
-		PARANOIA("wct too large, %d > 20\n", wct);
-		server->rstate = SMB_RECV_DROP;
-		return 0;
-	}
-
-	req->rq_resp_wct = wct;
-	hdrlen = SMB_HEADER_LEN + wct*2 + 2;
-	VERBOSE("header length: %d   smb_wct: %2d\n", hdrlen, wct);
-
-	req->rq_bytes_recvd = SMB_HEADER_LEN;
-	req->rq_rlen = hdrlen;
-	req->rq_iov[0].iov_base = req->rq_header;
-	req->rq_iov[0].iov_len  = hdrlen;
-	req->rq_iovlen = 1;
-	server->rstate = SMB_RECV_PARAM;
-
-#ifdef SMB_DEBUG_PACKET_SIZE
-	add_recv_stats(smb_len(server->header));
-#endif
-	return 0;
-}
-
-/*
- * Reads the SMB parameters
- */
-static int smb_recv_param(struct smb_sb_info *server, struct smb_request *req)
-{
-	int result;
-
-	result = smb_receive(server, req);
-	if (result < 0)
-		return result;
-	if (req->rq_bytes_recvd < req->rq_rlen)
-		return 0;
-
-	VERBOSE("result: %d   smb_bcc:  %04x\n", result,
-		WVAL(req->rq_header, SMB_HEADER_LEN +
-		     (*(req->rq_header + smb_wct) * 2)));
-
-	result = 0;
-	req->rq_iov[0].iov_base = NULL;
-	req->rq_rlen = 0;
-	if (req->rq_callback)
-		req->rq_callback(req);
-	else if (req->rq_setup_read)
-		result = req->rq_setup_read(req);
-	if (result < 0) {
-		server->rstate = SMB_RECV_DROP;
-		return result;
-	}
-
-	server->rstate = req->rq_rlen > 0 ? SMB_RECV_DATA : SMB_RECV_END;
-
-	req->rq_bytes_recvd = 0;	// recvd out of the iov
-
-	VERBOSE("rlen: %d\n", req->rq_rlen);
-	if (req->rq_rlen < 0) {
-		PARANOIA("Parameters read beyond end of packet!\n");
-		server->rstate = SMB_RECV_END;
-		return -EIO;
-	}
-	return 0;
-}
-
-/*
- * Reads the SMB data
- */
-static int smb_recv_data(struct smb_sb_info *server, struct smb_request *req)
-{
-	int result;
-
-	result = smb_receive(server, req);
-	if (result < 0)
-		goto out;
-	if (req->rq_bytes_recvd < req->rq_rlen)
-		goto out;
-	server->rstate = SMB_RECV_END;
-out:
-	VERBOSE("result: %d\n", result);
-	return result;
-}
-
-/*
- * Receive a transaction2 response
- * Return: 0 if the response has been fully read
- *         1 if there are further "fragments" to read
- *        <0 if there is an error
- */
-static int smb_recv_trans2(struct smb_sb_info *server, struct smb_request *req)
-{
-	unsigned char *inbuf;
-	unsigned int parm_disp, parm_offset, parm_count, parm_tot;
-	unsigned int data_disp, data_offset, data_count, data_tot;
-	int hdrlen = SMB_HEADER_LEN + req->rq_resp_wct*2 - 2;
-
-	VERBOSE("handling trans2\n");
-
-	inbuf = req->rq_header;
-	data_tot    = WVAL(inbuf, smb_tdrcnt);
-	parm_tot    = WVAL(inbuf, smb_tprcnt);
-	parm_disp   = WVAL(inbuf, smb_prdisp);
-	parm_offset = WVAL(inbuf, smb_proff);
-	parm_count  = WVAL(inbuf, smb_prcnt);
-	data_disp   = WVAL(inbuf, smb_drdisp);
-	data_offset = WVAL(inbuf, smb_droff);
-	data_count  = WVAL(inbuf, smb_drcnt);
-
-	/* Modify offset for the split header/buffer we use */
-	if (data_count || data_offset) {
-		if (unlikely(data_offset < hdrlen))
-			goto out_bad_data;
-		else
-			data_offset -= hdrlen;
-	}
-	if (parm_count || parm_offset) {
-		if (unlikely(parm_offset < hdrlen))
-			goto out_bad_parm;
-		else
-			parm_offset -= hdrlen;
-	}
-
-	if (parm_count == parm_tot && data_count == data_tot) {
-		/*
-		 * This packet has all the trans2 data.
-		 *
-		 * We setup the request so that this will be the common
-		 * case. It may be a server error to not return a
-		 * response that fits.
-		 */
-		VERBOSE("single trans2 response  "
-			"dcnt=%u, pcnt=%u, doff=%u, poff=%u\n",
-			data_count, parm_count,
-			data_offset, parm_offset);
-		req->rq_ldata = data_count;
-		req->rq_lparm = parm_count;
-		req->rq_data = req->rq_buffer + data_offset;
-		req->rq_parm = req->rq_buffer + parm_offset;
-		if (unlikely(parm_offset + parm_count > req->rq_rlen))
-			goto out_bad_parm;
-		if (unlikely(data_offset + data_count > req->rq_rlen))
-			goto out_bad_data;
-		return 0;
-	}
-
-	VERBOSE("multi trans2 response  "
-		"frag=%d, dcnt=%u, pcnt=%u, doff=%u, poff=%u\n",
-		req->rq_fragment,
-		data_count, parm_count,
-		data_offset, parm_offset);
-
-	if (!req->rq_fragment) {
-		int buf_len;
-
-		/* We got the first trans2 fragment */
-		req->rq_fragment = 1;
-		req->rq_total_data = data_tot;
-		req->rq_total_parm = parm_tot;
-		req->rq_ldata = 0;
-		req->rq_lparm = 0;
-
-		buf_len = data_tot + parm_tot;
-		if (buf_len > SMB_MAX_PACKET_SIZE)
-			goto out_too_long;
-
-		req->rq_trans2bufsize = buf_len;
-		req->rq_trans2buffer = kzalloc(buf_len, GFP_NOFS);
-		if (!req->rq_trans2buffer)
-			goto out_no_mem;
-
-		req->rq_parm = req->rq_trans2buffer;
-		req->rq_data = req->rq_trans2buffer + parm_tot;
-	} else if (unlikely(req->rq_total_data < data_tot ||
-			    req->rq_total_parm < parm_tot))
-		goto out_data_grew;
-
-	if (unlikely(parm_disp + parm_count > req->rq_total_parm ||
-		     parm_offset + parm_count > req->rq_rlen))
-		goto out_bad_parm;
-	if (unlikely(data_disp + data_count > req->rq_total_data ||
-		     data_offset + data_count > req->rq_rlen))
-		goto out_bad_data;
-
-	inbuf = req->rq_buffer;
-	memcpy(req->rq_parm + parm_disp, inbuf + parm_offset, parm_count);
-	memcpy(req->rq_data + data_disp, inbuf + data_offset, data_count);
-
-	req->rq_ldata += data_count;
-	req->rq_lparm += parm_count;
-
-	/*
-	 * Check whether we've received all of the data. Note that
-	 * we use the packet totals -- total lengths might shrink!
-	 */
-	if (req->rq_ldata >= data_tot && req->rq_lparm >= parm_tot) {
-		req->rq_ldata = data_tot;
-		req->rq_lparm = parm_tot;
-		return 0;
-	}
-	return 1;
-
-out_too_long:
-	printk(KERN_ERR "smb_trans2: data/param too long, data=%u, parm=%u\n",
-		data_tot, parm_tot);
-	goto out_EIO;
-out_no_mem:
-	printk(KERN_ERR "smb_trans2: couldn't allocate data area of %d bytes\n",
-	       req->rq_trans2bufsize);
-	req->rq_errno = -ENOMEM;
-	goto out;
-out_data_grew:
-	printk(KERN_ERR "smb_trans2: data/params grew!\n");
-	goto out_EIO;
-out_bad_parm:
-	printk(KERN_ERR "smb_trans2: invalid parms, disp=%u, cnt=%u, tot=%u, ofs=%u\n",
-	       parm_disp, parm_count, parm_tot, parm_offset);
-	goto out_EIO;
-out_bad_data:
-	printk(KERN_ERR "smb_trans2: invalid data, disp=%u, cnt=%u, tot=%u, ofs=%u\n",
-	       data_disp, data_count, data_tot, data_offset);
-out_EIO:
-	req->rq_errno = -EIO;
-out:
-	return req->rq_errno;
-}
-
-/*
- * State machine for receiving responses. We handle the fact that we can't
- * read the full response in one try by having states telling us how much we
- * have read.
- *
- * Must be called with the server lock held (only called from smbiod).
- *
- * Return: <0 on error
- */
-int smb_request_recv(struct smb_sb_info *server)
-{
-	struct smb_request *req = NULL;
-	int result = 0;
-
-	if (smb_recv_available(server) <= 0)
-		return 0;
-
-	VERBOSE("state: %d\n", server->rstate);
-	switch (server->rstate) {
-	case SMB_RECV_DROP:
-		result = smb_receive_drop(server);
-		if (result < 0)
-			break;
-		if (server->rstate == SMB_RECV_DROP)
-			break;
-		server->rstate = SMB_RECV_START;
-		/* fallthrough */
-	case SMB_RECV_START:
-		server->smb_read = 0;
-		server->rstate = SMB_RECV_HEADER;
-		/* fallthrough */
-	case SMB_RECV_HEADER:
-		result = smb_receive_header(server);
-		if (result < 0)
-			break;
-		if (server->rstate == SMB_RECV_HEADER)
-			break;
-		if (! (*(server->header + smb_flg) & SMB_FLAGS_REPLY) ) {
-			server->rstate = SMB_RECV_REQUEST;
-			break;
-		}
-		if (server->rstate != SMB_RECV_HCOMPLETE)
-			break;
-		/* fallthrough */
-	case SMB_RECV_HCOMPLETE:
-		req = find_request(server, WVAL(server->header, smb_mid));
-		if (!req)
-			break;
-		smb_init_request(server, req);
-		req->rq_rcls = *(req->rq_header + smb_rcls);
-		req->rq_err  = WVAL(req->rq_header, smb_err);
-		if (server->rstate != SMB_RECV_PARAM)
-			break;
-		/* fallthrough */
-	case SMB_RECV_PARAM:
-		if (!req)
-			req = find_request(server,WVAL(server->header,smb_mid));
-		if (!req)
-			break;
-		result = smb_recv_param(server, req);
-		if (result < 0)
-			break;
-		if (server->rstate != SMB_RECV_DATA)
-			break;
-		/* fallthrough */
-	case SMB_RECV_DATA:
-		if (!req)
-			req = find_request(server,WVAL(server->header,smb_mid));
-		if (!req)
-			break;
-		result = smb_recv_data(server, req);
-		if (result < 0)
-			break;
-		break;
-
-		/* We should never be called with any of these states */
-	case SMB_RECV_END:
-	case SMB_RECV_REQUEST:
-		BUG();
-	}
-
-	if (result < 0) {
-		/* We saw an error */
-		return result;
-	}
-
-	if (server->rstate != SMB_RECV_END)
-		return 0;
-
-	result = 0;
-	if (req->rq_trans2_command && req->rq_rcls == SUCCESS)
-		result = smb_recv_trans2(server, req);
-
-	/*
-	 * Response completely read. Drop any extra bytes sent by the server.
-	 * (Yes, servers sometimes add extra bytes to responses)
-	 */
-	VERBOSE("smb_len: %d   smb_read: %d\n",
-		server->smb_len, server->smb_read);
-	if (server->smb_read < server->smb_len)
-		smb_receive_drop(server);
-
-	server->rstate = SMB_RECV_START;
-
-	if (!result) {
-		list_del_init(&req->rq_queue);
-		req->rq_flags |= SMB_REQ_RECEIVED;
-		smb_rput(req);
-		wake_up_interruptible(&req->rq_wait);
-	}
-	return 0;
-}
diff --git a/fs/smbfs/request.h b/fs/smbfs/request.h
deleted file mode 100644
index efb21451e7c..00000000000
--- a/fs/smbfs/request.h
+++ /dev/null
@@ -1,70 +0,0 @@
-#include <linux/list.h>
-#include <linux/types.h>
-#include <linux/uio.h>
-#include <linux/wait.h>
-
-struct smb_request {
-	struct list_head rq_queue;	/* recvq or xmitq for the server */
-
-	atomic_t rq_count;
-
-	wait_queue_head_t rq_wait;
-	int rq_flags;
-	int rq_mid;	/* multiplex ID, set by request.c */
-
-	struct smb_sb_info *rq_server;
-
-	/* header + word count + parameter words + byte count */
-	unsigned char rq_header[SMB_HEADER_LEN + 20*2 + 2];
-
-	int rq_bufsize;
-	unsigned char *rq_buffer;
-
-	/* FIXME: this is not good enough for merging IO requests. */
-	unsigned char *rq_page;
-	int rq_rsize;
-
-	int rq_resp_wct;
-	int rq_resp_bcc;
-
-	int rq_rlen;
-	int rq_bytes_recvd;
-
-	int rq_slen;
-	int rq_bytes_sent;
-
-	int rq_iovlen;
-	struct kvec rq_iov[4];
-
-	int (*rq_setup_read) (struct smb_request *);
-	void (*rq_callback) (struct smb_request *);
-
-	/* ------ trans2 stuff ------ */
-
-	u16 rq_trans2_command;	/* 0 if not a trans2 request */
-	unsigned int rq_ldata;
-	unsigned char *rq_data;
-	unsigned int rq_lparm;
-	unsigned char *rq_parm;
-
-	int rq_fragment;
-	u32 rq_total_data;
-	u32 rq_total_parm;
-	int rq_trans2bufsize;
-	unsigned char *rq_trans2buffer;
-
-	/* ------ response ------ */
-
-	unsigned short rq_rcls;
-	unsigned short rq_err;
-	int rq_errno;
-};
-
-#define SMB_REQ_STATIC		0x0001	/* rq_buffer is static */
-#define SMB_REQ_NORETRY		0x0002	/* request is invalid after retry */
-
-#define SMB_REQ_TRANSMITTED	0x4000	/* all data has been sent */
-#define SMB_REQ_RECEIVED	0x8000	/* reply received, smbiod is done */
-
-#define xSMB_REQ_NOREPLY	0x0004	/* we don't want the reply (if any) */
-#define xSMB_REQ_NORECEIVER	0x0008	/* caller doesn't wait for response */
diff --git a/fs/smbfs/smb_debug.h b/fs/smbfs/smb_debug.h
deleted file mode 100644
index fc4b1a5dd75..00000000000
--- a/fs/smbfs/smb_debug.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- * Defines some debug macros for smbfs.
- */
-
-/* This makes a dentry parent/child name pair. Useful for debugging printk's */
-#define DENTRY_PATH(dentry) \
-	(dentry)->d_parent->d_name.name,(dentry)->d_name.name
-
-/*
- * safety checks that should never happen ???
- * these are normally enabled.
- */
-#ifdef SMBFS_PARANOIA
-# define PARANOIA(f, a...) printk(KERN_NOTICE "%s: " f, __func__ , ## a)
-#else
-# define PARANOIA(f, a...) do { ; } while(0)
-#endif
-
-/* lots of debug messages */
-#ifdef SMBFS_DEBUG_VERBOSE
-# define VERBOSE(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a)
-#else
-# define VERBOSE(f, a...) do { ; } while(0)
-#endif
-
-/*
- * "normal" debug messages, but not with a normal DEBUG define ... way
- * too common name.
- */
-#ifdef SMBFS_DEBUG
-#define DEBUG1(f, a...) printk(KERN_DEBUG "%s: " f, __func__ , ## a)
-#else
-#define DEBUG1(f, a...) do { ; } while(0)
-#endif
diff --git a/fs/smbfs/smbiod.c b/fs/smbfs/smbiod.c
deleted file mode 100644
index 0e39a924f10..00000000000
--- a/fs/smbfs/smbiod.c
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
- *  smbiod.c
- *
- *  Copyright (C) 2000, Charles Loep / Corel Corp.
- *  Copyright (C) 2001, Urban Widmark
- */
-
-
-#include <linux/sched.h>
-#include <linux/kernel.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/file.h>
-#include <linux/dcache.h>
-#include <linux/module.h>
-#include <linux/net.h>
-#include <linux/kthread.h>
-#include <net/ip.h>
-
-#include <linux/smb_fs.h>
-#include <linux/smbno.h>
-#include <linux/smb_mount.h>
-
-#include <asm/system.h>
-#include <asm/uaccess.h>
-
-#include "smb_debug.h"
-#include "request.h"
-#include "proto.h"
-
-enum smbiod_state {
-	SMBIOD_DEAD,
-	SMBIOD_STARTING,
-	SMBIOD_RUNNING,
-};
-
-static enum smbiod_state smbiod_state = SMBIOD_DEAD;
-static struct task_struct *smbiod_thread;
-static DECLARE_WAIT_QUEUE_HEAD(smbiod_wait);
-static LIST_HEAD(smb_servers);
-static DEFINE_SPINLOCK(servers_lock);
-
-#define SMBIOD_DATA_READY	(1<<0)
-static unsigned long smbiod_flags;
-
-static int smbiod(void *);
-static int smbiod_start(void);
-
-/*
- * called when there's work for us to do
- */
-void smbiod_wake_up(void)
-{
-	if (smbiod_state == SMBIOD_DEAD)
-		return;
-	set_bit(SMBIOD_DATA_READY, &smbiod_flags);
-	wake_up_interruptible(&smbiod_wait);
-}
-
-/*
- * start smbiod if none is running
- */
-static int smbiod_start(void)
-{
-	struct task_struct *tsk;
-	int err = 0;
-
-	if (smbiod_state != SMBIOD_DEAD)
-		return 0;
-	smbiod_state = SMBIOD_STARTING;
-	__module_get(THIS_MODULE);
-	spin_unlock(&servers_lock);
-	tsk = kthread_run(smbiod, NULL, "smbiod");
-	if (IS_ERR(tsk)) {
-		err = PTR_ERR(tsk);
-		module_put(THIS_MODULE);
-	}
-
-	spin_lock(&servers_lock);
-	if (err < 0) {
-		smbiod_state = SMBIOD_DEAD;
-		smbiod_thread = NULL;
-	} else {
-		smbiod_state = SMBIOD_RUNNING;
-		smbiod_thread = tsk;
-	}
-	return err;
-}
-
-/*
- * register a server & start smbiod if necessary
- */
-int smbiod_register_server(struct smb_sb_info *server)
-{
-	int ret;
-	spin_lock(&servers_lock);
-	list_add(&server->entry, &smb_servers);
-	VERBOSE("%p\n", server);
-	ret = smbiod_start();
-	spin_unlock(&servers_lock);
-	return ret;
-}
-
-/*
- * Unregister a server
- * Must be called with the server lock held.
- */
-void smbiod_unregister_server(struct smb_sb_info *server)
-{
-	spin_lock(&servers_lock);
-	list_del_init(&server->entry);
-	VERBOSE("%p\n", server);
-	spin_unlock(&servers_lock);
-
-	smbiod_wake_up();
-	smbiod_flush(server);
-}
-
-void smbiod_flush(struct smb_sb_info *server)
-{
-	struct list_head *tmp, *n;
-	struct smb_request *req;
-
-	list_for_each_safe(tmp, n, &server->xmitq) {
-		req = list_entry(tmp, struct smb_request, rq_queue);
-		req->rq_errno = -EIO;
-		list_del_init(&req->rq_queue);
-		smb_rput(req);
-		wake_up_interruptible(&req->rq_wait);
-	}
-	list_for_each_safe(tmp, n, &server->recvq) {
-		req = list_entry(tmp, struct smb_request, rq_queue);
-		req->rq_errno = -EIO;
-		list_del_init(&req->rq_queue);
-		smb_rput(req);
-		wake_up_interruptible(&req->rq_wait);
-	}
-}
-
-/*
- * Wake up smbmount and make it reconnect to the server.
- * This must be called with the server locked.
- *
- * FIXME: add smbconnect version to this
- */
-int smbiod_retry(struct smb_sb_info *server)
-{
-	struct list_head *head;
-	struct smb_request *req;
-	struct pid *pid = get_pid(server->conn_pid);
-	int result = 0;
-
-	VERBOSE("state: %d\n", server->state);
-	if (server->state == CONN_VALID || server->state == CONN_RETRYING)
-		goto out;
-
-	smb_invalidate_inodes(server);
-
-	/*
-	 * Some requests are meaningless after a retry, so we abort them.
-	 * One example are all requests using 'fileid' since the files are
-	 * closed on retry.
-	 */
-	head = server->xmitq.next;
-	while (head != &server->xmitq) {
-		req = list_entry(head, struct smb_request, rq_queue);
-		head = head->next;
-
-		req->rq_bytes_sent = 0;
-		if (req->rq_flags & SMB_REQ_NORETRY) {
-			VERBOSE("aborting request %p on xmitq\n", req);
-			req->rq_errno = -EIO;
-			list_del_init(&req->rq_queue);
-			smb_rput(req);
-			wake_up_interruptible(&req->rq_wait);
-		}
-	}
-
-	/*
-	 * FIXME: test the code for retrying request we already sent
-	 */
-	head = server->recvq.next;
-	while (head != &server->recvq) {
-		req = list_entry(head, struct smb_request, rq_queue);
-		head = head->next;
-#if 0
-		if (req->rq_flags & SMB_REQ_RETRY) {
-			/* must move the request to the xmitq */
-			VERBOSE("retrying request %p on recvq\n", req);
-			list_move(&req->rq_queue, &server->xmitq);
-			continue;
-		}
-#endif
-
-		VERBOSE("aborting request %p on recvq\n", req);
-		/* req->rq_rcls = ???; */ /* FIXME: set smb error code too? */
-		req->rq_errno = -EIO;
-		list_del_init(&req->rq_queue);
-		smb_rput(req);
-		wake_up_interruptible(&req->rq_wait);
-	}
-
-	smb_close_socket(server);
-
-	if (!pid) {
-		/* FIXME: this is fatal, umount? */
-		printk(KERN_ERR "smb_retry: no connection process\n");
-		server->state = CONN_RETRIED;
-		goto out;
-	}
-
-	/*
-	 * Change state so that only one retry per server will be started.
-	 */
-	server->state = CONN_RETRYING;
-
-	/*
-	 * Note: use the "priv" flag, as a user process may need to reconnect.
-	 */
-	result = kill_pid(pid, SIGUSR1, 1);
-	if (result) {
-		/* FIXME: this is most likely fatal, umount? */
-		printk(KERN_ERR "smb_retry: signal failed [%d]\n", result);
-		goto out;
-	}
-	VERBOSE("signalled pid %d\n", pid_nr(pid));
-
-	/* FIXME: The retried requests should perhaps get a "time boost". */
-
-out:
-	put_pid(pid);
-	return result;
-}
-
-/*
- * Currently handles lockingX packets.
- */
-static void smbiod_handle_request(struct smb_sb_info *server)
-{
-	PARANOIA("smbiod got a request ... and we don't implement oplocks!\n");
-	server->rstate = SMB_RECV_DROP;
-}
-
-/*
- * Do some IO for one server.
- */
-static void smbiod_doio(struct smb_sb_info *server)
-{
-	int result;
-	int maxwork = 7;
-
-	if (server->state != CONN_VALID)
-		goto out;
-
-	do {
-		result = smb_request_recv(server);
-		if (result < 0) {
-			server->state = CONN_INVALID;
-			smbiod_retry(server);
-			goto out;	/* reconnecting is slow */
-		} else if (server->rstate == SMB_RECV_REQUEST)
-			smbiod_handle_request(server);
-	} while (result > 0 && maxwork-- > 0);
-
-	/*
-	 * If there is more to read then we want to be sure to wake up again.
-	 */
-	if (server->state != CONN_VALID)
-		goto out;
-	if (smb_recv_available(server) > 0)
-		set_bit(SMBIOD_DATA_READY, &smbiod_flags);
-
-	do {
-		result = smb_request_send_server(server);
-		if (result < 0) {
-			server->state = CONN_INVALID;
-			smbiod_retry(server);
-			goto out;	/* reconnecting is slow */
-		}
-	} while (result > 0);
-
-	/*
-	 * If the last request was not sent out we want to wake up again.
-	 */
-	if (!list_empty(&server->xmitq))
-		set_bit(SMBIOD_DATA_READY, &smbiod_flags);
-
-out:
-	return;
-}
-
-/*
- * smbiod kernel thread
- */
-static int smbiod(void *unused)
-{
-	VERBOSE("SMB Kernel thread starting (%d) ...\n", current->pid);
-
-	for (;;) {
-		struct smb_sb_info *server;
-		struct list_head *pos, *n;
-
-		/* FIXME: Use poll? */
-		wait_event_interruptible(smbiod_wait,
-			 test_bit(SMBIOD_DATA_READY, &smbiod_flags));
-		if (signal_pending(current)) {
-			spin_lock(&servers_lock);
-			smbiod_state = SMBIOD_DEAD;
-			spin_unlock(&servers_lock);
-			break;
-		}
-
-		clear_bit(SMBIOD_DATA_READY, &smbiod_flags);
-
-		spin_lock(&servers_lock);
-		if (list_empty(&smb_servers)) {
-			smbiod_state = SMBIOD_DEAD;
-			spin_unlock(&servers_lock);
-			break;
-		}
-
-		list_for_each_safe(pos, n, &smb_servers) {
-			server = list_entry(pos, struct smb_sb_info, entry);
-			VERBOSE("checking server %p\n", server);
-
-			if (server->state == CONN_VALID) {
-				spin_unlock(&servers_lock);
-
-				smb_lock_server(server);
-				smbiod_doio(server);
-				smb_unlock_server(server);
-
-				spin_lock(&servers_lock);
-			}
-		}
-		spin_unlock(&servers_lock);
-	}
-
-	VERBOSE("SMB Kernel thread exiting (%d) ...\n", current->pid);
-	module_put_and_exit(0);
-}
diff --git a/fs/smbfs/sock.c b/fs/smbfs/sock.c
deleted file mode 100644
index e37fe4deebd..00000000000
--- a/fs/smbfs/sock.c
+++ /dev/null
@@ -1,386 +0,0 @@
-/*
- *  sock.c
- *
- *  Copyright (C) 1995, 1996 by Paal-Kr. Engstad and Volker Lendecke
- *  Copyright (C) 1997 by Volker Lendecke
- *
- *  Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/fs.h>
-#include <linux/time.h>
-#include <linux/errno.h>
-#include <linux/socket.h>
-#include <linux/fcntl.h>
-#include <linux/file.h>
-#include <linux/in.h>
-#include <linux/net.h>
-#include <linux/mm.h>
-#include <linux/netdevice.h>
-#include <linux/workqueue.h>
-#include <net/scm.h>
-#include <net/tcp_states.h>
-#include <net/ip.h>
-
-#include <linux/smb_fs.h>
-#include <linux/smb.h>
-#include <linux/smbno.h>
-
-#include <asm/uaccess.h>
-#include <asm/ioctls.h>
-
-#include "smb_debug.h"
-#include "proto.h"
-#include "request.h"
-
-
-static int
-_recvfrom(struct socket *socket, unsigned char *ubuf, int size, unsigned flags)
-{
-	struct kvec iov = {ubuf, size};
-	struct msghdr msg = {.msg_flags = flags};
-	msg.msg_flags |= MSG_DONTWAIT | MSG_NOSIGNAL;
-	return kernel_recvmsg(socket, &msg, &iov, 1, size, msg.msg_flags);
-}
-
-/*
- * Return the server this socket belongs to
- */
-static struct smb_sb_info *
-server_from_socket(struct socket *socket)
-{
-	return socket->sk->sk_user_data;
-}
-
-/*
- * Called when there is data on the socket.
- */
-void
-smb_data_ready(struct sock *sk, int len)
-{
-	struct smb_sb_info *server = server_from_socket(sk->sk_socket);
-	void (*data_ready)(struct sock *, int) = server->data_ready;
-
-	data_ready(sk, len);
-	VERBOSE("(%p, %d)\n", sk, len);
-	smbiod_wake_up();
-}
-
-int
-smb_valid_socket(struct inode * inode)
-{
-	return (inode && S_ISSOCK(inode->i_mode) && 
-		SOCKET_I(inode)->type == SOCK_STREAM);
-}
-
-static struct socket *
-server_sock(struct smb_sb_info *server)
-{
-	struct file *file;
-
-	if (server && (file = server->sock_file))
-	{
-#ifdef SMBFS_PARANOIA
-		if (!smb_valid_socket(file->f_path.dentry->d_inode))
-			PARANOIA("bad socket!\n");
-#endif
-		return SOCKET_I(file->f_path.dentry->d_inode);
-	}
-	return NULL;
-}
-
-void
-smb_close_socket(struct smb_sb_info *server)
-{
-	struct file * file = server->sock_file;
-
-	if (file) {
-		struct socket *sock = server_sock(server);
-
-		VERBOSE("closing socket %p\n", sock);
-		sock->sk->sk_data_ready = server->data_ready;
-		server->sock_file = NULL;
-		fput(file);
-	}
-}
-
-static int
-smb_get_length(struct socket *socket, unsigned char *header)
-{
-	int result;
-
-	result = _recvfrom(socket, header, 4, MSG_PEEK);
-	if (result == -EAGAIN)
-		return -ENODATA;
-	if (result < 0) {
-		PARANOIA("recv error = %d\n", -result);
-		return result;
-	}
-	if (result < 4)
-		return -ENODATA;
-
-	switch (header[0]) {
-	case 0x00:
-	case 0x82:
-		break;
-
-	case 0x85:
-		DEBUG1("Got SESSION KEEP ALIVE\n");
-		_recvfrom(socket, header, 4, 0);	/* read away */
-		return -ENODATA;
-
-	default:
-		PARANOIA("Invalid NBT packet, code=%x\n", header[0]);
-		return -EIO;
-	}
-
-	/* The length in the RFC NB header is the raw data length */
-	return smb_len(header);
-}
-
-int
-smb_recv_available(struct smb_sb_info *server)
-{
-	mm_segment_t oldfs;
-	int avail, err;
-	struct socket *sock = server_sock(server);
-
-	oldfs = get_fs();
-	set_fs(get_ds());
-	err = sock->ops->ioctl(sock, SIOCINQ, (unsigned long) &avail);
-	set_fs(oldfs);
-	return (err >= 0) ? avail : err;
-}
-
-/*
- * Adjust the kvec to move on 'n' bytes (from nfs/sunrpc)
- */
-static int
-smb_move_iov(struct kvec **data, size_t *num, struct kvec *vec, unsigned amount)
-{
-	struct kvec *iv = *data;
-	int i;
-	int len;
-
-	/*
-	 *	Eat any sent kvecs
-	 */
-	while (iv->iov_len <= amount) {
-		amount -= iv->iov_len;
-		iv++;
-		(*num)--;
-	}
-
-	/*
-	 *	And chew down the partial one
-	 */
-	vec[0].iov_len = iv->iov_len-amount;
-	vec[0].iov_base =((unsigned char *)iv->iov_base)+amount;
-	iv++;
-
-	len = vec[0].iov_len;
-
-	/*
-	 *	And copy any others
-	 */
-	for (i = 1; i < *num; i++) {
-		vec[i] = *iv++;
-		len += vec[i].iov_len;
-	}
-
-	*data = vec;
-	return len;
-}
-
-/*
- * smb_receive_header
- * Only called by the smbiod thread.
- */
-int
-smb_receive_header(struct smb_sb_info *server)
-{
-	struct socket *sock;
-	int result = 0;
-	unsigned char peek_buf[4];
-
-	result = -EIO; 
-	sock = server_sock(server);
-	if (!sock)
-		goto out;
-	if (sock->sk->sk_state != TCP_ESTABLISHED)
-		goto out;
-
-	if (!server->smb_read) {
-		result = smb_get_length(sock, peek_buf);
-		if (result < 0) {
-			if (result == -ENODATA)
-				result = 0;
-			goto out;
-		}
-		server->smb_len = result + 4;
-
-		if (server->smb_len < SMB_HEADER_LEN) {
-			PARANOIA("short packet: %d\n", result);
-			server->rstate = SMB_RECV_DROP;
-			result = -EIO;
-			goto out;
-		}
-		if (server->smb_len > SMB_MAX_PACKET_SIZE) {
-			PARANOIA("long packet: %d\n", result);
-			server->rstate = SMB_RECV_DROP;
-			result = -EIO;
-			goto out;
-		}
-	}
-
-	result = _recvfrom(sock, server->header + server->smb_read,
-			   SMB_HEADER_LEN - server->smb_read, 0);
-	VERBOSE("_recvfrom: %d\n", result);
-	if (result < 0) {
-		VERBOSE("receive error: %d\n", result);
-		goto out;
-	}
-	server->smb_read += result;
-
-	if (server->smb_read == SMB_HEADER_LEN)
-		server->rstate = SMB_RECV_HCOMPLETE;
-out:
-	return result;
-}
-
-static char drop_buffer[PAGE_SIZE];
-
-/*
- * smb_receive_drop - read and throw away the data
- * Only called by the smbiod thread.
- *
- * FIXME: we are in the kernel, could we just tell the socket that we want
- * to drop stuff from the buffer?
- */
-int
-smb_receive_drop(struct smb_sb_info *server)
-{
-	struct socket *sock;
-	unsigned int flags;
-	struct kvec iov;
-	struct msghdr msg;
-	int rlen = smb_len(server->header) - server->smb_read + 4;
-	int result = -EIO;
-
-	if (rlen > PAGE_SIZE)
-		rlen = PAGE_SIZE;
-
-	sock = server_sock(server);
-	if (!sock)
-		goto out;
-	if (sock->sk->sk_state != TCP_ESTABLISHED)
-		goto out;
-
-	flags = MSG_DONTWAIT | MSG_NOSIGNAL;
-	iov.iov_base = drop_buffer;
-	iov.iov_len = PAGE_SIZE;
-	msg.msg_flags = flags;
-	msg.msg_name = NULL;
-	msg.msg_namelen = 0;
-	msg.msg_control = NULL;
-
-	result = kernel_recvmsg(sock, &msg, &iov, 1, rlen, flags);
-
-	VERBOSE("read: %d\n", result);
-	if (result < 0) {
-		VERBOSE("receive error: %d\n", result);
-		goto out;
-	}
-	server->smb_read += result;
-
-	if (server->smb_read >= server->smb_len)
-		server->rstate = SMB_RECV_END;
-
-out:
-	return result;
-}
-
-/*
- * smb_receive
- * Only called by the smbiod thread.
- */
-int
-smb_receive(struct smb_sb_info *server, struct smb_request *req)
-{
-	struct socket *sock;
-	unsigned int flags;
-	struct kvec iov[4];
-	struct kvec *p = req->rq_iov;
-	size_t num = req->rq_iovlen;
-	struct msghdr msg;
-	int rlen;
-	int result = -EIO;
-
-	sock = server_sock(server);
-	if (!sock)
-		goto out;
-	if (sock->sk->sk_state != TCP_ESTABLISHED)
-		goto out;
-
-	flags = MSG_DONTWAIT | MSG_NOSIGNAL;
-	msg.msg_flags = flags;
-	msg.msg_name = NULL;
-	msg.msg_namelen = 0;
-	msg.msg_control = NULL;
-
-	/* Dont repeat bytes and count available bufferspace */
-	rlen = min_t(int, smb_move_iov(&p, &num, iov, req->rq_bytes_recvd),
-			(req->rq_rlen - req->rq_bytes_recvd));
-
-	result = kernel_recvmsg(sock, &msg, p, num, rlen, flags);
-
-	VERBOSE("read: %d\n", result);
-	if (result < 0) {
-		VERBOSE("receive error: %d\n", result);
-		goto out;
-	}
-	req->rq_bytes_recvd += result;
-	server->smb_read += result;
-
-out:
-	return result;
-}
-
-/*
- * Try to send a SMB request. This may return after sending only parts of the
- * request. SMB_REQ_TRANSMITTED will be set if a request was fully sent.
- *
- * Parts of this was taken from xprt_sendmsg from net/sunrpc/xprt.c
- */
-int
-smb_send_request(struct smb_request *req)
-{
-	struct smb_sb_info *server = req->rq_server;
-	struct socket *sock;
-	struct msghdr msg = {.msg_flags = MSG_NOSIGNAL | MSG_DONTWAIT};
-        int slen = req->rq_slen - req->rq_bytes_sent;
-	int result = -EIO;
-	struct kvec iov[4];
-	struct kvec *p = req->rq_iov;
-	size_t num = req->rq_iovlen;
-
-	sock = server_sock(server);
-	if (!sock)
-		goto out;
-	if (sock->sk->sk_state != TCP_ESTABLISHED)
-		goto out;
-
-	/* Dont repeat bytes */
-	if (req->rq_bytes_sent)
-		smb_move_iov(&p, &num, iov, req->rq_bytes_sent);
-
-	result = kernel_sendmsg(sock, &msg, p, num, slen);
-
-	if (result >= 0) {
-		req->rq_bytes_sent += result;
-		if (req->rq_bytes_sent >= req->rq_slen)
-			req->rq_flags |= SMB_REQ_TRANSMITTED;
-	}
-out:
-	return result;
-}
diff --git a/fs/smbfs/symlink.c b/fs/smbfs/symlink.c
deleted file mode 100644
index 00b2909bd46..00000000000
--- a/fs/smbfs/symlink.c
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- *  symlink.c
- *
- *  Copyright (C) 2002 by John Newbigin
- *
- *  Please add a note about your changes to smbfs in the ChangeLog file.
- */
-
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/fcntl.h>
-#include <linux/stat.h>
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/pagemap.h>
-#include <linux/net.h>
-#include <linux/namei.h>
-
-#include <asm/uaccess.h>
-#include <asm/system.h>
-
-#include <linux/smbno.h>
-#include <linux/smb_fs.h>
-
-#include "smb_debug.h"
-#include "proto.h"
-
-int smb_symlink(struct inode *inode, struct dentry *dentry, const char *oldname)
-{
-	DEBUG1("create symlink %s -> %s/%s\n", oldname, DENTRY_PATH(dentry));
-
-	return smb_proc_symlink(server_from_dentry(dentry), dentry, oldname);
-}
-
-static void *smb_follow_link(struct dentry *dentry, struct nameidata *nd)
-{
-	char *link = __getname();
-	DEBUG1("followlink of %s/%s\n", DENTRY_PATH(dentry));
-
-	if (!link) {
-		link = ERR_PTR(-ENOMEM);
-	} else {
-		int len = smb_proc_read_link(server_from_dentry(dentry),
-						dentry, link, PATH_MAX - 1);
-		if (len < 0) {
-			__putname(link);
-			link = ERR_PTR(len);
-		} else {
-			link[len] = 0;
-		}
-	}
-	nd_set_link(nd, link);
-	return NULL;
-}
-
-static void smb_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
-{
-	char *s = nd_get_link(nd);
-	if (!IS_ERR(s))
-		__putname(s);
-}
-
-const struct inode_operations smb_link_inode_operations =
-{
-	.readlink	= generic_readlink,
-	.follow_link	= smb_follow_link,
-	.put_link	= smb_put_link,
-};
diff --git a/fs/splice.c b/fs/splice.c
index 8f1dfaecc8f..ce2f02579e3 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1311,18 +1311,6 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
 static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
 			       struct pipe_inode_info *opipe,
 			       size_t len, unsigned int flags);
-/*
- * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
- * location, so checking ->i_pipe is not enough to verify that this is a
- * pipe.
- */
-static inline struct pipe_inode_info *pipe_info(struct inode *inode)
-{
-	if (S_ISFIFO(inode->i_mode))
-		return inode->i_pipe;
-
-	return NULL;
-}
 
 /*
  * Determine where to splice to/from.
@@ -1336,8 +1324,8 @@ static long do_splice(struct file *in, loff_t __user *off_in,
 	loff_t offset, *off;
 	long ret;
 
-	ipipe = pipe_info(in->f_path.dentry->d_inode);
-	opipe = pipe_info(out->f_path.dentry->d_inode);
+	ipipe = get_pipe_info(in);
+	opipe = get_pipe_info(out);
 
 	if (ipipe && opipe) {
 		if (off_in || off_out)
@@ -1555,7 +1543,7 @@ static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
 	int error;
 	long ret;
 
-	pipe = pipe_info(file->f_path.dentry->d_inode);
+	pipe = get_pipe_info(file);
 	if (!pipe)
 		return -EBADF;
 
@@ -1642,7 +1630,7 @@ static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
 	};
 	long ret;
 
-	pipe = pipe_info(file->f_path.dentry->d_inode);
+	pipe = get_pipe_info(file);
 	if (!pipe)
 		return -EBADF;
 
@@ -2022,8 +2010,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 static long do_tee(struct file *in, struct file *out, size_t len,
 		   unsigned int flags)
 {
-	struct pipe_inode_info *ipipe = pipe_info(in->f_path.dentry->d_inode);
-	struct pipe_inode_info *opipe = pipe_info(out->f_path.dentry->d_inode);
+	struct pipe_inode_info *ipipe = get_pipe_info(in);
+	struct pipe_inode_info *opipe = get_pipe_info(out);
 	int ret = -EINVAL;
 
 	/*
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 07a4f115604..24de30ba34c 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -370,12 +370,10 @@ static void squashfs_put_super(struct super_block *sb)
 }
 
 
-static int squashfs_get_sb(struct file_system_type *fs_type, int flags,
-				const char *dev_name, void *data,
-				struct vfsmount *mnt)
+static struct dentry *squashfs_mount(struct file_system_type *fs_type, int flags,
+				const char *dev_name, void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, squashfs_fill_super,
-				mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, squashfs_fill_super);
 }
 
 
@@ -451,7 +449,7 @@ static void squashfs_destroy_inode(struct inode *inode)
 static struct file_system_type squashfs_fs_type = {
 	.owner = THIS_MODULE,
 	.name = "squashfs",
-	.get_sb = squashfs_get_sb,
+	.mount = squashfs_mount,
 	.kill_sb = kill_block_super,
 	.fs_flags = FS_REQUIRES_DEV
 };
diff --git a/fs/squashfs/xattr.c b/fs/squashfs/xattr.c
index 652b8541f9c..3876c36699a 100644
--- a/fs/squashfs/xattr.c
+++ b/fs/squashfs/xattr.c
@@ -158,17 +158,18 @@ static int squashfs_xattr_get(struct inode *inode, int name_index,
 					strncmp(target, name, name_size) == 0) {
 			/* found xattr */
 			if (type & SQUASHFS_XATTR_VALUE_OOL) {
-				__le64 xattr;
+				__le64 xattr_val;
+				u64 xattr;
 				/* val is a reference to the real location */
 				err = squashfs_read_metadata(sb, &val, &start,
 						&offset, sizeof(val));
 				if (err < 0)
 					goto failed;
-				err = squashfs_read_metadata(sb, &xattr, &start,
-					 &offset, sizeof(xattr));
+				err = squashfs_read_metadata(sb, &xattr_val,
+					&start, &offset, sizeof(xattr_val));
 				if (err < 0)
 					goto failed;
-				xattr = le64_to_cpu(xattr);
+				xattr = le64_to_cpu(xattr_val);
 				start = SQUASHFS_XATTR_BLK(xattr) +
 							msblk->xattr_table;
 				offset = SQUASHFS_XATTR_OFFSET(xattr);
diff --git a/fs/squashfs/xattr.h b/fs/squashfs/xattr.h
index 49fe0d719fb..b634efce4bd 100644
--- a/fs/squashfs/xattr.h
+++ b/fs/squashfs/xattr.h
@@ -25,7 +25,7 @@
 extern __le64 *squashfs_read_xattr_id_table(struct super_block *, u64,
 		u64 *, int *);
 extern int squashfs_xattr_lookup(struct super_block *, unsigned int, int *,
-		int *, unsigned long long *);
+		unsigned int *, unsigned long long *);
 #else
 static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
 		u64 start, u64 *xattr_table_start, int *xattr_ids)
@@ -35,7 +35,7 @@ static inline __le64 *squashfs_read_xattr_id_table(struct super_block *sb,
 }
 
 static inline int squashfs_xattr_lookup(struct super_block *sb,
-		unsigned int index, int *count, int *size,
+		unsigned int index, int *count, unsigned int *size,
 		unsigned long long *xattr)
 {
 	return 0;
diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c
index cfb41106098..d33be5dd6c3 100644
--- a/fs/squashfs/xattr_id.c
+++ b/fs/squashfs/xattr_id.c
@@ -34,6 +34,7 @@
 #include "squashfs_fs_sb.h"
 #include "squashfs_fs_i.h"
 #include "squashfs.h"
+#include "xattr.h"
 
 /*
  * Map xattr id using the xattr id look up table
diff --git a/fs/super.c b/fs/super.c
index 8819e3a7ff2..ca696155cd9 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -273,14 +273,14 @@ void generic_shutdown_super(struct super_block *sb)
 		get_fs_excl();
 		sb->s_flags &= ~MS_ACTIVE;
 
-		/* bad name - it should be evict_inodes() */
-		invalidate_inodes(sb);
+		fsnotify_unmount_inodes(&sb->s_inodes);
+
+		evict_inodes(sb);
 
 		if (sop->put_super)
 			sop->put_super(sb);
 
-		/* Forget any remaining inodes */
-		if (invalidate_inodes(sb)) {
+		if (!list_empty(&sb->s_inodes)) {
 			printk("VFS: Busy inodes after unmount of %s. "
 			   "Self-destruct in 5 seconds.  Have a nice day...\n",
 			   sb->s_id);
@@ -715,15 +715,14 @@ static int ns_set_super(struct super_block *sb, void *data)
 	return set_anon_super(sb, NULL);
 }
 
-int get_sb_ns(struct file_system_type *fs_type, int flags, void *data,
-	int (*fill_super)(struct super_block *, void *, int),
-	struct vfsmount *mnt)
+struct dentry *mount_ns(struct file_system_type *fs_type, int flags,
+	void *data, int (*fill_super)(struct super_block *, void *, int))
 {
 	struct super_block *sb;
 
 	sb = sget(fs_type, ns_test_super, ns_set_super, data);
 	if (IS_ERR(sb))
-		return PTR_ERR(sb);
+		return ERR_CAST(sb);
 
 	if (!sb->s_root) {
 		int err;
@@ -731,17 +730,16 @@ int get_sb_ns(struct file_system_type *fs_type, int flags, void *data,
 		err = fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
 		if (err) {
 			deactivate_locked_super(sb);
-			return err;
+			return ERR_PTR(err);
 		}
 
 		sb->s_flags |= MS_ACTIVE;
 	}
 
-	simple_set_mnt(mnt, sb);
-	return 0;
+	return dget(sb->s_root);
 }
 
-EXPORT_SYMBOL(get_sb_ns);
+EXPORT_SYMBOL(mount_ns);
 
 #ifdef CONFIG_BLOCK
 static int set_bdev_super(struct super_block *s, void *data)
@@ -762,10 +760,9 @@ static int test_bdev_super(struct super_block *s, void *data)
 	return (void *)s->s_bdev == data;
 }
 
-int get_sb_bdev(struct file_system_type *fs_type,
+struct dentry *mount_bdev(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data,
-	int (*fill_super)(struct super_block *, void *, int),
-	struct vfsmount *mnt)
+	int (*fill_super)(struct super_block *, void *, int))
 {
 	struct block_device *bdev;
 	struct super_block *s;
@@ -777,7 +774,7 @@ int get_sb_bdev(struct file_system_type *fs_type,
 
 	bdev = open_bdev_exclusive(dev_name, mode, fs_type);
 	if (IS_ERR(bdev))
-		return PTR_ERR(bdev);
+		return ERR_CAST(bdev);
 
 	/*
 	 * once the super is inserted into the list by sget, s_umount
@@ -829,15 +826,30 @@ int get_sb_bdev(struct file_system_type *fs_type,
 		bdev->bd_super = s;
 	}
 
-	simple_set_mnt(mnt, s);
-	return 0;
+	return dget(s->s_root);
 
 error_s:
 	error = PTR_ERR(s);
 error_bdev:
 	close_bdev_exclusive(bdev, mode);
 error:
-	return error;
+	return ERR_PTR(error);
+}
+EXPORT_SYMBOL(mount_bdev);
+
+int get_sb_bdev(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data,
+	int (*fill_super)(struct super_block *, void *, int),
+	struct vfsmount *mnt)
+{
+	struct dentry *root;
+
+	root = mount_bdev(fs_type, flags, dev_name, data, fill_super);
+	if (IS_ERR(root))
+		return PTR_ERR(root);
+	mnt->mnt_root = root;
+	mnt->mnt_sb = root->d_sb;
+	return 0;
 }
 
 EXPORT_SYMBOL(get_sb_bdev);
@@ -856,29 +868,42 @@ void kill_block_super(struct super_block *sb)
 EXPORT_SYMBOL(kill_block_super);
 #endif
 
-int get_sb_nodev(struct file_system_type *fs_type,
+struct dentry *mount_nodev(struct file_system_type *fs_type,
 	int flags, void *data,
-	int (*fill_super)(struct super_block *, void *, int),
-	struct vfsmount *mnt)
+	int (*fill_super)(struct super_block *, void *, int))
 {
 	int error;
 	struct super_block *s = sget(fs_type, NULL, set_anon_super, NULL);
 
 	if (IS_ERR(s))
-		return PTR_ERR(s);
+		return ERR_CAST(s);
 
 	s->s_flags = flags;
 
 	error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
 	if (error) {
 		deactivate_locked_super(s);
-		return error;
+		return ERR_PTR(error);
 	}
 	s->s_flags |= MS_ACTIVE;
-	simple_set_mnt(mnt, s);
-	return 0;
+	return dget(s->s_root);
 }
+EXPORT_SYMBOL(mount_nodev);
+
+int get_sb_nodev(struct file_system_type *fs_type,
+	int flags, void *data,
+	int (*fill_super)(struct super_block *, void *, int),
+	struct vfsmount *mnt)
+{
+	struct dentry *root;
 
+	root = mount_nodev(fs_type, flags, data, fill_super);
+	if (IS_ERR(root))
+		return PTR_ERR(root);
+	mnt->mnt_root = root;
+	mnt->mnt_sb = root->d_sb;
+	return 0;
+}
 EXPORT_SYMBOL(get_sb_nodev);
 
 static int compare_single(struct super_block *s, void *p)
@@ -886,29 +911,42 @@ static int compare_single(struct super_block *s, void *p)
 	return 1;
 }
 
-int get_sb_single(struct file_system_type *fs_type,
+struct dentry *mount_single(struct file_system_type *fs_type,
 	int flags, void *data,
-	int (*fill_super)(struct super_block *, void *, int),
-	struct vfsmount *mnt)
+	int (*fill_super)(struct super_block *, void *, int))
 {
 	struct super_block *s;
 	int error;
 
 	s = sget(fs_type, compare_single, set_anon_super, NULL);
 	if (IS_ERR(s))
-		return PTR_ERR(s);
+		return ERR_CAST(s);
 	if (!s->s_root) {
 		s->s_flags = flags;
 		error = fill_super(s, data, flags & MS_SILENT ? 1 : 0);
 		if (error) {
 			deactivate_locked_super(s);
-			return error;
+			return ERR_PTR(error);
 		}
 		s->s_flags |= MS_ACTIVE;
 	} else {
 		do_remount_sb(s, flags, data, 0);
 	}
-	simple_set_mnt(mnt, s);
+	return dget(s->s_root);
+}
+EXPORT_SYMBOL(mount_single);
+
+int get_sb_single(struct file_system_type *fs_type,
+	int flags, void *data,
+	int (*fill_super)(struct super_block *, void *, int),
+	struct vfsmount *mnt)
+{
+	struct dentry *root;
+	root = mount_single(fs_type, flags, data, fill_super);
+	if (IS_ERR(root))
+		return PTR_ERR(root);
+	mnt->mnt_root = root;
+	mnt->mnt_sb = root->d_sb;
 	return 0;
 }
 
@@ -918,6 +956,7 @@ struct vfsmount *
 vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void *data)
 {
 	struct vfsmount *mnt;
+	struct dentry *root;
 	char *secdata = NULL;
 	int error;
 
@@ -942,9 +981,19 @@ vfs_kern_mount(struct file_system_type *type, int flags, const char *name, void
 			goto out_free_secdata;
 	}
 
-	error = type->get_sb(type, flags, name, data, mnt);
-	if (error < 0)
-		goto out_free_secdata;
+	if (type->mount) {
+		root = type->mount(type, flags, name, data);
+		if (IS_ERR(root)) {
+			error = PTR_ERR(root);
+			goto out_free_secdata;
+		}
+		mnt->mnt_root = root;
+		mnt->mnt_sb = root->d_sb;
+	} else {
+		error = type->get_sb(type, flags, name, data, mnt);
+		if (error < 0)
+			goto out_free_secdata;
+	}
 	BUG_ON(!mnt->mnt_sb);
 	WARN_ON(!mnt->mnt_sb->s_bdi);
 	mnt->mnt_sb->s_flags |= MS_BORN;
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index f2af22574c5..266895783b4 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -23,7 +23,7 @@
 #include "sysfs.h"
 
 
-static struct vfsmount *sysfs_mount;
+static struct vfsmount *sysfs_mnt;
 struct kmem_cache *sysfs_dir_cachep;
 
 static const struct super_operations sysfs_ops = {
@@ -95,18 +95,17 @@ static int sysfs_set_super(struct super_block *sb, void *data)
 	return error;
 }
 
-static int sysfs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *sysfs_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
 	struct sysfs_super_info *info;
 	enum kobj_ns_type type;
 	struct super_block *sb;
 	int error;
 
-	error = -ENOMEM;
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
 	if (!info)
-		goto out;
+		return ERR_PTR(-ENOMEM);
 
 	for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
 		info->ns[type] = kobj_ns_current(type);
@@ -114,24 +113,19 @@ static int sysfs_get_sb(struct file_system_type *fs_type,
 	sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info);
 	if (IS_ERR(sb) || sb->s_fs_info != info)
 		kfree(info);
-	if (IS_ERR(sb)) {
-		error = PTR_ERR(sb);
-		goto out;
-	}
+	if (IS_ERR(sb))
+		return ERR_CAST(sb);
 	if (!sb->s_root) {
 		sb->s_flags = flags;
 		error = sysfs_fill_super(sb, data, flags & MS_SILENT ? 1 : 0);
 		if (error) {
 			deactivate_locked_super(sb);
-			goto out;
+			return ERR_PTR(error);
 		}
 		sb->s_flags |= MS_ACTIVE;
 	}
 
-	simple_set_mnt(mnt, sb);
-	error = 0;
-out:
-	return error;
+	return dget(sb->s_root);
 }
 
 static void sysfs_kill_sb(struct super_block *sb)
@@ -147,7 +141,7 @@ static void sysfs_kill_sb(struct super_block *sb)
 
 static struct file_system_type sysfs_fs_type = {
 	.name		= "sysfs",
-	.get_sb		= sysfs_get_sb,
+	.mount		= sysfs_mount,
 	.kill_sb	= sysfs_kill_sb,
 };
 
@@ -189,11 +183,11 @@ int __init sysfs_init(void)
 
 	err = register_filesystem(&sysfs_fs_type);
 	if (!err) {
-		sysfs_mount = kern_mount(&sysfs_fs_type);
-		if (IS_ERR(sysfs_mount)) {
+		sysfs_mnt = kern_mount(&sysfs_fs_type);
+		if (IS_ERR(sysfs_mnt)) {
 			printk(KERN_ERR "sysfs: could not mount!\n");
-			err = PTR_ERR(sysfs_mount);
-			sysfs_mount = NULL;
+			err = PTR_ERR(sysfs_mnt);
+			sysfs_mnt = NULL;
 			unregister_filesystem(&sysfs_fs_type);
 			goto out_err;
 		}
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index 33e047b59b8..11e7f7d11cd 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -126,7 +126,7 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
 
 	inode->i_ctime = CURRENT_TIME_SEC;
 	inode_inc_link_count(inode);
-	atomic_inc(&inode->i_count);
+	ihold(inode);
 
 	return add_nondir(dentry, inode);
 }
diff --git a/fs/sysv/super.c b/fs/sysv/super.c
index a0b0cda6927..3d9c62be0c1 100644
--- a/fs/sysv/super.c
+++ b/fs/sysv/super.c
@@ -526,23 +526,22 @@ failed:
 
 /* Every kernel module contains stuff like this. */
 
-static int sysv_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *sysv_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, sysv_fill_super,
-			   mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, sysv_fill_super);
 }
 
-static int v7_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *v7_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, v7_fill_super, mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, v7_fill_super);
 }
 
 static struct file_system_type sysv_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "sysv",
-	.get_sb		= sysv_get_sb,
+	.mount		= sysv_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
@@ -550,7 +549,7 @@ static struct file_system_type sysv_fs_type = {
 static struct file_system_type v7_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "v7",
-	.get_sb		= v7_get_sb,
+	.mount		= v7_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 87ebcce7221..14f64b689d7 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -550,7 +550,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
 
 	lock_2_inodes(dir, inode);
 	inc_nlink(inode);
-	atomic_inc(&inode->i_count);
+	ihold(inode);
 	inode->i_ctime = ubifs_current_time(inode);
 	dir->i_size += sz_change;
 	dir_ui->ui_size = dir->i_size;
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 9a47c9f0ad0..91fac54c70e 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2038,8 +2038,8 @@ static int sb_test(struct super_block *sb, void *data)
 	return c->vi.cdev == *dev;
 }
 
-static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
-			const char *name, void *data, struct vfsmount *mnt)
+static struct dentry *ubifs_mount(struct file_system_type *fs_type, int flags,
+			const char *name, void *data)
 {
 	struct ubi_volume_desc *ubi;
 	struct ubi_volume_info vi;
@@ -2057,7 +2057,7 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
 	if (IS_ERR(ubi)) {
 		dbg_err("cannot open \"%s\", error %d",
 			name, (int)PTR_ERR(ubi));
-		return PTR_ERR(ubi);
+		return ERR_CAST(ubi);
 	}
 	ubi_get_volume_info(ubi, &vi);
 
@@ -2095,20 +2095,19 @@ static int ubifs_get_sb(struct file_system_type *fs_type, int flags,
 	/* 'fill_super()' opens ubi again so we must close it here */
 	ubi_close_volume(ubi);
 
-	simple_set_mnt(mnt, sb);
-	return 0;
+	return dget(sb->s_root);
 
 out_deact:
 	deactivate_locked_super(sb);
 out_close:
 	ubi_close_volume(ubi);
-	return err;
+	return ERR_PTR(err);
 }
 
 static struct file_system_type ubifs_fs_type = {
 	.name    = "ubifs",
 	.owner   = THIS_MODULE,
-	.get_sb  = ubifs_get_sb,
+	.mount   = ubifs_mount,
 	.kill_sb = kill_anon_super,
 };
 
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index bf5fc674193..6d8dc02baeb 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -1101,7 +1101,7 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
 	inc_nlink(inode);
 	inode->i_ctime = current_fs_time(inode->i_sb);
 	mark_inode_dirty(inode);
-	atomic_inc(&inode->i_count);
+	ihold(inode);
 	d_instantiate(dentry, inode);
 	unlock_kernel();
 
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 76f3d6d97b4..4a5c7c61836 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -107,17 +107,16 @@ struct logicalVolIntegrityDescImpUse *udf_sb_lvidiu(struct udf_sb_info *sbi)
 }
 
 /* UDF filesystem type */
-static int udf_get_sb(struct file_system_type *fs_type,
-		      int flags, const char *dev_name, void *data,
-		      struct vfsmount *mnt)
+static struct dentry *udf_mount(struct file_system_type *fs_type,
+		      int flags, const char *dev_name, void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, udf_fill_super, mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, udf_fill_super);
 }
 
 static struct file_system_type udf_fstype = {
 	.owner		= THIS_MODULE,
 	.name		= "udf",
-	.get_sb		= udf_get_sb,
+	.mount		= udf_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index b056f02b1fb..12f39b9e443 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -180,7 +180,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
 
 	inode->i_ctime = CURRENT_TIME_SEC;
 	inode_inc_link_count(inode);
-	atomic_inc(&inode->i_count);
+	ihold(inode);
 
 	error = ufs_add_nondir(dentry, inode);
 	unlock_kernel();
diff --git a/fs/ufs/super.c b/fs/ufs/super.c
index 6b9be90dae7..2c47daed56d 100644
--- a/fs/ufs/super.c
+++ b/fs/ufs/super.c
@@ -1454,16 +1454,16 @@ static const struct super_operations ufs_super_ops = {
 	.show_options   = ufs_show_options,
 };
 
-static int ufs_get_sb(struct file_system_type *fs_type,
-	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+static struct dentry *ufs_mount(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, ufs_fill_super, mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, ufs_fill_super);
 }
 
 static struct file_system_type ufs_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "ufs",
-	.get_sb		= ufs_get_sb,
+	.mount		= ufs_mount,
 	.kill_sb	= kill_block_super,
 	.fs_flags	= FS_REQUIRES_DEV,
 };
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 480f28127f0..6100ec0fa1d 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -22,6 +22,7 @@ config XFS_FS
 config XFS_QUOTA
 	bool "XFS Quota support"
 	depends on XFS_FS
+	select QUOTACTL
 	help
 	  If you say Y here, you will be able to set limits for disk usage on
 	  a per user and/or a per group basis under XFS.  XFS considers quota
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index b552f816de1..691f61223ed 100644
--- a/fs/xfs/linux-2.6/xfs_aops.c
+++ b/fs/xfs/linux-2.6/xfs_aops.c
@@ -934,7 +934,6 @@ xfs_aops_discard_page(
 	struct xfs_inode	*ip = XFS_I(inode);
 	struct buffer_head	*bh, *head;
 	loff_t			offset = page_offset(page);
-	ssize_t			len = 1 << inode->i_blkbits;
 
 	if (!xfs_is_delayed_page(page, IO_DELAY))
 		goto out_invalidate;
@@ -949,58 +948,14 @@ xfs_aops_discard_page(
 	xfs_ilock(ip, XFS_ILOCK_EXCL);
 	bh = head = page_buffers(page);
 	do {
-		int		done;
-		xfs_fileoff_t	offset_fsb;
-		xfs_bmbt_irec_t	imap;
-		int		nimaps = 1;
 		int		error;
-		xfs_fsblock_t	firstblock;
-		xfs_bmap_free_t flist;
+		xfs_fileoff_t	start_fsb;
 
 		if (!buffer_delay(bh))
 			goto next_buffer;
 
-		offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
-
-		/*
-		 * Map the range first and check that it is a delalloc extent
-		 * before trying to unmap the range. Otherwise we will be
-		 * trying to remove a real extent (which requires a
-		 * transaction) or a hole, which is probably a bad idea...
-		 */
-		error = xfs_bmapi(NULL, ip, offset_fsb, 1,
-				XFS_BMAPI_ENTIRE,  NULL, 0, &imap,
-				&nimaps, NULL);
-
-		if (error) {
-			/* something screwed, just bail */
-			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
-				xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
-				"page discard failed delalloc mapping lookup.");
-			}
-			break;
-		}
-		if (!nimaps) {
-			/* nothing there */
-			goto next_buffer;
-		}
-		if (imap.br_startblock != DELAYSTARTBLOCK) {
-			/* been converted, ignore */
-			goto next_buffer;
-		}
-		WARN_ON(imap.br_blockcount == 0);
-
-		/*
-		 * Note: while we initialise the firstblock/flist pair, they
-		 * should never be used because blocks should never be
-		 * allocated or freed for a delalloc extent and hence we need
-		 * don't cancel or finish them after the xfs_bunmapi() call.
-		 */
-		xfs_bmap_init(&flist, &firstblock);
-		error = xfs_bunmapi(NULL, ip, offset_fsb, 1, 0, 1, &firstblock,
-					&flist, &done);
-
-		ASSERT(!flist.xbf_count && !flist.xbf_first);
+		start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
+		error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
 		if (error) {
 			/* something screwed, just bail */
 			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
@@ -1010,7 +965,7 @@ xfs_aops_discard_page(
 			break;
 		}
 next_buffer:
-		offset += len;
+		offset += 1 << inode->i_blkbits;
 
 	} while ((bh = bh->b_this_page) != head);
 
@@ -1111,11 +1066,12 @@ xfs_vm_writepage(
 			uptodate = 0;
 
 		/*
-		 * A hole may still be marked uptodate because discard_buffer
-		 * leaves the flag set.
+		 * set_page_dirty dirties all buffers in a page, independent
+		 * of their state.  The dirty state however is entirely
+		 * meaningless for holes (!mapped && uptodate), so skip
+		 * buffers covering holes here.
 		 */
 		if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
-			ASSERT(!buffer_dirty(bh));
 			imap_valid = 0;
 			continue;
 		}
@@ -1139,8 +1095,7 @@ xfs_vm_writepage(
 				type = IO_DELAY;
 				flags = BMAPI_ALLOCATE;
 
-				if (wbc->sync_mode == WB_SYNC_NONE &&
-				    wbc->nonblocking)
+				if (wbc->sync_mode == WB_SYNC_NONE)
 					flags |= BMAPI_TRYLOCK;
 			}
 
@@ -1505,11 +1460,42 @@ xfs_vm_write_failed(
 	struct inode		*inode = mapping->host;
 
 	if (to > inode->i_size) {
-		struct iattr	ia = {
-			.ia_valid	= ATTR_SIZE | ATTR_FORCE,
-			.ia_size	= inode->i_size,
-		};
-		xfs_setattr(XFS_I(inode), &ia, XFS_ATTR_NOLOCK);
+		/*
+		 * punch out the delalloc blocks we have already allocated. We
+		 * don't call xfs_setattr() to do this as we may be in the
+		 * middle of a multi-iovec write and so the vfs inode->i_size
+		 * will not match the xfs ip->i_size and so it will zero too
+		 * much. Hence we jus truncate the page cache to zero what is
+		 * necessary and punch the delalloc blocks directly.
+		 */
+		struct xfs_inode	*ip = XFS_I(inode);
+		xfs_fileoff_t		start_fsb;
+		xfs_fileoff_t		end_fsb;
+		int			error;
+
+		truncate_pagecache(inode, to, inode->i_size);
+
+		/*
+		 * Check if there are any blocks that are outside of i_size
+		 * that need to be trimmed back.
+		 */
+		start_fsb = XFS_B_TO_FSB(ip->i_mount, inode->i_size) + 1;
+		end_fsb = XFS_B_TO_FSB(ip->i_mount, to);
+		if (end_fsb <= start_fsb)
+			return;
+
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
+							end_fsb - start_fsb);
+		if (error) {
+			/* something screwed, just bail */
+			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+				xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+			"xfs_vm_write_failed: unable to clean up ino %lld",
+						ip->i_ino);
+			}
+		}
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
 	}
 }
 
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index ba5312802aa..4c5deb6e9e3 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -488,29 +488,16 @@ found:
 	spin_unlock(&pag->pag_buf_lock);
 	xfs_perag_put(pag);
 
-	/* Attempt to get the semaphore without sleeping,
-	 * if this does not work then we need to drop the
-	 * spinlock and do a hard attempt on the semaphore.
-	 */
-	if (down_trylock(&bp->b_sema)) {
+	if (xfs_buf_cond_lock(bp)) {
+		/* failed, so wait for the lock if requested. */
 		if (!(flags & XBF_TRYLOCK)) {
-			/* wait for buffer ownership */
 			xfs_buf_lock(bp);
 			XFS_STATS_INC(xb_get_locked_waited);
 		} else {
-			/* We asked for a trylock and failed, no need
-			 * to look at file offset and length here, we
-			 * know that this buffer at least overlaps our
-			 * buffer and is locked, therefore our buffer
-			 * either does not exist, or is this buffer.
-			 */
 			xfs_buf_rele(bp);
 			XFS_STATS_INC(xb_busy_locked);
 			return NULL;
 		}
-	} else {
-		/* trylock worked */
-		XB_SET_OWNER(bp);
 	}
 
 	if (bp->b_flags & XBF_STALE) {
@@ -876,10 +863,18 @@ xfs_buf_rele(
  */
 
 /*
- *	Locks a buffer object, if it is not already locked.
- *	Note that this in no way locks the underlying pages, so it is only
- *	useful for synchronizing concurrent use of buffer objects, not for
- *	synchronizing independent access to the underlying pages.
+ *	Locks a buffer object, if it is not already locked.  Note that this in
+ *	no way locks the underlying pages, so it is only useful for
+ *	synchronizing concurrent use of buffer objects, not for synchronizing
+ *	independent access to the underlying pages.
+ *
+ *	If we come across a stale, pinned, locked buffer, we know that we are
+ *	being asked to lock a buffer that has been reallocated. Because it is
+ *	pinned, we know that the log has not been pushed to disk and hence it
+ *	will still be locked.  Rather than continuing to have trylock attempts
+ *	fail until someone else pushes the log, push it ourselves before
+ *	returning.  This means that the xfsaild will not get stuck trying
+ *	to push on stale inode buffers.
  */
 int
 xfs_buf_cond_lock(
@@ -890,6 +885,8 @@ xfs_buf_cond_lock(
 	locked = down_trylock(&bp->b_sema) == 0;
 	if (locked)
 		XB_SET_OWNER(bp);
+	else if (atomic_read(&bp->b_pin_count) && (bp->b_flags & XBF_STALE))
+		xfs_log_force(bp->b_target->bt_mount, 0);
 
 	trace_xfs_buf_cond_lock(bp, _RET_IP_);
 	return locked ? 0 : -EBUSY;
@@ -1580,6 +1577,7 @@ xfs_mapping_buftarg(
 			XFS_BUFTARG_NAME(btp));
 		return ENOMEM;
 	}
+	inode->i_ino = get_next_ino();
 	inode->i_mode = S_IFBLK;
 	inode->i_bdev = bdev;
 	inode->i_rdev = bdev->bd_dev;
@@ -1780,7 +1778,6 @@ xfs_buf_delwri_split(
 	INIT_LIST_HEAD(list);
 	spin_lock(dwlk);
 	list_for_each_entry_safe(bp, n, dwq, b_list) {
-		trace_xfs_buf_delwri_split(bp, _RET_IP_);
 		ASSERT(bp->b_flags & XBF_DELWRI);
 
 		if (!XFS_BUF_ISPINNED(bp) && !xfs_buf_cond_lock(bp)) {
@@ -1794,6 +1791,7 @@ xfs_buf_delwri_split(
 					 _XBF_RUN_QUEUES);
 			bp->b_flags |= XBF_WRITE;
 			list_move_tail(&bp->b_list, list);
+			trace_xfs_buf_delwri_split(bp, _RET_IP_);
 		} else
 			skipped++;
 	}
diff --git a/fs/xfs/linux-2.6/xfs_ioctl.c b/fs/xfs/linux-2.6/xfs_ioctl.c
index 2ea238f6d38..ad442d9e392 100644
--- a/fs/xfs/linux-2.6/xfs_ioctl.c
+++ b/fs/xfs/linux-2.6/xfs_ioctl.c
@@ -416,7 +416,7 @@ xfs_attrlist_by_handle(
 	if (IS_ERR(dentry))
 		return PTR_ERR(dentry);
 
-	kbuf = kmalloc(al_hreq.buflen, GFP_KERNEL);
+	kbuf = kzalloc(al_hreq.buflen, GFP_KERNEL);
 	if (!kbuf)
 		goto out_dput;
 
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index ec858e09d54..94d5fd6a297 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -317,7 +317,7 @@ xfs_vn_link(
 	if (unlikely(error))
 		return -error;
 
-	atomic_inc(&inode->i_count);
+	ihold(inode);
 	d_instantiate(dentry, inode);
 	return 0;
 }
@@ -760,7 +760,10 @@ xfs_setup_inode(
 
 	inode->i_ino = ip->i_ino;
 	inode->i_state = I_NEW;
-	inode_add_to_lists(ip->i_mount->m_super, inode);
+
+	inode_sb_list_add(inode);
+	/* make the inode look hashed for the writeback code */
+	hlist_add_fake(&inode->i_hash);
 
 	inode->i_mode	= ip->i_d.di_mode;
 	inode->i_nlink	= ip->i_d.di_nlink;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index ab31ce5aeaf..064f964d4f3 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -353,9 +353,6 @@ xfs_parseargs(
 			mp->m_qflags &= ~XFS_OQUOTA_ENFD;
 		} else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
 			mp->m_flags |= XFS_MOUNT_DELAYLOG;
-			cmn_err(CE_WARN,
-				"Enabling EXPERIMENTAL delayed logging feature "
-				"- use at your own risk.\n");
 		} else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
 			mp->m_flags &= ~XFS_MOUNT_DELAYLOG;
 		} else if (!strcmp(this_char, "ihashsize")) {
@@ -576,7 +573,7 @@ xfs_max_file_offset(
 
 	/* Figure out maximum filesize, on Linux this can depend on
 	 * the filesystem blocksize (on 32 bit platforms).
-	 * __block_prepare_write does this in an [unsigned] long...
+	 * __block_write_begin does this in an [unsigned] long...
 	 *      page->index << (PAGE_CACHE_SHIFT - bbits)
 	 * So, for page sized blocks (4K on 32 bit platforms),
 	 * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
@@ -1609,16 +1606,14 @@ xfs_fs_fill_super(
 	goto out_free_sb;
 }
 
-STATIC int
-xfs_fs_get_sb(
+STATIC struct dentry *
+xfs_fs_mount(
 	struct file_system_type	*fs_type,
 	int			flags,
 	const char		*dev_name,
-	void			*data,
-	struct vfsmount		*mnt)
+	void			*data)
 {
-	return get_sb_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super,
-			   mnt);
+	return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
 }
 
 static const struct super_operations xfs_super_operations = {
@@ -1639,7 +1634,7 @@ static const struct super_operations xfs_super_operations = {
 static struct file_system_type xfs_fs_type = {
 	.owner			= THIS_MODULE,
 	.name			= "xfs",
-	.get_sb			= xfs_fs_get_sb,
+	.mount			= xfs_fs_mount,
 	.kill_sb		= kill_block_super,
 	.fs_flags		= FS_REQUIRES_DEV,
 };
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 37d33254981..afb0d7cfad1 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -853,6 +853,7 @@ restart:
 		if (trylock) {
 			if (!mutex_trylock(&pag->pag_ici_reclaim_lock)) {
 				skipped++;
+				xfs_perag_put(pag);
 				continue;
 			}
 			first_index = pag->pag_ici_reclaim_cursor;
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 8abd12e32e1..4111cd3966c 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -5471,8 +5471,13 @@ xfs_getbmap(
 			if (error)
 				goto out_unlock_iolock;
 		}
-
-		ASSERT(ip->i_delayed_blks == 0);
+		/*
+		 * even after flushing the inode, there can still be delalloc
+		 * blocks on the inode beyond EOF due to speculative
+		 * preallocation. These are not removed until the release
+		 * function is called or the inode is inactivated. Hence we
+		 * cannot assert here that ip->i_delayed_blks == 0.
+		 */
 	}
 
 	lock = xfs_ilock_map_shared(ip);
@@ -6070,3 +6075,79 @@ xfs_bmap_disk_count_leaves(
 		*count += xfs_bmbt_disk_get_blockcount(frp);
 	}
 }
+
+/*
+ * dead simple method of punching delalyed allocation blocks from a range in
+ * the inode. Walks a block at a time so will be slow, but is only executed in
+ * rare error cases so the overhead is not critical. This will alays punch out
+ * both the start and end blocks, even if the ranges only partially overlap
+ * them, so it is up to the caller to ensure that partial blocks are not
+ * passed in.
+ */
+int
+xfs_bmap_punch_delalloc_range(
+	struct xfs_inode	*ip,
+	xfs_fileoff_t		start_fsb,
+	xfs_fileoff_t		length)
+{
+	xfs_fileoff_t		remaining = length;
+	int			error = 0;
+
+	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+
+	do {
+		int		done;
+		xfs_bmbt_irec_t	imap;
+		int		nimaps = 1;
+		xfs_fsblock_t	firstblock;
+		xfs_bmap_free_t flist;
+
+		/*
+		 * Map the range first and check that it is a delalloc extent
+		 * before trying to unmap the range. Otherwise we will be
+		 * trying to remove a real extent (which requires a
+		 * transaction) or a hole, which is probably a bad idea...
+		 */
+		error = xfs_bmapi(NULL, ip, start_fsb, 1,
+				XFS_BMAPI_ENTIRE,  NULL, 0, &imap,
+				&nimaps, NULL);
+
+		if (error) {
+			/* something screwed, just bail */
+			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+				xfs_fs_cmn_err(CE_ALERT, ip->i_mount,
+			"Failed delalloc mapping lookup ino %lld fsb %lld.",
+						ip->i_ino, start_fsb);
+			}
+			break;
+		}
+		if (!nimaps) {
+			/* nothing there */
+			goto next_block;
+		}
+		if (imap.br_startblock != DELAYSTARTBLOCK) {
+			/* been converted, ignore */
+			goto next_block;
+		}
+		WARN_ON(imap.br_blockcount == 0);
+
+		/*
+		 * Note: while we initialise the firstblock/flist pair, they
+		 * should never be used because blocks should never be
+		 * allocated or freed for a delalloc extent and hence we need
+		 * don't cancel or finish them after the xfs_bunmapi() call.
+		 */
+		xfs_bmap_init(&flist, &firstblock);
+		error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
+					&flist, &done);
+		if (error)
+			break;
+
+		ASSERT(!flist.xbf_count && !flist.xbf_first);
+next_block:
+		start_fsb++;
+		remaining--;
+	} while(remaining > 0);
+
+	return error;
+}
diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h
index 71ec9b6ecdf..3651191daea 100644
--- a/fs/xfs/xfs_bmap.h
+++ b/fs/xfs/xfs_bmap.h
@@ -394,6 +394,11 @@ xfs_bmap_count_blocks(
 	int			whichfork,
 	int			*count);
 
+int
+xfs_bmap_punch_delalloc_range(
+	struct xfs_inode	*ip,
+	xfs_fileoff_t		start_fsb,
+	xfs_fileoff_t		length);
 #endif	/* __KERNEL__ */
 
 #endif	/* __XFS_BMAP_H__ */
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index 3b9582c60a2..e60490bc00a 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -377,6 +377,19 @@ xfs_swap_extents(
 	ip->i_d.di_format = tip->i_d.di_format;
 	tip->i_d.di_format = tmp;
 
+	/*
+	 * The extents in the source inode could still contain speculative
+	 * preallocation beyond EOF (e.g. the file is open but not modified
+	 * while defrag is in progress). In that case, we need to copy over the
+	 * number of delalloc blocks the data fork in the source inode is
+	 * tracking beyond EOF so that when the fork is truncated away when the
+	 * temporary inode is unlinked we don't underrun the i_delayed_blks
+	 * counter on that inode.
+	 */
+	ASSERT(tip->i_delayed_blks == 0);
+	tip->i_delayed_blks = ip->i_delayed_blks;
+	ip->i_delayed_blks = 0;
+
 	ilf_fields = XFS_ILOG_CORE;
 
 	switch(ip->i_d.di_format) {
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index ed999026766..c78cc6a3d87 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -58,6 +58,7 @@ xfs_error_trap(int e)
 int	xfs_etest[XFS_NUM_INJECT_ERROR];
 int64_t	xfs_etest_fsid[XFS_NUM_INJECT_ERROR];
 char *	xfs_etest_fsname[XFS_NUM_INJECT_ERROR];
+int	xfs_error_test_active;
 
 int
 xfs_error_test(int error_tag, int *fsidp, char *expression,
@@ -108,6 +109,7 @@ xfs_errortag_add(int error_tag, xfs_mount_t *mp)
 			len = strlen(mp->m_fsname);
 			xfs_etest_fsname[i] = kmem_alloc(len + 1, KM_SLEEP);
 			strcpy(xfs_etest_fsname[i], mp->m_fsname);
+			xfs_error_test_active++;
 			return 0;
 		}
 	}
@@ -137,6 +139,7 @@ xfs_errortag_clearall(xfs_mount_t *mp, int loud)
 			xfs_etest_fsid[i] = 0LL;
 			kmem_free(xfs_etest_fsname[i]);
 			xfs_etest_fsname[i] = NULL;
+			xfs_error_test_active--;
 		}
 	}
 
diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h
index c2c1a072bb8..f338847f80b 100644
--- a/fs/xfs/xfs_error.h
+++ b/fs/xfs/xfs_error.h
@@ -127,13 +127,14 @@ extern void xfs_corruption_error(const char *tag, int level,
 #define	XFS_RANDOM_BMAPIFORMAT				XFS_RANDOM_DEFAULT
 
 #ifdef DEBUG
+extern int xfs_error_test_active;
 extern int xfs_error_test(int, int *, char *, int, char *, unsigned long);
 
 #define	XFS_NUM_INJECT_ERROR				10
 #define XFS_TEST_ERROR(expr, mp, tag, rf)		\
-	((expr) || \
+	((expr) || (xfs_error_test_active && \
 	 xfs_error_test((tag), (mp)->m_fixedfsid, "expr", __LINE__, __FILE__, \
-			(rf)))
+			(rf))))
 
 extern int xfs_errortag_add(int error_tag, xfs_mount_t *mp);
 extern int xfs_errortag_clearall(xfs_mount_t *mp, int loud);
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 9b715dce569..9124425b7f2 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -744,9 +744,15 @@ xfs_filestream_new_ag(
 	 * If the file's parent directory is known, take its iolock in exclusive
 	 * mode to prevent two sibling files from racing each other to migrate
 	 * themselves and their parent to different AGs.
+	 *
+	 * Note that we lock the parent directory iolock inside the child
+	 * iolock here.  That's fine as we never hold both parent and child
+	 * iolock in any other place.  This is different from the ilock,
+	 * which requires locking of the child after the parent for namespace
+	 * operations.
 	 */
 	if (pip)
-		xfs_ilock(pip, XFS_IOLOCK_EXCL);
+		xfs_ilock(pip, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT);
 
 	/*
 	 * A new AG needs to be found for the file.  If the file's parent
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index fac52290de9..fb2ca2e4cdc 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -500,7 +500,7 @@ void		xfs_mark_inode_dirty_sync(xfs_inode_t *);
 #define IHOLD(ip) \
 do { \
 	ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
-	atomic_inc(&(VFS_I(ip)->i_count)); \
+	ihold(VFS_I(ip)); \
 	trace_xfs_ihold(ip, _THIS_IP_); \
 } while (0)
 
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index c7ac020705d..7c8d30c453c 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -657,18 +657,37 @@ xfs_inode_item_unlock(
 }
 
 /*
- * This is called to find out where the oldest active copy of the
- * inode log item in the on disk log resides now that the last log
- * write of it completed at the given lsn.  Since we always re-log
- * all dirty data in an inode, the latest copy in the on disk log
- * is the only one that matters.  Therefore, simply return the
- * given lsn.
+ * This is called to find out where the oldest active copy of the inode log
+ * item in the on disk log resides now that the last log write of it completed
+ * at the given lsn.  Since we always re-log all dirty data in an inode, the
+ * latest copy in the on disk log is the only one that matters.  Therefore,
+ * simply return the given lsn.
+ *
+ * If the inode has been marked stale because the cluster is being freed, we
+ * don't want to (re-)insert this inode into the AIL. There is a race condition
+ * where the cluster buffer may be unpinned before the inode is inserted into
+ * the AIL during transaction committed processing. If the buffer is unpinned
+ * before the inode item has been committed and inserted, then it is possible
+ * for the buffer to be written and IO completions before the inode is inserted
+ * into the AIL. In that case, we'd be inserting a clean, stale inode into the
+ * AIL which will never get removed. It will, however, get reclaimed which
+ * triggers an assert in xfs_inode_free() complaining about freein an inode
+ * still in the AIL.
+ *
+ * To avoid this, return a lower LSN than the one passed in so that the
+ * transaction committed code will not move the inode forward in the AIL but
+ * will still unpin it properly.
  */
 STATIC xfs_lsn_t
 xfs_inode_item_committed(
 	struct xfs_log_item	*lip,
 	xfs_lsn_t		lsn)
 {
+	struct xfs_inode_log_item *iip = INODE_ITEM(lip);
+	struct xfs_inode	*ip = iip->ili_inode;
+
+	if (xfs_iflags_test(ip, XFS_ISTALE))
+		return lsn - 1;
 	return lsn;
 }
 
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index b1498ab5a39..19e9dfa1c25 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -275,6 +275,7 @@ xfs_free_perag(
 		pag = radix_tree_delete(&mp->m_perag_tree, agno);
 		spin_unlock(&mp->m_perag_lock);
 		ASSERT(pag);
+		ASSERT(atomic_read(&pag->pag_ref) == 0);
 		call_rcu(&pag->rcu_head, __xfs_free_perag);
 	}
 }
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index e0e64b113bd..9bb6eda4cd2 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -346,8 +346,17 @@ xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
 #define xfs_trans_mod_dquot_byino(tp, ip, fields, delta)
 #define xfs_trans_apply_dquot_deltas(tp)
 #define xfs_trans_unreserve_and_mod_dquots(tp)
-#define xfs_trans_reserve_quota_nblks(tp, ip, nblks, ninos, flags)	(0)
-#define xfs_trans_reserve_quota_bydquots(tp, mp, u, g, nb, ni, fl)	(0)
+static inline int xfs_trans_reserve_quota_nblks(struct xfs_trans *tp,
+		struct xfs_inode *ip, long nblks, long ninos, uint flags)
+{
+	return 0;
+}
+static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp,
+		struct xfs_mount *mp, struct xfs_dquot *udqp,
+		struct xfs_dquot *gdqp, long nblks, long nions, uint flags)
+{
+	return 0;
+}
 #define xfs_qm_vop_create_dqattach(tp, ip, u, g)
 #define xfs_qm_vop_rename_dqattach(it)					(0)
 #define xfs_qm_vop_chown(tp, ip, old, new)				(NULL)
@@ -357,11 +366,14 @@ xfs_qm_vop_dqalloc(struct xfs_inode *ip, uid_t uid, gid_t gid, prid_t prid,
 #define xfs_qm_dqdetach(ip)
 #define xfs_qm_dqrele(d)
 #define xfs_qm_statvfs(ip, s)
-#define xfs_qm_sync(mp, fl)						(0)
+static inline int xfs_qm_sync(struct xfs_mount *mp, int flags)
+{
+	return 0;
+}
 #define xfs_qm_newmount(mp, a, b)					(0)
 #define xfs_qm_mount_quotas(mp)
 #define xfs_qm_unmount(mp)
-#define xfs_qm_unmount_quotas(mp)					(0)
+#define xfs_qm_unmount_quotas(mp)
 #endif /* CONFIG_XFS_QUOTA */
 
 #define xfs_trans_unreserve_quota_nblks(tp, ip, nblks, ninos, flags) \
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index d2af0a8381a..77a59891734 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -297,6 +297,7 @@ xfs_rename(
 	 * it and some incremental backup programs won't work without it.
 	 */
 	xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
+	xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE);
 
 	/*
 	 * Adjust the link count on src_dp.  This is necessary when