22 files changed, 317 insertions, 348 deletions
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index b6ab27ccf21..811384bec8d 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -68,11 +68,7 @@
  * Here we can be a bit looser than the data sections since this
  * needs to only meet arch ABI requirements.
  */
-#ifdef ARCH_SLAB_MINALIGN
-#define FLAT_STACK_ALIGN	(ARCH_SLAB_MINALIGN)
-#else
-#define FLAT_STACK_ALIGN	(sizeof(void *))
-#endif
+#define FLAT_STACK_ALIGN	max_t(unsigned long, sizeof(void *), ARCH_SLAB_MINALIGN)
 
 #define RELOC_FAILED 0xff00ff01		/* Relocation incorrect somewhere */
 #define UNLOADED_LIB 0x7ff000ff		/* Placeholder for unused library */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 78c02eb4cb1..484e52bb40b 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -473,14 +473,24 @@ static int cifs_remount(struct super_block *sb, int *flags, char *data)
 	return 0;
 }
 
+void cifs_drop_inode(struct inode *inode)
+{
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
+		return generic_drop_inode(inode);
+
+	return generic_delete_inode(inode);
+}
+
 static const struct super_operations cifs_super_ops = {
 	.put_super = cifs_put_super,
 	.statfs = cifs_statfs,
 	.alloc_inode = cifs_alloc_inode,
 	.destroy_inode = cifs_destroy_inode,
-/*	.drop_inode	    = generic_delete_inode,
-	.delete_inode	= cifs_delete_inode,  */  /* Do not need above two
-	functions unless later we add lazy close of inodes or unless the
+	.drop_inode	= cifs_drop_inode,
+/*	.delete_inode	= cifs_delete_inode,  */  /* Do not need above
+	function unless later we add lazy close of inodes or unless the
 	kernel forgets to call us with the same number of releases (closes)
 	as opens */
 	.show_options = cifs_show_options,
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index fb1657e0fdb..fb6318b8150 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -106,7 +106,6 @@ extern struct cifsFileInfo *cifs_new_fileinfo(struct inode *newinode,
 				__u16 fileHandle, struct file *file,
 				struct vfsmount *mnt, unsigned int oflags);
 extern int cifs_posix_open(char *full_path, struct inode **pinode,
-				struct vfsmount *mnt,
 				struct super_block *sb,
 				int mode, int oflags,
 				__u32 *poplock, __u16 *pnetfid, int xid);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 391816b461c..e7ae78b66fa 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -25,6 +25,7 @@
 #include <linux/slab.h>
 #include <linux/namei.h>
 #include <linux/mount.h>
+#include <linux/file.h>
 #include "cifsfs.h"
 #include "cifspdu.h"
 #include "cifsglob.h"
@@ -184,12 +185,13 @@ cifs_new_fileinfo(struct inode *newinode, __u16 fileHandle,
 	}
 	write_unlock(&GlobalSMBSeslock);
 
+	file->private_data = pCifsFile;
+
 	return pCifsFile;
 }
 
 int cifs_posix_open(char *full_path, struct inode **pinode,
-			struct vfsmount *mnt, struct super_block *sb,
-			int mode, int oflags,
+			struct super_block *sb, int mode, int oflags,
 			__u32 *poplock, __u16 *pnetfid, int xid)
 {
 	int rc;
@@ -258,19 +260,6 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
 		cifs_fattr_to_inode(*pinode, &fattr);
 	}
 
-	/*
-	 * cifs_fill_filedata() takes care of setting cifsFileInfo pointer to
-	 * file->private_data.
-	 */
-	if (mnt) {
-		struct cifsFileInfo *pfile_info;
-
-		pfile_info = cifs_new_fileinfo(*pinode, *pnetfid, NULL, mnt,
-					       oflags);
-		if (pfile_info == NULL)
-			rc = -ENOMEM;
-	}
-
 posix_open_ret:
 	kfree(presp_data);
 	return rc;
@@ -298,7 +287,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 	int create_options = CREATE_NOT_DIR;
 	__u32 oplock = 0;
 	int oflags;
-	bool posix_create = false;
 	/*
 	 * BB below access is probably too much for mknod to request
 	 *    but we have to do query and setpathinfo so requesting
@@ -339,7 +327,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
 			le64_to_cpu(tcon->fsUnixInfo.Capability))) {
 		rc = cifs_posix_open(full_path, &newinode,
-			nd ? nd->path.mnt : NULL,
 			inode->i_sb, mode, oflags, &oplock, &fileHandle, xid);
 		/* EIO could indicate that (posix open) operation is not
 		   supported, despite what server claimed in capability
@@ -347,7 +334,6 @@ cifs_create(struct inode *inode, struct dentry *direntry, int mode,
 		   handled in posix open */
 
 		if (rc == 0) {
-			posix_create = true;
 			if (newinode == NULL) /* query inode info */
 				goto cifs_create_get_file_info;
 			else /* success, no need to query */
@@ -478,21 +464,28 @@ cifs_create_set_dentry:
 	else
 		cFYI(1, "Create worked, get_inode_info failed rc = %d", rc);
 
-	/* nfsd case - nfs srv does not set nd */
-	if ((nd == NULL) || (!(nd->flags & LOOKUP_OPEN))) {
-		/* mknod case - do not leave file open */
-		CIFSSMBClose(xid, tcon, fileHandle);
-	} else if (!(posix_create) && (newinode)) {
+	if (newinode && nd && (nd->flags & LOOKUP_OPEN)) {
 		struct cifsFileInfo *pfile_info;
-		/*
-		 * cifs_fill_filedata() takes care of setting cifsFileInfo
-		 * pointer to file->private_data.
-		 */
-		pfile_info = cifs_new_fileinfo(newinode, fileHandle, NULL,
+		struct file *filp;
+
+		filp = lookup_instantiate_filp(nd, direntry, generic_file_open);
+		if (IS_ERR(filp)) {
+			rc = PTR_ERR(filp);
+			CIFSSMBClose(xid, tcon, fileHandle);
+			goto cifs_create_out;
+		}
+
+		pfile_info = cifs_new_fileinfo(newinode, fileHandle, filp,
 					       nd->path.mnt, oflags);
-		if (pfile_info == NULL)
+		if (pfile_info == NULL) {
+			fput(filp);
+			CIFSSMBClose(xid, tcon, fileHandle);
 			rc = -ENOMEM;
+		}
+	} else {
+		CIFSSMBClose(xid, tcon, fileHandle);
 	}
+
 cifs_create_out:
 	kfree(buf);
 	kfree(full_path);
@@ -636,6 +629,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 	bool posix_open = false;
 	struct cifs_sb_info *cifs_sb;
 	struct cifsTconInfo *pTcon;
+	struct cifsFileInfo *cfile;
 	struct inode *newInode = NULL;
 	char *full_path = NULL;
 	struct file *filp;
@@ -703,7 +697,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 		if (nd && !(nd->flags & (LOOKUP_PARENT | LOOKUP_DIRECTORY)) &&
 		     (nd->flags & LOOKUP_OPEN) && !pTcon->broken_posix_open &&
 		     (nd->intent.open.flags & O_CREAT)) {
-			rc = cifs_posix_open(full_path, &newInode, nd->path.mnt,
+			rc = cifs_posix_open(full_path, &newInode,
 					parent_dir_inode->i_sb,
 					nd->intent.open.create_mode,
 					nd->intent.open.flags, &oplock,
@@ -733,8 +727,25 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 		else
 			direntry->d_op = &cifs_dentry_ops;
 		d_add(direntry, newInode);
-		if (posix_open)
-			filp = lookup_instantiate_filp(nd, direntry, NULL);
+		if (posix_open) {
+			filp = lookup_instantiate_filp(nd, direntry,
+						       generic_file_open);
+			if (IS_ERR(filp)) {
+				rc = PTR_ERR(filp);
+				CIFSSMBClose(xid, pTcon, fileHandle);
+				goto lookup_out;
+			}
+
+			cfile = cifs_new_fileinfo(newInode, fileHandle, filp,
+						  nd->path.mnt,
+						  nd->intent.open.flags);
+			if (cfile == NULL) {
+				fput(filp);
+				CIFSSMBClose(xid, pTcon, fileHandle);
+				rc = -ENOMEM;
+				goto lookup_out;
+			}
+		}
 		/* since paths are not looked up by component - the parent
 		   directories are presumed to be good here */
 		renew_parental_timestamps(direntry);
@@ -755,6 +766,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry,
 		is a common return code */
 	}
 
+lookup_out:
 	kfree(full_path);
 	FreeXid(xid);
 	return ERR_PTR(rc);
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 75541af4b3d..409e4f523e6 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -162,44 +162,12 @@ psx_client_can_cache:
 	return 0;
 }
 
-static struct cifsFileInfo *
-cifs_fill_filedata(struct file *file)
-{
-	struct list_head *tmp;
-	struct cifsFileInfo *pCifsFile = NULL;
-	struct cifsInodeInfo *pCifsInode = NULL;
-
-	/* search inode for this file and fill in file->private_data */
-	pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
-	read_lock(&GlobalSMBSeslock);
-	list_for_each(tmp, &pCifsInode->openFileList) {
-		pCifsFile = list_entry(tmp, struct cifsFileInfo, flist);
-		if ((pCifsFile->pfile == NULL) &&
-		    (pCifsFile->pid == current->tgid)) {
-			/* mode set in cifs_create */
-
-			/* needed for writepage */
-			pCifsFile->pfile = file;
-			file->private_data = pCifsFile;
-			break;
-		}
-	}
-	read_unlock(&GlobalSMBSeslock);
-
-	if (file->private_data != NULL) {
-		return pCifsFile;
-	} else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL))
-			cERROR(1, "could not find file instance for "
-				   "new file %p", file);
-	return NULL;
-}
-
 /* all arguments to this function must be checked for validity in caller */
-static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
-	struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
+static inline int cifs_open_inode_helper(struct inode *inode,
 	struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf,
 	char *full_path, int xid)
 {
+	struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
 	struct timespec temp;
 	int rc;
 
@@ -213,36 +181,35 @@ static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
 	/* if not oplocked, invalidate inode pages if mtime or file
 	   size changed */
 	temp = cifs_NTtimeToUnix(buf->LastWriteTime);
-	if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
-			   (file->f_path.dentry->d_inode->i_size ==
+	if (timespec_equal(&inode->i_mtime, &temp) &&
+			   (inode->i_size ==
 			    (loff_t)le64_to_cpu(buf->EndOfFile))) {
 		cFYI(1, "inode unchanged on server");
 	} else {
-		if (file->f_path.dentry->d_inode->i_mapping) {
+		if (inode->i_mapping) {
 			/* BB no need to lock inode until after invalidate
 			since namei code should already have it locked? */
-			rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
+			rc = filemap_write_and_wait(inode->i_mapping);
 			if (rc != 0)
-				CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
+				pCifsInode->write_behind_rc = rc;
 		}
 		cFYI(1, "invalidating remote inode since open detected it "
 			 "changed");
-		invalidate_remote_inode(file->f_path.dentry->d_inode);
+		invalidate_remote_inode(inode);
 	}
 
 client_can_cache:
 	if (pTcon->unix_ext)
-		rc = cifs_get_inode_info_unix(&file->f_path.dentry->d_inode,
-			full_path, inode->i_sb, xid);
+		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
+					      xid);
 	else
-		rc = cifs_get_inode_info(&file->f_path.dentry->d_inode,
-			full_path, buf, inode->i_sb, xid, NULL);
+		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
+					 xid, NULL);
 
 	if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
 		pCifsInode->clientCanCacheAll = true;
 		pCifsInode->clientCanCacheRead = true;
-		cFYI(1, "Exclusive Oplock granted on inode %p",
-			 file->f_path.dentry->d_inode);
+		cFYI(1, "Exclusive Oplock granted on inode %p", inode);
 	} else if ((*oplock & 0xF) == OPLOCK_READ)
 		pCifsInode->clientCanCacheRead = true;
 
@@ -256,7 +223,7 @@ int cifs_open(struct inode *inode, struct file *file)
 	__u32 oplock;
 	struct cifs_sb_info *cifs_sb;
 	struct cifsTconInfo *tcon;
-	struct cifsFileInfo *pCifsFile;
+	struct cifsFileInfo *pCifsFile = NULL;
 	struct cifsInodeInfo *pCifsInode;
 	char *full_path = NULL;
 	int desiredAccess;
@@ -270,12 +237,6 @@ int cifs_open(struct inode *inode, struct file *file)
 	tcon = cifs_sb->tcon;
 
 	pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
-	pCifsFile = cifs_fill_filedata(file);
-	if (pCifsFile) {
-		rc = 0;
-		FreeXid(xid);
-		return rc;
-	}
 
 	full_path = build_path_from_dentry(file->f_path.dentry);
 	if (full_path == NULL) {
@@ -299,8 +260,7 @@ int cifs_open(struct inode *inode, struct file *file)
 		int oflags = (int) cifs_posix_convert_flags(file->f_flags);
 		oflags |= SMB_O_CREAT;
 		/* can not refresh inode info since size could be stale */
-		rc = cifs_posix_open(full_path, &inode, file->f_path.mnt,
-				inode->i_sb,
+		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
 				cifs_sb->mnt_file_mode /* ignored */,
 				oflags, &oplock, &netfid, xid);
 		if (rc == 0) {
@@ -308,9 +268,20 @@ int cifs_open(struct inode *inode, struct file *file)
 			/* no need for special case handling of setting mode
 			   on read only files needed here */
 
-			pCifsFile = cifs_fill_filedata(file);
-			cifs_posix_open_inode_helper(inode, file, pCifsInode,
-						     oplock, netfid);
+			rc = cifs_posix_open_inode_helper(inode, file,
+					pCifsInode, oplock, netfid);
+			if (rc != 0) {
+				CIFSSMBClose(xid, tcon, netfid);
+				goto out;
+			}
+
+			pCifsFile = cifs_new_fileinfo(inode, netfid, file,
+							file->f_path.mnt,
+							oflags);
+			if (pCifsFile == NULL) {
+				CIFSSMBClose(xid, tcon, netfid);
+				rc = -ENOMEM;
+			}
 			goto out;
 		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
 			if (tcon->ses->serverNOS)
@@ -391,17 +362,17 @@ int cifs_open(struct inode *inode, struct file *file)
 		goto out;
 	}
 
+	rc = cifs_open_inode_helper(inode, tcon, &oplock, buf, full_path, xid);
+	if (rc != 0)
+		goto out;
+
 	pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt,
 					file->f_flags);
-	file->private_data = pCifsFile;
-	if (file->private_data == NULL) {
+	if (pCifsFile == NULL) {
 		rc = -ENOMEM;
 		goto out;
 	}
 
-	rc = cifs_open_inode_helper(inode, file, pCifsInode, pCifsFile, tcon,
-				    &oplock, buf, full_path, xid);
-
 	if (oplock & CIFS_CREATE_ACTION) {
 		/* time to set mode which we can not set earlier due to
 		   problems creating new read-only files */
@@ -513,8 +484,7 @@ reopen_error_exit:
 			le64_to_cpu(tcon->fsUnixInfo.Capability))) {
 		int oflags = (int) cifs_posix_convert_flags(file->f_flags);
 		/* can not refresh inode info since size could be stale */
-		rc = cifs_posix_open(full_path, NULL, file->f_path.mnt,
-				inode->i_sb,
+		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
 				cifs_sb->mnt_file_mode /* ignored */,
 				oflags, &oplock, &netfid, xid);
 		if (rc == 0) {
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 62b324f26a5..6f0683c6895 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1401,6 +1401,10 @@ cifs_do_rename(int xid, struct dentry *from_dentry, const char *fromPath,
 	if (rc == 0 || rc != -ETXTBSY)
 		return rc;
 
+	/* open-file renames don't work across directories */
+	if (to_dentry->d_parent != from_dentry->d_parent)
+		return rc;
+
 	/* open the file to be renamed -- we need DELETE perms */
 	rc = CIFSSMBOpen(xid, pTcon, fromPath, FILE_OPEN, DELETE,
 			 CREATE_NOT_DIR, &srcfid, &oplock, NULL,
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 7707389bdf2..0a57cb7db5d 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -730,15 +730,7 @@ ssetup_ntlmssp_authenticate:
 
 		/* calculate session key */
 		setup_ntlmv2_rsp(ses, v2_sess_key, nls_cp);
-		if (first_time) /* should this be moved into common code
-				   with similar ntlmv2 path? */
-		/*   cifs_calculate_ntlmv2_mac_key(ses->server->mac_signing_key,
-				response BB FIXME, v2_sess_key); */
-
-		/* copy session key */
-
-	/*	memcpy(bcc_ptr, (char *)ntlm_session_key,LM2_SESS_KEY_SIZE);
-		bcc_ptr += LM2_SESS_KEY_SIZE; */
+		/* FIXME: calculate MAC key */
 		memcpy(bcc_ptr, (char *)v2_sess_key,
 		       sizeof(struct ntlmv2_resp));
 		bcc_ptr += sizeof(struct ntlmv2_resp);
diff --git a/fs/dcache.c b/fs/dcache.c
index d96047b4a63..c8c78ba0782 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -590,6 +590,8 @@ static void prune_dcache(int count)
 			up_read(&sb->s_umount);
 		}
 		spin_lock(&sb_lock);
+		/* lock was dropped, must reset next */
+		list_safe_reset_next(sb, n, s_list);
 		count -= pruned;
 		__put_super(sb);
 		/* more work left to do? */
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index ca7e2a0ed98..2bcc0431bad 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -200,6 +200,7 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl)
 					return error;
 				else {
 					inode->i_mode = mode;
+					inode->i_ctime = CURRENT_TIME_SEC;
 					mark_inode_dirty(inode);
 					if (error == 0)
 						acl = NULL;
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index 01552abbca3..8a11fe21218 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -205,6 +205,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type,
 					return error;
 				else {
 					inode->i_mode = mode;
+					inode->i_ctime = CURRENT_TIME_SEC;
 					ext3_mark_inode_dirty(handle, inode);
 					if (error == 0)
 						acl = NULL;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 51e11bf5708..9d175d623aa 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -733,12 +733,14 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
 {
 	while (fa) {
 		struct fown_struct *fown;
+		unsigned long flags;
+
 		if (fa->magic != FASYNC_MAGIC) {
 			printk(KERN_ERR "kill_fasync: bad magic number in "
 			       "fasync_struct!\n");
 			return;
 		}
-		spin_lock(&fa->fa_lock);
+		spin_lock_irqsave(&fa->fa_lock, flags);
 		if (fa->fa_file) {
 			fown = &fa->fa_file->f_owner;
 			/* Don't send SIGURG to processes which have not set a
@@ -747,7 +749,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
 			if (!(sig == SIGURG && fown->signum == 0))
 				send_sigio(fown, fa->fa_fd, band);
 		}
-		spin_unlock(&fa->fa_lock);
+		spin_unlock_irqrestore(&fa->fa_lock, flags);
 		fa = rcu_dereference(fa->fa_next);
 	}
 }
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 1d1088f48bc..0609607d395 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -63,24 +63,16 @@ struct bdi_work {
 };
 
 enum {
-	WS_USED_B = 0,
-	WS_ONSTACK_B,
+	WS_INPROGRESS = 0,
+	WS_ONSTACK,
 };
 
-#define WS_USED (1 << WS_USED_B)
-#define WS_ONSTACK (1 << WS_ONSTACK_B)
-
-static inline bool bdi_work_on_stack(struct bdi_work *work)
-{
-	return test_bit(WS_ONSTACK_B, &work->state);
-}
-
 static inline void bdi_work_init(struct bdi_work *work,
 				 struct wb_writeback_args *args)
 {
 	INIT_RCU_HEAD(&work->rcu_head);
 	work->args = *args;
-	work->state = WS_USED;
+	__set_bit(WS_INPROGRESS, &work->state);
 }
 
 /**
@@ -95,43 +87,16 @@ int writeback_in_progress(struct backing_dev_info *bdi)
 	return !list_empty(&bdi->work_list);
 }
 
-static void bdi_work_clear(struct bdi_work *work)
-{
-	clear_bit(WS_USED_B, &work->state);
-	smp_mb__after_clear_bit();
-	/*
-	 * work can have disappeared at this point. bit waitq functions
-	 * should be able to tolerate this, provided bdi_sched_wait does
-	 * not dereference it's pointer argument.
-	*/
-	wake_up_bit(&work->state, WS_USED_B);
-}
-
 static void bdi_work_free(struct rcu_head *head)
 {
 	struct bdi_work *work = container_of(head, struct bdi_work, rcu_head);
 
-	if (!bdi_work_on_stack(work))
-		kfree(work);
-	else
-		bdi_work_clear(work);
-}
-
-static void wb_work_complete(struct bdi_work *work)
-{
-	const enum writeback_sync_modes sync_mode = work->args.sync_mode;
-	int onstack = bdi_work_on_stack(work);
+	clear_bit(WS_INPROGRESS, &work->state);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&work->state, WS_INPROGRESS);
 
-	/*
-	 * For allocated work, we can clear the done/seen bit right here.
-	 * For on-stack work, we need to postpone both the clear and free
-	 * to after the RCU grace period, since the stack could be invalidated
-	 * as soon as bdi_work_clear() has done the wakeup.
-	 */
-	if (!onstack)
-		bdi_work_clear(work);
-	if (sync_mode == WB_SYNC_NONE || onstack)
-		call_rcu(&work->rcu_head, bdi_work_free);
+	if (!test_bit(WS_ONSTACK, &work->state))
+		kfree(work);
 }
 
 static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
@@ -147,7 +112,7 @@ static void wb_clear_pending(struct bdi_writeback *wb, struct bdi_work *work)
 		list_del_rcu(&work->list);
 		spin_unlock(&bdi->wb_lock);
 
-		wb_work_complete(work);
+		call_rcu(&work->rcu_head, bdi_work_free);
 	}
 }
 
@@ -185,9 +150,9 @@ static void bdi_queue_work(struct backing_dev_info *bdi, struct bdi_work *work)
  * Used for on-stack allocated work items. The caller needs to wait until
  * the wb threads have acked the work before it's safe to continue.
  */
-static void bdi_wait_on_work_clear(struct bdi_work *work)
+static void bdi_wait_on_work_done(struct bdi_work *work)
 {
-	wait_on_bit(&work->state, WS_USED_B, bdi_sched_wait,
+	wait_on_bit(&work->state, WS_INPROGRESS, bdi_sched_wait,
 		    TASK_UNINTERRUPTIBLE);
 }
 
@@ -213,37 +178,28 @@ static void bdi_alloc_queue_work(struct backing_dev_info *bdi,
 }
 
 /**
- * bdi_sync_writeback - start and wait for writeback
- * @bdi: the backing device to write from
+ * bdi_queue_work_onstack - start and wait for writeback
  * @sb: write inodes from this super_block
  *
  * Description:
- *   This does WB_SYNC_ALL data integrity writeback and waits for the
- *   IO to complete. Callers must hold the sb s_umount semaphore for
+ *   This function initiates writeback and waits for the operation to
+ *   complete. Callers must hold the sb s_umount semaphore for
  *   reading, to avoid having the super disappear before we are done.
  */
-static void bdi_sync_writeback(struct backing_dev_info *bdi,
-			       struct super_block *sb)
+static void bdi_queue_work_onstack(struct wb_writeback_args *args)
 {
-	struct wb_writeback_args args = {
-		.sb		= sb,
-		.sync_mode	= WB_SYNC_ALL,
-		.nr_pages	= LONG_MAX,
-		.range_cyclic	= 0,
-	};
 	struct bdi_work work;
 
-	bdi_work_init(&work, &args);
-	work.state |= WS_ONSTACK;
+	bdi_work_init(&work, args);
+	__set_bit(WS_ONSTACK, &work.state);
 
-	bdi_queue_work(bdi, &work);
-	bdi_wait_on_work_clear(&work);
+	bdi_queue_work(args->sb->s_bdi, &work);
+	bdi_wait_on_work_done(&work);
 }
 
 /**
  * bdi_start_writeback - start writeback
  * @bdi: the backing device to write from
- * @sb: write inodes from this super_block
  * @nr_pages: the number of pages to write
  *
  * Description:
@@ -252,25 +208,34 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
  *   completion. Caller need not hold sb s_umount semaphore.
  *
  */
-void bdi_start_writeback(struct backing_dev_info *bdi, struct super_block *sb,
-			 long nr_pages)
+void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages)
 {
 	struct wb_writeback_args args = {
-		.sb		= sb,
 		.sync_mode	= WB_SYNC_NONE,
 		.nr_pages	= nr_pages,
 		.range_cyclic	= 1,
 	};
 
-	/*
-	 * We treat @nr_pages=0 as the special case to do background writeback,
-	 * ie. to sync pages until the background dirty threshold is reached.
-	 */
-	if (!nr_pages) {
-		args.nr_pages = LONG_MAX;
-		args.for_background = 1;
-	}
+	bdi_alloc_queue_work(bdi, &args);
+}
 
+/**
+ * bdi_start_background_writeback - start background writeback
+ * @bdi: the backing device to write from
+ *
+ * Description:
+ *   This does WB_SYNC_NONE background writeback. The IO is only
+ *   started when this function returns, we make no guarentees on
+ *   completion. Caller need not hold sb s_umount semaphore.
+ */
+void bdi_start_background_writeback(struct backing_dev_info *bdi)
+{
+	struct wb_writeback_args args = {
+		.sync_mode	= WB_SYNC_NONE,
+		.nr_pages	= LONG_MAX,
+		.for_background = 1,
+		.range_cyclic	= 1,
+	};
 	bdi_alloc_queue_work(bdi, &args);
 }
 
@@ -561,48 +526,30 @@ select_queue:
 	return ret;
 }
 
-static void unpin_sb_for_writeback(struct super_block *sb)
-{
-	up_read(&sb->s_umount);
-	put_super(sb);
-}
-
-enum sb_pin_state {
-	SB_PINNED,
-	SB_NOT_PINNED,
-	SB_PIN_FAILED
-};
-
 /*
- * For WB_SYNC_NONE writeback, the caller does not have the sb pinned
+ * For background writeback the caller does not have the sb pinned
  * before calling writeback. So make sure that we do pin it, so it doesn't
  * go away while we are writing inodes from it.
  */
-static enum sb_pin_state pin_sb_for_writeback(struct writeback_control *wbc,
-					      struct super_block *sb)
+static bool pin_sb_for_writeback(struct super_block *sb)
 {
-	/*
-	 * Caller must already hold the ref for this
-	 */
-	if (wbc->sync_mode == WB_SYNC_ALL) {
-		WARN_ON(!rwsem_is_locked(&sb->s_umount));
-		return SB_NOT_PINNED;
-	}
 	spin_lock(&sb_lock);
+	if (list_empty(&sb->s_instances)) {
+		spin_unlock(&sb_lock);
+		return false;
+	}
+
 	sb->s_count++;
+	spin_unlock(&sb_lock);
+
 	if (down_read_trylock(&sb->s_umount)) {
-		if (sb->s_root) {
-			spin_unlock(&sb_lock);
-			return SB_PINNED;
-		}
-		/*
-		 * umounted, drop rwsem again and fall through to failure
-		 */
+		if (sb->s_root)
+			return true;
 		up_read(&sb->s_umount);
 	}
-	sb->s_count--;
-	spin_unlock(&sb_lock);
-	return SB_PIN_FAILED;
+
+	put_super(sb);
+	return false;
 }
 
 /*
@@ -681,24 +628,31 @@ static void writeback_inodes_wb(struct bdi_writeback *wb,
 		struct inode *inode = list_entry(wb->b_io.prev,
 						 struct inode, i_list);
 		struct super_block *sb = inode->i_sb;
-		enum sb_pin_state state;
 
-		if (wbc->sb && sb != wbc->sb) {
-			/* super block given and doesn't
-			   match, skip this inode */
-			redirty_tail(inode);
-			continue;
-		}
-		state = pin_sb_for_writeback(wbc, sb);
+		if (wbc->sb) {
+			/*
+			 * We are requested to write out inodes for a specific
+			 * superblock.  This means we already have s_umount
+			 * taken by the caller which also waits for us to
+			 * complete the writeout.
+			 */
+			if (sb != wbc->sb) {
+				redirty_tail(inode);
+				continue;
+			}
 
-		if (state == SB_PIN_FAILED) {
-			requeue_io(inode);
-			continue;
+			WARN_ON(!rwsem_is_locked(&sb->s_umount));
+
+			ret = writeback_sb_inodes(sb, wb, wbc);
+		} else {
+			if (!pin_sb_for_writeback(sb)) {
+				requeue_io(inode);
+				continue;
+			}
+			ret = writeback_sb_inodes(sb, wb, wbc);
+			drop_super(sb);
 		}
-		ret = writeback_sb_inodes(sb, wb, wbc);
 
-		if (state == SB_PINNED)
-			unpin_sb_for_writeback(sb);
 		if (ret)
 			break;
 	}
@@ -911,7 +865,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 		 * If this isn't a data integrity operation, just notify
 		 * that we have seen this work and we are now starting it.
 		 */
-		if (args.sync_mode == WB_SYNC_NONE)
+		if (!test_bit(WS_ONSTACK, &work->state))
 			wb_clear_pending(wb, work);
 
 		wrote += wb_writeback(wb, &args);
@@ -920,7 +874,7 @@ long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
 		 * This is a data integrity writeback, so only do the
 		 * notification when we have completed the work.
 		 */
-		if (args.sync_mode == WB_SYNC_ALL)
+		if (test_bit(WS_ONSTACK, &work->state))
 			wb_clear_pending(wb, work);
 	}
 
@@ -978,42 +932,32 @@ int bdi_writeback_task(struct bdi_writeback *wb)
 }
 
 /*
- * Schedule writeback for all backing devices. This does WB_SYNC_NONE
- * writeback, for integrity writeback see bdi_sync_writeback().
+ * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
+ * the whole world.
  */
-static void bdi_writeback_all(struct super_block *sb, long nr_pages)
+void wakeup_flusher_threads(long nr_pages)
 {
+	struct backing_dev_info *bdi;
 	struct wb_writeback_args args = {
-		.sb		= sb,
-		.nr_pages	= nr_pages,
 		.sync_mode	= WB_SYNC_NONE,
 	};
-	struct backing_dev_info *bdi;
 
-	rcu_read_lock();
+	if (nr_pages) {
+		args.nr_pages = nr_pages;
+	} else {
+		args.nr_pages = global_page_state(NR_FILE_DIRTY) +
+				global_page_state(NR_UNSTABLE_NFS);
+	}
 
+	rcu_read_lock();
 	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
 		if (!bdi_has_dirty_io(bdi))
 			continue;
-
 		bdi_alloc_queue_work(bdi, &args);
 	}
-
 	rcu_read_unlock();
 }
 
-/*
- * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
- * the whole world.
- */
-void wakeup_flusher_threads(long nr_pages)
-{
-	if (nr_pages == 0)
-		nr_pages = global_page_state(NR_FILE_DIRTY) +
-				global_page_state(NR_UNSTABLE_NFS);
-	bdi_writeback_all(NULL, nr_pages);
-}
-
 static noinline void block_dump___mark_inode_dirty(struct inode *inode)
 {
 	if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
@@ -1218,12 +1162,17 @@ void writeback_inodes_sb(struct super_block *sb)
 {
 	unsigned long nr_dirty = global_page_state(NR_FILE_DIRTY);
 	unsigned long nr_unstable = global_page_state(NR_UNSTABLE_NFS);
-	long nr_to_write;
+	struct wb_writeback_args args = {
+		.sb		= sb,
+		.sync_mode	= WB_SYNC_NONE,
+	};
+
+	WARN_ON(!rwsem_is_locked(&