1 files changed, 3148 insertions, 1299 deletions
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index da4f5e10b3c..e90a1e9aa62 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2,9 +2,10 @@
  *   fs/cifs/file.c
  *
  *   vfs operations that deal with files
- * 
- *   Copyright (C) International Business Machines  Corp., 2002,2003
+ *
+ *   Copyright (C) International Business Machines  Corp., 2002,2010
  *   Author(s): Steve French (sfrench@us.ibm.com)
+ *              Jeremy Allison (jra@samba.org)
  *
  *   This library is free software; you can redistribute it and/or modify
  *   it under the terms of the GNU Lesser General Public License as published
@@ -24,12 +25,14 @@
 #include <linux/backing-dev.h>
 #include <linux/stat.h>
 #include <linux/fcntl.h>
-#include <linux/mpage.h>
 #include <linux/pagemap.h>
 #include <linux/pagevec.h>
-#include <linux/smp_lock.h>
 #include <linux/writeback.h>
+#include <linux/task_io_accounting_ops.h>
 #include <linux/delay.h>
+#include <linux/mount.h>
+#include <linux/slab.h>
+#include <linux/swap.h>
 #include <asm/div64.h>
 #include "cifsfs.h"
 #include "cifspdu.h"
@@ -38,27 +41,8 @@
 #include "cifs_unicode.h"
 #include "cifs_debug.h"
 #include "cifs_fs_sb.h"
+#include "fscache.h"
 
-static inline struct cifsFileInfo *cifs_init_private(
-	struct cifsFileInfo *private_data, struct inode *inode,
-	struct file *file, __u16 netfid)
-{
-	memset(private_data, 0, sizeof(struct cifsFileInfo));
-	private_data->netfid = netfid;
-	private_data->pid = current->tgid;	
-	init_MUTEX(&private_data->fh_sem);
-	private_data->pfile = file; /* needed for writepage */
-	private_data->pInode = inode;
-	private_data->invalidHandle = FALSE;
-	private_data->closePend = FALSE;
-	/* we have to track num writers to the inode, since writepages
-	does not tell us which handle the write is for so there can
-	be a close (overlapping with write) of the filehandle that
-	cifs_writepages chose to use */
-	atomic_set(&private_data->wrtPending,0); 
-
-	return private_data;
-}
 
 static inline int cifs_convert_flags(unsigned int flags)
 {
@@ -73,7 +57,43 @@ static inline int cifs_convert_flags(unsigned int flags)
 		return (GENERIC_READ | GENERIC_WRITE);
 	}
 
-	return 0x20197;
+	return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
+		FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
+		FILE_READ_DATA);
+}
+
+static u32 cifs_posix_convert_flags(unsigned int flags)
+{
+	u32 posix_flags = 0;
+
+	if ((flags & O_ACCMODE) == O_RDONLY)
+		posix_flags = SMB_O_RDONLY;
+	else if ((flags & O_ACCMODE) == O_WRONLY)
+		posix_flags = SMB_O_WRONLY;
+	else if ((flags & O_ACCMODE) == O_RDWR)
+		posix_flags = SMB_O_RDWR;
+
+	if (flags & O_CREAT) {
+		posix_flags |= SMB_O_CREAT;
+		if (flags & O_EXCL)
+			posix_flags |= SMB_O_EXCL;
+	} else if (flags & O_EXCL)
+		cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
+			 current->comm, current->tgid);
+
+	if (flags & O_TRUNC)
+		posix_flags |= SMB_O_TRUNC;
+	/* be safe and imply O_SYNC for O_DSYNC */
+	if (flags & O_DSYNC)
+		posix_flags |= SMB_O_SYNC;
+	if (flags & O_DIRECTORY)
+		posix_flags |= SMB_O_DIRECTORY;
+	if (flags & O_NOFOLLOW)
+		posix_flags |= SMB_O_NOFOLLOW;
+	if (flags & O_DIRECT)
+		posix_flags |= SMB_O_DIRECT;
+
+	return posix_flags;
 }
 
 static inline int cifs_get_disposition(unsigned int flags)
@@ -84,143 +104,97 @@ static inline int cifs_get_disposition(unsigned int flags)
 		return FILE_OVERWRITE_IF;
 	else if ((flags & O_CREAT) == O_CREAT)
 		return FILE_OPEN_IF;
+	else if ((flags & O_TRUNC) == O_TRUNC)
+		return FILE_OVERWRITE;
 	else
 		return FILE_OPEN;
 }
 
-/* all arguments to this function must be checked for validity in caller */
-static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
-	struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
-	struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf,
-	char *full_path, int xid)
+int cifs_posix_open(char *full_path, struct inode **pinode,
+			struct super_block *sb, int mode, unsigned int f_flags,
+			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
 {
-	struct timespec temp;
 	int rc;
+	FILE_UNIX_BASIC_INFO *presp_data;
+	__u32 posix_flags = 0;
+	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
+	struct cifs_fattr fattr;
+	struct tcon_link *tlink;
+	struct cifs_tcon *tcon;
 
-	/* want handles we can use to read with first
-	   in the list so we do not have to walk the
-	   list to search for one in prepare_write */
-	if ((file->f_flags & O_ACCMODE) == O_WRONLY) {
-		list_add_tail(&pCifsFile->flist, 
-			      &pCifsInode->openFileList);
-	} else {
-		list_add(&pCifsFile->flist,
-			 &pCifsInode->openFileList);
-	}
-	write_unlock(&GlobalSMBSeslock);
-	write_unlock(&file->f_owner.lock);
-	if (pCifsInode->clientCanCacheRead) {
-		/* we have the inode open somewhere else
-		   no need to discard cache data */
-		goto client_can_cache;
-	}
-
-	/* BB need same check in cifs_create too? */
-	/* if not oplocked, invalidate inode pages if mtime or file
-	   size changed */
-	temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime));
-	if (timespec_equal(&file->f_dentry->d_inode->i_mtime, &temp) && 
-			   (file->f_dentry->d_inode->i_size == 
-			    (loff_t)le64_to_cpu(buf->EndOfFile))) {
-		cFYI(1, ("inode unchanged on server"));
-	} else {
-		if (file->f_dentry->d_inode->i_mapping) {
-		/* BB no need to lock inode until after invalidate
-		   since namei code should already have it locked? */
-			filemap_fdatawrite(file->f_dentry->d_inode->i_mapping);
-			filemap_fdatawait(file->f_dentry->d_inode->i_mapping);
-		}
-		cFYI(1, ("invalidating remote inode since open detected it "
-			 "changed"));
-		invalidate_remote_inode(file->f_dentry->d_inode);
+	cifs_dbg(FYI, "posix open %s\n", full_path);
+
+	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
+	if (presp_data == NULL)
+		return -ENOMEM;
+
+	tlink = cifs_sb_tlink(cifs_sb);
+	if (IS_ERR(tlink)) {
+		rc = PTR_ERR(tlink);
+		goto posix_open_ret;
 	}
 
-client_can_cache:
-	if (pTcon->ses->capabilities & CAP_UNIX)
-		rc = cifs_get_inode_info_unix(&file->f_dentry->d_inode,
-			full_path, inode->i_sb, xid);
-	else
-		rc = cifs_get_inode_info(&file->f_dentry->d_inode,
-			full_path, buf, inode->i_sb, xid);
+	tcon = tlink_tcon(tlink);
+	mode &= ~current_umask();
 
-	if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
-		pCifsInode->clientCanCacheAll = TRUE;
-		pCifsInode->clientCanCacheRead = TRUE;
-		cFYI(1, ("Exclusive Oplock granted on inode %p",
-			 file->f_dentry->d_inode));
-	} else if ((*oplock & 0xF) == OPLOCK_READ)
-		pCifsInode->clientCanCacheRead = TRUE;
+	posix_flags = cifs_posix_convert_flags(f_flags);
+	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
+			     poplock, full_path, cifs_sb->local_nls,
+			     cifs_sb->mnt_cifs_flags &
+					CIFS_MOUNT_MAP_SPECIAL_CHR);
+	cifs_put_tlink(tlink);
 
-	return rc;
-}
+	if (rc)
+		goto posix_open_ret;
 
-int cifs_open(struct inode *inode, struct file *file)
-{
-	int rc = -EACCES;
-	int xid, oplock;
-	struct cifs_sb_info *cifs_sb;
-	struct cifsTconInfo *pTcon;
-	struct cifsFileInfo *pCifsFile;
-	struct cifsInodeInfo *pCifsInode;
-	struct list_head *tmp;
-	char *full_path = NULL;
-	int desiredAccess;
-	int disposition;
-	__u16 netfid;
-	FILE_ALL_INFO *buf = NULL;
+	if (presp_data->Type == cpu_to_le32(-1))
+		goto posix_open_ret; /* open ok, caller does qpathinfo */
 
-	xid = GetXid();
+	if (!pinode)
+		goto posix_open_ret; /* caller does not need info */
 
-	cifs_sb = CIFS_SB(inode->i_sb);
-	pTcon = cifs_sb->tcon;
-
-	if (file->f_flags & O_CREAT) {
-		/* search inode for this file and fill in file->private_data */
-		pCifsInode = CIFS_I(file->f_dentry->d_inode);
-		read_lock(&GlobalSMBSeslock);
-		list_for_each(tmp, &pCifsInode->openFileList) {
-			pCifsFile = list_entry(tmp, struct cifsFileInfo,
-					       flist);
-			if ((pCifsFile->pfile == NULL) &&
-			    (pCifsFile->pid == current->tgid)) {
-				/* mode set in cifs_create */
-
-				/* needed for writepage */
-				pCifsFile->pfile = file;
-				
-				file->private_data = pCifsFile;
-				break;
-			}
-		}
-		read_unlock(&GlobalSMBSeslock);
-		if (file->private_data != NULL) {
-			rc = 0;
-			FreeXid(xid);
-			return rc;
-		} else {
-			if (file->f_flags & O_EXCL)
-				cERROR(1, ("could not find file instance for "
-					   "new file %p ", file));
+	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
+
+	/* get new inode and set it up */
+	if (*pinode == NULL) {
+		cifs_fill_uniqueid(sb, &fattr);
+		*pinode = cifs_iget(sb, &fattr);
+		if (!*pinode) {
+			rc = -ENOMEM;
+			goto posix_open_ret;
 		}
+	} else {
+		cifs_fattr_to_inode(*pinode, &fattr);
 	}
 
-	down(&inode->i_sb->s_vfs_rename_sem);
-	full_path = build_path_from_dentry(file->f_dentry);
-	up(&inode->i_sb->s_vfs_rename_sem);
-	if (full_path == NULL) {
-		FreeXid(xid);
-		return -ENOMEM;
-	}
+posix_open_ret:
+	kfree(presp_data);
+	return rc;
+}
 
-	cFYI(1, (" inode = 0x%p file flags are 0x%x for %s",
-		 inode, file->f_flags, full_path));
-	desiredAccess = cifs_convert_flags(file->f_flags);
+static int
+cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
+	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
+	     struct cifs_fid *fid, unsigned int xid)
+{
+	int rc;
+	int desired_access;
+	int disposition;
+	int create_options = CREATE_NOT_DIR;
+	FILE_ALL_INFO *buf;
+	struct TCP_Server_Info *server = tcon->ses->server;
+	struct cifs_open_parms oparms;
+
+	if (!server->ops->open)
+		return -ENOSYS;
+
+	desired_access = cifs_convert_flags(f_flags);
 
 /*********************************************************************
  *  open flag mapping table:
- *  
+ *
  *	POSIX Flag            CIFS Disposition
- *	----------            ---------------- 
+ *	----------            ----------------
  *	O_CREAT               FILE_OPEN_IF
  *	O_CREAT | O_EXCL      FILE_CREATE
  *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
@@ -228,739 +202,1625 @@ int cifs_open(struct inode *inode, struct file *file)
  *	none of the above     FILE_OPEN
  *
  *	Note that there is not a direct match between disposition
- *	FILE_SUPERSEDE (ie create whether or not file exists although 
+ *	FILE_SUPERSEDE (ie create whether or not file exists although
  *	O_CREAT | O_TRUNC is similar but truncates the existing
  *	file rather than creating a new file as FILE_SUPERSEDE does
  *	(which uses the attributes / metadata passed in on open call)
  *?
- *?  O_SYNC is a reasonable match to CIFS writethrough flag  
+ *?  O_SYNC is a reasonable match to CIFS writethrough flag
  *?  and the read write flags match reasonably.  O_LARGEFILE
  *?  is irrelevant because largefile support is always used
  *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
  *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
  *********************************************************************/
 
-	disposition = cifs_get_disposition(file->f_flags);
+	disposition = cifs_get_disposition(f_flags);
 
-	if (oplockEnabled)
-		oplock = REQ_OPLOCK;
+	/* BB pass O_SYNC flag through on file attributes .. BB */
+
+	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	if (backup_cred(cifs_sb))
+		create_options |= CREATE_OPEN_BACKUP_INTENT;
+
+	oparms.tcon = tcon;
+	oparms.cifs_sb = cifs_sb;
+	oparms.desired_access = desired_access;
+	oparms.create_options = create_options;
+	oparms.disposition = disposition;
+	oparms.path = full_path;
+	oparms.fid = fid;
+	oparms.reconnect = false;
+
+	rc = server->ops->open(xid, &oparms, oplock, buf);
+
+	if (rc)
+		goto out;
+
+	if (tcon->unix_ext)
+		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
+					      xid);
 	else
-		oplock = FALSE;
+		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
+					 xid, fid);
 
-	/* BB pass O_SYNC flag through on file attributes .. BB */
+out:
+	kfree(buf);
+	return rc;
+}
 
-	/* Also refresh inode by passing in file_info buf returned by SMBOpen
-	   and calling get_inode_info with returned buf (at least helps
-	   non-Unix server case) */
+static bool
+cifs_has_mand_locks(struct cifsInodeInfo *cinode)
+{
+	struct cifs_fid_locks *cur;
+	bool has_locks = false;
 
-	/* BB we can not do this if this is the second open of a file 
-	   and the first handle has writebehind data, we might be 
-	   able to simply do a filemap_fdatawrite/filemap_fdatawait first */
-	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
-	if (!buf) {
+	down_read(&cinode->lock_sem);
+	list_for_each_entry(cur, &cinode->llist, llist) {
+		if (!list_empty(&cur->locks)) {
+			has_locks = true;
+			break;
+		}
+	}
+	up_read(&cinode->lock_sem);
+	return has_locks;
+}
+
+struct cifsFileInfo *
+cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
+		  struct tcon_link *tlink, __u32 oplock)
+{
+	struct dentry *dentry = file->f_path.dentry;
+	struct inode *inode = dentry->d_inode;
+	struct cifsInodeInfo *cinode = CIFS_I(inode);
+	struct cifsFileInfo *cfile;
+	struct cifs_fid_locks *fdlocks;
+	struct cifs_tcon *tcon = tlink_tcon(tlink);
+	struct TCP_Server_Info *server = tcon->ses->server;
+
+	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
+	if (cfile == NULL)
+		return cfile;
+
+	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
+	if (!fdlocks) {
+		kfree(cfile);
+		return NULL;
+	}
+
+	INIT_LIST_HEAD(&fdlocks->locks);
+	fdlocks->cfile = cfile;
+	cfile->llist = fdlocks;
+	down_write(&cinode->lock_sem);
+	list_add(&fdlocks->llist, &cinode->llist);
+	up_write(&cinode->lock_sem);
+
+	cfile->count = 1;
+	cfile->pid = current->tgid;
+	cfile->uid = current_fsuid();
+	cfile->dentry = dget(dentry);
+	cfile->f_flags = file->f_flags;
+	cfile->invalidHandle = false;
+	cfile->tlink = cifs_get_tlink(tlink);
+	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
+	mutex_init(&cfile->fh_mutex);
+
+	cifs_sb_active(inode->i_sb);
+
+	/*
+	 * If the server returned a read oplock and we have mandatory brlocks,
+	 * set oplock level to None.
+	 */
+	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
+		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
+		oplock = 0;
+	}
+
+	spin_lock(&cifs_file_list_lock);
+	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
+		oplock = fid->pending_open->oplock;
+	list_del(&fid->pending_open->olist);
+
+	fid->purge_cache = false;
+	server->ops->set_fid(cfile, fid, oplock);
+
+	list_add(&cfile->tlist, &tcon->openFileList);
+	/* if readable file instance put first in list*/
+	if (file->f_mode & FMODE_READ)
+		list_add(&cfile->flist, &cinode->openFileList);
+	else
+		list_add_tail(&cfile->flist, &cinode->openFileList);
+	spin_unlock(&cifs_file_list_lock);
+
+	if (fid->purge_cache)
+		cifs_zap_mapping(inode);
+
+	file->private_data = cfile;
+	return cfile;
+}
+
+struct cifsFileInfo *
+cifsFileInfo_get(struct cifsFileInfo *cifs_file)
+{
+	spin_lock(&cifs_file_list_lock);
+	cifsFileInfo_get_locked(cifs_file);
+	spin_unlock(&cifs_file_list_lock);
+	return cifs_file;
+}
+
+/*
+ * Release a reference on the file private data. This may involve closing
+ * the filehandle out on the server. Must be called without holding
+ * cifs_file_list_lock.
+ */
+void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
+{
+	struct inode *inode = cifs_file->dentry->d_inode;
+	struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
+	struct TCP_Server_Info *server = tcon->ses->server;
+	struct cifsInodeInfo *cifsi = CIFS_I(inode);
+	struct super_block *sb = inode->i_sb;
+	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
+	struct cifsLockInfo *li, *tmp;
+	struct cifs_fid fid;
+	struct cifs_pending_open open;
+
+	spin_lock(&cifs_file_list_lock);
+	if (--cifs_file->count > 0) {
+		spin_unlock(&cifs_file_list_lock);
+		return;
+	}
+
+	if (server->ops->get_lease_key)
+		server->ops->get_lease_key(inode, &fid);
+
+	/* store open in pending opens to make sure we don't miss lease break */
+	cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
+
+	/* remove it from the lists */
+	list_del(&cifs_file->flist);
+	list_del(&cifs_file->tlist);
+
+	if (list_empty(&cifsi->openFileList)) {
+		cifs_dbg(FYI, "closing last open instance for inode %p\n",
+			 cifs_file->dentry->d_inode);
+		/*
+		 * In strict cache mode we need invalidate mapping on the last
+		 * close  because it may cause a error when we open this file
+		 * again and get at least level II oplock.
+		 */
+		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
+			set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
+		cifs_set_oplock_level(cifsi, 0);
+	}
+	spin_unlock(&cifs_file_list_lock);
+
+	cancel_work_sync(&cifs_file->oplock_break);
+
+	if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
+		struct TCP_Server_Info *server = tcon->ses->server;
+		unsigned int xid;
+
+		xid = get_xid();
+		if (server->ops->close)
+			server->ops->close(xid, tcon, &cifs_file->fid);
+		_free_xid(xid);
+	}
+
+	cifs_del_pending_open(&open);
+
+	/*
+	 * Delete any outstanding lock records. We'll lose them when the file
+	 * is closed anyway.
+	 */
+	down_write(&cifsi->lock_sem);
+	list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
+		list_del(&li->llist);
+		cifs_del_lock_waiters(li);
+		kfree(li);
+	}
+	list_del(&cifs_file->llist->llist);
+	kfree(cifs_file->llist);
+	up_write(&cifsi->lock_sem);
+
+	cifs_put_tlink(cifs_file->tlink);
+	dput(cifs_file->dentry);
+	cifs_sb_deactive(sb);
+	kfree(cifs_file);
+}
+
+int cifs_open(struct inode *inode, struct file *file)
+
+{
+	int rc = -EACCES;
+	unsigned int xid;
+	__u32 oplock;
+	struct cifs_sb_info *cifs_sb;
+	struct TCP_Server_Info *server;
+	struct cifs_tcon *tcon;
+	struct tcon_link *tlink;
+	struct cifsFileInfo *cfile = NULL;
+	char *full_path = NULL;
+	bool posix_open_ok = false;
+	struct cifs_fid fid;
+	struct cifs_pending_open open;
+
+	xid = get_xid();
+
+	cifs_sb = CIFS_SB(inode->i_sb);
+	tlink = cifs_sb_tlink(cifs_sb);
+	if (IS_ERR(tlink)) {
+		free_xid(xid);
+		return PTR_ERR(tlink);
+	}
+	tcon = tlink_tcon(tlink);
+	server = tcon->ses->server;
+
+	full_path = build_path_from_dentry(file->f_path.dentry);
+	if (full_path == NULL) {
 		rc = -ENOMEM;
 		goto out;
 	}
-	rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, desiredAccess,
-			 CREATE_NOT_DIR, &netfid, &oplock, buf,
-			 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
-				 & CIFS_MOUNT_MAP_SPECIAL_CHR);
-	if (rc == -EIO) {
-		/* Old server, try legacy style OpenX */
-		rc = SMBLegacyOpen(xid, pTcon, full_path, disposition,
-			desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
-			cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
-				& CIFS_MOUNT_MAP_SPECIAL_CHR);
+
+	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
+		 inode, file->f_flags, full_path);
+
+	if (server->oplocks)
+		oplock = REQ_OPLOCK;
+	else
+		oplock = 0;
+
+	if (!tcon->broken_posix_open && tcon->unix_ext &&
+	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
+				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
+		/* can not refresh inode info since size could be stale */
+		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
+				cifs_sb->mnt_file_mode /* ignored */,
+				file->f_flags, &oplock, &fid.netfid, xid);
+		if (rc == 0) {
+			cifs_dbg(FYI, "posix open succeeded\n");
+			posix_open_ok = true;
+		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
+			if (tcon->ses->serverNOS)
+				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
+					 tcon->ses->serverName,
+					 tcon->ses->serverNOS);
+			tcon->broken_posix_open = true;
+		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
+			 (rc != -EOPNOTSUPP)) /* path not found or net err */
+			goto out;
+		/*
+		 * Else fallthrough to retry open the old way on network i/o
+		 * or DFS errors.
+		 */
 	}
-	if (rc) {
-		cFYI(1, ("cifs_open returned 0x%x ", rc));
-		goto out;
+
+	if (server->ops->get_lease_key)
+		server->ops->get_lease_key(inode, &fid);
+
+	cifs_add_pending_open(&fid, tlink, &open);
+
+	if (!posix_open_ok) {
+		if (server->ops->get_lease_key)
+			server->ops->get_lease_key(inode, &fid);
+
+		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
+				  file->f_flags, &oplock, &fid, xid);
+		if (rc) {
+			cifs_del_pending_open(&open);
+			goto out;
+		}
 	}
-	file->private_data =
-		kmalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
-	if (file->private_data == NULL) {
+
+	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
+	if (cfile == NULL) {
+		if (server->ops->close)
+			server->ops->close(xid, tcon, &fid);
+		cifs_del_pending_open(&open);
 		rc = -ENOMEM;
 		goto out;
 	}
-	pCifsFile = cifs_init_private(file->private_data, inode, file, netfid);
-	write_lock(&file->f_owner.lock);
-	write_lock(&GlobalSMBSeslock);
-	list_add(&pCifsFile->tlist, &pTcon->openFileList);
 
-	pCifsInode = CIFS_I(file->f_dentry->d_inode);
-	if (pCifsInode) {
-		rc = cifs_open_inode_helper(inode, file, pCifsInode,
-					    pCifsFile, pTcon,
-					    &oplock, buf, full_path, xid);
-	} else {
-		write_unlock(&GlobalSMBSeslock);
-		write_unlock(&file->f_owner.lock);
-	}
-
-	if (oplock & CIFS_CREATE_ACTION) {           
-		/* time to set mode which we can not set earlier due to
-		   problems creating new read-only files */
-		if (cifs_sb->tcon->ses->capabilities & CAP_UNIX) {
-			CIFSSMBUnixSetPerms(xid, pTcon, full_path,
-					    inode->i_mode,
-					    (__u64)-1, (__u64)-1, 0 /* dev */,
-					    cifs_sb->local_nls,
-					    cifs_sb->mnt_cifs_flags & 
-						CIFS_MOUNT_MAP_SPECIAL_CHR);
-		} else {
-			/* BB implement via Windows security descriptors eg
-			   CIFSSMBWinSetPerms(xid, pTcon, full_path, mode,
-					      -1, -1, local_nls);
-			   in the meantime could set r/o dos attribute when
-			   perms are eg: mode & 0222 == 0 */
-		}
+	cifs_fscache_set_inode_cookie(inode, file);
+
+	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
+		/*
+		 * Time to set mode which we can not set earlier due to
+		 * problems creating new read-only files.
+		 */
+		struct cifs_unix_set_info_args args = {
+			.mode	= inode->i_mode,
+			.uid	= INVALID_UID, /* no change */
+			.gid	= INVALID_GID, /* no change */
+			.ctime	= NO_CHANGE_64,
+			.atime	= NO_CHANGE_64,
+			.mtime	= NO_CHANGE_64,
+			.device	= 0,
+		};
+		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
+				       cfile->pid);
 	}
 
 out:
-	kfree(buf);
 	kfree(full_path);
-	FreeXid(xid);
+	free_xid(xid);
+	cifs_put_tlink(tlink);
 	return rc;
 }
 
-/* Try to reaquire byte range locks that were released when session */
-/* to server was lost */
-static int cifs_relock_file(struct cifsFileInfo *cifsFile)
+static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
+
+/*
+ * Try to reacquire byte range locks that were released when session
+ * to server was lost.
+ */
+static int
+cifs_relock_file(struct cifsFileInfo *cfile)
 {
+	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
+	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
 	int rc = 0;
 
-/* BB list all locks open on this file and relock */
+	down_read(&cinode->lock_sem);
+	if (cinode->can_cache_brlcks) {
+		/* can cache locks - no need to relock */
+		up_read(&cinode->lock_sem);
+		return rc;
+	}
+
+	if (cap_unix(tcon->ses) &&
+	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
+	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
+		rc = cifs_push_posix_locks(cfile);
+	else
+		rc = tcon->ses->server->ops->push_mand_locks(cfile);
 
+	up_read(&cinode->lock_sem);
 	return rc;
 }
 
-static int cifs_reopen_file(struct inode *inode, struct file *file, 
-	int can_flush)
+static int
+cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
 {
 	int rc = -EACCES;
-	int xid, oplock;
+	unsigned int xid;
+	__u32 oplock;
 	struct cifs_sb_info *cifs_sb;
-	struct cifsTconInfo *pTcon;
-	struct cifsFileInfo *pCifsFile;
-	struct cifsInodeInfo *pCifsInode;
+	struct cifs_tcon *tcon;
+	struct TCP_Server_Info *server;
+	struct cifsInodeInfo *cinode;
+	struct inode *inode;
 	char *full_path = NULL;
-	int desiredAccess;
+	int desired_access;
 	int disposition = FILE_OPEN;
-	__u16 netfid;
-
-	if (inode == NULL)
-		return -EBADF;
-	if (file->private_data) {
-		pCifsFile = (struct cifsFileInfo *)file->private_data;
-	} else
-		return -EBADF;
-
-	xid = GetXid();
-	down(&pCifsFile->fh_sem);
-	if (pCifsFile->invalidHandle == FALSE) {
-		up(&pCifsFile->fh_sem);
-		FreeXid(xid);
-		return 0;
+	int create_options = CREATE_NOT_DIR;
+	struct cifs_open_parms oparms;
+
+	xid = get_xid();
+	mutex_lock(&cfile->fh_mutex);
+	if (!cfile->invalidHandle) {
+		mutex_unlock(&cfile->fh_mutex);
+		rc = 0;
+		free_xid(xid);
+		return rc;
 	}
 
-	if (file->f_dentry == NULL) {
-		up(&pCifsFile->fh_sem);
-		cFYI(1, ("failed file reopen, no valid name if dentry freed"));
-		FreeXid(xid);
-		return -EBADF;
-	}
+	inode = cfile->dentry->d_inode;
 	cifs_sb = CIFS_SB(inode->i_sb);
-	pTcon = cifs_sb->tcon;
-/* can not grab rename sem here because various ops, including
-   those that already have the rename sem can end up causing writepage
-   to get called and if the server was down that means we end up here,
-   and we can never tell if the caller already has the rename_sem */
-	full_path = build_path_from_dentry(file->f_dentry);
+	tcon = tlink_tcon(cfile->tlink);
+	server = tcon->ses->server;
+
+	/*
+	 * Can not grab rename sem here because various ops, including those
+	 * that already have the rename sem can end up causing writepage to get
+	 * called and if the server was down that means we end up here, and we
+	 * can never tell if the caller already has the rename_sem.
+	 */
+	full_path = build_path_from_dentry(cfile->dentry);
 	if (full_path == NULL) {
-		up(&pCifsFile->fh_sem);
-		FreeXid(xid);
-		return -ENOMEM;
+		rc = -ENOMEM;
+		mutex_unlock(&cfile->fh_mutex);
+		free_xid(xid);
+		return rc;
 	}
 
-	cFYI(1, (" inode = 0x%p file flags are 0x%x for %s",
-		 inode, file->f_flags,full_path));
-	desiredAccess = cifs_convert_flags(file->f_flags);
+	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
+		 inode, cfile->f_flags, full_path);
 
-	if (oplockEnabled)
+	if (tcon->ses->server->oplocks)
 		oplock = REQ_OPLOCK;
 	else
-		oplock = FALSE;
-
-	/* Can not refresh inode by passing in file_info buf to be returned
-	   by SMBOpen and then calling get_inode_info with returned buf 
-	   since file might have write behind data that needs to be flushed 
-	   and server version of file size can be stale. If we knew for sure
-	   that inode was not dirty locally we could do this */
-
-/*	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
-	if (buf == 0) {
-		up(&pCifsFile->fh_sem);
-		kfree(full_path);
-		FreeXid(xid);
-		return -ENOMEM;
-	} */
-	rc = CIFSSMBOpen(xid, pTcon, full_path, disposition, desiredAccess,
-			 CREATE_NOT_DIR, &netfid, &oplock, NULL,
-			 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags & 
-				CIFS_MOUNT_MAP_SPECIAL_CHR);
-	if (rc) {
-		up(&pCifsFile->fh_sem);
-		cFYI(1, ("cifs_open returned 0x%x ", rc));
-		cFYI(1, ("oplock: %d ", oplock));
-	} else {
-		pCifsFile->netfid = netfid;
-		pCifsFile->invalidHandle = FALSE;
-		up(&pCifsFile->fh_sem);
-		pCifsInode = CIFS_I(inode);
-		if (pCifsInode) {
-			if (can_flush) {
-				filemap_fdatawrite(inode->i_mapping);
-				filemap_fdatawait(inode->i_mapping);
-			/* temporarily disable caching while we
-			   go to server to get inode info */
-				pCifsInode->clientCanCacheAll = FALSE;
-				pCifsInode->clientCanCacheRead = FALSE;
-				if (pTcon->ses->capabilities & CAP_UNIX)
-					rc = cifs_get_inode_info_unix(&inode,
-						full_path, inode->i_sb, xid);
-				else
-					rc = cifs_get_inode_info(&inode,
-						full_path, NULL, inode->i_sb,
-						xid);
-			} /* else we are writing out data to server already
-			     and could deadlock if we tried to flush data, and
-			     since we do not know if we have data that would
-			     invalidate the current end of file on the server
-			     we can not go to the server to get the new inod
-			     info */
-			if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
-				pCifsInode->clientCanCacheAll = TRUE;
-				pCifsInode->clientCanCacheRead = TRUE;
-				cFYI(1, ("Exclusive Oplock granted on inode %p",
-					 file->f_dentry->d_inode));
-			} else if ((oplock & 0xF) == OPLOCK_READ) {
-				pCifsInode->clientCanCacheRead = TRUE;
-				pCifsInode->clientCanCacheAll = FALSE;
-			} else {
-				pCifsInode->clientCanCacheRead = FALSE;
-				pCifsInode->clientCanCacheAll = FALSE;
-			}
-			cifs_relock_file(pCifsFile);
+		oplock = 0;
+
+	if (tcon->unix_ext && cap_unix(tcon->ses) &&
+	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
+				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
+		/*
+		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
+		 * original open. Must mask them off for a reopen.
+		 */
+		unsigned int oflags = cfile->f_flags &
+						~(O_CREAT | O_EXCL | O_TRUNC);
+
+		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
+				     cifs_sb->mnt_file_mode /* ignored */,
+				     oflags, &oplock, &cfile->fid.netfid, xid);
+		if (rc == 0) {
+			cifs_dbg(FYI, "posix reopen succeeded\n");
+			oparms.reconnect = true;
+			goto reopen_success;
 		}
+		/*
+		 * fallthrough to retry open the old way on errors, especially
+		 * in the reconnect path it is important to retry hard
+		 */
 	}
 
+	desired_access = cifs_convert_flags(cfile->f_flags);
+
+	if (backup_cred(cifs_sb))
+		create_options |= CREATE_OPEN_BACKUP_INTENT;
+
+	if (server->ops->get_lease_key)
+		server->ops->get_lease_key(inode, &cfile->fid);
+
+	oparms.tcon = tcon;
+	oparms.cifs_sb = cifs_sb;
+	oparms.desired_access = desired_access;
+	oparms.create_options = create_options;
+	oparms.disposition = disposition;
+	oparms.path = full_path;
+	oparms.fid = &cfile->fid;
+	oparms.reconnect = true;
+
+	/*
+	 * Can not refresh inode by passing in file_info buf to be returned by
+	 * ops->open and then calling get_inode_info with returned buf since
+	 * file might have write behind data that needs to be flushed and server
+	 * version of file size can be stale. If we knew for sure that inode was
+	 * not dirty locally we could do this.
+	 */
+	rc = server->ops->open(xid, &oparms, &oplock, NULL);
+	if (rc == -ENOENT && oparms.reconnect == false) {
+		/* durable handle timeout is expired - open the file again */
+		rc = server->ops->open(xid, &oparms, &oplock, NULL);
+		/* indicate that we need to relock the file */
+		oparms.reconnect = true;
+	}
+
+	if (rc) {
+		mutex_unlock(&cfile->fh_mutex);
+		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
+		cifs_dbg(FYI, "oplock: %d\n", oplock);
+		goto reopen_error_exit;
+	}
+
+reopen_success:
+	cfile->invalidHandle = false;
+	mutex_unlock(&cfile->fh_mutex);
+	cinode = CIFS_I(inode);
+
+	if (can_flush) {
+		rc = filemap_write_and_wait(inode->i_mapping);
+		mapping_set_error(inode->i_mapping, rc);
+
+		if (tcon->unix_ext)
+			rc = cifs_get_inode_info_unix(&inode, full_path,
+						      inode->i_sb, xid);
+		else
+			rc = cifs_get_inode_info(&inode, full_path, NULL,
+						 inode->i_sb, xid, NULL);
+	}
+	/*
+	 * Else we are writing out data to server already and could deadlock if
+	 * we tried to flush data, and since we do not know if we have data that
+	 * would invalidate the current end of file on the server we can not go
+	 * to the server to get the new inode info.
+	 */
+
+	server->ops->set_fid(cfile, &cfile->fid, oplock);
+	if (oparms.reconnect)
+		cifs_relock_file(cfile);
+
+reopen_error_exit:
 	kfree(full_path);
-	FreeXid(xid);
+	free_xid(xid);
 	return rc;
 }
 
 int cifs_close(struct inode *inode, struct file *file)
 {
+	if (file->private_data != NULL) {
+		cifsFileInfo_put(file->private_data);
+		file->private_data = NULL;
+	}
+
+	/* return code from the ->release op is always ignored */
+	return 0;
+}
+
+int cifs_closedir(struct inode *inode, struct file *file)
+{
 	int rc = 0;
-	int xid;
-	struct cifs_sb_info *cifs_sb;
-	struct cifsTconInfo *pTcon;
-	struct cifsFileInfo *pSMBFile =
-		(struct cifsFileInfo *)file->private_data;
+	unsigned int xid;
+	struct cifsFileInfo *cfile = file->private_data;
+	struct cifs_tcon *tcon;
+	struct TCP_Server_Info *server;
+	char *buf;
 
-	xid = GetXid();
+	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);
 
-	cifs_sb = CIFS_SB(inode->i_sb);
-	pTcon = cifs_sb->tcon;
-	if (pSMBFile) {
-		pSMBFile->closePend = TRUE;
-		write_lock(&file->f_owner.lock);
-		if (pTcon) {
-			/* no sense reconnecting to close a file that is
-			   already closed */
-			if (pTcon->tidStatus != CifsNeedReconnect) {
-				int timeout = 2;
-				while((atomic_read(&pSMBFile->wrtPending) != 0)
-					 && (timeout < 1000) ) {
-					/* Give write a better chance to get to
-					server ahead of the close.  We do not
-					want to add a wait_q here as it would
-					increase the memory utilization as
-					the struct would be in each open file,
-					but this should give enough time to 
-					clear the socket */
-					cERROR(1,("close with pending writes"));
-					msleep(timeout);
-					timeout *= 4;
-				} 
-				write_unlock(&file->f_owner.lock);
-				rc = CIFSSMBClose(xid, pTcon,
-						  pSMBFile->netfid);
-				write_lock(&file->f_owner.lock);
-			}
-		}
-		write_lock(&GlobalSMBSeslock);
-		list_del(&pSMBFile->flist);
-		list_del(&pSMBFile->tlist);
-		write_unlock(&GlobalSMBSeslock);
-		write_unlock(&file->f_owner.lock);
-		kfree(pSMBFile->search_resume_name);
-		kfree(file->private_data);
-		file->private_data = NULL;
+	if (cfile == NULL)
+		return rc;
+
+	xid = get_xid();
+	tcon = tlink_tcon(cfile->tlink);
+	server = tcon->ses->server;
+
+	cifs_dbg(FYI, "Freeing private data in close dir\n");
+	spin_lock(&cifs_file_list_lock);
+	if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
+		cfile->invalidHandle = true;
+		spin_unlock(&cifs_file_list_lock);
+		if (server->ops->close_dir)
+			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
+		else
+			rc = -ENOSYS;
+		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
+		/* not much we can do if it fails anyway, ignore rc */
+		rc = 0;
 	} else
-		rc = -EBADF;
+		spin_unlock(&cifs_file_list_lock);
+
+	buf = cfile->srch_inf.ntwrk_buf_start;
+	if (buf) {
+		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
+		cfile->srch_inf.ntwrk_buf_start = NULL;
+		if (cfile->srch_inf.smallBuf)
+			cifs_small_buf_release(buf);
+		else
+			cifs_buf_release(buf);
+	}
+
+	cifs_put_tlink(cfile->tlink);
+	kfree(file->private_data);
+	file->private_data = NULL;
+	/* BB can we lock the filestruct while this is going on? */
+	free_xid(xid);
+	return rc;
+}
+
+static struct cifsLockInfo *
+cifs_lock_init(__u64 offset, __u64 length, __u8 type)
+{
+	struct cifsLockInfo *lock =
+		kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
+	if (!lock)
+		return lock;
+	lock->offset = offset;
+	lock->length = length;
+	lock->type = type;
+	lock->pid = current->tgid;
+	INIT_LIST_HEAD(&lock->blist);
+	init_waitqueue_head(&lock->block_q);
+	return lock;
+}
 
-	if (list_empty(&(CIFS_I(inode)->openFileList))) {
-		cFYI(1, ("closing last open instance for inode %p", inode));
-		/* if the file is not open we do not know if we can cache info
-		   on this inode, much less write behind and read ahead */
-		CIFS_I(inode)->clientCanCacheRead = FALSE;
-		CIFS_I(inode)->clientCanCacheAll  = FALSE;
+void
+cifs_del_lock_waiters(struct cifsLockInfo *lock)
+{
+	struct cifsLockInfo *li, *tmp;
+	list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
+		list_del_init(&li->blist);
+		wake_up(&li->block_q);
 	}
-	if ((rc ==0) && CIFS_I(inode)->write_behind_rc)
-		rc = CIFS_I(inode)->write_behind_rc;
-	FreeXid(xid);
+}
+
+#define CIFS_LOCK_OP	0
+#define CIFS_READ_OP	1
+#define CIFS_WRITE_OP	2
+
+/* @rw_check : 0 - no op, 1 - read, 2 - write */
+static bool
+cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
+			    __u64 length, __u8 type, struct cifsFileInfo *cfile,
+			    struct cifsLockInfo **conf_lock, int rw_check)
+{
+	struct cifsLockInfo *li;
+	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
+	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
+
+	list_for_each_entry(li, &fdlocks->locks, llist) {
+		if (offset + length <= li->offset ||
+		    offset >= li->offset + li->length)
+			continue;
+		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
+		    server->ops->compare_fids(cfile, cur_cfile)) {
+			/* shared lock prevents write op through the same fid */
+			if (!(li->type & server->vals->shared_lock_type) ||
+			    rw_check != CIFS_WRITE_OP)
+				continue;
+		}
+		if ((type & server->vals->shared_lock_type) &&
+		    ((server->ops->compare_fids(cfile, cur_cfile) &&
+		     current->tgid == li->pid) || type == li->type))
+			continue;
+		if (conf_lock)
+			*conf_lock = li;
+		return true;
+	}
+	return false;
+}
+
+bool
+cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
+			__u8 type, struct cifsLockInfo **conf_lock,
+			int rw_check)
+{
+	bool rc = false;
+	struct cifs_fid_locks *cur;
+	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+
+	list_for_each_entry(cur, &cinode->llist, llist) {
+		rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
+						 cfile, conf_lock, rw_check);
+		if (rc)
+			break;
+	}
+
 	return rc;
 }
 
-int cifs_closedir(struct inode *inode, struct file *file)
+/*
+ * Check if there is another lock that prevents us to set the lock (mandatory
+ * style). If such a lock exists, update the flock structure with its
+ * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
+ * or leave it the same if we can't. Returns 0 if we don't need to request to
+ * the server or 1 otherwise.
+ */
+static int
+cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
+	       __u8 type, struct file_lock *flock)
 {
 	int rc = 0;
-	int xid;
-	struct cifsFileInfo *pCFileStruct =
-	    (struct cifsFileInfo *)file->private_data;
-	char *ptmp;
+	struct cifsLockInfo *conf_lock;
+	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
+	bool exist;
+
+	down_read(&cinode->lock_sem);
+
+	exist = cifs_find_lock_conflict(cfile, offset, length, type,
+					&conf_lock, CIFS_LOCK_OP);
+	if (exist) {
+		flock->fl_start = conf_lock->offset;
+		flock->fl_end = conf_lock->offset + conf_lock->length - 1;
+		flock->fl_pid = conf_lock->pid;
+		if (conf_lock->type & server->vals->shared_lock_type)
+			flock->fl_type = F_RDLCK;
+		else
+			flock->fl_type = F_WRLCK;
+	} else if (!cinode->can_cache_brlcks)
+		rc = 1;
+	else
+		flock->fl_type = F_UNLCK;
 
-	cFYI(1, ("Closedir inode = 0x%p with ", inode));
+	up_read(&cinode->lock_sem);
+	return rc;
+}
 
-	xid = GetXid();
+static void
+cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
+{
+	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+	down_write(&cinode->lock_sem);
+	list_add_tail(&lock->llist, &cfile->llist->locks);
+	up_write(&cinode->lock_sem);
+}
 
-	if (pCFileStruct) {
-		struct cifsTconInfo *pTcon;
-		struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_dentry->d_sb);
+/*
+ * Set the byte-range lock (mandatory style). Returns:
+ * 1) 0, if we set the lock and don't need to request to the server;
+ * 2) 1, if no locks prevent us but we need to request to the server;
+ * 3) -EACCESS, if there is a lock that prevents us and wait is false.
+ */
+static int
+cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
+		 bool wait)
+{
+	struct cifsLockInfo *conf_lock;
+	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+	bool exist;
+	int rc = 0;
 
-		pTcon = cifs_sb->tcon;
+try_again:
+	exist = false;
+	down_write(&cinode->lock_sem);
 
-		cFYI(1, ("Freeing private data in close dir"));
-		if ((pCFileStruct->srch_inf.endOfSearch == FALSE) &&
-		   (pCFileStruct->invalidHandle == FALSE)) {
-			pCFileStruct->invalidHandle = TRUE;
-			rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
-			cFYI(1, ("Closing uncompleted readdir with rc %d",
-				 rc));
-			/* not much we can do if it fails anyway, ignore rc */
-			rc = 0;
-		}
-		ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
-		if (ptmp) {
-   /* BB removeme BB */	cFYI(1, ("freeing smb buf in srch struct in closedir"));
-			pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
-			cifs_buf_release(ptmp);
+	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
+					lock->type, &conf_lock, CIFS_LOCK_OP);
+	if (!exist && cinode->can_cache_brlcks) {
+		list_add_tail(&lock->llist, &cfile->llist->locks);
+		up_write(&cinode->lock_sem);
+		return rc;
+	}
+
+	if (!exist)
+		rc = 1;
+	else if (!wait)
+		rc = -EACCES;
+	else {
+		list_add_tail(&lock->blist, &conf_lock->blist);
+		up_write(&cinode->lock_sem);
+		rc = wait_event_interruptible(lock->block_q,
+					(lock->blist.prev == &lock->blist) &&
+					(lock->blist.next == &lock->blist));
+		if (!rc)
+			goto try_again;
+		down_write(&cinode->lock_sem);
+		list_del_init(&lock->blist);
+	}
+
+	up_write(&cinode->lock_sem);
+	return rc;
+}
+
+/*
+ * Check if there is another lock that prevents us to set the lock (posix
+ * style). If such a lock exists, update the flock structure with its
+ * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
+ * or leave it the same if we can't. Returns 0 if we don't need to request to
+ * the server or 1 otherwise.
+ */
+static int
+cifs_posix_lock_test(struct file *file, struct file_lock *flock)
+{
+	int rc = 0;
+	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
+	unsigned char saved_type = flock->fl_type;
+
+	if ((flock->fl_flags & FL_POSIX) == 0)
+		return 1;
+
+	down_read(&cinode->lock_sem);
+	posix_test_lock(file, flock);
+
+	if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
+		flock->fl_type = saved_type;
+		rc = 1;
+	}
+
+	up_read(&cinode->lock_sem);
+	return rc;
+}
+
+/*
+ * Set the byte-range lock (posix style). Returns:
+ * 1) 0, if we set the lock and don't need to request to the server;
+ * 2) 1, if we need to request to the server;
+ * 3) <0, if the error occurs while setting the lock.
+ */
+static int
+cifs_posix_lock_set(struct file *file, struct file_lock *flock)
+{
+	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
+	int rc = 1;
+
+	if ((flock->fl_flags & FL_POSIX) == 0)
+		return rc;
+
+try_again:
+	down_write(&cinode->lock_sem);
+	if (!cinode->can_cache_brlcks) {
+		up_write(&cinode->lock_sem);
+		return rc;
+	}
+
+	rc = posix_lock_file(file, flock, NULL);
+	up_write(&cinode->lock_sem);
+	if (rc == FILE_LOCK_DEFERRED) {
+		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
+		if (!rc)
+			goto try_again;
+		posix_unblock_lock(flock);
+	}
+	return rc;
+}
+
+int
+cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
+{
+	unsigned int xid;
+	int rc = 0, stored_rc;
+	struct cifsLockInfo *li, *tmp;
+	struct cifs_tcon *tcon;
+	unsigned int num, max_num, max_buf;
+	LOCKING_ANDX_RANGE *buf, *cur;
+	int types[] = {LOCKING_ANDX_LARGE_FILES,
+		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
+	int i;
+
+	xid = get_xid();
+	tcon = tlink_tcon(cfile->tlink);
+
+	/*
+	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
+	 * and check it for zero before using.
+	 */
+	max_buf = tcon->ses->server->maxBuf;
+	if (!max_buf) {
+		free_xid(xid);
+		return -EINVAL;
+	}
+
+	max_num = (max_buf - sizeof(struct smb_hdr)) /
+						sizeof(LOCKING_ANDX_RANGE);
+	buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
+	if (!buf) {
+		free_xid(xid);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < 2; i++) {
+		cur = buf;
+		num = 0;
+		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
+			if (li->type != types[i])
+				continue;
+			cur->Pid = cpu_to_le16(li->pid);
+			cur->LengthLow = cpu_to_le32((u32)li->length);
+			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
+			cur->OffsetLow = cpu_to_le32((u32)li->offset);
+			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
+			if (++num == max_num) {
+				stored_rc = cifs_lockv(xid, tcon,
+						       cfile->fid.netfid,
+						       (__u8)li->type, 0, num,
+						       buf);
+				if (stored_rc)
+					rc = stored_rc;
+				cur = buf;
+				num = 0;
+			} else
+				cur++;
 		}
-		ptmp = pCFileStruct->search_resume_name;
-		if (ptmp) {
-   /* BB removeme BB */	cFYI(1, ("freeing resume name in closedir"));
-			pCFileStruct->search_resume_name = NULL;
-			kfree(ptmp);
+
+		if (num) {
+			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
+					       (__u8)types[i], 0, num, buf);
+			if (stored_rc)
+				rc = stored_rc;
 		}
-		kfree(file->private_data);
-		file->private_data = NULL;
 	}
-	/* BB can we lock the filestruct while this is going on? */
-	FreeXid(xid);
+
+	kfree(buf);
+	free_xid(xid);
 	return rc;
 }
 
-int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
-{
-	int rc, xid;
-	__u32 lockType = LOCKING_ANDX_LARGE_FILES;
-	__u32 numLock = 0;
-	__u32 numUnlock = 0;
+/* copied from fs/locks.c with a name change */
+#define cifs_for_each_lock(inode, lockp) \
+	for (lockp = &inode->i_flock; *lockp != NULL; \
+	     lockp = &(*lockp)->fl_next)
+
+struct lock_to_push {
+	struct list_head llist;
+	__u64 offset;
 	__u64 length;
-	int wait_flag = FALSE;
-	struct cifs_sb_info *cifs_sb;
-	struct cifsTconInfo *pTcon;
+	__u32 pid;
+	__u16 netfid;
+	__u8 type;
+};
 
-	length = 1 + pfLock->fl_end - pfLock->fl_start;
-	rc = -EACCES;
-	xid = GetXid();
-
-	cFYI(1, ("Lock parm: 0x%x flockflags: "
-		 "0x%x flocktype: 0x%x start: %lld end: %lld",
-	        cmd, pfLock->fl_flags, pfLock->fl_type, pfLock->fl_start,
-	        pfLock->fl_end));
-
-	if (pfLock->fl_flags & FL_POSIX)
-		cFYI(1, ("Posix "));
-	if (pfLock->fl_flags & FL_FLOCK)
-		cFYI(1, ("Flock "));
-	if (pfLock->fl_flags & FL_SLEEP) {
-		cFYI(1, ("Blocking lock "));
-		wait_flag = TRUE;
-	}
-	if (pfLock->fl_flags & FL_ACCESS)
-		cFYI(1, ("Process suspended by mandatory locking - "
-			 "not implemented yet "));
-	if (pfLock->fl_flags & FL_LEASE)
-		cFYI(1, ("Lease on file - not implemented yet"));
-	if (pfLock->fl_flags & 
-	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
-		cFYI(1, ("Unknown lock flags 0x%x", pfLock->fl_flags));
-
-	if (pfLock->fl_type == F_WRLCK) {
-		cFYI(1, ("F_WRLCK "));
-		numLock = 1;
-	} else if (pfLock->fl_type == F_UNLCK) {
-		cFYI(1, ("F_UNLCK "));
-		numUnlock = 1;
-	} else if (pfLock->fl_type == F_RDLCK) {
-		cFYI(1, ("F_RDLCK "));
-		lockType |= LOCKING_ANDX_SHARED_LOCK;
-		numLock = 1;
-	} else if (pfLock->fl_type == F_EXLCK) {
-		cFYI(1, ("F_EXLCK "));
-		numLock = 1;
-	} else if (pfLock->fl_type == F_SHLCK) {
-		cFYI(1, ("F_SHLCK "));
-		lockType |= LOCKING_ANDX_SHARED_LOCK;
-		numLock = 1;
-	} else
-		cFYI(1, ("Unknown type of lock "));
+static int
+cifs_push_posix_locks(struct cifsFileInfo *cfile)
+{
+	struct inode *inode = cfile->dentry->d_inode;
+	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
+	struct file_lock *flock, **before;
+	unsigned int count = 0, i = 0;
+	int rc = 0, xid, type;
+	struct list_head locks_to_send, *el;
+	struct lock_to_push *lck, *tmp;
+	__u64 length;
 
-	cifs_sb = CIFS_SB(file->f_dentry->d_sb);
-	pTcon = cifs_sb->tcon;
+	xid = get_xid();
 
-	if (file->private_data == NULL) {
-		FreeXid(xid);
-		return -EBADF;
+	spin_lock(&inode->i_lock);
+	cifs_for_each_lock(inode, before) {
+		if ((*before)->fl_flags & FL_POSIX)
+			count++;
 	}
+	spin_unlock(&inode->i_lock);
 
-	if (IS_GETLK(cmd)) {
-		rc = CIFSSMBLock(xid, pTcon,
-				 ((struct cifsFileInfo *)file->
-				  private_data)->netfid,
-				 length,
-				 pfLock->fl_start, 0, 1, lockType,
-				 0 /* wait flag */ );
-		if (rc == 0) {
-			rc = CIFSSMBLock(xid, pTcon,
-					 ((struct cifsFileInfo *) file->
-					  private_data)->netfid,
-					 length,
-					 pfLock->fl_start, 1 /* numUnlock */ ,
-					 0 /* numLock */ , lockType,
-					 0 /* wait flag */ );
-			pfLock->fl_type = F_UNLCK;
-			if (rc != 0)
-				cERROR(1, ("Error unlocking previously locked "
-					   "range %d during test of lock ",
-					   rc));
-			rc = 0;
+	INIT_LIST_HEAD(&locks_to_send);
 
-		} else {
-			/* if rc == ERR_SHARING_VIOLATION ? */
-			rc = 0;	/* do not change lock type to unlock
-				   since range in use */
+	/*
+	 * Allocating count locks is enough because no FL_POSIX locks can be
+	 * added to the list while we are holding cinode->lock_sem that
+	 * protects locking operations of this inode.
+	 */
+	for (; i < count; i++) {
+		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
+		if (!lck) {
+			rc = -ENOMEM;
+			goto err_out;
 		}
+		list_add_tail(&lck->llist, &locks_to_send);
+	}
+
+	el = locks_to_send.next;
+	spin_lock(&inode->i_lock);
+	cifs_for_each_lock(inode, before) {
+		flock = *before;
+		if ((flock->fl_flags & FL_POSIX) == 0)
+			continue;
+		if (el == &locks_to_send) {
+			/*
+			 * The list ended. We don't have enough allocated
+			 * structures - something is really wrong.
+			 */
+			cifs_dbg(VFS, "Can't push all brlocks!\n");
+			break;
+		}
+		length = 1 + flock->fl_end - flock->fl_start;
+		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
+			type = CIFS_RDLCK;
+		else
+			type = CIFS_WRLCK;
+		lck = list_entry(el, struct lock_to_push, llist);
+		lck->pid = flock->fl_pid;
+		lck->netfid = cfile->fid.netfid;
+		lck->length = length;
+		lck->type = type;
+		lck->offset = flock->fl_start;
+		el = el->next;
+	}
+	spin_unlock(&inode->i_lock);
+
+	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
+		int stored_rc;
+
+		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
+					     lck->offset, lck->length, NULL,
+					     lck->type, 0);
+		if (stored_rc)
+			rc = stored_rc;
+		list_del(&lck->llist);
+		kfree(lck);
+	}
+
+out:
+	free_xid(xid);
+	return rc;
+err_out:
+	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
+		list_del(&lck->llist);
+		kfree(lck);
+	}
+	goto out;
+}
+
+static int
+cifs_push_locks(struct cifsFileInfo *cfile)
+{
+	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
+	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
+	int rc = 0;
 
-		FreeXid(xid);
+	/* we are going to update can_cache_brlcks here - need a write access */
+	down_write(&cinode->lock_sem);
+	if (!cinode->can_cache_brlcks) {
+		up_write(&cinode->lock_sem);
 		return rc;
 	}
 
-	rc = CIFSSMBLock(xid, pTcon,
-			 ((struct cifsFileInfo *) file->private_data)->
-			 netfid, length,
-			 pfLock->fl_start, numUnlock, numLock, lockType,
-			 wait_flag);
-	if (pfLock->fl_flags & FL_POSIX)
-		posix_lock_file_wait(file, pfLock);
-	FreeXid(xid);
+	if (cap_unix(tcon->ses) &&
+	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
+	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
+		rc = cifs_push_posix_locks(cfile);
+	else
+		rc = tcon->ses->server->ops->push_mand_locks(cfile);
+
+	cinode->can_cache_brlcks = false;
+	up_write(&cinode->lock_sem);
 	return rc;
 }
 
-ssize_t cifs_user_write(struct file *file, const char __user *write_data,
-	size_t write_size, loff_t *poffset)
+static void
+cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
+		bool *wait_flag, struct TCP_Server_Info *server)
+{
+	if (flock->fl_flags & FL_POSIX)
+		cifs_dbg(FYI, "Posix\n");
+	if (flock->fl_flags & FL_FLOCK)
+		cifs_dbg(FYI, "Flock\n");
+	if (flock->fl_flags & FL_SLEEP) {
+		cifs_dbg(FYI, "Blocking lock\n");
+		*wait_flag = true;
+	}
+	if (flock->fl_flags & FL_ACCESS)
+		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
+	if (flock->fl_flags & FL_LEASE)
+		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
+	if (flock->fl_flags &
+	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
+	       FL_ACCESS | FL_LEASE | FL_CLOSE)))
+		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
+
+	*type = server->vals->large_lock_type;
+	if (flock->fl_type == F_WRLCK) {
+		cifs_dbg(FYI, "F_WRLCK\n");
+		*type |= server->vals->exclusive_lock_type;
+		*lock = 1;
+	} else if (flock->fl_type == F_UNLCK) {
+		cifs_dbg(FYI, "F_UNLCK\n");
+		*type |= server->vals->unlock_lock_type;
+		*unlock = 1;
+		/* Check if unlock includes more than one lock range */
+	} else if (flock->fl_type == F_RDLCK) {
+		cifs_dbg(FYI, "F_RDLCK\n");
+		*type |= server->vals->shared_lock_type;
+		*lock = 1;
+	} else if (flock->fl_type == F_EXLCK) {
+		cifs_dbg(FYI, "F_EXLCK\n");
+		*type |= server->vals->exclusive_lock_type;
+		*lock = 1;
+	} else if (flock->fl_type == F_SHLCK) {
+		cifs_dbg(FYI, "F_SHLCK\n");
+		*type |= server->vals->shared_lock_type;
+		*lock = 1;
+	} else
+		cifs_dbg(FYI, "Unknown type of lock\n");
+}
+
+static int
+cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
+	   bool wait_flag, bool posix_lck, unsigned int xid)
 {
 	int rc = 0;
-	unsigned int bytes_written = 0;
-	unsigned int total_written;
-	struct cifs_sb_info *cifs_sb;
-	struct cifsTconInfo *pTcon;
-	int xid, long_op;
-	struct cifsFileInfo *open_file;
+	__u64 length = 1 + flock->fl_end - flock->fl_start;
+	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
+	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
+	struct TCP_Server_Info *server = tcon->ses->server;
+	__u16 netfid = cfile->fid.netfid;
 
-	if (file->f_dentry == NULL)
-		return -EBADF;
+	if (posix_lck) {
+		int posix_lock_type;
 
-	cifs_sb = CIFS_SB(file->f_dentry->d_sb);
-	if (cifs_sb == NULL)
-		return -EBADF;
+		rc = cifs_posix_lock_test(file, flock);
+		if (!rc)
+			return rc;
 
-	pTcon = cifs_sb->tcon;
+		if (type & server->vals->shared_lock_type)
+			posix_lock_type = CIFS_RDLCK;
+		else
+			posix_lock_type = CIFS_WRLCK;
+		rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
+				      flock->fl_start, length, flock,
+				      posix_lock_type, wait_flag);
+		return rc;
+	}
 
-	/* cFYI(1,
-	   (" write %d bytes to offset %lld of %s", write_size,
-	   *poffset, file->f_dentry->d_name.name)); */
+	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
+	if (!rc)
+		return rc;
 
-	if (file->private_data == NULL)
-		return -EBADF;
-	else
-		open_file = (struct cifsFileInfo *) file->private_data;
-	
-	xid = GetXid();
-	if (file->f_dentry->d_inode == NULL) {
-		FreeXid(xid);
-		return -EBADF;
+	/* BB we could chain these into one lock request BB */
+	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
+				    1, 0, false);
+	if (rc == 0) {
+		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
+					    type, 0, 1, false);
+		flock->fl_type = F_UNLCK;
+		if (rc != 0)
+			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
+				 rc);
+		return 0;
 	}
 
-	if (*poffset > file->f_dentry->d_inode->i_size)
-		long_op = 2; /* writes past end of file can take a long time */
-	else
-		long_op = 1;
+	if (type & server->vals->shared_lock_type) {
+		flock->fl_type = F_WRLCK;
+		return 0;
+	}
 
-	for (total_written = 0; write_size > total_written;
-	     total_written += bytes_written) {
-		rc = -EAGAIN;
-		while (rc == -EAGAIN) {
-			if (file->private_data == NULL) {
-				/* file has been closed on us */
-				FreeXid(xid);
-			/* if we have gotten here we have written some data
-			   and blocked, and the file has been freed on us while
-			   we blocked so return what we managed to write */
-				return total_written;
-			} 
-			if (open_file->closePend) {
-				FreeXid(xid);
-				if (total_written)
-					return total_written;
-				else
-					return -EBADF;
-			}
-			if (open_file->invalidHandle) {
-				if ((file->f_dentry == NULL) ||
-				    (file->f_dentry->d_inode == NULL)) {
-					FreeXid(xid);
-					return total_written;
-				}
-				/* we could deadlock if we called
-				   filemap_fdatawait from here so tell
-				   reopen_file not to flush data to server
-				   now */
-				rc = cifs_reopen_file(file->f_dentry->d_inode,
-					file, FALSE);
-				if (rc != 0)
-					break;
+	type &= ~server->vals->exclusive_lock_type;
+
+	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
+				    type | server->vals->shared_lock_type,
+				    1, 0, false);
+	if (rc == 0) {
+		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
+			type | server->vals->shared_lock_type, 0, 1, false);
+		flock->fl_type = F_RDLCK;
+		if (rc != 0)
+			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
+				 rc);
+	} else
+		flock->fl_type = F_WRLCK;
+
+	return 0;
+}
+
+void
+cifs_move_llist(struct list_head *source, struct list_head *dest)
+{
+	struct list_head *li, *tmp;
+	list_for_each_safe(li, tmp, source)
+		list_move(li, dest);
+}
+
+void
+cifs_free_llist(struct list_head *llist)
+{
+	struct cifsLockInfo *li, *tmp;
+	list_for_each_entry_safe(li, tmp, llist, llist) {
+		cifs_del_lock_waiters(li);
+		list_del(&li->llist);
+		kfree(li);
+	}
+}
+
+int
+cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
+		  unsigned int xid)
+{
+	int rc = 0, stored_rc;
+	int types[] = {LOCKING_ANDX_LARGE_FILES,
+		       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
+	unsigned int i;
+	unsigned int max_num, num, max_buf;
+	LOCKING_ANDX_RANGE *buf, *cur;
+	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
+	struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+	struct cifsLockInfo *li, *tmp;
+	__u64 length = 1 + flock->fl_end - flock->fl_start;
+	struct list_head tmp_llist;
+
+	INIT_LIST_HEAD(&tmp_llist);
+
+	/*
+	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
+	 * and check it for zero before using.
+	 */
+	max_buf = tcon->ses->server->maxBuf;
+	if (!max_buf)
+		return -EINVAL;
+
+	max_num = (max_buf - sizeof(struct smb_hdr)) /
+						sizeof(LOCKING_ANDX_RANGE);
+	buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	down_write(&cinode->lock_sem);
+	for (i = 0; i < 2; i++) {
+		cur = buf;
+		num = 0;
+		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
+			if (flock->fl_start > li->offset ||
+			    (flock->fl_start + length) <
+			    (li->offset + li->length))
+				continue;
+			if (current->tgid != li->pid)
+				continue;
+			if (types[i] != li->type)
+				continue;
+			if (cinode->can_cache_brlcks) {
+				/*
+				 * We can cache brlock requests - simply remove
+				 * a lock from the file's list.
+				 */
+				list_del(&li->llist);
+				cifs_del_lock_waiters(li);
+				kfree(li);
+				continue;
 			}
+			cur->Pid = cpu_to_le16(li->pid);
+			cur->LengthLow = cpu_to_le32((u32)li->length);
+			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
+			cur->OffsetLow = cpu_to_le32((u32)li->offset);
+			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
+			/*
+			 * We need to save a lock here to let us add it again to
+			 * the file's list if the unlock range request fails on
+			 * the server.
+			 */
+			list_move(&li->llist, &tmp_llist);
+			if (++num == max_num) {
+				stored_rc = cifs_lockv(xid, tcon,
+						       cfile->fid.netfid,
+						       li->type, num, 0, buf);
+				if (stored_rc) {
+					/*
+					 * We failed on the unlock range
+					 * request - add all locks from the tmp
+					 * list to the head of the file's list.
+					 */
+					cifs_move_llist(&tmp_llist,
+							&cfile->llist->locks);
+					rc = stored_rc;
+				} else
+					/*
+					 * The unlock range request succeed -
+					 * free the tmp list.
+					 */
+					cifs_free_llist(&tmp_llist);
+				cur = buf;
+				num = 0;
+			} else
+				cur++;
+		}
+		if (num) {
+			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
+					       types[i], num, 0, buf);
+			if (stored_rc) {
+				cifs_move_llist(&tmp_llist,
+						&cfile->llist->locks);
+				rc = stored_rc;
+			} else
+				cifs_free_llist(&tmp_llist);
+		}
+	}
+
+	up_write(&cinode->lock_sem);
+	kfree(buf);
+	return rc;
+}
+
+static int
+cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
+	   bool wait_flag, bool posix_lck, int lock, int unlock,
+	   unsigned int xid)
+{
+	int rc = 0;
+	__u64 length = 1 + flock->fl_end - flock->fl_start;
+	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
+	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
+	struct TCP_Server_Info *server = tcon->ses->server;
+	struct inode *inode = cfile->dentry->d_inode;
+
+	if (posix_lck) {
+		int posix_lock_type;
+
+		rc = cifs_posix_lock_set(file, flock);
+		if (!rc || rc < 0)
+			return rc;
 
-			rc = CIFSSMBWrite(xid, pTcon,
-				open_file->netfid,
-				min_t(const int, cifs_sb->wsize,
-				      write_size - total_written),
-				*poffset, &bytes_written,
-				NULL, write_data + total_written, long_op);
+		if (type & server->vals->shared_lock_type)
+			posix_lock_type = CIFS_RDLCK;
+		else
+			posix_lock_type = CIFS_WRLCK;
+
+		if (unlock == 1)
+			posix_lock_type = CIFS_UNLCK;
+
+		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
+				      current->tgid, flock->fl_start, length,
+				      NULL, posix_lock_type, wait_flag);
+		goto out;
+	}
+
+	if (lock) {
+		struct cifsLockInfo *lock;
+
+		lock = cifs_lock_init(flock->fl_start, length, type);
+		if (!lock)
+			return -ENOMEM;
+
+		rc = cifs_lock_add_if(cfile, lock, wait_flag);
+		if (rc < 0) {
+			kfree(lock);
+			return rc;
 		}
-		if (rc || (bytes_written == 0)) {
-			if (total_written)
-				break;
-			else {
-				FreeXid(xid);
-				return rc;
-			}
-		} else
-			*poffset += bytes_written;
-		long_op = FALSE; /* subsequent writes fast -
-				    15 seconds is plenty */
-	}
-
-	cifs_stats_bytes_written(pTcon, total_written);
-
-	/* since the write may have blocked check these pointers again */
-	if (file->f_dentry) {
-		if (file->f_dentry->d_inode) {
-			struct inode *inode = file->f_dentry->d_inode;
-			inode->i_ctime = inode->i_mtime =
-				current_fs_time(inode->i_sb);
-			if (total_written > 0) {
-				if (*poffset > file->f_dentry->d_inode->i_size)
-					i_size_write(file->f_dentry->d_inode,
-					*poffset);
-			}
-			mark_inode_dirty_sync(file->f_dentry->d_inode);
+		if (!rc)
+			goto out;
+
+		/*
+		 * Windows 7 server can delay breaking lease from read to None
+		 * if we set a byte-range lock on a file - break it explicitly
+		 * before sending the lock to the server to be sure the next
+		 * read won't conflict with non-overlapted locks due to
+		 * pagereading.
+		 */
+		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
+					CIFS_CACHE_READ(CIFS_I(inode))) {
+			cifs_zap_mapping(inode);
+			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
+				 inode);
+			CIFS_I(inode)->oplock = 0;
 		}
+
+		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
+					    type, 1, 0, wait_flag);
+		if (rc) {
+			kfree(lock);
+			return rc;
+		}
+
+		cifs_lock_add(cfile, lock);
+	} else if (unlock)
+		rc = server->ops->mand_unlock_range(cfile, flock, xid);
+
+out:
+	if (flock->fl_flags & FL_POSIX)
+		posix_lock_file_wait(file, flock);
+	return rc;
+}
+
+int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
+{
+	int rc, xid;
+	int lock = 0, unlock = 0;
+	bool wait_flag = false;
+	bool posix_lck = false;
+	struct cifs_sb_info *cifs_sb;
+	struct cifs_tcon *tcon;
+	struct cifsInodeInfo *cinode;
+	struct cifsFileInfo *cfile;
+	__u16 netfid;
+	__u32 type;
+
+	rc = -EACCES;
+	xid = get_xid();
+
+	cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
+		 cmd, flock->fl_flags, flock->fl_type,
+		 flock->fl_start, flock->fl_end);
+
+	cfile = (struct cifsFileInfo *)file->private_data;
+	tcon = tlink_tcon(cfile->tlink);
+
+	cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
+			tcon->ses->server);
+
+	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+	netfid = cfile->fid.netfid;
+	cinode = CIFS_I(file_inode(file));
+
+	if (cap_unix(tcon->ses) &&
+	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
+	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
+		posix_lck = true;
+	/*
+	 * BB add code here to normalize offset and length to account for
+	 * negative length which we can not accept over the wire.
+	 */
+	if (IS_GETLK(cmd)) {
+		rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
+		free_xid(xid);
+		return rc;
 	}
-	FreeXid(xid);
-	return total_written;
+
+	if (!lock && !unlock) {
+		/*
+		 * if no lock or unlock then nothing to do since we do not
+		 * know what it is
+		 */
+		free_xid(xid);
+		return -EOPNOTSUPP;
+	}
+
+	rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
+			xid);
+	free_xid(xid);
+	return rc;
 }
 
-static ssize_t cifs_write(struct file *file, const char *write_data,
-	size_t write_size, loff_t *poffset)
+/*
+ * update the file size (if needed) after a write. Should be called with
+ * the inode->i_lock held
+ */
+void
+cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
+		      unsigned int bytes_written)
+{
+	loff_t end_of_write = offset + bytes_written;
+
+	if (end_of_write > cifsi->server_eof)
+		cifsi->server_eof = end_of_write;
+}
+
+static ssize_t
+cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
+	   size_t write_size, loff_t *offset)
 {
 	int rc = 0;
 	unsigned int bytes_written = 0;
 	unsigned int total_written;
 	struct cifs_sb_info *cifs_sb;
-	struct cifsTconInfo *pTcon;
-	int xid, long_op;
-	struct cifsFileInfo *open_file;
-
-	if (file->f_dentry == NULL)
-		return -EBADF;
+	struct cifs_tcon *tcon;
+	struct TCP_Server_Info *server;
+	unsigned int xid;
+	struct dentry *dentry = open_file->dentry;
+	struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
+	struct cifs_io_parms io_parms;
 
-	cifs_sb = CIFS_SB(file->f_dentry->d_sb);
-	if (cifs_sb == NULL)
-		return -EBADF;
+	cifs_sb = CIFS_SB(dentry->d_sb);
 
-	pTcon = cifs_sb->tcon;
+	cifs_dbg(FYI, "write %zd bytes to offset %lld of %s\n",
+		 write_size, *offset, dentry->d_name.name);
 
-	cFYI(1,("write %zd bytes to offset %lld of %s", write_size,
-	   *poffset, file->f_dentry->d_name.name));
+	tcon = tlink_tcon(open_file->tlink);
+	server = tcon->ses->server;
 
-	if (file->private_data == NULL)
-		return -EBADF;
-	else
-		open_file = (struct cifsFileInfo *)file->private_data;
-	
-	xid = GetXid();
-	if (file->f_dentry->d_inode == NULL) {
-		FreeXid(xid);
-		return -EBADF;
-	}
+	if (!server->ops->sync_write)
+		return -ENOSYS;
 
-	if (*poffset > file->f_dentry->d_inode->i_size)
-		long_op = 2; /* writes past end of file can take a long time */
-	else
-		long_op = 1;
+	xid = get_xid();
 
 	for (total_written = 0; write_size > total_written;
 	     total_written += bytes_written) {
 		rc = -EAGAIN;
 		while (rc == -EAGAIN) {
-			if (file->private_data == NULL) {
-				/* file has been closed on us */
-				FreeXid(xid);
-			/* if we have gotten here we have written some data
-			   and blocked, and the file has been freed on us
-			   while we blocked so return what we managed to 
-			   write */
-				return total_written;
-			} 
-			if (open_file->closePend) {
-				FreeXid(xid);
-				if (total_written)
-					return total_written;
-				else
-					return -EBADF;
-			}
+			struct kvec iov[2];
+			unsigned int len;
+
 			if (open_file->invalidHandle) {
-				if ((file->f_dentry == NULL) ||
-				   (file->f_dentry->d_inode == NULL)) {
-					FreeXid(xid);
-					return total_written;
-				}
 				/* we could deadlock if we called
 				   filemap_fdatawait from here so tell
-				   reopen_file not to flush data to 
+				   reopen_file not to flush data to
 				   server now */
-				rc = cifs_reopen_file(file->f_dentry->d_inode,
-					file, FALSE);
+				rc = cifs_reopen_file(open_file, false);
 				if (rc != 0)
 					break;
 			}
-#ifdef CONFIG_CIFS_EXPERIMENTAL
-			/* BB FIXME We can not sign across two buffers yet */
-			if((experimEnabled) && ((pTcon->ses->server->secMode & 
-			 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) == 0)) {
-				struct kvec iov[2];
-				unsigned int len;
-
-				len = min((size_t)cifs_sb->wsize,
-					  write_size - total_written);
-				/* iov[0] is reserved for smb header */
-				iov[1].iov_base = (char *)write_data +
-						  total_written;
-				iov[1].iov_len = len;
-				rc = CIFSSMBWrite2(xid, pTcon,
-						open_file->netfid, len,
-						*poffset, &bytes_written,
-						iov, 1, long_op);
-			} else
-			/* BB FIXME fixup indentation of line below */
-#endif			
-			rc = CIFSSMBWrite(xid, pTcon,
-				 open_file->netfid,
-				 min_t(const int, cifs_sb->wsize, 
-				       write_size - total_written),
-				 *poffset, &bytes_written,
-				 write_data + total_written, NULL, long_op);
+
+			len = min((size_t)cifs_sb->wsize,
+				  write_size - total_written);
+			/* iov[0] is reserved for smb header */
+			iov[1].iov_base = (char *)write_data + total_written;
+			iov[1].iov_len = len;
+			io_parms.pid = pid;
+			io_parms.tcon = tcon;
+			io_parms.offset = *offset;
+			io_parms.length = len;
+			rc = server->ops->sync_write(xid, open_file, &io_parms,
+						     &bytes_written, iov, 1);
 		}
 		if (rc || (bytes_written == 0)) {
 			if (total_written)
 				break;
 			else {
-				FreeXid(xid);
+				free_xid(xid);
 				return rc;
 			}
-		} else
-			*poffset += bytes_written;
-		long_op = FALSE; /* subsequent writes fast - 
-				    15 seconds is plenty */
-	}
-
-	cifs_stats_bytes_written(pTcon, total_written);
-
-	/* since the write may have blocked check these pointers again */
-	if (file->f_dentry) {
-		if (file->f_dentry->d_inode) {
-			file->f_dentry->d_inode->i_ctime = 
-			file->f_dentry->d_inode->i_mtime = CURRENT_TIME;
-			if (total_written > 0) {
-				if (*poffset > file->f_dentry->d_inode->i_size)
-					i_size_write(file->f_dentry->d_inode, 
-						     *poffset);
-			}
-			mark_inode_dirty_sync(file->f_dentry->d_inode);
+		} else {
+			spin_lock(&dentry->d_inode->i_lock);
+			cifs_update_eof(cifsi, *offset, bytes_written);
+			spin_unlock(&dentry->d_inode->i_lock);
+			*offset += bytes_written;
 		}
 	}
-	FreeXid(xid);
+
+	cifs_stats_bytes_written(tcon, total_written);
+
+	if (total_written > 0) {
+		spin_lock(&dentry->d_inode->i_lock);
+		if (*offset > dentry->d_inode->i_size)
+			i_size_write(dentry->d_inode, *offset);
+		spin_unlock(&dentry->d_inode->i_lock);
+	}
+	mark_inode_dirty_sync(dentry->d_inode);
+	free_xid(xid);
 	return total_written;
 }
 
-struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
+struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
+					bool fsuid_only)
 {
-	struct cifsFileInfo *open_file;
+	struct cifsFileInfo *open_file = NULL;
+	struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
+
+	/* only filter by fsuid on multiuser mounts */
+	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
+		fsuid_only = false;
+
+	spin_lock(&cifs_file_list_lock);
+	/* we could simply get the first_list_entry since write-only entries
+	   are always at the end of the list but since the first entry might
+	   have a close pending, we go through the whole list */
+	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
+		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
+			continue;
+		if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
+			if (!open_file->invalidHandle) {
+				/* found a good file */
+				/* lock it so it will not be closed on us */
+				cifsFileInfo_get_locked(open_file);
+				spin_unlock(&cifs_file_list_lock);
+				return open_file;
+			} /* else might as well continue, and look for
+			     another, or simply have the caller reopen it
+			     again rather than trying to fix this handle */
+		} else /* write only file */
+			break; /* write only files are last so must be done */
+	}
+	spin_unlock(&cifs_file_list_lock);
+	return NULL;
+}
+
+struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
+					bool fsuid_only)
+{
+	struct cifsFileInfo *open_file, *inv_file = NULL;
+	struct cifs_sb_info *cifs_sb;
+	bool any_available = false;
 	int rc;
+	unsigned int refind = 0;
+
+	/* Having a null inode here (because mapping->host was set to zero by
+	the VFS or MM) should not happen but we had reports of on oops (due to
+	it being zero) during stress testcases so we need to check for it */
 
-	read_lock(&GlobalSMBSeslock);
+	if (cifs_inode == NULL) {
+		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
+		dump_stack();
+		return NULL;
+	}
+
+	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
+
+	/* only filter by fsuid on multiuser mounts */
+	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
+		fsuid_only = false;
+
+	spin_lock(&cifs_file_list_lock);
+refind_writable:
+	if (refind > MAX_REOPEN_ATT) {
+		spin_unlock(&cifs_file_list_lock);
+		return NULL;
+	}
 	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
-		if (open_file->closePend)
+		if (!any_available && open_file->pid != current->tgid)
 			continue;
-		if (open_file->pfile &&
-		    ((open_file->pfile->f_flags & O_RDWR) ||
-		     (open_file->pfile->f_flags & O_WRONLY))) {
-			atomic_inc(&open_file->wrtPending);
-			read_unlock(&GlobalSMBSeslock);
-			if((open_file->invalidHandle) && 
-			   (!open_file->closePend) /* BB fixme -since the second clause can not be true remove it BB */) {
-				rc = cifs_reopen_file(&cifs_inode->vfs_inode, 
-						      open_file->pfile, FALSE);
-				/* if it fails, try another handle - might be */
-				/* dangerous to hold up writepages with retry */
-				if(rc) {
-					cFYI(1,("failed on reopen file in wp"));
-					read_lock(&GlobalSMBSeslock);
-					/* can not use this handle, no write
-					pending on this one after all */
-					atomic_dec
-					     (&open_file->wrtPending);
-					continue;
-				}
+		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
+			continue;
+		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
+			if (!open_file->invalidHandle) {
+				/* found a good writable file */
+				cifsFileInfo_get_locked(open_file);
+				spin_unlock(&cifs_file_list_lock);
+				return open_file;
+			} else {
+				if (!inv_file)
+					inv_file = open_file;
 			}
-			return open_file;
 		}
 	}
-	read_unlock(&GlobalSMBSeslock);
+	/* couldn't find useable FH with same pid, try any available */
+	if (!any_available) {
+		any_available = true;
+		goto refind_writable;
+	}
+
+	if (inv_file) {
+		any_available = false;
+		cifsFileInfo_get_locked(inv_file);
+	}
+
+	spin_unlock(&cifs_file_list_lock);
+
+	if (inv_file) {
+		rc = cifs_reopen_file(inv_file, false);
+		if (!rc)
+			return inv_file;
+		else {
+			spin_lock(&cifs_file_list_lock);
+			list_move_tail(&inv_file->flist,
+					&cifs_inode->openFileList);
+			spin_unlock(&cifs_file_list_lock);
+			cifsFileInfo_put(inv_file);
+			spin_lock(&cifs_file_list_lock);
+			++refind;
+			goto refind_writable;
+		}
+	}
+
 	return NULL;
 }
 
@@ -971,8 +1831,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
 	char *write_data;
 	int rc = -EFAULT;
 	int bytes_written = 0;
-	struct cifs_sb_info *cifs_sb;
-	struct cifsTconInfo *pTcon;
 	struct inode *inode;
 	struct cifsFileInfo *open_file;
 
@@ -980,8 +1838,6 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
 		return -EFAULT;
 
 	inode = page->mapping->host;
-	cifs_sb = CIFS_SB(inode->i_sb);
-	pTcon = cifs_sb->tcon;
 
 	offset += (loff_t)from;
 	write_data = kmap(page);
@@ -1000,23 +1856,21 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
 
 	/* check to make sure that we are not extending the file */
 	if (mapping->host->i_size - offset < (loff_t)to)
-		to = (unsigned)(mapping->host->i_size - offset); 
+		to = (unsigned)(mapping->host->i_size - offset);
 
-	open_file = find_writable_file(CIFS_I(mapping->host));
+	open_file = find_writable_file(CIFS_I(mapping->host), false);
 	if (open_file) {
-		bytes_written = cifs_write(open_file->pfile, write_data,
-					   to-from, &offset);
-		atomic_dec(&open_file->wrtPending);
+		bytes_written = cifs_write(open_file, open_file->pid,
+					   write_data, to - from, &offset);
+		cifsFileInfo_put(open_file);
 		/* Does mm or vfs already set times? */
 		inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
-		if ((bytes_written > 0) && (offset)) {
+		if ((bytes_written > 0) && (offset))
 			rc = 0;
-		} else if (bytes_written < 0) {
-			if (rc != -EBADF)
-				rc = bytes_written;
-		}
+		else if (bytes_written < 0)
+			rc = bytes_written;
 	} else {
-		cFYI(1, ("No writeable filehandles for inode"));
+		cifs_dbg(FYI, "No writeable filehandles for inode\n");
 		rc = -EIO;
 	}
 
@@ -1024,86 +1878,75 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
 	return rc;
 }
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
 static int cifs_writepages(struct address_space *mapping,
 			   struct writeback_control *wbc)
 {
-	struct backing_dev_info *bdi = mapping->backing_dev_info;
-	unsigned int bytes_to_write;
-	unsigned int bytes_written;
-	struct cifs_sb_info *cifs_sb;
-	int done = 0;
-	pgoff_t end = -1;
-	pgoff_t index;
-	int is_range = 0;
-	struct kvec iov[32];
-	int len;
-	int n_iov = 0;
-	pgoff_t next;
-	int nr_pages;
-	__u64 offset = 0;
-	struct cifsFileInfo *open_file;
+	struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
+	bool done = false, scanned = false, range_whole = false;
+	pgoff_t end, index;
+	struct cifs_writedata *wdata;
+	struct TCP_Server_Info *server;
 	struct page *page;
-	struct pagevec pvec;
 	int rc = 0;
-	int scanned = 0;
-	int xid;
 
-	cifs_sb = CIFS_SB(mapping->host->i_sb);
-	
 	/*
-	 * If wsize is smaller that the page cache size, default to writing
+	 * If wsize is smaller than the page cache size, default to writing
 	 * one page at a time via cifs_writepage
 	 */
 	if (cifs_sb->wsize < PAGE_CACHE_SIZE)
 		return generic_writepages(mapping, wbc);
 
-	/* BB FIXME we do not have code to sign across multiple buffers yet,
-	   so go to older writepage style write which we can sign if needed */
-	if((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server))
-		if(cifs_sb->tcon->ses->server->secMode &
-                          (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
-			return generic_writepages(mapping, wbc);
-
-	/*
-	 * BB: Is this meaningful for a non-block-device file system?
-	 * If it is, we should test it again after we do I/O
-	 */
-	if (wbc->nonblocking && bdi_write_congested(bdi)) {
-		wbc->encountered_congestion = 1;
-		return 0;
-	}
-
-	xid = GetXid();
-
-	pagevec_init(&pvec, 0);
-	if (wbc->sync_mode == WB_SYNC_NONE)
+	if (wbc->range_cyclic) {
 		index = mapping->writeback_index; /* Start from prev offset */
-	else {
-		index = 0;
-		scanned = 1;
-	}
-	if (wbc->start || wbc->end) {
-		index = wbc->start >> PAGE_CACHE_SHIFT;
-		end = wbc->end >> PAGE_CACHE_SHIFT;
-		is_range = 1;
-		scanned = 1;
+		end = -1;
+	} else {
+		index = wbc->range_start >> PAGE_CACHE_SHIFT;
+		end = wbc->range_end >> PAGE_CACHE_SHIFT;
+		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
+			range_whole = true;
+		scanned = true;
 	}
 retry:
-	while (!done && (index <= end) &&
-	       (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
-			PAGECACHE_TAG_DIRTY,
-			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1))) {
-		int first;
-		unsigned int i;
+	while (!done && index <= end) {
+		unsigned int i, nr_pages, found_pages;
+		pgoff_t next = 0, tofind;
+		struct page **pages;
+
+		tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
+				end - index) + 1;
+
+		wdata = cifs_writedata_alloc((unsigned int)tofind,
+					     cifs_writev_complete);
+		if (!wdata) {
+			rc = -ENOMEM;
+			break;
+		}
 
-		first = -1;
-		next = 0;
-		n_iov = 0;
-		bytes_to_write = 0;
+		/*
+		 * find_get_pages_tag seems to return a max of 256 on each
+		 * iteration, so we must call it several times in order to
+		 * fill the array or the wsize is effectively limited to
+		 * 256 * PAGE_CACHE_SIZE.
+		 */
+		found_pages = 0;
+		pages = wdata->pages;
+		do {
+			nr_pages = find_get_pages_tag(mapping, &index,
+							PAGECACHE_TAG_DIRTY,
+							tofind, pages);
+			found_pages += nr_pages;
+			tofind -= nr_pages;
+			pages += nr_pages;
+		} while (nr_pages && tofind && index <= end);
+
+		if (found_pages == 0) {
+			kref_put(&wdata->refcount, cifs_writedata_release);
+			break;
+		}
 
-		for (i = 0; i < nr_pages; i++) {
-			page = pvec.pages[i];
+		nr_pages = 0;
+		for (i = 0; i < found_pages; i++) {
+			page = wdata->pages[i];
 			/*
 			 * At this point we hold neither mapping->tree_lock nor
 			 * lock on the page itself: the page may be truncated or
@@ -1112,9 +1955,9 @@ retry:
 			 * mapping
 			 */
 
-			if (first < 0)
+			if (nr_pages == 0)
 				lock_page(page);
-			else if (TestSetPageLocked(page))
+			else if (!trylock_page(page))
 				break;
 
 			if (unlikely(page->mapping != mapping)) {
@@ -1122,8 +1965,8 @@ retry:
 				break;
 			}
 
-			if (unlikely(is_range) && (page->index > end)) {
-				done = 1;
+			if (!wbc->range_cyclic && page->index > end) {
+				done = true;
 				unlock_page(page);
 				break;
 			}
@@ -1138,652 +1981,1457 @@ retry:
 				wait_on_page_writeback(page);
 
 			if (PageWriteback(page) ||
-					!test_clear_page_dirty(page)) {
+					!clear_page_dirty_for_io(page)) {
 				unlock_page(page);
 				break;
 			}
 
-			if (page_offset(page) >= mapping->host->i_size) {
-				done = 1;
+			/*
+			 * This actually clears the dirty bit in the radix tree.
+			 * See cifs_writepage() for more commentary.
+			 */
+			set_page_writeback(page);
+
+			if (page_offset(page) >= i_size_read(mapping->host)) {
+				done = true;
 				unlock_page(page);
+				end_page_writeback(page);
 				break;
 			}
 
-			/*
-			 * BB can we get rid of this?  pages are held by pvec
-			 */
-			page_cache_get(page);
+			wdata->pages[i] = page;
+			next = page->index + 1;
+			++nr_pages;
+		}
 
-			len = min(mapping->host->i_size - page_offset(page),
-				  (loff_t)PAGE_CACHE_SIZE);
+		/* reset index to refind any pages skipped */
+		if (nr_pages == 0)
+			index = wdata->pages[0]->index + 1;
 
-			/* reserve iov[0] for the smb header */
-			n_iov++;
-			iov[n_iov].iov_base = kmap(page);
-			iov[n_iov].iov_len = len;
-			bytes_to_write += len;
+		/* put any pages we aren't going to use */
+		for (i = nr_pages; i < found_pages; i++) {
+			page_cache_release(wdata->pages[i]);
+			wdata->pages[i] = NULL;
+		}
 
-			if (first < 0) {
-				first = i;
-				offset = page_offset(page);
-			}
-			next = page->index + 1;
-			if (bytes_to_write + PAGE_CACHE_SIZE > cifs_sb->wsize)
-				break;
+		/* nothing to write? */
+		if (nr_pages == 0) {
+			kref_put(&wdata->refcount, cifs_writedata_release);
+			continue;
 		}
-		if (n_iov) {
-			/* Search for a writable handle every time we call
-			 * CIFSSMBWrite2.  We can't rely on the last handle
-			 * we used to still be valid
-			 */
-			open_file = find_writable_file(CIFS_I(mapping->host));
-			if (!open_file) {
-				cERROR(1, ("No writable handles for inode"));
+
+		wdata->sync_mode = wbc->sync_mode;
+		wdata->nr_pages = nr_pages;
+		wdata->offset = page_offset(wdata->pages[0]);
+		wdata->pagesz = PAGE_CACHE_SIZE;
+		wdata->tailsz =
+			min(i_size_read(mapping->host) -
+			    page_offset(wdata->pages[nr_pages - 1]),
+			    (loff_t)PAGE_CACHE_SIZE);
+		wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
+					wdata->tailsz;
+
+		do {
+			if (wdata->cfile != NULL)
+				cifsFileInfo_put(wdata->cfile);
+			wdata->cfile = find_writable_file(CIFS_I(mapping->host),
+							  false);
+			if (!wdata->cfile) {
+				cifs_dbg(VFS, "No writable handles for inode\n");
 				rc = -EBADF;
-			} else {
-				rc = CIFSSMBWrite2(xid, cifs_sb->tcon,
-						   open_file->netfid,
-						   bytes_to_write, offset,
-						   &bytes_written, iov, n_iov,
-						   1);
-				atomic_dec(&open_file->wrtPending);
-				if (rc || bytes_written < bytes_to_write) {
-					cERROR(1,("Write2 ret %d, written = %d",
-						  rc, bytes_written));
-					/* BB what if continued retry is
-					   requested via mount flags? */
-					set_bit(AS_EIO, &mapping->flags);
-					SetPageError(page);
-				} else {
-					cifs_stats_bytes_written(cifs_sb->tcon,
-								 bytes_written);
-				}
+				break;
 			}
-			for (i = 0; i < n_iov; i++) {
-				page = pvec.pages[first + i];
-				kunmap(page);
-				unlock_page(page);
-				page_cache_release(page);
+			wdata->pid = wdata->cfile->pid;
+			server = tlink_tcon(wdata->cfile->tlink)->ses->server;
+			rc = server->ops->async_writev(wdata,
+							cifs_writedata_release);
+		} while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
+
+		for (i = 0; i < nr_pages; ++i)
+			unlock_page(wdata->pages[i]);
+
+		/* send failure -- clean up the mess */
+		if (rc != 0) {
+			for (i = 0; i < nr_pages; ++i) {
+				if (rc == -EAGAIN)
+					redirty_page_for_writepage(wbc,
+							   wdata->pages[i]);
+				else
+					SetPageError(wdata->pages[i]);
+				end_page_writeback(wdata->pages[i]);
+				page_cache_release(wdata->pages[i]);
 			}
-			if ((wbc->nr_to_write -= n_iov) <= 0)
-				done = 1;
-			index = next;
+			if (rc != -EAGAIN)
+				mapping_set_error(mapping, rc);
 		}
-		pagevec_release(&pvec);
+		kref_put(&wdata->refcount, cifs_writedata_release);
+
+		wbc->nr_to_write -= nr_pages;
+		if (wbc->nr_to_write <= 0)
+			done = true;
+
+		index = next;
 	}
+
 	if (!scanned && !done) {
 		/*
 		 * We hit the last page and there is more work to be done: wrap
 		 * back to the start of the file
 		 */
-		scanned = 1;
+		scanned = true;
 		index = 0;
 		goto retry;
 	}
-	if (!is_range)
-		mapping->writeback_index = index;
 
-	FreeXid(xid);
+	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
+		mapping->writeback_index = index;
 
 	return rc;
 }
-#endif
 
-static int cifs_writepage(struct page* page, struct writeback_control *wbc)
+static int
+cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
 {
-	int rc = -EFAULT;
-	int xid;
+	int rc;
+	unsigned int xid;
 
-	xid = GetXid();
+	xid = get_xid();
 /* BB add check for wbc flags */
 	page_cache_get(page);
-        if (!PageUptodate(page)) {
-		cFYI(1, ("ppw - page not up to date"));
-	}
-	
+	if (!PageUptodate(page))
+		cifs_dbg(FYI, "ppw - page not up to date\n");
+
+	/*
+	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
+	 *
+	 * A writepage() implementation always needs to do either this,
+	 * or re-dirty the page with "redirty_page_for_writepage()" in
+	 * the case of a failure.
+	 *
+	 * Just unlocking the page will cause the radix tree tag-bits
+	 * to fail to update with the state of the page correctly.
+	 */
+	set_page_writeback(page);
+retry_write:
 	rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
-	SetPageUptodate(page); /* BB add check for error and Clearuptodate? */
+	if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
+		goto retry_write;
+	else if (rc == -EAGAIN)
+		redirty_page_for_writepage(wbc, page);
+	else if (rc != 0)
+		SetPageError(page);
+	else
+		SetPageUptodate(page);
+	end_page_writeback(page);
+	page_cache_release(page);
+	free_xid(xid);
+	return rc;
+}
+
+static int cifs_writepage(struct page *page, struct writeback_control *wbc)
+{
+	int rc = cifs_writepage_locked(page, wbc);
 	unlock_page(page);
-	page_cache_release(page);	
-	FreeXid(xid);
 	return rc;
 }
 
-static int cifs_commit_write(struct file *file, struct page *page,
-	unsigned offset, unsigned to)
+static int cifs_write_end(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned copied,
+			struct page *page, void *fsdata)
 {
-	int xid;
-	int rc = 0;
-	struct inode *inode = page->mapping->host;
-	loff_t position = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
-	char *page_data;
-
-	xid = GetXid();
-	cFYI(1, ("commit write for page %p up to position %lld for %d", 
-		 page, position, to));
-	if (position > inode->i_size) {
-		i_size_write(inode, position);
-		/* if (file->private_data == NULL) {
-			rc = -EBADF;
-		} else {
-			open_file = (struct cifsFileInfo *)file->private_data;
-			cifs_sb = CIFS_SB(inode->i_sb);
-			rc = -EAGAIN;
-			while (rc == -EAGAIN) {
-				if ((open_file->invalidHandle) && 
-				    (!open_file->closePend)) {
-					rc = cifs_reopen_file(
-						file->f_dentry->d_inode, file);
-					if (rc != 0)
-						break;
-				}
-				if (!open_file->closePend) {
-					rc = CIFSSMBSetFileSize(xid,
-						cifs_sb->tcon, position,
-						open_file->netfid,
-						open_file->pid, FALSE);
-				} else {
-					rc = -EBADF;
-					break;
-				}
-			}
-			cFYI(1, (" SetEOF (commit write) rc = %d", rc));
-		} */
-	}
+	int rc;
+	struct inode *inode = mapping->host;
+	struct cifsFileInfo *cfile = file->private_data;
+	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
+	__u32 pid;
+
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
+		pid = cfile->pid;
+	else
+		pid = current->tgid;
+
+	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
+		 page, pos, copied);
+
+	if (PageChecked(page)) {
+		if (copied == len)
+			SetPageUptodate(page);
+		ClearPageChecked(page);
+	} else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
+		SetPageUptodate(page);
+
 	if (!PageUptodate(page)) {
-		position =  ((loff_t)page->index << PAGE_CACHE_SHIFT) + offset;
-		/* can not rely on (or let) writepage write this data */
-		if (to < offset) {
-			cFYI(1, ("Illegal offsets, can not copy from %d to %d",
-				offset, to));
-			FreeXid(xid);
-			return rc;
-		}
+		char *page_data;
+		unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+		unsigned int xid;
+
+		xid = get_xid();
 		/* this is probably better than directly calling
 		   partialpage_write since in this function the file handle is
 		   known which we might as well	leverage */
 		/* BB check if anything else missing out of ppw
 		   such as updating last write time */
 		page_data = kmap(page);
-		rc = cifs_write(file, page_data + offset, to-offset,
-				&position);
-		if (rc > 0)
-			rc = 0;
-		/* else if (rc < 0) should we set writebehind rc? */
+		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
+		/* if (rc < 0) should we set writebehind rc? */
 		kunmap(page);
-	} else {	
+
+		free_xid(xid);
+	} else {
+		rc = copied;
+		pos += copied;
 		set_page_dirty(page);
 	}
 
-	FreeXid(xid);
+	if (rc > 0) {
+		spin_lock(&inode->i_lock);
+		if (pos > inode->i_size)
+			i_size_write(inode, pos);
+		spin_unlock(&inode->i_lock);
+	}
+
+	unlock_page(page);
+	page_cache_release(page);
+
 	return rc;
 }
 
-int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
+int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
+		      int datasync)
 {
-	int xid;
+	unsigned int xid;
 	int rc = 0;
-	struct inode *inode = file->f_dentry->d_inode;
+	struct cifs_tcon *tcon;
+	struct TCP_Server_Info *server;
+	struct cifsFileInfo *smbfile = file->private_data;
+	struct inode *inode = file_inode(file);
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+
+	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (rc)
+		return rc;
+	mutex_lock(&inode->i_mutex);
 
-	xid = GetXid();
+	xid = get_xid();
 
-	cFYI(1, ("Sync file - name: %s datasync: 0x%x ", 
-		dentry->d_name.name, datasync));
-	
-	rc = filemap_fdatawrite(inode->i_mapping);
-	if (rc == 0)
-		CIFS_I(inode)->write_behind_rc = 0;
-	FreeXid(xid);
+	cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
+		 file->f_path.dentry->d_name.name, datasync);
+
+	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
+		rc = cifs_zap_mapping(inode);
+		if (rc) {
+			cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
+			rc = 0; /* don't care about it in fsync */
+		}
+	}
+
+	tcon = tlink_tcon(smbfile->tlink);
+	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
+		server = tcon->ses->server;
+		if (server->ops->flush)
+			rc = server->ops->flush(xid, tcon, &smbfile->fid);
+		else
+			rc = -ENOSYS;
+	}
+
+	free_xid(xid);
+	mutex_unlock(&inode->i_mutex);
 	return rc;
 }
 
-/* static int cifs_sync_page(struct page *page)
+int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
-	struct address_space *mapping;
-	struct inode *inode;
-	unsigned long index = page->index;
-	unsigned int rpages = 0;
+	unsigned int xid;
 	int rc = 0;
+	struct cifs_tcon *tcon;
+	struct TCP_Server_Info *server;
+	struct cifsFileInfo *smbfile = file->private_data;
+	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+	struct inode *inode = file->f_mapping->host;
+
+	rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
+	if (rc)
+		return rc;
+	mutex_lock(&inode->i_mutex);
 
-	cFYI(1, ("sync page %p",page));
-	mapping = page->mapping;
-	if (!mapping)
-		return 0;
-	inode = mapping->host;
-	if (!inode)
-		return 0; */
+	xid = get_xid();
 
-/*	fill in rpages then 
-	result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
+	cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
+		 file->f_path.dentry->d_name.name, datasync);
 
-/*	cFYI(1, ("rpages is %d for sync page of Index %ld ", rpages, index));
+	tcon = tlink_tcon(smbfile->tlink);
+	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
+		server = tcon->ses->server;
+		if (server->ops->flush)
+			rc = server->ops->flush(xid, tcon, &smbfile->fid);
+		else
+			rc = -ENOSYS;
+	}
 
-	if (rc < 0)
-		return rc;
-	return 0;
-} */
+	free_xid(xid);
+	mutex_unlock(&inode->i_mutex);
+	return rc;
+}
 
 /*
  * As file closes, flush all cached write data for this inode checking
  * for write behind errors.
  */
-int cifs_flush(struct file *file)
+int cifs_flush(struct file *file, fl_owner_t id)
 {
-	struct inode * inode = file->f_dentry->d_inode;
+	struct inode *inode = file_inode(file);
 	int rc = 0;
 
-	/* Rather than do the steps manually:
-	   lock the inode for writing
-	   loop through pages looking for write behind data (dirty pages)
-	   coalesce into contiguous 16K (or smaller) chunks to write to server
-	   send to server (prefer in parallel)
-	   deal with writebehind errors
-	   unlock inode for writing
-	   filemapfdatawrite appears easier for the time being */
-
-	rc = filemap_fdatawrite(inode->i_mapping);
-	if (!rc) /* reset wb rc if we were able to write out dirty pages */
-		CIFS_I(inode)->write_behind_rc = 0;
-		
-	cFYI(1, ("Flush inode %p file %p rc %d",inode,file,rc));
+	if (file->f_mode & FMODE_WRITE)
+		rc = filemap_write_and_wait(inode->i_mapping);
+
+	cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
 
 	return rc;
 }
 
-ssize_t cifs_user_read(struct file *file, char __user *read_data,
-	size_t read_size, loff_t *poffset)
+static int
+cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
 {
-	int rc = -EACCES;
-	unsigned int bytes_read = 0;
-	unsigned int total_read = 0;
-	unsigned int current_read_size;
-	struct cifs_sb_info *cifs_sb;
-	struct cifsTconInfo *pTcon;
-	int xid;
+	int rc = 0;
+	unsigned long i;
+
+	for (i = 0; i < num_pages; i++) {
+		pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
+		if (!pages[i]) {
+			/*
+			 * save number of pages we have already allocated and
+			 * return with ENOMEM error
+			 */
+			num_pages = i;
+			rc = -ENOMEM;
+			break;
+		}
+	}
+
+	if (rc) {
+		for (i = 0; i < num_pages; i++)
+			put_page(pages[i]);
+	}
+	return rc;
+}
+
+static inline
+size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
+{
+	size_t num_pages;
+	size_t clen;
+
+	clen = min_t(const size_t, len, wsize);
+	num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
+
+	if (cur_len)
+		*cur_len = clen;
+
+	return num_pages;
+}
+
+static void
+cifs_uncached_writedata_release(struct kref *refcount)
+{
+	int i;
+	struct cifs_writedata *wdata = container_of(refcount,
+					struct cifs_writedata, refcount);
+
+	for (i = 0; i < wdata->nr_pages; i++)
+		put_page(wdata->pages[i]);
+	cifs_writedata_release(refcount);
+}
+
+static void
+cifs_uncached_writev_complete(struct work_struct *work)
+{
+	struct cifs_writedata *wdata = container_of(work,
+					struct cifs_writedata, work);
+	struct inode *inode = wdata->cfile->dentry->d_inode;
+	struct cifsInodeInfo *cifsi = CIFS_I(inode);
+
+	spin_lock(&inode->i_lock);
+	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
+	if (cifsi->server_eof > inode->i_size)
+		i_size_write(inode, cifsi->server_eof);
+	spin_unlock(&inode->i_lock);
+
+	complete(&wdata->done);
+
+	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
+}
+
+/* attempt to send write to server, retry on any -EAGAIN errors */
+static int
+cifs_uncached_retry_writev(struct cifs_writedata *wdata)
+{
+	int rc;
+	struct TCP_Server_Info *server;
+
+	server = tlink_tcon(wdata->cfile->tlink)->ses->server;
+
+	do {
+		if (wdata->cfile->invalidHandle) {
+			rc = cifs_reopen_file(wdata->cfile, false);
+			if (rc != 0)
+				continue;
+		}
+		rc = server->ops->async_writev(wdata,
+					       cifs_uncached_writedata_release);
+	} while (rc == -EAGAIN);
+
+	return rc;
+}
+
+static ssize_t
+cifs_iovec_write(struct file *file, struct iov_iter *from, loff_t *poffset)
+{
+	unsigned long nr_pages, i;
+	size_t bytes, copied, len, cur_len;
+	ssize_t total_written = 0;
+	loff_t offset;
 	struct cifsFileInfo *open_file;
-	char *smb_read_data;
-	char __user *current_offset;
-	struct smb_com_read_rsp *pSMBr;
+	struct cifs_tcon *tcon;
+	struct cifs_sb_info *cifs_sb;
+	struct cifs_writedata *wdata, *tmp;
+	struct list_head wdata_list;
+	int rc;
+	pid_t pid;
 
-	xid = GetXid();
-	cifs_sb = CIFS_SB(file->f_dentry->d_sb);
-	pTcon = cifs_sb->tcon;
+	len = iov_iter_count(from);
+	rc = generic_write_checks(file, poffset, &len, 0);
+	if (rc)
+		return rc;
 
-	if (file->private_data == NULL) {
-		FreeXid(xid);
-		return -EBADF;
+	if (!len)
+		return 0;
+
+	iov_iter_truncate(from, len);
+
+	INIT_LIST_HEAD(&wdata_list);
+	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+	open_file = file->private_data;
+	tcon = tlink_tcon(open_file->tlink);
+
+	if (!tcon->ses->server->ops->async_writev)
+		return -ENOSYS;
+
+	offset = *poffset;
+
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
+		pid = open_file->pid;
+	else
+		pid = current->tgid;
+
+	do {
+		size_t save_len;
+
+		nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
+		wdata = cifs_writedata_alloc(nr_pages,
+					     cifs_uncached_writev_complete);
+		if (!wdata) {
+			rc = -ENOMEM;
+			break;
+		}
+
+		rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
+		if (rc) {
+			kfree(wdata);
+			break;
+		}
+
+		save_len = cur_len;
+		for (i = 0; i < nr_pages; i++) {
+			bytes = min_t(size_t, cur_len, PAGE_SIZE);
+			copied = copy_page_from_iter(wdata->pages[i], 0, bytes,
+						     from);
+			cur_len -= copied;
+			/*
+			 * If we didn't copy as much as we expected, then that
+			 * may mean we trod into an unmapped area. Stop copying
+			 * at that point. On the next pass through the big
+			 * loop, we'll likely end up getting a zero-length
+			 * write and bailing out of it.
+			 */
+			if (copied < bytes)
+				break;
+		}
+		cur_len = save_len - cur_len;
+
+		/*
+		 * If we have no data to send, then that probably means that
+		 * the copy above failed altogether. That's most likely because
+		 * the address in the iovec was bogus. Set the rc to -EFAULT,
+		 * free anything we allocated and bail out.
+		 */
+		if (!cur_len) {
+			for (i = 0; i < nr_pages; i++)
+				put_page(wdata->pages[i]);
+			kfree(wdata);
+			rc = -EFAULT;
+			break;
+		}
+
+		/*
+		 * i + 1 now represents the number of pages we actually used in
+		 * the copy phase above. Bring nr_pages down to that, and free
+		 * any pages that we didn't use.
+		 */
+		for ( ; nr_pages > i + 1; nr_pages--)
+			put_page(wdata->pages[nr_pages - 1]);
+
+		wdata->sync_mode = WB_SYNC_ALL;
+		wdata->nr_pages = nr_pages;
+		wdata->offset = (__u64)offset;
+		wdata->cfile = cifsFileInfo_get(open_file);
+		wdata->pid = pid;
+		wdata->bytes = cur_len;
+		wdata->pagesz = PAGE_SIZE;
+		wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
+		rc = cifs_uncached_retry_writev(wdata);
+		if (rc) {
+			kref_put(&wdata->refcount,
+				 cifs_uncached_writedata_release);
+			break;
+		}
+
+		list_add_tail(&wdata->list, &wdata_list);
+		offset += cur_len;
+		len -= cur_len;
+	} while (len > 0);
+
+	/*
+	 * If at least one write was successfully sent, then discard any rc
+	 * value from the later writes. If the other write succeeds, then
+	 * we'll end up returning whatever was written. If it fails, then
+	 * we'll get a new rc value from that.
+	 */
+	if (!list_empty(&wdata_list))
+		rc = 0;
+
+	/*
+	 * Wait for and collect replies for any successful sends in order of
+	 * increasing offset. Once an error is hit or we get a fatal signal
+	 * while waiting, then return without waiting for any more replies.
+	 */
+restart_loop:
+	list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
+		if (!rc) {
+			/* FIXME: freezable too? */
+			rc = wait_for_completion_killable(&wdata->done);
+			if (rc)
+				rc = -EINTR;
+			else if (wdata->result)
+				rc = wdata->result;
+			else
+				total_written += wdata->bytes;
+
+			/* resend call if it's a retryable error */
+			if (rc == -EAGAIN) {
+				rc = cifs_uncached_retry_writev(wdata);
+				goto restart_loop;
+			}
+		}
+		list_del_init(&wdata->list);
+		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
 	}
-	open_file = (struct cifsFileInfo *)file->private_data;
 
-	if ((file->f_flags & O_ACCMODE) == O_WRONLY) {
-		cFYI(1, ("attempting read on write only file instance"));
+	if (total_written > 0)
+		*poffset += total_written;
+
+	cifs_stats_bytes_written(tcon, total_written);
+	return total_written ? total_written : (ssize_t)rc;
+}
+
+ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
+{
+	ssize_t written;
+	struct inode *inode;
+	loff_t pos = iocb->ki_pos;
+
+	inode = file_inode(iocb->ki_filp);
+
+	/*
+	 * BB - optimize the way when signing is disabled. We can drop this
+	 * extra memory-to-memory copying and use iovec buffers for constructing
+	 * write request.
+	 */
+
+	written = cifs_iovec_write(iocb->ki_filp, from, &pos);
+	if (written > 0) {
+		set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(inode)->flags);
+		iocb->ki_pos = pos;
 	}
-	for (total_read = 0, current_offset = read_data;
-	     read_size > total_read;
-	     total_read += bytes_read, current_offset += bytes_read) {
-		current_read_size = min_t(const int, read_size - total_read, 
-					  cifs_sb->rsize);
-		rc = -EAGAIN;
-		smb_read_data = NULL;
-		while (rc == -EAGAIN) {
-			if ((open_file->invalidHandle) && 
-			    (!open_file->closePend)) {
-				rc = cifs_reopen_file(file->f_dentry->d_inode,
-					file, TRUE);
-				if (rc != 0)
-					break;
-			}
-			rc = CIFSSMBRead(xid, pTcon,
-					open_file->netfid,
-					current_read_size, *poffset,
-					&bytes_read, &smb_read_data);
-			pSMBr = (struct smb_com_read_rsp *)smb_read_data;
-			if (copy_to_user(current_offset, 
-					 smb_read_data + 4 /* RFC1001 hdr */
-					 + le16_to_cpu(pSMBr->DataOffset), 
-					 bytes_read)) {
-				rc = -EFAULT;
-				FreeXid(xid);
-				return rc;
-            }
-			if (smb_read_data) {
-				cifs_buf_release(smb_read_data);
-				smb_read_data = NULL;
-			}
+
+	return written;
+}
+
+static ssize_t
+cifs_writev(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct file *file = iocb->ki_filp;
+	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
+	struct inode *inode = file->f_mapping->host;
+	struct cifsInodeInfo *cinode = CIFS_I(inode);
+	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
+	ssize_t rc = -EACCES;
+	loff_t lock_pos = iocb->ki_pos;
+
+	/*
+	 * We need to hold the sem to be sure nobody modifies lock list
+	 * with a brlock that prevents writing.
+	 */
+	down_read(&cinode->lock_sem);
+	mutex_lock(&inode->i_mutex);
+	if (file->f_flags & O_APPEND)
+		lock_pos = i_size_read(inode);
+	if (!cifs_find_lock_conflict(cfile, lock_pos, iov_iter_count(from),
+				     server->vals->exclusive_lock_type, NULL,
+				     CIFS_WRITE_OP)) {
+		rc = __generic_file_write_iter(iocb, from);
+		mutex_unlock(&inode->i_mutex);
+
+		if (rc > 0) {
+			ssize_t err;
+
+			err = generic_write_sync(file, iocb->ki_pos - rc, rc);
+			if (err < 0)
+				rc = err;
 		}
-		if (rc || (bytes_read == 0)) {
-			if (total_read) {
-				break;
+	} else {
+		mutex_unlock(&inode->i_mutex);
+	}
+	up_read(&cinode->lock_sem);
+	return rc;
+}
+
+ssize_t
+cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+	struct cifsInodeInfo *cinode = CIFS_I(inode);
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
+						iocb->ki_filp->private_data;
+	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
+	ssize_t written;
+
+	written = cifs_get_writer(cinode);
+	if (written)
+		return written;
+
+	if (CIFS_CACHE_WRITE(cinode)) {
+		if (cap_unix(tcon->ses) &&
+		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
+		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
+			written = generic_file_write_iter(iocb, from);
+			goto out;
+		}
+		written = cifs_writev(iocb, from);
+		goto out;
+	}
+	/*
+	 * For non-oplocked files in strict cache mode we need to write the data
+	 * to the server exactly from the pos to pos+len-1 rather than flush all
+	 * affected pages because it may cause a error with mandatory locks on
+	 * these pages but not on the region from pos to ppos+len-1.
+	 */
+	written = cifs_user_writev(iocb, from);
+	if (written > 0 && CIFS_CACHE_READ(cinode)) {
+		/*
+		 * Windows 7 server can delay breaking level2 oplock if a write
+		 * request comes - break it on the client to prevent reading
+		 * an old data.
+		 */
+		cifs_zap_mapping(inode);
+		cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
+			 inode);
+		cinode->oplock = 0;
+	}
+out:
+	cifs_put_writer(cinode);
+	return written;
+}
+
+static struct cifs_readdata *
+cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
+{
+	struct cifs_readdata *rdata;
+
+	rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
+			GFP_KERNEL);
+	if (rdata != NULL) {
+		kref_init(&rdata->refcount);
+		INIT_LIST_HEAD(&rdata->list);
+		init_completion(&rdata->done);
+		INIT_WORK(&rdata->work, complete);
+	}
+
+	return rdata;
+}
+
+void
+cifs_readdata_release(struct kref *refcount)
+{
+	struct cifs_readdata *rdata = container_of(refcount,
+					struct cifs_readdata, refcount);
+
+	if (rdata->cfile)
+		cifsFileInfo_put(rdata->cfile);
+
+	kfree(rdata);
+}
+
+static int
+cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
+{
+	int rc = 0;
+	struct page *page;
+	unsigned int i;
+
+	for (i = 0; i < nr_pages; i++) {
+		page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
+		if (!page) {
+			rc = -ENOMEM;
+			break;
+		}
+		rdata->pages[i] = page;
+	}
+
+	if (rc) {
+		for (i = 0; i < nr_pages; i++) {
+			put_page(rdata->pages[i]);
+			rdata->pages[i] = NULL;
+		}
+	}
+	return rc;
+}
+
+static void
+cifs_uncached_readdata_release(struct kref *refcount)
+{
+	struct cifs_readdata *rdata = container_of(refcount,
+					struct cifs_readdata, refcount);
+	unsigned int i;
+
+	for (i = 0; i < rdata->nr_pages; i++) {
+		put_page(rdata->pages[i]);
+		rdata->pages[i] = NULL;
+	}
+	cifs_readdata_release(refcount);
+}
+
+static int
+cifs_retry_async_readv(struct cifs_readdata *rdata)
+{
+	int rc;
+	struct TCP_Server_Info *server;
+
+	server = tlink_tcon(rdata->cfile->tlink)->ses->server;
+
+	do {
+		if (rdata->cfile->invalidHandle) {
+			rc = cifs_reopen_file(rdata->cfile, true);
+			if (rc != 0)
+				continue;
+		}
+		rc = server->ops->async_readv(rdata);
+	} while (rc == -EAGAIN);
+
+	return rc;
+}
+
+/**
+ * cifs_readdata_to_iov - copy data from pages in response to an iovec
+ * @rdata:	the readdata response with list of pages holding data
+ * @iter:	destination for our data
+ *
+ * This function copies data from a list of pages in a readdata response into
+ * an array of iovecs. It will first calculate where the data should go
+ * based on the info in the readdata and then copy the data into that spot.
+ */
+static int
+cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
+{
+	size_t remaining = rdata->bytes;
+	unsigned int i;
+
+	for (i = 0; i < rdata->nr_pages; i++) {
+		struct page *page = rdata->pages[i];
+		size_t copy = min_t(size_t, remaining, PAGE_SIZE);
+		size_t written = copy_page_to_iter(page, 0, copy, iter);
+		remaining -= written;
+		if (written < copy && iov_iter_count(iter) > 0)
+			break;
+	}
+	return remaining ? -EFAULT : 0;
+}
+
+static void
+cifs_uncached_readv_complete(struct work_struct *work)
+{
+	struct cifs_readdata *rdata = container_of(work,
+						struct cifs_readdata, work);
+
+	complete(&rdata->done);
+	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
+}
+
+static int
+cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
+			struct cifs_readdata *rdata, unsigned int len)
+{
+	int total_read = 0, result = 0;
+	unsigned int i;
+	unsigned int nr_pages = rdata->nr_pages;
+	struct kvec iov;
+
+	rdata->tailsz = PAGE_SIZE;
+	for (i = 0; i < nr_pages; i++) {
+		struct page *page = rdata->pages[i];
+
+		if (len >= PAGE_SIZE) {
+			/* enough data to fill the page */
+			iov.iov_base = kmap(page);
+			iov.iov_len = PAGE_SIZE;
+			cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
+				 i, iov.iov_base, iov.iov_len);
+			len -= PAGE_SIZE;
+		} else if (len > 0) {
+			/* enough for partial page, fill and zero the rest */
+			iov.iov_base = kmap(page);
+			iov.iov_len = len;
+			cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
+				 i, iov.iov_base, iov.iov_len);
+			memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
+			rdata->tailsz = len;
+			len = 0;
+		} else {
+			/* no need to hold page hostage */
+			rdata->pages[i] = NULL;
+			rdata->nr_pages--;
+			put_page(page);
+			continue;
+		}
+
+		result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
+		kunmap(page);
+		if (result < 0)
+			break;
+
+		total_read += result;
+	}
+
+	return total_read > 0 ? total_read : result;
+}
+
+ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
+{
+	struct file *file = iocb->ki_filp;
+	ssize_t rc;
+	size_t len, cur_len;
+	ssize_t total_read = 0;
+	loff_t offset = iocb->ki_pos;
+	unsigned int npages;
+	struct cifs_sb_info *cifs_sb;
+	struct cifs_tcon *tcon;
+	struct cifsFileInfo *open_file;
+	struct cifs_readdata *rdata, *tmp;
+	struct list_head rdata_list;
+	pid_t pid;
+
+	len = iov_iter_count(to);
+	if (!len)
+		return 0;
+
+	INIT_LIST_HEAD(&rdata_list);
+	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+	open_file = file->private_data;
+	tcon = tlink_tcon(open_file->tlink);
+
+	if (!tcon->ses->server->ops->async_readv)
+		return -ENOSYS;
+
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
+		pid = open_file->pid;
+	else
+		pid = current->tgid;
+
+	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
+		cifs_dbg(FYI, "attempting read on write only file instance\n");
+
+	do {
+		cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
+		npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
+
+		/* allocate a readdata struct */
+		rdata = cifs_readdata_alloc(npages,
+					    cifs_uncached_readv_complete);
+		if (!rdata) {
+			rc = -ENOMEM;
+			break;
+		}
+
+		rc = cifs_read_allocate_pages(rdata, npages);
+		if (rc)
+			goto error;
+
+		rdata->cfile = cifsFileInfo_get(open_file);
+		rdata->nr_pages = npages;
+		rdata->offset = offset;
+		rdata->bytes = cur_len;
+		rdata->pid = pid;
+		rdata->pagesz = PAGE_SIZE;
+		rdata->read_into_pages = cifs_uncached_read_into_pages;
+
+		rc = cifs_retry_async_readv(rdata);
+error:
+		if (rc) {
+			kref_put(&rdata->refcount,
+				 cifs_uncached_readdata_release);
+			break;
+		}
+
+		list_add_tail(&rdata->list, &rdata_list);
+		offset += cur_len;
+		len -= cur_len;
+	} while (len > 0);
+
+	/* if at least one read request send succeeded, then reset rc */
+	if (!list_empty(&rdata_list))
+		rc = 0;
+
+	len = iov_iter_count(to);
+	/* the loop below should proceed in the order of increasing offsets */
+	list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
+	again:
+		if (!rc) {
+			/* FIXME: freezable sleep too? */
+			rc = wait_for_completion_killable(&rdata->done);
+			if (rc)
+				rc = -EINTR;
+			else if (rdata->result) {
+				rc = rdata->result;
+				/* resend call if it's a retryable error */
+				if (rc == -EAGAIN) {
+					rc = cifs_retry_async_readv(rdata);
+					goto again;
+				}
 			} else {
-				FreeXid(xid);
-				return rc;
+				rc = cifs_readdata_to_iov(rdata, to);
 			}
-		} else {
-			cifs_stats_bytes_read(pTcon, bytes_read);
-			*poffset += bytes_read;
+
 		}
+		list_del_init(&rdata->list);
+		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
 	}
-	FreeXid(xid);
-	return total_read;
+
+	total_read = len - iov_iter_count(to);
+
+	cifs_stats_bytes_read(tcon, total_read);
+
+	/* mask nodata case */
+	if (rc == -ENODATA)
+		rc = 0;
+
+	if (total_read) {
+		iocb->ki_pos += total_read;
+		return total_read;
+	}
+	return rc;
 }
 
+ssize_t
+cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
+{
+	struct inode *inode = file_inode(iocb->ki_filp);
+	struct cifsInodeInfo *cinode = CIFS_I(inode);
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
+						iocb->ki_filp->private_data;
+	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
+	int rc = -EACCES;
 
-static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
-	loff_t *poffset)
+	/*
+	 * In strict cache mode we need to read from the server all the time
+	 * if we don't have level II oplock because the server can delay mtime
+	 * change - so we can't make a decision about inode invalidating.
+	 * And we can also fail with pagereading if there are mandatory locks
+	 * on pages affected by this read but not on the region from pos to
+	 * pos+len-1.
+	 */
+	if (!CIFS_CACHE_READ(cinode))
+		return cifs_user_readv(iocb, to);
+
+	if (cap_unix(tcon->ses) &&
+	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
+	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
+		return generic_file_read_iter(iocb, to);
+
+	/*
+	 * We need to hold the sem to be sure nobody modifies lock list
+	 * with a brlock that prevents reading.
+	 */
+	down_read(&cinode->lock_sem);
+	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
+				     tcon->ses->server->vals->shared_lock_type,
+				     NULL, CIFS_READ_OP))
+		rc = generic_file_read_iter(iocb, to);
+	up_read(&cinode->lock_sem);
+	return rc;
+}
+
+static ssize_t
+cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
 {
 	int rc = -EACCES;
 	unsigned int bytes_read = 0;
 	unsigned int total_read;
 	unsigned int current_read_size;
+	unsigned int rsize;
 	struct cifs_sb_info *cifs_sb;
-	struct cifsTconInfo *pTcon;
-	int xid;
-	char *current_offset;
+	struct cifs_tcon *tcon;
+	struct TCP_Server_Info *server;
+	unsigned int xid;
+	char *cur_offset;
 	struct cifsFileInfo *open_file;
+	struct cifs_io_parms io_parms;
+	int buf_type = CIFS_NO_BUFFER;
+	__u32 pid;
 
-	xid = GetXid();
-	cifs_sb = CIFS_SB(file->f_dentry->d_sb);
-	pTcon = cifs_sb->tcon;
+	xid = get_xid();
+	cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+
+	/* FIXME: set up handlers for larger reads and/or convert to async */
+	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
 
 	if (file->private_data == NULL) {
-		FreeXid(xid);
-		return -EBADF;
+		rc = -EBADF;
+		free_xid(xid);
+		return rc;
+	}
+	open_file = file->private_data;
+	tcon = tlink_tcon(open_file->tlink);
+	server = tcon->ses->server;
+
+	if (!server->ops->sync_read) {
+		free_xid(xid);
+		return -ENOSYS;
 	}
-	open_file = (struct cifsFileInfo *)file->private_data;
+
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
+		pid = open_file->pid;
+	else
+		pid = current->tgid;
 
 	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
-		cFYI(1, ("attempting read on write only file instance"));
-
-	for (total_read = 0, current_offset = read_data; 
-	     read_size > total_read;
-	     total_read += bytes_read, current_offset += bytes_read) {
-		current_read_size = min_t(const int, read_size - total_read,
-					  cifs_sb->rsize);
-		/* For windows me and 9x we do not want to request more
-		than it negotiated since it will refuse the read then */
-		if((pTcon->ses) && 
-			!(pTcon->ses->capabilities & CAP_LARGE_FILES)) {
-			current_read_size = min_t(const int, current_read_size,
-					pTcon->ses->server->maxBuf - 128);
+		cifs_dbg(FYI, "attempting read on write only file instance\n");
+
+	for (total_read = 0, cur_offset = read_data; read_size > total_read;
+	     total_read += bytes_read, cur_offset += bytes_read) {
+		current_read_size = min_t(uint, read_size - total_read, rsize);
+		/*
+		 * For windows me and 9x we do not want to request more than it
+		 * negotiated since it will refuse the read then.
+		 */
+		if ((tcon->ses) && !(tcon->ses->capabilities &
+				tcon->ses->server->vals->cap_large_files)) {
+			current_read_size = min_t(uint, current_read_size,
+					CIFSMaxBufSize);
 		}
 		rc = -EAGAIN;
 		while (rc == -EAGAIN) {
-			if ((open_file->invalidHandle) && 
-			    (!open_file->closePend)) {
-				rc = cifs_reopen_file(file->f_dentry->d_inode,
-					file, TRUE);
+			if (open_file->invalidHandle) {
+				rc = cifs_reopen_file(open_file, true);
 				if (rc != 0)
 					break;
 			}
-			rc = CIFSSMBRead(xid, pTcon,
-					open_file->netfid,
-					current_read_size, *poffset,
-					&bytes_read, &current_offset);
+			io_parms.pid = pid;
+			io_parms.tcon = tcon;
+			io_parms.offset = *offset;
+			io_parms.length = current_read_size;
+			rc = server->ops->sync_read(xid, open_file, &io_parms,
+						    &bytes_read, &cur_offset,
+						    &buf_type);
 		}
 		if (rc || (bytes_read == 0)) {
 			if (total_read) {
 				break;
 			} else {
-				FreeXid(xid);
+				free_xid(xid);
 				return rc;
 			}
 		} else {
-			cifs_stats_bytes_read(pTcon, total_read);
-			*poffset += bytes_read;
+			cifs_stats_bytes_read(tcon, total_read);
+			*offset += bytes_read;
 		}
 	}
-	FreeXid(xid);
+	free_xid(xid);
 	return total_read;
 }
 
+/*
+ * If the page is mmap'ed into a process' page tables, then we need to make
+ * sure that it doesn't change while being written back.
+ */
+static int
+cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct page *page = vmf->page;
+
+	lock_page(page);
+	return VM_FAULT_LOCKED;
+}
+
+static struct vm_operations_struct cifs_file_vm_ops = {
+	.fault = filemap_fault,
+	.map_pages = filemap_map_pages,
+	.page_mkwrite = cifs_page_mkwrite,
+	.remap_pages = generic_file_remap_pages,
+};
+
+int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	int rc, xid;
+	struct inode *inode = file_inode(file);
+
+	xid = get_xid();
+
+	if (!CIFS_CACHE_READ(CIFS_I(inode))) {
+		rc = cifs_zap_mapping(inode);
+		if (rc)
+			return rc;
+	}
+
+	rc = generic_file_mmap(file, vma);
+	if (rc == 0)
+		vma->vm_ops = &cifs_file_vm_ops;
+	free_xid(xid);
+	return rc;
+}
+
 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	struct dentry *dentry = file->f_dentry;
 	int rc, xid;
 
-	xid = GetXid();
-	rc = cifs_revalidate(dentry);
+	xid = get_xid();
+	rc = cifs_revalidate_file(file);
 	if (rc) {
-		cFYI(1, ("Validation prior to mmap failed, error=%d", rc));
-		FreeXid(xid);
+		cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
+			 rc);
+		free_xid(xid);
 		return rc;
 	}
 	rc = generic_file_mmap(file, vma);
-	FreeXid(xid);
+	if (rc == 0)
+		vma->vm_ops = &cifs_file_vm_ops;
+	free_xid(xid);
 	return rc;
 }
 
-
-static void cifs_copy_cache_pages(struct address_space *mapping, 
-	struct list_head *pages, int bytes_read, char *data,
-	struct pagevec *plru_pvec)
+static void
+cifs_readv_complete(struct work_struct *work)
 {
-	struct page *page;
-	char *target;
+	unsigned int i;
+	struct cifs_readdata *rdata = container_of(work,
+						struct cifs_readdata, work);
 
-	while (bytes_read > 0) {
-		if (list_empty(pages))
-			break;
+	for (i = 0; i < rdata->nr_pages; i++) {
+		struct page *page = rdata->pages[i];
 
-		page = list_entry(pages->prev, struct page, lru);
-		list_del(&page->lru);
+		lru_cache_add_file(page);
 
-		if (add_to_page_cache(page, mapping, page->index,
-				      GFP_KERNEL)) {
-			page_cache_release(page);
-			cFYI(1, ("Add page cache failed"));
-			data += PAGE_CACHE_SIZE;
-			bytes_read -= PAGE_CACHE_SIZE;
-			continue;
+		if (rdata->result == 0) {
+			flush_dcache_page(page);
+			SetPageUptodate(page);
 		}
 
-		target = kmap_atomic(page,KM_USER0);
+		unlock_page(page);
+
+		if (rdata->result == 0)
+			cifs_readpage_to_fscache(rdata->mapping->host, page);
+
+		page_cache_release(page);
+		rdata->pages[i] = NULL;
+	}
+	kref_put(&rdata->refcount, cifs_readdata_release);
+}
 
-		if (PAGE_CACHE_SIZE > bytes_read) {
-			memcpy(target, data, bytes_read);
-			/* zero the tail end of this partial page */
-			memset(target + bytes_read, 0, 
-			       PAGE_CACHE_SIZE - bytes_read);
-			bytes_read = 0;
+static int
+cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
+			struct cifs_readdata *rdata, unsigned int len)
+{
+	int total_read = 0, result = 0;
+	unsigned int i;
+	u64 eof;
+	pgoff_t eof_index;
+	unsigned int nr_pages = rdata->nr_pages;
+	struct kvec iov;
+
+	/* determine the eof that the server (probably) has */
+	eof = CIFS_I(rdata->mapping->host)->server_eof;
+	eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
+	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
+
+	rdata->tailsz = PAGE_CACHE_SIZE;
+	for (i = 0; i < nr_pages; i++) {
+		struct page *page = rdata->pages[i];
+
+		if (len >= PAGE_CACHE_SIZE) {
+			/* enough data to fill the page */
+			iov.iov_base = kmap(page);
+			iov.iov_len = PAGE_CACHE_SIZE;
+			cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
+				 i, page->index, iov.iov_base, iov.iov_len);
+			len -= PAGE_CACHE_SIZE;
+		} else if (len > 0) {
+			/* enough for partial page, fill and zero the rest */
+			iov.iov_base = kmap(page);
+			iov.iov_len = len;
+			cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
+				 i, page->index, iov.iov_base, iov.iov_len);
+			memset(iov.iov_base + len,
+				'\0', PAGE_CACHE_SIZE - len);
+			rdata->tailsz = len;
+			len = 0;
+		} else if (page->index > eof_index) {
+			/*
+			 * The VFS will not try to do readahead past the
+			 * i_size, but it's possible that we have outstanding
+			 * writes with gaps in the middle and the i_size hasn't
+			 * caught up yet. Populate those with zeroed out pages
+			 * to prevent the VFS from repeatedly attempting to
+			 * fill them until the writes are flushed.
+			 */
+			zero_user(page, 0, PAGE_CACHE_SIZE);
+			lru_cache_add_file(page);
+			flush_dcache_page(page);
+			SetPageUptodate(page);
+			unlock_page(page);
+			page_cache_release(page);
+			rdata->pages[i] = NULL;
+			rdata->nr_pages--;
+			continue;
 		} else {
-			memcpy(target, data, PAGE_CACHE_SIZE);
-			bytes_read -= PAGE_CACHE_SIZE;
+			/* no need to hold page hostage */
+			lru_cache_add_file(page);
+			unlock_page(page);
+			page_cache_release(page);
+			rdata->pages[i] = NULL;
+			rdata->nr_pages--;
+			continue;
 		}
-		kunmap_atomic(target, KM_USER0);
 
-		flush_dcache_page(page);
-		SetPageUptodate(page);
-		unlock_page(page);
-		if (!pagevec_add(plru_pvec, page))
-			__pagevec_lru_add(plru_pvec);
-		data += PAGE_CACHE_SIZE;
+		result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
+		kunmap(page);
+		if (result < 0)
+			break;
+
+		total_read += result;
 	}
-	return;
+
+	return total_read > 0 ? total_read : result;
 }
 
 static int cifs_readpages(struct file *file, struct address_space *mapping,
 	struct list_head *page_list, unsigned num_pages)
 {
-	int rc = -EACCES;
-	int xid;
-	loff_t offset;
-	struct page *page;
-	struct cifs_sb_info *cifs_sb;
-	struct cifsTconInfo *pTcon;
-	int bytes_read = 0;
-	unsigned int read_size,i;
-	char *smb_read_data = NULL;
-	struct smb_com_read_rsp *pSMBr;
-	struct pagevec lru_pvec;
-	struct cifsFileInfo *open_file;
+	int rc;
+	struct list_head tmplist;
+	struct cifsFileInfo *open_file = file->private_data;
+	struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
+	unsigned int rsize = cifs_sb->rsize;
+	pid_t pid;
 
-	xid = GetXid();
-	if (file->private_data == NULL) {
-		FreeXid(xid);
-		return -EBADF;
-	}
-	open_file = (struct cifsFileInfo *)file->private_data;
-	cifs_sb = CIFS_SB(file->f_dentry->d_sb);
-	pTcon = cifs_sb->tcon;
+	/*
+	 * Give up immediately if rsize is too small to read an entire page.
+	 * The VFS will fall back to readpage. We should never reach this
+	 * point however since we set ra_pages to 0 when the rsize is smaller
+	 * than a cache page.
+	 */
+	if (unlikely(rsize < PAGE_CACHE_SIZE))
+		return 0;
 
-	pagevec_init(&lru_pvec, 0);
+	/*
+	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
+	 * immediately if the cookie is negative
+	 *
+	 * After this point, every page in the list might have PG_fscache set,
+	 * so we will need to clean that up off of every page we don't use.
+	 */
+	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
+					 &num_pages);
+	if (rc == 0)
+		return rc;
 
-	for (i = 0; i < num_pages; ) {
-		unsigned contig_pages;
-		struct page *tmp_page;
-		unsigned long expected_index;
+	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
+		pid = open_file->pid;
+	else
+		pid = current->tgid;
 
-		if (list_empty(page_list))
-			break;
+	rc = 0;
+	INIT_LIST_HEAD(&tmplist);
+
+	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
+		 __func__, file, mapping, num_pages);
+
+	/*
+	 * Start with the page at end of list and move it to private
+	 * list. Do the same with any following pages until we hit
+	 * the rsize limit, hit an index discontinuity, or run out of
+	 * pages. Issue the async read and then start the loop again
+	 * until the list is empty.
+	 *
+	 * Note that list order is important. The page_list is in
+	 * the order of declining indexes. When we put the pages in
+	 * the rdata->pages, then we want them in increasing order.
+	 */
+	while (!list_empty(page_list)) {
+		unsigned int i;
+		unsigned int bytes = PAGE_CACHE_SIZE;
+		unsigned int expected_index;
+		unsigned int nr_pages = 1;
+		loff_t offset;
+		struct page *page, *tpage;
+		struct cifs_readdata *rdata;
 
 		page = list_entry(page_list->prev, struct page, lru);
-		offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
 
-		/* count adjacent pages that we will read into */
-		contig_pages = 0;
-		expected_index = 
-			list_entry(page_list->prev, struct page, lru)->index;
-		list_for_each_entry_reverse(tmp_page,page_list,lru) {
-			if (tmp_page->index == expected_index) {
-				contig_pages++;
-				expected_index++;
-			} else
-				break; 
-		}
-		if (contig_pages + i >  num_pages)
-			contig_pages = num_pages - i;
+		/*
+		 * Lock the page and put it in the cache. Since no one else
+		 * should have access to this page, we're safe to simply set
+		 * PG_locked without checking it first.
+		 */
+		__set_page_locked(page);
+		rc = add_to_page_cache_locked(page, mapping,
+					      page->index, GFP_KERNEL);
 
-		/* for reads over a certain size could initiate async
-		   read ahead */
+		/* give up if we can't stick it in the cache */
+		if (rc) {
+			__clear_page_locked(page);
+			break;
+		}
 
-		read_size = contig_pages * PAGE_CACHE_SIZE;
-		/* Read size needs to be in multiples of one page */
-		read_size = min_t(const unsigned int, read_size,
-				  cifs_sb->rsize & PAGE_CACHE_MASK);
+		/* move first page to the tmplist */
+		offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
+		list_move_tail(&page->lru, &tmplist);
 
-		rc = -EAGAIN;
-		while (rc == -EAGAIN) {
-			if ((open_file->invalidHandle) && 
-			    (!open_file->closePend)) {
-				rc = cifs_reopen_file(file->f_dentry->d_inode,
-					file, TRUE);
-				if (rc != 0)
-					break;
-			}
+		/* now try and add more pages onto the request */
+		expected_index = page->index + 1;
+		list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
+			/* discontinuity ? */
+			if (page->index != expected_index)
+				break;
 
-			rc = CIFSSMBRead(xid, pTcon,
-					open_file->netfid,
-					read_size, offset,
-					&bytes_read, &smb_read_data);
+			/* would this page push the read over the rsize? */
+			if (bytes + PAGE_CACHE_SIZE > rsize)
+				break;
 
-			/* BB more RC checks ? */
-			if (rc== -EAGAIN) {
-				if (smb_read_data) {
-					cifs_buf_release(smb_read_data);
-					smb_read_data = NULL;
-				}
+			__set_page_locked(page);
+			if (add_to_page_cache_locked(page, mapping,
+						page->index, GFP_KERNEL)) {
+				__clear_page_locked(page);
+				break;
 			}
+			list_move_tail(&page->lru, &tmplist);
+			bytes += PAGE_CACHE_SIZE;
+			expected_index++;
+			nr_pages++;
 		}
-		if ((rc < 0) || (smb_read_data == NULL)) {
-			cFYI(1, ("Read error in readpages: %d", rc));
-			/* clean up remaing pages off list */
-			while (!list_empty(page_list) && (i < num_pages)) {
-				page = list_entry(page_list->prev, struct page,
-						  lru);
+
+		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
+		if (!rdata) {
+			/* best to give up if we're out of mem */
+			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
 				list_del(&page->lru);
+				lru_cache_add_file(page);
+				unlock_page(page);
 				page_cache_release(page);
 			}
+			rc = -ENOMEM;
 			break;
-		} else if (bytes_read > 0) {
-			pSMBr = (struct smb_com_read_rsp *)smb_read_data;
-			cifs_copy_cache_pages(mapping, page_list, bytes_read,
-				smb_read_data + 4 /* RFC1001 hdr */ +
-				le16_to_cpu(pSMBr->DataOffset), &lru_pvec);
-
-			i +=  bytes_read >> PAGE_CACHE_SHIFT;
-			cifs_stats_bytes_read(pTcon, bytes_read);
-			if ((int)(bytes_read & PAGE_CACHE_MASK) != bytes_read) {
-				i++; /* account for partial page */
-
-				/* server copy of file can have smaller size 
-				   than client */
-				/* BB do we need to verify this common case ? 
-				   this case is ok - if we are at server EOF 
-				   we will hit it on next read */
-
-			/* while (!list_empty(page_list) && (i < num_pages)) {
-					page = list_entry(page_list->prev, 
-							  struct page, list);
-					list_del(&page->list);
-					page_cache_release(page);
-				}
-				break; */
-			}
-		} else {
-			cFYI(1, ("No bytes read (%d) at offset %lld . "
-				 "Cleaning remaining pages from readahead list",
-				 bytes_read, offset));
-			/* BB turn off caching and do new lookup on 
-			   file size at server? */
-			while (!list_empty(page_list) && (i < num_pages)) {
-				page = list_entry(page_list->prev, struct page,
-						  lru);
-				list_del(&page->lru);
+		}
 
-				/* BB removeme - replace with zero of page? */
+		rdata->cfile = cifsFileInfo_get(open_file);
+		rdata->mapping = mapping;
+		rdata->offset = offset;
+		rdata->bytes = bytes;
+		rdata->pid = pid;
+		rdata->pagesz = PAGE_CACHE_SIZE;
+		rdata->read_into_pages = cifs_readpages_read_into_pages;
+
+		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
+			list_del(&page->lru);
+			rdata->pages[rdata->nr_pages++] = page;
+		}
+
+		rc = cifs_retry_async_readv(rdata);
+		if (rc != 0) {
+			for (i = 0; i < rdata->nr_pages; i++) {
+				page = rdata->pages[i];
+				lru_cache_add_file(page);
+				unlock_page(page);
 				page_cache_release(page);
 			}
+			kref_put(&rdata->refcount, cifs_readdata_release);
 			break;
 		}
-		if (smb_read_data) {
-			cifs_buf_release(smb_read_data);
-			smb_read_data = NULL;
-		}
-		bytes_read = 0;
-	}
-
-	pagevec_lru_add(&lru_pvec);
 
-/* need to free smb_read_data buf before exit */
-	if (smb_read_data) {
-		cifs_buf_release(smb_read_data);
-		smb_read_data = NULL;
-	} 
+		kref_put(&rdata->refcount, cifs_readdata_release);
+	}
 
-	FreeXid(xid);
+	/* Any pages that have been shown to fscache but didn't get added to
+	 * the pagecache must be uncached before they get returned to the
+	 * allocator.
+	 */
+	cifs_fscache_readpages_cancel(mapping->host, page_list);
 	return rc;
 }
 
+/*
+ * cifs_readpage_worker must be called with the page pinned
+ */
 static int cifs_readpage_worker(struct file *file, struct page *page,
 	loff_t *poffset)
 {
 	char *read_data;
 	int rc;
 
-	page_cache_get(page);
+	/* Is the page cached? */
+	rc = cifs_readpage_from_fscache(file_inode(file), page);
+	if (rc == 0)
+		goto read_complete;
+
 	read_data = kmap(page);
 	/* for reads over a certain size could initiate async read ahead */
-                                                                                                                           
+
 	rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
-                                                                                                                           
+
 	if (rc < 0)
 		goto io_error;
 	else
-		cFYI(1, ("Bytes read %d ",rc));
-                                                                                                                           
-	file->f_dentry->d_inode->i_atime =
-		current_fs_time(file->f_dentry->d_inode->i_sb);
-                                                                                                                           
+		cifs_dbg(FYI, "Bytes read %d\n", rc);
+
+	file_inode(file)->i_atime =
+		current_fs_time(file_inode(file)->i_sb);
+
 	if (PAGE_CACHE_SIZE > rc)
 		memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
 
 	flush_dcache_page(page);
 	SetPageUptodate(page);
+
+	/* send this page to the cache */
+	cifs_readpage_to_fscache(file_inode(file), page);
+
 	rc = 0;
-                                                                                                                           
+
 io_error:
-        kunmap(page);
-	page_cache_release(page);
+	kunmap(page);
+	unlock_page(page);
+
+read_complete:
 	return rc;
 }
 
@@ -1791,94 +3439,295 @@ static int cifs_readpage(struct file *file, struct page *page)
 {
 	loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
 	int rc = -EACCES;
-	int xid;
+	unsigned int xid;
 
-	xid = GetXid();
+	xid = get_xid();
 
 	if (file->private_data == NULL) {
-		FreeXid(xid);
-		return -EBADF;
+		rc = -EBADF;
+		free_xid(xid);
+		return rc;
 	}
 
-	cFYI(1, ("readpage %p at offset %d 0x%x\n", 
-		 page, (int)offset, (int)offset));
+	cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
+		 page, (int)offset, (int)offset);
 
 	rc = cifs_readpage_worker(file, page, &offset);
 
-	unlock_page(page);
-
-	FreeXid(xid);
+	free_xid(xid);
 	return rc;
 }
 
+static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
+{
+	struct cifsFileInfo *open_file;
+
+	spin_lock(&cifs_file_list_lock);
+	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
+		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
+			spin_unlock(&cifs_file_list_lock);
+			return 1;
+		}
+	}
+	spin_unlock(&cifs_file_list_lock);
+	return 0;
+}
+
 /* We do not want to update the file size from server for inodes
    open for write - to avoid races with writepage extending
    the file - in the future we could consider allowing
-   refreshing the inode only on increases in the file size 
+   refreshing the inode only on increases in the file size
    but this is tricky to do without racing with writebehind
    page caching in the current Linux kernel design */
-int is_size_safe_to_change(struct cifsInodeInfo *cifsInode)
+bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
 {
-	struct cifsFileInfo *open_file = NULL;
+	if (!cifsInode)
+		return true;
+
+	if (is_inode_writable(cifsInode)) {
+		/* This inode is open for write at least once */
+		struct cifs_sb_info *cifs_sb;
+
+		cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
+		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
+			/* since no page cache to corrupt on directio
+			we can change size safely */
+			return true;
+		}
 
-	if (cifsInode)
-		open_file =  find_writable_file(cifsInode);
- 
-	if(open_file) {
-		/* there is not actually a write pending so let
-		this handle go free and allow it to
-		be closable if needed */
-		atomic_dec(&open_file->wrtPending);
-		return 0;
+		if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
+			return true;
+
+		return false;
 	} else
-		return 1;
+		return true;
 }
 
-static int cifs_prepare_write(struct file *file, struct page *page,
-	unsigned from, unsigned to)
+static int cifs_write_begin(struct file *file, struct address_space *mapping,
+			loff_t pos, unsigned len, unsigned flags,
+			struct page **pagep, void **fsdata)
 {
+	int oncethru = 0;
+	pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+	loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
+	loff_t page_start = pos & PAGE_MASK;
+	loff_t i_size;
+	struct page *page;
 	int rc = 0;
-        loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
-	cFYI(1, ("prepare write for page %p from %d to %d",page,from,to));
-	if (!PageUptodate(page)) {
-	/*	if (to - from != PAGE_CACHE_SIZE) {
-			void *kaddr = kmap_atomic(page, KM_USER0);
-			memset(kaddr, 0, from);
-			memset(kaddr + to, 0, PAGE_CACHE_SIZE - to);
-			flush_dcache_page(page);
-			kunmap_atomic(kaddr, KM_USER0);
-		} */
-		/* If we are writing a full page it will be up to date,
-		   no need to read from the server */
-		if ((to == PAGE_CACHE_SIZE) && (from == 0))
-			SetPageUptodate(page);
 
-		/* might as well read a page, it is fast enough */
-		if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
-			rc = cifs_readpage_worker(file, page, &offset);
-		} else {
-		/* should we try using another file handle if there is one -
-		   how would we lock it to prevent close of that handle
-		   racing with this read?
-		   In any case this will be written out by commit_write */
+	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
+
+start:
+	page = grab_cache_page_write_begin(mapping, index, flags);
+	if (!page) {
+		rc = -ENOMEM;
+		goto out;
+	}
+
+	if (PageUptodate(page))
+		goto out;
+
+	/*
+	 * If we write a full page it will be up to date, no need to read from
+	 * the server. If the write is short, we'll end up doing a sync write
+	 * instead.
+	 */
+	if (len == PAGE_CACHE_SIZE)
+		goto out;
+
+	/*
+	 * optimize away the read when we have an oplock, and we're not
+	 * expecting to use any of the data we'd be reading in. That
+	 * is, when the page lies beyond the EOF, or straddles the EOF
+	 * and the write will cover all of the existing data.
+	 */
+	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
+		i_size = i_size_read(mapping->host);
+		if (page_start >= i_size ||
+		    (offset == 0 && (pos + len) >= i_size)) {
+			zero_user_segments(page, 0, offset,
+					   offset + len,
+					   PAGE_CACHE_SIZE);
+			/*
+			 * PageChecked means that the parts of the page
+			 * to which we're not writing are considered up
+			 * to date. Once the data is copied to the
+			 * page, it can be set uptodate.
+			 */
+			SetPageChecked(page);
+			goto out;
 		}
 	}
 
-	/* BB should we pass any errors back? 
-	   e.g. if we do not have read access to the file */
+	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
+		/*
+		 * might as well read a page, it is fast enough. If we get
+		 * an error, we don't need to return it. cifs_write_end will
+		 * do a sync write instead since PG_uptodate isn't set.
+		 */
+		cifs_readpage_worker(file, page, &page_start);
+		page_cache_release(page);
+		oncethru = 1;
+		goto start;
+	} else {
+		/* we could try using another file handle if there is one -
+		   but how would we lock it to prevent close of that handle
+		   racing with this read? In any case
+		   this will be written out by write_end so is fine */
+	}
+out:
+	*pagep = page;
+	return rc;
+}
+
+static int cifs_release_page(struct page *page, gfp_t gfp)
+{
+	if (PagePrivate(page))
+		return 0;
+
+	return cifs_fscache_release_page(page, gfp);
+}
+
+static void cifs_invalidate_page(struct page *page, unsigned int offset,
+				 unsigned int length)
+{
+	struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
+
+	if (offset == 0 && length == PAGE_CACHE_SIZE)
+		cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
+}
+
+static int cifs_launder_page(struct page *page)
+{
+	int rc = 0;
+	loff_t range_start = page_offset(page);
+	loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
+	struct writeback_control wbc = {
+		.sync_mode = WB_SYNC_ALL,
+		.nr_to_write = 0,
+		.range_start = range_start,
+		.range_end = range_end,
+	};
+
+	cifs_dbg(FYI, "Launder page: %p\n", page);
+
+	if (clear_page_dirty_for_io(page))
+		rc = cifs_writepage_locked(page, &wbc);
+
+	cifs_fscache_invalidate_page(page, page->mapping->host);
+	return rc;
+}
+
+static int
+cifs_pending_writers_wait(void *unused)
+{
+	schedule();
 	return 0;
 }
 
-struct address_space_operations cifs_addr_ops = {
+void cifs_oplock_break(struct work_struct *work)
+{
+	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
+						  oplock_break);
+	struct inode *inode = cfile->dentry->d_inode;
+	struct cifsInodeInfo *cinode = CIFS_I(inode);
+	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
+	struct TCP_Server_Info *server = tcon->ses->server;
+	int rc = 0;
+
+	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
+			cifs_pending_writers_wait, TASK_UNINTERRUPTIBLE);
+
+	server->ops->downgrade_oplock(server, cinode,
+		test_bit(CIFS_INODE_DOWNGRADE_OPLOCK_TO_L2, &cinode->flags));
+
+	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
+						cifs_has_mand_locks(cinode)) {
+		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
+			 inode);
+		cinode->oplock = 0;
+	}
+
+	if (inode && S_ISREG(inode->i_mode)) {
+		if (CIFS_CACHE_READ(cinode))
+			break_lease(inode, O_RDONLY);
+		else
+			break_lease(inode, O_WRONLY);
+		rc = filemap_fdatawrite(inode->i_mapping);
+		if (!CIFS_CACHE_READ(cinode)) {
+			rc = filemap_fdatawait(inode->i_mapping);
+			mapping_set_error(inode->i_mapping, rc);
+			cifs_zap_mapping(inode);
+		}
+		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
+	}
+
+	rc = cifs_push_locks(cfile);
+	if (rc)
+		cifs_dbg(VFS, "Push locks rc = %d\n", rc);
+
+	/*
+	 * releasing stale oplock after recent reconnect of smb session using
+	 * a now incorrect file handle is not a data integrity issue but do
+	 * not bother sending an oplock release if session to server still is
+	 * disconnected since oplock already released by the server
+	 */
+	if (!cfile->oplock_break_cancelled) {
+		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
+							     cinode);
+		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
+	}
+	cifs_done_oplock_break(cinode);
+}
+
+/*
+ * The presence of cifs_direct_io() in the address space ops vector
+ * allowes open() O_DIRECT flags which would have failed otherwise.
+ *
+ * In the non-cached mode (mount with cache=none), we shunt off direct read and write requests
+ * so this method should never be called.
+ *
+ * Direct IO is not yet supported in the cached mode. 
+ */
+static ssize_t
+cifs_direct_io(int rw, struct kiocb *iocb, struct iov_iter *iter,
+               loff_t pos)
+{
+        /*
+         * FIXME
+         * Eventually need to support direct IO for non forcedirectio mounts
+         */
+        return -EINVAL;
+}
+
+
+const struct address_space_operations cifs_addr_ops = {
 	.readpage = cifs_readpage,
 	.readpages = cifs_readpages,
 	.writepage = cifs_writepage,
-#ifdef CONFIG_CIFS_EXPERIMENTAL
 	.writepages = cifs_writepages,
-#endif
-	.prepare_write = cifs_prepare_write,
-	.commit_write = cifs_commit_write,
+	.write_begin = cifs_write_begin,
+	.write_end = cifs_write_end,
+	.set_page_dirty = __set_page_dirty_nobuffers,
+	.releasepage = cifs_release_page,
+	.direct_IO = cifs_direct_io,
+	.invalidatepage = cifs_invalidate_page,
+	.launder_page = cifs_launder_page,
+};
+
+/*
+ * cifs_readpages requires the server to support a buffer large enough to
+ * contain the header plus one complete page of data.  Otherwise, we need
+ * to leave cifs_readpages out of the address space operations.
+ */
+const struct address_space_operations cifs_addr_ops_smallbuf = {
+	.readpage = cifs_readpage,
+	.writepage = cifs_writepage,
+	.writepages = cifs_writepages,
+	.write_begin = cifs_write_begin,
+	.write_end = cifs_write_end,
 	.set_page_dirty = __set_page_dirty_nobuffers,
-	/* .sync_page = cifs_sync_page, */
-	/* .direct_IO = */
+	.releasepage = cifs_release_page,
+	.invalidatepage = cifs_invalidate_page,
+	.launder_page = cifs_launder_page,
 };