Merge commit 'v2.6.37-rc7' into perf/core

Merge reason: Pick up the latest -rc. Signed-off-by: Ingo Molnar <mingo@elte.hu>
author: Ingo Molnar <mingo@elte.hu> 2010-12-22 11:53:20 +0100
committer: Ingo Molnar <mingo@elte.hu> 2010-12-22 11:53:23 +0100
commit: 6c529a266bdc590a870ee2d2092ff6527eff427b (patch)
tree: 7be65fa2578820a1258b5a1e8e063a509a5d6176 /fs
parent: 7639dae0ca11038286bbbcda05f2bef601c1eb8d (diff)
parent: 90a8a73c06cc32b609a880d48449d7083327e11a (diff)
55 files changed, 730 insertions, 431 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c547cca26a2..51d2e4de34e 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -696,6 +696,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
 				   __btree_submit_bio_done);
 }
 
+#ifdef CONFIG_MIGRATION
 static int btree_migratepage(struct address_space *mapping,
 			struct page *newpage, struct page *page)
 {
@@ -712,12 +713,9 @@ static int btree_migratepage(struct address_space *mapping,
 	if (page_has_private(page) &&
 	    !try_to_release_page(page, GFP_KERNEL))
 		return -EAGAIN;
-#ifdef CONFIG_MIGRATION
 	return migrate_page(mapping, newpage, page);
-#else
-	return -ENOSYS;
-#endif
 }
+#endif
 
 static int btree_writepage(struct page *page, struct writeback_control *wbc)
 {
@@ -1009,7 +1007,10 @@ static int find_and_setup_root(struct btrfs_root *tree_root,
 	blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item));
 	root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item),
 				     blocksize, generation);
-	BUG_ON(!root->node);
+	if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) {
+		free_extent_buffer(root->node);
+		return -EIO;
+	}
 	root->commit_root = btrfs_root_node(root);
 	return 0;
 }
diff --git a/fs/btrfs/export.c b/fs/btrfs/export.c
index 6f044447359..659f532d26a 100644
--- a/fs/btrfs/export.c
+++ b/fs/btrfs/export.c
@@ -166,7 +166,7 @@ static struct dentry *btrfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
 static struct dentry *btrfs_get_parent(struct dentry *child)
 {
 	struct inode *dir = child->d_inode;
-	static struct dentry *dentry;
+	struct dentry *dentry;
 	struct btrfs_root *root = BTRFS_I(dir)->root;
 	struct btrfs_path *path;
 	struct extent_buffer *leaf;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index bcd59c7dfb5..227e5815d83 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -429,6 +429,7 @@ err:
 
 static int cache_block_group(struct btrfs_block_group_cache *cache,
 			     struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root,
 			     int load_cache_only)
 {
 	struct btrfs_fs_info *fs_info = cache->fs_info;
@@ -442,9 +443,12 @@ static int cache_block_group(struct btrfs_block_group_cache *cache,
 
 	/*
 	 * We can't do the read from on-disk cache during a commit since we need
-	 * to have the normal tree locking.
+	 * to have the normal tree locking.  Also if we are currently trying to
+	 * allocate blocks for the tree root we can't do the fast caching since
+	 * we likely hold important locks.
 	 */
-	if (!trans->transaction->in_commit) {
+	if (!trans->transaction->in_commit &&
+	    (root && root != root->fs_info->tree_root)) {
 		spin_lock(&cache->lock);
 		if (cache->cached != BTRFS_CACHE_NO) {
 			spin_unlock(&cache->lock);
@@ -2741,6 +2745,7 @@ static int cache_save_setup(struct btrfs_block_group_cache *block_group,
 	struct btrfs_root *root = block_group->fs_info->tree_root;
 	struct inode *inode = NULL;
 	u64 alloc_hint = 0;
+	int dcs = BTRFS_DC_ERROR;
 	int num_pages = 0;
 	int retries = 0;
 	int ret = 0;
@@ -2795,6 +2800,8 @@ again:
 
 	spin_lock(&block_group->lock);
 	if (block_group->cached != BTRFS_CACHE_FINISHED) {
+		/* We're not cached, don't bother trying to write stuff out */
+		dcs = BTRFS_DC_WRITTEN;
 		spin_unlock(&block_group->lock);
 		goto out_put;
 	}
@@ -2821,6 +2828,8 @@ again:
 	ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages,
 					      num_pages, num_pages,
 					      &alloc_hint);
+	if (!ret)
+		dcs = BTRFS_DC_SETUP;
 	btrfs_free_reserved_data_space(inode, num_pages);
 out_put:
 	iput(inode);
@@ -2828,10 +2837,7 @@ out_free:
 	btrfs_release_path(root, path);
 out:
 	spin_lock(&block_group->lock);
-	if (ret)
-		block_group->disk_cache_state = BTRFS_DC_ERROR;
-	else
-		block_group->disk_cache_state = BTRFS_DC_SETUP;
+	block_group->disk_cache_state = dcs;
 	spin_unlock(&block_group->lock);
 
 	return ret;
@@ -3037,7 +3043,13 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
 
 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
 {
-	u64 num_devices = root->fs_info->fs_devices->rw_devices;
+	/*
+	 * we add in the count of missing devices because we want
+	 * to make sure that any RAID levels on a degraded FS
+	 * continue to be honored.
+	 */
+	u64 num_devices = root->fs_info->fs_devices->rw_devices +
+		root->fs_info->fs_devices->missing_devices;
 
 	if (num_devices == 1)
 		flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
@@ -4080,7 +4092,7 @@ static int update_block_group(struct btrfs_trans_handle *trans,
 		 * space back to the block group, otherwise we will leak space.
 		 */
 		if (!alloc && cache->cached == BTRFS_CACHE_NO)
-			cache_block_group(cache, trans, 1);
+			cache_block_group(cache, trans, NULL, 1);
 
 		byte_in_group = bytenr - cache->key.objectid;
 		WARN_ON(byte_in_group > cache->key.offset);
@@ -4930,11 +4942,31 @@ search:
 		btrfs_get_block_group(block_group);
 		search_start = block_group->key.objectid;
 
+		/*
+		 * this can happen if we end up cycling through all the
+		 * raid types, but we want to make sure we only allocate
+		 * for the proper type.
+		 */
+		if (!block_group_bits(block_group, data)) {
+		    u64 extra = BTRFS_BLOCK_GROUP_DUP |
+				BTRFS_BLOCK_GROUP_RAID1 |
+				BTRFS_BLOCK_GROUP_RAID10;
+
+			/*
+			 * if they asked for extra copies and this block group
+			 * doesn't provide them, bail.  This does allow us to
+			 * fill raid0 from raid1.
+			 */
+			if ((data & extra) && !(block_group->flags & extra))
+				goto loop;
+		}
+
 have_block_group:
 		if (unlikely(block_group->cached == BTRFS_CACHE_NO)) {
 			u64 free_percent;
 
-			ret = cache_block_group(block_group, trans, 1);
+			ret = cache_block_group(block_group, trans,
+						orig_root, 1);
 			if (block_group->cached == BTRFS_CACHE_FINISHED)
 				goto have_block_group;
 
@@ -4958,7 +4990,8 @@ have_block_group:
 			if (loop > LOOP_CACHING_NOWAIT ||
 			    (loop > LOOP_FIND_IDEAL &&
 			     atomic_read(&space_info->caching_threads) < 2)) {
-				ret = cache_block_group(block_group, trans, 0);
+				ret = cache_block_group(block_group, trans,
+							orig_root, 0);
 				BUG_ON(ret);
 			}
 			found_uncached_bg = true;
@@ -5515,7 +5548,7 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
 	u64 num_bytes = ins->offset;
 
 	block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid);
-	cache_block_group(block_group, trans, 0);
+	cache_block_group(block_group, trans, NULL, 0);
 	caching_ctl = get_caching_control(block_group);
 
 	if (!caching_ctl) {
@@ -6300,9 +6333,13 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
 					   NULL, NULL);
 		BUG_ON(ret < 0);
 		if (ret > 0) {
-			ret = btrfs_del_orphan_item(trans, tree_root,
-						    root->root_key.objectid);
-			BUG_ON(ret);
+			/* if we fail to delete the orphan item this time
+			 * around, it'll get picked up the next time.
+			 *
+			 * The most common failure here is just -ENOENT.
+			 */
+			btrfs_del_orphan_item(trans, tree_root,
+					      root->root_key.objectid);
 		}
 	}
 
@@ -7878,7 +7915,14 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
 	u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
 		BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;
 
-	num_devices = root->fs_info->fs_devices->rw_devices;
+	/*
+	 * we add in the count of missing devices because we want
+	 * to make sure that any RAID levels on a degraded FS
+	 * continue to be honored.
+	 */
+	num_devices = root->fs_info->fs_devices->rw_devices +
+		root->fs_info->fs_devices->missing_devices;
+
 	if (num_devices == 1) {
 		stripped |= BTRFS_BLOCK_GROUP_DUP;
 		stripped = flags & ~stripped;
@@ -8247,7 +8291,6 @@ int btrfs_read_block_groups(struct btrfs_root *root)
 			break;
 		if (ret != 0)
 			goto error;
-
 		leaf = path->nodes[0];
 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 		cache = kzalloc(sizeof(*cache), GFP_NOFS);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index c1faded5fca..66836d85763 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -48,30 +48,34 @@ static noinline int btrfs_copy_from_user(loff_t pos, int num_pages,
 					 struct page **prepared_pages,
 					 struct iov_iter *i)
 {
-	size_t copied;
+	size_t copied = 0;
 	int pg = 0;
 	int offset = pos & (PAGE_CACHE_SIZE - 1);
+	int total_copied = 0;
 
 	while (write_bytes > 0) {
 		size_t count = min_t(size_t,
 				     PAGE_CACHE_SIZE - offset, write_bytes);
 		struct page *page = prepared_pages[pg];
-again:
-		if (unlikely(iov_iter_fault_in_readable(i, count)))
-			return -EFAULT;
-
-		/* Copy data from userspace to the current page */
-		copied = iov_iter_copy_from_user(page, i, offset, count);
+		/*
+		 * Copy data from userspace to the current page
+		 *
+		 * Disable pagefault to avoid recursive lock since
+		 * the pages are already locked
+		 */
+		pagefault_disable();
+		copied = iov_iter_copy_from_user_atomic(page, i, offset, count);
+		pagefault_enable();
 
 		/* Flush processor's dcache for this page */
 		flush_dcache_page(page);
 		iov_iter_advance(i, copied);
 		write_bytes -= copied;
+		total_copied += copied;
 
+		/* Return to btrfs_file_aio_write to fault page */
 		if (unlikely(copied == 0)) {
-			count = min_t(size_t, PAGE_CACHE_SIZE - offset,
-				      iov_iter_single_seg_count(i));
-			goto again;
+			break;
 		}
 
 		if (unlikely(copied < PAGE_CACHE_SIZE - offset)) {
@@ -81,7 +85,7 @@ again:
 			offset = 0;
 		}
 	}
-	return 0;
+	return total_copied;
 }
 
 /*
@@ -854,6 +858,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 	unsigned long last_index;
 	int will_write;
 	int buffered = 0;
+	int copied = 0;
+	int dirty_pages = 0;
 
 	will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) ||
 		      (file->f_flags & O_DIRECT));
@@ -970,7 +976,17 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 		WARN_ON(num_pages > nrptrs);
 		memset(pages, 0, sizeof(struct page *) * nrptrs);
 
-		ret = btrfs_delalloc_reserve_space(inode, write_bytes);
+		/*
+		 * Fault pages before locking them in prepare_pages
+		 * to avoid recursive lock
+		 */
+		if (unlikely(iov_iter_fault_in_readable(&i, write_bytes))) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		ret = btrfs_delalloc_reserve_space(inode,
+					num_pages << PAGE_CACHE_SHIFT);
 		if (ret)
 			goto out;
 
@@ -978,37 +994,49 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 				    pos, first_index, last_index,
 				    write_bytes);
 		if (ret) {
-			btrfs_delalloc_release_space(inode, write_bytes);
+			btrfs_delalloc_release_space(inode,
+					num_pages << PAGE_CACHE_SHIFT);
 			goto out;
 		}
 
-		ret = btrfs_copy_from_user(pos, num_pages,
+		copied = btrfs_copy_from_user(pos, num_pages,
 					   write_bytes, pages, &i);
-		if (ret == 0) {
+		dirty_pages = (copied + PAGE_CACHE_SIZE - 1) >>
+					PAGE_CACHE_SHIFT;
+
+		if (num_pages > dirty_pages) {
+			if (copied > 0)
+				atomic_inc(
+					&BTRFS_I(inode)->outstanding_extents);
+			btrfs_delalloc_release_space(inode,
+					(num_pages - dirty_pages) <<
+					PAGE_CACHE_SHIFT);
+		}
+
+		if (copied > 0) {
 			dirty_and_release_pages(NULL, root, file, pages,
-						num_pages, pos, write_bytes);
+						dirty_pages, pos, copied);
 		}
 
 		btrfs_drop_pages(pages, num_pages);
-		if (ret) {
-			btrfs_delalloc_release_space(inode, write_bytes);
-			goto out;
-		}
 
-		if (will_write) {
-			filemap_fdatawrite_range(inode->i_mapping, pos,
-						 pos + write_bytes - 1);
-		} else {
-			balance_dirty_pages_ratelimited_nr(inode->i_mapping,
-							   num_pages);
-			if (num_pages <
-			    (root->leafsize >> PAGE_CACHE_SHIFT) + 1)
-				btrfs_btree_balance_dirty(root, 1);
-			btrfs_throttle(root);
+		if (copied > 0) {
+			if (will_write) {
+				filemap_fdatawrite_range(inode->i_mapping, pos,
+							 pos + copied - 1);
+			} else {
+				balance_dirty_pages_ratelimited_nr(
+							inode->i_mapping,
+							dirty_pages);
+				if (dirty_pages <
+				(root->leafsize >> PAGE_CACHE_SHIFT) + 1)
+					btrfs_btree_balance_dirty(root, 1);
+				btrfs_throttle(root);
+			}
 		}
 
-		pos += write_bytes;
-		num_written += write_bytes;
+		pos += copied;
+		num_written += copied;
 
 		cond_resched();
 	}
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 22ee0dc2e6b..60d68426695 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -290,7 +290,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
 		       (unsigned long long)BTRFS_I(inode)->generation,
 		       (unsigned long long)generation,
 		       (unsigned long long)block_group->key.objectid);
-		goto out;
+		goto free_cache;
 	}
 
 	if (!num_entries)
@@ -524,6 +524,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 		return 0;
 	}
 
+	node = rb_first(&block_group->free_space_offset);
+	if (!node) {
+		iput(inode);
+		return 0;
+	}
+
 	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
 	filemap_write_and_wait(inode->i_mapping);
 	btrfs_wait_ordered_range(inode, inode->i_size &
@@ -543,10 +549,6 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 	 */
 	first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
 
-	node = rb_first(&block_group->free_space_offset);
-	if (!node)
-		goto out_free;
-
 	/*
 	 * Lock all pages first so we can lock the extent safely.
 	 *
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 8039390bd6a..72f31ecb5c9 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -495,7 +495,7 @@ again:
 		add_async_extent(async_cow, start, num_bytes,
 				 total_compressed, pages, nr_pages_ret);
 
-		if (start + num_bytes < end && start + num_bytes < actual_end) {
+		if (start + num_bytes < end) {
 			start += num_bytes;
 			pages = NULL;
 			cond_resched();
@@ -5712,9 +5712,9 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
 
 	if (err) {
 		printk(KERN_ERR "btrfs direct IO failed ino %lu rw %lu "
-		      "disk_bytenr %lu len %u err no %d\n",
-		      dip->inode->i_ino, bio->bi_rw, bio->bi_sector,
-		      bio->bi_size, err);
+		      "sector %#Lx len %u err no %d\n",
+		      dip->inode->i_ino, bio->bi_rw,
+		      (unsigned long long)bio->bi_sector, bio->bi_size, err);
 		dip->errors = 1;
 
 		/*
@@ -5934,8 +5934,7 @@ free_ordered:
 	 */
 	if (write) {
 		struct btrfs_ordered_extent *ordered;
-		ordered = btrfs_lookup_ordered_extent(inode,
-						      dip->logical_offset);
+		ordered = btrfs_lookup_ordered_extent(inode, file_offset);
 		if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) &&
 		    !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags))
 			btrfs_free_reserved_extent(root, ordered->start,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index f1c9bb4079e..f87552a1d7e 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -947,23 +947,42 @@ out:
 
 static noinline int btrfs_ioctl_snap_create(struct file *file,
 					    void __user *arg, int subvol,
-					    int async)
+					    int v2)
 {
 	struct btrfs_ioctl_vol_args *vol_args = NULL;
-	struct btrfs_ioctl_async_vol_args *async_vol_args = NULL;
+	struct btrfs_ioctl_vol_args_v2 *vol_args_v2 = NULL;
 	char *name;
 	u64 fd;
-	u64 transid = 0;
 	int ret;
 
-	if (async) {
-		async_vol_args = memdup_user(arg, sizeof(*async_vol_args));
-		if (IS_ERR(async_vol_args))
-			return PTR_ERR(async_vol_args);
+	if (v2) {
+		u64 transid = 0;
+		u64 *ptr = NULL;
 
-		name = async_vol_args->name;
-		fd = async_vol_args->fd;
-		async_vol_args->name[BTRFS_SNAPSHOT_NAME_MAX] = '\0';
+		vol_args_v2 = memdup_user(arg, sizeof(*vol_args_v2));
+		if (IS_ERR(vol_args_v2))
+			return PTR_ERR(vol_args_v2);
+
+		if (vol_args_v2->flags & ~BTRFS_SUBVOL_CREATE_ASYNC) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		name = vol_args_v2->name;
+		fd = vol_args_v2->fd;
+		vol_args_v2->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
+
+		if (vol_args_v2->flags & BTRFS_SUBVOL_CREATE_ASYNC)
+			ptr = &transid;
+
+		ret = btrfs_ioctl_snap_create_transid(file, name, fd,
+						      subvol, ptr);
+
+		if (ret == 0 && ptr &&
+		    copy_to_user(arg +
+				 offsetof(struct btrfs_ioctl_vol_args_v2,
+					  transid), ptr, sizeof(*ptr)))
+			ret = -EFAULT;
 	} else {
 		vol_args = memdup_user(arg, sizeof(*vol_args));
 		if (IS_ERR(vol_args))
@@ -971,20 +990,13 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
 		name = vol_args->name;
 		fd = vol_args->fd;
 		vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
-	}
-
-	ret = btrfs_ioctl_snap_create_transid(file, name, fd,
-					      subvol, &transid);
 
-	if (!ret && async) {
-		if (copy_to_user(arg +
-				offsetof(struct btrfs_ioctl_async_vol_args,
-				transid), &transid, sizeof(transid)))
-			return -EFAULT;
+		ret = btrfs_ioctl_snap_create_transid(file, name, fd,
+						      subvol, NULL);
 	}
-
+out:
 	kfree(vol_args);
-	kfree(async_vol_args);
+	kfree(vol_args_v2);
 
 	return ret;
 }
@@ -2246,7 +2258,7 @@ long btrfs_ioctl(struct file *file, unsigned int
 		return btrfs_ioctl_getversion(file, argp);
 	case BTRFS_IOC_SNAP_CREATE:
 		return btrfs_ioctl_snap_create(file, argp, 0, 0);
-	case BTRFS_IOC_SNAP_CREATE_ASYNC:
+	case BTRFS_IOC_SNAP_CREATE_V2:
 		return btrfs_ioctl_snap_create(file, argp, 0, 1);
 	case BTRFS_IOC_SUBVOL_CREATE:
 		return btrfs_ioctl_snap_create(file, argp, 1, 0);
diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h
index 17c99ebdf96..c344d12c646 100644
--- a/fs/btrfs/ioctl.h
+++ b/fs/btrfs/ioctl.h
@@ -30,11 +30,15 @@ struct btrfs_ioctl_vol_args {
 	char name[BTRFS_PATH_NAME_MAX + 1];
 };
 
-#define BTRFS_SNAPSHOT_NAME_MAX 4079
-struct btrfs_ioctl_async_vol_args {
+#define BTRFS_SUBVOL_CREATE_ASYNC	(1ULL << 0)
+
+#define BTRFS_SUBVOL_NAME_MAX 4039
+struct btrfs_ioctl_vol_args_v2 {
 	__s64 fd;
 	__u64 transid;
-	char name[BTRFS_SNAPSHOT_NAME_MAX + 1];
+	__u64 flags;
+	__u64 unused[4];
+	char name[BTRFS_SUBVOL_NAME_MAX + 1];
 };
 
 #define BTRFS_INO_LOOKUP_PATH_MAX 4080
@@ -187,6 +191,6 @@ struct btrfs_ioctl_space_args {
 				    struct btrfs_ioctl_space_args)
 #define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
 #define BTRFS_IOC_WAIT_SYNC  _IOW(BTRFS_IOCTL_MAGIC, 22, __u64)
-#define BTRFS_IOC_SNAP_CREATE_ASYNC _IOW(BTRFS_IOCTL_MAGIC, 23, \
-				   struct btrfs_ioctl_async_vol_args)
+#define BTRFS_IOC_SNAP_CREATE_V2 _IOW(BTRFS_IOCTL_MAGIC, 23, \
+				   struct btrfs_ioctl_vol_args_v2)
 #endif
diff --git a/fs/btrfs/orphan.c b/fs/btrfs/orphan.c
index 79cba5fbc28..f8be250963a 100644
--- a/fs/btrfs/orphan.c
+++ b/fs/btrfs/orphan.c
@@ -56,8 +56,12 @@ int btrfs_del_orphan_item(struct btrfs_trans_handle *trans,
 		return -ENOMEM;
 
 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
-	if (ret)
+	if (ret < 0)
 		goto out;
+	if (ret) {
+		ret = -ENOENT;
+		goto out;
+	}
 
 	ret = btrfs_del_item(trans, root, path);
 
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index dbb51ea7a13..883c6fa1367 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -685,9 +685,9 @@ static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags,
 		mutex_unlock(&root->d_inode->i_mutex);
 
 		if (IS_ERR(new_root)) {
+			dput(root);
 			deactivate_locked_super(s);
 			error = PTR_ERR(new_root);
-			dput(root);
 			goto error_free_subvol_name;
 		}
 		if (!new_root->d_inode) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index cc04dc1445d..6b988450783 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -412,12 +412,16 @@ static noinline int device_list_add(const char *path,
 
 		device->fs_devices = fs_devices;
 		fs_devices->num_devices++;
-	} else if (strcmp(device->name, path)) {
+	} else if (!device->name || strcmp(device->name, path)) {
 		name = kstrdup(path, GFP_NOFS);
 		if (!name)
 			return -ENOMEM;
 		kfree(device->name);
 		device->name = name;
+		if (device->missing) {
+			fs_devices->missing_devices--;
+			device->missing = 0;
+		}
 	}
 
 	if (found_transid > fs_devices->latest_trans) {
@@ -1236,6 +1240,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
 
 	device->fs_devices->num_devices--;
 
+	if (device->missing)
+		root->fs_info->fs_devices->missing_devices--;
+
 	next_device = list_entry(root->fs_info->fs_devices->devices.next,
 				 struct btrfs_device, dev_list);
 	if (device->bdev == root->fs_info->sb->s_bdev)
@@ -3080,7 +3087,9 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
 	device->devid = devid;
 	device->work.func = pending_bios_fn;
 	device->fs_devices = fs_devices;
+	device->missing = 1;
 	fs_devices->num_devices++;
+	fs_devices->missing_devices++;
 	spin_lock_init(&device->io_lock);
 	INIT_LIST_HEAD(&device->dev_alloc_list);
 	memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
@@ -3278,6 +3287,15 @@ static int read_one_dev(struct btrfs_root *root,
 			device = add_missing_dev(root, devid, dev_uuid);
 			if (!device)
 				return -ENOMEM;
+		} else if (!device->missing) {
+			/*
+			 * this happens when a device that was properly setup
+			 * in the device info lists suddenly goes bad.
+			 * device->bdev is NULL, and so we have to set
+			 * device->missing to one here
+			 */
+			root->fs_info->fs_devices->missing_devices++;
+			device->missing = 1;
 		}
 	}
 
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 2b638b6e4ee..2740db49eb0 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -44,6 +44,7 @@ struct btrfs_device {
 
 	int writeable;
 	int in_fs_metadata;
+	int missing;
 
 	spinlock_t io_lock;
 
@@ -93,6 +94,7 @@ struct btrfs_fs_devices {
 	u64 num_devices;
 	u64 open_devices;
 	u64 rw_devices;
+	u64 missing_devices;
 	u64 total_rw_bytes;
 	struct block_device *latest_bdev;
 
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index 7d447af84ec..d902948a90d 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -40,7 +40,8 @@ int ceph_init_dentry(struct dentry *dentry)
 	if (dentry->d_fsdata)
 		return 0;
 
-	if (ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
+	if (dentry->d_parent == NULL ||   /* nfs fh_to_dentry */
+	    ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP)
 		dentry->d_op = &ceph_dentry_ops;
 	else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR)
 		dentry->d_op = &ceph_snapdir_dentry_ops;
@@ -114,8 +115,8 @@ static int __dcache_readdir(struct file *filp,
 	spin_lock(&dcache_lock);
 
 	/* start at beginning? */
-	if (filp->f_pos == 2 || (last &&
-				 filp->f_pos < ceph_dentry(last)->offset)) {
+	if (filp->f_pos == 2 || last == NULL ||
+	    filp->f_pos < ceph_dentry(last)->offset) {
 		if (list_empty(&parent->d_subdirs))
 			goto out_unlock;
 		p = parent->d_subdirs.prev;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 8d79b8912e3..7d0e4a82d89 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -282,7 +282,8 @@ int ceph_release(struct inode *inode, struct file *file)
 static int striped_read(struct inode *inode,
 			u64 off, u64 len,
 			struct page **pages, int num_pages,
-			int *checkeof, bool align_to_pages)
+			int *checkeof, bool align_to_pages,
+			unsigned long buf_align)
 {
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 	struct ceph_inode_info *ci = ceph_inode(inode);
@@ -307,7 +308,7 @@ static int striped_read(struct inode *inode,
 
 more:
 	if (align_to_pages)
-		page_align = (pos - io_align) & ~PAGE_MASK;
+		page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
 	else
 		page_align = pos & ~PAGE_MASK;
 	this_len = left;
@@ -376,16 +377,18 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
 	struct inode *inode = file->f_dentry->d_inode;
 	struct page **pages;
 	u64 off = *poff;
-	int num_pages = calc_pages_for(off, len);
-	int ret;
+	int num_pages, ret;
 
 	dout("sync_read on file %p %llu~%u %s\n", file, off, len,
 	     (file->f_flags & O_DIRECT) ? "O_DIRECT" : "");
 
-	if (file->f_flags & O_DIRECT)
-		pages = ceph_get_direct_page_vector(data, num_pages);
-	else
+	if (file->f_flags & O_DIRECT) {
+		num_pages = calc_pages_for((unsigned long)data, len);
+		pages = ceph_get_direct_page_vector(data, num_pages, true);
+	} else {
+		num_pages = calc_pages_for(off, len);
 		pages = ceph_alloc_page_vector(num_pages, GFP_NOFS);
+	}
 	if (IS_ERR(pages))
 		return PTR_ERR(pages);
 
@@ -400,7 +403,8 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
 		goto done;
 
 	ret = striped_read(inode, off, len, pages, num_pages, checkeof,
-			   file->f_flags & O_DIRECT);
+			   file->f_flags & O_DIRECT,
+			   (unsigned long)data & ~PAGE_MASK);
 
 	if (ret >= 0 && (file->f_flags & O_DIRECT) == 0)
 		ret = ceph_copy_page_vector_to_user(pages, data, off, ret);
@@ -409,7 +413,7 @@ static ssize_t ceph_sync_read(struct file *file, char __user *data,
 
 done:
 	if (file->f_flags & O_DIRECT)
-		ceph_put_page_vector(pages, num_pages);
+		ceph_put_page_vector(pages, num_pages, true);
 	else
 		ceph_release_page_vector(pages, num_pages);
 	dout("sync_read result %d\n", ret);
@@ -456,6 +460,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
 	int do_sync = 0;
 	int check_caps = 0;
 	int page_align, io_align;
+	unsigned long buf_align;
 	int ret;
 	struct timespec mtime = CURRENT_TIME;
 
@@ -471,6 +476,7 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
 		pos = *offset;
 
 	io_align = pos & ~PAGE_MASK;
+	buf_align = (unsigned long)data & ~PAGE_MASK;
 
 	ret = filemap_write_and_wait_range(inode->i_mapping, pos, pos + left);
 	if (ret < 0)
@@ -496,12 +502,15 @@ static ssize_t ceph_sync_write(struct file *file, const char __user *data,
 	 */
 more:
 	len = left;
-	if (file->f_flags & O_DIRECT)
+	if (file->f_flags & O_DIRECT) {
 		/* write from beginning of first page, regardless of
 		   io alignment */
-		page_align = (pos - io_align) & ~PAGE_MASK;
-	else
+		page_align = (pos - io_align + buf_align) & ~PAGE_MASK;
+		num_pages = calc_pages_for((unsigned long)data, len);
+	} else {
 		page_align = pos & ~PAGE_MASK;
+		num_pages = calc_pages_for(pos, len);
+	}
 	req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
 				    ceph_vino(inode), pos, &len,
 				    CEPH_OSD_OP_WRITE, flags,
@@ -512,10 +521,8 @@ more:
 	if (!req)
 		return -ENOMEM;
 
-	num_pages = calc_pages_for(pos, len);
-
 	if (file->f_flags & O_DIRECT) {
-		pages = ceph_get_direct_page_vector(data, num_pages);
+		pages = ceph_get_direct_page_vector(data, num_pages, false);
 		if (IS_ERR(pages)) {
 			ret = PTR_ERR(pages);
 			goto out;
@@ -565,7 +572,7 @@ more:
 	}
 
 	if (file->f_flags & O_DIRECT)
-		ceph_put_page_vector(pages, num_pages);
+		ceph_put_page_vector(pages, num_pages, false);
 	else if (file->f_flags & O_SYNC)
 		ceph_release_page_vector(pages, num_pages);
 
diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h
index a6ce54e94eb..52e8fd74d45 100644
--- a/fs/ceph/ioctl.h
+++ b/fs/ceph/ioctl.h
@@ -4,7 +4,7 @@
 #include <linux/ioctl.h>
 #include <linux/types.h>
 
-#define CEPH_IOCTL_MAGIC 0x98
+#define CEPH_IOCTL_MAGIC 0x97
 
 /* just use u64 to align sanely on all archs */
 struct ceph_ioctl_layout {
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 40abde93c34..476b329867d 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -11,40 +11,68 @@
  * Implement fcntl and flock locking functions.
  */
 static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
-			     u64 pid, u64 pid_ns,
-			     int cmd, u64 start, u64 length, u8 wait)
+			     int cmd, u8 wait, struct file_lock *fl)
 {
 	struct inode *inode = file->f_dentry->d_inode;
 	struct ceph_mds_client *mdsc =
 		ceph_sb_to_client(inode->i_sb)->mdsc;
 	struct ceph_mds_request *req;
 	int err;
+	u64 length = 0;
 
 	req = ceph_mdsc_create_request(mdsc, operation, USE_AUTH_MDS);
 	if (IS_ERR(req))
 		return PTR_ERR(req);
 	req->r_inode = igrab(inode);
 
+	/* mds requires start and length rather than start and end */
+	if (LLONG_MAX == fl->fl_end)
+		length = 0;
+	else
+		length = fl->fl_end - fl->fl_start + 1;
+
 	dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
 	     "length: %llu, wait: %d, type`: %d", (int)lock_type,
-	     (int)operation, pid, start, length, wait, cmd);
+	     (int)operation, (u64)fl->fl_pid, fl->fl_start,
+	     length, wait, fl->fl_type);
+
 
 	req->r_args.filelock_change.rule = lock_type;
 	req->r_args.filelock_change.type = cmd;
-	req->r_args.filelock_change.pid = cpu_to_le64(pid);
+	req->r_args.filelock_change.pid = cpu_to_le64((u64)fl->fl_pid);
 	/* This should be adjusted, but I'm not sure if
 	   namespaces actually get id numbers*/
 	req->r_args.filelock_change.pid_namespace =
-		cpu_to_le64((u64)pid_ns);
-	req->r_args.filelock_change.start = cpu_to_le64(start);
+		cpu_to_le64((u64)(unsigned long)fl->fl_nspid);
+	req->r_args.filelock_change.start = cpu_to_le64(fl->fl_start);
 	req->r_args.filelock_change.length = cpu_to_le64(length);
 	req->r_args.filelock_change.wait = wait;
 
 	err = ceph_mdsc_do_request(mdsc, inode, req);
+
+	if ( operation == CEPH_MDS_OP_GETFILELOCK){
+		fl->fl_pid = le64_to_cpu(req->r_reply_info.filelock_reply->pid);
+		if (CEPH_LOCK_SHARED == req->r_reply_info.filelock_reply->type)
+			fl->fl_type = F_RDLCK;
+		else if (CEPH_LOCK_EXCL == req->r_reply_info.filelock_reply->type)
+			fl->fl_type = F_WRLCK;
+		else
+			fl->fl_type = F_UNLCK;
+
+		fl->fl_start = le64_to_cpu(req->r_reply_info.filelock_reply->start);
+		length = le64_to_cpu(req->r_reply_info.filelock_reply->start) +
+						 le64_to_cpu(req->r_reply_info.filelock_reply->length);
+		if (length >= 1)
+			fl->fl_end = length -1;
+		else
+			fl->fl_end = 0;
+
+	}
 	ceph_mdsc_put_request(req);
 	dout("ceph_lock_message: rule: %d, op: %d, pid: %llu, start: %llu, "
-	     "length: %llu, wait: %d, type`: %d err code %d", (int)lock_type,
-	     (int)operation, pid, start, length, wait, cmd, err);
+	     "length: %llu, wait: %d, type`: %d, err code %d", (int)lock_type,
+	     (int)operation, (u64)fl->fl_pid, fl->fl_start,
+	     length, wait, fl->fl_type, err);
 	return err;
 }
 
@@ -54,7 +82,6 @@ static int ceph_lock_message(u8 lock_type, u16 operation, struct file *file,
  */
 int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 {
-	u64 length;
 	u8 lock_cmd;
 	int err;
 	u8 wait = 0;
@@ -76,29 +103,20 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 	else
 		lock_cmd = CEPH_LOCK_UNLOCK;
 
-	if (LLONG_MAX == fl->fl_end)
-		length = 0;
-	else
-		length = fl->fl_end - fl->fl_start + 1;
-
-	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
-				(u64)fl->fl_pid,
-				(u64)(unsigned long)fl->fl_nspid,
-				lock_cmd, fl->fl_start,
-				length, wait);
+	err = ceph_lock_message(CEPH_LOCK_FCNTL, op, file, lock_cmd, wait, fl);
 	if (!err) {
-		dout("mds locked, locking locally");
-		err = posix_lock_file(file, fl, NULL);
-		if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
-			/* undo! This should only happen if the kernel detects
-			 * local deadlock. */
-			ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
-					  (u64)fl->fl_pid,
-					  (u64)(unsigned long)fl->fl_nspid,
-					  CEPH_LOCK_UNLOCK, fl->fl_start,
-					  length, 0);
-			dout("got %d on posix_lock_file, undid lock", err);
+		if ( op != CEPH_MDS_OP_GETFILELOCK ){
+			dout("mds locked, locking locally");
+			err = posix_lock_file(file, fl, NULL);
+			if (err && (CEPH_MDS_OP_SETFILELOCK == op)) {
+				/* undo! This should only happen if the kernel detects
+				 * local deadlock. */
+				ceph_lock_message(CEPH_LOCK_FCNTL, op, file,
+						  CEPH_LOCK_UNLOCK, 0, fl);
+				dout("got %d on posix_lock_file, undid lock", err);
+			}
 		}
+
 	} else {
 		dout("mds returned error code %d", err);
 	}
@@ -107,7 +125,6 @@ int ceph_lock(struct file *file, int cmd, struct file_lock *fl)
 
 int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
 {
-	u64 length;
 	u8 lock_cmd;
 	int err;
 	u8 wait = 1;
@@ -127,26 +144,15 @@ int ceph_flock(struct file *file, int cmd, struct file_lock *fl)
 		lock_cmd = CEPH_LOCK_EXCL;
 	else
 		lock_cmd = CEPH_LOCK_UNLOCK;
-	/* mds requires start and length rather than start and end */
-	if (LLONG_MAX == fl->fl_end)
-		length = 0;
-	else
-		length = fl->fl_end - fl->fl_start + 1;
 
 	err = ceph_lock_message(CEPH_LOCK_FLOCK, CEPH_MDS_OP_SETFILELOCK,
-				file, (u64)fl->fl_pid,
-				(u64)(unsigned long)fl->fl_nspid,
-				lock_cmd, fl->fl_start,
-				length, wait);
+				file, lock_cmd, wait, fl);
 	if (!err) {
 		err = flock_lock_file_wait(file, fl);
 		if (err) {
 			ceph_lock_message(CEPH_LOCK_FLOCK,
 					  CEPH_MDS_OP_SETFILELOCK,
-					  file, (u64)fl->fl_pid,
-					  (u64)(unsigned long)fl->fl_nspid,
-					  CEPH_LOCK_UNLOCK, fl->fl_start,
-					  length, 0);
+					  file, CEPH_LOCK_UNLOCK, 0, fl);
 			dout("got %d on flock_lock_file_wait, undid lock", err);
 		}
 	} else {
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 098b1850847..38800eaa81d 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -202,6 +202,38 @@ out_bad:
 }
 
 /*
+ * parse fcntl F_GETLK results
+ */
+static int parse_reply_info_filelock(void **p, void *end,
+                struct ceph_mds_reply_info_parsed *info)
+{
+	if (*p + sizeof(*info->filelock_reply) > end)
+		goto bad;
+
+	info->filelock_reply = *p;
+	*p += sizeof(*info->filelock_reply);
+
+	if (unlikely(*p != end))
+		goto bad;
+	return 0;
+
+bad:
+	return -EIO;
+}
+
+/*
+ * parse extra results
+ */
+static int parse_reply_info_extra(void **p, void *end,
+                struct ceph_mds_reply_info_parsed *info)
+{
+	if (info->head->op == CEPH_MDS_OP_GETFILELOCK)
+		return parse_reply_info_filelock(p, end, info);
+	else
+		return parse_reply_info_dir(p, end, info);
+}
+
+/*
  * parse entire mds reply
  */
 static int parse_reply_info(struct ceph_msg *msg,
@@ -223,10 +255,10 @@ static int parse_reply_info(struct ceph_msg *msg,
 			goto out_bad;
 	}
 
-	/* dir content */
+	/* extra */
 	ceph_decode_32_safe(&p, end, len, bad);
 	if (len > 0) {
-		err = parse_reply_info_dir(&p, p+len, info);
+		err = parse_reply_info_extra(&p, p+len, info);
 		if (err < 0)
 			goto out_bad;
 	}
@@ -2074,7 +2106,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 
 	mutex_lock(&session->s_mutex);
 	if (err < 0) {
-		pr_err("mdsc_handle_reply got corrupt reply mds%d\n", mds);
+		pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid);
 		ceph_msg_dump(msg);
 		goto out_err;
 	}
@@ -2094,7 +2126,8 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 	mutex_lock(&req->r_fill_mutex);
 	err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
 	if (err == 0) {
-		if (result == 0 && rinfo->dir_nr)
+		if (result == 0 && req->r_op != CEPH_MDS_OP_GETFILELOCK &&
+		    rinfo->dir_nr)
 			ceph_readdir_prepopulate(req, req->r_session);
 		ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
 	}
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 9341fd4f143..aabe563b54d 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -42,26 +42,37 @@ struct ceph_mds_reply_info_in {
 };
 
 /*
- * parsed info about an mds reply, including information about the
- * target inode and/or its parent directory and dentry, and directory
- * contents (for readdir results).
+ * parsed info about an mds reply, including information about
+ * either: 1) the target inode and/or its parent directory and dentry,
+ * and directory contents (for readdir results), or
+ * 2) the file range lock info (for fcntl F_GETLK results).
  */
 struct ceph_mds_reply_info_parsed {
 	struct ceph_mds_reply_head    *head;
 
+	/* trace */
 	struct ceph_mds_reply_info_in diri, targeti;
 	struct ceph_mds_reply_dirfrag *dirfrag;
 	char                          *dname;
 	u32                           dname_len;
 	struct ceph_mds_reply_lease   *dlease;
 
-	struct ceph_mds_reply_dirfrag *dir_dir;
-	int                           dir_nr;
-	char                          **dir_dname;
-	u32                           *dir_dname_len;
-	struct ceph_mds_reply_lease   **dir_dlease;
-	struct ceph_mds_reply_info_in *dir_in;
-	u8                            dir_complete, dir_end;
+	/* extra */
+	union {
+		/* for fcntl F_GETLK results */
+		struct ceph_filelock *filelock_reply;
+
+		/* for readdir results */
+		struct {
+			struct ceph_mds_reply_dirfrag *dir_dir;
+			int                           dir_nr;
+			char                          **dir_dname;
+			u32                           *dir_dname_len;
+			struct ceph_mds_reply_lease   **dir_dlease;
+			struct ceph_mds_reply_info_in *dir_in;
+			u8                            dir_complete, dir_end;
+		};
+	};
 
 	/* encoded blob describing snapshot contexts for certain
 	   operations (e.g., open) */
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index adefa60a9bd..43b19dd3919 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -6,7 +6,9 @@ obj-$(CONFIG_CIFS) += cifs.o
 cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
 	  link.o misc.o netmisc.o smbdes.o smbencrypt.o transport.o asn1.o \
 	  md4.o md5.o cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
-	  readdir.o ioctl.o sess.o export.o cifsacl.o
+	  readdir.o ioctl.o sess.o export.o
+
+cifs-$(CONFIG_CIFS_ACL) += cifsacl.o
 
 cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o
 
diff --git a/fs/cifs/README b/fs/cifs/README
index ee68d103654..46af99ab361 100644
--- a/fs/cifs/README
+++ b/fs/cifs/README
@@ -337,6 +337,15 @@ A partial list of the supported mount options follows:
   wsize		default write size (default 57344)
 		maximum wsize currently allowed by CIFS is 57344 (fourteen
 		4096 byte pages)
+  actimeo=n	attribute cache timeout in seconds (default 1 second).
+		After this timeout, the cifs client requests fresh attribute
+		information from the server. This option allows to tune the
+		attribute cache timeout to suit the workload needs. Shorter
+		timeouts mean better the cache coherency, but increased number
+		of calls to the server. Longer timeouts mean reduced number
+		of calls to the server at the expense of less stricter cache
+		coherency checks (i.e. incorrect attribute cache for a short
+		period of time).
   rw		mount the network share read-write (note that the
 		server may still consider the share read-only)
   ro		mount network share read-only
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index e9a393c9c2c..7852cd67705 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -48,6 +48,7 @@ struct cifs_sb_info {
 	struct nls_table *local_nls;
 	unsigned int rsize;
 	unsigned int wsize;
+	unsigned long actimeo; /* attribute cache timeout (jiffies) */
 	atomic_t active;
 	uid_t	mnt_uid;
 	gid_t	mnt_gid;
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index c6ebea088ac..a437ec391a0 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -30,8 +30,6 @@
 #include "cifs_debug.h"
 
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
-
 static struct cifs_wksid wksidarr[NUM_WK_SIDS] = {
 	{{1, 0, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0} }, "null user"},
 	{{1, 1, {0, 0, 0, 0, 0, 1}, {0, 0, 0, 0, 0} }, "nobody"},
@@ -774,4 +772,3 @@ int mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode)
 
 	return rc;
 }
-#endif /* CONFIG_CIFS_EXPERIMENTAL */
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
index 6c8096cf515..c4ae7d03656 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/cifs/cifsacl.h
@@ -74,11 +74,7 @@ struct cifs_wksid {
 	char sidname[SIDNAMELENGTH];
 } __attribute__((packed));
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
-
 extern int match_sid(struct cifs_sid *);
 extern int compare_sids(const struct cifs_sid *, const struct cifs_sid *);
 
-#endif /*  CONFIG_CIFS_EXPERIMENTAL */
-
 #endif /* _CIFSACL_H */
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 76c8a906a63..3936aa7f2c2 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -463,6 +463,8 @@ cifs_show_options(struct seq_file *s, struct vfsmount *m)
 
 	seq_printf(s, ",rsize=%d", cifs_sb->rsize);
 	seq_printf(s, ",wsize=%d", cifs_sb->wsize);
+	/* convert actimeo and display it in seconds */
+		seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ);
 
 	return 0;
 }
@@ -935,7 +937,6 @@ init_cifs(void)
 	GlobalCurrentXid = 0;
 	GlobalTotalActiveXid = 0;
 	GlobalMaxActiveXid = 0;
-	memset(Local_System_Name, 0, 15);
 	spin_lock_init(&cifs_tcp_ses_lock);
 	spin_lock_init(&cifs_file_list_lock);
 	spin_lock_init(&GlobalMid_Lock);
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index b577bf0a1bb..7136c0c3e2f 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -45,6 +45,16 @@
 #define CIFS_MIN_RCV_POOL 4
 
 /*
+ * default attribute cache timeout (jiffies)
+ */
+#define CIFS_DEF_ACTIMEO (1 * HZ)
+
+/*
+ * max attribute cache timeout (jiffies) - 2^30
+ */
+#define CIFS_MAX_ACTIMEO (1 << 30)
+
+/*
  * MAX_REQ is the maximum number of requests that WE will send
  * on one socket concurrently. It also matches the most common
  * value of max multiplex returned by servers.  We may
@@ -746,8 +756,6 @@ GLOBAL_EXTERN unsigned int GlobalTotalActiveXid; /* prot by GlobalMid_Sem */
 GLOBAL_EXTERN unsigned int GlobalMaxActiveXid;	/* prot by GlobalMid_Sem */
 GLOBAL_EXTERN spinlock_t GlobalMid_Lock;  /* protects above & list operations */
 					  /* on midQ entries */
-GLOBAL_EXTERN char Local_System_Name[15];
-
 /*
  *  Global counters, updated atomically
  */
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index db961dc4fd3..e6d1481b16c 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -54,7 +54,8 @@ do {								\
 	     __func__, curr_xid, (int)rc);			\
 } while (0)
 extern char *build_path_from_dentry(struct dentry *);
-extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb);
+extern char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb,
+					struct cifsTconInfo *tcon);
 extern char *build_wildcard_path_from_dentry(struct dentry *direntry);
 extern char *cifs_compose_mount_options(const char *sb_mountdata,
 		const char *fullpath, const struct dfs_info3_param *ref,
@@ -79,9 +80,7 @@ extern bool is_valid_oplock_break(struct smb_hdr *smb,
 				  struct TCP_Server_Info *);
 extern bool is_size_safe_to_change(struct cifsInodeInfo *, __u64 eof);
 extern struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *, bool);
-#ifdef CONFIG_CIFS_EXPERIMENTAL
 extern struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *, bool);
-#endif
 extern unsigned int smbCalcSize(struct smb_hdr *ptr);
 extern unsigned int smbCalcSize_LE(struct smb_hdr *ptr);
 extern int decode_negTokenInit(unsigned char *security_blob, int length,
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 2f2632b6df5..67acfb3acad 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2478,95 +2478,6 @@ querySymLinkRetry:
 }
 
 #ifdef CONFIG_CIFS_EXPERIMENTAL
-/* Initialize NT TRANSACT SMB into small smb request buffer.
-   This assumes that all NT TRANSACTS that we init here have
-   total parm and data under about 400 bytes (to fit in small cifs
-   buffer size), which is the case so far, it easily fits. NB:
-	Setup words themselves and ByteCount
-	MaxSetupCount (size of returned setup area) and
-	MaxParameterCount (returned parms size) must be set by caller */
-static int
-smb_init_nttransact(const __u16 sub_command, const int setup_count,
-		   const int parm_len, struct cifsTconInfo *tcon,
-		   void **ret_buf)
-{
-	int rc;
-	__u32 temp_offset;
-	struct smb_com_ntransact_req *pSMB;
-
-	rc = small_smb_init(SMB_COM_NT_TRANSACT, 19 + setup_count, tcon,
-				(void **)&pSMB);
-	if (rc)
-		return rc;
-	*ret_buf = (void *)pSMB;
-	pSMB->Reserved = 0;
-	pSMB->TotalParameterCount = cpu_to_le32(parm_len);
-	pSMB->TotalDataCount  = 0;
-	pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
-					  MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
-	pSMB->ParameterCount = pSMB->TotalParameterCount;
-	pSMB->DataCount  = pSMB->TotalDataCount;
-	temp_offset = offsetof(struct smb_com_ntransact_req, Parms) +
-			(setup_count * 2) - 4 /* for rfc1001 length itself */;
-	pSMB->ParameterOffset = cpu_to_le32(temp_offset);
-	pSMB->DataOffset = cpu_to_le32(temp_offset + parm_len);
-	pSMB->SetupCount = setup_count; /* no need to le convert byte fields */
-	pSMB->SubCommand = cpu_to_le16(sub_command);
-	return 0;
-}
-
-static int
-validate_ntransact(char *buf, char **ppparm, char **ppdata,
-		   __u32 *pparmlen, __u32 *pdatalen)
-{
-	char *end_of_smb;
-	__u32 data_count, data_offset, parm_count, parm_offset;
-	struct smb_com_ntransact_rsp *pSMBr;
-
-	*pdatalen = 0;
-	*pparmlen = 0;
-
-	if (buf == NULL)
-		return -EINVAL;
-
-	pSMBr = (struct smb_com_ntransact_rsp *)buf;
-
-	/* ByteCount was converted from little endian in SendReceive */
-	end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount +
-			(char *)&pSMBr->ByteCount;
-
-	data_offset = le32_to_cpu(pSMBr->DataOffset);
-	data_count = le32_to_cpu(pSMBr->DataCount);
-	parm_offset = le32_to_cpu(pSMBr->ParameterOffset);
-	parm_count = le32_to_cpu(pSMBr->ParameterCount);
-
-	*ppparm = (char *)&pSMBr->hdr.Protocol + parm_offset;
-	*ppdata = (char *)&pSMBr->hdr.Protocol + data_offset;
-
-	/* should we also check that parm and data areas do not overlap? */
-	if (*ppparm > end_of_smb) {
-		cFYI(1, "parms start after end of smb");
-		return -EINVAL;
-	} else if (parm_count + *ppparm > end_of_smb) {
-		cFYI(1, "parm end after end of smb");
-		return -EINVAL;
-	} else if (*ppdata > end_of_smb) {
-		cFYI(1, "data starts after end of smb");
-		return -EINVAL;
-	} else if (data_count + *ppdata > end_of_smb) {
-		cFYI(1, "data %p + count %d (%p) past smb end %p start %p",
-			*ppdata, data_count, (data_count + *ppdata),
-			end_of_smb, pSMBr);
-		return -EINVAL;
-	} else if (parm_count + data_count > pSMBr->ByteCount) {
-		cFYI(1, "parm count and data count larger than SMB");
-		return -EINVAL;
-	}
-	*pdatalen = data_count;
-	*pparmlen = parm_count;
-	return 0;
-}
-
 int
 CIFSSMBQueryReparseLinkInfo(const int xid, struct cifsTconInfo *tcon,
 			const unsigned char *searchName,
@@ -3056,7 +2967,97 @@ GetExtAttrOut:
 
 #endif /* CONFIG_POSIX */
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CONFIG_CIFS_ACL
+/*
+ * Initialize NT TRANSACT SMB into small smb request buffer.  This assumes that
+ * all NT TRANSACTS that we init here have total parm and data under about 400
+ * bytes (to fit in small cifs buffer size), which is the case so far, it
+ * easily fits. NB: Setup words themselves and ByteCount MaxSetupCount (size of
+ * returned setup area) and MaxParameterCount (returned parms size) must be set
+ * by caller
+ */
+static int
+smb_init_nttransact(const __u16 sub_command, const int setup_count,
+		   const int parm_len, struct cifsTconInfo *tcon,
+		   void **ret_buf)
+{
+	int rc;
+	__u32 temp_offset;
+	struct smb_com_ntransact_req *pSMB;
+
+	rc = small_smb_init(SMB_COM_NT_TRANSACT, 19 + setup_count, tcon,
+				(void **)&pSMB);
+	if (rc)
+		return rc;
+	*ret_buf = (void *)pSMB;
+	pSMB->Reserved = 0;
+	pSMB->TotalParameterCount = cpu_to_le32(parm_len);
+	pSMB->TotalDataCount  = 0;
+	pSMB->MaxDataCount = cpu_to_le32((tcon->ses->server->maxBuf -
+					  MAX_CIFS_HDR_SIZE) & 0xFFFFFF00);
+	pSMB->ParameterCount = pSMB->TotalParameterCount;
+	pSMB->DataCount  = pSMB->TotalDataCount;
+	temp_offset = offsetof(struct smb_com_ntransact_req, Parms) +
+			(setup_count * 2) - 4 /* for rfc1001 length itself */;
+	pSMB->ParameterOffset = cpu_to_le32(temp_offset);
+	pSMB->DataOffset = cpu_to_le32(temp_offset + parm_len);
+	pSMB->SetupCount = setup_count; /* no need to le convert byte fields */
+	pSMB->SubCommand = cpu_to_le16(sub_command);
+	return 0;
+}
+
+static int
+validate_ntransact(char *buf, char **ppparm, char **ppdata,
+		   __u32 *pparmlen, __u32 *pdatalen)
+{
+	char *end_of_smb;
+	__u32 data_count, data_offset, parm_count, parm_offset;
+	struct smb_com_ntransact_rsp *pSMBr;
+
+	*pdatalen = 0;
+	*pparmlen = 0;
+
+	if (buf == NULL)
+		return -EINVAL;
+
+	pSMBr = (struct smb_com_ntransact_rsp *)buf;
+
+	/* ByteCount was converted from little endian in SendReceive */
+	end_of_smb = 2 /* sizeof byte count */ + pSMBr->ByteCount +
+			(char *)&pSMBr->ByteCount;
+
+	data_offset = le32_to_cpu(pSMBr->DataOffset);
+	data_count = le32_to_cpu(pSMBr->DataCount);
+	parm_offset = le32_to_cpu(pSMBr->ParameterOffset);
+	parm_count = le32_to_cpu(pSMBr->ParameterCount);
+
+	*ppparm = (char *)&pSMBr->hdr.Protocol + parm_offset;
+	*ppdata = (char *)&pSMBr->hdr.Protocol + data_offset;
+
+	/* should we also check that parm and data areas do not overlap? */
+	if (*ppparm > end_of_smb) {
+		cFYI(1, "parms start after end of smb");
+		return -EINVAL;
+	} else if (parm_count + *ppparm > end_of_smb) {
+		cFYI(1, "parm end after end of smb");
+		return -EINVAL;
+	} else if (*ppdata > end_of_smb) {
+		cFYI(1, "data starts after end of smb");
+		return -EINVAL;
+	} else if (data_count + *ppdata > end_of_smb) {
+		cFYI(1, "data %p + count %d (%p) past smb end %p start %p",
+			*ppdata, data_count, (data_count + *ppdata),
+			end_of_smb, pSMBr);
+		return -EINVAL;
+	} else if (parm_count + data_count > pSMBr->ByteCount) {
+		cFYI(1, "parm count and data count larger than SMB");
+		return -EINVAL;
+	}
+	*pdatalen = data_count;
+	*pparmlen = parm_count;
+	return 0;
+}
+
 /* Get Security Descriptor (by handle) from remote server for a file or dir */
 int
 CIFSSMBGetCIFSACL(const int xid, struct cifsTconInfo *tcon, __u16 fid,
@@ -3214,7 +3215,7 @@ setCifsAclRetry:
 	return (rc);
 }
 
-#endif /* CONFIG_CIFS_EXPERIMENTAL */
+#endif /* CONFIG_CIFS_ACL */
 
 /* Legacy Query Path Information call for lookup to old servers such
    as Win9x/WinME */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 32fa4d9b5db..cc1a8604a79 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -105,6 +105,7 @@ struct smb_vol {
 	unsigned int wsize;
 	bool sockopt_tcp_nodelay:1;
 	unsigned short int port;
+	unsigned long actimeo; /* attribute cache timeout (jiffies) */
 	char *prepath;
 	struct sockaddr_storage srcaddr; /* allow binding to a local IP */
 	struct nls_table *local_nls;
@@ -806,23 +807,20 @@ cifs_parse_mount_options(char *options, const char *devname,
 	short int override_gid = -1;
 	bool uid_specified = false;
 	bool gid_specified = false;
+	char *nodename = utsname()->nodename;
 
 	separator[0] = ',';
 	separator[1] = 0;
 
-	if (Local_System_Name[0] != 0)
-		memcpy(vol->source_rfc1001_name, Local_System_Name, 15);
-	else {
-		char *nodename = utsname()->nodename;
-		int n = strnlen(nodename, 15);
-		memset(vol->source_rfc1001_name, 0x20, 15);
-		for (i = 0; i < n; i++) {
-			/* does not have to be perfect mapping since field is
-			informational, only used for servers that do not support
-			port 445 and it can be overridden at mount time */
-			vol->source_rfc1001_name[i] = toupper(nodename[i]);
-		}
-	}
+	/*
+	 * does not have to be perfect mapping since field is
+	 * informational, only used for servers that do not support
+	 * port 445 and it can be overridden at mount time
+	 */
+	memset(vol->source_rfc1001_name, 0x20, 15);
+	for (i = 0; i < strnlen(nodename, 15); i++)
+		vol->source_rfc1001_name[i] = toupper(nodename[i]);
+
 	vol->source_rfc1001_name[15] = 0;
 	/* null target name indicates to use *SMBSERVR default called name
 	   if we end up sending RFC1001 session initialize */
@@ -840,6 +838,8 @@ cifs_parse_mount_options(char *options, const char *devname,
 	/* default to using server inode numbers where available */
 	vol->server_ino = 1;
 
+	vol->actimeo = CIFS_DEF_ACTIMEO;
+
 	if (!options)
 		return 1;
 
@@ -1214,6 +1214,16 @@ cifs_parse_mount_options(char *options, const char *devname,
 					printk(KERN_WARNING "CIFS: server net"
 					"biosname longer than 15 truncated.\n");
 			}
+		} else if (strnicmp(data, "actimeo", 7) == 0) {
+			if (value && *value) {
+				vol->actimeo = HZ * simple_strtoul(value,
+								   &value, 0);
+				if (vol->actimeo > CIFS_MAX_ACTIMEO) {
+					cERROR(1, "CIFS: attribute cache"
+							"timeout too large");
+					return 1;
+				}
+			}
 		} else if (strnicmp(data, "credentials", 4) == 0) {
 			/* ignore */
 		} else if (strnicmp(data, "version", 3) == 0) {
@@ -2571,6 +2581,8 @@ static void setup_cifs_sb(struct smb_vol *pvolume_info,
 	cFYI(1, "file mode: 0x%x  dir mode: 0x%x",
 		cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode);
 
+	cifs_sb->actimeo = pvolume_info->actimeo;
+
 	if (pvolume_info->noperm)
 		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
 	if (pvolume_info->setuids)
@@ -2821,13 +2833,13 @@ remote_path_check:
 	/* check if a whole path (including prepath) is not remote */
 	if (!rc && cifs_sb->prepathlen && tcon) {
 		/* build_path_to_root works only when we have a valid tcon */
-		full_path = cifs_build_path_to_root(cifs_sb);
+		full_path = cifs_build_path_to_root(cifs_sb, tcon);
 		if (full_path == NULL) {
 			rc = -ENOMEM;
 			goto mount_fail_check;
 		}
 		rc = is_path_accessible(xid, tcon, cifs_sb, full_path);
-		if (rc != -EREMOTE) {
+		if (rc != 0 && rc != -EREMOTE) {
 			kfree(full_path);
 			goto mount_fail_check;
 		}
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index b857ce5db77..5a28660ca2b 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1108,7 +1108,6 @@ static ssize_t cifs_write(struct cifsFileInfo *open_file,
 	return total_written;
 }
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
 					bool fsuid_only)
 {
@@ -1142,7 +1141,6 @@ struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
 	spin_unlock(&cifs_file_list_lock);
 	return NULL;
 }
-#endif
 
 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
 					bool fsuid_only)
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 28cb6e73594..589f3e3f6e0 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -686,7 +686,7 @@ int cifs_get_inode_info(struct inode **pinode,
 			cFYI(1, "cifs_sfu_type failed: %d", tmprc);
 	}
 
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CONFIG_CIFS_ACL
 	/* fill in 0777 bits from ACL */
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
 		rc = cifs_acl_to_fattr(cifs_sb, &fattr, *pinode, full_path,
@@ -697,7 +697,7 @@ int cifs_get_inode_info(struct inode **pinode,
 			goto cgii_exit;
 		}
 	}
-#endif
+#endif /* CONFIG_CIFS_ACL */
 
 	/* fill in remaining high mode bits e.g. SUID, VTX */
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UNX_EMUL)
@@ -728,12 +728,12 @@ static const struct inode_operations cifs_ipc_inode_ops = {
 	.lookup = cifs_lookup,
 };
 
-char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb)
+char *cifs_build_path_to_root(struct cifs_sb_info *cifs_sb,
+				struct cifsTconInfo *tcon)
 {
 	int pplen = cifs_sb->prepathlen;
 	int dfsplen;
 	char *full_path = NULL;
-	struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb);
 
 	/* if no prefix path, simply set path to the root of share to "" */
 	if (pplen == 0) {
@@ -875,7 +875,7 @@ struct inode *cifs_root_iget(struct super_block *sb, unsigned long ino)
 	char *full_path;
 	struct cifsTconInfo *tcon = cifs_sb_master_tcon(cifs_sb);
 
-	full_path = cifs_build_path_to_root(cifs_sb);
+	full_path = cifs_build_path_to_root(cifs_sb, tcon);
 	if (full_path == NULL)
 		return ERR_PTR(-ENOMEM);
 
@@ -1653,6 +1653,7 @@ static bool
 cifs_inode_needs_reval(struct inode *inode)
 {
 	struct cifsInodeInfo *cifs_i = CIFS_I(inode);
+	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 
 	if (cifs_i->clientCanCacheRead)
 		return false;
@@ -1663,12 +1664,12 @@ cifs_inode_needs_reval(struct inode *inode)
 	if (cifs_i->time == 0)
 		return true;
 
-	/* FIXME: the actimeo should be tunable */
-	if (time_after_eq(jiffies, cifs_i->time + HZ))
+	if (!time_in_range(jiffies, cifs_i->time,
+				cifs_i->time + cifs_sb->actimeo))
 		return true;
 
 	/* hardlinked files w/ noserverino get "special" treatment */
-	if (!(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) &&
+	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) &&
 	    S_ISREG(inode->i_mode) && inode->i_nlink != 1)
 		return true;
 
@@ -2121,7 +2122,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
 
 	if (attrs->ia_valid & ATTR_MODE) {
 		rc = 0;
-#ifdef CONFIG_CIFS_EXPERIMENTAL
+#ifdef CONFIG_CIFS_ACL
 		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) {
 			rc = mode_to_cifs_acl(inode, full_path, mode);
 			if (rc) {
@@ -2130,7 +2131,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
 				goto cifs_setattr_exit;
 			}
 		} else
-#endif
+#endif /* CONFIG_CIFS_ACL */
 		if (((mode & S_IWUGO) == 0) &&
 		    (cifsInode->cifsAttrs & ATTR_READONLY) == 0) {
 
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 32d300e8f20..a73eb9f4bda 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -759,18 +759,6 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir,
 	rc = filldir(direntry, qstring.name, qstring.len, file->f_pos,
 		     ino, fattr.cf_dtype);
 
-	/*
-	 * we can not return filldir errors to the caller since they are
-	 * "normal" when the stat blocksize is too small - we return remapped
-	 * error instead
-	 *
-	 * FIXME: This looks bogus. filldir returns -EOVERFLOW in the above
-	 * case already. Why should we be clobbering other errors from it?
-	 */
-	if (rc) {
-		cFYI(1, "filldir rc = %d", rc);
-		rc = -EOVERFLOW;
-	}
 	dput(tmp_dentry);
 	return rc;
 }
diff --git a/fs/exec.c b/fs/exec.c
index d68c378a313..c62efcb959c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -275,6 +275,11 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
 	vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
 	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 	INIT_LIST_HEAD(&vma->anon_vma_chain);
+
+	err = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1);
+	if (err)
+		goto err;
+
 	err = insert_vm_struct(mm, vma);
 	if (err)
 		goto err;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 6a5edea2d70..94ce3d7a1c4 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -910,6 +910,7 @@ struct ext4_inode_info {
 #define EXT4_MOUNT_JOURNAL_CHECKSUM	0x800000 /* Journal checksums */
 #define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT	0x1000000 /* Journal Async Commit */
 #define EXT4_MOUNT_I_VERSION            0x2000000 /* i_version support */
+#define EXT4_MOUNT_MBLK_IO_SUBMIT	0x4000000 /* multi-block io submits */
 #define EXT4_MOUNT_DELALLOC		0x8000000 /* Delalloc support */
 #define EXT4_MOUNT_DATA_ERR_ABORT	0x10000000 /* Abort on file data write */
 #define EXT4_MOUNT_BLOCK_VALIDITY	0x20000000 /* Block validity checking */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index bdbe6990220..e659597b690 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2125,9 +2125,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 			 */
 			if (unlikely(journal_data && PageChecked(page)))
 				err = __ext4_journalled_writepage(page, len);
-			else
+			else if (test_opt(inode->i_sb, MBLK_IO_SUBMIT))
 				err = ext4_bio_write_page(&io_submit, page,
 							  len, mpd->wbc);
+			else
+				err = block_write_full_page(page,
+					noalloc_get_block_write, mpd->wbc);
 
 			if (!err)
 				mpd->pages_written++;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 92203b8a099..dc40e75cba8 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -872,7 +872,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
 	if (namelen > EXT4_NAME_LEN)
 		return NULL;
 	if ((namelen <= 2) && (name[0] == '.') &&
-	    (name[1] == '.' || name[1] == '0')) {
+	    (name[1] == '.' || name[1] == '\0')) {
 		/*
 		 * "." or ".." will only be in the first block
 		 * NFS may look up ".."; "." should be handled by the VFS
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index e32195d6aac..fb15c9c0be7 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1026,6 +1026,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	    !(def_mount_opts & EXT4_DEFM_NODELALLOC))
 		seq_puts(seq, ",nodelalloc");
 
+	if (test_opt(sb, MBLK_IO_SUBMIT))
+		seq_puts(seq, ",mblk_io_submit");
 	if (sbi->s_stripe)
 		seq_printf(seq, ",stripe=%lu", sbi->s_stripe);
 	/*
@@ -1239,8 +1241,8 @@ enum {
 	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
 	Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
 	Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version,
-	Opt_stripe, Opt_delalloc, Opt_nodelalloc,
-	Opt_block_validity, Opt_noblock_validity,
+	Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit,
+	Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity,
 	Opt_inode_readahead_blks, Opt_journal_ioprio,
 	Opt_dioread_nolock, Opt_dioread_lock,
 	Opt_discard, Opt_nodiscard,
@@ -1304,6 +1306,8 @@ static const match_table_t tokens = {
 	{Opt_resize, "resize"},
 	{Opt_delalloc, "delalloc"},
 	{Opt_nodelalloc, "nodelalloc"},
+	{Opt_mblk_io_submit, "mblk_io_submit"},
+	{Opt_nomblk_io_submit, "nomblk_io_submit"},
 	{Opt_block_validity, "block_validity"},
 	{Opt_noblock_validity, "noblock_validity"},
 	{Opt_inode_readahead_blks, "inode_readahead_blks=%u"},
@@ -1725,6 +1729,12 @@ set_qf_format:
 		case Opt_nodelalloc:
 			clear_opt(sbi->s_mount_opt, DELALLOC);
 			break;
+		case Opt_mblk_io_submit:
+			set_opt(sbi->s_mount_opt, MBLK_IO_SUBMIT);
+			break;
+		case Opt_nomblk_io_submit:
+			clear_opt(sbi->s_mount_opt, MBLK_IO_SUBMIT);
+			break;
 		case Opt_stripe:
 			if (match_int(&args[0], &option))
 				return 0;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 9242d294fe9..8b984a2cebb 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -13,6 +13,7 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/module.h>
+#include <linux/compat.h>
 
 static const struct file_operations fuse_direct_io_file_operations;
 
@@ -1628,6 +1629,58 @@ static int fuse_ioctl_copy_user(struct page **pages, struct iovec *iov,
 }
 
 /*
+ * CUSE servers compiled on 32bit broke on 64bit kernels because the
+ * ABI was defined to be 'struct iovec' which is different on 32bit
+ * and 64bit.  Fortunately we can determine which structure the server
+ * used from the size of the reply.
+ */
+static int fuse_copy_ioctl_iovec(struct iovec *dst, void *src,
+				 size_t transferred, unsigned count,
+				 bool is_compat)
+{
+#ifdef CONFIG_COMPAT
+	if (count * sizeof(struct compat_iovec) == transferred) {
+		struct compat_iovec *ciov = src;
+		unsigned i;
+
+		/*
+		 * With this interface a 32bit server cannot support
+		 * non-compat (i.e. ones coming from 64bit apps) ioctl
+		 * requests
+		 */
+		if (!is_compat)
+			return -EINVAL;
+
+		for (i = 0; i < count; i++) {
+			dst[i].iov_base = compat_ptr(ciov[i].iov_base);
+			dst[i].iov_len = ciov[i].iov_len;
+		}
+		return 0;
+	}
+#endif
+
+	if (count * sizeof(struct iovec) != transferred)
+		return -EIO;
+
+	memcpy(dst, src, transferred);
+	return 0;
+}
+
+/* Make sure iov_length() won't overflow */
+static int fuse_verify_ioctl_iov(struct iovec *iov, size_t count)
+{
+	size_t n;
+	u32 max = FUSE_MAX_PAGES_PER_REQ << PAGE_SHIFT;
+
+	for (n = 0; n < count; n++) {
+		if (iov->iov_len > (size_t) max)
+			return -ENOMEM;
+		max -= iov->iov_len;
+	}
+	return 0;
+}
+
+/*
  * For ioctls, there is no generic way to determine how much memory
  * needs to be read and/or written.  Furthermore, ioctls are allowed
  * to dereference the passed pointer, so the parameter requires deep
@@ -1808,18 +1861,25 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
 		    in_iovs + out_iovs > FUSE_IOCTL_MAX_IOV)
 			goto out;
 
-		err = -EIO;
-		if ((in_iovs + out_iovs) * sizeof(struct iovec) != transferred)
-			goto out;
-
-		/* okay, copy in iovs and retry */
 		vaddr = kmap_atomic(pages[0], KM_USER0);
-		memcpy(page_address(iov_page), vaddr, transferred);
+		err = fuse_copy_ioctl_iovec(page_address(iov_page), vaddr,
+					    transferred, in_iovs + out_iovs,
+					    (flags & FUSE_IOCTL_COMPAT) != 0);
 		kunmap_atomic(vaddr, KM_USER0);
+		if (err)
+			goto out;
 
 		in_iov = page_address(iov_page);
 		out_iov = in_iov + in_iovs;
 
+		err = fuse_verify_ioctl_iov(in_iov, in_iovs);
+		if (err)
+			goto out;
+
+		err = fuse_verify_ioctl_iov(out_iov, out_iovs);
+		if (err)
+			goto out;
+
 		goto retry;
 	}
 
diff --git a/fs/namei.c b/fs/namei.c
index 5362af9b737..4ff7ca53053 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1748,6 +1748,9 @@ struct file *do_filp_open(int dfd, const char *pathname,
 	if (!(open_flag & O_CREAT))
 		mode = 0;
 
+	/* Must never be set by userspace */
+	open_flag &= ~FMODE_NONOTIFY;
+
 	/*
 	 * O_SYNC is implemented as __O_SYNC|O_DSYNC.  As many places only
 	 * check for O_DSYNC if the need any syncing at all we enforce it's
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index f0a384e2ae6..996dd8989a9 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -57,7 +57,7 @@ static int nfs_rename(struct inode *, struct dentry *,
 		      struct inode *, struct dentry *);
 static int nfs_fsync_dir(struct file *, int);
 static loff_t nfs_llseek_dir(struct file *, loff_t, int);
-static int nfs_readdir_clear_array(struct page*, gfp_t);
+static void nfs_readdir_clear_array(struct page*);
 
 const struct file_operations nfs_dir_operations = {
 	.llseek		= nfs_llseek_dir,
@@ -83,8 +83,8 @@ const struct inode_operations nfs_dir_inode_operations = {
 	.setattr	= nfs_setattr,
 };
 
-const struct address_space_operations nfs_dir_addr_space_ops = {
-	.releasepage = nfs_readdir_clear_array,
+const struct address_space_operations nfs_dir_aops = {
+	.freepage = nfs_readdir_clear_array,
 };
 
 #ifdef CONFIG_NFS_V3
@@ -178,6 +178,7 @@ typedef struct {
 	struct page	*page;
 	unsigned long	page_index;
 	u64		*dir_cookie;
+	u64		last_cookie;
 	loff_t		current_index;
 	decode_dirent_t	decode;
 
@@ -213,17 +214,15 @@ void nfs_readdir_release_array(struct page *page)
  * we are freeing strings created by nfs_add_to_readdir_array()
  */
 static
-int nfs_readdir_clear_array(struct page *page, gfp_t mask)
+void nfs_readdir_clear_array(struct page *page)
 {
-	struct nfs_cache_array *array = nfs_readdir_get_array(page);
+	struct nfs_cache_array *array;
 	int i;
 
-	if (IS_ERR(array))
-		return PTR_ERR(array);
+	array = kmap_atomic(page, KM_USER0);
 	for (i = 0; i < array->size; i++)
 		kfree(array->array[i].string.name);
-	nfs_readdir_release_array(page);
-	return 0;
+	kunmap_atomic(array, KM_USER0);
 }
 
 /*
@@ -272,7 +271,7 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
 		goto out;
 	array->last_cookie = entry->cookie;
 	array->size++;
-	if (entry->eof == 1)
+	if (entry->eof != 0)
 		array->eof_index = array->size;
 out:
 	nfs_readdir_release_array(page);
@@ -312,15 +311,14 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
 	for (i = 0; i < array->size; i++) {
 		if (array->array[i].cookie == *desc->dir_cookie) {
 			desc->cache_entry_index = i;
-			status = 0;
-			goto out;
+			return 0;
 		}
 	}
-	if (i == array->eof_index) {
-		desc->eof = 1;
+	if (array->eof_index >= 0) {
 		status = -EBADCOOKIE;
+		if (*desc->dir_cookie == array->last_cookie)
+			desc->eof = 1;
 	}
-out:
 	return status;
 }
 
@@ -328,10 +326,7 @@ static
 int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
 {
 	struct nfs_cache_array *array;
-	int status = -EBADCOOKIE;
-
-	if (desc->dir_cookie == NULL)
-		goto out;
+	int status;
 
 	array = nfs_readdir_get_array(desc->page);
 	if (IS_ERR(array)) {
@@ -344,6 +339,10 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
 	else
 		status = nfs_readdir_search_for_cookie(array, desc);
 
+	if (status == -EAGAIN) {
+		desc->last_cookie = array->last_cookie;
+		desc->page_index++;
+	}
 	nfs_readdir_release_array(desc->page);
 out:
 	return status;
@@ -490,7 +489,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 
 		count++;
 
-		if (desc->plus == 1)
+		if (desc->plus != 0)
 			nfs_prime_dcache(desc->file->f_path.dentry, entry);
 
 		status = nfs_readdir_add_to_array(entry, page);
@@ -498,7 +497,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 			break;
 	} while (!entry->eof);
 
-	if (count == 0 || (status == -EBADCOOKIE && entry->eof == 1)) {
+	if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) {
 		array = nfs_readdir_get_array(page);
 		if (!IS_ERR(array)) {
 			array->eof_index = array->size;
@@ -563,7 +562,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
 	unsigned int array_size = ARRAY_SIZE(pages);
 
 	entry.prev_cookie = 0;
-	entry.cookie = *desc->dir_cookie;
+	entry.cookie = desc->last_cookie;
 	entry.eof = 0;
 	entry.fh = nfs_alloc_fhandle();
 	entry.fattr = nfs_alloc_fattr();
@@ -636,6 +635,8 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
 static
 void cache_page_release(nfs_readdir_descriptor_t *desc)
 {
+	if (!desc->page->mapping)
+		nfs_readdir_clear_array(desc->page);
 	page_cache_release(desc->page);
 	desc->page = NULL;
 }
@@ -660,9 +661,8 @@ int find_cache_page(nfs_readdir_descriptor_t *desc)
 		return PTR_ERR(desc->page);
 
 	res = nfs_readdir_search_array(desc);
-	if (res == 0)
-		return 0;
-	cache_page_release(desc);
+	if (res != 0)
+		cache_page_release(desc);
 	return res;
 }
 
@@ -672,22 +672,16 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
 {
 	int res;
 
-	if (desc->page_index == 0)
+	if (desc->page_index == 0) {
 		desc->current_index = 0;
-	while (1) {
-		res = find_cache_page(desc);
-		if (res != -EAGAIN)
-			break;
-		desc->page_index++;
+		desc->last_cookie = 0;
 	}
+	do {
+		res = find_cache_page(desc);
+	} while (res == -EAGAIN);
 	return res;
 }
 
-static inline unsigned int dt_type(struct inode *inode)
-{
-	return (inode->i_mode >> 12) & 15;
-}
-
 /*
  * Once we've found the start of the dirent within a page: fill 'er up...
  */
@@ -717,13 +711,12 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
 			break;
 		}
 		file->f_pos++;
-		desc->cache_entry_index = i;
 		if (i < (array->size-1))
 			*desc->dir_cookie = array->array[i+1].cookie;
 		else
 			*desc->dir_cookie = array->last_cookie;
 	}
-	if (i == array->eof_index)
+	if (array->eof_index >= 0)
 		desc->eof = 1;
 
 	nfs_readdir_release_array(desc->page);
@@ -764,6 +757,7 @@ int uncached_readdir(nfs_readdir_descriptor_t *desc, void *dirent,
 	}
 
 	desc->page_index = 0;
+	desc->last_cookie = *desc->dir_cookie;
 	desc->page = page;
 
 	status = nfs_readdir_xdr_to_array(desc, page, inode);
@@ -791,7 +785,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	struct inode	*inode = dentry->d_inode;
 	nfs_readdir_descriptor_t my_desc,
 			*desc = &my_desc;
-	int res = -ENOMEM;
+	int res;
 
 	dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
 			dentry->d_parent->d_name.name, dentry->d_name.name,
@@ -816,7 +810,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 	if (res < 0)
 		goto out;
 
-	while (desc->eof != 1) {
+	do {
 		res = readdir_search_pagecache(desc);
 
 		if (res == -EBADCOOKIE) {
@@ -844,7 +838,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 		res = nfs_do_filldir(desc, dirent, filldir);
 		if (res < 0)
 			break;
-	}
+	} while (!desc->eof);
 out:
 	nfs_unblock_sillyrename(dentry);
 	if (res > 0)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 60677f9f131..7bf029ef408 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -693,6 +693,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 {
 	struct inode *inode = filp->f_mapping->host;
 	int status = 0;
+	unsigned int saved_type = fl->fl_type;
 
 	/* Try local locking first */
 	posix_test_lock(filp, fl);
@@ -700,6 +701,7 @@ do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
 		/* found a conflict */
 		goto out;
 	}
+	fl->fl_type = saved_type;
 
 	if (nfs_have_delegation(inode, FMODE_READ))
 		goto out_noconflict;
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 314f5716460..e67e31c7341 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -289,6 +289,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
 		} else if (S_ISDIR(inode->i_mode)) {
 			inode->i_op = NFS_SB(sb)->nfs_client->rpc_ops->dir_inode_ops;
 			inode->i_fop = &nfs_dir_operations;
+			inode->i_data.a_ops = &nfs_dir_aops;
 			if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS))
 				set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
 			/* Deal with crossing mountpoints */
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index eceafe74f47..4f981f1f668 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -505,13 +505,13 @@ static struct rpc_procinfo mnt3_procedures[] = {
 
 static struct rpc_version mnt_version1 = {
 	.number		= 1,
-	.nrprocs	= 2,
+	.nrprocs	= ARRAY_SIZE(mnt_procedures),
 	.procs		= mnt_procedures,
 };
 
 static struct rpc_version mnt_version3 = {
 	.number		= 3,
-	.nrprocs	= 2,
+	.nrprocs	= ARRAY_SIZE(mnt3_procedures),
 	.procs		= mnt3_procedures,
 };
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 6a653ffd8e4..4435e5e1f90 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3361,6 +3361,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen)
 	ret = nfs_revalidate_inode(server, inode);
 	if (ret < 0)
 		return ret;
+	if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
+		nfs_zap_acl_cache(inode);
 	ret = nfs4_read_cached_acl(inode, buf, buflen);
 	if (ret != -ENOENT)
 		return ret;
@@ -3389,6 +3391,13 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
 	nfs_inode_return_delegation(inode);
 	buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
 	ret = nfs4_call_sync(server, &msg, &arg, &res, 1);
+	/*
+	 * Acl update can result in inode attribute update.
+	 * so mark the attribute cache invalid.
+	 */
+	spin_lock(&inode->i_lock);
+	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR;
+	spin_unlock(&inode->i_lock);
 	nfs_access_zap_cache(inode);
 	nfs_zap_acl_cache(inode);
 	return ret;
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 137b549e63d..b68536cc904 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -115,7 +115,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
 {
 	if (!nfs_lock_request_dontget(req))
 		return 0;
-	if (req->wb_page != NULL)
+	if (test_bit(PG_MAPPED, &req->wb_flags))
 		radix_tree_tag_set(&NFS_I(req->wb_context->path.dentry->d_inode)->nfs_page_tree, req->wb_index, NFS_PAGE_TAG_LOCKED);
 	return 1;
 }
@@ -125,7 +125,7 @@ int nfs_set_page_tag_locked(struct nfs_page *req)
  */
 void nfs_clear_page_tag_locked(struct nfs_page *req)
 {
-	if (req->wb_page != NULL) {
+	if (test_bit(PG_MAPPED, &req->wb_flags)) {
 		struct inode *inode = req->wb_context->path.dentry->d_inode;
 		struct nfs_inode *nfsi = NFS_I(inode);
 
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index e4b62c6f5a6..aedcaa7f291 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -152,7 +152,6 @@ static void nfs_readpage_release(struct nfs_page *req)
 			(long long)NFS_FILEID(req->wb_context->path.dentry->d_inode),
 			req->wb_bytes,
 			(long long)req_offset(req));
-	nfs_clear_request(req);
 	nfs_release_request(req);
 }
 
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 3c045044fca..4100630c9a5 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -1069,12 +1069,10 @@ static int nfs_parse_mount_options(char *raw,
 			mnt->flags |= NFS_MOUNT_VER3;
 			mnt->version = 3;
 			break;
-#ifdef CONFIG_NFS_V4
 		case Opt_v4:
 			mnt->flags &= ~NFS_MOUNT_VER3;
 			mnt->version = 4;
 			break;
-#endif
 		case Opt_udp:
 			mnt->flags &= ~NFS_MOUNT_TCP;
 			mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
@@ -1286,12 +1284,10 @@ static int nfs_parse_mount_options(char *raw,
 				mnt->flags |= NFS_MOUNT_VER3;
 				mnt->version = 3;
 				break;
-#ifdef CONFIG_NFS_V4
 			case NFS4_VERSION:
 				mnt->flags &= ~NFS_MOUNT_VER3;
 				mnt->version = 4;
 				break;
-#endif
 			default:
 				goto out_invalid_value;
 			}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 4c14c17a527..10d648ea128 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -390,6 +390,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 		if (nfs_have_delegation(inode, FMODE_WRITE))
 			nfsi->change_attr++;
 	}
+	set_bit(PG_MAPPED, &req->wb_flags);
 	SetPagePrivate(req->wb_page);
 	set_page_private(req->wb_page, (unsigned long)req);
 	nfsi->npages++;
@@ -415,6 +416,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 	spin_lock(&inode->i_lock);
 	set_page_private(req->wb_page, 0);
 	ClearPagePrivate(req->wb_page);
+	clear_bit(PG_MAPPED, &req->wb_flags);
 	radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
 	nfsi->npages--;
 	if (!nfsi->npages) {
@@ -422,7 +424,6 @@ static void nfs_inode_remove_request(struct nfs_page *req)
 		iput(inode);
 	} else
 		spin_unlock(&inode->i_lock);
-	nfs_clear_request(req);
 	nfs_release_request(req);
 }
 
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 2a533a0af2a..7e84a852cda 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -260,9 +260,11 @@ void fill_post_wcc(struct svc_fh *fhp)
 	err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry,
 			&fhp->fh_post_attr);
 	fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version;
-	if (err)
+	if (err) {
 		fhp->fh_post_saved = 0;
-	else
+		/* Grab the ctime anyway - set_change_info might use it */
+		fhp->fh_post_attr.ctime = fhp->fh_dentry->d_inode->i_ctime;
+	} else
 		fhp->fh_post_saved = 1;
 }
 
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 4d476ff08ae..60fce3dc5cb 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -484,18 +484,17 @@ static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
 static inline void
 set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
 {
-	BUG_ON(!fhp->fh_pre_saved || !fhp->fh_post_saved);
-	cinfo->atomic = 1;
+	BUG_ON(!fhp->fh_pre_saved);
+	cinfo->atomic = fhp->fh_post_saved;
 	cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode);
-	if (cinfo->change_supported) {
-		cinfo->before_change = fhp->fh_pre_change;
-		cinfo->after_change = fhp->fh_post_change;
-	} else {
-		cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
-		cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
-		cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
-		cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
-	}
+
+	cinfo->before_change = fhp->fh_pre_change;
+	cinfo->after_change = fhp->fh_post_change;
+	cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
+	cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
+	cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
+	cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
+
 }
 
 int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 33ad25ddd5c..caf9a6a3fb5 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -176,7 +176,6 @@ int nilfs_gccache_wait_and_mark_dirty(struct buffer_head *bh)
 int nilfs_init_gcinode(struct inode *inode)
 {
 	struct nilfs_inode_info *ii = NILFS_I(inode);
-	struct the_nilfs *nilfs = NILFS_SB(inode->i_sb)->s_nilfs;
 
 	inode->i_mode = S_IFREG;
 	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
@@ -186,14 +185,6 @@ int nilfs_init_gcinode(struct inode *inode)
 	ii->i_flags = 0;
 	nilfs_bmap_init_gc(ii->i_bmap);
 
-	/*
-	 * Add the inode to GC inode list. Garbage Collection
-	 * is serialized and no two processes manipulate the
-	 * list simultaneously.
-	 */
-	igrab(inode);
-	list_add(&NILFS_I(inode)->i_dirty, &nilfs->ns_gc_inodes);
-
 	return 0;
 }
 
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index e00d9457c25..b185e937a33 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -337,6 +337,7 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb,
 				   struct nilfs_argv *argv, void *buf)
 {
 	size_t nmembs = argv->v_nmembs;
+	struct the_nilfs *nilfs = NILFS_SB(sb)->s_nilfs;
 	struct inode *inode;
 	struct nilfs_vdesc *vdesc;
 	struct buffer_head *bh, *n;
@@ -353,6 +354,17 @@ static int nilfs_ioctl_move_blocks(struct super_block *sb,
 			ret = PTR_ERR(inode);
 			goto failed;
 		}
+		if (list_empty(&NILFS_I(inode)->i_dirty)) {
+			/*
+			 * Add the inode to GC inode list. Garbage Collection
+			 * is serialized and no two processes manipulate the
+			 * list simultaneously.
+			 */
+			igrab(inode);
+			list_add(&NILFS_I(inode)->i_dirty,
+				 &nilfs->ns_gc_inodes);
+		}
+
 		do {
 			ret = nilfs_ioctl_move_inode_block(inode, vdesc,
 							   &buffers);
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index b04f88eed09..f35794b97e8 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -92,7 +92,11 @@ static int fanotify_get_response_from_access(struct fsnotify_group *group,
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-	wait_event(group->fanotify_data.access_waitq, event->response);
+	wait_event(group->fanotify_data.access_waitq, event->response ||
+				atomic_read(&group->fanotify_data.bypass_perm));
+
+	if (!event->response) /* bypass_perm set */
+		return 0;
 
 	/* userspace responded, convert to something usable */
 	spin_lock(&event->lock);
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 063224812b7..8b61220cffc 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -106,20 +106,29 @@ static int create_fd(struct fsnotify_group *group, struct fsnotify_event *event)
 	return client_fd;
 }
 
-static ssize_t fill_event_metadata(struct fsnotify_group *group,
+static int fill_event_metadata(struct fsnotify_group *group,
 				   struct fanotify_event_metadata *metadata,
 				   struct fsnotify_event *event)
 {
+	int ret = 0;
+
 	pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
 		 group, metadata, event);
 
 	metadata->event_len = FAN_EVENT_METADATA_LEN;
+	metadata->metadata_len = FAN_EVENT_METADATA_LEN;
 	metadata->vers = FANOTIFY_METADATA_VERSION;
 	metadata->mask = event->mask & FAN_ALL_OUTGOING_EVENTS;
 	metadata->pid = pid_vnr(event->tgid);
-	metadata->fd = create_fd(group, event);
+	if (unlikely(event->mask & FAN_Q_OVERFLOW))
+		metadata->fd = FAN_NOFD;
+	else {
+		metadata->fd = create_fd(group, event);
+		if (metadata->fd < 0)
+			ret = metadata->fd;
+	}
 
-	return metadata->fd;
+	return ret;
 }
 
 #ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
@@ -200,7 +209,7 @@ static int prepare_for_access_response(struct fsnotify_group *group,
 
 	mutex_lock(&group->fanotify_data.access_mutex);
 
-	if (group->fanotify_data.bypass_perm) {
+	if (atomic_read(&group->fanotify_data.bypass_perm)) {
 		mutex_unlock(&group->fanotify_data.access_mutex);
 		kmem_cache_free(fanotify_response_event_cache, re);
 		event->response = FAN_ALLOW;
@@ -257,24 +266,34 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
 
 	pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 
-	fd = fill_event_metadata(group, &fanotify_event_metadata, event);
-	if (fd < 0)
-		return fd;
+	ret = fill_event_metadata(group, &fanotify_event_metadata, event);
+	if (ret < 0)
+		goto out;
 
+	fd = fanotify_event_metadata.fd;
 	ret = prepare_for_access_response(group, event, fd);
 	if (ret)
 		goto out_close_fd;
 
 	ret = -EFAULT;
-	if (copy_to_user(buf, &fanotify_event_metadata, FAN_EVENT_METADATA_LEN))
+	if (copy_to_user(buf, &fanotify_event_metadata,
+			 fanotify_event_metadata.event_len))
 		goto out_kill_access_response;
 
-	return FAN_EVENT_METADATA_LEN;
+	return fanotify_event_metadata.event_len;
 
 out_kill_access_response:
 	remove_access_response(group, event, fd);
 out_close_fd:
-	sys_close(fd);
+	if (fd != FAN_NOFD)
+		sys_close(fd);
+out:
+#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS
+	if (event->mask & FAN_ALL_PERM_EVENTS) {
+		event->response = FAN_DENY;
+		wake_up(&group->fanotify_data.access_waitq);
+	}
+#endif
 	return ret;
 }
 
@@ -382,7 +401,7 @@ static int fanotify_release(struct inode *ignored, struct file *file)
 
 	mutex_lock(&group->fanotify_data.access_mutex);
 
-	group->fanotify_data.bypass_perm = true;
+	atomic_inc(&group->fanotify_data.bypass_perm);
 
 	list_for_each_entry_safe(re, lre, &group->fanotify_data.access_list, list) {
 		pr_debug("%s: found group=%p re=%p event=%p\n", __func__, group,
@@ -586,11 +605,10 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
 {
 	struct fsnotify_mark *fsn_mark;
 	__u32 added;
+	int ret = 0;
 
 	fsn_mark = fsnotify_find_vfsmount_mark(group, mnt);
 	if (!fsn_mark) {
-		int ret;
-
 		if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
 			return -ENOSPC;
 
@@ -600,17 +618,16 @@ static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
 
 		fsnotify_init_mark(fsn_mark, fanotify_free_mark);
 		ret = fsnotify_add_mark(fsn_mark, group, NULL, mnt, 0);
-		if (ret) {
-			fanotify_free_mark(fsn_mark);
-			return ret;
-		}
+		if (ret)
+			goto err;
 	}
 	added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
-	fsnotify_put_mark(fsn_mark);
+
 	if (added & ~mnt->mnt_fsnotify_mask)
 		fsnotify_recalc_vfsmount_mask(mnt);
-
-	return 0;
+err:
+	fsnotify_put_mark(fsn_mark);
+	return ret;
 }
 
 static int fanotify_add_inode_mark(struct fsnotify_group *group,
@@ -619,6 +636,7 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
 {
 	struct fsnotify_mark *fsn_mark;
 	__u32 added;
+	int ret = 0;
 
 	pr_debug("%s: group=%p inode=%p\n", __func__, group, inode);
 
@@ -634,8 +652,6 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
 
 	fsn_mark = fsnotify_find_inode_mark(group, inode);
 	if (!fsn_mark) {
-		int ret;
-
 		if (atomic_read(&group->num_marks) > group->fanotify_data.max_marks)
 			return -ENOSPC;
 
@@ -645,16 +661,16 @@ static int fanotify_add_inode_mark(struct fsnotify_group *group,
 
 		fsnotify_init_mark(fsn_mark, fanotify_free_mark);
 		ret = fsnotify_add_mark(fsn_mark, group, inode, NULL, 0);
-		if (ret) {
-			fanotify_free_mark(fsn_mark);
-			return ret;
-		}
+		if (ret)
+			goto err;
 	}
 	added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
-	fsnotify_put_mark(fsn_mark);
+
 	if (added & ~inode->i_fsnotify_mask)
 		fsnotify_recalc_inode_mask(inode);
-	return 0;
+err:
+	fsnotify_put_mark(fsn_mark);
+	return ret;
 }
 
 /* fanotify syscalls */
@@ -687,8 +703,10 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 
 	/* fsnotify_alloc_group takes a ref.  Dropped in fanotify_release */
 	group = fsnotify_alloc_group(&fanotify_fsnotify_ops);
-	if (IS_ERR(group))
+	if (IS_ERR(group)) {
+		free_uid(user);
 		return PTR_ERR(group);
+	}
 
 	group->fanotify_data.user = user;
 	atomic_inc(&user->fanotify_listeners);
@@ -698,6 +716,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags)
 	mutex_init(&group->fanotify_data.access_mutex);
 	init_waitqueue_head(&group->fanotify_data.access_waitq);
 	INIT_LIST_HEAD(&group->fanotify_data.access_list);
+	atomic_set(&group->fanotify_data.bypass_perm, 0);
 #endif
 	switch (flags & FAN_ALL_CLASS_BITS) {
 	case FAN_CLASS_NOTIF:
@@ -764,8 +783,10 @@ SYSCALL_DEFINE(fanotify_mark)(int fanotify_fd, unsigned int flags,
 	if (flags & ~FAN_ALL_MARK_FLAGS)
 		return -EINVAL;
 	switch (flags & (FAN_MARK_ADD | FAN_MARK_REMOVE | FAN_MARK_FLUSH)) {
-	case FAN_MARK_ADD:
+	case FAN_MARK_ADD:		/* fallthrough */
 	case FAN_MARK_REMOVE:
+		if (!mask)
+			return -EINVAL;
 	case FAN_MARK_FLUSH:
 		break;
 	default:
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 444c305a468..4cd5d5d78f9 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -752,6 +752,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
 	if (ret >= 0)
 		return ret;
 
+	fsnotify_put_group(group);
 	atomic_dec(&user->inotify_devs);
 out_free_uid:
 	free_uid(user);
diff --git a/fs/xfs/xfs_rename.c b/fs/xfs/xfs_rename.c
index d2af0a8381a..77a59891734 100644
--- a/fs/xfs/xfs_rename.c
+++ b/fs/xfs/xfs_rename.c
@@ -297,6 +297,7 @@ xfs_rename(
 	 * it and some incremental backup programs won't work without it.
 	 */
 	xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
+	xfs_trans_log_inode(tp, src_ip, XFS_ILOG_CORE);
 
 	/*
 	 * Adjust the link count on src_dp.  This is necessary when
author	Ingo Molnar <mingo@elte.hu>	2010-12-22 11:53:20 +0100
committer	Ingo Molnar <mingo@elte.hu>	2010-12-22 11:53:23 +0100
commit	6c529a266bdc590a870ee2d2092ff6527eff427b (patch)
tree	7be65fa2578820a1258b5a1e8e063a509a5d6176 /fs
parent	7639dae0ca11038286bbbcda05f2bef601c1eb8d (diff)
parent	90a8a73c06cc32b609a880d48449d7083327e11a (diff)