aboutsummaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorDavid Woodhouse <David.Woodhouse@intel.com>2010-02-26 19:04:15 +0000
committerDavid Woodhouse <David.Woodhouse@intel.com>2010-02-26 19:06:24 +0000
commita7790532f5b7358c33a6b1834dc2b318de209f31 (patch)
tree0ceb9e24b3f54cb5c8453fb5a218e2a94a0f1cce /fs
parent2764fb4244cc1bc08df3667924ca4a972e90ac70 (diff)
parent60b341b778cc2929df16c0a504c91621b3c6a4ad (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
The SmartMedia FTL code depends on new kfifo bits from 2.6.33
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/v9fs.c33
-rw-r--r--fs/9p/v9fs_vfs.h1
-rw-r--r--fs/9p/vfs_file.c19
-rw-r--r--fs/9p/vfs_inode.c43
-rw-r--r--fs/9p/vfs_super.c3
-rw-r--r--fs/Kconfig4
-rw-r--r--fs/affs/affs.h2
-rw-r--r--fs/affs/namei.c7
-rw-r--r--fs/affs/super.c31
-rw-r--r--fs/affs/symlink.c7
-rw-r--r--fs/anon_inodes.c35
-rw-r--r--fs/befs/linuxvfs.c1
-rw-r--r--fs/bfs/inode.c43
-rw-r--r--fs/binfmt_aout.c14
-rw-r--r--fs/binfmt_elf.c51
-rw-r--r--fs/binfmt_elf_fdpic.c49
-rw-r--r--fs/binfmt_flat.c7
-rw-r--r--fs/binfmt_som.c3
-rw-r--r--fs/bio-integrity.c3
-rw-r--r--fs/bio.c9
-rw-r--r--fs/block_dev.c7
-rw-r--r--fs/btrfs/Kconfig1
-rw-r--r--fs/btrfs/acl.c81
-rw-r--r--fs/btrfs/btrfs_inode.h5
-rw-r--r--fs/btrfs/ctree.c229
-rw-r--r--fs/btrfs/ctree.h41
-rw-r--r--fs/btrfs/dir-item.c19
-rw-r--r--fs/btrfs/disk-io.c40
-rw-r--r--fs/btrfs/extent-tree.c112
-rw-r--r--fs/btrfs/extent_io.c3
-rw-r--r--fs/btrfs/extent_map.c14
-rw-r--r--fs/btrfs/file.c729
-rw-r--r--fs/btrfs/inode.c627
-rw-r--r--fs/btrfs/ioctl.c34
-rw-r--r--fs/btrfs/ordered-data.c117
-rw-r--r--fs/btrfs/ordered-data.h5
-rw-r--r--fs/btrfs/relocation.c43
-rw-r--r--fs/btrfs/super.c24
-rw-r--r--fs/btrfs/transaction.c44
-rw-r--r--fs/btrfs/transaction.h6
-rw-r--r--fs/btrfs/tree-log.c86
-rw-r--r--fs/btrfs/volumes.c19
-rw-r--r--fs/btrfs/xattr.c80
-rw-r--r--fs/btrfs/xattr.h9
-rw-r--r--fs/cachefiles/bind.c11
-rw-r--r--fs/cachefiles/namei.c12
-rw-r--r--fs/cachefiles/rdwr.c2
-rw-r--r--fs/cifs/CHANGES8
-rw-r--r--fs/cifs/cifs_dfs_ref.c3
-rw-r--r--fs/cifs/cifsfs.c3
-rw-r--r--fs/cifs/cifsfs.h2
-rw-r--r--fs/cifs/cifsglob.h1
-rw-r--r--fs/cifs/connect.c43
-rw-r--r--fs/cifs/export.c2
-rw-r--r--fs/cifs/inode.c12
-rw-r--r--fs/cifs/readdir.c8
-rw-r--r--fs/cifs/sess.c11
-rw-r--r--fs/compat.c2
-rw-r--r--fs/compat_ioctl.c14
-rw-r--r--fs/configfs/symlink.c4
-rw-r--r--fs/dcache.c1
-rw-r--r--fs/debugfs/inode.c11
-rw-r--r--fs/direct-io.c2
-rw-r--r--fs/ecryptfs/crypto.c4
-rw-r--r--fs/ecryptfs/dentry.c2
-rw-r--r--fs/ecryptfs/file.c17
-rw-r--r--fs/ecryptfs/inode.c164
-rw-r--r--fs/ecryptfs/main.c13
-rw-r--r--fs/eventfd.c91
-rw-r--r--fs/eventpoll.c2
-rw-r--r--fs/exec.c96
-rw-r--r--fs/exofs/inode.c17
-rw-r--r--fs/exofs/pnfs.h10
-rw-r--r--fs/exportfs/expfs.c2
-rw-r--r--fs/ext2/acl.c79
-rw-r--r--fs/ext2/xattr.c11
-rw-r--r--fs/ext2/xattr_security.c16
-rw-r--r--fs/ext2/xattr_trusted.c16
-rw-r--r--fs/ext2/xattr_user.c25
-rw-r--r--fs/ext3/acl.c74
-rw-r--r--fs/ext3/inode.c8
-rw-r--r--fs/ext3/namei.c28
-rw-r--r--fs/ext3/resize.c35
-rw-r--r--fs/ext3/super.c19
-rw-r--r--fs/ext3/xattr.c31
-rw-r--r--fs/ext3/xattr_security.c20
-rw-r--r--fs/ext3/xattr_trusted.c18
-rw-r--r--fs/ext3/xattr_user.c25
-rw-r--r--fs/ext4/Kconfig2
-rw-r--r--fs/ext4/acl.c74
-rw-r--r--fs/ext4/block_validity.c1
-rw-r--r--fs/ext4/ext4.h17
-rw-r--r--fs/ext4/ext4_extents.h3
-rw-r--r--fs/ext4/extents.c98
-rw-r--r--fs/ext4/fsync.c16
-rw-r--r--fs/ext4/inode.c289
-rw-r--r--fs/ext4/mballoc.c6
-rw-r--r--fs/ext4/mballoc.h1
-rw-r--r--fs/ext4/super.c12
-rw-r--r--fs/ext4/xattr.c33
-rw-r--r--fs/ext4/xattr_security.c20
-rw-r--r--fs/ext4/xattr_trusted.c20
-rw-r--r--fs/ext4/xattr_user.c25
-rw-r--r--fs/fat/fat.h3
-rw-r--r--fs/fat/fatent.c25
-rw-r--r--fs/fat/inode.c8
-rw-r--r--fs/fcntl.c102
-rw-r--r--fs/file_table.c51
-rw-r--r--fs/fs-writeback.c18
-rw-r--r--fs/fuse/file.c3
-rw-r--r--fs/generic_acl.c158
-rw-r--r--fs/gfs2/Kconfig1
-rw-r--r--fs/gfs2/acl.c16
-rw-r--r--fs/gfs2/bmap.c2
-rw-r--r--fs/gfs2/file.c38
-rw-r--r--fs/gfs2/glock.c4
-rw-r--r--fs/gfs2/glock.h2
-rw-r--r--fs/gfs2/incore.h2
-rw-r--r--fs/gfs2/inode.c5
-rw-r--r--fs/gfs2/lock_dlm.c11
-rw-r--r--fs/gfs2/meta_io.c2
-rw-r--r--fs/gfs2/ops_fstype.c14
-rw-r--r--fs/gfs2/ops_inode.c9
-rw-r--r--fs/gfs2/rgrp.c8
-rw-r--r--fs/gfs2/super.c1
-rw-r--r--fs/gfs2/xattr.c90
-rw-r--r--fs/gfs2/xattr.h7
-rw-r--r--fs/hppfs/hppfs.c18
-rw-r--r--fs/hugetlbfs/inode.c17
-rw-r--r--fs/inode.c26
-rw-r--r--fs/internal.h8
-rw-r--r--fs/isofs/export.c2
-rw-r--r--fs/jbd/Kconfig1
-rw-r--r--fs/jbd/journal.c2
-rw-r--r--fs/jbd2/Kconfig1
-rw-r--r--fs/jbd2/checkpoint.c15
-rw-r--r--fs/jbd2/commit.c19
-rw-r--r--fs/jbd2/journal.c5
-rw-r--r--fs/jffs2/acl.c65
-rw-r--r--fs/jffs2/security.c18
-rw-r--r--fs/jffs2/xattr.c6
-rw-r--r--fs/jffs2/xattr_trusted.c18
-rw-r--r--fs/jffs2/xattr_user.c18
-rw-r--r--fs/jfs/jfs_txnmgr.c2
-rw-r--r--fs/jfs/super.c2
-rw-r--r--fs/libfs.c1
-rw-r--r--fs/lockd/svc4proc.c4
-rw-r--r--fs/lockd/svcproc.c4
-rw-r--r--fs/namei.c486
-rw-r--r--fs/namespace.c14
-rw-r--r--fs/nfs/Kconfig2
-rw-r--r--fs/nfs/dir.c1
-rw-r--r--fs/nfs/direct.c3
-rw-r--r--fs/nfs/file.c2
-rw-r--r--fs/nfs/fscache.c9
-rw-r--r--fs/nfs/inode.c4
-rw-r--r--fs/nfs/mount_clnt.c2
-rw-r--r--fs/nfs/nfs2xdr.c2
-rw-r--r--fs/nfs/nfs4_fs.h7
-rw-r--r--fs/nfs/nfs4proc.c281
-rw-r--r--fs/nfs/nfs4state.c62
-rw-r--r--fs/nfs/nfs4xdr.c6
-rw-r--r--fs/nfs/pagelist.c17
-rw-r--r--fs/nfs/super.c15
-rw-r--r--fs/nfs/sysctl.c2
-rw-r--r--fs/nfs/write.c6
-rw-r--r--fs/nfsctl.c2
-rw-r--r--fs/nfsd/auth.c12
-rw-r--r--fs/nfsd/cache.h83
-rw-r--r--fs/nfsd/export.c57
-rw-r--r--fs/nfsd/lockd.c10
-rw-r--r--fs/nfsd/nfs2acl.c27
-rw-r--r--fs/nfsd/nfs3acl.c15
-rw-r--r--fs/nfsd/nfs3proc.c20
-rw-r--r--fs/nfsd/nfs3xdr.c15
-rw-r--r--fs/nfsd/nfs4acl.c12
-rw-r--r--fs/nfsd/nfs4callback.c19
-rw-r--r--fs/nfsd/nfs4idmap.c17
-rw-r--r--fs/nfsd/nfs4proc.c19
-rw-r--r--fs/nfsd/nfs4recover.c16
-rw-r--r--fs/nfsd/nfs4state.c84
-rw-r--r--fs/nfsd/nfs4xdr.c26
-rw-r--r--fs/nfsd/nfscache.c14
-rw-r--r--fs/nfsd/nfsctl.c51
-rw-r--r--fs/nfsd/nfsd.h338
-rw-r--r--fs/nfsd/nfsfh.c102
-rw-r--r--fs/nfsd/nfsfh.h208
-rw-r--r--fs/nfsd/nfsproc.c22
-rw-r--r--fs/nfsd/nfssvc.c22
-rw-r--r--fs/nfsd/nfsxdr.c12
-rw-r--r--fs/nfsd/state.h408
-rw-r--r--fs/nfsd/stats.c11
-rw-r--r--fs/nfsd/vfs.c143
-rw-r--r--fs/nfsd/vfs.h101
-rw-r--r--fs/nfsd/xdr.h173
-rw-r--r--fs/nfsd/xdr3.h344
-rw-r--r--fs/nfsd/xdr4.h562
-rw-r--r--fs/nilfs2/Kconfig1
-rw-r--r--fs/nilfs2/bmap.c4
-rw-r--r--fs/nilfs2/cpfile.c31
-rw-r--r--fs/nilfs2/direct.c17
-rw-r--r--fs/nilfs2/ioctl.c2
-rw-r--r--fs/nilfs2/segment.c2
-rw-r--r--fs/nilfs2/super.c3
-rw-r--r--fs/notify/inotify/inotify_fsnotify.c2
-rw-r--r--fs/notify/inotify/inotify_user.c33
-rw-r--r--fs/ntfs/inode.c6
-rw-r--r--fs/ocfs2/Kconfig10
-rw-r--r--fs/ocfs2/Makefile7
-rw-r--r--fs/ocfs2/acl.c91
-rw-r--r--fs/ocfs2/acl.h22
-rw-r--r--fs/ocfs2/alloc.c14
-rw-r--r--fs/ocfs2/alloc.h5
-rw-r--r--fs/ocfs2/aops.c4
-rw-r--r--fs/ocfs2/buffer_head_io.c2
-rw-r--r--fs/ocfs2/cluster/heartbeat.c12
-rw-r--r--fs/ocfs2/cluster/nodemanager.c51
-rw-r--r--fs/ocfs2/cluster/nodemanager.h7
-rw-r--r--fs/ocfs2/cluster/quorum.c16
-rw-r--r--fs/ocfs2/cluster/tcp.c10
-rw-r--r--fs/ocfs2/cluster/tcp_internal.h4
-rw-r--r--fs/ocfs2/dlm/dlmapi.h2
-rw-r--r--fs/ocfs2/dlm/dlmast.c2
-rw-r--r--fs/ocfs2/dlm/dlmconvert.c2
-rw-r--r--fs/ocfs2/dlm/dlmdebug.c2
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c2
-rw-r--r--fs/ocfs2/dlm/dlmlock.c2
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c38
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c163
-rw-r--r--fs/ocfs2/dlm/dlmunlock.c8
-rw-r--r--fs/ocfs2/dlmglue.c85
-rw-r--r--fs/ocfs2/export.c2
-rw-r--r--fs/ocfs2/extent_map.c27
-rw-r--r--fs/ocfs2/file.c35
-rw-r--r--fs/ocfs2/inode.c4
-rw-r--r--fs/ocfs2/ioctl.c14
-rw-r--r--fs/ocfs2/journal.c2
-rw-r--r--fs/ocfs2/namei.c6
-rw-r--r--fs/ocfs2/ocfs2.h12
-rw-r--r--fs/ocfs2/ocfs2_fs.h13
-rw-r--r--fs/ocfs2/refcounttree.c162
-rw-r--r--fs/ocfs2/stack_o2cb.c12
-rw-r--r--fs/ocfs2/stack_user.c2
-rw-r--r--fs/ocfs2/super.c97
-rw-r--r--fs/ocfs2/symlink.c12
-rw-r--r--fs/ocfs2/uptodate.c4
-rw-r--r--fs/ocfs2/xattr.c78
-rw-r--r--fs/ocfs2/xattr.h2
-rw-r--r--fs/open.c17
-rw-r--r--fs/pipe.c45
-rw-r--r--fs/proc/array.c108
-rw-r--r--fs/proc/base.c25
-rw-r--r--fs/proc/page.c45
-rw-r--r--fs/proc/task_mmu.c3
-rw-r--r--fs/quota/dquot.c291
-rw-r--r--fs/quota/quota_v2.c9
-rw-r--r--fs/ramfs/file-nommu.c28
-rw-r--r--fs/reiserfs/Kconfig1
-rw-r--r--fs/reiserfs/bitmap.c3
-rw-r--r--fs/reiserfs/inode.c44
-rw-r--r--fs/reiserfs/ioctl.c3
-rw-r--r--fs/reiserfs/journal.c20
-rw-r--r--fs/reiserfs/lock.c9
-rw-r--r--fs/reiserfs/namei.c7
-rw-r--r--fs/reiserfs/xattr.c74
-rw-r--r--fs/reiserfs/xattr_acl.c71
-rw-r--r--fs/reiserfs/xattr_security.c21
-rw-r--r--fs/reiserfs/xattr_trusted.c21
-rw-r--r--fs/reiserfs/xattr_user.c21
-rw-r--r--fs/romfs/super.c1
-rw-r--r--fs/signalfd.c2
-rw-r--r--fs/stack.c71
-rw-r--r--fs/stat.c10
-rw-r--r--fs/super.c3
-rw-r--r--fs/sync.c59
-rw-r--r--fs/sysfs/bin.c6
-rw-r--r--fs/sysfs/dir.c14
-rw-r--r--fs/sysfs/inode.c35
-rw-r--r--fs/sysfs/sysfs.h15
-rw-r--r--fs/timerfd.c2
-rw-r--r--fs/ubifs/file.c2
-rw-r--r--fs/ubifs/gc.c96
-rw-r--r--fs/xattr.c28
-rw-r--r--fs/xfs/linux-2.6/xfs_acl.c60
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.c10
-rw-r--r--fs/xfs/linux-2.6/xfs_buf.h1
-rw-r--r--fs/xfs/linux-2.6/xfs_iops.c2
-rw-r--r--fs/xfs/linux-2.6/xfs_super.c14
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.c183
-rw-r--r--fs/xfs/linux-2.6/xfs_sync.h2
-rw-r--r--fs/xfs/linux-2.6/xfs_trace.h1145
-rw-r--r--fs/xfs/linux-2.6/xfs_xattr.c71
-rw-r--r--fs/xfs/quota/xfs_qm_syscalls.c2
-rw-r--r--fs/xfs/xfs_acl.h3
-rw-r--r--fs/xfs/xfs_alloc.c44
-rw-r--r--fs/xfs/xfs_bmap_btree.h14
-rw-r--r--fs/xfs/xfs_dfrag.c106
-rw-r--r--fs/xfs/xfs_iget.c17
-rw-r--r--fs/xfs/xfs_inode.c17
-rw-r--r--fs/xfs/xfs_inode_item.h6
-rw-r--r--fs/xfs/xfs_log.c2
-rw-r--r--fs/xfs/xfs_rtalloc.c2
-rw-r--r--fs/xfs/xfs_vnodeops.c93
303 files changed, 8156 insertions, 5024 deletions
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index cf62b05e296..7d6c2139891 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -84,7 +84,7 @@ static const match_table_t tokens = {
static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
{
- char *options;
+ char *options, *tmp_options;
substring_t args[MAX_OPT_ARGS];
char *p;
int option = 0;
@@ -102,9 +102,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
if (!opts)
return 0;
- options = kstrdup(opts, GFP_KERNEL);
- if (!options)
+ tmp_options = kstrdup(opts, GFP_KERNEL);
+ if (!tmp_options) {
+ ret = -ENOMEM;
goto fail_option_alloc;
+ }
+ options = tmp_options;
while ((p = strsep(&options, ",")) != NULL) {
int token;
@@ -159,8 +162,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
break;
case Opt_cache:
s = match_strdup(&args[0]);
- if (!s)
- goto fail_option_alloc;
+ if (!s) {
+ ret = -ENOMEM;
+ P9_DPRINTK(P9_DEBUG_ERROR,
+ "problem allocating copy of cache arg\n");
+ goto free_and_return;
+ }
if (strcmp(s, "loose") == 0)
v9ses->cache = CACHE_LOOSE;
@@ -173,8 +180,12 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
case Opt_access:
s = match_strdup(&args[0]);
- if (!s)
- goto fail_option_alloc;
+ if (!s) {
+ ret = -ENOMEM;
+ P9_DPRINTK(P9_DEBUG_ERROR,
+ "problem allocating copy of access arg\n");
+ goto free_and_return;
+ }
v9ses->flags &= ~V9FS_ACCESS_MASK;
if (strcmp(s, "user") == 0)
@@ -194,13 +205,11 @@ static int v9fs_parse_options(struct v9fs_session_info *v9ses, char *opts)
continue;
}
}
- kfree(options);
- return ret;
+free_and_return:
+ kfree(tmp_options);
fail_option_alloc:
- P9_DPRINTK(P9_DEBUG_ERROR,
- "failed to allocate copy of option argument\n");
- return -ENOMEM;
+ return ret;
}
/**
diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h
index 3a7560e3586..ed835836e0d 100644
--- a/fs/9p/v9fs_vfs.h
+++ b/fs/9p/v9fs_vfs.h
@@ -60,3 +60,4 @@ void v9fs_dentry_release(struct dentry *);
int v9fs_uflags2omode(int uflags, int extended);
ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64);
+void v9fs_blank_wstat(struct p9_wstat *wstat);
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 3902bf43a08..74a0461a9ac 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -257,6 +257,23 @@ v9fs_file_write(struct file *filp, const char __user * data,
return total;
}
+static int v9fs_file_fsync(struct file *filp, struct dentry *dentry,
+ int datasync)
+{
+ struct p9_fid *fid;
+ struct p9_wstat wstat;
+ int retval;
+
+ P9_DPRINTK(P9_DEBUG_VFS, "filp %p dentry %p datasync %x\n", filp,
+ dentry, datasync);
+
+ fid = filp->private_data;
+ v9fs_blank_wstat(&wstat);
+
+ retval = p9_client_wstat(fid, &wstat);
+ return retval;
+}
+
static const struct file_operations v9fs_cached_file_operations = {
.llseek = generic_file_llseek,
.read = do_sync_read,
@@ -266,6 +283,7 @@ static const struct file_operations v9fs_cached_file_operations = {
.release = v9fs_dir_release,
.lock = v9fs_file_lock,
.mmap = generic_file_readonly_mmap,
+ .fsync = v9fs_file_fsync,
};
const struct file_operations v9fs_file_operations = {
@@ -276,4 +294,5 @@ const struct file_operations v9fs_file_operations = {
.release = v9fs_dir_release,
.lock = v9fs_file_lock,
.mmap = generic_file_readonly_mmap,
+ .fsync = v9fs_file_fsync,
};
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 18f74ec4dce..a407fa3388c 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -176,7 +176,7 @@ int v9fs_uflags2omode(int uflags, int extended)
*
*/
-static void
+void
v9fs_blank_wstat(struct p9_wstat *wstat)
{
wstat->type = ~0;
@@ -1001,44 +1001,6 @@ done:
}
/**
- * v9fs_vfs_readlink - read a symlink's location
- * @dentry: dentry for symlink
- * @buffer: buffer to load symlink location into
- * @buflen: length of buffer
- *
- */
-
-static int v9fs_vfs_readlink(struct dentry *dentry, char __user * buffer,
- int buflen)
-{
- int retval;
- int ret;
- char *link = __getname();
-
- if (unlikely(!link))
- return -ENOMEM;
-
- if (buflen > PATH_MAX)
- buflen = PATH_MAX;
-
- P9_DPRINTK(P9_DEBUG_VFS, " dentry: %s (%p)\n", dentry->d_name.name,
- dentry);
-
- retval = v9fs_readlink(dentry, link, buflen);
-
- if (retval > 0) {
- if ((ret = copy_to_user(buffer, link, retval)) != 0) {
- P9_DPRINTK(P9_DEBUG_ERROR,
- "problem copying to user: %d\n", ret);
- retval = ret;
- }
- }
-
- __putname(link);
- return retval;
-}
-
-/**
* v9fs_vfs_follow_link - follow a symlink path
* @dentry: dentry for symlink
* @nd: nameidata
@@ -1230,7 +1192,6 @@ static const struct inode_operations v9fs_dir_inode_operations_ext = {
.rmdir = v9fs_vfs_rmdir,
.mknod = v9fs_vfs_mknod,
.rename = v9fs_vfs_rename,
- .readlink = v9fs_vfs_readlink,
.getattr = v9fs_vfs_getattr,
.setattr = v9fs_vfs_setattr,
};
@@ -1253,7 +1214,7 @@ static const struct inode_operations v9fs_file_inode_operations = {
};
static const struct inode_operations v9fs_symlink_inode_operations = {
- .readlink = v9fs_vfs_readlink,
+ .readlink = generic_readlink,
.follow_link = v9fs_vfs_follow_link,
.put_link = v9fs_vfs_put_link,
.getattr = v9fs_vfs_getattr,
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 14a86448572..69357c0d989 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -188,7 +188,8 @@ static void v9fs_kill_super(struct super_block *s)
P9_DPRINTK(P9_DEBUG_VFS, " %p\n", s);
- v9fs_dentry_release(s->s_root); /* clunk root */
+ if (s->s_root)
+ v9fs_dentry_release(s->s_root); /* clunk root */
kill_anon_super(s);
diff --git a/fs/Kconfig b/fs/Kconfig
index f8fccaaad62..64d44efad7a 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -6,10 +6,6 @@ menu "File systems"
if BLOCK
-config FS_JOURNAL_INFO
- bool
- default n
-
source "fs/ext2/Kconfig"
source "fs/ext3/Kconfig"
source "fs/ext4/Kconfig"
diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index e511dc621a2..0e40caaba45 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -106,8 +106,8 @@ struct affs_sb_info {
u32 s_last_bmap;
struct buffer_head *s_bmap_bh;
char *s_prefix; /* Prefix for volumes and assigns. */
- int s_prefix_len; /* Length of prefix. */
char s_volume[32]; /* Volume prefix for absolute symlinks. */
+ spinlock_t symlink_lock; /* protects the previous two */
};
#define SF_INTL 0x0001 /* International filesystem. */
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 960d336ec69..d70bbbac6b7 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -341,10 +341,13 @@ affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
p = (char *)AFFS_HEAD(bh)->table;
lc = '/';
if (*symname == '/') {
+ struct affs_sb_info *sbi = AFFS_SB(sb);
while (*symname == '/')
symname++;
- while (AFFS_SB(sb)->s_volume[i]) /* Cannot overflow */
- *p++ = AFFS_SB(sb)->s_volume[i++];
+ spin_lock(&sbi->symlink_lock);
+ while (sbi->s_volume[i]) /* Cannot overflow */
+ *p++ = sbi->s_volume[i++];
+ spin_unlock(&sbi->symlink_lock);
}
while (i < maxlen && (c = *symname++)) {
if (c == '.' && lc == '/' && *symname == '.' && symname[1] == '/') {
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 104fdcb3a7f..d41e9673cd9 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -203,7 +203,7 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
switch (token) {
case Opt_bs:
if (match_int(&args[0], &n))
- return -EINVAL;
+ return 0;
if (n != 512 && n != 1024 && n != 2048
&& n != 4096) {
printk ("AFFS: Invalid blocksize (512, 1024, 2048, 4096 allowed)\n");
@@ -213,7 +213,7 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
break;
case Opt_mode:
if (match_octal(&args[0], &option))
- return 1;
+ return 0;
*mode = option & 0777;
*mount_opts |= SF_SETMODE;
break;
@@ -221,8 +221,6 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
*mount_opts |= SF_MUFS;
break;
case Opt_prefix:
- /* Free any previous prefix */
- kfree(*prefix);
*prefix = match_strdup(&args[0]);
if (!*prefix)
return 0;
@@ -233,21 +231,21 @@ parse_options(char *options, uid_t *uid, gid_t *gid, int *mode, int *reserved, s
break;
case Opt_reserved:
if (match_int(&args[0], reserved))
- return 1;
+ return 0;
break;
case Opt_root:
if (match_int(&args[0], root))
- return 1;
+ return 0;
break;
case Opt_setgid:
if (match_int(&args[0], &option))
- return 1;
+ return 0;
*gid = option;
*mount_opts |= SF_SETGID;
break;
case Opt_setuid:
if (match_int(&args[0], &option))
- return -EINVAL;
+ return 0;
*uid = option;
*mount_opts |= SF_SETUID;
break;
@@ -311,11 +309,14 @@ static int affs_fill_super(struct super_block *sb, void *data, int silent)
return -ENOMEM;
sb->s_fs_info = sbi;
mutex_init(&sbi->s_bmlock);
+ spin_lock_init(&sbi->symlink_lock);
if (!parse_options(data,&uid,&gid,&i,&reserved,&root_block,
&blocksize,&sbi->s_prefix,
sbi->s_volume, &mount_flags)) {
printk(KERN_ERR "AFFS: Error parsing options\n");
+ kfree(sbi->s_prefix);
+ kfree(sbi);
return -EINVAL;
}
/* N.B. after this point s_prefix must be released */
@@ -516,14 +517,18 @@ affs_remount(struct super_block *sb, int *flags, char *data)
unsigned long mount_flags;
int res = 0;
char *new_opts = kstrdup(data, GFP_KERNEL);
+ char volume[32];
+ char *prefix = NULL;
pr_debug("AFFS: remount(flags=0x%x,opts=\"%s\")\n",*flags,data);
*flags |= MS_NODIRATIME;
+ memcpy(volume, sbi->s_volume, 32);
if (!parse_options(data, &uid, &gid, &mode, &reserved, &root_block,
- &blocksize, &sbi->s_prefix, sbi->s_volume,
+ &blocksize, &prefix, volume,
&mount_flags)) {
+ kfree(prefix);
kfree(new_opts);
return -EINVAL;
}
@@ -534,6 +539,14 @@ affs_remount(struct super_block *sb, int *flags, char *data)
sbi->s_mode = mode;
sbi->s_uid = uid;
sbi->s_gid = gid;
+ /* protect against readers */
+ spin_lock(&sbi->symlink_lock);
+ if (prefix) {
+ kfree(sbi->s_prefix);
+ sbi->s_prefix = prefix;
+ }
+ memcpy(sbi->s_volume, volume, 32);
+ spin_unlock(&sbi->symlink_lock);
if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
unlock_kernel();
diff --git a/fs/affs/symlink.c b/fs/affs/symlink.c
index 41782539c90..ee00f08c4f5 100644
--- a/fs/affs/symlink.c
+++ b/fs/affs/symlink.c
@@ -20,7 +20,6 @@ static int affs_symlink_readpage(struct file *file, struct page *page)
int i, j;
char c;
char lc;
- char *pf;
pr_debug("AFFS: follow_link(ino=%lu)\n",inode->i_ino);
@@ -32,11 +31,15 @@ static int affs_symlink_readpage(struct file *file, struct page *page)
j = 0;
lf = (struct slink_front *)bh->b_data;
lc = 0;
- pf = AFFS_SB(inode->i_sb)->s_prefix ? AFFS_SB(inode->i_sb)->s_prefix : "/";
if (strchr(lf->symname,':')) { /* Handle assign or volume name */
+ struct affs_sb_info *sbi = AFFS_SB(inode->i_sb);
+ char *pf;
+ spin_lock(&sbi->symlink_lock);
+ pf = sbi->s_prefix ? sbi->s_prefix : "/";
while (i < 1023 && (c = pf[i]))
link[i++] = c;
+ spin_unlock(&sbi->symlink_lock);
while (i < 1023 && lf->symname[j] != ':')
link[i++] = lf->symname[j++];
if (i < 1023)
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 2ca7a7cafdb..9f0bf13291e 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -35,14 +35,13 @@ static int anon_inodefs_get_sb(struct file_system_type *fs_type, int flags,
mnt);
}
-static int anon_inodefs_delete_dentry(struct dentry *dentry)
+/*
+ * anon_inodefs_dname() is called from d_path().
+ */
+static char *anon_inodefs_dname(struct dentry *dentry, char *buffer, int buflen)
{
- /*
- * We faked vfs to believe the dentry was hashed when we created it.
- * Now we restore the flag so that dput() will work correctly.
- */
- dentry->d_flags |= DCACHE_UNHASHED;
- return 1;
+ return dynamic_dname(dentry, buffer, buflen, "anon_inode:%s",
+ dentry->d_name.name);
}
static struct file_system_type anon_inode_fs_type = {
@@ -51,7 +50,7 @@ static struct file_system_type anon_inode_fs_type = {
.kill_sb = kill_anon_super,
};
static const struct dentry_operations anon_inodefs_dentry_operations = {
- .d_delete = anon_inodefs_delete_dentry,
+ .d_dname = anon_inodefs_dname,
};
/*
@@ -88,7 +87,7 @@ struct file *anon_inode_getfile(const char *name,
void *priv, int flags)
{
struct qstr this;
- struct dentry *dentry;
+ struct path path;
struct file *file;
int error;
@@ -106,10 +105,11 @@ struct file *anon_inode_getfile(const char *name,
this.name = name;
this.len = strlen(name);
this.hash = 0;
- dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this);
- if (!dentry)
+ path.dentry = d_alloc(anon_inode_mnt->mnt_sb->s_root, &this);
+ if (!path.dentry)
goto err_module;
+ path.mnt = mntget(anon_inode_mnt);
/*
* We know the anon_inode inode count is always greater than zero,
* so we can avoid doing an igrab() and we can use an open-coded
@@ -117,27 +117,24 @@ struct file *anon_inode_getfile(const char *name,
*/
atomic_inc(&anon_inode_inode->i_count);
- dentry->d_op = &anon_inodefs_dentry_operations;
- /* Do not publish this dentry inside the global dentry hash table */
- dentry->d_flags &= ~DCACHE_UNHASHED;
- d_instantiate(dentry, anon_inode_inode);
+ path.dentry->d_op = &anon_inodefs_dentry_operations;
+ d_instantiate(path.dentry, anon_inode_inode);
error = -ENFILE;
- file = alloc_file(anon_inode_mnt, dentry,
- FMODE_READ | FMODE_WRITE, fops);
+ file = alloc_file(&path, OPEN_FMODE(flags), fops);
if (!file)
goto err_dput;
file->f_mapping = anon_inode_inode->i_mapping;
file->f_pos = 0;
- file->f_flags = O_RDWR | (flags & O_NONBLOCK);
+ file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
file->f_version = 0;
file->private_data = priv;
return file;
err_dput:
- dput(dentry);
+ path_put(&path);
err_module:
module_put(fops->owner);
return ERR_PTR(error);
diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c
index 33baf27fac7..34ddda888e6 100644
--- a/fs/befs/linuxvfs.c
+++ b/fs/befs/linuxvfs.c
@@ -873,6 +873,7 @@ befs_fill_super(struct super_block *sb, void *data, int silent)
brelse(bh);
unacquire_priv_sbp:
+ kfree(befs_sb->mount_opts.iocharset);
kfree(sb->s_fs_info);
unacquire_none:
diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index 6f60336c662..8f3d9fd8960 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -353,35 +353,35 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
struct inode *inode;
unsigned i, imap_len;
struct bfs_sb_info *info;
- long ret = -EINVAL;
+ int ret = -EINVAL;
unsigned long i_sblock, i_eblock, i_eoff, s_size;
info = kzalloc(sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
+ mutex_init(&info->bfs_lock);
s->s_fs_info = info;
sb_set_blocksize(s, BFS_BSIZE);
- bh = sb_bread(s, 0);
- if(!bh)
+ info->si_sbh = sb_bread(s, 0);
+ if (!info->si_sbh)
goto out;
- bfs_sb = (struct bfs_super_block *)bh->b_data;
+ bfs_sb = (struct bfs_super_block *)info->si_sbh->b_data;
if (le32_to_cpu(bfs_sb->s_magic) != BFS_MAGIC) {
if (!silent)
printf("No BFS filesystem on %s (magic=%08x)\n",
s->s_id, le32_to_cpu(bfs_sb->s_magic));
- goto out;
+ goto out1;
}
if (BFS_UNCLEAN(bfs_sb, s) && !silent)
printf("%s is unclean, continuing\n", s->s_id);
s->s_magic = BFS_MAGIC;
- info->si_sbh = bh;
if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end)) {
printf("Superblock is corrupted\n");
- goto out;
+ goto out1;
}
info->si_lasti = (le32_to_cpu(bfs_sb->s_start) - BFS_BSIZE) /
@@ -390,7 +390,7 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
imap_len = (info->si_lasti / 8) + 1;
info->si_imap = kzalloc(imap_len, GFP_KERNEL);
if (!info->si_imap)
- goto out;
+ goto out1;
for (i = 0; i < BFS_ROOT_INO; i++)
set_bit(i, info->si_imap);
@@ -398,15 +398,13 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
inode = bfs_iget(s, BFS_ROOT_INO);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
- kfree(info->si_imap);
- goto out;
+ goto out2;
}
s->s_root = d_alloc_root(inode);
if (!s->s_root) {
iput(inode);
ret = -ENOMEM;
- kfree(info->si_imap);
- goto out;
+ goto out2;
}
info->si_blocks = (le32_to_cpu(bfs_sb->s_end) + 1) >> BFS_BSIZE_BITS;
@@ -419,10 +417,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
bh = sb_bread(s, info->si_blocks - 1);
if (!bh) {
printf("Last block not available: %lu\n", info->si_blocks - 1);
- iput(inode);
ret = -EIO;
- kfree(info->si_imap);
- goto out;
+ goto out3;
}
brelse(bh);
@@ -459,11 +455,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
printf("Inode 0x%08x corrupted\n", i);
brelse(bh);
- s->s_root = NULL;
- kfree(info->si_imap);
- kfree(info);
- s->s_fs_info = NULL;
- return -EIO;
+ ret = -EIO;
+ goto out3;
}
if (!di->i_ino) {
@@ -483,11 +476,17 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
s->s_dirt = 1;
}
dump_imap("read_super", s);
- mutex_init(&info->bfs_lock);
return 0;
+out3:
+ dput(s->s_root);
+ s->s_root = NULL;
+out2:
+ kfree(info->si_imap);
+out1:
+ brelse(info->si_sbh);
out:
- brelse(bh);
+ mutex_destroy(&info->bfs_lock);
kfree(info);
s->s_fs_info = NULL;
return ret;
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index b639dcf7c77..fdd39709917 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -32,7 +32,7 @@
static int load_aout_binary(struct linux_binprm *, struct pt_regs * regs);
static int load_aout_library(struct file*);
-static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int aout_core_dump(struct coredump_params *cprm);
static struct linux_binfmt aout_format = {
.module = THIS_MODULE,
@@ -89,8 +89,9 @@ if (file->f_op->llseek) { \
* dumping of the process results in another error..
*/
-static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int aout_core_dump(struct coredump_params *cprm)
{
+ struct file *file = cprm->file;
mm_segment_t fs;
int has_dumped = 0;
unsigned long dump_start, dump_size;
@@ -108,16 +109,16 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file, u
current->flags |= PF_DUMPCORE;
strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm));
dump.u_ar0 = offsetof(struct user, regs);
- dump.signal = signr;
- aout_dump_thread(regs, &dump);
+ dump.signal = cprm->signr;
+ aout_dump_thread(cprm->regs, &dump);
/* If the size of the dump file exceeds the rlimit, then see what would happen
if we wrote the stack, but not the data area. */
- if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > limit)
+ if ((dump.u_dsize + dump.u_ssize+1) * PAGE_SIZE > cprm->limit)
dump.u_dsize = 0;
/* Make sure we have enough room to write the stack and data areas. */
- if ((dump.u_ssize + 1) * PAGE_SIZE > limit)
+ if ((dump.u_ssize + 1) * PAGE_SIZE > cprm->limit)
dump.u_ssize = 0;
/* make sure we actually have a data and stack area to dump */
@@ -263,6 +264,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs)
#else
set_personality(PER_LINUX);
#endif
+ setup_new_exec(bprm);
current->mm->end_code = ex.a_text +
(current->mm->start_code = N_TXTADDR(ex));
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 97b6e9efeb7..fd5b2ea5d29 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -45,7 +45,7 @@ static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
* don't even try.
*/
#ifdef CONFIG_ELF_CORE
-static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump NULL
#endif
@@ -662,27 +662,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
goto out_free_interp;
- /*
- * The early SET_PERSONALITY here is so that the lookup
- * for the interpreter happens in the namespace of the
- * to-be-execed image. SET_PERSONALITY can select an
- * alternate root.
- *
- * However, SET_PERSONALITY is NOT allowed to switch
- * this task into the new images's memory mapping
- * policy - that is, TASK_SIZE must still evaluate to
- * that which is appropriate to the execing application.
- * This is because exit_mmap() needs to have TASK_SIZE
- * evaluate to the size of the old image.
- *
- * So if (say) a 64-bit application is execing a 32-bit
- * application it is the architecture's responsibility
- * to defer changing the value of TASK_SIZE until the
- * switch really is going to happen - do this in
- * flush_thread(). - akpm
- */
- SET_PERSONALITY(loc->elf_ex);
-
interpreter = open_exec(elf_interpreter);
retval = PTR_ERR(interpreter);
if (IS_ERR(interpreter))
@@ -730,9 +709,6 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
/* Verify the interpreter has a valid arch */
if (!elf_check_arch(&loc->interp_elf_ex))
goto out_free_dentry;
- } else {
- /* Executables without an interpreter also need a personality */
- SET_PERSONALITY(loc->elf_ex);
}
/* Flush all traces of the currently running executable */
@@ -752,7 +728,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
current->flags |= PF_RANDOMIZE;
- arch_pick_mmap_layout(current->mm);
+
+ setup_new_exec(bprm);
/* Do this so that we can load the interpreter, if need be. We will
change some of these later */
@@ -1272,8 +1249,9 @@ static int writenote(struct memelfnote *men, struct file *file,
}
#undef DUMP_WRITE
-#define DUMP_WRITE(addr, nr) \
- if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
+#define DUMP_WRITE(addr, nr) \
+ if ((size += (nr)) > cprm->limit || \
+ !dump_write(cprm->file, (addr), (nr))) \
goto end_coredump;
static void fill_elf_header(struct elfhdr *elf, int segs,
@@ -1901,7 +1879,7 @@ static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
* and then they are actually written out. If we run out of core limit
* we just truncate.
*/
-static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int elf_core_dump(struct coredump_params *cprm)
{
int has_dumped = 0;
mm_segment_t fs;
@@ -1947,7 +1925,7 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
* notes. This also sets up the file header.
*/
if (!fill_note_info(elf, segs + 1, /* including notes section */
- &info, signr, regs))
+ &info, cprm->signr, cprm->regs))
goto cleanup;
has_dumped = 1;
@@ -2009,14 +1987,14 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
#endif
/* write out the notes section */
- if (!write_note_info(&info, file, &foffset))
+ if (!write_note_info(&info, cprm->file, &foffset))
goto end_coredump;
- if (elf_coredump_extra_notes_write(file, &foffset))
+ if (elf_coredump_extra_notes_write(cprm->file, &foffset))
goto end_coredump;
/* Align to page */
- if (!dump_seek(file, dataoff - foffset))
+ if (!dump_seek(cprm->file, dataoff - foffset))
goto end_coredump;
for (vma = first_vma(current, gate_vma); vma != NULL;
@@ -2033,12 +2011,13 @@ static int elf_core_dump(long signr, struct pt_regs *regs, struct file *file, un
page = get_dump_page(addr);
if (page) {
void *kaddr = kmap(page);
- stop = ((size += PAGE_SIZE) > limit) ||
- !dump_write(file, kaddr, PAGE_SIZE);
+ stop = ((size += PAGE_SIZE) > cprm->limit) ||
+ !dump_write(cprm->file, kaddr,
+ PAGE_SIZE);
kunmap(page);
page_cache_release(page);
} else
- stop = !dump_seek(file, PAGE_SIZE);
+ stop = !dump_seek(cprm->file, PAGE_SIZE);
if (stop)
goto end_coredump;
}
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 7b055385db8..18d77297ccc 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -76,7 +76,7 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *,
struct file *, struct mm_struct *);
#ifdef CONFIG_ELF_CORE
-static int elf_fdpic_core_dump(long, struct pt_regs *, struct file *, unsigned long limit);
+static int elf_fdpic_core_dump(struct coredump_params *cprm);
#endif
static struct linux_binfmt elf_fdpic_format = {
@@ -171,6 +171,9 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
#ifdef ELF_FDPIC_PLAT_INIT
unsigned long dynaddr;
#endif
+#ifndef CONFIG_MMU
+ unsigned long stack_prot;
+#endif
struct file *interpreter = NULL; /* to shut gcc up */
char *interpreter_name = NULL;
int executable_stack;
@@ -316,6 +319,11 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
* defunct, deceased, etc. after this point we have to exit via
* error_kill */
set_personality(PER_LINUX_FDPIC);
+ if (elf_read_implies_exec(&exec_params.hdr, executable_stack))
+ current->personality |= READ_IMPLIES_EXEC;
+
+ setup_new_exec(bprm);
+
set_binfmt(&elf_fdpic_format);
current->mm->start_code = 0;
@@ -377,9 +385,13 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm,
if (stack_size < PAGE_SIZE * 2)
stack_size = PAGE_SIZE * 2;
+ stack_prot = PROT_READ | PROT_WRITE;
+ if (executable_stack == EXSTACK_ENABLE_X ||
+ (executable_stack == EXSTACK_DEFAULT && VM_STACK_FLAGS & VM_EXEC))
+ stack_prot |= PROT_EXEC;
+
down_write(&current->mm->mmap_sem);
- current->mm->start_brk = do_mmap(NULL, 0, stack_size,
- PROT_READ | PROT_WRITE | PROT_EXEC,
+ current->mm->start_brk = do_mmap(NULL, 0, stack_size, stack_prot,
MAP_PRIVATE | MAP_ANONYMOUS |
MAP_UNINITIALIZED | MAP_GROWSDOWN,
0);
@@ -1326,8 +1338,9 @@ static int writenote(struct memelfnote *men, struct file *file)
#undef DUMP_WRITE
#undef DUMP_SEEK
-#define DUMP_WRITE(addr, nr) \
- if ((size += (nr)) > limit || !dump_write(file, (addr), (nr))) \
+#define DUMP_WRITE(addr, nr) \
+ if ((size += (nr)) > cprm->limit || \
+ !dump_write(cprm->file, (addr), (nr))) \
goto end_coredump;
static inline void fill_elf_fdpic_header(struct elfhdr *elf, int segs)
@@ -1582,8 +1595,7 @@ static int elf_fdpic_dump_segments(struct file *file, size_t *size,
* and then they are actually written out. If we run out of core limit
* we just truncate.
*/
-static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
- struct file *file, unsigned long limit)
+static int elf_fdpic_core_dump(struct coredump_params *cprm)
{
#define NUM_NOTES 6
int has_dumped = 0;
@@ -1642,7 +1654,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
goto cleanup;
#endif
- if (signr) {
+ if (cprm->signr) {
struct core_thread *ct;
struct elf_thread_status *tmp;
@@ -1661,14 +1673,14 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
int sz;
tmp = list_entry(t, struct elf_thread_status, list);
- sz = elf_dump_thread_status(signr, tmp);
+ sz = elf_dump_thread_status(cprm->signr, tmp);
thread_status_size += sz;
}
}
/* now collect the dump for the current */
- fill_prstatus(prstatus, current, signr);
- elf_core_copy_regs(&prstatus->pr_reg, regs);
+ fill_prstatus(prstatus, current, cprm->signr);
+ elf_core_copy_regs(&prstatus->pr_reg, cprm->regs);
segs = current->mm->map_count;
#ifdef ELF_CORE_EXTRA_PHDRS
@@ -1703,7 +1715,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
/* Try to dump the FPU. */
if ((prstatus->pr_fpvalid =
- elf_core_copy_task_fpregs(current, regs, fpu)))
+ elf_core_copy_task_fpregs(current, cprm->regs, fpu)))
fill_note(notes + numnote++,
"CORE", NT_PRFPREG, sizeof(*fpu), fpu);
#ifdef ELF_CORE_COPY_XFPREGS
@@ -1774,7 +1786,7 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
/* write out the notes section */
for (i = 0; i < numnote; i++)
- if (!writenote(notes + i, file))
+ if (!writenote(notes + i, cprm->file))
goto end_coredump;
/* write out the thread status notes section */
@@ -1783,25 +1795,26 @@ static int elf_fdpic_core_dump(long signr, struct pt_regs *regs,
list_entry(t, struct elf_thread_status, list);
for (i = 0; i < tmp->num_notes; i++)
- if (!writenote(&tmp->notes[i], file))
+ if (!writenote(&tmp->notes[i], cprm->file))
goto end_coredump;
}
- if (!dump_seek(file, dataoff))
+ if (!dump_seek(cprm->file, dataoff))
goto end_coredump;
- if (elf_fdpic_dump_segments(file, &size, &limit, mm_flags) < 0)
+ if (elf_fdpic_dump_segments(cprm->file, &size, &cprm->limit,
+ mm_flags) < 0)
goto end_coredump;
#ifdef ELF_CORE_WRITE_EXTRA_DATA
ELF_CORE_WRITE_EXTRA_DATA;
#endif
- if (file->f_pos != offset) {
+ if (cprm->file->f_pos != offset) {
/* Sanity check */
printk(KERN_WARNING
"elf_core_dump: file->f_pos (%lld) != offset (%lld)\n",
- file->f_pos, offset);
+ cprm->file->f_pos, offset);
}
end_coredump:
diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c
index a2796651e75..42c6b4a5444 100644
--- a/fs/binfmt_flat.c
+++ b/fs/binfmt_flat.c
@@ -87,7 +87,7 @@ static int load_flat_shared_library(int id, struct lib_info *p);
#endif
static int load_flat_binary(struct linux_binprm *, struct pt_regs * regs);
-static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit);
+static int flat_core_dump(struct coredump_params *cprm);
static struct linux_binfmt flat_format = {
.module = THIS_MODULE,
@@ -102,10 +102,10 @@ static struct linux_binfmt flat_format = {
* Currently only a stub-function.
*/
-static int flat_core_dump(long signr, struct pt_regs *regs, struct file *file, unsigned long limit)
+static int flat_core_dump(struct coredump_params *cprm)
{
printk("Process %s:%d received signr %d and should have core dumped\n",
- current->comm, current->pid, (int) signr);
+ current->comm, current->pid, (int) cprm->signr);
return(1);
}
@@ -519,6 +519,7 @@ static int load_flat_file(struct linux_binprm * bprm,
/* OK, This is the point of no return */
set_personality(PER_LINUX_32BIT);
+ setup_new_exec(bprm);
}
/*
diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c
index eff74b9c9e7..cc8560f6c9b 100644
--- a/fs/binfmt_som.c
+++ b/fs/binfmt_som.c
@@ -43,7 +43,7 @@ static int load_som_library(struct file *);
* don't even try.
*/
#if 0
-static int som_core_dump(long signr, struct pt_regs *regs, unsigned long limit);
+static int som_core_dump(struct coredump_params *cprm);
#else
#define som_core_dump NULL
#endif
@@ -227,6 +227,7 @@ load_som_binary(struct linux_binprm * bprm, struct pt_regs * regs)
/* OK, This is the point of no return */
current->flags &= ~PF_FORKNOEXEC;
current->personality = PER_HPUX;
+ setup_new_exec(bprm);
/* Set the task size for HP-UX processes such that
* the gateway page is outside the address space.
diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c
index 49a34e7f730..a16f29e888c 100644
--- a/fs/bio-integrity.c
+++ b/fs/bio-integrity.c
@@ -61,7 +61,7 @@ static inline unsigned int vecs_to_idx(unsigned int nr)
static inline int use_bip_pool(unsigned int idx)
{
- if (idx == BIOVEC_NR_POOLS)
+ if (idx == BIOVEC_MAX_IDX)
return 1;
return 0;
@@ -95,6 +95,7 @@ struct bio_integrity_payload *bio_integrity_alloc_bioset(struct bio *bio,
/* Use mempool if lower order alloc failed or max vecs were requested */
if (bip == NULL) {
+ idx = BIOVEC_MAX_IDX; /* so we free the payload properly later */
bip = mempool_alloc(bs->bio_integrity_pool, gfp_mask);
if (unlikely(bip == NULL)) {
diff --git a/fs/bio.c b/fs/bio.c
index 76e6713abf9..88094afc29e 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -78,7 +78,7 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size)
i = 0;
while (i < bio_slab_nr) {
- struct bio_slab *bslab = &bio_slabs[i];
+ bslab = &bio_slabs[i];
if (!bslab->slab && entry == -1)
entry = i;
@@ -542,13 +542,18 @@ static int __bio_add_page(struct request_queue *q, struct bio *bio, struct page
if (page == prev->bv_page &&
offset == prev->bv_offset + prev->bv_len) {
+ unsigned int prev_bv_len = prev->bv_len;
prev->bv_len += len;
if (q->merge_bvec_fn) {
struct bvec_merge_data bvm = {
+ /* prev_bvec is already charged in
+ bi_size, discharge it in order to
+ simulate merging updated prev_bvec
+ as new bvec. */
.bi_bdev = bio->bi_bdev,
.bi_sector = bio->bi_sector,
- .bi_size = bio->bi_size,
+ .bi_size = bio->bi_size - prev_bv_len,
.bi_rw = bio->bi_rw,
};
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 73d6a735b8f..d11d0289f3d 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -246,7 +246,8 @@ struct super_block *freeze_bdev(struct block_device *bdev)
if (!sb)
goto out;
if (sb->s_flags & MS_RDONLY) {
- deactivate_locked_super(sb);
+ sb->s_frozen = SB_FREEZE_TRANS;
+ up_write(&sb->s_umount);
mutex_unlock(&bdev->bd_fsfreeze_mutex);
return sb;
}
@@ -307,7 +308,7 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb)
BUG_ON(sb->s_bdev != bdev);
down_write(&sb->s_umount);
if (sb->s_flags & MS_RDONLY)
- goto out_deactivate;
+ goto out_unfrozen;
if (sb->s_op->unfreeze_fs) {
error = sb->s_op->unfreeze_fs(sb);
@@ -321,11 +322,11 @@ int thaw_bdev(struct block_device *bdev, struct super_block *sb)
}
}
+out_unfrozen:
sb->s_frozen = SB_UNFROZEN;
smp_wmb();
wake_up(&sb->s_wait_unfrozen);
-out_deactivate:
if (sb)
deactivate_locked_super(sb);
out_unlock:
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 402afe0a0bf..7bb3c020e57 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -4,7 +4,6 @@ config BTRFS_FS
select LIBCRC32C
select ZLIB_INFLATE
select ZLIB_DEFLATE
- select FS_JOURNAL_INFO
help
Btrfs is a new filesystem with extents, writable snapshotting,
support for multiple devices and many more features.
diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
index 36160424427..6df6d6ed74f 100644
--- a/fs/btrfs/acl.c
+++ b/fs/btrfs/acl.c
@@ -73,13 +73,13 @@ static struct posix_acl *btrfs_get_acl(struct inode *inode, int type)
return acl;
}
-static int btrfs_xattr_get_acl(struct inode *inode, int type,
- void *value, size_t size)
+static int btrfs_xattr_acl_get(struct dentry *dentry, const char *name,
+ void *value, size_t size, int type)
{
struct posix_acl *acl;
int ret = 0;
- acl = btrfs_get_acl(inode, type);
+ acl = btrfs_get_acl(dentry->d_inode, type);
if (IS_ERR(acl))
return PTR_ERR(acl);
@@ -94,7 +94,8 @@ static int btrfs_xattr_get_acl(struct inode *inode, int type,
/*
* Needs to be called with fs_mutex held
*/
-static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+static int btrfs_set_acl(struct btrfs_trans_handle *trans,
+ struct inode *inode, struct posix_acl *acl, int type)
{
int ret, size = 0;
const char *name;
@@ -111,12 +112,14 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
switch (type) {
case ACL_TYPE_ACCESS:
mode = inode->i_mode;
- ret = posix_acl_equiv_mode(acl, &mode);
- if (ret < 0)
- return ret;
- ret = 0;
- inode->i_mode = mode;
name = POSIX_ACL_XATTR_ACCESS;
+ if (acl) {
+ ret = posix_acl_equiv_mode(acl, &mode);
+ if (ret < 0)
+ return ret;
+ inode->i_mode = mode;
+ }
+ ret = 0;
break;
case ACL_TYPE_DEFAULT:
if (!S_ISDIR(inode->i_mode))
@@ -140,8 +143,7 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
goto out;
}
- ret = __btrfs_setxattr(inode, name, value, size, 0);
-
+ ret = __btrfs_setxattr(trans, inode, name, value, size, 0);
out:
kfree(value);
@@ -151,10 +153,10 @@ out:
return ret;
}
-static int btrfs_xattr_set_acl(struct inode *inode, int type,
- const void *value, size_t size)
+static int btrfs_xattr_acl_set(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags, int type)
{
- int ret = 0;
+ int ret;
struct posix_acl *acl = NULL;
if (value) {
@@ -167,38 +169,13 @@ static int btrfs_xattr_set_acl(struct inode *inode, int type,
}
}
- ret = btrfs_set_acl(inode, acl, type);
+ ret = btrfs_set_acl(NULL, dentry->d_inode, acl, type);
posix_acl_release(acl);
return ret;
}
-
-static int btrfs_xattr_acl_access_get(struct inode *inode, const char *name,
- void *value, size_t size)
-{
- return btrfs_xattr_get_acl(inode, ACL_TYPE_ACCESS, value, size);
-}
-
-static int btrfs_xattr_acl_access_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
-{
- return btrfs_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
-}
-
-static int btrfs_xattr_acl_default_get(struct inode *inode, const char *name,
- void *value, size_t size)
-{
- return btrfs_xattr_get_acl(inode, ACL_TYPE_DEFAULT, value, size);
-}
-
-static int btrfs_xattr_acl_default_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
-{
- return btrfs_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
-}
-
int btrfs_check_acl(struct inode *inode, int mask)
{
struct posix_acl *acl;
@@ -221,7 +198,8 @@ int btrfs_check_acl(struct inode *inode, int mask)
* stuff has been fixed to work with that. If the locking stuff changes, we
* need to re-evaluate the acl locking stuff.
*/
-int btrfs_init_acl(struct inode *inode, struct inode *dir)
+int btrfs_init_acl(struct btrfs_trans_handle *trans,
+ struct inode *inode, struct inode *dir)
{
struct posix_acl *acl = NULL;
int ret = 0;
@@ -246,7 +224,8 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
mode_t mode;
if (S_ISDIR(inode->i_mode)) {
- ret = btrfs_set_acl(inode, acl, ACL_TYPE_DEFAULT);
+ ret = btrfs_set_acl(trans, inode, acl,
+ ACL_TYPE_DEFAULT);
if (ret)
goto failed;
}
@@ -261,10 +240,11 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
inode->i_mode = mode;
if (ret > 0) {
/* we need an acl */
- ret = btrfs_set_acl(inode, clone,
+ ret = btrfs_set_acl(trans, inode, clone,
ACL_TYPE_ACCESS);
}
}
+ posix_acl_release(clone);
}
failed:
posix_acl_release(acl);
@@ -294,7 +274,7 @@ int btrfs_acl_chmod(struct inode *inode)
ret = posix_acl_chmod_masq(clone, inode->i_mode);
if (!ret)
- ret = btrfs_set_acl(inode, clone, ACL_TYPE_ACCESS);
+ ret = btrfs_set_acl(NULL, inode, clone, ACL_TYPE_ACCESS);
posix_acl_release(clone);
@@ -303,14 +283,16 @@ int btrfs_acl_chmod(struct inode *inode)
struct xattr_handler btrfs_xattr_acl_default_handler = {
.prefix = POSIX_ACL_XATTR_DEFAULT,
- .get = btrfs_xattr_acl_default_get,
- .set = btrfs_xattr_acl_default_set,
+ .flags = ACL_TYPE_DEFAULT,
+ .get = btrfs_xattr_acl_get,
+ .set = btrfs_xattr_acl_set,
};
struct xattr_handler btrfs_xattr_acl_access_handler = {
.prefix = POSIX_ACL_XATTR_ACCESS,
- .get = btrfs_xattr_acl_access_get,
- .set = btrfs_xattr_acl_access_set,
+ .flags = ACL_TYPE_ACCESS,
+ .get = btrfs_xattr_acl_get,
+ .set = btrfs_xattr_acl_set,
};
#else /* CONFIG_BTRFS_FS_POSIX_ACL */
@@ -320,7 +302,8 @@ int btrfs_acl_chmod(struct inode *inode)
return 0;
}
-int btrfs_init_acl(struct inode *inode, struct inode *dir)
+int btrfs_init_acl(struct btrfs_trans_handle *trans,
+ struct inode *inode, struct inode *dir)
{
return 0;
}
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index f6783a42f01..3f1f50d9d91 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -44,9 +44,6 @@ struct btrfs_inode {
*/
struct extent_io_tree io_failure_tree;
- /* held while inesrting or deleting extents from files */
- struct mutex extent_mutex;
-
/* held while logging the inode in tree-log.c */
struct mutex log_mutex;
@@ -166,7 +163,7 @@ static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
static inline void btrfs_i_size_write(struct inode *inode, u64 size)
{
- inode->i_size = size;
+ i_size_write(inode, size);
BTRFS_I(inode)->disk_i_size = size;
}
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index ec96f3a6d53..c4bc570a396 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -37,6 +37,11 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
struct extent_buffer *src_buf);
static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
struct btrfs_path *path, int level, int slot);
+static int setup_items_for_insert(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, struct btrfs_path *path,
+ struct btrfs_key *cpu_key, u32 *data_size,
+ u32 total_data, u32 total_size, int nr);
+
struct btrfs_path *btrfs_alloc_path(void)
{
@@ -451,9 +456,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
extent_buffer_get(cow);
spin_unlock(&root->node_lock);
- btrfs_free_extent(trans, root, buf->start, buf->len,
- parent_start, root->root_key.objectid,
- level, 0);
+ btrfs_free_tree_block(trans, root, buf->start, buf->len,
+ parent_start, root->root_key.objectid, level);
free_extent_buffer(buf);
add_root_to_dirty_list(root);
} else {
@@ -468,9 +472,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
btrfs_set_node_ptr_generation(parent, parent_slot,
trans->transid);
btrfs_mark_buffer_dirty(parent);
- btrfs_free_extent(trans, root, buf->start, buf->len,
- parent_start, root->root_key.objectid,
- level, 0);
+ btrfs_free_tree_block(trans, root, buf->start, buf->len,
+ parent_start, root->root_key.objectid, level);
}
if (unlock_orig)
btrfs_tree_unlock(buf);
@@ -1030,8 +1033,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
btrfs_tree_unlock(mid);
/* once for the path */
free_extent_buffer(mid);
- ret = btrfs_free_extent(trans, root, mid->start, mid->len,
- 0, root->root_key.objectid, level, 1);
+ ret = btrfs_free_tree_block(trans, root, mid->start, mid->len,
+ 0, root->root_key.objectid, level);
/* once for the root ptr */
free_extent_buffer(mid);
return ret;
@@ -1095,10 +1098,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
1);
if (wret)
ret = wret;
- wret = btrfs_free_extent(trans, root, bytenr,
- blocksize, 0,
- root->root_key.objectid,
- level, 0);
+ wret = btrfs_free_tree_block(trans, root,
+ bytenr, blocksize, 0,
+ root->root_key.objectid,
+ level);
if (wret)
ret = wret;
} else {
@@ -1143,9 +1146,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
wret = del_ptr(trans, root, path, level + 1, pslot);
if (wret)
ret = wret;
- wret = btrfs_free_extent(trans, root, bytenr, blocksize,
- 0, root->root_key.objectid,
- level, 0);
+ wret = btrfs_free_tree_block(trans, root, bytenr, blocksize,
+ 0, root->root_key.objectid, level);
if (wret)
ret = wret;
} else {
@@ -2997,75 +2999,85 @@ again:
return ret;
}
-/*
- * This function splits a single item into two items,
- * giving 'new_key' to the new item and splitting the
- * old one at split_offset (from the start of the item).
- *
- * The path may be released by this operation. After
- * the split, the path is pointing to the old item. The
- * new item is going to be in the same node as the old one.
- *
- * Note, the item being split must be smaller enough to live alone on
- * a tree block with room for one extra struct btrfs_item
- *
- * This allows us to split the item in place, keeping a lock on the
- * leaf the entire time.
- */
-int btrfs_split_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_key *new_key,
- unsigned long split_offset)
+static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, int ins_len)
{
- u32 item_size;
+ struct btrfs_key key;
struct extent_buffer *leaf;
- struct btrfs_key orig_key;
- struct btrfs_item *item;
- struct btrfs_item *new_item;
- int ret = 0;
- int slot;
- u32 nritems;
- u32 orig_offset;
- struct btrfs_disk_key disk_key;
- char *buf;
+ struct btrfs_file_extent_item *fi;
+ u64 extent_len = 0;
+ u32 item_size;
+ int ret;
leaf = path->nodes[0];
- btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]);
- if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item))
- goto split;
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+
+ BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
+ key.type != BTRFS_EXTENT_CSUM_KEY);
+
+ if (btrfs_leaf_free_space(root, leaf) >= ins_len)
+ return 0;
item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+ if (key.type == BTRFS_EXTENT_DATA_KEY) {
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ extent_len = btrfs_file_extent_num_bytes(leaf, fi);
+ }
btrfs_release_path(root, path);
- path->search_for_split = 1;
path->keep_locks = 1;
-
- ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1);
+ path->search_for_split = 1;
+ ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
path->search_for_split = 0;
+ if (ret < 0)
+ goto err;
+ ret = -EAGAIN;
+ leaf = path->nodes[0];
/* if our item isn't there or got smaller, return now */
- if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0],
- path->slots[0])) {
- path->keep_locks = 0;
- return -EAGAIN;
+ if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
+ goto err;
+
+ if (key.type == BTRFS_EXTENT_DATA_KEY) {
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
+ goto err;
}
btrfs_set_path_blocking(path);
- ret = split_leaf(trans, root, &orig_key, path,
- sizeof(struct btrfs_item), 1);
- path->keep_locks = 0;
+ ret = split_leaf(trans, root, &key, path, ins_len, 1);
BUG_ON(ret);
+ path->keep_locks = 0;
btrfs_unlock_up_safe(path, 1);
+ return 0;
+err:
+ path->keep_locks = 0;
+ return ret;
+}
+
+static noinline int split_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_key *new_key,
+ unsigned long split_offset)
+{
+ struct extent_buffer *leaf;
+ struct btrfs_item *item;
+ struct btrfs_item *new_item;
+ int slot;
+ char *buf;
+ u32 nritems;
+ u32 item_size;
+ u32 orig_offset;
+ struct btrfs_disk_key disk_key;
+
leaf = path->nodes[0];
BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
-split:
- /*
- * make sure any changes to the path from split_leaf leave it
- * in a blocking state
- */
btrfs_set_path_blocking(path);
item = btrfs_item_nr(leaf, path->slots[0]);
@@ -3073,19 +3085,19 @@ split:
item_size = btrfs_item_size(leaf, item);
buf = kmalloc(item_size, GFP_NOFS);
+ if (!buf)
+ return -ENOMEM;
+
read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
path->slots[0]), item_size);
- slot = path->slots[0] + 1;
- leaf = path->nodes[0];
+ slot = path->slots[0] + 1;
nritems = btrfs_header_nritems(leaf);
-
if (slot != nritems) {
/* shift the items */
memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
- btrfs_item_nr_offset(slot),
- (nritems - slot) * sizeof(struct btrfs_item));
-
+ btrfs_item_nr_offset(slot),
+ (nritems - slot) * sizeof(struct btrfs_item));
}
btrfs_cpu_key_to_disk(&disk_key, new_key);
@@ -3113,16 +3125,81 @@ split:
item_size - split_offset);
btrfs_mark_buffer_dirty(leaf);
- ret = 0;
- if (btrfs_leaf_free_space(root, leaf) < 0) {
- btrfs_print_leaf(root, leaf);
- BUG();
- }
+ BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
kfree(buf);
+ return 0;
+}
+
+/*
+ * This function splits a single item into two items,
+ * giving 'new_key' to the new item and splitting the
+ * old one at split_offset (from the start of the item).
+ *
+ * The path may be released by this operation. After
+ * the split, the path is pointing to the old item. The
+ * new item is going to be in the same node as the old one.
+ *
+ * Note, the item being split must be smaller enough to live alone on
+ * a tree block with room for one extra struct btrfs_item
+ *
+ * This allows us to split the item in place, keeping a lock on the
+ * leaf the entire time.
+ */
+int btrfs_split_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_key *new_key,
+ unsigned long split_offset)
+{
+ int ret;
+ ret = setup_leaf_for_split(trans, root, path,
+ sizeof(struct btrfs_item));
+ if (ret)
+ return ret;
+
+ ret = split_item(trans, root, path, new_key, split_offset);
return ret;
}
/*
+ * This function duplicate a item, giving 'new_key' to the new item.
+ * It guarantees both items live in the same tree leaf and the new item
+ * is contiguous with the original item.
+ *
+ * This allows us to split file extent in place, keeping a lock on the
+ * leaf the entire time.
+ */
+int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_key *new_key)
+{
+ struct extent_buffer *leaf;
+ int ret;
+ u32 item_size;
+
+ leaf = path->nodes[0];
+ item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+ ret = setup_leaf_for_split(trans, root, path,
+ item_size + sizeof(struct btrfs_item));
+ if (ret)
+ return ret;
+
+ path->slots[0]++;
+ ret = setup_items_for_insert(trans, root, path, new_key, &item_size,
+ item_size, item_size +
+ sizeof(struct btrfs_item), 1);
+ BUG_ON(ret);
+
+ leaf = path->nodes[0];
+ memcpy_extent_buffer(leaf,
+ btrfs_item_ptr_offset(leaf, path->slots[0]),
+ btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
+ item_size);
+ return 0;
+}
+
+/*
* make the item pointed to by the path smaller. new_size indicates
* how small to make it, and from_end tells us if we just chop bytes
* off the end of the item or if we shift the item to chop bytes off
@@ -3714,8 +3791,8 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
*/
btrfs_unlock_up_safe(path, 0);
- ret = btrfs_free_extent(trans, root, leaf->start, leaf->len,
- 0, root->root_key.objectid, 0, 0);
+ ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len,
+ 0, root->root_key.objectid, 0);
return ret;
}
/*
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 444b3e9b92a..2aa8ec6a098 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -310,6 +310,9 @@ struct btrfs_header {
#define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
sizeof(struct btrfs_item) - \
sizeof(struct btrfs_file_extent_item))
+#define BTRFS_MAX_XATTR_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
+ sizeof(struct btrfs_item) -\
+ sizeof(struct btrfs_dir_item))
/*
@@ -859,8 +862,9 @@ struct btrfs_fs_info {
struct mutex ordered_operations_mutex;
struct rw_semaphore extent_commit_sem;
- struct rw_semaphore subvol_sem;
+ struct rw_semaphore cleanup_work_sem;
+ struct rw_semaphore subvol_sem;
struct srcu_struct subvol_srcu;
struct list_head trans_list;
@@ -868,6 +872,9 @@ struct btrfs_fs_info {
struct list_head dead_roots;
struct list_head caching_block_groups;
+ spinlock_t delayed_iput_lock;
+ struct list_head delayed_iputs;
+
atomic_t nr_async_submits;
atomic_t async_submit_draining;
atomic_t nr_async_bios;
@@ -1034,12 +1041,12 @@ struct btrfs_root {
int ref_cows;
int track_dirty;
int in_radix;
+ int clean_orphans;
u64 defrag_trans_start;
struct btrfs_key defrag_progress;
struct btrfs_key defrag_max;
int defrag_running;
- int defrag_level;
char *name;
int in_sysfs;
@@ -1154,6 +1161,7 @@ struct btrfs_root {
#define BTRFS_MOUNT_SSD_SPREAD (1 << 8)
#define BTRFS_MOUNT_NOSSD (1 << 9)
#define BTRFS_MOUNT_DISCARD (1 << 10)
+#define BTRFS_MOUNT_FORCE_COMPRESS (1 << 11)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -1975,6 +1983,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
u64 parent, u64 root_objectid,
struct btrfs_disk_key *key, int level,
u64 hint, u64 empty_size);
+int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, u32 blocksize,
+ u64 parent, u64 root_objectid, int level);
struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
u64 bytenr, u32 blocksize,
@@ -2089,6 +2101,10 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_key *new_key,
unsigned long split_offset);
+int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path,
+ struct btrfs_key *new_key);
int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
*root, struct btrfs_key *key, struct btrfs_path *p, int
ins_len, int cow);
@@ -2196,9 +2212,10 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
struct btrfs_path *path,
struct btrfs_dir_item *di);
int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, const char *name,
- u16 name_len, const void *data, u16 data_len,
- u64 dir);
+ struct btrfs_root *root,
+ struct btrfs_path *path, u64 objectid,
+ const char *name, u16 name_len,
+ const void *data, u16 data_len);
struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path, u64 dir,
@@ -2292,7 +2309,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
struct inode *inode, u64 new_size,
u32 min_type);
-int btrfs_start_delalloc_inodes(struct btrfs_root *root);
+int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end);
int btrfs_writepages(struct address_space *mapping,
struct writeback_control *wbc);
@@ -2332,6 +2349,8 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
void btrfs_orphan_cleanup(struct btrfs_root *root);
int btrfs_cont_expand(struct inode *inode, loff_t size);
int btrfs_invalidate_inodes(struct btrfs_root *root);
+void btrfs_add_delayed_iput(struct inode *inode);
+void btrfs_run_delayed_iputs(struct btrfs_root *root);
extern const struct dentry_operations btrfs_dentry_operations;
/* ioctl.c */
@@ -2345,12 +2364,9 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
int skip_pinned);
int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
extern const struct file_operations btrfs_file_operations;
-int btrfs_drop_extents(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
- u64 start, u64 end, u64 locked_end,
- u64 inline_limit, u64 *hint_block, int drop_cache);
+int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
+ u64 start, u64 end, u64 *hint_byte, int drop_cache);
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
struct inode *inode, u64 start, u64 end);
int btrfs_release_file(struct inode *inode, struct file *file);
@@ -2380,7 +2396,8 @@ int btrfs_check_acl(struct inode *inode, int mask);
#else
#define btrfs_check_acl NULL
#endif
-int btrfs_init_acl(struct inode *inode, struct inode *dir);
+int btrfs_init_acl(struct btrfs_trans_handle *trans,
+ struct inode *inode, struct inode *dir);
int btrfs_acl_chmod(struct inode *inode);
/* relocation.c */
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
index f3a6075519c..e9103b3baa4 100644
--- a/fs/btrfs/dir-item.c
+++ b/fs/btrfs/dir-item.c
@@ -68,12 +68,12 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
* into the tree
*/
int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, const char *name,
- u16 name_len, const void *data, u16 data_len,
- u64 dir)
+ struct btrfs_root *root,
+ struct btrfs_path *path, u64 objectid,
+ const char *name, u16 name_len,
+ const void *data, u16 data_len)
{
int ret = 0;
- struct btrfs_path *path;
struct btrfs_dir_item *dir_item;
unsigned long name_ptr, data_ptr;
struct btrfs_key key, location;
@@ -81,15 +81,11 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
u32 data_size;
- key.objectid = dir;
+ BUG_ON(name_len + data_len > BTRFS_MAX_XATTR_SIZE(root));
+
+ key.objectid = objectid;
btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
key.offset = btrfs_name_hash(name, name_len);
- path = btrfs_alloc_path();
- if (!path)
- return -ENOMEM;
- if (name_len + data_len + sizeof(struct btrfs_dir_item) >
- BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item))
- return -ENOSPC;
data_size = sizeof(*dir_item) + name_len + data_len;
dir_item = insert_with_overflow(trans, root, path, &key, data_size,
@@ -117,7 +113,6 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
write_extent_buffer(leaf, data, data_ptr, data_len);
btrfs_mark_buffer_dirty(path->nodes[0]);
- btrfs_free_path(path);
return ret;
}
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 02b6afbd745..2b59201b955 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -892,6 +892,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->stripesize = stripesize;
root->ref_cows = 0;
root->track_dirty = 0;
+ root->in_radix = 0;
+ root->clean_orphans = 0;
root->fs_info = fs_info;
root->objectid = objectid;
@@ -928,7 +930,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
root->defrag_trans_start = fs_info->generation;
init_completion(&root->kobj_unregister);
root->defrag_running = 0;
- root->defrag_level = 0;
root->root_key.objectid = objectid;
root->anon_super.s_root = NULL;
root->anon_super.s_dev = 0;
@@ -980,12 +981,12 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
while (1) {
ret = find_first_extent_bit(&log_root_tree->dirty_log_pages,
- 0, &start, &end, EXTENT_DIRTY);
+ 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
if (ret)
break;
- clear_extent_dirty(&log_root_tree->dirty_log_pages,
- start, end, GFP_NOFS);
+ clear_extent_bits(&log_root_tree->dirty_log_pages, start, end,
+ EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
}
eb = fs_info->log_root_tree->node;
@@ -1210,8 +1211,10 @@ again:
ret = radix_tree_insert(&fs_info->fs_roots_radix,
(unsigned long)root->root_key.objectid,
root);
- if (ret == 0)
+ if (ret == 0) {
root->in_radix = 1;
+ root->clean_orphans = 1;
+ }
spin_unlock(&fs_info->fs_roots_radix_lock);
radix_tree_preload_end();
if (ret) {
@@ -1225,10 +1228,6 @@ again:
ret = btrfs_find_dead_roots(fs_info->tree_root,
root->root_key.objectid);
WARN_ON(ret);
-
- if (!(fs_info->sb->s_flags & MS_RDONLY))
- btrfs_orphan_cleanup(root);
-
return root;
fail:
free_fs_root(root);
@@ -1477,6 +1476,7 @@ static int cleaner_kthread(void *arg)
if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
mutex_trylock(&root->fs_info->cleaner_mutex)) {
+ btrfs_run_delayed_iputs(root);
btrfs_clean_old_snapshots(root);
mutex_unlock(&root->fs_info->cleaner_mutex);
}
@@ -1606,6 +1606,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
INIT_LIST_HEAD(&fs_info->trans_list);
INIT_LIST_HEAD(&fs_info->dead_roots);
+ INIT_LIST_HEAD(&fs_info->delayed_iputs);
INIT_LIST_HEAD(&fs_info->hashers);
INIT_LIST_HEAD(&fs_info->delalloc_inodes);
INIT_LIST_HEAD(&fs_info->ordered_operations);
@@ -1614,6 +1615,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
spin_lock_init(&fs_info->new_trans_lock);
spin_lock_init(&fs_info->ref_cache_lock);
spin_lock_init(&fs_info->fs_roots_radix_lock);
+ spin_lock_init(&fs_info->delayed_iput_lock);
init_completion(&fs_info->kobj_unregister);
fs_info->tree_root = tree_root;
@@ -1689,6 +1691,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
mutex_init(&fs_info->cleaner_mutex);
mutex_init(&fs_info->volume_mutex);
init_rwsem(&fs_info->extent_commit_sem);
+ init_rwsem(&fs_info->cleanup_work_sem);
init_rwsem(&fs_info->subvol_sem);
btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
@@ -1979,7 +1982,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
if (!(sb->s_flags & MS_RDONLY)) {
ret = btrfs_recover_relocation(tree_root);
- BUG_ON(ret);
+ if (ret < 0) {
+ printk(KERN_WARNING
+ "btrfs: failed to recover relocation\n");
+ err = -EINVAL;
+ goto fail_trans_kthread;
+ }
}
location.objectid = BTRFS_FS_TREE_OBJECTID;
@@ -1990,6 +1998,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
if (!fs_info->fs_root)
goto fail_trans_kthread;
+ if (!(sb->s_flags & MS_RDONLY)) {
+ down_read(&fs_info->cleanup_work_sem);
+ btrfs_orphan_cleanup(fs_info->fs_root);
+ up_read(&fs_info->cleanup_work_sem);
+ }
+
return tree_root;
fail_trans_kthread:
@@ -2386,8 +2400,14 @@ int btrfs_commit_super(struct btrfs_root *root)
int ret;
mutex_lock(&root->fs_info->cleaner_mutex);
+ btrfs_run_delayed_iputs(root);
btrfs_clean_old_snapshots(root);
mutex_unlock(&root->fs_info->cleaner_mutex);
+
+ /* wait until ongoing cleanup work done */
+ down_write(&root->fs_info->cleanup_work_sem);
+ up_write(&root->fs_info->cleanup_work_sem);
+
trans = btrfs_start_transaction(root, 1);
ret = btrfs_commit_transaction(trans, root);
BUG_ON(ret);
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 94627c4cc19..559f72489b3 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -83,6 +83,17 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
return (cache->flags & bits) == bits;
}
+void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
+{
+ atomic_inc(&cache->count);
+}
+
+void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
+{
+ if (atomic_dec_and_test(&cache->count))
+ kfree(cache);
+}
+
/*
* this adds the block group to the fs_info rb tree for the block group
* cache
@@ -156,7 +167,7 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
}
}
if (ret)
- atomic_inc(&ret->count);
+ btrfs_get_block_group(ret);
spin_unlock(&info->block_group_cache_lock);
return ret;
@@ -195,6 +206,14 @@ static int exclude_super_stripes(struct btrfs_root *root,
int stripe_len;
int i, nr, ret;
+ if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
+ stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
+ cache->bytes_super += stripe_len;
+ ret = add_excluded_extent(root, cache->key.objectid,
+ stripe_len);
+ BUG_ON(ret);
+ }
+
for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
bytenr = btrfs_sb_offset(i);
ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
@@ -255,7 +274,7 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
if (ret)
break;
- if (extent_start == start) {
+ if (extent_start <= start) {
start = extent_end + 1;
} else if (extent_start > start && extent_start < end) {
size = extent_start - start;
@@ -399,6 +418,8 @@ err:
put_caching_control(caching_ctl);
atomic_dec(&block_group->space_info->caching_threads);
+ btrfs_put_block_group(block_group);
+
return 0;
}
@@ -439,6 +460,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
up_write(&fs_info->extent_commit_sem);
atomic_inc(&cache->space_info->caching_threads);
+ btrfs_get_block_group(cache);
tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
cache->key.objectid);
@@ -478,12 +500,6 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
return cache;
}
-void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
-{
- if (atomic_dec_and_test(&cache->count))
- kfree(cache);
-}
-
static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
u64 flags)
{
@@ -2574,7 +2590,7 @@ next_block_group(struct btrfs_root *root,
if (node) {
cache = rb_entry(node, struct btrfs_block_group_cache,
cache_node);
- atomic_inc(&cache->count);
+ btrfs_get_block_group(cache);
} else
cache = NULL;
spin_unlock(&root->fs_info->block_group_cache_lock);
@@ -2880,9 +2896,9 @@ static noinline void flush_delalloc_async(struct btrfs_work *work)
root = async->root;
info = async->info;
- btrfs_start_delalloc_inodes(root);
+ btrfs_start_delalloc_inodes(root, 0);
wake_up(&info->flush_wait);
- btrfs_wait_ordered_extents(root, 0);
+ btrfs_wait_ordered_extents(root, 0, 0);
spin_lock(&info->lock);
info->flushing = 0;
@@ -2956,8 +2972,8 @@ static void flush_delalloc(struct btrfs_root *root,
return;
flush:
- btrfs_start_delalloc_inodes(root);
- btrfs_wait_ordered_extents(root, 0);
+ btrfs_start_delalloc_inodes(root, 0);
+ btrfs_wait_ordered_extents(root, 0, 0);
spin_lock(&info->lock);
info->flushing = 0;
@@ -3454,14 +3470,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
else
old_val -= num_bytes;
btrfs_set_super_bytes_used(&info->super_copy, old_val);
-
- /* block accounting for root item */
- old_val = btrfs_root_used(&root->root_item);
- if (alloc)
- old_val += num_bytes;
- else
- old_val -= num_bytes;
- btrfs_set_root_used(&root->root_item, old_val);
spin_unlock(&info->delalloc_lock);
while (total) {
@@ -4049,6 +4057,21 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
return ret;
}
+int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, u32 blocksize,
+ u64 parent, u64 root_objectid, int level)
+{
+ u64 used;
+ spin_lock(&root->node_lock);
+ used = btrfs_root_used(&root->root_item) - blocksize;
+ btrfs_set_root_used(&root->root_item, used);
+ spin_unlock(&root->node_lock);
+
+ return btrfs_free_extent(trans, root, bytenr, blocksize,
+ parent, root_objectid, level, 0);
+}
+
static u64 stripe_align(struct btrfs_root *root, u64 val)
{
u64 mask = ((u64)root->stripesize - 1);
@@ -4212,7 +4235,7 @@ search:
u64 offset;
int cached;
- atomic_inc(&block_group->count);
+ btrfs_get_block_group(block_group);
search_start = block_group->key.objectid;
have_block_group:
@@ -4300,7 +4323,7 @@ have_block_group:
btrfs_put_block_group(block_group);
block_group = last_ptr->block_group;
- atomic_inc(&block_group->count);
+ btrfs_get_block_group(block_group);
spin_unlock(&last_ptr->lock);
spin_unlock(&last_ptr->refill_lock);
@@ -4578,7 +4601,6 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
{
int ret;
u64 search_start = 0;
- struct btrfs_fs_info *info = root->fs_info;
data = btrfs_get_alloc_profile(root, data);
again:
@@ -4586,17 +4608,9 @@ again:
* the only place that sets empty_size is btrfs_realloc_node, which
* is not called recursively on allocations
*/
- if (empty_size || root->ref_cows) {
- if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
- ret = do_chunk_alloc(trans, root->fs_info->extent_root,
- 2 * 1024 * 1024,
- BTRFS_BLOCK_GROUP_METADATA |
- (info->metadata_alloc_profile &
- info->avail_metadata_alloc_bits), 0);
- }
+ if (empty_size || root->ref_cows)
ret = do_chunk_alloc(trans, root->fs_info->extent_root,
num_bytes + 2 * 1024 * 1024, data, 0);
- }
WARN_ON(num_bytes < root->sectorsize);
ret = find_free_extent(trans, root, num_bytes, empty_size,
@@ -4897,6 +4911,14 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
extent_op);
BUG_ON(ret);
}
+
+ if (root_objectid == root->root_key.objectid) {
+ u64 used;
+ spin_lock(&root->node_lock);
+ used = btrfs_root_used(&root->root_item) + num_bytes;
+ btrfs_set_root_used(&root->root_item, used);
+ spin_unlock(&root->node_lock);
+ }
return ret;
}
@@ -4919,8 +4941,16 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
btrfs_set_buffer_uptodate(buf);
if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
- set_extent_dirty(&root->dirty_log_pages, buf->start,
- buf->start + buf->len - 1, GFP_NOFS);
+ /*
+ * we allow two log transactions at a time, use different
+ * EXENT bit to differentiate dirty pages.
+ */
+ if (root->log_transid % 2 == 0)
+ set_extent_dirty(&root->dirty_log_pages, buf->start,
+ buf->start + buf->len - 1, GFP_NOFS);
+ else
+ set_extent_new(&root->dirty_log_pages, buf->start,
+ buf->start + buf->len - 1, GFP_NOFS);
} else {
set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
buf->start + buf->len - 1, GFP_NOFS);
@@ -5372,10 +5402,6 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
int ret;
while (level >= 0) {
- if (path->slots[level] >=
- btrfs_header_nritems(path->nodes[level]))
- break;
-
ret = walk_down_proc(trans, root, path, wc, lookup_info);
if (ret > 0)
break;
@@ -5383,6 +5409,10 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
if (level == 0)
break;
+ if (path->slots[level] >=
+ btrfs_header_nritems(path->nodes[level]))
+ break;
+
ret = do_walk_down(trans, root, path, wc, &lookup_info);
if (ret > 0) {
path->slots[level]++;
@@ -7373,9 +7403,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
wait_block_group_cache_done(block_group);
btrfs_remove_free_space_cache(block_group);
-
- WARN_ON(atomic_read(&block_group->count) != 1);
- kfree(block_group);
+ btrfs_put_block_group(block_group);
spin_lock(&info->block_group_cache_lock);
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 96577e8bf9f..b177ed31961 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3165,10 +3165,9 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
spin_unlock(&tree->buffer_lock);
goto free_eb;
}
- spin_unlock(&tree->buffer_lock);
-
/* add one reference for the tree */
atomic_inc(&eb->refs);
+ spin_unlock(&tree->buffer_lock);
return eb;
free_eb:
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 46bea0f4dc7..428fcac45f9 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -155,20 +155,6 @@ static struct rb_node *__tree_search(struct rb_root *root, u64 offset,
return NULL;
}
-/*
- * look for an offset in the tree, and if it can't be found, return
- * the first offset we can find smaller than 'offset'.
- */
-static inline struct rb_node *tree_search(struct rb_root *root, u64 offset)
-{
- struct rb_node *prev;
- struct rb_node *ret;
- ret = __tree_search(root, offset, &prev, NULL);
- if (!ret)
- return prev;
- return ret;
-}
-
/* check to see if two extent_map structs are adjacent and safe to merge */
static int mergable_maps(struct extent_map *prev, struct extent_map *next)
{
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 77f759302e1..6ed434ac037 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -179,18 +179,14 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
}
flags = em->flags;
if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
- if (em->start <= start &&
- (!testend || em->start + em->len >= start + len)) {
+ if (testend && em->start + em->len >= start + len) {
free_extent_map(em);
write_unlock(&em_tree->lock);
break;
}
- if (start < em->start) {
- len = em->start - start;
- } else {
+ start = em->start + em->len;
+ if (testend)
len = start + len - (em->start + em->len);
- start = em->start + em->len;
- }
free_extent_map(em);
write_unlock(&em_tree->lock);
continue;
@@ -265,324 +261,253 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
* If an extent intersects the range but is not entirely inside the range
* it is either truncated or split. Anything entirely inside the range
* is deleted from the tree.
- *
- * inline_limit is used to tell this code which offsets in the file to keep
- * if they contain inline extents.
*/
-noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
- struct btrfs_root *root, struct inode *inode,
- u64 start, u64 end, u64 locked_end,
- u64 inline_limit, u64 *hint_byte, int drop_cache)
+int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
+ u64 start, u64 end, u64 *hint_byte, int drop_cache)
{
- u64 extent_end = 0;
- u64 search_start = start;
- u64 ram_bytes = 0;
- u64 disk_bytenr = 0;
- u64 orig_locked_end = locked_end;
- u8 compression;
- u8 encryption;
- u16 other_encoding = 0;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
struct extent_buffer *leaf;
- struct btrfs_file_extent_item *extent;
+ struct btrfs_file_extent_item *fi;
struct btrfs_path *path;
struct btrfs_key key;
- struct btrfs_file_extent_item old;
- int keep;
- int slot;
- int bookend;
- int found_type = 0;
- int found_extent;
- int found_inline;
+ struct btrfs_key new_key;
+ u64 search_start = start;
+ u64 disk_bytenr = 0;
+ u64 num_bytes = 0;
+ u64 extent_offset = 0;
+ u64 extent_end = 0;
+ int del_nr = 0;
+ int del_slot = 0;
+ int extent_type;
int recow;
int ret;
- inline_limit = 0;
if (drop_cache)
btrfs_drop_extent_cache(inode, start, end - 1, 0);
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
+
while (1) {
recow = 0;
- btrfs_release_path(root, path);
ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
search_start, -1);
if (ret < 0)
- goto out;
- if (ret > 0) {
- if (path->slots[0] == 0) {
- ret = 0;
- goto out;
- }
- path->slots[0]--;
+ break;
+ if (ret > 0 && path->slots[0] > 0 && search_start == start) {
+ leaf = path->nodes[0];
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
+ if (key.objectid == inode->i_ino &&
+ key.type == BTRFS_EXTENT_DATA_KEY)
+ path->slots[0]--;
}
+ ret = 0;
next_slot:
- keep = 0;
- bookend = 0;
- found_extent = 0;
- found_inline = 0;
- compression = 0;
- encryption = 0;
- extent = NULL;
leaf = path->nodes[0];
- slot = path->slots[0];
- ret = 0;
- btrfs_item_key_to_cpu(leaf, &key, slot);
- if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY &&
- key.offset >= end) {
- goto out;
- }
- if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
- key.objectid != inode->i_ino) {
- goto out;
- }
- if (recow) {
- search_start = max(key.offset, start);
- continue;
- }
- if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
- extent = btrfs_item_ptr(leaf, slot,
- struct btrfs_file_extent_item);
- found_type = btrfs_file_extent_type(leaf, extent);
- compression = btrfs_file_extent_compression(leaf,
- extent);
- encryption = btrfs_file_extent_encryption(leaf,
- extent);
- other_encoding = btrfs_file_extent_other_encoding(leaf,
- extent);
- if (found_type == BTRFS_FILE_EXTENT_REG ||
- found_type == BTRFS_FILE_EXTENT_PREALLOC) {
- extent_end =
- btrfs_file_extent_disk_bytenr(leaf,
- extent);
- if (extent_end)
- *hint_byte = extent_end;
-
- extent_end = key.offset +
- btrfs_file_extent_num_bytes(leaf, extent);
- ram_bytes = btrfs_file_extent_ram_bytes(leaf,
- extent);
- found_extent = 1;
- } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
- found_inline = 1;
- extent_end = key.offset +
- btrfs_file_extent_inline_len(leaf, extent);
+ if (path->slots[0] >= btrfs_header_nritems(leaf)) {
+ BUG_ON(del_nr > 0);
+ ret = btrfs_next_leaf(root, path);
+ if (ret < 0)
+ break;
+ if (ret > 0) {
+ ret = 0;
+ break;
}
+ leaf = path->nodes[0];
+ recow = 1;
+ }
+
+ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+ if (key.objectid > inode->i_ino ||
+ key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
+ break;
+
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ extent_type = btrfs_file_extent_type(leaf, fi);
+
+ if (extent_type == BTRFS_FILE_EXTENT_REG ||
+ extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
+ disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+ num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
+ extent_offset = btrfs_file_extent_offset(leaf, fi);
+ extent_end = key.offset +
+ btrfs_file_extent_num_bytes(leaf, fi);
+ } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
+ extent_end = key.offset +
+ btrfs_file_extent_inline_len(leaf, fi);
} else {
+ WARN_ON(1);
extent_end = search_start;
}
- /* we found nothing we can drop */
- if ((!found_extent && !found_inline) ||
- search_start >= extent_end) {
- int nextret;
- u32 nritems;
- nritems = btrfs_header_nritems(leaf);
- if (slot >= nritems - 1) {
- nextret = btrfs_next_leaf(root, path);
- if (nextret)
- goto out;
- recow = 1;
- } else {
- path->slots[0]++;
- }
+ if (extent_end <= search_start) {
+ path->slots[0]++;
goto next_slot;
}
- if (end <= extent_end && start >= key.offset && found_inline)
- *hint_byte = EXTENT_MAP_INLINE;
-
- if (found_extent) {
- read_extent_buffer(leaf, &old, (unsigned long)extent,
- sizeof(old));
- }
-
- if (end < extent_end && end >= key.offset) {
- bookend = 1;
- if (found_inline && start <= key.offset)
- keep = 1;
+ search_start = max(key.offset, start);
+ if (recow) {
+ btrfs_release_path(root, path);
+ continue;
}
- if (bookend && found_extent) {
- if (locked_end < extent_end) {
- ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
- locked_end, extent_end - 1,
- GFP_NOFS);
- if (!ret) {
- btrfs_release_path(root, path);
- lock_extent(&BTRFS_I(inode)->io_tree,
- locked_end, extent_end - 1,
- GFP_NOFS);
- locked_end = extent_end;
- continue;
- }
- locked_end = extent_end;
+ /*
+ * | - range to drop - |
+ * | -------- extent -------- |
+ */
+ if (start > key.offset && end < extent_end) {
+ BUG_ON(del_nr > 0);
+ BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
+
+ memcpy(&new_key, &key, sizeof(new_key));
+ new_key.offset = start;
+ ret = btrfs_duplicate_item(trans, root, path,
+ &new_key);
+ if (ret == -EAGAIN) {
+ btrfs_release_path(root, path);
+ continue;
}
- disk_bytenr = le64_to_cpu(old.disk_bytenr);
- if (disk_bytenr != 0) {
+ if (ret < 0)
+ break;
+
+ leaf = path->nodes[0];
+ fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
+ struct btrfs_file_extent_item);
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ start - key.offset);
+
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+
+ extent_offset += start - key.offset;
+ btrfs_set_file_extent_offset(leaf, fi, extent_offset);
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ extent_end - start);
+ btrfs_mark_buffer_dirty(leaf);
+
+ if (disk_bytenr > 0) {
ret = btrfs_inc_extent_ref(trans, root,
- disk_bytenr,
- le64_to_cpu(old.disk_num_bytes), 0,
- root->root_key.objectid,
- key.objectid, key.offset -
- le64_to_cpu(old.offset));
+ disk_bytenr, num_bytes, 0,
+ root->root_key.objectid,
+ new_key.objectid,
+ start - extent_offset);
BUG_ON(ret);
+ *hint_byte = disk_bytenr;
}
+ key.offset = start;
}
+ /*
+ * | ---- range to drop ----- |
+ * | -------- extent -------- |
+ */
+ if (start <= key.offset && end < extent_end) {
+ BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
- if (found_inline) {
- u64 mask = root->sectorsize - 1;
- search_start = (extent_end + mask) & ~mask;
- } else
- search_start = extent_end;
-
- /* truncate existing extent */
- if (start > key.offset) {
- u64 new_num;
- u64 old_num;
- keep = 1;
- WARN_ON(start & (root->sectorsize - 1));
- if (found_extent) {
- new_num = start - key.offset;
- old_num = btrfs_file_extent_num_bytes(leaf,
- extent);
- *hint_byte =
- btrfs_file_extent_disk_bytenr(leaf,
- extent);
- if (btrfs_file_extent_disk_bytenr(leaf,
- extent)) {
- inode_sub_bytes(inode, old_num -
- new_num);
- }
- btrfs_set_file_extent_num_bytes(leaf,
- extent, new_num);
- btrfs_mark_buffer_dirty(leaf);
- } else if (key.offset < inline_limit &&
- (end > extent_end) &&
- (inline_limit < extent_end)) {
- u32 new_size;
- new_size = btrfs_file_extent_calc_inline_size(
- inline_limit - key.offset);
- inode_sub_bytes(inode, extent_end -
- inline_limit);
- btrfs_set_file_extent_ram_bytes(leaf, extent,
- new_size);
- if (!compression && !encryption) {
- btrfs_truncate_item(trans, root, path,
- new_size, 1);
- }
+ memcpy(&new_key, &key, sizeof(new_key));
+ new_key.offset = end;
+ btrfs_set_item_key_safe(trans, root, path, &new_key);
+
+ extent_offset += end - key.offset;
+ btrfs_set_file_extent_offset(leaf, fi, extent_offset);
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ extent_end - end);
+ btrfs_mark_buffer_dirty(leaf);
+ if (disk_bytenr > 0) {
+ inode_sub_bytes(inode, end - key.offset);
+ *hint_byte = disk_bytenr;
}
+ break;
}
- /* delete the entire extent */
- if (!keep) {
- if (found_inline)
- inode_sub_bytes(inode, extent_end -
- key.offset);
- ret = btrfs_del_item(trans, root, path);
- /* TODO update progress marker and return */
- BUG_ON(ret);
- extent = NULL;
- btrfs_release_path(root, path);
- /* the extent will be freed later */
- }
- if (bookend && found_inline && start <= key.offset) {
- u32 new_size;
- new_size = btrfs_file_extent_calc_inline_size(
- extent_end - end);
- inode_sub_bytes(inode, end - key.offset);
- btrfs_set_file_extent_ram_bytes(leaf, extent,
- new_size);
- if (!compression && !encryption)
- ret = btrfs_truncate_item(trans, root, path,
- new_size, 0);
- BUG_ON(ret);
- }
- /* create bookend, splitting the extent in two */
- if (bookend && found_extent) {
- struct btrfs_key ins;
- ins.objectid = inode->i_ino;
- ins.offset = end;
- btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
- btrfs_release_path(root, path);
- path->leave_spinning = 1;
- ret = btrfs_insert_empty_item(trans, root, path, &ins,
- sizeof(*extent));
- BUG_ON(ret);
+ search_start = extent_end;
+ /*
+ * | ---- range to drop ----- |
+ * | -------- extent -------- |
+ */
+ if (start > key.offset && end >= extent_end) {
+ BUG_ON(del_nr > 0);
+ BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
- leaf = path->nodes[0];
- extent = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- write_extent_buffer(leaf, &old,
- (unsigned long)extent, sizeof(old));
-
- btrfs_set_file_extent_compression(leaf, extent,
- compression);
- btrfs_set_file_extent_encryption(leaf, extent,
- encryption);
- btrfs_set_file_extent_other_encoding(leaf, extent,
- other_encoding);
- btrfs_set_file_extent_offset(leaf, extent,
- le64_to_cpu(old.offset) + end - key.offset);
- WARN_ON(le64_to_cpu(old.num_bytes) <
- (extent_end - end));
- btrfs_set_file_extent_num_bytes(leaf, extent,
- extent_end - end);
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ start - key.offset);
+ btrfs_mark_buffer_dirty(leaf);
+ if (disk_bytenr > 0) {
+ inode_sub_bytes(inode, extent_end - start);
+ *hint_byte = disk_bytenr;
+ }
+ if (end == extent_end)
+ break;
- /*
- * set the ram bytes to the size of the full extent
- * before splitting. This is a worst case flag,
- * but its the best we can do because we don't know
- * how splitting affects compression
- */
- btrfs_set_file_extent_ram_bytes(leaf, extent,
- ram_bytes);
- btrfs_set_file_extent_type(leaf, extent, found_type);
-
- btrfs_unlock_up_safe(path, 1);
- btrfs_mark_buffer_dirty(path->nodes[0]);
- btrfs_set_lock_blocking(path->nodes[0]);
-
- path->leave_spinning = 0;
- btrfs_release_path(root, path);
- if (disk_bytenr != 0)
- inode_add_bytes(inode, extent_end - end);
+ path->slots[0]++;
+ goto next_slot;
}
- if (found_extent && !keep) {
- u64 old_disk_bytenr = le64_to_cpu(old.disk_bytenr);
+ /*
+ * | ---- range to drop ----- |
+ * | ------ extent ------ |
+ */
+ if (start <= key.offset && end >= extent_end) {
+ if (del_nr == 0) {
+ del_slot = path->slots[0];
+ del_nr = 1;
+ } else {
+ BUG_ON(del_slot + del_nr != path->slots[0]);
+ del_nr++;
+ }
- if (old_disk_bytenr != 0) {
+ if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
inode_sub_bytes(inode,
- le64_to_cpu(old.num_bytes));
+ extent_end - key.offset);
+ extent_end = ALIGN(extent_end,
+ root->sectorsize);
+ } else if (disk_bytenr > 0) {
ret = btrfs_free_extent(trans, root,
- old_disk_bytenr,
- le64_to_cpu(old.disk_num_bytes),
- 0, root->root_key.objectid,
+ disk_bytenr, num_bytes, 0,
+ root->root_key.objectid,
key.objectid, key.offset -
- le64_to_cpu(old.offset));
+ extent_offset);
BUG_ON(ret);
- *hint_byte = old_disk_bytenr;
+ inode_sub_bytes(inode,
+ extent_end - key.offset);
+ *hint_byte = disk_bytenr;
}
- }
- if (search_start >= end) {
- ret = 0;
- goto out;
+ if (end == extent_end)
+ break;
+
+ if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) {
+ path->slots[0]++;
+ goto next_slot;
+ }
+
+ ret = btrfs_del_items(trans, root, path, del_slot,
+ del_nr);
+ BUG_ON(ret);
+
+ del_nr = 0;
+ del_slot = 0;
+
+ btrfs_release_path(root, path);
+ continue;
}
+
+ BUG_ON(1);
}
-out:
- btrfs_free_path(path);
- if (locked_end > orig_locked_end) {
- unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end,
- locked_end - 1, GFP_NOFS);
+
+ if (del_nr > 0) {
+ ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
+ BUG_ON(ret);
}
+
+ btrfs_free_path(path);
return ret;
}
static int extent_mergeable(struct extent_buffer *leaf, int slot,
- u64 objectid, u64 bytenr, u64 *start, u64 *end)
+ u64 objectid, u64 bytenr, u64 orig_offset,
+ u64 *start, u64 *end)
{
struct btrfs_file_extent_item *fi;
struct btrfs_key key;
@@ -598,6 +523,7 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG ||
btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr ||
+ btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset ||
btrfs_file_extent_compression(leaf, fi) ||
btrfs_file_extent_encryption(leaf, fi) ||
btrfs_file_extent_other_encoding(leaf, fi))
@@ -620,23 +546,24 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
* two or three.
*/
int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
struct inode *inode, u64 start, u64 end)
{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
struct extent_buffer *leaf;
struct btrfs_path *path;
struct btrfs_file_extent_item *fi;
struct btrfs_key key;
+ struct btrfs_key new_key;
u64 bytenr;
u64 num_bytes;
u64 extent_end;
u64 orig_offset;
u64 other_start;
u64 other_end;
- u64 split = start;
- u64 locked_end = end;
- int extent_type;
- int split_end = 1;
+ u64 split;
+ int del_nr = 0;
+ int del_slot = 0;
+ int recow;
int ret;
btrfs_drop_extent_cache(inode, start, end - 1, 0);
@@ -644,12 +571,11 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
path = btrfs_alloc_path();
BUG_ON(!path);
again:
+ recow = 0;
+ split = start;
key.objectid = inode->i_ino;
key.type = BTRFS_EXTENT_DATA_KEY;
- if (split == start)
- key.offset = split;
- else
- key.offset = split - 1;
+ key.offset = split;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
if (ret > 0 && path->slots[0] > 0)
@@ -661,159 +587,158 @@ again:
key.type != BTRFS_EXTENT_DATA_KEY);
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
- extent_type = btrfs_file_extent_type(leaf, fi);
- BUG_ON(extent_type != BTRFS_FILE_EXTENT_PREALLOC);
+ BUG_ON(btrfs_file_extent_type(leaf, fi) !=
+ BTRFS_FILE_EXTENT_PREALLOC);
extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
BUG_ON(key.offset > start || extent_end < end);
bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
+ memcpy(&new_key, &key, sizeof(new_key));
- if (key.offset == start)
- split = end;
-
- if (key.offset == start && extent_end == end) {
- int del_nr = 0;
- int del_slot = 0;
- other_start = end;
- other_end = 0;
- if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
- bytenr, &other_start, &other_end)) {
- extent_end = other_end;
- del_slot = path->slots[0] + 1;
- del_nr++;
- ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
- 0, root->root_key.objectid,
- inode->i_ino, orig_offset);
- BUG_ON(ret);
- }
+ if (start == key.offset && end < extent_end) {
other_start = 0;
other_end = start;
- if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino,
- bytenr, &other_start, &other_end)) {
- key.offset = other_start;
- del_slot = path->slots[0];
- del_nr++;
- ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
- 0, root->root_key.objectid,
- inode->i_ino, orig_offset);
- BUG_ON(ret);
- }
- split_end = 0;
- if (del_nr == 0) {
- btrfs_set_file_extent_type(leaf, fi,
- BTRFS_FILE_EXTENT_REG);
- goto done;
- }
-
- fi = btrfs_item_ptr(leaf, del_slot - 1,
- struct btrfs_file_extent_item);
- btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
- btrfs_set_file_extent_num_bytes(leaf, fi,
- extent_end - key.offset);
- btrfs_mark_buffer_dirty(leaf);
-
- ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
- BUG_ON(ret);
- goto release;
- } else if (split == start) {
- if (locked_end < extent_end) {
- ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
- locked_end, extent_end - 1, GFP_NOFS);
- if (!ret) {
- btrfs_release_path(root, path);
- lock_extent(&BTRFS_I(inode)->io_tree,
- locked_end, extent_end - 1, GFP_NOFS);
- locked_end = extent_end;
- goto again;
- }
- locked_end = extent_end;
+ if (extent_mergeable(leaf, path->slots[0] - 1,
+ inode->i_ino, bytenr, orig_offset,
+ &other_start, &other_end)) {
+ new_key.offset = end;
+ btrfs_set_item_key_safe(trans, root, path, &new_key);
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ extent_end - end);
+ btrfs_set_file_extent_offset(leaf, fi,
+ end - orig_offset);
+ fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
+ struct btrfs_file_extent_item);
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ end - other_start);
+ btrfs_mark_buffer_dirty(leaf);
+ goto out;
}
- btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset);
- } else {
- BUG_ON(key.offset != start);
- key.offset = split;
- btrfs_set_file_extent_offset(leaf, fi, key.offset -
- orig_offset);
- btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
- btrfs_set_item_key_safe(trans, root, path, &key);
- extent_end = split;
}
- if (extent_end == end) {
- split_end = 0;
- extent_type = BTRFS_FILE_EXTENT_REG;
- }
- if (extent_end == end && split == start) {
+ if (start > key.offset && end == extent_end) {
other_start = end;
other_end = 0;
- if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
- bytenr, &other_start, &other_end)) {
- path->slots[0]++;
+ if (extent_mergeable(leaf, path->slots[0] + 1,
+ inode->i_ino, bytenr, orig_offset,
+ &other_start, &other_end)) {
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
- key.offset = split;
- btrfs_set_item_key_safe(trans, root, path, &key);
- btrfs_set_file_extent_offset(leaf, fi, key.offset -
- orig_offset);
btrfs_set_file_extent_num_bytes(leaf, fi,
- other_end - split);
- goto done;
- }
- }
- if (extent_end == end && split == end) {
- other_start = 0;
- other_end = start;
- if (extent_mergeable(leaf, path->slots[0] - 1 , inode->i_ino,
- bytenr, &other_start, &other_end)) {
- path->slots[0]--;
+ start - key.offset);
+ path->slots[0]++;
+ new_key.offset = start;
+ btrfs_set_item_key_safe(trans, root, path, &new_key);
+
fi = btrfs_item_ptr(leaf, path->slots[0],
struct btrfs_file_extent_item);
- btrfs_set_file_extent_num_bytes(leaf, fi, extent_end -
- other_start);
- goto done;
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ other_end - start);
+ btrfs_set_file_extent_offset(leaf, fi,
+ start - orig_offset);
+ btrfs_mark_buffer_dirty(leaf);
+ goto out;
}
}
- btrfs_mark_buffer_dirty(leaf);
+ while (start > key.offset || end < extent_end) {
+ if (key.offset == start)
+ split = end;
- ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
- root->root_key.objectid,
- inode->i_ino, orig_offset);
- BUG_ON(ret);
- btrfs_release_path(root, path);
+ new_key.offset = split;
+ ret = btrfs_duplicate_item(trans, root, path, &new_key);
+ if (ret == -EAGAIN) {
+ btrfs_release_path(root, path);
+ goto again;
+ }
+ BUG_ON(ret < 0);
- key.offset = start;
- ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*fi));
- BUG_ON(ret);
+ leaf = path->nodes[0];
+ fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
+ struct btrfs_file_extent_item);
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ split - key.offset);
- leaf = path->nodes[0];
- fi = btrfs_item_ptr(leaf, path->slots[0],
- struct btrfs_file_extent_item);
- btrfs_set_file_extent_generation(leaf, fi, trans->transid);
- btrfs_set_file_extent_type(leaf, fi, extent_type);
- btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr);
- btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes);
- btrfs_set_file_extent_offset(leaf, fi, key.offset - orig_offset);
- btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset);
- btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
- btrfs_set_file_extent_compression(leaf, fi, 0);
- btrfs_set_file_extent_encryption(leaf, fi, 0);
- btrfs_set_file_extent_other_encoding(leaf, fi, 0);
-done:
- btrfs_mark_buffer_dirty(leaf);
-
-release:
- btrfs_release_path(root, path);
- if (split_end && split == start) {
- split = end;
- goto again;
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+
+ btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ extent_end - split);
+ btrfs_mark_buffer_dirty(leaf);
+
+ ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
+ root->root_key.objectid,
+ inode->i_ino, orig_offset);
+ BUG_ON(ret);
+
+ if (split == start) {
+ key.offset = start;
+ } else {
+ BUG_ON(start != key.offset);
+ path->slots[0]--;
+ extent_end = end;
+ }
+ recow = 1;
+ }
+
+ other_start = end;
+ other_end = 0;
+ if (extent_mergeable(leaf, path->slots[0] + 1,
+ inode->i_ino, bytenr, orig_offset,
+ &other_start, &other_end)) {
+ if (recow) {
+ btrfs_release_path(root, path);
+ goto again;
+ }
+ extent_end = other_end;
+ del_slot = path->slots[0] + 1;
+ del_nr++;
+ ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
+ 0, root->root_key.objectid,
+ inode->i_ino, orig_offset);
+ BUG_ON(ret);
+ }
+ other_start = 0;
+ other_end = start;
+ if (extent_mergeable(leaf, path->slots[0] - 1,
+ inode->i_ino, bytenr, orig_offset,
+ &other_start, &other_end)) {
+ if (recow) {
+ btrfs_release_path(root, path);
+ goto again;
+ }
+ key.offset = other_start;
+ del_slot = path->slots[0];
+ del_nr++;
+ ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
+ 0, root->root_key.objectid,
+ inode->i_ino, orig_offset);
+ BUG_ON(ret);
}
- if (locked_end > end) {
- unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1,
- GFP_NOFS);
+ if (del_nr == 0) {
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+ btrfs_set_file_extent_type(leaf, fi,
+ BTRFS_FILE_EXTENT_REG);
+ btrfs_mark_buffer_dirty(leaf);
+ } else {
+ fi = btrfs_item_ptr(leaf, del_slot - 1,
+ struct btrfs_file_extent_item);
+ btrfs_set_file_extent_type(leaf, fi,
+ BTRFS_FILE_EXTENT_REG);
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+ extent_end - key.offset);
+ btrfs_mark_buffer_dirty(leaf);
+
+ ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
+ BUG_ON(ret);
}
+out:
btrfs_free_path(path);
return 0;
}
@@ -1210,7 +1135,7 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
}
mutex_lock(&dentry->d_inode->i_mutex);
out:
- return ret > 0 ? EIO : ret;
+ return ret > 0 ? -EIO : ret;
}
static const struct vm_operations_struct btrfs_file_vm_ops = {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b3ad168a0bf..4deb280f896 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -88,13 +88,14 @@ static noinline int cow_file_range(struct inode *inode,
u64 start, u64 end, int *page_started,
unsigned long *nr_written, int unlock);
-static int btrfs_init_inode_security(struct inode *inode, struct inode *dir)
+static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
+ struct inode *inode, struct inode *dir)
{
int err;
- err = btrfs_init_acl(inode, dir);
+ err = btrfs_init_acl(trans, inode, dir);
if (!err)
- err = btrfs_xattr_security_init(inode, dir);
+ err = btrfs_xattr_security_init(trans, inode, dir);
return err;
}
@@ -188,8 +189,18 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
btrfs_mark_buffer_dirty(leaf);
btrfs_free_path(path);
+ /*
+ * we're an inline extent, so nobody can
+ * extend the file past i_size without locking
+ * a page we already have locked.
+ *
+ * We must do any isize and inode updates
+ * before we unlock the pages. Otherwise we
+ * could end up racing with unlink.
+ */
BTRFS_I(inode)->disk_i_size = inode->i_size;
btrfs_update_inode(trans, root, inode);
+
return 0;
fail:
btrfs_free_path(path);
@@ -230,8 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
return 1;
}
- ret = btrfs_drop_extents(trans, root, inode, start,
- aligned_end, aligned_end, start,
+ ret = btrfs_drop_extents(trans, inode, start, aligned_end,
&hint_byte, 1);
BUG_ON(ret);
@@ -416,7 +426,6 @@ again:
start, end,
total_compressed, pages);
}
- btrfs_end_transaction(trans, root);
if (ret == 0) {
/*
* inline extent creation worked, we don't need
@@ -430,9 +439,11 @@ again:
EXTENT_CLEAR_DELALLOC |
EXTENT_CLEAR_ACCOUNTING |
EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
- ret = 0;
+
+ btrfs_end_transaction(trans, root);
goto free_pages_out;
}
+ btrfs_end_transaction(trans, root);
}
if (will_compress) {
@@ -472,7 +483,8 @@ again:
nr_pages_ret = 0;
/* flag the file so we don't compress in the future */
- BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
+ if (!btrfs_test_opt(root, FORCE_COMPRESS))
+ BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
}
if (will_compress) {
*num_added += 1;
@@ -543,7 +555,6 @@ static noinline int submit_compressed_extents(struct inode *inode,
if (list_empty(&async_cow->extents))
return 0;
- trans = btrfs_join_transaction(root, 1);
while (!list_empty(&async_cow->extents)) {
async_extent = list_entry(async_cow->extents.next,
@@ -590,19 +601,15 @@ retry:
lock_extent(io_tree, async_extent->start,
async_extent->start + async_extent->ram_size - 1,
GFP_NOFS);
- /*
- * here we're doing allocation and writeback of the
- * compressed pages
- */
- btrfs_drop_extent_cache(inode, async_extent->start,
- async_extent->start +
- async_extent->ram_size - 1, 0);
+ trans = btrfs_join_transaction(root, 1);
ret = btrfs_reserve_extent(trans, root,
async_extent->compressed_size,
async_extent->compressed_size,
0, alloc_hint,
(u64)-1, &ins, 1);
+ btrfs_end_transaction(trans, root);
+
if (ret) {
int i;
for (i = 0; i < async_extent->nr_pages; i++) {
@@ -618,6 +625,14 @@ retry:
goto retry;
}
+ /*
+ * here we're doing allocation and writeback of the
+ * compressed pages
+ */
+ btrfs_drop_extent_cache(inode, async_extent->start,
+ async_extent->start +
+ async_extent->ram_size - 1, 0);
+
em = alloc_extent_map(GFP_NOFS);
em->start = async_extent->start;
em->len = async_extent->ram_size;
@@ -649,8 +664,6 @@ retry:
BTRFS_ORDERED_COMPRESSED);
BUG_ON(ret);
- btrfs_end_transaction(trans, root);
-
/*
* clear dirty, set writeback and unlock the pages.
*/
@@ -672,13 +685,11 @@ retry:
async_extent->nr_pages);
BUG_ON(ret);
- trans = btrfs_join_transaction(root, 1);
alloc_hint = ins.objectid + ins.offset;
kfree(async_extent);
cond_resched();
}
- btrfs_end_transaction(trans, root);
return 0;
}
@@ -742,6 +753,7 @@ static noinline int cow_file_range(struct inode *inode,
EXTENT_CLEAR_DIRTY |
EXTENT_SET_WRITEBACK |
EXTENT_END_WRITEBACK);
+
*nr_written = *nr_written +
(end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
*page_started = 1;
@@ -1596,7 +1608,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
struct inode *inode, u64 file_pos,
u64 disk_bytenr, u64 disk_num_bytes,
u64 num_bytes, u64 ram_bytes,
- u64 locked_end,
u8 compression, u8 encryption,
u16 other_encoding, int extent_type)
{
@@ -1622,9 +1633,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
* the caller is expected to unpin it and allow it to be merged
* with the others.
*/
- ret = btrfs_drop_extents(trans, root, inode, file_pos,
- file_pos + num_bytes, locked_end,
- file_pos, &hint, 0);
+ ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes,
+ &hint, 0);
BUG_ON(ret);
ins.objectid = inode->i_ino;
@@ -1671,24 +1681,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
* before we start the transaction. It limits the amount of btree
* reads required while inside the transaction.
*/
-static noinline void reada_csum(struct btrfs_root *root,
- struct btrfs_path *path,
- struct btrfs_ordered_extent *ordered_extent)
-{
- struct btrfs_ordered_sum *sum;
- u64 bytenr;
-
- sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum,
- list);
- bytenr = sum->sums[0].bytenr;
-
- /*
- * we don't care about the results, the point of this search is
- * just to get the btree leaves into ram
- */
- btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0);
-}
-
/* as ordered data IO finishes, this gets called so we can finish
* an ordered extent if the range of bytes in the file it covers are
* fully written.
@@ -1699,7 +1691,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
struct btrfs_trans_handle *trans;
struct btrfs_ordered_extent *ordered_extent = NULL;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
- struct btrfs_path *path;
int compressed = 0;
int ret;
@@ -1707,46 +1698,32 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
if (!ret)
return 0;
- /*
- * before we join the transaction, try to do some of our IO.
- * This will limit the amount of IO that we have to do with
- * the transaction running. We're unlikely to need to do any
- * IO if the file extents are new, the disk_i_size checks
- * covers the most common case.
- */
- if (start < BTRFS_I(inode)->disk_i_size) {
- path = btrfs_alloc_path();
- if (path) {
- ret = btrfs_lookup_file_extent(NULL, root, path,
- inode->i_ino,
- start, 0);
- ordered_extent = btrfs_lookup_ordered_extent(inode,
- start);
- if (!list_empty(&ordered_extent->list)) {
- btrfs_release_path(root, path);
- reada_csum(root, path, ordered_extent);
- }
- btrfs_free_path(path);
+ ordered_extent = btrfs_lookup_ordered_extent(inode, start);
+ BUG_ON(!ordered_extent);
+
+ if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
+ BUG_ON(!list_empty(&ordered_extent->list));
+ ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
+ if (!ret) {
+ trans = btrfs_join_transaction(root, 1);
+ ret = btrfs_update_inode(trans, root, inode);
+ BUG_ON(ret);
+ btrfs_end_transaction(trans, root);
}
+ goto out;
}
- trans = btrfs_join_transaction(root, 1);
-
- if (!ordered_extent)
- ordered_extent = btrfs_lookup_ordered_extent(inode, start);
- BUG_ON(!ordered_extent);
- if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags))
- goto nocow;
-
lock_extent(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset + ordered_extent->len - 1,
GFP_NOFS);
+ trans = btrfs_join_transaction(root, 1);
+
if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
compressed = 1;
if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
BUG_ON(compressed);
- ret = btrfs_mark_extent_written(trans, root, inode,
+ ret = btrfs_mark_extent_written(trans, inode,
ordered_extent->file_offset,
ordered_extent->file_offset +
ordered_extent->len);
@@ -1758,8 +1735,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
ordered_extent->disk_len,
ordered_extent->len,
ordered_extent->len,
- ordered_extent->file_offset +
- ordered_extent->len,
compressed, 0, 0,
BTRFS_FILE_EXTENT_REG);
unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
@@ -1770,22 +1745,20 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
unlock_extent(io_tree, ordered_extent->file_offset,
ordered_extent->file_offset + ordered_extent->len - 1,
GFP_NOFS);
-nocow:
add_pending_csums(trans, inode, ordered_extent->file_offset,
&ordered_extent->list);
- mutex_lock(&BTRFS_I(inode)->extent_mutex);
- btrfs_ordered_update_i_size(inode, ordered_extent);
- btrfs_update_inode(trans, root, inode);
- btrfs_remove_ordered_extent(inode, ordered_extent);
- mutex_unlock(&BTRFS_I(inode)->extent_mutex);
-
+ /* this also removes the ordered extent from the tree */
+ btrfs_ordered_update_i_size(inode, 0, ordered_extent);
+ ret = btrfs_update_inode(trans, root, inode);
+ BUG_ON(ret);
+ btrfs_end_transaction(trans, root);
+out:
/* once for us */
btrfs_put_ordered_extent(ordered_extent);
/* once for the tree */
btrfs_put_ordered_extent(ordered_extent);
- btrfs_end_transaction(trans, root);
return 0;
}
@@ -2008,6 +1981,54 @@ zeroit:
return -EIO;
}
+struct delayed_iput {
+ struct list_head list;
+ struct inode *inode;
+};
+
+void btrfs_add_delayed_iput(struct inode *inode)
+{
+ struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+ struct delayed_iput *delayed;
+
+ if (atomic_add_unless(&inode->i_count, -1, 1))
+ return;
+
+ delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL);
+ delayed->inode = inode;
+
+ spin_lock(&fs_info->delayed_iput_lock);
+ list_add_tail(&delayed->list, &fs_info->delayed_iputs);
+ spin_unlock(&fs_info->delayed_iput_lock);
+}
+
+void btrfs_run_delayed_iputs(struct btrfs_root *root)
+{
+ LIST_HEAD(list);
+ struct btrfs_fs_info *fs_info = root->fs_info;
+ struct delayed_iput *delayed;
+ int empty;
+
+ spin_lock(&fs_info->delayed_iput_lock);
+ empty = list_empty(&fs_info->delayed_iputs);
+ spin_unlock(&fs_info->delayed_iput_lock);
+ if (empty)
+ return;
+
+ down_read(&root->fs_info->cleanup_work_sem);
+ spin_lock(&fs_info->delayed_iput_lock);
+ list_splice_init(&fs_info->delayed_iputs, &list);
+ spin_unlock(&fs_info->delayed_iput_lock);
+
+ while (!list_empty(&list)) {
+ delayed = list_entry(list.next, struct delayed_iput, list);
+ list_del(&delayed->list);
+ iput(delayed->inode);
+ kfree(delayed);
+ }
+ up_read(&root->fs_info->cleanup_work_sem);
+}
+
/*
* This creates an orphan entry for the given inode in case something goes
* wrong in the middle of an unlink/truncate.
@@ -2080,16 +2101,17 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
struct inode *inode;
int ret = 0, nr_unlink = 0, nr_truncate = 0;
- path = btrfs_alloc_path();
- if (!path)
+ if (!xchg(&root->clean_orphans, 0))
return;
+
+ path = btrfs_alloc_path();
+ BUG_ON(!path);
path->reada = -1;
key.objectid = BTRFS_ORPHAN_OBJECTID;
btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
key.offset = (u64)-1;
-
while (1) {
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
if (ret < 0) {
@@ -2834,37 +2856,40 @@ out:
* min_type is the minimum key type to truncate down to. If set to 0, this
* will kill all the items on this inode, including the INODE_ITEM_KEY.
*/
-noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
- struct btrfs_root *root,
- struct inode *inode,
- u64 new_size, u32 min_type)
+int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *inode,
+ u64 new_size, u32 min_type)
{
- int ret;
struct btrfs_path *path;
- struct btrfs_key key;
- struct btrfs_key found_key;
- u32 found_type = (u8)-1;
struct extent_buffer *leaf;
struct btrfs_file_extent_item *fi;
+ struct btrfs_key key;
+ struct btrfs_key found_key;
u64 extent_start = 0;
u64 extent_num_bytes = 0;
u64 extent_offset = 0;
u64 item_end = 0;
+ u64 mask = root->sectorsize - 1;
+ u32 found_type = (u8)-1;
int found_extent;
int del_item;
int pending_del_nr = 0;
int pending_del_slot = 0;
int extent_type = -1;
int encoding;
- u64 mask = root->sectorsize - 1;
+ int ret;
+ int err = 0;
+
+ BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
if (root->ref_cows)
btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
+
path = btrfs_alloc_path();
BUG_ON(!path);
path->reada = -1;
- /* FIXME, add redo link to tree so we don't leak on crash */
key.objectid = inode->i_ino;
key.offset = (u64)-1;
key.type = (u8)-1;
@@ -2872,17 +2897,17 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
search_again:
path->leave_spinning = 1;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
- if (ret < 0)
- goto error;
+ if (ret < 0) {
+ err = ret;
+ goto out;
+ }
if (ret > 0) {
/* there are no items in the tree for us to truncate, we're
* done
*/
- if (path->slots[0] == 0) {
- ret = 0;
- goto error;
- }
+ if (path->slots[0] == 0)
+ goto out;
path->slots[0]--;
}
@@ -2917,28 +2942,17 @@ search_again:
}
item_end--;
}
- if (item_end < new_size) {
- if (found_type == BTRFS_DIR_ITEM_KEY)
- found_type = BTRFS_INODE_ITEM_KEY;
- else if (found_type == BTRFS_EXTENT_ITEM_KEY)
- found_type = BTRFS_EXTENT_DATA_KEY;
- else if (found_type == BTRFS_EXTENT_DATA_KEY)
- found_type = BTRFS_XATTR_ITEM_KEY;
- else if (found_type == BTRFS_XATTR_ITEM_KEY)
- found_type = BTRFS_INODE_REF_KEY;
- else if (found_type)
- found_type--;
- else
+ if (found_type > min_type) {
+ del_item = 1;
+ } else {
+ if (item_end < new_size)
break;
- btrfs_set_key_type(&key, found_type);
- goto next;
+ if (found_key.offset >= new_size)
+ del_item = 1;
+ else
+ del_item = 0;
}
- if (found_key.offset >= new_size)
- del_item = 1;
- else
- del_item = 0;
found_extent = 0;
-
/* FIXME, shrink the extent if the ref count is only 1 */
if (found_type != BTRFS_EXTENT_DATA_KEY)
goto delete;
@@ -3025,42 +3039,36 @@ delete:
inode->i_ino, extent_offset);
BUG_ON(ret);
}
-next:
- if (path->slots[0] == 0) {
- if (pending_del_nr)
- goto del_pending;
- btrfs_release_path(root, path);
- if (found_type == BTRFS_INODE_ITEM_KEY)
- break;
- goto search_again;
- }
- path->slots[0]--;
- if (pending_del_nr &&
- path->slots[0] + 1 != pending_del_slot) {
- struct btrfs_key debug;
-del_pending:
- btrfs_item_key_to_cpu(path->nodes[0], &debug,
- pending_del_slot);
- ret = btrfs_del_items(trans, root, path,
- pending_del_slot,
- pending_del_nr);
- BUG_ON(ret);
- pending_del_nr = 0;
+ if (found_type == BTRFS_INODE_ITEM_KEY)
+ break;
+
+ if (path->slots[0] == 0 ||
+ path->slots[0] != pending_del_slot) {
+ if (root->ref_cows) {
+ err = -EAGAIN;
+ goto out;
+ }
+ if (pending_del_nr) {
+ ret = btrfs_del_items(trans, root, path,
+ pending_del_slot,
+ pending_del_nr);
+ BUG_ON(ret);
+ pending_del_nr = 0;
+ }
btrfs_release_path(root, path);
- if (found_type == BTRFS_INODE_ITEM_KEY)
- break;
goto search_again;
+ } else {
+ path->slots[0]--;
}
}
- ret = 0;
-error:
+out:
if (pending_del_nr) {
ret = btrfs_del_items(trans, root, path, pending_del_slot,
pending_del_nr);
}
btrfs_free_path(path);
- return ret;
+ return err;
}
/*
@@ -3180,10 +3188,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
if (size <= hole_start)
return 0;
- err = btrfs_truncate_page(inode->i_mapping, inode->i_size);
- if (err)
- return err;
-
while (1) {
struct btrfs_ordered_extent *ordered;
btrfs_wait_ordered_range(inode, hole_start,
@@ -3196,9 +3200,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
btrfs_put_ordered_extent(ordered);
}
- trans = btrfs_start_transaction(root, 1);
- btrfs_set_trans_block_group(trans, inode);
-
cur_offset = hole_start;
while (1) {
em = btrfs_get_extent(inode, NULL, 0, cur_offset,
@@ -3206,40 +3207,120 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
BUG_ON(IS_ERR(em) || !em);
last_byte = min(extent_map_end(em), block_end);
last_byte = (last_byte + mask) & ~mask;
- if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
+ if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
u64 hint_byte = 0;
hole_size = last_byte - cur_offset;
- err = btrfs_drop_extents(trans, root, inode,
- cur_offset,
- cur_offset + hole_size,
- block_end,
- cur_offset, &hint_byte, 1);
- if (err)
- break;
- err = btrfs_reserve_metadata_space(root, 1);
+ err = btrfs_reserve_metadata_space(root, 2);
if (err)
break;
+ trans = btrfs_start_transaction(root, 1);
+ btrfs_set_trans_block_group(trans, inode);
+
+ err = btrfs_drop_extents(trans, inode, cur_offset,
+ cur_offset + hole_size,
+ &hint_byte, 1);
+ BUG_ON(err);
+
err = btrfs_insert_file_extent(trans, root,
inode->i_ino, cur_offset, 0,
0, hole_size, 0, hole_size,
0, 0, 0);
+ BUG_ON(err);
+
btrfs_drop_extent_cache(inode, hole_start,
last_byte - 1, 0);
- btrfs_unreserve_metadata_space(root, 1);
+
+ btrfs_end_transaction(trans, root);
+ btrfs_unreserve_metadata_space(root, 2);
}
free_extent_map(em);
cur_offset = last_byte;
- if (err || cur_offset >= block_end)
+ if (cur_offset >= block_end)
break;
}
- btrfs_end_transaction(trans, root);
unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
return err;
}
+static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_trans_handle *trans;
+ unsigned long nr;
+ int ret;
+
+ if (attr->ia_size == inode->i_size)
+ return 0;
+
+ if (attr->ia_size > inode->i_size) {
+ unsigned long limit;
+ limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+ if (attr->ia_size > inode->i_sb->s_maxbytes)
+ return -EFBIG;
+ if (limit != RLIM_INFINITY && attr->ia_size > limit) {
+ send_sig(SIGXFSZ, current, 0);
+ return -EFBIG;
+ }
+ }
+
+ ret = btrfs_reserve_metadata_space(root, 1);
+ if (ret)
+ return ret;
+
+ trans = btrfs_start_transaction(root, 1);
+ btrfs_set_trans_block_group(trans, inode);
+
+ ret = btrfs_orphan_add(trans, inode);
+ BUG_ON(ret);
+
+ nr = trans->blocks_used;
+ btrfs_end_transaction(trans, root);
+ btrfs_unreserve_metadata_space(root, 1);
+ btrfs_btree_balance_dirty(root, nr);
+
+ if (attr->ia_size > inode->i_size) {
+ ret = btrfs_cont_expand(inode, attr->ia_size);
+ if (ret) {
+ btrfs_truncate(inode);
+ return ret;
+ }
+
+ i_size_write(inode, attr->ia_size);
+ btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
+
+ trans = btrfs_start_transaction(root, 1);
+ btrfs_set_trans_block_group(trans, inode);
+
+ ret = btrfs_update_inode(trans, root, inode);
+ BUG_ON(ret);
+ if (inode->i_nlink > 0) {
+ ret = btrfs_orphan_del(trans, inode);
+ BUG_ON(ret);
+ }
+ nr = trans->blocks_used;
+ btrfs_end_transaction(trans, root);
+ btrfs_btree_balance_dirty(root, nr);
+ return 0;
+ }
+
+ /*
+ * We're truncating a file that used to have good data down to
+ * zero. Make sure it gets into the ordered flush list so that
+ * any new writes get down to disk quickly.
+ */
+ if (attr->ia_size == 0)
+ BTRFS_I(inode)->ordered_data_close = 1;
+
+ /* we don't support swapfiles, so vmtruncate shouldn't fail */
+ ret = vmtruncate(inode, attr->ia_size);
+ BUG_ON(ret);
+
+ return 0;
+}
+
static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = dentry->d_inode;
@@ -3250,23 +3331,14 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
return err;
if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
- if (attr->ia_size > inode->i_size) {
- err = btrfs_cont_expand(inode, attr->ia_size);
- if (err)
- return err;
- } else if (inode->i_size > 0 &&
- attr->ia_size == 0) {
-
- /* we're truncating a file that used to have good
- * data down to zero. Make sure it gets into
- * the ordered flush list so that any new writes
- * get down to disk quickly.
- */
- BTRFS_I(inode)->ordered_data_close = 1;
- }
+ err = btrfs_setattr_size(inode, attr);
+ if (err)
+ return err;
}
+ attr->ia_valid &= ~ATTR_SIZE;
- err = inode_setattr(inode, attr);
+ if (attr->ia_valid)
+ err = inode_setattr(inode, attr);
if (!err && ((attr->ia_valid & ATTR_MODE)))
err = btrfs_acl_chmod(inode);
@@ -3287,36 +3359,43 @@ void btrfs_delete_inode(struct inode *inode)
}
btrfs_wait_ordered_range(inode, 0, (u64)-1);
+ if (root->fs_info->log_root_recovering) {
+ BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan));
+ goto no_delete;
+ }
+
if (inode->i_nlink > 0) {
BUG_ON(btrfs_root_refs(&root->root_item) != 0);
goto no_delete;
}
btrfs_i_size_write(inode, 0);
- trans = btrfs_join_transaction(root, 1);
- btrfs_set_trans_block_group(trans, inode);
- ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0);
- if (ret) {
- btrfs_orphan_del(NULL, inode);
- goto no_delete_lock;
- }
+ while (1) {
+ trans = btrfs_start_transaction(root, 1);
+ btrfs_set_trans_block_group(trans, inode);
+ ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
- btrfs_orphan_del(trans, inode);
+ if (ret != -EAGAIN)
+ break;
- nr = trans->blocks_used;
- clear_inode(inode);
+ nr = trans->blocks_used;
+ btrfs_end_transaction(trans, root);
+ trans = NULL;
+ btrfs_btree_balance_dirty(root, nr);
+ }
- btrfs_end_transaction(trans, root);
- btrfs_btree_balance_dirty(root, nr);
- return;
+ if (ret == 0) {
+ ret = btrfs_orphan_del(trans, inode);
+ BUG_ON(ret);
+ }
-no_delete_lock:
nr = trans->blocks_used;
btrfs_end_transaction(trans, root);
btrfs_btree_balance_dirty(root, nr);
no_delete:
clear_inode(inode);
+ return;
}
/*
@@ -3569,7 +3648,6 @@ static noinline void init_btrfs_i(struct inode *inode)
INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
- mutex_init(&BTRFS_I(inode)->extent_mutex);
mutex_init(&BTRFS_I(inode)->log_mutex);
}
@@ -3695,6 +3773,13 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
}
srcu_read_unlock(&root->fs_info->subvol_srcu, index);
+ if (root != sub_root) {
+ down_read(&root->fs_info->cleanup_work_sem);
+ if (!(inode->i_sb->s_flags & MS_RDONLY))
+ btrfs_orphan_cleanup(sub_root);
+ up_read(&root->fs_info->cleanup_work_sem);
+ }
+
return inode;
}
@@ -3869,7 +3954,11 @@ skip:
/* Reached end of directory/root. Bump pos past the last item. */
if (key_type == BTRFS_DIR_INDEX_KEY)
- filp->f_pos = INT_LIMIT(off_t);
+ /*
+ * 32-bit glibc will use getdents64, but then strtol -
+ * so the last number we can serve is this.
+ */
+ filp->f_pos = 0x7fffffff;
else
filp->f_pos++;
nopos:
@@ -4219,7 +4308,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
if (IS_ERR(inode))
goto out_unlock;
- err = btrfs_init_inode_security(inode, dir);
+ err = btrfs_init_inode_security(trans, inode, dir);
if (err) {
drop_inode = 1;
goto out_unlock;
@@ -4290,7 +4379,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
if (IS_ERR(inode))
goto out_unlock;
- err = btrfs_init_inode_security(inode, dir);
+ err = btrfs_init_inode_security(trans, inode, dir);
if (err) {
drop_inode = 1;
goto out_unlock;
@@ -4336,6 +4425,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
if (inode->i_nlink == 0)
return -ENOENT;
+ /* do not allow sys_link's with other subvols of the same device */
+ if (root->objectid != BTRFS_I(inode)->root->objectid)
+ return -EPERM;
+
/*
* 1 item for inode ref
* 2 items for dir items
@@ -4423,7 +4516,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
drop_on_err = 1;
- err = btrfs_init_inode_security(inode, dir);
+ err = btrfs_init_inode_security(trans, inode, dir);
if (err)
goto out_fail;
@@ -5074,17 +5167,20 @@ static void btrfs_truncate(struct inode *inode)
unsigned long nr;
u64 mask = root->sectorsize - 1;
- if (!S_ISREG(inode->i_mode))
- return;
- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+ if (!S_ISREG(inode->i_mode)) {
+ WARN_ON(1);
return;
+ }
ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
if (ret)
return;
+
btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
+ btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
trans = btrfs_start_transaction(root, 1);
+ btrfs_set_trans_block_group(trans, inode);
/*
* setattr is responsible for setting the ordered_data_close flag,
@@ -5106,21 +5202,32 @@ static void btrfs_truncate(struct inode *inode)
if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close)
btrfs_add_ordered_operation(trans, root, inode);
- btrfs_set_trans_block_group(trans, inode);
- btrfs_i_size_write(inode, inode->i_size);
+ while (1) {
+ ret = btrfs_truncate_inode_items(trans, root, inode,
+ inode->i_size,
+ BTRFS_EXTENT_DATA_KEY);
+ if (ret != -EAGAIN)
+ break;
- ret = btrfs_orphan_add(trans, inode);
- if (ret)
- goto out;
- /* FIXME, add redo link to tree so we don't leak on crash */
- ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size,
- BTRFS_EXTENT_DATA_KEY);
- btrfs_update_inode(trans, root, inode);
+ ret = btrfs_update_inode(trans, root, inode);
+ BUG_ON(ret);
- ret = btrfs_orphan_del(trans, inode);
+ nr = trans->blocks_used;
+ btrfs_end_transaction(trans, root);
+ btrfs_btree_balance_dirty(root, nr);
+
+ trans = btrfs_start_transaction(root, 1);
+ btrfs_set_trans_block_group(trans, inode);
+ }
+
+ if (ret == 0 && inode->i_nlink > 0) {
+ ret = btrfs_orphan_del(trans, inode);
+ BUG_ON(ret);
+ }
+
+ ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
-out:
nr = trans->blocks_used;
ret = btrfs_end_transaction_throttle(trans, root);
BUG_ON(ret);
@@ -5217,9 +5324,9 @@ void btrfs_destroy_inode(struct inode *inode)
spin_lock(&root->list_lock);
if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
- printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan"
- " list\n", inode->i_ino);
- dump_stack();
+ printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
+ inode->i_ino);
+ list_del_init(&BTRFS_I(inode)->i_orphan);
}
spin_unlock(&root->list_lock);
@@ -5476,7 +5583,7 @@ out_fail:
* some fairly slow code that needs optimization. This walks the list
* of all the inodes with pending delalloc and forces them to disk.
*/
-int btrfs_start_delalloc_inodes(struct btrfs_root *root)
+int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
{
struct list_head *head = &root->fs_info->delalloc_inodes;
struct btrfs_inode *binode;
@@ -5495,7 +5602,10 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root)
spin_unlock(&root->fs_info->delalloc_lock);
if (inode) {
filemap_flush(inode->i_mapping);
- iput(inode);
+ if (delay_iput)
+ btrfs_add_delayed_iput(inode);
+ else
+ iput(inode);
}
cond_resched();
spin_lock(&root->fs_info->delalloc_lock);
@@ -5569,7 +5679,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
if (IS_ERR(inode))
goto out_unlock;
- err = btrfs_init_inode_security(inode, dir);
+ err = btrfs_init_inode_security(trans, inode, dir);
if (err) {
drop_inode = 1;
goto out_unlock;
@@ -5641,57 +5751,77 @@ out_fail:
return err;
}
-static int prealloc_file_range(struct btrfs_trans_handle *trans,
- struct inode *inode, u64 start, u64 end,
- u64 locked_end, u64 alloc_hint, int mode)
+static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
+ u64 alloc_hint, int mode, loff_t actual_len)
{
+ struct btrfs_trans_handle *trans;
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_key ins;
u64 alloc_size;
u64 cur_offset = start;
u64 num_bytes = end - start;
int ret = 0;
+ u64 i_size;
while (num_bytes > 0) {
alloc_size = min(num_bytes, root->fs_info->max_extent);
- ret = btrfs_reserve_metadata_space(root, 1);
- if (ret)
- goto out;
+ trans = btrfs_start_transaction(root, 1);
ret = btrfs_reserve_extent(trans, root, alloc_size,
root->sectorsize, 0, alloc_hint,
(u64)-1, &ins, 1);
if (ret) {
WARN_ON(1);
- goto out;
+ goto stop_trans;
}
+
+ ret = btrfs_reserve_metadata_space(root, 3);
+ if (ret) {
+ btrfs_free_reserved_extent(root, ins.objectid,
+ ins.offset);
+ goto stop_trans;
+ }
+
ret = insert_reserved_file_extent(trans, inode,
cur_offset, ins.objectid,
ins.offset, ins.offset,
- ins.offset, locked_end,
- 0, 0, 0,
+ ins.offset, 0, 0, 0,
BTRFS_FILE_EXTENT_PREALLOC);
BUG_ON(ret);
btrfs_drop_extent_cache(inode, cur_offset,
cur_offset + ins.offset -1, 0);
+
num_bytes -= ins.offset;
cur_offset += ins.offset;
alloc_hint = ins.objectid + ins.offset;
- btrfs_unreserve_metadata_space(root, 1);
- }
-out:
- if (cur_offset > start) {
+
inode->i_ctime = CURRENT_TIME;
BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
- cur_offset > i_size_read(inode))
- btrfs_i_size_write(inode, cur_offset);
+ (actual_len > inode->i_size) &&
+ (cur_offset > inode->i_size)) {
+
+ if (cur_offset > actual_len)
+ i_size = actual_len;
+ else
+ i_size = cur_offset;
+ i_size_write(inode, i_size);
+ btrfs_ordered_update_i_size(inode, i_size, NULL);
+ }
+
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
+
+ btrfs_end_transaction(trans, root);
+ btrfs_unreserve_metadata_space(root, 3);
}
+ return ret;
+stop_trans:
+ btrfs_end_transaction(trans, root);
return ret;
+
}
static long btrfs_fallocate(struct inode *inode, int mode,
@@ -5705,8 +5835,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
u64 locked_end;
u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
struct extent_map *em;
- struct btrfs_trans_handle *trans;
- struct btrfs_root *root;
int ret;
alloc_start = offset & ~mask;
@@ -5725,9 +5853,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
goto out;
}
- root = BTRFS_I(inode)->root;
-
- ret = btrfs_check_data_free_space(root, inode,
+ ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode,
alloc_end - alloc_start);
if (ret)
goto out;
@@ -5736,12 +5862,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
while (1) {
struct btrfs_ordered_extent *ordered;
- trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
- if (!trans) {
- ret = -EIO;
- goto out_free;
- }
-
/* the extent lock is ordered inside the running
* transaction
*/
@@ -5755,8 +5875,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
btrfs_put_ordered_extent(ordered);
unlock_extent(&BTRFS_I(inode)->io_tree,
alloc_start, locked_end, GFP_NOFS);
- btrfs_end_transaction(trans, BTRFS_I(inode)->root);
-
/*
* we can't wait on the range with the transaction
* running or with the extent lock held
@@ -5777,10 +5895,12 @@ static long btrfs_fallocate(struct inode *inode, int mode,
BUG_ON(IS_ERR(em) || !em);
last_byte = min(extent_map_end(em), alloc_end);
last_byte = (last_byte + mask) & ~mask;
- if (em->block_start == EXTENT_MAP_HOLE) {
- ret = prealloc_file_range(trans, inode, cur_offset,
- last_byte, locked_end + 1,
- alloc_hint, mode);
+ if (em->block_start == EXTENT_MAP_HOLE ||
+ (cur_offset >= inode->i_size &&
+ !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
+ ret = prealloc_file_range(inode,
+ cur_offset, last_byte,
+ alloc_hint, mode, offset+len);
if (ret < 0) {
free_extent_map(em);
break;
@@ -5799,9 +5919,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
GFP_NOFS);
- btrfs_end_transaction(trans, BTRFS_I(inode)->root);
-out_free:
- btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start);
+ btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode,
+ alloc_end - alloc_start);
out:
mutex_unlock(&inode->i_mutex);
return ret;
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index cdbb054102b..645a17927a8 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -237,7 +237,6 @@ static noinline int create_subvol(struct btrfs_root *root,
u64 objectid;
u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
u64 index = 0;
- unsigned long nr = 1;
/*
* 1 - inode item
@@ -290,7 +289,7 @@ static noinline int create_subvol(struct btrfs_root *root,
btrfs_set_root_generation(&root_item, trans->transid);
btrfs_set_root_level(&root_item, 0);
btrfs_set_root_refs(&root_item, 1);
- btrfs_set_root_used(&root_item, 0);
+ btrfs_set_root_used(&root_item, leaf->len);
btrfs_set_root_last_snapshot(&root_item, 0);
memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
@@ -342,24 +341,21 @@ static noinline int create_subvol(struct btrfs_root *root,
d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
fail:
- nr = trans->blocks_used;
err = btrfs_commit_transaction(trans, root);
if (err && !ret)
ret = err;
btrfs_unreserve_metadata_space(root, 6);
- btrfs_btree_balance_dirty(root, nr);
return ret;
}
static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
char *name, int namelen)
{
+ struct inode *inode;
struct btrfs_pending_snapshot *pending_snapshot;
struct btrfs_trans_handle *trans;
- int ret = 0;
- int err;
- unsigned long nr = 0;
+ int ret;
if (!root->ref_cows)
return -EINVAL;
@@ -372,20 +368,20 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
*/
ret = btrfs_reserve_metadata_space(root, 6);
if (ret)
- goto fail_unlock;
+ goto fail;
pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
if (!pending_snapshot) {
ret = -ENOMEM;
btrfs_unreserve_metadata_space(root, 6);
- goto fail_unlock;
+ goto fail;
}
pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
if (!pending_snapshot->name) {
ret = -ENOMEM;
kfree(pending_snapshot);
btrfs_unreserve_metadata_space(root, 6);
- goto fail_unlock;
+ goto fail;
}
memcpy(pending_snapshot->name, name, namelen);
pending_snapshot->name[namelen] = '\0';
@@ -395,10 +391,19 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
pending_snapshot->root = root;
list_add(&pending_snapshot->list,
&trans->transaction->pending_snapshots);
- err = btrfs_commit_transaction(trans, root);
+ ret = btrfs_commit_transaction(trans, root);
+ BUG_ON(ret);
+ btrfs_unreserve_metadata_space(root, 6);
-fail_unlock:
- btrfs_btree_balance_dirty(root, nr);
+ inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
+ if (IS_ERR(inode)) {
+ ret = PTR_ERR(inode);
+ goto fail;
+ }
+ BUG_ON(!inode);
+ d_instantiate(dentry, inode);
+ ret = 0;
+fail:
return ret;
}
@@ -1027,8 +1032,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
BUG_ON(!trans);
/* punch hole in destination first */
- btrfs_drop_extents(trans, root, inode, off, off + len,
- off + len, 0, &hint_byte, 1);
+ btrfs_drop_extents(trans, inode, off, off + len, &hint_byte, 1);
/* clone data */
key.objectid = src->i_ino;
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 5799bc46a30..5c2a9e78a94 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -291,16 +291,16 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
/*
* remove an ordered extent from the tree. No references are dropped
- * but, anyone waiting on this extent is woken up.
+ * and you must wake_up entry->wait. You must hold the tree mutex
+ * while you call this function.
*/
-int btrfs_remove_ordered_extent(struct inode *inode,
+static int __btrfs_remove_ordered_extent(struct inode *inode,
struct btrfs_ordered_extent *entry)
{
struct btrfs_ordered_inode_tree *tree;
struct rb_node *node;
tree = &BTRFS_I(inode)->ordered_tree;
- mutex_lock(&tree->mutex);
node = &entry->rb_node;
rb_erase(node, &tree->tree);
tree->last = NULL;
@@ -326,16 +326,34 @@ int btrfs_remove_ordered_extent(struct inode *inode,
}
spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
+ return 0;
+}
+
+/*
+ * remove an ordered extent from the tree. No references are dropped
+ * but any waiters are woken.
+ */
+int btrfs_remove_ordered_extent(struct inode *inode,
+ struct btrfs_ordered_extent *entry)
+{
+ struct btrfs_ordered_inode_tree *tree;
+ int ret;
+
+ tree = &BTRFS_I(inode)->ordered_tree;
+ mutex_lock(&tree->mutex);
+ ret = __btrfs_remove_ordered_extent(inode, entry);
mutex_unlock(&tree->mutex);
wake_up(&entry->wait);
- return 0;
+
+ return ret;
}
/*
* wait for all the ordered extents in a root. This is done when balancing
* space between drives.
*/
-int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
+int btrfs_wait_ordered_extents(struct btrfs_root *root,
+ int nocow_only, int delay_iput)
{
struct list_head splice;
struct list_head *cur;
@@ -372,7 +390,10 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
if (inode) {
btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
- iput(inode);
+ if (delay_iput)
+ btrfs_add_delayed_iput(inode);
+ else
+ iput(inode);
} else {
btrfs_put_ordered_extent(ordered);
}
@@ -430,7 +451,7 @@ again:
btrfs_wait_ordered_range(inode, 0, (u64)-1);
else
filemap_flush(inode->i_mapping);
- iput(inode);
+ btrfs_add_delayed_iput(inode);
}
cond_resched();
@@ -589,7 +610,7 @@ out:
* After an extent is done, call this to conditionally update the on disk
* i_size. i_size is updated to cover any fully written part of the file.
*/
-int btrfs_ordered_update_i_size(struct inode *inode,
+int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered)
{
struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
@@ -597,18 +618,32 @@ int btrfs_ordered_update_i_size(struct inode *inode,
u64 disk_i_size;
u64 new_i_size;
u64 i_size_test;
+ u64 i_size = i_size_read(inode);
struct rb_node *node;
+ struct rb_node *prev = NULL;
struct btrfs_ordered_extent *test;
+ int ret = 1;
+
+ if (ordered)
+ offset = entry_end(ordered);
+ else
+ offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);
mutex_lock(&tree->mutex);
disk_i_size = BTRFS_I(inode)->disk_i_size;
+ /* truncate file */
+ if (disk_i_size > i_size) {
+ BTRFS_I(inode)->disk_i_size = i_size;
+ ret = 0;
+ goto out;
+ }
+
/*
* if the disk i_size is already at the inode->i_size, or
* this ordered extent is inside the disk i_size, we're done
*/
- if (disk_i_size >= inode->i_size ||
- ordered->file_offset + ordered->len <= disk_i_size) {
+ if (disk_i_size == i_size || offset <= disk_i_size) {
goto out;
}
@@ -616,8 +651,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
* we can't update the disk_isize if there are delalloc bytes
* between disk_i_size and this ordered extent
*/
- if (test_range_bit(io_tree, disk_i_size,
- ordered->file_offset + ordered->len - 1,
+ if (test_range_bit(io_tree, disk_i_size, offset - 1,
EXTENT_DELALLOC, 0, NULL)) {
goto out;
}
@@ -626,20 +660,32 @@ int btrfs_ordered_update_i_size(struct inode *inode,
* if we find an ordered extent then we can't update disk i_size
* yet
*/
- node = &ordered->rb_node;
- while (1) {
- node = rb_prev(node);
- if (!node)
- break;
+ if (ordered) {
+ node = rb_prev(&ordered->rb_node);
+ } else {
+ prev = tree_search(tree, offset);
+ /*
+ * we insert file extents without involving ordered struct,
+ * so there should be no ordered struct cover this offset
+ */
+ if (prev) {
+ test = rb_entry(prev, struct btrfs_ordered_extent,
+ rb_node);
+ BUG_ON(offset_in_entry(test, offset));
+ }
+ node = prev;
+ }
+ while (node) {
test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
if (test->file_offset + test->len <= disk_i_size)
break;
- if (test->file_offset >= inode->i_size)
+ if (test->file_offset >= i_size)
break;
if (test->file_offset >= disk_i_size)
goto out;
+ node = rb_prev(node);
}
- new_i_size = min_t(u64, entry_end(ordered), i_size_read(inode));
+ new_i_size = min_t(u64, offset, i_size);
/*
* at this point, we know we can safely update i_size to at least
@@ -647,7 +693,14 @@ int btrfs_ordered_update_i_size(struct inode *inode,
* walk forward and see if ios from higher up in the file have
* finished.
*/
- node = rb_next(&ordered->rb_node);
+ if (ordered) {
+ node = rb_next(&ordered->rb_node);
+ } else {
+ if (prev)
+ node = rb_next(prev);
+ else
+ node = rb_first(&tree->tree);
+ }
i_size_test = 0;
if (node) {
/*
@@ -655,10 +708,10 @@ int btrfs_ordered_update_i_size(struct inode *inode,
* between our ordered extent and the next one.
*/
test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
- if (test->file_offset > entry_end(ordered))
+ if (test->file_offset > offset)
i_size_test = test->file_offset;
} else {
- i_size_test = i_size_read(inode);
+ i_size_test = i_size;
}
/*
@@ -667,15 +720,25 @@ int btrfs_ordered_update_i_size(struct inode *inode,
* are no delalloc bytes in this area, it is safe to update
* disk_i_size to the end of the region.
*/
- if (i_size_test > entry_end(ordered) &&
- !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1,
- EXTENT_DELALLOC, 0, NULL)) {
- new_i_size = min_t(u64, i_size_test, i_size_read(inode));
+ if (i_size_test > offset &&
+ !test_range_bit(io_tree, offset, i_size_test - 1,
+ EXTENT_DELALLOC, 0, NULL)) {
+ new_i_size = min_t(u64, i_size_test, i_size);
}
BTRFS_I(inode)->disk_i_size = new_i_size;
+ ret = 0;
out:
+ /*
+ * we need to remove the ordered extent with the tree lock held
+ * so that other people calling this function don't find our fully
+ * processed ordered entry and skip updating the i_size
+ */
+ if (ordered)
+ __btrfs_remove_ordered_extent(inode, ordered);
mutex_unlock(&tree->mutex);
- return 0;
+ if (ordered)
+ wake_up(&ordered->wait);
+ return ret;
}
/*
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index f82e87488ca..1fe1282ef47 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -150,12 +150,13 @@ void btrfs_start_ordered_extent(struct inode *inode,
int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
struct btrfs_ordered_extent *
btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
-int btrfs_ordered_update_i_size(struct inode *inode,
+int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
struct btrfs_ordered_extent *ordered);
int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
-int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only);
int btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *inode);
+int btrfs_wait_ordered_extents(struct btrfs_root *root,
+ int nocow_only, int delay_iput);
#endif
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index cfcc93c93a7..ab7ab531874 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1561,6 +1561,20 @@ static int invalidate_extent_cache(struct btrfs_root *root,
return 0;
}
+static void put_inodes(struct list_head *list)
+{
+ struct inodevec *ivec;
+ while (!list_empty(list)) {
+ ivec = list_entry(list->next, struct inodevec, list);
+ list_del(&ivec->list);
+ while (ivec->nr > 0) {
+ ivec->nr--;
+ iput(ivec->inode[ivec->nr]);
+ }
+ kfree(ivec);
+ }
+}
+
static int find_next_key(struct btrfs_path *path, int level,
struct btrfs_key *key)
@@ -1723,6 +1737,11 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
btrfs_btree_balance_dirty(root, nr);
+ /*
+ * put inodes outside transaction, otherwise we may deadlock.
+ */
+ put_inodes(&inode_list);
+
if (replaced && rc->stage == UPDATE_DATA_PTRS)
invalidate_extent_cache(root, &key, &next_key);
}
@@ -1752,19 +1771,7 @@ out:
btrfs_btree_balance_dirty(root, nr);
- /*
- * put inodes while we aren't holding the tree locks
- */
- while (!list_empty(&inode_list)) {
- struct inodevec *ivec;
- ivec = list_entry(inode_list.next, struct inodevec, list);
- list_del(&ivec->list);
- while (ivec->nr > 0) {
- ivec->nr--;
- iput(ivec->inode[ivec->nr]);
- }
- kfree(ivec);
- }
+ put_inodes(&inode_list);
if (replaced && rc->stage == UPDATE_DATA_PTRS)
invalidate_extent_cache(root, &key, &next_key);
@@ -3274,8 +3281,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
return -ENOMEM;
path = btrfs_alloc_path();
- if (!path)
+ if (!path) {
+ kfree(cluster);
return -ENOMEM;
+ }
rc->extents_found = 0;
rc->extents_skipped = 0;
@@ -3534,8 +3543,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
(unsigned long long)rc->block_group->key.objectid,
(unsigned long long)rc->block_group->flags);
- btrfs_start_delalloc_inodes(fs_info->tree_root);
- btrfs_wait_ordered_extents(fs_info->tree_root, 0);
+ btrfs_start_delalloc_inodes(fs_info->tree_root, 0);
+ btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0);
while (1) {
rc->extents_found = 0;
@@ -3755,6 +3764,8 @@ out:
BTRFS_DATA_RELOC_TREE_OBJECTID);
if (IS_ERR(fs_root))
err = PTR_ERR(fs_root);
+ else
+ btrfs_orphan_cleanup(fs_root);
}
return err;
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 752a5463bf5..8a1ea6e6457 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -66,7 +66,8 @@ enum {
Opt_degraded, Opt_subvol, Opt_device, Opt_nodatasum, Opt_nodatacow,
Opt_max_extent, Opt_max_inline, Opt_alloc_start, Opt_nobarrier,
Opt_ssd, Opt_nossd, Opt_ssd_spread, Opt_thread_pool, Opt_noacl,
- Opt_compress, Opt_notreelog, Opt_ratio, Opt_flushoncommit,
+ Opt_compress, Opt_compress_force, Opt_notreelog, Opt_ratio,
+ Opt_flushoncommit,
Opt_discard, Opt_err,
};
@@ -82,6 +83,7 @@ static match_table_t tokens = {
{Opt_alloc_start, "alloc_start=%s"},
{Opt_thread_pool, "thread_pool=%d"},
{Opt_compress, "compress"},
+ {Opt_compress_force, "compress-force"},
{Opt_ssd, "ssd"},
{Opt_ssd_spread, "ssd_spread"},
{Opt_nossd, "nossd"},
@@ -128,6 +130,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
substring_t args[MAX_OPT_ARGS];
char *p, *num;
int intarg;
+ int ret = 0;
if (!options)
return 0;
@@ -172,6 +175,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
printk(KERN_INFO "btrfs: use compression\n");
btrfs_set_opt(info->mount_opt, COMPRESS);
break;
+ case Opt_compress_force:
+ printk(KERN_INFO "btrfs: forcing compression\n");
+ btrfs_set_opt(info->mount_opt, FORCE_COMPRESS);
+ btrfs_set_opt(info->mount_opt, COMPRESS);
+ break;
case Opt_ssd:
printk(KERN_INFO "btrfs: use ssd allocation scheme\n");
btrfs_set_opt(info->mount_opt, SSD);
@@ -262,12 +270,18 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
case Opt_discard:
btrfs_set_opt(info->mount_opt, DISCARD);
break;
+ case Opt_err:
+ printk(KERN_INFO "btrfs: unrecognized mount option "
+ "'%s'\n", p);
+ ret = -EINVAL;
+ goto out;
default:
break;
}
}
+out:
kfree(options);
- return 0;
+ return ret;
}
/*
@@ -405,8 +419,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
return 0;
}
- btrfs_start_delalloc_inodes(root);
- btrfs_wait_ordered_extents(root, 0);
+ btrfs_start_delalloc_inodes(root, 0);
+ btrfs_wait_ordered_extents(root, 0, 0);
trans = btrfs_start_transaction(root, 1);
ret = btrfs_commit_transaction(trans, root);
@@ -450,6 +464,8 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
seq_puts(seq, ",notreelog");
if (btrfs_test_opt(root, FLUSHONCOMMIT))
seq_puts(seq, ",flushoncommit");
+ if (btrfs_test_opt(root, DISCARD))
+ seq_puts(seq, ",discard");
if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
seq_puts(seq, ",noacl");
return 0;
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c207e8c32c9..b2acc79f1b3 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -333,6 +333,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
memset(trans, 0, sizeof(*trans));
kmem_cache_free(btrfs_trans_handle_cachep, trans);
+ if (throttle)
+ btrfs_run_delayed_iputs(root);
+
return 0;
}
@@ -354,7 +357,7 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
* those extents are sent to disk but does not wait on them
*/
int btrfs_write_marked_extents(struct btrfs_root *root,
- struct extent_io_tree *dirty_pages)
+ struct extent_io_tree *dirty_pages, int mark)
{
int ret;
int err = 0;
@@ -367,7 +370,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
while (1) {
ret = find_first_extent_bit(dirty_pages, start, &start, &end,
- EXTENT_DIRTY);
+ mark);
if (ret)
break;
while (start <= end) {
@@ -413,7 +416,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
* on all the pages and clear them from the dirty pages state tree
*/
int btrfs_wait_marked_extents(struct btrfs_root *root,
- struct extent_io_tree *dirty_pages)
+ struct extent_io_tree *dirty_pages, int mark)
{
int ret;
int err = 0;
@@ -425,12 +428,12 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
unsigned long index;
while (1) {
- ret = find_first_extent_bit(dirty_pages, 0, &start, &end,
- EXTENT_DIRTY);
+ ret = find_first_extent_bit(dirty_pages, start, &start, &end,
+ mark);
if (ret)
break;
- clear_extent_dirty(dirty_pages, start, end, GFP_NOFS);
+ clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
while (start <= end) {
index = start >> PAGE_CACHE_SHIFT;
start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
@@ -460,13 +463,13 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
* those extents are on disk for transaction or log commit
*/
int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
- struct extent_io_tree *dirty_pages)
+ struct extent_io_tree *dirty_pages, int mark)
{
int ret;
int ret2;
- ret = btrfs_write_marked_extents(root, dirty_pages);
- ret2 = btrfs_wait_marked_extents(root, dirty_pages);
+ ret = btrfs_write_marked_extents(root, dirty_pages, mark);
+ ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);
return ret || ret2;
}
@@ -479,7 +482,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
return filemap_write_and_wait(btree_inode->i_mapping);
}
return btrfs_write_and_wait_marked_extents(root,
- &trans->transaction->dirty_pages);
+ &trans->transaction->dirty_pages,
+ EXTENT_DIRTY);
}
/*
@@ -497,13 +501,16 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
{
int ret;
u64 old_root_bytenr;
+ u64 old_root_used;
struct btrfs_root *tree_root = root->fs_info->tree_root;
+ old_root_used = btrfs_root_used(&root->root_item);
btrfs_write_dirty_block_groups(trans, root);
while (1) {
old_root_bytenr = btrfs_root_bytenr(&root->root_item);
- if (old_root_bytenr == root->node->start)
+ if (old_root_bytenr == root->node->start &&
+ old_root_used == btrfs_root_used(&root->root_item))
break;
btrfs_set_root_node(&root->root_item, root->node);
@@ -512,6 +519,7 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
&root->root_item);
BUG_ON(ret);
+ old_root_used = btrfs_root_used(&root->root_item);
ret = btrfs_write_dirty_block_groups(trans, root);
BUG_ON(ret);
}
@@ -795,7 +803,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
memcpy(&pending->root_key, &key, sizeof(key));
fail:
kfree(new_root_item);
- btrfs_unreserve_metadata_space(root, 6);
return ret;
}
@@ -807,7 +814,6 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
u64 index = 0;
struct btrfs_trans_handle *trans;
struct inode *parent_inode;
- struct inode *inode;
struct btrfs_root *parent_root;
parent_inode = pending->dentry->d_parent->d_inode;
@@ -839,8 +845,6 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
BUG_ON(ret);
- inode = btrfs_lookup_dentry(parent_inode, pending->dentry);
- d_instantiate(pending->dentry, inode);
fail:
btrfs_end_transaction(trans, fs_info->fs_root);
return ret;
@@ -994,11 +998,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
mutex_unlock(&root->fs_info->trans_mutex);
if (flush_on_commit) {
- btrfs_start_delalloc_inodes(root);
- ret = btrfs_wait_ordered_extents(root, 0);
+ btrfs_start_delalloc_inodes(root, 1);
+ ret = btrfs_wait_ordered_extents(root, 0, 1);
BUG_ON(ret);
} else if (snap_pending) {
- ret = btrfs_wait_ordered_extents(root, 1);
+ ret = btrfs_wait_ordered_extents(root, 0, 1);
BUG_ON(ret);
}
@@ -1116,6 +1120,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
current->journal_info = NULL;
kmem_cache_free(btrfs_trans_handle_cachep, trans);
+
+ if (current != root->fs_info->transaction_kthread)
+ btrfs_run_delayed_iputs(root);
+
return ret;
}
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index d4e3e7a6938..93c7ccb3311 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -107,10 +107,10 @@ void btrfs_throttle(struct btrfs_root *root);
int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
struct btrfs_root *root);
int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
- struct extent_io_tree *dirty_pages);
+ struct extent_io_tree *dirty_pages, int mark);
int btrfs_write_marked_extents(struct btrfs_root *root,
- struct extent_io_tree *dirty_pages);
+ struct extent_io_tree *dirty_pages, int mark);
int btrfs_wait_marked_extents(struct btrfs_root *root,
- struct extent_io_tree *dirty_pages);
+ struct extent_io_tree *dirty_pages, int mark);
int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
#endif
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 741666a7676..4a9434b622e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -542,8 +542,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
saved_nbytes = inode_get_bytes(inode);
/* drop any overlapping extents */
- ret = btrfs_drop_extents(trans, root, inode,
- start, extent_end, extent_end, start, &alloc_hint, 1);
+ ret = btrfs_drop_extents(trans, inode, start, extent_end,
+ &alloc_hint, 1);
BUG_ON(ret);
if (found_type == BTRFS_FILE_EXTENT_REG ||
@@ -930,6 +930,17 @@ out_nowrite:
return 0;
}
+static int insert_orphan_item(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root, u64 offset)
+{
+ int ret;
+ ret = btrfs_find_orphan_item(root, offset);
+ if (ret > 0)
+ ret = btrfs_insert_orphan_item(trans, root, offset);
+ return ret;
+}
+
+
/*
* There are a few corners where the link count of the file can't
* be properly maintained during replay. So, instead of adding
@@ -997,9 +1008,13 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
}
BTRFS_I(inode)->index_cnt = (u64)-1;
- if (inode->i_nlink == 0 && S_ISDIR(inode->i_mode)) {
- ret = replay_dir_deletes(trans, root, NULL, path,
- inode->i_ino, 1);
+ if (inode->i_nlink == 0) {
+ if (S_ISDIR(inode->i_mode)) {
+ ret = replay_dir_deletes(trans, root, NULL, path,
+ inode->i_ino, 1);
+ BUG_ON(ret);
+ }
+ ret = insert_orphan_item(trans, root, inode->i_ino);
BUG_ON(ret);
}
btrfs_free_path(path);
@@ -1587,7 +1602,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
/* inode keys are done during the first stage */
if (key.type == BTRFS_INODE_ITEM_KEY &&
wc->stage == LOG_WALK_REPLAY_INODES) {
- struct inode *inode;
struct btrfs_inode_item *inode_item;
u32 mode;
@@ -1603,31 +1617,16 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
eb, i, &key);
BUG_ON(ret);
- /* for regular files, truncate away
- * extents past the new EOF
+ /* for regular files, make sure corresponding
+ * orhpan item exist. extents past the new EOF
+ * will be truncated later by orphan cleanup.
*/
if (S_ISREG(mode)) {
- inode = read_one_inode(root,
- key.objectid);
- BUG_ON(!inode);
-
- ret = btrfs_truncate_inode_items(wc->trans,
- root, inode, inode->i_size,
- BTRFS_EXTENT_DATA_KEY);
+ ret = insert_orphan_item(wc->trans, root,
+ key.objectid);
BUG_ON(ret);
-
- /* if the nlink count is zero here, the iput
- * will free the inode. We bump it to make
- * sure it doesn't get freed until the link
- * count fixup is done
- */
- if (inode->i_nlink == 0) {
- btrfs_inc_nlink(inode);
- btrfs_update_inode(wc->trans,
- root, inode);
- }
- iput(inode);
}
+
ret = link_to_fixup_dir(wc->trans, root,
path, key.objectid);
BUG_ON(ret);
@@ -1977,10 +1976,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
{
int index1;
int index2;
+ int mark;
int ret;
struct btrfs_root *log = root->log_root;
struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
- u64 log_transid = 0;
+ unsigned long log_transid = 0;
mutex_lock(&root->log_mutex);
index1 = root->log_transid % 2;
@@ -2014,24 +2014,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
goto out;
}
+ log_transid = root->log_transid;
+ if (log_transid % 2 == 0)
+ mark = EXTENT_DIRTY;
+ else
+ mark = EXTENT_NEW;
+
/* we start IO on all the marked extents here, but we don't actually
* wait for them until later.
*/
- ret = btrfs_write_marked_extents(log, &log->dirty_log_pages);
+ ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
BUG_ON(ret);
btrfs_set_root_node(&log->root_item, log->node);
root->log_batch = 0;
- log_transid = root->log_transid;
root->log_transid++;
log->log_transid = root->log_transid;
root->log_start_pid = 0;
smp_mb();
/*
- * log tree has been flushed to disk, new modifications of
- * the log will be written to new positions. so it's safe to
- * allow log writers to go in.
+ * IO has been started, blocks of the log tree have WRITTEN flag set
+ * in their headers. new modifications of the log will be written to
+ * new positions. so it's safe to allow log writers to go in.
*/
mutex_unlock(&root->log_mutex);
@@ -2052,7 +2057,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
index2 = log_root_tree->log_transid % 2;
if (atomic_read(&log_root_tree->log_commit[index2])) {
- btrfs_wait_marked_extents(log, &log->dirty_log_pages);
+ btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
wait_log_commit(trans, log_root_tree,
log_root_tree->log_transid);
mutex_unlock(&log_root_tree->log_mutex);
@@ -2072,16 +2077,17 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
* check the full commit flag again
*/
if (root->fs_info->last_trans_log_full_commit == trans->transid) {
- btrfs_wait_marked_extents(log, &log->dirty_log_pages);
+ btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
mutex_unlock(&log_root_tree->log_mutex);
ret = -EAGAIN;
goto out_wake_log_root;
}
ret = btrfs_write_and_wait_marked_extents(log_root_tree,
- &log_root_tree->dirty_log_pages);
+ &log_root_tree->dirty_log_pages,
+ EXTENT_DIRTY | EXTENT_NEW);
BUG_ON(ret);
- btrfs_wait_marked_extents(log, &log->dirty_log_pages);
+ btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
btrfs_set_super_log_root(&root->fs_info->super_for_commit,
log_root_tree->node->start);
@@ -2147,12 +2153,12 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
while (1) {
ret = find_first_extent_bit(&log->dirty_log_pages,
- 0, &start, &end, EXTENT_DIRTY);
+ 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
if (ret)
break;
- clear_extent_dirty(&log->dirty_log_pages,
- start, end, GFP_NOFS);
+ clear_extent_bits(&log->dirty_log_pages, start, end,
+ EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
}
if (log->log_transid > 0) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 7eda483d7b5..41ecbb2347f 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1135,7 +1135,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
root->fs_info->avail_metadata_alloc_bits;
if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
- root->fs_info->fs_devices->rw_devices <= 4) {
+ root->fs_info->fs_devices->num_devices <= 4) {
printk(KERN_ERR "btrfs: unable to go below four devices "
"on raid10\n");
ret = -EINVAL;
@@ -1143,7 +1143,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
}
if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
- root->fs_info->fs_devices->rw_devices <= 2) {
+ root->fs_info->fs_devices->num_devices <= 2) {
printk(KERN_ERR "btrfs: unable to go below two "
"devices on raid1\n");
ret = -EINVAL;
@@ -1434,8 +1434,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
return -EINVAL;
bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder);
- if (!bdev)
- return -EIO;
+ if (IS_ERR(bdev))
+ return PTR_ERR(bdev);
if (root->fs_info->fs_devices->seeding) {
seeding_dev = 1;
@@ -2209,7 +2209,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
max_chunk_size = 10 * calc_size;
min_stripe_size = 64 * 1024 * 1024;
} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
- max_chunk_size = 4 * calc_size;
+ max_chunk_size = 256 * 1024 * 1024;
min_stripe_size = 32 * 1024 * 1024;
} else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
calc_size = 8 * 1024 * 1024;
@@ -2538,6 +2538,11 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
if (!em)
return 1;
+ if (btrfs_test_opt(root, DEGRADED)) {
+ free_extent_map(em);
+ return 0;
+ }
+
map = (struct map_lookup *)em->bdev;
for (i = 0; i < map->num_stripes; i++) {
if (!map->stripes[i].dev->writeable) {
@@ -2649,8 +2654,10 @@ again:
em = lookup_extent_mapping(em_tree, logical, *length);
read_unlock(&em_tree->lock);
- if (!em && unplug_page)
+ if (!em && unplug_page) {
+ kfree(multi);
return 0;
+ }
if (!em) {
printk(KERN_CRIT "unable to find logical %llu len %llu\n",
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index b6dd5967c48..193b58f7d3f 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -85,22 +85,23 @@ out:
return ret;
}
-int __btrfs_setxattr(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
+static int do_setxattr(struct btrfs_trans_handle *trans,
+ struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
{
struct btrfs_dir_item *di;
struct btrfs_root *root = BTRFS_I(inode)->root;
- struct btrfs_trans_handle *trans;
struct btrfs_path *path;
- int ret = 0, mod = 0;
+ size_t name_len = strlen(name);
+ int ret = 0;
+
+ if (name_len + size > BTRFS_MAX_XATTR_SIZE(root))
+ return -ENOSPC;
path = btrfs_alloc_path();
if (!path)
return -ENOMEM;
- trans = btrfs_join_transaction(root, 1);
- btrfs_set_trans_block_group(trans, inode);
-
/* first lets see if we already have this xattr */
di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name,
strlen(name), -1);
@@ -118,15 +119,12 @@ int __btrfs_setxattr(struct inode *inode, const char *name,
}
ret = btrfs_delete_one_dir_name(trans, root, path, di);
- if (ret)
- goto out;
+ BUG_ON(ret);
btrfs_release_path(root, path);
/* if we don't have a value then we are removing the xattr */
- if (!value) {
- mod = 1;
+ if (!value)
goto out;
- }
} else {
btrfs_release_path(root, path);
@@ -138,20 +136,45 @@ int __btrfs_setxattr(struct inode *inode, const char *name,
}
/* ok we have to create a completely new xattr */
- ret = btrfs_insert_xattr_item(trans, root, name, strlen(name),
- value, size, inode->i_ino);
+ ret = btrfs_insert_xattr_item(trans, root, path, inode->i_ino,
+ name, name_len, value, size);
+ BUG_ON(ret);
+out:
+ btrfs_free_path(path);
+ return ret;
+}
+
+int __btrfs_setxattr(struct btrfs_trans_handle *trans,
+ struct inode *inode, const char *name,
+ const void *value, size_t size, int flags)
+{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ int ret;
+
+ if (trans)
+ return do_setxattr(trans, inode, name, value, size, flags);
+
+ ret = btrfs_reserve_metadata_space(root, 2);
if (ret)
- goto out;
- mod = 1;
+ return ret;
-out:
- if (mod) {
- inode->i_ctime = CURRENT_TIME;
- ret = btrfs_update_inode(trans, root, inode);
+ trans = btrfs_start_transaction(root, 1);
+ if (!trans) {
+ ret = -ENOMEM;
+ goto out;
}
+ btrfs_set_trans_block_group(trans, inode);
- btrfs_end_transaction(trans, root);
- btrfs_free_path(path);
+ ret = do_setxattr(trans, inode, name, value, size, flags);
+ if (ret)
+ goto out;
+
+ inode->i_ctime = CURRENT_TIME;
+ ret = btrfs_update_inode(trans, root, inode);
+ BUG_ON(ret);
+out:
+ btrfs_end_transaction_throttle(trans, root);
+ btrfs_unreserve_metadata_space(root, 2);
return ret;
}
@@ -314,7 +337,9 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
if (size == 0)
value = ""; /* empty EA, do not remove */
- return __btrfs_setxattr(dentry->d_inode, name, value, size, flags);
+
+ return __btrfs_setxattr(NULL, dentry->d_inode, name, value, size,
+ flags);
}
int btrfs_removexattr(struct dentry *dentry, const char *name)
@@ -329,10 +354,13 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
if (!btrfs_is_valid_xattr(name))
return -EOPNOTSUPP;
- return __btrfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE);
+
+ return __btrfs_setxattr(NULL, dentry->d_inode, name, NULL, 0,
+ XATTR_REPLACE);
}
-int btrfs_xattr_security_init(struct inode *inode, struct inode *dir)
+int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
+ struct inode *inode, struct inode *dir)
{
int err;
size_t len;
@@ -354,7 +382,7 @@ int btrfs_xattr_security_init(struct inode *inode, struct inode *dir)
} else {
strcpy(name, XATTR_SECURITY_PREFIX);
strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix);
- err = __btrfs_setxattr(inode, name, value, len, 0);
+ err = __btrfs_setxattr(trans, inode, name, value, len, 0);
kfree(name);
}
diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
index c71e9c3cf3f..721efa0346e 100644
--- a/fs/btrfs/xattr.h
+++ b/fs/btrfs/xattr.h
@@ -27,15 +27,16 @@ extern struct xattr_handler *btrfs_xattr_handlers[];
extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
void *buffer, size_t size);
-extern int __btrfs_setxattr(struct inode *inode, const char *name,
- const void *value, size_t size, int flags);
-
+extern int __btrfs_setxattr(struct btrfs_trans_handle *trans,
+ struct inode *inode, const char *name,
+ const void *value, size_t size, int flags);
extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
void *buffer, size_t size);
extern int btrfs_setxattr(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags);
extern int btrfs_removexattr(struct dentry *dentry, const char *name);
-extern int btrfs_xattr_security_init(struct inode *inode, struct inode *dir);
+extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
+ struct inode *inode, struct inode *dir);
#endif /* __XATTR__ */
diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c
index 3797e0077b3..2906077ac79 100644
--- a/fs/cachefiles/bind.c
+++ b/fs/cachefiles/bind.c
@@ -84,7 +84,7 @@ int cachefiles_daemon_bind(struct cachefiles_cache *cache, char *args)
static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
{
struct cachefiles_object *fsdef;
- struct nameidata nd;
+ struct path path;
struct kstatfs stats;
struct dentry *graveyard, *cachedir, *root;
const struct cred *saved_cred;
@@ -114,15 +114,12 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
_debug("- fsdef %p", fsdef);
/* look up the directory at the root of the cache */
- memset(&nd, 0, sizeof(nd));
-
- ret = path_lookup(cache->rootdirname, LOOKUP_DIRECTORY, &nd);
+ ret = kern_path(cache->rootdirname, LOOKUP_DIRECTORY, &path);
if (ret < 0)
goto error_open_root;
- cache->mnt = mntget(nd.path.mnt);
- root = dget(nd.path.dentry);
- path_put(&nd.path);
+ cache->mnt = path.mnt;
+ root = path.dentry;
/* check parameters */
ret = -EOPNOTSUPP;
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 14ac4806e29..eeb4986ea7d 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -348,7 +348,17 @@ int cachefiles_delete_object(struct cachefiles_cache *cache,
dir = dget_parent(object->dentry);
mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_PARENT);
- ret = cachefiles_bury_object(cache, dir, object->dentry);
+
+ /* we need to check that our parent is _still_ our parent - it may have
+ * been renamed */
+ if (dir == object->dentry->d_parent) {
+ ret = cachefiles_bury_object(cache, dir, object->dentry);
+ } else {
+ /* it got moved, presumably by cachefilesd culling it, so it's
+ * no longer in the key path and we can ignore it */
+ mutex_unlock(&dir->d_inode->i_mutex);
+ ret = 0;
+ }
dput(dir);
_leave(" = %d", ret);
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index a6c8c6fe8df..1d833256386 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -11,7 +11,6 @@
#include <linux/mount.h>
#include <linux/file.h>
-#include <linux/ima.h>
#include "internal.h"
/*
@@ -923,7 +922,6 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page)
if (IS_ERR(file)) {
ret = PTR_ERR(file);
} else {
- ima_counts_get(file);
ret = -EIO;
if (file->f_op->write) {
pos = (loff_t) page->index << PAGE_SHIFT;
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 094ea65afc8..49503d2edc7 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -1,3 +1,7 @@
+Version 1.62
+------------
+Add sockopt=TCP_NODELAY mount option.
+
Version 1.61
------------
Fix append problem to Samba servers (files opened with O_APPEND could
@@ -5,7 +9,9 @@ have duplicated data). Fix oops in cifs_lookup. Workaround problem
mounting to OS/400 Netserve. Fix oops in cifs_get_tcp_session.
Disable use of server inode numbers when server only
partially supports them (e.g. for one server querying inode numbers on
-FindFirst fails but QPathInfo queries works).
+FindFirst fails but QPathInfo queries works). Fix oops with dfs in
+cifs_put_smb_ses. Fix mmap to work on directio mounts (needed
+for OpenOffice when on forcedirectio mount e.g.)
Version 1.60
-------------
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index fea9e898c4b..b44ce0a0711 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -269,7 +269,7 @@ static int add_mount_helper(struct vfsmount *newmnt, struct nameidata *nd,
int err;
mntget(newmnt);
- err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags, mntlist);
+ err = do_add_mount(newmnt, &nd->path, nd->path.mnt->mnt_flags | MNT_SHRINKABLE, mntlist);
switch (err) {
case 0:
path_put(&nd->path);
@@ -371,7 +371,6 @@ cifs_dfs_follow_mountpoint(struct dentry *dentry, struct nameidata *nd)
if (IS_ERR(mnt))
goto out_err;
- nd->path.mnt->mnt_flags |= MNT_SHRINKABLE;
rc = add_mount_helper(mnt, nd, &cifs_dfs_automount_list);
out:
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 29f1da761bb..8c6a0362717 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -758,7 +758,7 @@ const struct file_operations cifs_file_ops = {
};
const struct file_operations cifs_file_direct_ops = {
- /* no mmap, no aio, no readv -
+ /* no aio, no readv -
BB reevaluate whether they can be done with directio, no cache */
.read = cifs_user_read,
.write = cifs_user_write,
@@ -767,6 +767,7 @@ const struct file_operations cifs_file_direct_ops = {
.lock = cifs_lock,
.fsync = cifs_fsync,
.flush = cifs_flush,
+ .mmap = cifs_file_mmap,
.splice_read = generic_file_splice_read,
#ifdef CONFIG_CIFS_POSIX
.unlocked_ioctl = cifs_ioctl,
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index ac2b24c192f..78c1b86d55f 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -113,5 +113,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
extern const struct export_operations cifs_export_ops;
#endif /* EXPERIMENTAL */
-#define CIFS_VERSION "1.61"
+#define CIFS_VERSION "1.62"
#endif /* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 4b35f7ec058..ed751bb657d 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -149,6 +149,7 @@ struct TCP_Server_Info {
bool svlocal:1; /* local server or remote */
bool noblocksnd; /* use blocking sendmsg */
bool noautotune; /* do not autotune send buf sizes */
+ bool tcp_nodelay;
atomic_t inFlight; /* number of requests on the wire to server */
#ifdef CONFIG_CIFS_STATS2
atomic_t inSend; /* requests trying to send */
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 63ea83ff687..2e9e09ca0e3 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -98,7 +98,7 @@ struct smb_vol {
bool nostrictsync:1; /* do not force expensive SMBflush on every sync */
unsigned int rsize;
unsigned int wsize;
- unsigned int sockopt;
+ bool sockopt_tcp_nodelay:1;
unsigned short int port;
char *prepath;
};
@@ -1142,9 +1142,11 @@ cifs_parse_mount_options(char *options, const char *devname,
simple_strtoul(value, &value, 0);
}
} else if (strnicmp(data, "sockopt", 5) == 0) {
- if (value && *value) {
- vol->sockopt =
- simple_strtoul(value, &value, 0);
+ if (!value || !*value) {
+ cERROR(1, ("no socket option specified"));
+ continue;
+ } else if (strnicmp(value, "TCP_NODELAY", 11) == 0) {
+ vol->sockopt_tcp_nodelay = 1;
}
} else if (strnicmp(data, "netbiosname", 4) == 0) {
if (!value || !*value || (*value == ' ')) {
@@ -1514,6 +1516,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
tcp_ses->noblocksnd = volume_info->noblocksnd;
tcp_ses->noautotune = volume_info->noautotune;
+ tcp_ses->tcp_nodelay = volume_info->sockopt_tcp_nodelay;
atomic_set(&tcp_ses->inFlight, 0);
init_waitqueue_head(&tcp_ses->response_q);
init_waitqueue_head(&tcp_ses->request_q);
@@ -1764,6 +1767,7 @@ static int
ipv4_connect(struct TCP_Server_Info *server)
{
int rc = 0;
+ int val;
bool connected = false;
__be16 orig_port = 0;
struct socket *socket = server->ssocket;
@@ -1845,6 +1849,14 @@ ipv4_connect(struct TCP_Server_Info *server)
socket->sk->sk_rcvbuf = 140 * 1024;
}
+ if (server->tcp_nodelay) {
+ val = 1;
+ rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
+ (char *)&val, sizeof(val));
+ if (rc)
+ cFYI(1, ("set TCP_NODELAY socket option error %d", rc));
+ }
+
cFYI(1, ("sndbuf %d rcvbuf %d rcvtimeo 0x%lx",
socket->sk->sk_sndbuf,
socket->sk->sk_rcvbuf, socket->sk->sk_rcvtimeo));
@@ -1916,6 +1928,7 @@ static int
ipv6_connect(struct TCP_Server_Info *server)
{
int rc = 0;
+ int val;
bool connected = false;
__be16 orig_port = 0;
struct socket *socket = server->ssocket;
@@ -1987,6 +2000,15 @@ ipv6_connect(struct TCP_Server_Info *server)
*/
socket->sk->sk_rcvtimeo = 7 * HZ;
socket->sk->sk_sndtimeo = 5 * HZ;
+
+ if (server->tcp_nodelay) {
+ val = 1;
+ rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
+ (char *)&val, sizeof(val));
+ if (rc)
+ cFYI(1, ("set TCP_NODELAY socket option error %d", rc));
+ }
+
server->ssocket = socket;
return rc;
@@ -2287,12 +2309,12 @@ int
cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
char *mount_data_global, const char *devname)
{
- int rc = 0;
+ int rc;
int xid;
struct smb_vol *volume_info;
- struct cifsSesInfo *pSesInfo = NULL;
- struct cifsTconInfo *tcon = NULL;
- struct TCP_Server_Info *srvTcp = NULL;
+ struct cifsSesInfo *pSesInfo;
+ struct cifsTconInfo *tcon;
+ struct TCP_Server_Info *srvTcp;
char *full_path;
char *mount_data = mount_data_global;
#ifdef CONFIG_CIFS_DFS_UPCALL
@@ -2301,6 +2323,10 @@ cifs_mount(struct super_block *sb, struct cifs_sb_info *cifs_sb,
int referral_walks_count = 0;
try_mount_again:
#endif
+ rc = 0;
+ tcon = NULL;
+ pSesInfo = NULL;
+ srvTcp = NULL;
full_path = NULL;
xid = GetXid();
@@ -2597,6 +2623,7 @@ remote_path_check:
cleanup_volume_info(&volume_info);
referral_walks_count++;
+ FreeXid(xid);
goto try_mount_again;
}
#else /* No DFS support, return error on mount */
diff --git a/fs/cifs/export.c b/fs/cifs/export.c
index 75949d6a5f1..6177f7cca16 100644
--- a/fs/cifs/export.c
+++ b/fs/cifs/export.c
@@ -24,7 +24,7 @@
*/
/*
- * See Documentation/filesystems/Exporting
+ * See Documentation/filesystems/nfs/Exporting
* and examples in fs/exportfs
*
* Since cifs is a network file system, an "fsid" must be included for
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index cf18ee76559..e3fda978f48 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -1762,8 +1762,18 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
CIFS_MOUNT_MAP_SPECIAL_CHR);
}
- if (!rc)
+ if (!rc) {
rc = inode_setattr(inode, attrs);
+
+ /* force revalidate when any of these times are set since some
+ of the fs types (eg ext3, fat) do not have fine enough
+ time granularity to match protocol, and we do not have a
+ a way (yet) to query the server fs's time granularity (and
+ whether it rounds times down).
+ */
+ if (!rc && (attrs->ia_valid & (ATTR_MTIME | ATTR_CTIME)))
+ cifsInode->time = 0;
+ }
out:
kfree(args);
kfree(full_path);
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index f84062f9a98..c343b14ba2d 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -77,6 +77,11 @@ cifs_readdir_lookup(struct dentry *parent, struct qstr *name,
cFYI(1, ("For %s", name->name));
+ if (parent->d_op && parent->d_op->d_hash)
+ parent->d_op->d_hash(parent, name);
+ else
+ name->hash = full_name_hash(name->name, name->len);
+
dentry = d_lookup(parent, name);
if (dentry) {
/* FIXME: check for inode number changes? */
@@ -666,12 +671,11 @@ static int cifs_get_name_from_search_buf(struct qstr *pqst,
min(len, max_len), nlt,
cifs_sb->mnt_cifs_flags &
CIFS_MOUNT_MAP_SPECIAL_CHR);
+ pqst->len -= nls_nullsize(nlt);
} else {
pqst->name = filename;
pqst->len = len;
}
- pqst->hash = full_name_hash(pqst->name, pqst->len);
-/* cFYI(1, ("filldir on %s",pqst->name)); */
return rc;
}
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 7085a6275c4..aaa9c1c5a5b 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -223,9 +223,9 @@ static void unicode_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
/* null user mount */
*bcc_ptr = 0;
*(bcc_ptr+1) = 0;
- } else { /* 300 should be long enough for any conceivable user name */
+ } else {
bytes_ret = cifs_strtoUCS((__le16 *) bcc_ptr, ses->userName,
- 300, nls_cp);
+ MAX_USERNAME_SIZE, nls_cp);
}
bcc_ptr += 2 * bytes_ret;
bcc_ptr += 2; /* account for null termination */
@@ -246,11 +246,10 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifsSesInfo *ses,
/* copy user */
if (ses->userName == NULL) {
/* BB what about null user mounts - check that we do this BB */
- } else { /* 300 should be long enough for any conceivable user name */
- strncpy(bcc_ptr, ses->userName, 300);
+ } else {
+ strncpy(bcc_ptr, ses->userName, MAX_USERNAME_SIZE);
}
- /* BB improve check for overflow */
- bcc_ptr += strnlen(ses->userName, 300);
+ bcc_ptr += strnlen(ses->userName, MAX_USERNAME_SIZE);
*bcc_ptr = 0;
bcc_ptr++; /* account for null termination */
diff --git a/fs/compat.c b/fs/compat.c
index 6c19040ffee..00d90c2e66f 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -38,8 +38,6 @@
#include <linux/dirent.h>
#include <linux/fsnotify.h>
#include <linux/highuid.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
#include <linux/nfsd/syscall.h>
#include <linux/personality.h>
#include <linux/rwsem.h>
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 14cbc831422..0ca9ec4a79c 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -301,6 +301,12 @@ static int sg_ioctl_trans(unsigned int fd, unsigned int cmd,
u32 data;
void __user *dxferp;
int err;
+ int interface_id;
+
+ if (get_user(interface_id, &sgio32->interface_id))
+ return -EFAULT;
+ if (interface_id != 'S')
+ return sys_ioctl(fd, cmd, (unsigned long)sgio32);
if (get_user(iovec_count, &sgio32->iovec_count))
return -EFAULT;
@@ -936,6 +942,7 @@ COMPATIBLE_IOCTL(TCSETSF)
COMPATIBLE_IOCTL(TIOCLINUX)
COMPATIBLE_IOCTL(TIOCSBRK)
COMPATIBLE_IOCTL(TIOCCBRK)
+COMPATIBLE_IOCTL(TIOCGSID)
COMPATIBLE_IOCTL(TIOCGICOUNT)
/* Little t */
COMPATIBLE_IOCTL(TIOCGETD)
@@ -1005,6 +1012,9 @@ COMPATIBLE_IOCTL(SCSI_IOCTL_SEND_COMMAND)
COMPATIBLE_IOCTL(SCSI_IOCTL_PROBE_HOST)
COMPATIBLE_IOCTL(SCSI_IOCTL_GET_PCI)
#endif
+/* Big V (don't complain on serial console) */
+IGNORE_IOCTL(VT_OPENQRY)
+IGNORE_IOCTL(VT_GETMODE)
/* Little p (/dev/rtc, /dev/envctrl, etc.) */
COMPATIBLE_IOCTL(RTC_AIE_ON)
COMPATIBLE_IOCTL(RTC_AIE_OFF)
@@ -1035,6 +1045,8 @@ COMPATIBLE_IOCTL(FIOQSIZE)
#ifdef CONFIG_BLOCK
/* loop */
IGNORE_IOCTL(LOOP_CLR_FD)
+/* md calls this on random blockdevs */
+IGNORE_IOCTL(RAID_VERSION)
/* SG stuff */
COMPATIBLE_IOCTL(SG_SET_TIMEOUT)
COMPATIBLE_IOCTL(SG_GET_TIMEOUT)
@@ -1600,8 +1612,6 @@ static long do_ioctl_trans(int fd, unsigned int cmd,
case KDSKBMETA:
case KDSKBLED:
case KDSETLED:
- /* SG stuff */
- case SG_SET_TRANSFORM:
/* AUTOFS */
case AUTOFS_IOC_READY:
case AUTOFS_IOC_FAIL:
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index c8afa6b1d91..32a5f46b115 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -121,8 +121,10 @@ static int get_target(const char *symname, struct path *path,
ret = -ENOENT;
path_put(path);
}
- } else
+ } else {
ret = -EPERM;
+ path_put(path);
+ }
}
return ret;
diff --git a/fs/dcache.c b/fs/dcache.c
index a100fa35a48..953173a293a 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -978,6 +978,7 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
q.hash = full_name_hash(q.name, q.len);
return d_alloc(parent, &q);
}
+EXPORT_SYMBOL(d_alloc_name);
/* the caller must hold dcache_lock */
static void __d_instantiate(struct dentry *dentry, struct inode *inode)
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index b486169f42b..274ac865bae 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -160,15 +160,8 @@ static int debugfs_create_by_name(const char *name, mode_t mode,
* block. A pointer to that is in the struct vfsmount that we
* have around.
*/
- if (!parent) {
- if (debugfs_mount && debugfs_mount->mnt_sb) {
- parent = debugfs_mount->mnt_sb->s_root;
- }
- }
- if (!parent) {
- pr_debug("debugfs: Ah! can not find a parent!\n");
- return -EFAULT;
- }
+ if (!parent)
+ parent = debugfs_mount->mnt_sb->s_root;
*dentry = NULL;
mutex_lock(&parent->d_inode->i_mutex);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 4012885d027..e82adc2debb 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -1206,7 +1206,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
* NOTE: filesystems with their own locking have to handle this
* on their own.
*/
- if (dio->flags & DIO_LOCKING) {
+ if (flags & DIO_LOCKING) {
if (unlikely((rw & WRITE) && retval < 0)) {
loff_t isize = i_size_read(inode);
if (end > isize)
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index fbb6e5eed69..7cb0a59f4b9 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -1748,7 +1748,7 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
char *cipher_name, size_t *key_size)
{
char dummy_key[ECRYPTFS_MAX_KEY_BYTES];
- char *full_alg_name;
+ char *full_alg_name = NULL;
int rc;
*key_tfm = NULL;
@@ -1763,7 +1763,6 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
if (rc)
goto out;
*key_tfm = crypto_alloc_blkcipher(full_alg_name, 0, CRYPTO_ALG_ASYNC);
- kfree(full_alg_name);
if (IS_ERR(*key_tfm)) {
rc = PTR_ERR(*key_tfm);
printk(KERN_ERR "Unable to allocate crypto cipher with name "
@@ -1786,6 +1785,7 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
goto out;
}
out:
+ kfree(full_alg_name);
return rc;
}
diff --git a/fs/ecryptfs/dentry.c b/fs/ecryptfs/dentry.c
index 2dda5ade75b..8f006a0d607 100644
--- a/fs/ecryptfs/dentry.c
+++ b/fs/ecryptfs/dentry.c
@@ -62,7 +62,7 @@ static int ecryptfs_d_revalidate(struct dentry *dentry, struct nameidata *nd)
struct inode *lower_inode =
ecryptfs_inode_to_lower(dentry->d_inode);
- fsstack_copy_attr_all(dentry->d_inode, lower_inode, NULL);
+ fsstack_copy_attr_all(dentry->d_inode, lower_inode);
}
out:
return rc;
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 9e944057001..678172b61be 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -158,7 +158,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
struct dentry *ecryptfs_dentry = file->f_path.dentry;
/* Private value of ecryptfs_dentry allocated in
* ecryptfs_lookup() */
- struct dentry *lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
+ struct dentry *lower_dentry;
struct ecryptfs_file_info *file_info;
mount_crypt_stat = &ecryptfs_superblock_to_private(
@@ -191,13 +191,6 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
| ECRYPTFS_ENCRYPTED);
}
mutex_unlock(&crypt_stat->cs_mutex);
- if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
- && !(file->f_flags & O_RDONLY)) {
- rc = -EPERM;
- printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
- "file must hence be opened RO\n", __func__);
- goto out;
- }
if (!ecryptfs_inode_to_private(inode)->lower_file) {
rc = ecryptfs_init_persistent_file(ecryptfs_dentry);
if (rc) {
@@ -208,6 +201,13 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
goto out;
}
}
+ if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
+ && !(file->f_flags & O_RDONLY)) {
+ rc = -EPERM;
+ printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
+ "file must hence be opened RO\n", __func__);
+ goto out;
+ }
ecryptfs_set_file_lower(
file, ecryptfs_inode_to_private(inode)->lower_file);
if (S_ISDIR(ecryptfs_dentry->d_inode->i_mode)) {
@@ -299,7 +299,6 @@ static int ecryptfs_ioctl(struct inode *inode, struct file *file,
const struct file_operations ecryptfs_dir_fops = {
.readdir = ecryptfs_readdir,
.ioctl = ecryptfs_ioctl,
- .mmap = generic_file_mmap,
.open = ecryptfs_open,
.flush = ecryptfs_flush,
.release = ecryptfs_release,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 056fed62d0d..4a430ab4115 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -282,7 +282,8 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
goto out;
}
rc = ecryptfs_interpose(lower_dentry, ecryptfs_dentry,
- ecryptfs_dir_inode->i_sb, 1);
+ ecryptfs_dir_inode->i_sb,
+ ECRYPTFS_INTERPOSE_FLAG_D_ADD);
if (rc) {
printk(KERN_ERR "%s: Error interposing; rc = [%d]\n",
__func__, rc);
@@ -463,9 +464,6 @@ out_lock:
unlock_dir(lower_dir_dentry);
dput(lower_new_dentry);
dput(lower_old_dentry);
- d_drop(lower_old_dentry);
- d_drop(new_dentry);
- d_drop(old_dentry);
return rc;
}
@@ -614,6 +612,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct dentry *lower_new_dentry;
struct dentry *lower_old_dir_dentry;
struct dentry *lower_new_dir_dentry;
+ struct dentry *trap = NULL;
lower_old_dentry = ecryptfs_dentry_to_lower(old_dentry);
lower_new_dentry = ecryptfs_dentry_to_lower(new_dentry);
@@ -621,14 +620,24 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
dget(lower_new_dentry);
lower_old_dir_dentry = dget_parent(lower_old_dentry);
lower_new_dir_dentry = dget_parent(lower_new_dentry);
- lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+ trap = lock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
+ /* source should not be ancestor of target */
+ if (trap == lower_old_dentry) {
+ rc = -EINVAL;
+ goto out_lock;
+ }
+ /* target should not be ancestor of source */
+ if (trap == lower_new_dentry) {
+ rc = -ENOTEMPTY;
+ goto out_lock;
+ }
rc = vfs_rename(lower_old_dir_dentry->d_inode, lower_old_dentry,
lower_new_dir_dentry->d_inode, lower_new_dentry);
if (rc)
goto out_lock;
- fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode, NULL);
+ fsstack_copy_attr_all(new_dir, lower_new_dir_dentry->d_inode);
if (new_dir != old_dir)
- fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode, NULL);
+ fsstack_copy_attr_all(old_dir, lower_old_dir_dentry->d_inode);
out_lock:
unlock_rename(lower_old_dir_dentry, lower_new_dir_dentry);
dput(lower_new_dentry->d_parent);
@@ -715,31 +724,31 @@ static void *ecryptfs_follow_link(struct dentry *dentry, struct nameidata *nd)
/* Released in ecryptfs_put_link(); only release here on error */
buf = kmalloc(len, GFP_KERNEL);
if (!buf) {
- rc = -ENOMEM;
+ buf = ERR_PTR(-ENOMEM);
goto out;
}
old_fs = get_fs();
set_fs(get_ds());
rc = dentry->d_inode->i_op->readlink(dentry, (char __user *)buf, len);
set_fs(old_fs);
- if (rc < 0)
- goto out_free;
- else
+ if (rc < 0) {
+ kfree(buf);
+ buf = ERR_PTR(rc);
+ } else
buf[rc] = '\0';
- rc = 0;
- nd_set_link(nd, buf);
- goto out;
-out_free:
- kfree(buf);
out:
- return ERR_PTR(rc);
+ nd_set_link(nd, buf);
+ return NULL;
}
static void
ecryptfs_put_link(struct dentry *dentry, struct nameidata *nd, void *ptr)
{
- /* Free the char* */
- kfree(nd_get_link(nd));
+ char *buf = nd_get_link(nd);
+ if (!IS_ERR(buf)) {
+ /* Free the char* */
+ kfree(buf);
+ }
}
/**
@@ -772,18 +781,23 @@ upper_size_to_lower_size(struct ecryptfs_crypt_stat *crypt_stat,
}
/**
- * ecryptfs_truncate
+ * truncate_upper
* @dentry: The ecryptfs layer dentry
- * @new_length: The length to expand the file to
+ * @ia: Address of the ecryptfs inode's attributes
+ * @lower_ia: Address of the lower inode's attributes
*
* Function to handle truncations modifying the size of the file. Note
* that the file sizes are interpolated. When expanding, we are simply
- * writing strings of 0's out. When truncating, we need to modify the
- * underlying file size according to the page index interpolations.
+ * writing strings of 0's out. When truncating, we truncate the upper
+ * inode and update the lower_ia according to the page index
+ * interpolations. If ATTR_SIZE is set in lower_ia->ia_valid upon return,
+ * the caller must use lower_ia in a call to notify_change() to perform
+ * the truncation of the lower inode.
*
* Returns zero on success; non-zero otherwise
*/
-int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
+static int truncate_upper(struct dentry *dentry, struct iattr *ia,
+ struct iattr *lower_ia)
{
int rc = 0;
struct inode *inode = dentry->d_inode;
@@ -794,8 +808,10 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
loff_t lower_size_before_truncate;
loff_t lower_size_after_truncate;
- if (unlikely((new_length == i_size)))
+ if (unlikely((ia->ia_size == i_size))) {
+ lower_ia->ia_valid &= ~ATTR_SIZE;
goto out;
+ }
crypt_stat = &ecryptfs_inode_to_private(dentry->d_inode)->crypt_stat;
/* Set up a fake ecryptfs file, this is used to interface with
* the file in the underlying filesystem so that the
@@ -815,28 +831,30 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
&fake_ecryptfs_file,
ecryptfs_inode_to_private(dentry->d_inode)->lower_file);
/* Switch on growing or shrinking file */
- if (new_length > i_size) {
+ if (ia->ia_size > i_size) {
char zero[] = { 0x00 };
+ lower_ia->ia_valid &= ~ATTR_SIZE;
/* Write a single 0 at the last position of the file;
* this triggers code that will fill in 0's throughout
* the intermediate portion of the previous end of the
* file and the new and of the file */
rc = ecryptfs_write(&fake_ecryptfs_file, zero,
- (new_length - 1), 1);
- } else { /* new_length < i_size_read(inode) */
- /* We're chopping off all the pages down do the page
- * in which new_length is located. Fill in the end of
- * that page from (new_length & ~PAGE_CACHE_MASK) to
+ (ia->ia_size - 1), 1);
+ } else { /* ia->ia_size < i_size_read(inode) */
+ /* We're chopping off all the pages down to the page
+ * in which ia->ia_size is located. Fill in the end of
+ * that page from (ia->ia_size & ~PAGE_CACHE_MASK) to
* PAGE_CACHE_SIZE with zeros. */
size_t num_zeros = (PAGE_CACHE_SIZE
- - (new_length & ~PAGE_CACHE_MASK));
+ - (ia->ia_size & ~PAGE_CACHE_MASK));
if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
- rc = vmtruncate(inode, new_length);
+ rc = vmtruncate(inode, ia->ia_size);
if (rc)
goto out_free;
- rc = vmtruncate(lower_dentry->d_inode, new_length);
+ lower_ia->ia_size = ia->ia_size;
+ lower_ia->ia_valid |= ATTR_SIZE;
goto out_free;
}
if (num_zeros) {
@@ -848,7 +866,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
goto out_free;
}
rc = ecryptfs_write(&fake_ecryptfs_file, zeros_virt,
- new_length, num_zeros);
+ ia->ia_size, num_zeros);
kfree(zeros_virt);
if (rc) {
printk(KERN_ERR "Error attempting to zero out "
@@ -857,7 +875,7 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
goto out_free;
}
}
- vmtruncate(inode, new_length);
+ vmtruncate(inode, ia->ia_size);
rc = ecryptfs_write_inode_size_to_metadata(inode);
if (rc) {
printk(KERN_ERR "Problem with "
@@ -870,10 +888,12 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
lower_size_before_truncate =
upper_size_to_lower_size(crypt_stat, i_size);
lower_size_after_truncate =
- upper_size_to_lower_size(crypt_stat, new_length);
- if (lower_size_after_truncate < lower_size_before_truncate)
- vmtruncate(lower_dentry->d_inode,
- lower_size_after_truncate);
+ upper_size_to_lower_size(crypt_stat, ia->ia_size);
+ if (lower_size_after_truncate < lower_size_before_truncate) {
+ lower_ia->ia_size = lower_size_after_truncate;
+ lower_ia->ia_valid |= ATTR_SIZE;
+ } else
+ lower_ia->ia_valid &= ~ATTR_SIZE;
}
out_free:
if (ecryptfs_file_to_private(&fake_ecryptfs_file))
@@ -883,6 +903,33 @@ out:
return rc;
}
+/**
+ * ecryptfs_truncate
+ * @dentry: The ecryptfs layer dentry
+ * @new_length: The length to expand the file to
+ *
+ * Simple function that handles the truncation of an eCryptfs inode and
+ * its corresponding lower inode.
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
+{
+ struct iattr ia = { .ia_valid = ATTR_SIZE, .ia_size = new_length };
+ struct iattr lower_ia = { .ia_valid = 0 };
+ int rc;
+
+ rc = truncate_upper(dentry, &ia, &lower_ia);
+ if (!rc && lower_ia.ia_valid & ATTR_SIZE) {
+ struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
+
+ mutex_lock(&lower_dentry->d_inode->i_mutex);
+ rc = notify_change(lower_dentry, &lower_ia);
+ mutex_unlock(&lower_dentry->d_inode->i_mutex);
+ }
+ return rc;
+}
+
static int
ecryptfs_permission(struct inode *inode, int mask)
{
@@ -905,6 +952,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
{
int rc = 0;
struct dentry *lower_dentry;
+ struct iattr lower_ia;
struct inode *inode;
struct inode *lower_inode;
struct ecryptfs_crypt_stat *crypt_stat;
@@ -943,15 +991,11 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
}
}
mutex_unlock(&crypt_stat->cs_mutex);
+ memcpy(&lower_ia, ia, sizeof(lower_ia));
+ if (ia->ia_valid & ATTR_FILE)
+ lower_ia.ia_file = ecryptfs_file_to_lower(ia->ia_file);
if (ia->ia_valid & ATTR_SIZE) {
- ecryptfs_printk(KERN_DEBUG,
- "ia->ia_valid = [0x%x] ATTR_SIZE" " = [0x%x]\n",
- ia->ia_valid, ATTR_SIZE);
- rc = ecryptfs_truncate(dentry, ia->ia_size);
- /* ecryptfs_truncate handles resizing of the lower file */
- ia->ia_valid &= ~ATTR_SIZE;
- ecryptfs_printk(KERN_DEBUG, "ia->ia_valid = [%x]\n",
- ia->ia_valid);
+ rc = truncate_upper(dentry, ia, &lower_ia);
if (rc < 0)
goto out;
}
@@ -960,14 +1004,29 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
* mode change is for clearing setuid/setgid bits. Allow lower fs
* to interpret this in its own way.
*/
- if (ia->ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
- ia->ia_valid &= ~ATTR_MODE;
+ if (lower_ia.ia_valid & (ATTR_KILL_SUID | ATTR_KILL_SGID))
+ lower_ia.ia_valid &= ~ATTR_MODE;
mutex_lock(&lower_dentry->d_inode->i_mutex);
- rc = notify_change(lower_dentry, ia);
+ rc = notify_change(lower_dentry, &lower_ia);
mutex_unlock(&lower_dentry->d_inode->i_mutex);
out:
- fsstack_copy_attr_all(inode, lower_inode, NULL);
+ fsstack_copy_attr_all(inode, lower_inode);
+ return rc;
+}
+
+int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *stat)
+{
+ struct kstat lower_stat;
+ int rc;
+
+ rc = vfs_getattr(ecryptfs_dentry_to_lower_mnt(dentry),
+ ecryptfs_dentry_to_lower(dentry), &lower_stat);
+ if (!rc) {
+ generic_fillattr(dentry->d_inode, stat);
+ stat->blocks = lower_stat.blocks;
+ }
return rc;
}
@@ -1100,6 +1159,7 @@ const struct inode_operations ecryptfs_dir_iops = {
const struct inode_operations ecryptfs_main_iops = {
.permission = ecryptfs_permission,
.setattr = ecryptfs_setattr,
+ .getattr = ecryptfs_getattr,
.setxattr = ecryptfs_setxattr,
.getxattr = ecryptfs_getxattr,
.listxattr = ecryptfs_listxattr,
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index c6ac85d6c70..ea2f92101df 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -35,7 +35,6 @@
#include <linux/key.h>
#include <linux/parser.h>
#include <linux/fs_stack.h>
-#include <linux/ima.h>
#include "ecryptfs_kernel.h"
/**
@@ -119,7 +118,6 @@ int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
const struct cred *cred = current_cred();
struct ecryptfs_inode_info *inode_info =
ecryptfs_inode_to_private(ecryptfs_dentry->d_inode);
- int opened_lower_file = 0;
int rc = 0;
mutex_lock(&inode_info->lower_file_mutex);
@@ -136,12 +134,9 @@ int ecryptfs_init_persistent_file(struct dentry *ecryptfs_dentry)
"for lower_dentry [0x%p] and lower_mnt [0x%p]; "
"rc = [%d]\n", lower_dentry, lower_mnt, rc);
inode_info->lower_file = NULL;
- } else
- opened_lower_file = 1;
+ }
}
mutex_unlock(&inode_info->lower_file_mutex);
- if (opened_lower_file)
- ima_counts_get(inode_info->lower_file);
return rc;
}
@@ -194,7 +189,7 @@ int ecryptfs_interpose(struct dentry *lower_dentry, struct dentry *dentry,
init_special_inode(inode, lower_inode->i_mode,
lower_inode->i_rdev);
dentry->d_op = &ecryptfs_dops;
- fsstack_copy_attr_all(inode, lower_inode, NULL);
+ fsstack_copy_attr_all(inode, lower_inode);
/* This size will be overwritten for real files w/ headers and
* other metadata */
fsstack_copy_inode_size(inode, lower_inode);
@@ -590,8 +585,8 @@ out:
* with as much information as it can before needing
* the lower filesystem.
* ecryptfs_read_super(): this accesses the lower filesystem and uses
- * ecryptfs_interpolate to perform most of the linking
- * ecryptfs_interpolate(): links the lower filesystem into ecryptfs
+ * ecryptfs_interpose to perform most of the linking
+ * ecryptfs_interpose(): links the lower filesystem into ecryptfs (inode.c)
*/
static int ecryptfs_get_sb(struct file_system_type *fs_type, int flags,
const char *dev_name, void *raw_data,
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 8b47e4200e6..7758cc382ef 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -135,26 +135,71 @@ static unsigned int eventfd_poll(struct file *file, poll_table *wait)
return events;
}
-static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
- loff_t *ppos)
+static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
+{
+ *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
+ ctx->count -= *cnt;
+}
+
+/**
+ * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue.
+ * @ctx: [in] Pointer to eventfd context.
+ * @wait: [in] Wait queue to be removed.
+ * @cnt: [out] Pointer to the 64bit conter value.
+ *
+ * Returns zero if successful, or the following error codes:
+ *
+ * -EAGAIN : The operation would have blocked.
+ *
+ * This is used to atomically remove a wait queue entry from the eventfd wait
+ * queue head, and read/reset the counter value.
+ */
+int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_t *wait,
+ __u64 *cnt)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->wqh.lock, flags);
+ eventfd_ctx_do_read(ctx, cnt);
+ __remove_wait_queue(&ctx->wqh, wait);
+ if (*cnt != 0 && waitqueue_active(&ctx->wqh))
+ wake_up_locked_poll(&ctx->wqh, POLLOUT);
+ spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+
+ return *cnt != 0 ? 0 : -EAGAIN;
+}
+EXPORT_SYMBOL_GPL(eventfd_ctx_remove_wait_queue);
+
+/**
+ * eventfd_ctx_read - Reads the eventfd counter or wait if it is zero.
+ * @ctx: [in] Pointer to eventfd context.
+ * @no_wait: [in] Different from zero if the operation should not block.
+ * @cnt: [out] Pointer to the 64bit conter value.
+ *
+ * Returns zero if successful, or the following error codes:
+ *
+ * -EAGAIN : The operation would have blocked but @no_wait was nonzero.
+ * -ERESTARTSYS : A signal interrupted the wait operation.
+ *
+ * If @no_wait is zero, the function might sleep until the eventfd internal
+ * counter becomes greater than zero.
+ */
+ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt)
{
- struct eventfd_ctx *ctx = file->private_data;
ssize_t res;
- __u64 ucnt = 0;
DECLARE_WAITQUEUE(wait, current);
- if (count < sizeof(ucnt))
- return -EINVAL;
spin_lock_irq(&ctx->wqh.lock);
+ *cnt = 0;
res = -EAGAIN;
if (ctx->count > 0)
- res = sizeof(ucnt);
- else if (!(file->f_flags & O_NONBLOCK)) {
+ res = 0;
+ else if (!no_wait) {
__add_wait_queue(&ctx->wqh, &wait);
- for (res = 0;;) {
+ for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
if (ctx->count > 0) {
- res = sizeof(ucnt);
+ res = 0;
break;
}
if (signal_pending(current)) {
@@ -168,18 +213,32 @@ static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
__remove_wait_queue(&ctx->wqh, &wait);
__set_current_state(TASK_RUNNING);
}
- if (likely(res > 0)) {
- ucnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
- ctx->count -= ucnt;
+ if (likely(res == 0)) {
+ eventfd_ctx_do_read(ctx, cnt);
if (waitqueue_active(&ctx->wqh))
wake_up_locked_poll(&ctx->wqh, POLLOUT);
}
spin_unlock_irq(&ctx->wqh.lock);
- if (res > 0 && put_user(ucnt, (__u64 __user *) buf))
- return -EFAULT;
return res;
}
+EXPORT_SYMBOL_GPL(eventfd_ctx_read);
+
+static ssize_t eventfd_read(struct file *file, char __user *buf, size_t count,
+ loff_t *ppos)
+{
+ struct eventfd_ctx *ctx = file->private_data;
+ ssize_t res;
+ __u64 cnt;
+
+ if (count < sizeof(cnt))
+ return -EINVAL;
+ res = eventfd_ctx_read(ctx, file->f_flags & O_NONBLOCK, &cnt);
+ if (res < 0)
+ return res;
+
+ return put_user(cnt, (__u64 __user *) buf) ? -EFAULT : sizeof(cnt);
+}
static ssize_t eventfd_write(struct file *file, const char __user *buf, size_t count,
loff_t *ppos)
@@ -339,7 +398,7 @@ struct file *eventfd_file_create(unsigned int count, int flags)
ctx->flags = flags;
file = anon_inode_getfile("[eventfd]", &eventfd_fops, ctx,
- flags & EFD_SHARED_FCNTL_FLAGS);
+ O_RDWR | (flags & EFD_SHARED_FCNTL_FLAGS));
if (IS_ERR(file))
eventfd_free_ctx(ctx);
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 366c503f965..bd056a5b4ef 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1206,7 +1206,7 @@ SYSCALL_DEFINE1(epoll_create1, int, flags)
* a file structure and a free file descriptor.
*/
error = anon_inode_getfd("[eventpoll]", &eventpoll_fops, ep,
- flags & O_CLOEXEC);
+ O_RDWR | (flags & O_CLOEXEC));
if (error < 0)
ep_free(ep);
diff --git a/fs/exec.c b/fs/exec.c
index 623a5cc3076..cce6bbdbdbb 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -571,6 +571,9 @@ int setup_arg_pages(struct linux_binprm *bprm,
struct vm_area_struct *prev = NULL;
unsigned long vm_flags;
unsigned long stack_base;
+ unsigned long stack_size;
+ unsigned long stack_expand;
+ unsigned long rlim_stack;
#ifdef CONFIG_STACK_GROWSUP
/* Limit stack size to 1GB */
@@ -627,10 +630,23 @@ int setup_arg_pages(struct linux_binprm *bprm,
goto out_unlock;
}
+ stack_expand = EXTRA_STACK_VM_PAGES * PAGE_SIZE;
+ stack_size = vma->vm_end - vma->vm_start;
+ /*
+ * Align this down to a page boundary as expand_stack
+ * will align it up.
+ */
+ rlim_stack = rlimit(RLIMIT_STACK) & PAGE_MASK;
#ifdef CONFIG_STACK_GROWSUP
- stack_base = vma->vm_end + EXTRA_STACK_VM_PAGES * PAGE_SIZE;
+ if (stack_size + stack_expand > rlim_stack)
+ stack_base = vma->vm_start + rlim_stack;
+ else
+ stack_base = vma->vm_end + stack_expand;
#else
- stack_base = vma->vm_start - EXTRA_STACK_VM_PAGES * PAGE_SIZE;
+ if (stack_size + stack_expand > rlim_stack)
+ stack_base = vma->vm_end - rlim_stack;
+ else
+ stack_base = vma->vm_start - stack_expand;
#endif
ret = expand_stack(vma, stack_base);
if (ret)
@@ -826,7 +842,9 @@ static int de_thread(struct task_struct *tsk)
attach_pid(tsk, PIDTYPE_PID, task_pid(leader));
transfer_pid(leader, tsk, PIDTYPE_PGID);
transfer_pid(leader, tsk, PIDTYPE_SID);
+
list_replace_rcu(&leader->tasks, &tsk->tasks);
+ list_replace_init(&leader->sibling, &tsk->sibling);
tsk->group_leader = tsk;
leader->group_leader = tsk;
@@ -939,9 +957,7 @@ void set_task_comm(struct task_struct *tsk, char *buf)
int flush_old_exec(struct linux_binprm * bprm)
{
- char * name;
- int i, ch, retval;
- char tcomm[sizeof(current->comm)];
+ int retval;
/*
* Make sure we have a private signal table and that
@@ -962,6 +978,25 @@ int flush_old_exec(struct linux_binprm * bprm)
bprm->mm = NULL; /* We're using it now */
+ current->flags &= ~PF_RANDOMIZE;
+ flush_thread();
+ current->personality &= ~bprm->per_clear;
+
+ return 0;
+
+out:
+ return retval;
+}
+EXPORT_SYMBOL(flush_old_exec);
+
+void setup_new_exec(struct linux_binprm * bprm)
+{
+ int i, ch;
+ char * name;
+ char tcomm[sizeof(current->comm)];
+
+ arch_pick_mmap_layout(current->mm);
+
/* This is the point of no return */
current->sas_ss_sp = current->sas_ss_size = 0;
@@ -983,9 +1018,6 @@ int flush_old_exec(struct linux_binprm * bprm)
tcomm[i] = '\0';
set_task_comm(current, tcomm);
- current->flags &= ~PF_RANDOMIZE;
- flush_thread();
-
/* Set the new mm task size. We have to do that late because it may
* depend on TIF_32BIT which is only updated in flush_thread() on
* some architectures like powerpc
@@ -1001,8 +1033,6 @@ int flush_old_exec(struct linux_binprm * bprm)
set_dumpable(current->mm, suid_dumpable);
}
- current->personality &= ~bprm->per_clear;
-
/*
* Flush performance counters when crossing a
* security domain:
@@ -1017,14 +1047,8 @@ int flush_old_exec(struct linux_binprm * bprm)
flush_signal_handlers(current, 0);
flush_old_files(current->files);
-
- return 0;
-
-out:
- return retval;
}
-
-EXPORT_SYMBOL(flush_old_exec);
+EXPORT_SYMBOL(setup_new_exec);
/*
* Prepare credentials and lock ->cred_guard_mutex.
@@ -1761,17 +1785,20 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
struct mm_struct *mm = current->mm;
struct linux_binfmt * binfmt;
struct inode * inode;
- struct file * file;
const struct cred *old_cred;
struct cred *cred;
int retval = 0;
int flag = 0;
int ispipe = 0;
- unsigned long core_limit = current->signal->rlim[RLIMIT_CORE].rlim_cur;
char **helper_argv = NULL;
int helper_argc = 0;
int dump_count = 0;
static atomic_t core_dump_count = ATOMIC_INIT(0);
+ struct coredump_params cprm = {
+ .signr = signr,
+ .regs = regs,
+ .limit = current->signal->rlim[RLIMIT_CORE].rlim_cur,
+ };
audit_core_dumps(signr);
@@ -1827,15 +1854,15 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
ispipe = format_corename(corename, signr);
unlock_kernel();
- if ((!ispipe) && (core_limit < binfmt->min_coredump))
+ if ((!ispipe) && (cprm.limit < binfmt->min_coredump))
goto fail_unlock;
if (ispipe) {
- if (core_limit == 0) {
+ if (cprm.limit == 0) {
/*
* Normally core limits are irrelevant to pipes, since
* we're not writing to the file system, but we use
- * core_limit of 0 here as a speacial value. Any
+ * cprm.limit of 0 here as a speacial value. Any
* non-zero limit gets set to RLIM_INFINITY below, but
* a limit of 0 skips the dump. This is a consistent
* way to catch recursive crashes. We can still crash
@@ -1868,25 +1895,25 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
goto fail_dropcount;
}
- core_limit = RLIM_INFINITY;
+ cprm.limit = RLIM_INFINITY;
/* SIGPIPE can happen, but it's just never processed */
if (call_usermodehelper_pipe(helper_argv[0], helper_argv, NULL,
- &file)) {
+ &cprm.file)) {
printk(KERN_INFO "Core dump to %s pipe failed\n",
corename);
goto fail_dropcount;
}
} else
- file = filp_open(corename,
+ cprm.file = filp_open(corename,
O_CREAT | 2 | O_NOFOLLOW | O_LARGEFILE | flag,
0600);
- if (IS_ERR(file))
+ if (IS_ERR(cprm.file))
goto fail_dropcount;
- inode = file->f_path.dentry->d_inode;
+ inode = cprm.file->f_path.dentry->d_inode;
if (inode->i_nlink > 1)
goto close_fail; /* multiple links - don't dump */
- if (!ispipe && d_unhashed(file->f_path.dentry))
+ if (!ispipe && d_unhashed(cprm.file->f_path.dentry))
goto close_fail;
/* AK: actually i see no reason to not allow this for named pipes etc.,
@@ -1899,21 +1926,22 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs)
*/
if (inode->i_uid != current_fsuid())
goto close_fail;
- if (!file->f_op)
+ if (!cprm.file->f_op)
goto close_fail;
- if (!file->f_op->write)
+ if (!cprm.file->f_op->write)
goto close_fail;
- if (!ispipe && do_truncate(file->f_path.dentry, 0, 0, file) != 0)
+ if (!ispipe &&
+ do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file) != 0)
goto close_fail;
- retval = binfmt->core_dump(signr, regs, file, core_limit);
+ retval = binfmt->core_dump(&cprm);
if (retval)
current->signal->group_exit_code |= 0x80;
close_fail:
if (ispipe && core_pipe_limit)
- wait_for_dump_helpers(file);
- filp_close(file, NULL);
+ wait_for_dump_helpers(cprm.file);
+ filp_close(cprm.file, NULL);
fail_dropcount:
if (dump_count)
atomic_dec(&core_dump_count);
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 698a8636d39..2afbcebeda7 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -738,13 +738,28 @@ static int exofs_write_begin_export(struct file *file,
fsdata);
}
+static int exofs_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied,
+ struct page *page, void *fsdata)
+{
+ struct inode *inode = mapping->host;
+ /* According to comment in simple_write_end i_mutex is held */
+ loff_t i_size = inode->i_size;
+ int ret;
+
+ ret = simple_write_end(file, mapping,pos, len, copied, page, fsdata);
+ if (i_size != inode->i_size)
+ mark_inode_dirty(inode);
+ return ret;
+}
+
const struct address_space_operations exofs_aops = {
.readpage = exofs_readpage,
.readpages = exofs_readpages,
.writepage = exofs_writepage,
.writepages = exofs_writepages,
.write_begin = exofs_write_begin_export,
- .write_end = simple_write_end,
+ .write_end = exofs_write_end,
};
/******************************************************************************
diff --git a/fs/exofs/pnfs.h b/fs/exofs/pnfs.h
index 423033addd1..c52e9888b8a 100644
--- a/fs/exofs/pnfs.h
+++ b/fs/exofs/pnfs.h
@@ -15,13 +15,7 @@
#ifndef __EXOFS_PNFS_H__
#define __EXOFS_PNFS_H__
-#if defined(CONFIG_PNFS)
-
-
-/* FIXME: move this file to: linux/exportfs/pnfs_osd_xdr.h */
-#include "../nfs/objlayout/pnfs_osd_xdr.h"
-
-#else /* defined(CONFIG_PNFS) */
+#if ! defined(__PNFS_OSD_XDR_H__)
enum pnfs_iomode {
IOMODE_READ = 1,
@@ -46,6 +40,6 @@ struct pnfs_osd_data_map {
u32 odm_raid_algorithm;
};
-#endif /* else defined(CONFIG_PNFS) */
+#endif /* ! defined(__PNFS_OSD_XDR_H__) */
#endif /* __EXOFS_PNFS_H__ */
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 197c7db583c..e9e175949a6 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -6,7 +6,7 @@
* and for mapping back from file handles to dentries.
*
* For details on why we do all the strange and hairy things in here
- * take a look at Documentation/filesystems/Exporting.
+ * take a look at Documentation/filesystems/nfs/Exporting.
*/
#include <linux/exportfs.h>
#include <linux/fs.h>
diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c
index a63d44256a7..a99e54318c3 100644
--- a/fs/ext2/acl.c
+++ b/fs/ext2/acl.c
@@ -339,12 +339,12 @@ ext2_acl_chmod(struct inode *inode)
* Extended attribut handlers
*/
static size_t
-ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size,
- const char *name, size_t name_len)
+ext2_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_size,
+ const char *name, size_t name_len, int type)
{
const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
- if (!test_opt(inode->i_sb, POSIX_ACL))
+ if (!test_opt(dentry->d_sb, POSIX_ACL))
return 0;
if (list && size <= list_size)
memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -352,12 +352,12 @@ ext2_xattr_list_acl_access(struct inode *inode, char *list, size_t list_size,
}
static size_t
-ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size,
- const char *name, size_t name_len)
+ext2_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_size,
+ const char *name, size_t name_len, int type)
{
const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
- if (!test_opt(inode->i_sb, POSIX_ACL))
+ if (!test_opt(dentry->d_sb, POSIX_ACL))
return 0;
if (list && size <= list_size)
memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -365,15 +365,18 @@ ext2_xattr_list_acl_default(struct inode *inode, char *list, size_t list_size,
}
static int
-ext2_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
+ext2_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
+ size_t size, int type)
{
struct posix_acl *acl;
int error;
- if (!test_opt(inode->i_sb, POSIX_ACL))
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+ if (!test_opt(dentry->d_sb, POSIX_ACL))
return -EOPNOTSUPP;
- acl = ext2_get_acl(inode, type);
+ acl = ext2_get_acl(dentry->d_inode, type);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl == NULL)
@@ -385,33 +388,17 @@ ext2_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
}
static int
-ext2_xattr_get_acl_access(struct inode *inode, const char *name,
- void *buffer, size_t size)
-{
- if (strcmp(name, "") != 0)
- return -EINVAL;
- return ext2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
-}
-
-static int
-ext2_xattr_get_acl_default(struct inode *inode, const char *name,
- void *buffer, size_t size)
-{
- if (strcmp(name, "") != 0)
- return -EINVAL;
- return ext2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
-}
-
-static int
-ext2_xattr_set_acl(struct inode *inode, int type, const void *value,
- size_t size)
+ext2_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
+ size_t size, int flags, int type)
{
struct posix_acl *acl;
int error;
- if (!test_opt(inode->i_sb, POSIX_ACL))
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+ if (!test_opt(dentry->d_sb, POSIX_ACL))
return -EOPNOTSUPP;
- if (!is_owner_or_cap(inode))
+ if (!is_owner_or_cap(dentry->d_inode))
return -EPERM;
if (value) {
@@ -426,41 +413,25 @@ ext2_xattr_set_acl(struct inode *inode, int type, const void *value,
} else
acl = NULL;
- error = ext2_set_acl(inode, type, acl);
+ error = ext2_set_acl(dentry->d_inode, type, acl);
release_and_out:
posix_acl_release(acl);
return error;
}
-static int
-ext2_xattr_set_acl_access(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
-{
- if (strcmp(name, "") != 0)
- return -EINVAL;
- return ext2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
-}
-
-static int
-ext2_xattr_set_acl_default(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
-{
- if (strcmp(name, "") != 0)
- return -EINVAL;
- return ext2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
-}
-
struct xattr_handler ext2_xattr_acl_access_handler = {
.prefix = POSIX_ACL_XATTR_ACCESS,
+ .flags = ACL_TYPE_ACCESS,
.list = ext2_xattr_list_acl_access,
- .get = ext2_xattr_get_acl_access,
- .set = ext2_xattr_set_acl_access,
+ .get = ext2_xattr_get_acl,
+ .set = ext2_xattr_set_acl,
};
struct xattr_handler ext2_xattr_acl_default_handler = {
.prefix = POSIX_ACL_XATTR_DEFAULT,
+ .flags = ACL_TYPE_DEFAULT,
.list = ext2_xattr_list_acl_default,
- .get = ext2_xattr_get_acl_default,
- .set = ext2_xattr_set_acl_default,
+ .get = ext2_xattr_get_acl,
+ .set = ext2_xattr_set_acl,
};
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index 7913531ec6d..904f00642f8 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -60,6 +60,7 @@
#include <linux/mbcache.h>
#include <linux/quotaops.h>
#include <linux/rwsem.h>
+#include <linux/security.h>
#include "ext2.h"
#include "xattr.h"
#include "acl.h"
@@ -249,8 +250,9 @@ cleanup:
* used / required on success.
*/
static int
-ext2_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
+ struct inode *inode = dentry->d_inode;
struct buffer_head *bh = NULL;
struct ext2_xattr_entry *entry;
char *end;
@@ -300,9 +302,10 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
ext2_xattr_handler(entry->e_name_index);
if (handler) {
- size_t size = handler->list(inode, buffer, rest,
+ size_t size = handler->list(dentry, buffer, rest,
entry->e_name,
- entry->e_name_len);
+ entry->e_name_len,
+ handler->flags);
if (buffer) {
if (size > rest) {
error = -ERANGE;
@@ -330,7 +333,7 @@ cleanup:
ssize_t
ext2_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
- return ext2_xattr_list(dentry->d_inode, buffer, size);
+ return ext2_xattr_list(dentry, buffer, size);
}
/*
diff --git a/fs/ext2/xattr_security.c b/fs/ext2/xattr_security.c
index 70c0dbdcdcb..c8155845ac0 100644
--- a/fs/ext2/xattr_security.c
+++ b/fs/ext2/xattr_security.c
@@ -11,8 +11,8 @@
#include "xattr.h"
static size_t
-ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size,
- const char *name, size_t name_len)
+ext2_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
+ const char *name, size_t name_len, int type)
{
const int prefix_len = XATTR_SECURITY_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
@@ -26,22 +26,22 @@ ext2_xattr_security_list(struct inode *inode, char *list, size_t list_size,
}
static int
-ext2_xattr_security_get(struct inode *inode, const char *name,
- void *buffer, size_t size)
+ext2_xattr_security_get(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
{
if (strcmp(name, "") == 0)
return -EINVAL;
- return ext2_xattr_get(inode, EXT2_XATTR_INDEX_SECURITY, name,
+ return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_SECURITY, name,
buffer, size);
}
static int
-ext2_xattr_security_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
+ext2_xattr_security_set(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags, int type)
{
if (strcmp(name, "") == 0)
return -EINVAL;
- return ext2_xattr_set(inode, EXT2_XATTR_INDEX_SECURITY, name,
+ return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_SECURITY, name,
value, size, flags);
}
diff --git a/fs/ext2/xattr_trusted.c b/fs/ext2/xattr_trusted.c
index e8219f8eae9..2a26d71f477 100644
--- a/fs/ext2/xattr_trusted.c
+++ b/fs/ext2/xattr_trusted.c
@@ -13,8 +13,8 @@
#include "xattr.h"
static size_t
-ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
- const char *name, size_t name_len)
+ext2_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
+ const char *name, size_t name_len, int type)
{
const int prefix_len = XATTR_TRUSTED_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
@@ -31,22 +31,22 @@ ext2_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
}
static int
-ext2_xattr_trusted_get(struct inode *inode, const char *name,
- void *buffer, size_t size)
+ext2_xattr_trusted_get(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
{
if (strcmp(name, "") == 0)
return -EINVAL;
- return ext2_xattr_get(inode, EXT2_XATTR_INDEX_TRUSTED, name,
+ return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_TRUSTED, name,
buffer, size);
}
static int
-ext2_xattr_trusted_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
+ext2_xattr_trusted_set(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags, int type)
{
if (strcmp(name, "") == 0)
return -EINVAL;
- return ext2_xattr_set(inode, EXT2_XATTR_INDEX_TRUSTED, name,
+ return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_TRUSTED, name,
value, size, flags);
}
diff --git a/fs/ext2/xattr_user.c b/fs/ext2/xattr_user.c
index 92495d28c62..3f6caf3684b 100644
--- a/fs/ext2/xattr_user.c
+++ b/fs/ext2/xattr_user.c
@@ -12,13 +12,13 @@
#include "xattr.h"
static size_t
-ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size,
- const char *name, size_t name_len)
+ext2_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
+ const char *name, size_t name_len, int type)
{
const size_t prefix_len = XATTR_USER_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
- if (!test_opt(inode->i_sb, XATTR_USER))
+ if (!test_opt(dentry->d_sb, XATTR_USER))
return 0;
if (list && total_len <= list_size) {
@@ -30,27 +30,28 @@ ext2_xattr_user_list(struct inode *inode, char *list, size_t list_size,
}
static int
-ext2_xattr_user_get(struct inode *inode, const char *name,
- void *buffer, size_t size)
+ext2_xattr_user_get(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
{
if (strcmp(name, "") == 0)
return -EINVAL;
- if (!test_opt(inode->i_sb, XATTR_USER))
+ if (!test_opt(dentry->d_sb, XATTR_USER))
return -EOPNOTSUPP;
- return ext2_xattr_get(inode, EXT2_XATTR_INDEX_USER, name, buffer, size);
+ return ext2_xattr_get(dentry->d_inode, EXT2_XATTR_INDEX_USER,
+ name, buffer, size);
}
static int
-ext2_xattr_user_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
+ext2_xattr_user_set(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags, int type)
{
if (strcmp(name, "") == 0)
return -EINVAL;
- if (!test_opt(inode->i_sb, XATTR_USER))
+ if (!test_opt(dentry->d_sb, XATTR_USER))
return -EOPNOTSUPP;
- return ext2_xattr_set(inode, EXT2_XATTR_INDEX_USER, name,
- value, size, flags);
+ return ext2_xattr_set(dentry->d_inode, EXT2_XATTR_INDEX_USER,
+ name, value, size, flags);
}
struct xattr_handler ext2_xattr_user_handler = {
diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c
index c9b0df376b5..82ba3415866 100644
--- a/fs/ext3/acl.c
+++ b/fs/ext3/acl.c
@@ -366,12 +366,12 @@ out:
* Extended attribute handlers
*/
static size_t
-ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
- const char *name, size_t name_len)
+ext3_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_len,
+ const char *name, size_t name_len, int type)
{
const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
- if (!test_opt(inode->i_sb, POSIX_ACL))
+ if (!test_opt(dentry->d_sb, POSIX_ACL))
return 0;
if (list && size <= list_len)
memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -379,12 +379,12 @@ ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
}
static size_t
-ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
- const char *name, size_t name_len)
+ext3_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_len,
+ const char *name, size_t name_len, int type)
{
const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
- if (!test_opt(inode->i_sb, POSIX_ACL))
+ if (!test_opt(dentry->d_sb, POSIX_ACL))
return 0;
if (list && size <= list_len)
memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -392,15 +392,18 @@ ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
}
static int
-ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
+ext3_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
+ size_t size, int type)
{
struct posix_acl *acl;
int error;
- if (!test_opt(inode->i_sb, POSIX_ACL))
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+ if (!test_opt(dentry->d_sb, POSIX_ACL))
return -EOPNOTSUPP;
- acl = ext3_get_acl(inode, type);
+ acl = ext3_get_acl(dentry->d_inode, type);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl == NULL)
@@ -412,31 +415,16 @@ ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
}
static int
-ext3_xattr_get_acl_access(struct inode *inode, const char *name,
- void *buffer, size_t size)
-{
- if (strcmp(name, "") != 0)
- return -EINVAL;
- return ext3_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
-}
-
-static int
-ext3_xattr_get_acl_default(struct inode *inode, const char *name,
- void *buffer, size_t size)
-{
- if (strcmp(name, "") != 0)
- return -EINVAL;
- return ext3_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
-}
-
-static int
-ext3_xattr_set_acl(struct inode *inode, int type, const void *value,
- size_t size)
+ext3_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
+ size_t size, int flags, int type)
{
+ struct inode *inode = dentry->d_inode;
handle_t *handle;
struct posix_acl *acl;
int error, retries = 0;
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
if (!test_opt(inode->i_sb, POSIX_ACL))
return -EOPNOTSUPP;
if (!is_owner_or_cap(inode))
@@ -468,34 +456,18 @@ release_and_out:
return error;
}
-static int
-ext3_xattr_set_acl_access(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
-{
- if (strcmp(name, "") != 0)
- return -EINVAL;
- return ext3_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
-}
-
-static int
-ext3_xattr_set_acl_default(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
-{
- if (strcmp(name, "") != 0)
- return -EINVAL;
- return ext3_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
-}
-
struct xattr_handler ext3_xattr_acl_access_handler = {
.prefix = POSIX_ACL_XATTR_ACCESS,
+ .flags = ACL_TYPE_ACCESS,
.list = ext3_xattr_list_acl_access,
- .get = ext3_xattr_get_acl_access,
- .set = ext3_xattr_set_acl_access,
+ .get = ext3_xattr_get_acl,
+ .set = ext3_xattr_set_acl,
};
struct xattr_handler ext3_xattr_acl_default_handler = {
.prefix = POSIX_ACL_XATTR_DEFAULT,
+ .flags = ACL_TYPE_DEFAULT,
.list = ext3_xattr_list_acl_default,
- .get = ext3_xattr_get_acl_default,
- .set = ext3_xattr_set_acl_default,
+ .get = ext3_xattr_get_acl,
+ .set = ext3_xattr_set_acl,
};
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ad14227f509..455e6e6e5cb 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -970,7 +970,7 @@ static int ext3_get_block(struct inode *inode, sector_t iblock,
if (max_blocks > DIO_MAX_BLOCKS)
max_blocks = DIO_MAX_BLOCKS;
handle = ext3_journal_start(inode, DIO_CREDITS +
- 2 * EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb));
+ EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb));
if (IS_ERR(handle)) {
ret = PTR_ERR(handle);
goto out;
@@ -3146,8 +3146,8 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
/* (user+group)*(old+new) structure, inode write (sb,
* inode block, ? - but truncate inode update has it) */
- handle = ext3_journal_start(inode, 2*(EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)+
- EXT3_QUOTA_DEL_BLOCKS(inode->i_sb))+3);
+ handle = ext3_journal_start(inode, EXT3_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
+ EXT3_MAXQUOTAS_DEL_BLOCKS(inode->i_sb)+3);
if (IS_ERR(handle)) {
error = PTR_ERR(handle);
goto err_out;
@@ -3239,7 +3239,7 @@ static int ext3_writepage_trans_blocks(struct inode *inode)
#ifdef CONFIG_QUOTA
/* We know that structure was already allocated during vfs_dq_init so
* we will be updating only the data blocks + inodes */
- ret += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb);
+ ret += EXT3_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
#endif
return ret;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c
index aad6400c9b7..7b0e44f7d66 100644
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -1699,7 +1699,7 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
retry:
handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
- 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+ EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
if (IS_ERR(handle))
return PTR_ERR(handle);
@@ -1733,7 +1733,7 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry,
retry:
handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
- 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+ EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
if (IS_ERR(handle))
return PTR_ERR(handle);
@@ -1769,7 +1769,7 @@ static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode)
retry:
handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 +
- 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+ EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
if (IS_ERR(handle))
return PTR_ERR(handle);
@@ -1920,7 +1920,7 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
struct ext3_iloc iloc;
int err = 0, rc;
- lock_super(sb);
+ mutex_lock(&EXT3_SB(sb)->s_orphan_lock);
if (!list_empty(&EXT3_I(inode)->i_orphan))
goto out_unlock;
@@ -1929,9 +1929,13 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
/* @@@ FIXME: Observation from aviro:
* I think I can trigger J_ASSERT in ext3_orphan_add(). We block
- * here (on lock_super()), so race with ext3_link() which might bump
+ * here (on s_orphan_lock), so race with ext3_link() which might bump
* ->i_nlink. For, say it, character device. Not a regular file,
* not a directory, not a symlink and ->i_nlink > 0.
+ *
+ * tytso, 4/25/2009: I'm not sure how that could happen;
+ * shouldn't the fs core protect us from these sort of
+ * unlink()/link() races?
*/
J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
@@ -1968,7 +1972,7 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode)
jbd_debug(4, "orphan inode %lu will point to %d\n",
inode->i_ino, NEXT_ORPHAN(inode));
out_unlock:
- unlock_super(sb);
+ mutex_unlock(&EXT3_SB(sb)->s_orphan_lock);
ext3_std_error(inode->i_sb, err);
return err;
}
@@ -1986,11 +1990,9 @@ int ext3_orphan_del(handle_t *handle, struct inode *inode)
struct ext3_iloc iloc;
int err = 0;
- lock_super(inode->i_sb);
- if (list_empty(&ei->i_orphan)) {
- unlock_super(inode->i_sb);
- return 0;
- }
+ mutex_lock(&EXT3_SB(inode->i_sb)->s_orphan_lock);
+ if (list_empty(&ei->i_orphan))
+ goto out;
ino_next = NEXT_ORPHAN(inode);
prev = ei->i_orphan.prev;
@@ -2040,7 +2042,7 @@ int ext3_orphan_del(handle_t *handle, struct inode *inode)
out_err:
ext3_std_error(inode->i_sb, err);
out:
- unlock_super(inode->i_sb);
+ mutex_unlock(&EXT3_SB(inode->i_sb)->s_orphan_lock);
return err;
out_brelse:
@@ -2175,7 +2177,7 @@ static int ext3_symlink (struct inode * dir,
retry:
handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) +
EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 +
- 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb));
+ EXT3_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
if (IS_ERR(handle))
return PTR_ERR(handle);
diff --git a/fs/ext3/resize.c b/fs/ext3/resize.c
index 5f83b617917..54351ac7cef 100644
--- a/fs/ext3/resize.c
+++ b/fs/ext3/resize.c
@@ -209,7 +209,7 @@ static int setup_new_group_blocks(struct super_block *sb,
if (IS_ERR(handle))
return PTR_ERR(handle);
- lock_super(sb);
+ mutex_lock(&sbi->s_resize_lock);
if (input->group != sbi->s_groups_count) {
err = -EBUSY;
goto exit_journal;
@@ -324,7 +324,7 @@ exit_bh:
brelse(bh);
exit_journal:
- unlock_super(sb);
+ mutex_unlock(&sbi->s_resize_lock);
if ((err2 = ext3_journal_stop(handle)) && !err)
err = err2;
@@ -662,11 +662,12 @@ exit_free:
* important part is that the new block and inode counts are in the backup
* superblocks, and the location of the new group metadata in the GDT backups.
*
- * We do not need lock_super() for this, because these blocks are not
- * otherwise touched by the filesystem code when it is mounted. We don't
- * need to worry about last changing from sbi->s_groups_count, because the
- * worst that can happen is that we do not copy the full number of backups
- * at this time. The resize which changed s_groups_count will backup again.
+ * We do not need take the s_resize_lock for this, because these
+ * blocks are not otherwise touched by the filesystem code when it is
+ * mounted. We don't need to worry about last changing from
+ * sbi->s_groups_count, because the worst that can happen is that we
+ * do not copy the full number of backups at this time. The resize
+ * which changed s_groups_count will backup again.
*/
static void update_backups(struct super_block *sb,
int blk_off, char *data, int size)
@@ -825,7 +826,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
goto exit_put;
}
- lock_super(sb);
+ mutex_lock(&sbi->s_resize_lock);
if (input->group != sbi->s_groups_count) {
ext3_warning(sb, __func__,
"multiple resizers run on filesystem!");
@@ -856,7 +857,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
/*
* OK, now we've set up the new group. Time to make it active.
*
- * Current kernels don't lock all allocations via lock_super(),
+ * We do not lock all allocations via s_resize_lock
* so we have to be safe wrt. concurrent accesses the group
* data. So we need to be careful to set all of the relevant
* group descriptor data etc. *before* we enable the group.
@@ -900,12 +901,12 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
*
* The precise rules we use are:
*
- * * Writers of s_groups_count *must* hold lock_super
+ * * Writers of s_groups_count *must* hold s_resize_lock
* AND
* * Writers must perform a smp_wmb() after updating all dependent
* data and before modifying the groups count
*
- * * Readers must hold lock_super() over the access
+ * * Readers must hold s_resize_lock over the access
* OR
* * Readers must perform an smp_rmb() after reading the groups count
* and before reading any dependent data.
@@ -936,7 +937,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input)
ext3_journal_dirty_metadata(handle, sbi->s_sbh);
exit_journal:
- unlock_super(sb);
+ mutex_unlock(&sbi->s_resize_lock);
if ((err2 = ext3_journal_stop(handle)) && !err)
err = err2;
if (!err) {
@@ -973,7 +974,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
/* We don't need to worry about locking wrt other resizers just
* yet: we're going to revalidate es->s_blocks_count after
- * taking lock_super() below. */
+ * taking the s_resize_lock below. */
o_blocks_count = le32_to_cpu(es->s_blocks_count);
o_groups_count = EXT3_SB(sb)->s_groups_count;
@@ -1045,11 +1046,11 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
goto exit_put;
}
- lock_super(sb);
+ mutex_lock(&EXT3_SB(sb)->s_resize_lock);
if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) {
ext3_warning(sb, __func__,
"multiple resizers run on filesystem!");
- unlock_super(sb);
+ mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
ext3_journal_stop(handle);
err = -EBUSY;
goto exit_put;
@@ -1059,13 +1060,13 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
EXT3_SB(sb)->s_sbh))) {
ext3_warning(sb, __func__,
"error %d on journal write access", err);
- unlock_super(sb);
+ mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
ext3_journal_stop(handle);
goto exit_put;
}
es->s_blocks_count = cpu_to_le32(o_blocks_count + add);
ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
- unlock_super(sb);
+ mutex_unlock(&EXT3_SB(sb)->s_resize_lock);
ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count,
o_blocks_count + add);
ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks);
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index 7ad1e8c30bd..afa2b569da1 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -1928,6 +1928,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
sb->dq_op = &ext3_quota_operations;
#endif
INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
+ mutex_init(&sbi->s_orphan_lock);
+ mutex_init(&sbi->s_resize_lock);
sb->s_root = NULL;
@@ -2014,14 +2016,7 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
}
ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
- /*
- * akpm: core read_super() calls in here with the superblock locked.
- * That deadlocks, because orphan cleanup needs to lock the superblock
- * in numerous places. Here we just pop the lock - it's relatively
- * harmless, because we are now ready to accept write_super() requests,
- * and aviro says that's the only reason for hanging onto the
- * superblock lock.
- */
+
EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;
ext3_orphan_cleanup(sb, es);
EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
@@ -2403,13 +2398,11 @@ static void ext3_mark_recovery_complete(struct super_block * sb,
if (journal_flush(journal) < 0)
goto out;
- lock_super(sb);
if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
sb->s_flags & MS_RDONLY) {
EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
ext3_commit_super(sb, es, 1);
}
- unlock_super(sb);
out:
journal_unlock_updates(journal);
@@ -2601,13 +2594,7 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data)
(sbi->s_mount_state & EXT3_VALID_FS))
es->s_state = cpu_to_le16(sbi->s_mount_state);
- /*
- * We have to unlock super so that we can wait for
- * transactions.
- */
- unlock_super(sb);
ext3_mark_recovery_complete(sb, es);
- lock_super(sb);
} else {
__le32 ret;
if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c
index 387d92d00b9..66895ccf76c 100644
--- a/fs/ext3/xattr.c
+++ b/fs/ext3/xattr.c
@@ -99,7 +99,7 @@ static struct buffer_head *ext3_xattr_cache_find(struct inode *,
struct mb_cache_entry **);
static void ext3_xattr_rehash(struct ext3_xattr_header *,
struct ext3_xattr_entry *);
-static int ext3_xattr_list(struct inode *inode, char *buffer,
+static int ext3_xattr_list(struct dentry *dentry, char *buffer,
size_t buffer_size);
static struct mb_cache *ext3_xattr_cache;
@@ -147,7 +147,7 @@ ext3_xattr_handler(int name_index)
ssize_t
ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
- return ext3_xattr_list(dentry->d_inode, buffer, size);
+ return ext3_xattr_list(dentry, buffer, size);
}
static int
@@ -332,7 +332,7 @@ ext3_xattr_get(struct inode *inode, int name_index, const char *name,
}
static int
-ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry,
+ext3_xattr_list_entries(struct dentry *dentry, struct ext3_xattr_entry *entry,
char *buffer, size_t buffer_size)
{
size_t rest = buffer_size;
@@ -342,9 +342,10 @@ ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry,
ext3_xattr_handler(entry->e_name_index);
if (handler) {
- size_t size = handler->list(inode, buffer, rest,
+ size_t size = handler->list(dentry, buffer, rest,
entry->e_name,
- entry->e_name_len);
+ entry->e_name_len,
+ handler->flags);
if (buffer) {
if (size > rest)
return -ERANGE;
@@ -357,8 +358,9 @@ ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry,
}
static int
-ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext3_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
+ struct inode *inode = dentry->d_inode;
struct buffer_head *bh = NULL;
int error;
@@ -383,7 +385,7 @@ ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
goto cleanup;
}
ext3_xattr_cache_insert(bh);
- error = ext3_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size);
+ error = ext3_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
cleanup:
brelse(bh);
@@ -392,8 +394,9 @@ cleanup:
}
static int
-ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext3_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
+ struct inode *inode = dentry->d_inode;
struct ext3_xattr_ibody_header *header;
struct ext3_inode *raw_inode;
struct ext3_iloc iloc;
@@ -411,7 +414,7 @@ ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
error = ext3_xattr_check_names(IFIRST(header), end);
if (error)
goto cleanup;
- error = ext3_xattr_list_entries(inode, IFIRST(header),
+ error = ext3_xattr_list_entries(dentry, IFIRST(header),
buffer, buffer_size);
cleanup:
@@ -430,12 +433,12 @@ cleanup:
* used / required on success.
*/
static int
-ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext3_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
int i_error, b_error;
- down_read(&EXT3_I(inode)->xattr_sem);
- i_error = ext3_xattr_ibody_list(inode, buffer, buffer_size);
+ down_read(&EXT3_I(dentry->d_inode)->xattr_sem);
+ i_error = ext3_xattr_ibody_list(dentry, buffer, buffer_size);
if (i_error < 0) {
b_error = 0;
} else {
@@ -443,11 +446,11 @@ ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
buffer += i_error;
buffer_size -= i_error;
}
- b_error = ext3_xattr_block_list(inode, buffer, buffer_size);
+ b_error = ext3_xattr_block_list(dentry, buffer, buffer_size);
if (b_error < 0)
i_error = 0;
}
- up_read(&EXT3_I(inode)->xattr_sem);
+ up_read(&EXT3_I(dentry->d_inode)->xattr_sem);
return i_error + b_error;
}
diff --git a/fs/ext3/xattr_security.c b/fs/ext3/xattr_security.c
index 37b81097bdf..474348788dd 100644
--- a/fs/ext3/xattr_security.c
+++ b/fs/ext3/xattr_security.c
@@ -12,8 +12,8 @@
#include "xattr.h"
static size_t
-ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size,
- const char *name, size_t name_len)
+ext3_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
+ const char *name, size_t name_len, int type)
{
const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
@@ -28,23 +28,23 @@ ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size,
}
static int
-ext3_xattr_security_get(struct inode *inode, const char *name,
- void *buffer, size_t size)
+ext3_xattr_security_get(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
{
if (strcmp(name, "") == 0)
return -EINVAL;
- return ext3_xattr_get(inode, EXT3_XATTR_INDEX_SECURITY, name,
- buffer, size);
+ return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_SECURITY,
+ name, buffer, size);
}
static int
-ext3_xattr_security_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
+ext3_xattr_security_set(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags, int type)
{
if (strcmp(name, "") == 0)
return -EINVAL;
- return ext3_xattr_set(inode, EXT3_XATTR_INDEX_SECURITY, name,
- value, size, flags);
+ return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_SECURITY,
+ name, value, size, flags);
}
int
diff --git a/fs/ext3/xattr_trusted.c b/fs/ext3/xattr_trusted.c
index c7c41a410c4..e5562845ed9 100644
--- a/fs/ext3/xattr_trusted.c
+++ b/fs/ext3/xattr_trusted.c
@@ -14,8 +14,8 @@
#include "xattr.h"
static size_t
-ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
- const char *name, size_t name_len)
+ext3_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
+ const char *name, size_t name_len, int type)
{
const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
@@ -32,22 +32,22 @@ ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
}
static int
-ext3_xattr_trusted_get(struct inode *inode, const char *name,
- void *buffer, size_t size)
+ext3_xattr_trusted_get(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
{
if (strcmp(name, "") == 0)
return -EINVAL;
- return ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, name,
- buffer, size);
+ return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_TRUSTED,
+ name, buffer, size);
}
static int
-ext3_xattr_trusted_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
+ext3_xattr_trusted_set(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags, int type)
{
if (strcmp(name, "") == 0)
return -EINVAL;
- return ext3_xattr_set(inode, EXT3_XATTR_INDEX_TRUSTED, name,
+ return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_TRUSTED, name,
value, size, flags);
}
diff --git a/fs/ext3/xattr_user.c b/fs/ext3/xattr_user.c
index 430fe63b31b..3bcfe9ee0a6 100644
--- a/fs/ext3/xattr_user.c
+++ b/fs/ext3/xattr_user.c
@@ -13,13 +13,13 @@
#include "xattr.h"
static size_t
-ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size,
- const char *name, size_t name_len)
+ext3_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
+ const char *name, size_t name_len, int type)
{
const size_t prefix_len = XATTR_USER_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
- if (!test_opt(inode->i_sb, XATTR_USER))
+ if (!test_opt(dentry->d_sb, XATTR_USER))
return 0;
if (list && total_len <= list_size) {
@@ -31,26 +31,27 @@ ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size,
}
static int
-ext3_xattr_user_get(struct inode *inode, const char *name,
- void *buffer, size_t size)
+ext3_xattr_user_get(struct dentry *dentry, const char *name, void *buffer,
+ size_t size, int type)
{
if (strcmp(name, "") == 0)
return -EINVAL;
- if (!test_opt(inode->i_sb, XATTR_USER))
+ if (!test_opt(dentry->d_sb, XATTR_USER))
return -EOPNOTSUPP;
- return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, buffer, size);
+ return ext3_xattr_get(dentry->d_inode, EXT3_XATTR_INDEX_USER,
+ name, buffer, size);
}
static int
-ext3_xattr_user_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
+ext3_xattr_user_set(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags, int type)
{
if (strcmp(name, "") == 0)
return -EINVAL;
- if (!test_opt(inode->i_sb, XATTR_USER))
+ if (!test_opt(dentry->d_sb, XATTR_USER))
return -EOPNOTSUPP;
- return ext3_xattr_set(inode, EXT3_XATTR_INDEX_USER, name,
- value, size, flags);
+ return ext3_xattr_set(dentry->d_inode, EXT3_XATTR_INDEX_USER,
+ name, value, size, flags);
}
struct xattr_handler ext3_xattr_user_handler = {
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index e5f6774846e..9ed1bb1f319 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -2,7 +2,6 @@ config EXT4_FS
tristate "The Extended 4 (ext4) filesystem"
select JBD2
select CRC16
- select FS_JOURNAL_INFO
help
This is the next generation of the ext3 filesystem.
@@ -29,6 +28,7 @@ config EXT4_FS
config EXT4_USE_FOR_EXT23
bool "Use ext4 for ext2/ext3 file systems"
+ depends on EXT4_FS
depends on EXT3_FS=n || EXT2_FS=n
default y
help
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index 0df88b2a69b..8a2a29d35a6 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -364,12 +364,12 @@ out:
* Extended attribute handlers
*/
static size_t
-ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
- const char *name, size_t name_len)
+ext4_xattr_list_acl_access(struct dentry *dentry, char *list, size_t list_len,
+ const char *name, size_t name_len, int type)
{
const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
- if (!test_opt(inode->i_sb, POSIX_ACL))
+ if (!test_opt(dentry->d_sb, POSIX_ACL))
return 0;
if (list && size <= list_len)
memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -377,12 +377,12 @@ ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len,
}
static size_t
-ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
- const char *name, size_t name_len)
+ext4_xattr_list_acl_default(struct dentry *dentry, char *list, size_t list_len,
+ const char *name, size_t name_len, int type)
{
const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
- if (!test_opt(inode->i_sb, POSIX_ACL))
+ if (!test_opt(dentry->d_sb, POSIX_ACL))
return 0;
if (list && size <= list_len)
memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -390,15 +390,18 @@ ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len,
}
static int
-ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
+ext4_xattr_get_acl(struct dentry *dentry, const char *name, void *buffer,
+ size_t size, int type)
{
struct posix_acl *acl;
int error;
- if (!test_opt(inode->i_sb, POSIX_ACL))
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+ if (!test_opt(dentry->d_sb, POSIX_ACL))
return -EOPNOTSUPP;
- acl = ext4_get_acl(inode, type);
+ acl = ext4_get_acl(dentry->d_inode, type);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl == NULL)
@@ -410,31 +413,16 @@ ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
}
static int
-ext4_xattr_get_acl_access(struct inode *inode, const char *name,
- void *buffer, size_t size)
-{
- if (strcmp(name, "") != 0)
- return -EINVAL;
- return ext4_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
-}
-
-static int
-ext4_xattr_get_acl_default(struct inode *inode, const char *name,
- void *buffer, size_t size)
-{
- if (strcmp(name, "") != 0)
- return -EINVAL;
- return ext4_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
-}
-
-static int
-ext4_xattr_set_acl(struct inode *inode, int type, const void *value,
- size_t size)
+ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
+ size_t size, int flags, int type)
{
+ struct inode *inode = dentry->d_inode;
handle_t *handle;
struct posix_acl *acl;
int error, retries = 0;
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
if (!test_opt(inode->i_sb, POSIX_ACL))
return -EOPNOTSUPP;
if (!is_owner_or_cap(inode))
@@ -466,34 +454,18 @@ release_and_out:
return error;
}
-static int
-ext4_xattr_set_acl_access(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
-{
- if (strcmp(name, "") != 0)
- return -EINVAL;
- return ext4_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
-}
-
-static int
-ext4_xattr_set_acl_default(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
-{
- if (strcmp(name, "") != 0)
- return -EINVAL;
- return ext4_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
-}
-
struct xattr_handler ext4_xattr_acl_access_handler = {
.prefix = POSIX_ACL_XATTR_ACCESS,
+ .flags = ACL_TYPE_ACCESS,
.list = ext4_xattr_list_acl_access,
- .get = ext4_xattr_get_acl_access,
- .set = ext4_xattr_set_acl_access,
+ .get = ext4_xattr_get_acl,
+ .set = ext4_xattr_set_acl,
};
struct xattr_handler ext4_xattr_acl_default_handler = {
.prefix = POSIX_ACL_XATTR_DEFAULT,
+ .flags = ACL_TYPE_DEFAULT,
.list = ext4_xattr_list_acl_default,
- .get = ext4_xattr_get_acl_default,
- .set = ext4_xattr_set_acl_default,
+ .get = ext4_xattr_get_acl,
+ .set = ext4_xattr_set_acl,
};
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 4df8621ec31..a60ab9aad57 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -16,7 +16,6 @@
#include <linux/module.h>
#include <linux/swap.h>
#include <linux/pagemap.h>
-#include <linux/version.h>
#include <linux/blkdev.h>
#include <linux/mutex.h>
#include "ext4.h"
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index ab31e65d46d..874d169a193 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -361,14 +361,11 @@ struct ext4_new_group_data {
so set the magic i_delalloc_reserve_flag after taking the
inode allocation semaphore for */
#define EXT4_GET_BLOCKS_DELALLOC_RESERVE 0x0004
- /* Call ext4_da_update_reserve_space() after successfully
- allocating the blocks */
-#define EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE 0x0008
/* caller is from the direct IO path, request to creation of an
unitialized extents if not allocated, split the uninitialized
extent if blocks has been preallocated already*/
-#define EXT4_GET_BLOCKS_DIO 0x0010
-#define EXT4_GET_BLOCKS_CONVERT 0x0020
+#define EXT4_GET_BLOCKS_DIO 0x0008
+#define EXT4_GET_BLOCKS_CONVERT 0x0010
#define EXT4_GET_BLOCKS_DIO_CREATE_EXT (EXT4_GET_BLOCKS_DIO|\
EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
/* Convert extent to initialized after direct IO complete */
@@ -699,11 +696,17 @@ struct ext4_inode_info {
unsigned int i_reserved_meta_blocks;
unsigned int i_allocated_meta_blocks;
unsigned short i_delalloc_reserved_flag;
+ sector_t i_da_metadata_calc_last_lblock;
+ int i_da_metadata_calc_len;
/* on-disk additional length */
__u16 i_extra_isize;
spinlock_t i_block_reservation_lock;
+#ifdef CONFIG_QUOTA
+ /* quota space reservation, managed internally by quota code */
+ qsize_t i_reserved_quota;
+#endif
/* completed async DIOs that might need unwritten extents handling */
struct list_head i_aio_dio_complete_list;
@@ -1435,8 +1438,10 @@ extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
extern int ext4_block_truncate_page(handle_t *handle,
struct address_space *mapping, loff_t from);
extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
-extern qsize_t ext4_get_reserved_space(struct inode *inode);
+extern qsize_t *ext4_get_reserved_space(struct inode *inode);
extern int flush_aio_dio_completed_IO(struct inode *inode);
+extern void ext4_da_update_reserve_space(struct inode *inode,
+ int used, int quota_claim);
/* ioctl.c */
extern long ext4_ioctl(struct file *, unsigned int, unsigned long);
extern long ext4_compat_ioctl(struct file *, unsigned int, unsigned long);
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 2ca686454e8..bdb6ce7e2eb 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -225,7 +225,8 @@ static inline void ext4_ext_mark_initialized(struct ext4_extent *ext)
ext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ext));
}
-extern int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks);
+extern int ext4_ext_calc_metadata_amount(struct inode *inode,
+ sector_t lblocks);
extern ext4_fsblk_t ext_pblock(struct ext4_extent *ex);
extern ext4_fsblk_t idx_pblock(struct ext4_extent_idx *);
extern void ext4_ext_store_pblock(struct ext4_extent *, ext4_fsblk_t);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3a7928f825e..765a4826b11 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -296,29 +296,44 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
* to allocate @blocks
* Worse case is one block per extent
*/
-int ext4_ext_calc_metadata_amount(struct inode *inode, int blocks)
+int ext4_ext_calc_metadata_amount(struct inode *inode, sector_t lblock)
{
- int lcap, icap, rcap, leafs, idxs, num;
- int newextents = blocks;
-
- rcap = ext4_ext_space_root_idx(inode, 0);
- lcap = ext4_ext_space_block(inode, 0);
- icap = ext4_ext_space_block_idx(inode, 0);
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ int idxs, num = 0;
- /* number of new leaf blocks needed */
- num = leafs = (newextents + lcap - 1) / lcap;
+ idxs = ((inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header))
+ / sizeof(struct ext4_extent_idx));
/*
- * Worse case, we need separate index block(s)
- * to link all new leaf blocks
+ * If the new delayed allocation block is contiguous with the
+ * previous da block, it can share index blocks with the
+ * previous block, so we only need to allocate a new index
+ * block every idxs leaf blocks. At ldxs**2 blocks, we need
+ * an additional index block, and at ldxs**3 blocks, yet
+ * another index blocks.
*/
- idxs = (leafs + icap - 1) / icap;
- do {
- num += idxs;
- idxs = (idxs + icap - 1) / icap;
- } while (idxs > rcap);
+ if (ei->i_da_metadata_calc_len &&
+ ei->i_da_metadata_calc_last_lblock+1 == lblock) {
+ if ((ei->i_da_metadata_calc_len % idxs) == 0)
+ num++;
+ if ((ei->i_da_metadata_calc_len % (idxs*idxs)) == 0)
+ num++;
+ if ((ei->i_da_metadata_calc_len % (idxs*idxs*idxs)) == 0) {
+ num++;
+ ei->i_da_metadata_calc_len = 0;
+ } else
+ ei->i_da_metadata_calc_len++;
+ ei->i_da_metadata_calc_last_lblock++;
+ return num;
+ }
- return num;
+ /*
+ * In the worst case we need a new set of index blocks at
+ * every level of the inode's extent tree.
+ */
+ ei->i_da_metadata_calc_len = 1;
+ ei->i_da_metadata_calc_last_lblock = lblock;
+ return ext_depth(inode) + 1;
}
static int
@@ -3023,6 +3038,14 @@ out:
return err;
}
+static void unmap_underlying_metadata_blocks(struct block_device *bdev,
+ sector_t block, int count)
+{
+ int i;
+ for (i = 0; i < count; i++)
+ unmap_underlying_metadata(bdev, block + i);
+}
+
static int
ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
ext4_lblk_t iblock, unsigned int max_blocks,
@@ -3098,6 +3121,30 @@ out:
} else
allocated = ret;
set_buffer_new(bh_result);
+ /*
+ * if we allocated more blocks than requested
+ * we need to make sure we unmap the extra block
+ * allocated. The actual needed block will get
+ * unmapped later when we find the buffer_head marked
+ * new.
+ */
+ if (allocated > max_blocks) {
+ unmap_underlying_metadata_blocks(inode->i_sb->s_bdev,
+ newblock + max_blocks,
+ allocated - max_blocks);
+ allocated = max_blocks;
+ }
+
+ /*
+ * If we have done fallocate with the offset that is already
+ * delayed allocated, we would have block reservation
+ * and quota reservation done in the delayed write path.
+ * But fallocate would have already updated quota and block
+ * count for this offset. So cancel these reservation
+ */
+ if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+ ext4_da_update_reserve_space(inode, allocated, 0);
+
map_out:
set_buffer_mapped(bh_result);
out1:
@@ -3190,7 +3237,13 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
* this situation is possible, though, _during_ tree modification;
* this is why assert can't be put in ext4_ext_find_extent()
*/
- BUG_ON(path[depth].p_ext == NULL && depth != 0);
+ if (path[depth].p_ext == NULL && depth != 0) {
+ ext4_error(inode->i_sb, __func__, "bad extent address "
+ "inode: %lu, iblock: %d, depth: %d",
+ inode->i_ino, iblock, depth);
+ err = -EIO;
+ goto out2;
+ }
eh = path[depth].p_hdr;
ex = path[depth].p_ext;
@@ -3327,9 +3380,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
/* previous routine could use block we allocated */
newblock = ext_pblock(&newex);
allocated = ext4_ext_get_actual_len(&newex);
+ if (allocated > max_blocks)
+ allocated = max_blocks;
set_buffer_new(bh_result);
/*
+ * Update reserved blocks/metadata blocks after successful
+ * block allocation which had been deferred till now.
+ */
+ if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
+ ext4_da_update_reserve_space(inode, allocated, 1);
+
+ /*
* Cache the extent and update transaction to commit on fdatasync only
* when it is _not_ an uninitialized extent.
*/
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 0b22497d92e..98bd140aad0 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -88,9 +88,21 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
return ext4_force_commit(inode->i_sb);
commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
- if (jbd2_log_start_commit(journal, commit_tid))
+ if (jbd2_log_start_commit(journal, commit_tid)) {
+ /*
+ * When the journal is on a different device than the
+ * fs data disk, we need to issue the barrier in
+ * writeback mode. (In ordered mode, the jbd2 layer
+ * will take care of issuing the barrier. In
+ * data=journal, all of the data blocks are written to
+ * the journal device.)
+ */
+ if (ext4_should_writeback_data(inode) &&
+ (journal->j_fs_dev != journal->j_dev) &&
+ (journal->j_flags & JBD2_BARRIER))
+ blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
jbd2_log_wait_commit(journal, commit_tid);
- else if (journal->j_flags & JBD2_BARRIER)
+ } else if (journal->j_flags & JBD2_BARRIER)
blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
return ret;
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5352db1a308..e11952404e0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1003,92 +1003,120 @@ out:
return err;
}
-qsize_t ext4_get_reserved_space(struct inode *inode)
+#ifdef CONFIG_QUOTA
+qsize_t *ext4_get_reserved_space(struct inode *inode)
{
- unsigned long long total;
-
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- total = EXT4_I(inode)->i_reserved_data_blocks +
- EXT4_I(inode)->i_reserved_meta_blocks;
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
-
- return (total << inode->i_blkbits);
+ return &EXT4_I(inode)->i_reserved_quota;
}
+#endif
+
/*
* Calculate the number of metadata blocks need to reserve
- * to allocate @blocks for non extent file based file
+ * to allocate a new block at @lblocks for non extent file based file
*/
-static int ext4_indirect_calc_metadata_amount(struct inode *inode, int blocks)
+static int ext4_indirect_calc_metadata_amount(struct inode *inode,
+ sector_t lblock)
{
- int icap = EXT4_ADDR_PER_BLOCK(inode->i_sb);
- int ind_blks, dind_blks, tind_blks;
-
- /* number of new indirect blocks needed */
- ind_blks = (blocks + icap - 1) / icap;
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ int dind_mask = EXT4_ADDR_PER_BLOCK(inode->i_sb) - 1;
+ int blk_bits;
- dind_blks = (ind_blks + icap - 1) / icap;
+ if (lblock < EXT4_NDIR_BLOCKS)
+ return 0;
- tind_blks = 1;
+ lblock -= EXT4_NDIR_BLOCKS;
- return ind_blks + dind_blks + tind_blks;
+ if (ei->i_da_metadata_calc_len &&
+ (lblock & dind_mask) == ei->i_da_metadata_calc_last_lblock) {
+ ei->i_da_metadata_calc_len++;
+ return 0;
+ }
+ ei->i_da_metadata_calc_last_lblock = lblock & dind_mask;
+ ei->i_da_metadata_calc_len = 1;
+ blk_bits = roundup_pow_of_two(lblock + 1);
+ return (blk_bits / EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb)) + 1;
}
/*
* Calculate the number of metadata blocks need to reserve
- * to allocate given number of blocks
+ * to allocate a block located at @lblock
*/
-static int ext4_calc_metadata_amount(struct inode *inode, int blocks)
+static int ext4_calc_metadata_amount(struct inode *inode, sector_t lblock)
{
- if (!blocks)
- return 0;
-
if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)
- return ext4_ext_calc_metadata_amount(inode, blocks);
+ return ext4_ext_calc_metadata_amount(inode, lblock);
- return ext4_indirect_calc_metadata_amount(inode, blocks);
+ return ext4_indirect_calc_metadata_amount(inode, lblock);
}
-static void ext4_da_update_reserve_space(struct inode *inode, int used)
+/*
+ * Called with i_data_sem down, which is important since we can call
+ * ext4_discard_preallocations() from here.
+ */
+void ext4_da_update_reserve_space(struct inode *inode,
+ int used, int quota_claim)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- int total, mdb, mdb_free;
-
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- /* recalculate the number of metablocks still need to be reserved */
- total = EXT4_I(inode)->i_reserved_data_blocks - used;
- mdb = ext4_calc_metadata_amount(inode, total);
-
- /* figure out how many metablocks to release */
- BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
- mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
-
- if (mdb_free) {
- /* Account for allocated meta_blocks */
- mdb_free -= EXT4_I(inode)->i_allocated_meta_blocks;
-
- /* update fs dirty blocks counter */
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ int mdb_free = 0, allocated_meta_blocks = 0;
+
+ spin_lock(&ei->i_block_reservation_lock);
+ if (unlikely(used > ei->i_reserved_data_blocks)) {
+ ext4_msg(inode->i_sb, KERN_NOTICE, "%s: ino %lu, used %d "
+ "with only %d reserved data blocks\n",
+ __func__, inode->i_ino, used,
+ ei->i_reserved_data_blocks);
+ WARN_ON(1);
+ used = ei->i_reserved_data_blocks;
+ }
+
+ /* Update per-inode reservations */
+ ei->i_reserved_data_blocks -= used;
+ used += ei->i_allocated_meta_blocks;
+ ei->i_reserved_meta_blocks -= ei->i_allocated_meta_blocks;
+ allocated_meta_blocks = ei->i_allocated_meta_blocks;
+ ei->i_allocated_meta_blocks = 0;
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, used);
+
+ if (ei->i_reserved_data_blocks == 0) {
+ /*
+ * We can release all of the reserved metadata blocks
+ * only when we have written all of the delayed
+ * allocation blocks.
+ */
+ mdb_free = ei->i_reserved_meta_blocks;
+ ei->i_reserved_meta_blocks = 0;
+ ei->i_da_metadata_calc_len = 0;
percpu_counter_sub(&sbi->s_dirtyblocks_counter, mdb_free);
- EXT4_I(inode)->i_allocated_meta_blocks = 0;
- EXT4_I(inode)->i_reserved_meta_blocks = mdb;
}
-
- /* update per-inode reservations */
- BUG_ON(used > EXT4_I(inode)->i_reserved_data_blocks);
- EXT4_I(inode)->i_reserved_data_blocks -= used;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
- /*
- * free those over-booking quota for metadata blocks
- */
- if (mdb_free)
- vfs_dq_release_reservation_block(inode, mdb_free);
+ /* Update quota subsystem */
+ if (quota_claim) {
+ vfs_dq_claim_block(inode, used);
+ if (mdb_free)
+ vfs_dq_release_reservation_block(inode, mdb_free);
+ } else {
+ /*
+ * We did fallocate with an offset that is already delayed
+ * allocated. So on delayed allocated writeback we should
+ * not update the quota for allocated blocks. But then
+ * converting an fallocate region to initialized region would
+ * have caused a metadata allocation. So claim quota for
+ * that
+ */
+ if (allocated_meta_blocks)
+ vfs_dq_claim_block(inode, allocated_meta_blocks);
+ vfs_dq_release_reservation_block(inode, mdb_free + used);
+ }
/*
* If we have done all the pending block allocations and if
* there aren't any writers on the inode, we can discard the
* inode's preallocations.
*/
- if (!total && (atomic_read(&inode->i_writecount) == 0))
+ if ((ei->i_reserved_data_blocks == 0) &&
+ (atomic_read(&inode->i_writecount) == 0))
ext4_discard_preallocations(inode);
}
@@ -1280,18 +1308,20 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block,
*/
EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE;
}
- }
+ /*
+ * Update reserved blocks/metadata blocks after successful
+ * block allocation which had been deferred till now. We don't
+ * support fallocate for non extent files. So we can update
+ * reserve space here.
+ */
+ if ((retval > 0) &&
+ (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
+ ext4_da_update_reserve_space(inode, retval, 1);
+ }
if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
EXT4_I(inode)->i_delalloc_reserved_flag = 0;
- /*
- * Update reserved blocks/metadata blocks after successful
- * block allocation which had been deferred till now.
- */
- if ((retval > 0) && (flags & EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE))
- ext4_da_update_reserve_space(inode, retval);
-
up_write((&EXT4_I(inode)->i_data_sem));
if (retval > 0 && buffer_mapped(bh)) {
int ret = check_block_validity(inode, "file system "
@@ -1797,11 +1827,15 @@ static int ext4_journalled_write_end(struct file *file,
return ret ? ret : copied;
}
-static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
+/*
+ * Reserve a single block located at lblock
+ */
+static int ext4_da_reserve_space(struct inode *inode, sector_t lblock)
{
int retries = 0;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- unsigned long md_needed, mdblocks, total = 0;
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ unsigned long md_needed, md_reserved;
/*
* recalculate the amount of metadata blocks to reserve
@@ -1809,86 +1843,78 @@ static int ext4_da_reserve_space(struct inode *inode, int nrblocks)
* worse case is one extent per block
*/
repeat:
- spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- total = EXT4_I(inode)->i_reserved_data_blocks + nrblocks;
- mdblocks = ext4_calc_metadata_amount(inode, total);
- BUG_ON(mdblocks < EXT4_I(inode)->i_reserved_meta_blocks);
-
- md_needed = mdblocks - EXT4_I(inode)->i_reserved_meta_blocks;
- total = md_needed + nrblocks;
+ spin_lock(&ei->i_block_reservation_lock);
+ md_reserved = ei->i_reserved_meta_blocks;
+ md_needed = ext4_calc_metadata_amount(inode, lblock);
+ spin_unlock(&ei->i_block_reservation_lock);
/*
* Make quota reservation here to prevent quota overflow
* later. Real quota accounting is done at pages writeout
* time.
*/
- if (vfs_dq_reserve_block(inode, total)) {
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
+ if (vfs_dq_reserve_block(inode, md_needed + 1))
return -EDQUOT;
- }
- if (ext4_claim_free_blocks(sbi, total)) {
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
- vfs_dq_release_reservation_block(inode, total);
+ if (ext4_claim_free_blocks(sbi, md_needed + 1)) {
+ vfs_dq_release_reservation_block(inode, md_needed + 1);
if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
yield();
goto repeat;
}
return -ENOSPC;
}
- EXT4_I(inode)->i_reserved_data_blocks += nrblocks;
- EXT4_I(inode)->i_reserved_meta_blocks = mdblocks;
+ spin_lock(&ei->i_block_reservation_lock);
+ ei->i_reserved_data_blocks++;
+ ei->i_reserved_meta_blocks += md_needed;
+ spin_unlock(&ei->i_block_reservation_lock);
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
return 0; /* success */
}
static void ext4_da_release_space(struct inode *inode, int to_free)
{
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- int total, mdb, mdb_free, release;
+ struct ext4_inode_info *ei = EXT4_I(inode);
if (!to_free)
return; /* Nothing to release, exit */
spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
- if (!EXT4_I(inode)->i_reserved_data_blocks) {
+ if (unlikely(to_free > ei->i_reserved_data_blocks)) {
/*
- * if there is no reserved blocks, but we try to free some
- * then the counter is messed up somewhere.
- * but since this function is called from invalidate
- * page, it's harmless to return without any action
+ * if there aren't enough reserved blocks, then the
+ * counter is messed up somewhere. Since this
+ * function is called from invalidate page, it's
+ * harmless to return without any action.
*/
- printk(KERN_INFO "ext4 delalloc try to release %d reserved "
- "blocks for inode %lu, but there is no reserved "
- "data blocks\n", to_free, inode->i_ino);
- spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
- return;
+ ext4_msg(inode->i_sb, KERN_NOTICE, "ext4_da_release_space: "
+ "ino %lu, to_free %d with only %d reserved "
+ "data blocks\n", inode->i_ino, to_free,
+ ei->i_reserved_data_blocks);
+ WARN_ON(1);
+ to_free = ei->i_reserved_data_blocks;
}
+ ei->i_reserved_data_blocks -= to_free;
- /* recalculate the number of metablocks still need to be reserved */
- total = EXT4_I(inode)->i_reserved_data_blocks - to_free;
- mdb = ext4_calc_metadata_amount(inode, total);
-
- /* figure out how many metablocks to release */
- BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
- mdb_free = EXT4_I(inode)->i_reserved_meta_blocks - mdb;
-
- release = to_free + mdb_free;
-
- /* update fs dirty blocks counter for truncate case */
- percpu_counter_sub(&sbi->s_dirtyblocks_counter, release);
+ if (ei->i_reserved_data_blocks == 0) {
+ /*
+ * We can release all of the reserved metadata blocks
+ * only when we have written all of the delayed
+ * allocation blocks.
+ */
+ to_free += ei->i_reserved_meta_blocks;
+ ei->i_reserved_meta_blocks = 0;
+ ei->i_da_metadata_calc_len = 0;
+ }
- /* update per-inode reservations */
- BUG_ON(to_free > EXT4_I(inode)->i_reserved_data_blocks);
- EXT4_I(inode)->i_reserved_data_blocks -= to_free;
+ /* update fs dirty blocks counter */
+ percpu_counter_sub(&sbi->s_dirtyblocks_counter, to_free);
- BUG_ON(mdb > EXT4_I(inode)->i_reserved_meta_blocks);
- EXT4_I(inode)->i_reserved_meta_blocks = mdb;
spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
- vfs_dq_release_reservation_block(inode, release);
+ vfs_dq_release_reservation_block(inode, to_free);
}
static void ext4_da_page_release_reservation(struct page *page,
@@ -2193,10 +2219,10 @@ static int mpage_da_map_blocks(struct mpage_da_data *mpd)
* variables are updated after the blocks have been allocated.
*/
new.b_state = 0;
- get_blocks_flags = (EXT4_GET_BLOCKS_CREATE |
- EXT4_GET_BLOCKS_DELALLOC_RESERVE);
+ get_blocks_flags = EXT4_GET_BLOCKS_CREATE;
if (mpd->b_state & (1 << BH_Delay))
- get_blocks_flags |= EXT4_GET_BLOCKS_UPDATE_RESERVE_SPACE;
+ get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
+
blks = ext4_get_blocks(handle, mpd->inode, next, max_blocks,
&new, get_blocks_flags);
if (blks < 0) {
@@ -2494,7 +2520,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
* XXX: __block_prepare_write() unmaps passed block,
* is it OK?
*/
- ret = ext4_da_reserve_space(inode, 1);
+ ret = ext4_da_reserve_space(inode, iblock);
if (ret)
/* not enough space to reserve */
return ret;
@@ -2968,8 +2994,7 @@ retry:
out_writepages:
if (!no_nrwrite_index_update)
wbc->no_nrwrite_index_update = 0;
- if (wbc->nr_to_write > nr_to_writebump)
- wbc->nr_to_write -= nr_to_writebump;
+ wbc->nr_to_write -= nr_to_writebump;
wbc->range_start = range_start;
trace_ext4_da_writepages_result(inode, wbc, ret, pages_written);
return ret;
@@ -2994,11 +3019,18 @@ static int ext4_nonda_switch(struct super_block *sb)
if (2 * free_blocks < 3 * dirty_blocks ||
free_blocks < (dirty_blocks + EXT4_FREEBLOCKS_WATERMARK)) {
/*
- * free block count is less that 150% of dirty blocks
- * or free blocks is less that watermark
+ * free block count is less than 150% of dirty blocks
+ * or free blocks is less than watermark
*/
return 1;
}
+ /*
+ * Even if we don't switch but are nearing capacity,
+ * start pushing delalloc when 1/2 of free blocks are dirty.
+ */
+ if (free_blocks < 2 * dirty_blocks)
+ writeback_inodes_sb_if_idle(sb);
+
return 0;
}
@@ -3006,7 +3038,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
- int ret, retries = 0;
+ int ret, retries = 0, quota_retries = 0;
struct page *page;
pgoff_t index;
unsigned from, to;
@@ -3065,6 +3097,22 @@ retry:
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry;
+
+ if ((ret == -EDQUOT) &&
+ EXT4_I(inode)->i_reserved_meta_blocks &&
+ (quota_retries++ < 3)) {
+ /*
+ * Since we often over-estimate the number of meta
+ * data blocks required, we may sometimes get a
+ * spurios out of quota error even though there would
+ * be enough space once we write the data blocks and
+ * find out how many meta data blocks were _really_
+ * required. So try forcing the inode write to see if
+ * that helps.
+ */
+ write_inode_now(inode, (quota_retries == 3));
+ goto retry;
+ }
out:
return ret;
}
@@ -4794,6 +4842,9 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32;
inode->i_size = ext4_isize(raw_inode);
ei->i_disksize = inode->i_size;
+#ifdef CONFIG_QUOTA
+ ei->i_reserved_quota = 0;
+#endif
inode->i_generation = le32_to_cpu(raw_inode->i_generation);
ei->i_block_group = iloc.block_group;
ei->i_last_alloc_group = ~0;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index b1fd3daadc9..d34afad3e13 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2755,12 +2755,6 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
if (!(ac->ac_flags & EXT4_MB_DELALLOC_RESERVED))
/* release all the reserved blocks if non delalloc */
percpu_counter_sub(&sbi->s_dirtyblocks_counter, reserv_blks);
- else {
- percpu_counter_sub(&sbi->s_dirtyblocks_counter,
- ac->ac_b_ex.fe_len);
- /* convert reserved quota blocks to real quota blocks */
- vfs_dq_claim_block(ac->ac_inode, ac->ac_b_ex.fe_len);
- }
if (sbi->s_log_groups_per_flex) {
ext4_group_t flex_group = ext4_flex_group(sbi,
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index 0ca811061bc..436521cae45 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -17,7 +17,6 @@
#include <linux/proc_fs.h>
#include <linux/pagemap.h>
#include <linux/seq_file.h>
-#include <linux/version.h>
#include <linux/blkdev.h>
#include <linux/mutex.h>
#include "ext4_jbd2.h"
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 827bde1f259..735c20d5fd5 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -702,8 +702,12 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
ei->i_reserved_data_blocks = 0;
ei->i_reserved_meta_blocks = 0;
ei->i_allocated_meta_blocks = 0;
+ ei->i_da_metadata_calc_len = 0;
ei->i_delalloc_reserved_flag = 0;
spin_lock_init(&(ei->i_block_reservation_lock));
+#ifdef CONFIG_QUOTA
+ ei->i_reserved_quota = 0;
+#endif
INIT_LIST_HEAD(&ei->i_aio_dio_complete_list);
ei->cur_aio_dio = NULL;
ei->i_sync_tid = 0;
@@ -1014,7 +1018,9 @@ static const struct dquot_operations ext4_quota_operations = {
.reserve_space = dquot_reserve_space,
.claim_space = dquot_claim_space,
.release_rsv = dquot_release_reserved_space,
+#ifdef CONFIG_QUOTA
.get_reserved_space = ext4_get_reserved_space,
+#endif
.alloc_inode = dquot_alloc_inode,
.free_space = dquot_free_space,
.free_inode = dquot_free_inode,
@@ -2169,9 +2175,9 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a,
struct super_block *sb = sbi->s_buddy_cache->i_sb;
return snprintf(buf, PAGE_SIZE, "%llu\n",
- sbi->s_kbytes_written +
+ (unsigned long long)(sbi->s_kbytes_written +
((part_stat_read(sb->s_bdev->bd_part, sectors[1]) -
- EXT4_SB(sb)->s_sectors_written_start) >> 1));
+ EXT4_SB(sb)->s_sectors_written_start) >> 1)));
}
static ssize_t inode_readahead_blks_store(struct ext4_attr *a,
@@ -4000,6 +4006,7 @@ static inline void unregister_as_ext2(void)
{
unregister_filesystem(&ext2_fs_type);
}
+MODULE_ALIAS("ext2");
#else
static inline void register_as_ext2(void) { }
static inline void unregister_as_ext2(void) { }
@@ -4026,6 +4033,7 @@ static inline void unregister_as_ext3(void)
{
unregister_filesystem(&ext3_fs_type);
}
+MODULE_ALIAS("ext3");
#else
static inline void register_as_ext3(void) { }
static inline void unregister_as_ext3(void) { }
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 910bf9a59cb..f3a2f7ed45a 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -92,7 +92,7 @@ static struct buffer_head *ext4_xattr_cache_find(struct inode *,
struct mb_cache_entry **);
static void ext4_xattr_rehash(struct ext4_xattr_header *,
struct ext4_xattr_entry *);
-static int ext4_xattr_list(struct inode *inode, char *buffer,
+static int ext4_xattr_list(struct dentry *dentry, char *buffer,
size_t buffer_size);
static struct mb_cache *ext4_xattr_cache;
@@ -140,7 +140,7 @@ ext4_xattr_handler(int name_index)
ssize_t
ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
- return ext4_xattr_list(dentry->d_inode, buffer, size);
+ return ext4_xattr_list(dentry, buffer, size);
}
static int
@@ -325,7 +325,7 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name,
}
static int
-ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry,
+ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry,
char *buffer, size_t buffer_size)
{
size_t rest = buffer_size;
@@ -335,9 +335,10 @@ ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry,
ext4_xattr_handler(entry->e_name_index);
if (handler) {
- size_t size = handler->list(inode, buffer, rest,
+ size_t size = handler->list(dentry, buffer, rest,
entry->e_name,
- entry->e_name_len);
+ entry->e_name_len,
+ handler->flags);
if (buffer) {
if (size > rest)
return -ERANGE;
@@ -350,8 +351,9 @@ ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry,
}
static int
-ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
+ struct inode *inode = dentry->d_inode;
struct buffer_head *bh = NULL;
int error;
@@ -376,7 +378,7 @@ ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size)
goto cleanup;
}
ext4_xattr_cache_insert(bh);
- error = ext4_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size);
+ error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
cleanup:
brelse(bh);
@@ -385,8 +387,9 @@ cleanup:
}
static int
-ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
+ struct inode *inode = dentry->d_inode;
struct ext4_xattr_ibody_header *header;
struct ext4_inode *raw_inode;
struct ext4_iloc iloc;
@@ -404,7 +407,7 @@ ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size)
error = ext4_xattr_check_names(IFIRST(header), end);
if (error)
goto cleanup;
- error = ext4_xattr_list_entries(inode, IFIRST(header),
+ error = ext4_xattr_list_entries(dentry, IFIRST(header),
buffer, buffer_size);
cleanup:
@@ -423,12 +426,12 @@ cleanup:
* used / required on success.
*/
static int
-ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
+ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
int i_error, b_error;
- down_read(&EXT4_I(inode)->xattr_sem);
- i_error = ext4_xattr_ibody_list(inode, buffer, buffer_size);
+ down_read(&EXT4_I(dentry->d_inode)->xattr_sem);
+ i_error = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
if (i_error < 0) {
b_error = 0;
} else {
@@ -436,11 +439,11 @@ ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
buffer += i_error;
buffer_size -= i_error;
}
- b_error = ext4_xattr_block_list(inode, buffer, buffer_size);
+ b_error = ext4_xattr_block_list(dentry, buffer, buffer_size);
if (b_error < 0)
i_error = 0;
}
- up_read(&EXT4_I(inode)->xattr_sem);
+ up_read(&EXT4_I(dentry->d_inode)->xattr_sem);
return i_error + b_error;
}
@@ -1329,6 +1332,8 @@ retry:
goto cleanup;
kfree(b_entry_name);
kfree(buffer);
+ b_entry_name = NULL;
+ buffer = NULL;
brelse(is->iloc.bh);
kfree(is);
kfree(bs);
diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c
index ca5f89fc6ca..983c253999a 100644
--- a/fs/ext4/xattr_security.c
+++ b/fs/ext4/xattr_security.c
@@ -12,8 +12,8 @@
#include "xattr.h"
static size_t
-ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size,
- const char *name, size_t name_len)
+ext4_xattr_security_list(struct dentry *dentry, char *list, size_t list_size,
+ const char *name, size_t name_len, int type)
{
const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1;
const size_t total_len = prefix_len + name_len + 1;
@@ -28,23 +28,23 @@ ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size,
}
static int
-ext4_xattr_security_get(struct inode *inode, const char *name,
- void *buffer, size_t size)
+ext4_xattr_security_get(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
{
if (strcmp(name, "") == 0)
return -EINVAL;
- return ext4_xattr_get(inode, EXT4_XATTR_INDEX_SECURITY, name,
- buffer, size);
+ return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY,
+ name, buffer, size);
}
static int
-ext4_xattr_security_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
+ext4_xattr_security_set(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags, int type)
{
if (strcmp(name, "") == 0)
return -EINVAL;
- return ext4_xattr_set(inode, EXT4_XATTR_INDEX_SECURITY, name,
- value, size, flags);
+ return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_SECURITY,
+ name, value, size, flags);
}
int
diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c
index ac1a52cf2a3..15b50edc658 100644
--- a/fs/ext4/xattr_trusted.c
+++ b/fs/ext4/xattr_trusted.c
@@ -14,8 +14,8 @@
#include "xattr.h"
static size_t
-ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
- const char *name, size_t name_len)
+ext4_xattr_trusted_list(struct dentry *dentry, char *list, size_t list_size,
+ const char *name, size_t name_len, int type)
{
const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
@@ -32,23 +32,23 @@ ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size,
}
static int
-ext4_xattr_trusted_get(struct inode *inode, const char *name,
- void *buffer, size_t size)
+ext4_xattr_trusted_get(struct dentry *dentry, const char *name, void *buffer,
+ size_t size, int type)
{
if (strcmp(name, "") == 0)
return -EINVAL;
- return ext4_xattr_get(inode, EXT4_XATTR_INDEX_TRUSTED, name,
- buffer, size);
+ return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED,
+ name, buffer, size);
}
static int
-ext4_xattr_trusted_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
+ext4_xattr_trusted_set(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags, int type)
{
if (strcmp(name, "") == 0)
return -EINVAL;
- return ext4_xattr_set(inode, EXT4_XATTR_INDEX_TRUSTED, name,
- value, size, flags);
+ return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_TRUSTED,
+ name, value, size, flags);
}
struct xattr_handler ext4_xattr_trusted_handler = {
diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c
index d91aa61b42a..c4ce05746ce 100644
--- a/fs/ext4/xattr_user.c
+++ b/fs/ext4/xattr_user.c
@@ -13,13 +13,13 @@
#include "xattr.h"
static size_t
-ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size,
- const char *name, size_t name_len)
+ext4_xattr_user_list(struct dentry *dentry, char *list, size_t list_size,
+ const char *name, size_t name_len, int type)
{
const size_t prefix_len = XATTR_USER_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
- if (!test_opt(inode->i_sb, XATTR_USER))
+ if (!test_opt(dentry->d_sb, XATTR_USER))
return 0;
if (list && total_len <= list_size) {
@@ -31,26 +31,27 @@ ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size,
}
static int
-ext4_xattr_user_get(struct inode *inode, const char *name,
- void *buffer, size_t size)
+ext4_xattr_user_get(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
{
if (strcmp(name, "") == 0)
return -EINVAL;
- if (!test_opt(inode->i_sb, XATTR_USER))
+ if (!test_opt(dentry->d_sb, XATTR_USER))
return -EOPNOTSUPP;
- return ext4_xattr_get(inode, EXT4_XATTR_INDEX_USER, name, buffer, size);
+ return ext4_xattr_get(dentry->d_inode, EXT4_XATTR_INDEX_USER,
+ name, buffer, size);
}
static int
-ext4_xattr_user_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
+ext4_xattr_user_set(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags, int type)
{
if (strcmp(name, "") == 0)
return -EINVAL;
- if (!test_opt(inode->i_sb, XATTR_USER))
+ if (!test_opt(dentry->d_sb, XATTR_USER))
return -EOPNOTSUPP;
- return ext4_xattr_set(inode, EXT4_XATTR_INDEX_USER, name,
- value, size, flags);
+ return ext4_xattr_set(dentry->d_inode, EXT4_XATTR_INDEX_USER,
+ name, value, size, flags);
}
struct xattr_handler ext4_xattr_user_handler = {
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index 7db0979c6b7..e6efdfa0f6d 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -44,7 +44,8 @@ struct fat_mount_options {
nocase:1, /* Does this need case conversion? 0=need case conversion*/
usefree:1, /* Use free_clusters for FAT32 */
tz_utc:1, /* Filesystem timestamps are in UTC */
- rodir:1; /* allow ATTR_RO for directory */
+ rodir:1, /* allow ATTR_RO for directory */
+ discard:1; /* Issue discard requests on deletions */
};
#define FAT_HASH_BITS 8
diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index a81037721a6..81184d3b75a 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -566,16 +566,21 @@ int fat_free_clusters(struct inode *inode, int cluster)
goto error;
}
- /*
- * Issue discard for the sectors we no longer care about,
- * batching contiguous clusters into one request
- */
- if (cluster != fatent.entry + 1) {
- int nr_clus = fatent.entry - first_cl + 1;
-
- sb_issue_discard(sb, fat_clus_to_blknr(sbi, first_cl),
- nr_clus * sbi->sec_per_clus);
- first_cl = cluster;
+ if (sbi->options.discard) {
+ /*
+ * Issue discard for the sectors we no longer
+ * care about, batching contiguous clusters
+ * into one request
+ */
+ if (cluster != fatent.entry + 1) {
+ int nr_clus = fatent.entry - first_cl + 1;
+
+ sb_issue_discard(sb,
+ fat_clus_to_blknr(sbi, first_cl),
+ nr_clus * sbi->sec_per_clus);
+
+ first_cl = cluster;
+ }
}
ops->ent_put(&fatent, FAT_ENT_FREE);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index 76b7961ab66..14da530b05c 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -858,6 +858,8 @@ static int fat_show_options(struct seq_file *m, struct vfsmount *mnt)
seq_puts(m, ",errors=panic");
else
seq_puts(m, ",errors=remount-ro");
+ if (opts->discard)
+ seq_puts(m, ",discard");
return 0;
}
@@ -871,7 +873,7 @@ enum {
Opt_shortname_winnt, Opt_shortname_mixed, Opt_utf8_no, Opt_utf8_yes,
Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
Opt_obsolate, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont,
- Opt_err_panic, Opt_err_ro, Opt_err,
+ Opt_err_panic, Opt_err_ro, Opt_discard, Opt_err,
};
static const match_table_t fat_tokens = {
@@ -899,6 +901,7 @@ static const match_table_t fat_tokens = {
{Opt_err_cont, "errors=continue"},
{Opt_err_panic, "errors=panic"},
{Opt_err_ro, "errors=remount-ro"},
+ {Opt_discard, "discard"},
{Opt_obsolate, "conv=binary"},
{Opt_obsolate, "conv=text"},
{Opt_obsolate, "conv=auto"},
@@ -1136,6 +1139,9 @@ static int parse_options(char *options, int is_vfat, int silent, int *debug,
case Opt_rodir:
opts->rodir = 1;
break;
+ case Opt_discard:
+ opts->discard = 1;
+ break;
/* obsolete mount options */
case Opt_obsolate:
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 2cf93ec40a6..97e01dc0d95 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -618,60 +618,90 @@ static DEFINE_RWLOCK(fasync_lock);
static struct kmem_cache *fasync_cache __read_mostly;
/*
- * fasync_helper() is used by almost all character device drivers
- * to set up the fasync queue. It returns negative on error, 0 if it did
- * no changes and positive if it added/deleted the entry.
+ * Remove a fasync entry. If successfully removed, return
+ * positive and clear the FASYNC flag. If no entry exists,
+ * do nothing and return 0.
+ *
+ * NOTE! It is very important that the FASYNC flag always
+ * match the state "is the filp on a fasync list".
+ *
+ * We always take the 'filp->f_lock', in since fasync_lock
+ * needs to be irq-safe.
*/
-int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
+static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
{
struct fasync_struct *fa, **fp;
- struct fasync_struct *new = NULL;
int result = 0;
- if (on) {
- new = kmem_cache_alloc(fasync_cache, GFP_KERNEL);
- if (!new)
- return -ENOMEM;
+ spin_lock(&filp->f_lock);
+ write_lock_irq(&fasync_lock);
+ for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
+ if (fa->fa_file != filp)
+ continue;
+ *fp = fa->fa_next;
+ kmem_cache_free(fasync_cache, fa);
+ filp->f_flags &= ~FASYNC;
+ result = 1;
+ break;
}
+ write_unlock_irq(&fasync_lock);
+ spin_unlock(&filp->f_lock);
+ return result;
+}
+
+/*
+ * Add a fasync entry. Return negative on error, positive if
+ * added, and zero if did nothing but change an existing one.
+ *
+ * NOTE! It is very important that the FASYNC flag always
+ * match the state "is the filp on a fasync list".
+ */
+static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
+{
+ struct fasync_struct *new, *fa, **fp;
+ int result = 0;
+
+ new = kmem_cache_alloc(fasync_cache, GFP_KERNEL);
+ if (!new)
+ return -ENOMEM;
- /*
- * We need to take f_lock first since it's not an IRQ-safe
- * lock.
- */
spin_lock(&filp->f_lock);
write_lock_irq(&fasync_lock);
for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
- if (fa->fa_file == filp) {
- if(on) {
- fa->fa_fd = fd;
- kmem_cache_free(fasync_cache, new);
- } else {
- *fp = fa->fa_next;
- kmem_cache_free(fasync_cache, fa);
- result = 1;
- }
- goto out;
- }
+ if (fa->fa_file != filp)
+ continue;
+ fa->fa_fd = fd;
+ kmem_cache_free(fasync_cache, new);
+ goto out;
}
- if (on) {
- new->magic = FASYNC_MAGIC;
- new->fa_file = filp;
- new->fa_fd = fd;
- new->fa_next = *fapp;
- *fapp = new;
- result = 1;
- }
+ new->magic = FASYNC_MAGIC;
+ new->fa_file = filp;
+ new->fa_fd = fd;
+ new->fa_next = *fapp;
+ *fapp = new;
+ result = 1;
+ filp->f_flags |= FASYNC;
+
out:
- if (on)
- filp->f_flags |= FASYNC;
- else
- filp->f_flags &= ~FASYNC;
write_unlock_irq(&fasync_lock);
spin_unlock(&filp->f_lock);
return result;
}
+/*
+ * fasync_helper() is used by almost all character device drivers
+ * to set up the fasync queue, and for regular files by the file
+ * lease code. It returns negative on error, 0 if it did no changes
+ * and positive if it added/deleted the entry.
+ */
+int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
+{
+ if (!on)
+ return fasync_remove_entry(filp, fapp);
+ return fasync_add_entry(fd, filp, fapp);
+}
+
EXPORT_SYMBOL(fasync_helper);
void __kill_fasync(struct fasync_struct *fa, int sig, int band)
diff --git a/fs/file_table.c b/fs/file_table.c
index 4bef4c01ec6..b98404b5438 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -21,9 +21,12 @@
#include <linux/fsnotify.h>
#include <linux/sysctl.h>
#include <linux/percpu_counter.h>
+#include <linux/ima.h>
#include <asm/atomic.h>
+#include "internal.h"
+
/* sysctl tunables... */
struct files_stat_struct files_stat = {
.max_files = NR_FILE
@@ -147,8 +150,6 @@ fail:
return NULL;
}
-EXPORT_SYMBOL(get_empty_filp);
-
/**
* alloc_file - allocate and initialize a 'struct file'
* @mnt: the vfsmount on which the file will reside
@@ -164,8 +165,8 @@ EXPORT_SYMBOL(get_empty_filp);
* If all the callers of init_file() are eliminated, its
* code should be moved into this function.
*/
-struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry,
- fmode_t mode, const struct file_operations *fop)
+struct file *alloc_file(struct path *path, fmode_t mode,
+ const struct file_operations *fop)
{
struct file *file;
@@ -173,35 +174,8 @@ struct file *alloc_file(struct vfsmount *mnt, struct dentry *dentry,
if (!file)
return NULL;
- init_file(file, mnt, dentry, mode, fop);
- return file;
-}
-EXPORT_SYMBOL(alloc_file);
-
-/**
- * init_file - initialize a 'struct file'
- * @file: the already allocated 'struct file' to initialized
- * @mnt: the vfsmount on which the file resides
- * @dentry: the dentry representing this file
- * @mode: the mode the file is opened with
- * @fop: the 'struct file_operations' for this file
- *
- * Use this instead of setting the members directly. Doing so
- * avoids making mistakes like forgetting the mntget() or
- * forgetting to take a write on the mnt.
- *
- * Note: This is a crappy interface. It is here to make
- * merging with the existing users of get_empty_filp()
- * who have complex failure logic easier. All users
- * of this should be moving to alloc_file().
- */
-int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
- fmode_t mode, const struct file_operations *fop)
-{
- int error = 0;
- file->f_path.dentry = dentry;
- file->f_path.mnt = mntget(mnt);
- file->f_mapping = dentry->d_inode->i_mapping;
+ file->f_path = *path;
+ file->f_mapping = path->dentry->d_inode->i_mapping;
file->f_mode = mode;
file->f_op = fop;
@@ -211,14 +185,14 @@ int init_file(struct file *file, struct vfsmount *mnt, struct dentry *dentry,
* visible. We do this for consistency, and so
* that we can do debugging checks at __fput()
*/
- if ((mode & FMODE_WRITE) && !special_file(dentry->d_inode->i_mode)) {
+ if ((mode & FMODE_WRITE) && !special_file(path->dentry->d_inode->i_mode)) {
file_take_write(file);
- error = mnt_clone_write(mnt);
- WARN_ON(error);
+ WARN_ON(mnt_clone_write(path->mnt));
}
- return error;
+ ima_counts_get(file);
+ return file;
}
-EXPORT_SYMBOL(init_file);
+EXPORT_SYMBOL(alloc_file);
void fput(struct file *file)
{
@@ -279,6 +253,7 @@ void __fput(struct file *file)
if (file->f_op && file->f_op->release)
file->f_op->release(inode, file);
security_file_free(file);
+ ima_file_free(file);
if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL))
cdev_put(inode->i_cdev);
fops_put(file->f_op);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index 49bc1b8e8f1..1a7c42c64ff 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -242,6 +242,7 @@ static void bdi_sync_writeback(struct backing_dev_info *bdi,
/**
* bdi_start_writeback - start writeback
* @bdi: the backing device to write from
+ * @sb: write inodes from this super_block
* @nr_pages: the number of pages to write
*
* Description:
@@ -1187,6 +1188,23 @@ void writeback_inodes_sb(struct super_block *sb)
EXPORT_SYMBOL(writeback_inodes_sb);
/**
+ * writeback_inodes_sb_if_idle - start writeback if none underway
+ * @sb: the superblock
+ *
+ * Invoke writeback_inodes_sb if no writeback is currently underway.
+ * Returns 1 if writeback was started, 0 if not.
+ */
+int writeback_inodes_sb_if_idle(struct super_block *sb)
+{
+ if (!writeback_in_progress(sb->s_bdi)) {
+ writeback_inodes_sb(sb);
+ return 1;
+ } else
+ return 0;
+}
+EXPORT_SYMBOL(writeback_inodes_sb_if_idle);
+
+/**
* sync_inodes_sb - sync sb inode pages
* @sb: the superblock
*
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c18913a777a..a9f5e137f1d 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -828,6 +828,9 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
if (!page)
break;
+ if (mapping_writably_mapped(mapping))
+ flush_dcache_page(page);
+
pagefault_disable();
tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes);
pagefault_enable();
diff --git a/fs/generic_acl.c b/fs/generic_acl.c
index e0b53aa7bbe..55458031e50 100644
--- a/fs/generic_acl.c
+++ b/fs/generic_acl.c
@@ -1,62 +1,58 @@
/*
- * fs/generic_acl.c
- *
* (C) 2005 Andreas Gruenbacher <agruen@suse.de>
*
* This file is released under the GPL.
+ *
+ * Generic ACL support for in-memory filesystems.
*/
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/generic_acl.h>
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
-/**
- * generic_acl_list - Generic xattr_handler->list() operation
- * @ops: Filesystem specific getacl and setacl callbacks
- */
-size_t
-generic_acl_list(struct inode *inode, struct generic_acl_operations *ops,
- int type, char *list, size_t list_size)
+
+static size_t
+generic_acl_list(struct dentry *dentry, char *list, size_t list_size,
+ const char *name, size_t name_len, int type)
{
struct posix_acl *acl;
- const char *name;
+ const char *xname;
size_t size;
- acl = ops->getacl(inode, type);
+ acl = get_cached_acl(dentry->d_inode, type);
if (!acl)
return 0;
posix_acl_release(acl);
- switch(type) {
- case ACL_TYPE_ACCESS:
- name = POSIX_ACL_XATTR_ACCESS;
- break;
-
- case ACL_TYPE_DEFAULT:
- name = POSIX_ACL_XATTR_DEFAULT;
- break;
-
- default:
- return 0;
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ xname = POSIX_ACL_XATTR_ACCESS;
+ break;
+ case ACL_TYPE_DEFAULT:
+ xname = POSIX_ACL_XATTR_DEFAULT;
+ break;
+ default:
+ return 0;
}
- size = strlen(name) + 1;
+ size = strlen(xname) + 1;
if (list && size <= list_size)
- memcpy(list, name, size);
+ memcpy(list, xname, size);
return size;
}
-/**
- * generic_acl_get - Generic xattr_handler->get() operation
- * @ops: Filesystem specific getacl and setacl callbacks
- */
-int
-generic_acl_get(struct inode *inode, struct generic_acl_operations *ops,
- int type, void *buffer, size_t size)
+static int
+generic_acl_get(struct dentry *dentry, const char *name, void *buffer,
+ size_t size, int type)
{
struct posix_acl *acl;
int error;
- acl = ops->getacl(inode, type);
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
+
+ acl = get_cached_acl(dentry->d_inode, type);
if (!acl)
return -ENODATA;
error = posix_acl_to_xattr(acl, buffer, size);
@@ -65,17 +61,16 @@ generic_acl_get(struct inode *inode, struct generic_acl_operations *ops,
return error;
}
-/**
- * generic_acl_set - Generic xattr_handler->set() operation
- * @ops: Filesystem specific getacl and setacl callbacks
- */
-int
-generic_acl_set(struct inode *inode, struct generic_acl_operations *ops,
- int type, const void *value, size_t size)
+static int
+generic_acl_set(struct dentry *dentry, const char *name, const void *value,
+ size_t size, int flags, int type)
{
+ struct inode *inode = dentry->d_inode;
struct posix_acl *acl = NULL;
int error;
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
if (S_ISLNK(inode->i_mode))
return -EOPNOTSUPP;
if (!is_owner_or_cap(inode))
@@ -91,28 +86,27 @@ generic_acl_set(struct inode *inode, struct generic_acl_operations *ops,
error = posix_acl_valid(acl);
if (error)
goto failed;
- switch(type) {
- case ACL_TYPE_ACCESS:
- mode = inode->i_mode;
- error = posix_acl_equiv_mode(acl, &mode);
- if (error < 0)
- goto failed;
- inode->i_mode = mode;
- if (error == 0) {
- posix_acl_release(acl);
- acl = NULL;
- }
- break;
-
- case ACL_TYPE_DEFAULT:
- if (!S_ISDIR(inode->i_mode)) {
- error = -EINVAL;
- goto failed;
- }
- break;
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ mode = inode->i_mode;
+ error = posix_acl_equiv_mode(acl, &mode);
+ if (error < 0)
+ goto failed;
+ inode->i_mode = mode;
+ if (error == 0) {
+ posix_acl_release(acl);
+ acl = NULL;
+ }
+ break;
+ case ACL_TYPE_DEFAULT:
+ if (!S_ISDIR(inode->i_mode)) {
+ error = -EINVAL;
+ goto failed;
+ }
+ break;
}
}
- ops->setacl(inode, type, acl);
+ set_cached_acl(inode, type, acl);
error = 0;
failed:
posix_acl_release(acl);
@@ -121,14 +115,12 @@ failed:
/**
* generic_acl_init - Take care of acl inheritance at @inode create time
- * @ops: Filesystem specific getacl and setacl callbacks
*
* Files created inside a directory with a default ACL inherit the
* directory's default ACL.
*/
int
-generic_acl_init(struct inode *inode, struct inode *dir,
- struct generic_acl_operations *ops)
+generic_acl_init(struct inode *inode, struct inode *dir)
{
struct posix_acl *acl = NULL;
mode_t mode = inode->i_mode;
@@ -136,7 +128,7 @@ generic_acl_init(struct inode *inode, struct inode *dir,
inode->i_mode = mode & ~current_umask();
if (!S_ISLNK(inode->i_mode))
- acl = ops->getacl(dir, ACL_TYPE_DEFAULT);
+ acl = get_cached_acl(dir, ACL_TYPE_DEFAULT);
if (acl) {
struct posix_acl *clone;
@@ -145,7 +137,7 @@ generic_acl_init(struct inode *inode, struct inode *dir,
error = -ENOMEM;
if (!clone)
goto cleanup;
- ops->setacl(inode, ACL_TYPE_DEFAULT, clone);
+ set_cached_acl(inode, ACL_TYPE_DEFAULT, clone);
posix_acl_release(clone);
}
clone = posix_acl_clone(acl, GFP_KERNEL);
@@ -156,7 +148,7 @@ generic_acl_init(struct inode *inode, struct inode *dir,
if (error >= 0) {
inode->i_mode = mode;
if (error > 0)
- ops->setacl(inode, ACL_TYPE_ACCESS, clone);
+ set_cached_acl(inode, ACL_TYPE_ACCESS, clone);
}
posix_acl_release(clone);
}
@@ -169,20 +161,19 @@ cleanup:
/**
* generic_acl_chmod - change the access acl of @inode upon chmod()
- * @ops: FIlesystem specific getacl and setacl callbacks
*
* A chmod also changes the permissions of the owner, group/mask, and
* other ACL entries.
*/
int
-generic_acl_chmod(struct inode *inode, struct generic_acl_operations *ops)
+generic_acl_chmod(struct inode *inode)
{
struct posix_acl *acl, *clone;
int error = 0;
if (S_ISLNK(inode->i_mode))
return -EOPNOTSUPP;
- acl = ops->getacl(inode, ACL_TYPE_ACCESS);
+ acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
if (acl) {
clone = posix_acl_clone(acl, GFP_KERNEL);
posix_acl_release(acl);
@@ -190,8 +181,37 @@ generic_acl_chmod(struct inode *inode, struct generic_acl_operations *ops)
return -ENOMEM;
error = posix_acl_chmod_masq(clone, inode->i_mode);
if (!error)
- ops->setacl(inode, ACL_TYPE_ACCESS, clone);
+ set_cached_acl(inode, ACL_TYPE_ACCESS, clone);
posix_acl_release(clone);
}
return error;
}
+
+int
+generic_check_acl(struct inode *inode, int mask)
+{
+ struct posix_acl *acl = get_cached_acl(inode, ACL_TYPE_ACCESS);
+
+ if (acl) {
+ int error = posix_acl_permission(inode, acl, mask);
+ posix_acl_release(acl);
+ return error;
+ }
+ return -EAGAIN;
+}
+
+struct xattr_handler generic_acl_access_handler = {
+ .prefix = POSIX_ACL_XATTR_ACCESS,
+ .flags = ACL_TYPE_ACCESS,
+ .list = generic_acl_list,
+ .get = generic_acl_get,
+ .set = generic_acl_set,
+};
+
+struct xattr_handler generic_acl_default_handler = {
+ .prefix = POSIX_ACL_XATTR_DEFAULT,
+ .flags = ACL_TYPE_DEFAULT,
+ .list = generic_acl_list,
+ .get = generic_acl_get,
+ .set = generic_acl_set,
+};
diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig
index b192c661caa..4dcddf83326 100644
--- a/fs/gfs2/Kconfig
+++ b/fs/gfs2/Kconfig
@@ -10,7 +10,6 @@ config GFS2_FS
select SLOW_WORK
select QUOTA
select QUOTACTL
- select FS_JOURNAL_INFO
help
A cluster filesystem.
diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c
index 3eb1ea84617..87ee309d4c2 100644
--- a/fs/gfs2/acl.c
+++ b/fs/gfs2/acl.c
@@ -126,7 +126,7 @@ static int gfs2_acl_set(struct inode *inode, int type, struct posix_acl *acl)
error = posix_acl_to_xattr(acl, data, len);
if (error < 0)
goto out;
- error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, data, len, 0);
+ error = __gfs2_xattr_set(inode, name, data, len, 0, GFS2_EATYPE_SYS);
if (!error)
set_cached_acl(inode, type, acl);
out:
@@ -232,9 +232,10 @@ static int gfs2_acl_type(const char *name)
return -EINVAL;
}
-static int gfs2_xattr_system_get(struct inode *inode, const char *name,
- void *buffer, size_t size)
+static int gfs2_xattr_system_get(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int xtype)
{
+ struct inode *inode = dentry->d_inode;
struct posix_acl *acl;
int type;
int error;
@@ -255,9 +256,11 @@ static int gfs2_xattr_system_get(struct inode *inode, const char *name,
return error;
}
-static int gfs2_xattr_system_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
+static int gfs2_xattr_system_set(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags,
+ int xtype)
{
+ struct inode *inode = dentry->d_inode;
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct posix_acl *acl = NULL;
int error = 0, type;
@@ -319,7 +322,7 @@ static int gfs2_xattr_system_set(struct inode *inode, const char *name,
}
set_acl:
- error = gfs2_xattr_set(inode, GFS2_EATYPE_SYS, name, value, size, 0);
+ error = __gfs2_xattr_set(inode, name, value, size, 0, GFS2_EATYPE_SYS);
if (!error) {
if (acl)
set_cached_acl(inode, type, acl);
@@ -334,6 +337,7 @@ out:
struct xattr_handler gfs2_xattr_system_handler = {
.prefix = XATTR_SYSTEM_PREFIX,
+ .flags = GFS2_EATYPE_SYS,
.get = gfs2_xattr_system_get,
.set = gfs2_xattr_system_set,
};
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 6d47379e794..583e823307a 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -541,7 +541,7 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock,
*ptr++ = cpu_to_be64(bn++);
break;
}
- } while (state != ALLOC_DATA);
+ } while ((state != ALLOC_DATA) || !dblock);
ip->i_height = height;
gfs2_add_inode_blocks(&ip->i_inode, alloced);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 4eb308aa323..a6abbae8a27 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -569,6 +569,40 @@ static int gfs2_fsync(struct file *file, struct dentry *dentry, int datasync)
return ret;
}
+/**
+ * gfs2_file_aio_write - Perform a write to a file
+ * @iocb: The io context
+ * @iov: The data to write
+ * @nr_segs: Number of @iov segments
+ * @pos: The file position
+ *
+ * We have to do a lock/unlock here to refresh the inode size for
+ * O_APPEND writes, otherwise we can land up writing at the wrong
+ * offset. There is still a race, but provided the app is using its
+ * own file locking, this will make O_APPEND work as expected.
+ *
+ */
+
+static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ struct file *file = iocb->ki_filp;
+
+ if (file->f_flags & O_APPEND) {
+ struct dentry *dentry = file->f_dentry;
+ struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
+ struct gfs2_holder gh;
+ int ret;
+
+ ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh);
+ if (ret)
+ return ret;
+ gfs2_glock_dq_uninit(&gh);
+ }
+
+ return generic_file_aio_write(iocb, iov, nr_segs, pos);
+}
+
#ifdef CONFIG_GFS2_FS_LOCKING_DLM
/**
@@ -711,7 +745,7 @@ const struct file_operations gfs2_file_fops = {
.read = do_sync_read,
.aio_read = generic_file_aio_read,
.write = do_sync_write,
- .aio_write = generic_file_aio_write,
+ .aio_write = gfs2_file_aio_write,
.unlocked_ioctl = gfs2_ioctl,
.mmap = gfs2_mmap,
.open = gfs2_open,
@@ -741,7 +775,7 @@ const struct file_operations gfs2_file_fops_nolock = {
.read = do_sync_read,
.aio_read = generic_file_aio_read,
.write = do_sync_write,
- .aio_write = generic_file_aio_write,
+ .aio_write = gfs2_file_aio_write,
.unlocked_ioctl = gfs2_ioctl,
.mmap = gfs2_mmap,
.open = gfs2_open,
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index f455a03a09e..f4266332593 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -769,6 +769,7 @@ int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
if (!gl)
return -ENOMEM;
+ atomic_inc(&sdp->sd_glock_disposal);
gl->gl_flags = 0;
gl->gl_name = name;
atomic_set(&gl->gl_ref, 1);
@@ -1538,6 +1539,9 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
up_write(&gfs2_umount_flush_sem);
msleep(10);
}
+ flush_workqueue(glock_workqueue);
+ wait_event(sdp->sd_glock_wait, atomic_read(&sdp->sd_glock_disposal) == 0);
+ gfs2_dump_lockstate(sdp);
}
void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h
index 13f0bd22813..c0262faf472 100644
--- a/fs/gfs2/glock.h
+++ b/fs/gfs2/glock.h
@@ -123,7 +123,7 @@ struct lm_lockops {
int (*lm_mount) (struct gfs2_sbd *sdp, const char *fsname);
void (*lm_unmount) (struct gfs2_sbd *sdp);
void (*lm_withdraw) (struct gfs2_sbd *sdp);
- void (*lm_put_lock) (struct kmem_cache *cachep, void *gl);
+ void (*lm_put_lock) (struct kmem_cache *cachep, struct gfs2_glock *gl);
unsigned int (*lm_lock) (struct gfs2_glock *gl,
unsigned int req_state, unsigned int flags);
void (*lm_cancel) (struct gfs2_glock *gl);
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 4792200978c..bc0ad158e6b 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -544,6 +544,8 @@ struct gfs2_sbd {
struct gfs2_holder sd_live_gh;
struct gfs2_glock *sd_rename_gl;
struct gfs2_glock *sd_trans_gl;
+ wait_queue_head_t sd_glock_wait;
+ atomic_t sd_glock_disposal;
/* Inode Stuff */
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 26ba2a4c4a2..6e220f4eee7 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -125,7 +125,7 @@ static struct inode *gfs2_iget_skip(struct super_block *sb,
* directory entry when gfs2_inode_lookup() is invoked. Part of the code
* segment inside gfs2_inode_lookup code needs to get moved around.
*
- * Clean up I_LOCK and I_NEW as well.
+ * Clears I_NEW as well.
**/
void gfs2_set_iop(struct inode *inode)
@@ -801,7 +801,8 @@ static int gfs2_security_init(struct gfs2_inode *dip, struct gfs2_inode *ip)
return err;
}
- err = gfs2_xattr_set(&ip->i_inode, GFS2_EATYPE_SECURITY, name, value, len, 0);
+ err = __gfs2_xattr_set(&ip->i_inode, name, value, len, 0,
+ GFS2_EATYPE_SECURITY);
kfree(value);
kfree(name);
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index 46df988323b..0e5e0e7022e 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -21,6 +21,7 @@ static void gdlm_ast(void *arg)
{
struct gfs2_glock *gl = arg;
unsigned ret = gl->gl_state;
+ struct gfs2_sbd *sdp = gl->gl_sbd;
BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED);
@@ -30,6 +31,8 @@ static void gdlm_ast(void *arg)
switch (gl->gl_lksb.sb_status) {
case -DLM_EUNLOCK: /* Unlocked, so glock can be freed */
kmem_cache_free(gfs2_glock_cachep, gl);
+ if (atomic_dec_and_test(&sdp->sd_glock_disposal))
+ wake_up(&sdp->sd_glock_wait);
return;
case -DLM_ECANCEL: /* Cancel while getting lock */
ret |= LM_OUT_CANCELED;
@@ -164,14 +167,16 @@ static unsigned int gdlm_lock(struct gfs2_glock *gl,
return LM_OUT_ASYNC;
}
-static void gdlm_put_lock(struct kmem_cache *cachep, void *ptr)
+static void gdlm_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
{
- struct gfs2_glock *gl = ptr;
- struct lm_lockstruct *ls = &gl->gl_sbd->sd_lockstruct;
+ struct gfs2_sbd *sdp = gl->gl_sbd;
+ struct lm_lockstruct *ls = &sdp->sd_lockstruct;
int error;
if (gl->gl_lksb.sb_lkid == 0) {
kmem_cache_free(cachep, gl);
+ if (atomic_dec_and_test(&sdp->sd_glock_disposal))
+ wake_up(&sdp->sd_glock_wait);
return;
}
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index cb8d7a93d5e..6f68a5f18eb 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -121,7 +121,7 @@ struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp)
if (aspace) {
mapping_set_gfp_mask(aspace->i_mapping, GFP_NOFS);
aspace->i_mapping->a_ops = &aspace_aops;
- aspace->i_size = ~0ULL;
+ aspace->i_size = MAX_LFS_FILESIZE;
ip = GFS2_I(aspace);
clear_bit(GIF_USER, &ip->i_flags);
insert_inode_hash(aspace);
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index edfee24f363..a86ed638156 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -82,6 +82,8 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
gfs2_tune_init(&sdp->sd_tune);
+ init_waitqueue_head(&sdp->sd_glock_wait);
+ atomic_set(&sdp->sd_glock_disposal, 0);
spin_lock_init(&sdp->sd_statfs_spin);
spin_lock_init(&sdp->sd_rindex_spin);
@@ -723,7 +725,7 @@ static int init_journal(struct gfs2_sbd *sdp, int undo)
goto fail;
}
- error = -EINVAL;
+ error = -EUSERS;
if (!gfs2_jindex_size(sdp)) {
fs_err(sdp, "no journals!\n");
goto fail_jindex;
@@ -983,9 +985,17 @@ static const match_table_t nolock_tokens = {
{ Opt_err, NULL },
};
+static void nolock_put_lock(struct kmem_cache *cachep, struct gfs2_glock *gl)
+{
+ struct gfs2_sbd *sdp = gl->gl_sbd;
+ kmem_cache_free(cachep, gl);
+ if (atomic_dec_and_test(&sdp->sd_glock_disposal))
+ wake_up(&sdp->sd_glock_wait);
+}
+
static const struct lm_lockops nolock_ops = {
.lm_proto_name = "lock_nolock",
- .lm_put_lock = kmem_cache_free,
+ .lm_put_lock = nolock_put_lock,
.lm_tokens = &nolock_tokens,
};
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c
index 247436c10de..84350e1be66 100644
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -748,7 +748,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
struct gfs2_rgrpd *nrgd;
unsigned int num_gh;
int dir_rename = 0;
- int alloc_required;
+ int alloc_required = 0;
unsigned int x;
int error;
@@ -867,7 +867,9 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
goto out_gunlock;
}
- alloc_required = error = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
+ if (nip == NULL)
+ alloc_required = gfs2_diradd_alloc_required(ndir, &ndentry->d_name);
+ error = alloc_required;
if (error < 0)
goto out_gunlock;
error = 0;
@@ -1086,7 +1088,8 @@ static void *gfs2_follow_link(struct dentry *dentry, struct nameidata *nd)
error = vfs_follow_link(nd, buf);
if (buf != array)
kfree(buf);
- }
+ } else
+ path_put(&nd->path);
return ERR_PTR(error);
}
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 0608f490c29..503b842f3ba 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -591,11 +591,7 @@ static int gfs2_ri_update(struct gfs2_inode *ip)
u64 rgrp_count = ip->i_disksize;
int error;
- if (do_div(rgrp_count, sizeof(struct gfs2_rindex))) {
- gfs2_consist_inode(ip);
- return -EIO;
- }
-
+ do_div(rgrp_count, sizeof(struct gfs2_rindex));
clear_rgrpdi(sdp);
file_ra_state_init(&ra_state, inode->i_mapping);
@@ -915,7 +911,7 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd)
struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip)
{
BUG_ON(ip->i_alloc != NULL);
- ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_KERNEL);
+ ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_NOFS);
return ip->i_alloc;
}
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index c282ad41f3d..b9dd3da22c0 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -21,6 +21,7 @@
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/time.h>
+#include <linux/wait.h>
#include "gfs2.h"
#include "incore.h"
diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c
index 912f5cbc474..c2ebdf2c01d 100644
--- a/fs/gfs2/xattr.c
+++ b/fs/gfs2/xattr.c
@@ -567,18 +567,17 @@ out:
/**
* gfs2_xattr_get - Get a GFS2 extended attribute
* @inode: The inode
- * @type: The type of extended attribute
* @name: The name of the extended attribute
* @buffer: The buffer to write the result into
* @size: The size of the buffer
+ * @type: The type of extended attribute
*
* Returns: actual size of data on success, -errno on error
*/
-
-int gfs2_xattr_get(struct inode *inode, int type, const char *name,
- void *buffer, size_t size)
+static int gfs2_xattr_get(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
{
- struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_inode *ip = GFS2_I(dentry->d_inode);
struct gfs2_ea_location el;
int error;
@@ -1119,7 +1118,7 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
/**
* gfs2_xattr_remove - Remove a GFS2 extended attribute
- * @inode: The inode
+ * @ip: The inode
* @type: The type of the extended attribute
* @name: The name of the extended attribute
*
@@ -1130,9 +1129,8 @@ static int ea_remove_stuffed(struct gfs2_inode *ip, struct gfs2_ea_location *el)
* Returns: 0, or errno on failure
*/
-static int gfs2_xattr_remove(struct inode *inode, int type, const char *name)
+static int gfs2_xattr_remove(struct gfs2_inode *ip, int type, const char *name)
{
- struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_ea_location el;
int error;
@@ -1156,24 +1154,24 @@ static int gfs2_xattr_remove(struct inode *inode, int type, const char *name)
}
/**
- * gfs2_xattr_set - Set (or remove) a GFS2 extended attribute
- * @inode: The inode
- * @type: The type of the extended attribute
+ * __gfs2_xattr_set - Set (or remove) a GFS2 extended attribute
+ * @ip: The inode
* @name: The name of the extended attribute
* @value: The value of the extended attribute (NULL for remove)
* @size: The size of the @value argument
* @flags: Create or Replace
+ * @type: The type of the extended attribute
*
* See gfs2_xattr_remove() for details of the removal of xattrs.
*
* Returns: 0 or errno on failure
*/
-int gfs2_xattr_set(struct inode *inode, int type, const char *name,
- const void *value, size_t size, int flags)
+int __gfs2_xattr_set(struct inode *inode, const char *name,
+ const void *value, size_t size, int flags, int type)
{
- struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_inode *ip = GFS2_I(inode);
+ struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_ea_location el;
unsigned int namel = strlen(name);
int error;
@@ -1184,7 +1182,7 @@ int gfs2_xattr_set(struct inode *inode, int type, const char *name,
return -ERANGE;
if (value == NULL)
- return gfs2_xattr_remove(inode, type, name);
+ return gfs2_xattr_remove(ip, type, name);
if (ea_check_size(sdp, namel, size))
return -ERANGE;
@@ -1224,6 +1222,13 @@ int gfs2_xattr_set(struct inode *inode, int type, const char *name,
return error;
}
+static int gfs2_xattr_set(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags, int type)
+{
+ return __gfs2_xattr_set(dentry->d_inode, name, value,
+ size, flags, type);
+}
+
static int ea_acl_chmod_unstuffed(struct gfs2_inode *ip,
struct gfs2_ea_header *ea, char *data)
{
@@ -1291,6 +1296,7 @@ fail:
int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
{
+ struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
struct gfs2_ea_location el;
struct buffer_head *dibh;
int error;
@@ -1300,16 +1306,17 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
return error;
if (GFS2_EA_IS_STUFFED(el.el_ea)) {
- error = gfs2_trans_begin(GFS2_SB(&ip->i_inode), RES_DINODE + RES_EATTR, 0);
- if (error)
- return error;
-
- gfs2_trans_add_bh(ip->i_gl, el.el_bh, 1);
- memcpy(GFS2_EA2DATA(el.el_ea), data,
- GFS2_EA_DATA_LEN(el.el_ea));
- } else
+ error = gfs2_trans_begin(sdp, RES_DINODE + RES_EATTR, 0);
+ if (error == 0) {
+ gfs2_trans_add_bh(ip->i_gl, el.el_bh, 1);
+ memcpy(GFS2_EA2DATA(el.el_ea), data,
+ GFS2_EA_DATA_LEN(el.el_ea));
+ }
+ } else {
error = ea_acl_chmod_unstuffed(ip, el.el_ea, data);
+ }
+ brelse(el.el_bh);
if (error)
return error;
@@ -1322,8 +1329,7 @@ int gfs2_xattr_acl_chmod(struct gfs2_inode *ip, struct iattr *attr, char *data)
brelse(dibh);
}
- gfs2_trans_end(GFS2_SB(&ip->i_inode));
-
+ gfs2_trans_end(sdp);
return error;
}
@@ -1529,40 +1535,18 @@ out_alloc:
return error;
}
-static int gfs2_xattr_user_get(struct inode *inode, const char *name,
- void *buffer, size_t size)
-{
- return gfs2_xattr_get(inode, GFS2_EATYPE_USR, name, buffer, size);
-}
-
-static int gfs2_xattr_user_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
-{
- return gfs2_xattr_set(inode, GFS2_EATYPE_USR, name, value, size, flags);
-}
-
-static int gfs2_xattr_security_get(struct inode *inode, const char *name,
- void *buffer, size_t size)
-{
- return gfs2_xattr_get(inode, GFS2_EATYPE_SECURITY, name, buffer, size);
-}
-
-static int gfs2_xattr_security_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
-{
- return gfs2_xattr_set(inode, GFS2_EATYPE_SECURITY, name, value, size, flags);
-}
-
static struct xattr_handler gfs2_xattr_user_handler = {
.prefix = XATTR_USER_PREFIX,
- .get = gfs2_xattr_user_get,
- .set = gfs2_xattr_user_set,
+ .flags = GFS2_EATYPE_USR,
+ .get = gfs2_xattr_get,
+ .set = gfs2_xattr_set,
};
static struct xattr_handler gfs2_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
- .get = gfs2_xattr_security_get,
- .set = gfs2_xattr_security_set,
+ .flags = GFS2_EATYPE_SECURITY,
+ .get = gfs2_xattr_get,
+ .set = gfs2_xattr_set,
};
struct xattr_handler *gfs2_xattr_handlers[] = {
diff --git a/fs/gfs2/xattr.h b/fs/gfs2/xattr.h
index 8d6ae5813c4..d392f8358f2 100644
--- a/fs/gfs2/xattr.h
+++ b/fs/gfs2/xattr.h
@@ -53,10 +53,9 @@ struct gfs2_ea_location {
struct gfs2_ea_header *el_prev;
};
-extern int gfs2_xattr_get(struct inode *inode, int type, const char *name,
- void *buffer, size_t size);
-extern int gfs2_xattr_set(struct inode *inode, int type, const char *name,
- const void *value, size_t size, int flags);
+extern int __gfs2_xattr_set(struct inode *inode, const char *name,
+ const void *value, size_t size,
+ int flags, int type);
extern ssize_t gfs2_listxattr(struct dentry *dentry, char *buffer, size_t size);
extern int gfs2_ea_dealloc(struct gfs2_inode *ip);
diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c
index a5089a6dd67..7239efc690d 100644
--- a/fs/hppfs/hppfs.c
+++ b/fs/hppfs/hppfs.c
@@ -646,22 +646,27 @@ static const struct super_operations hppfs_sbops = {
static int hppfs_readlink(struct dentry *dentry, char __user *buffer,
int buflen)
{
- struct dentry *proc_dentry;
-
- proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
+ struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
return proc_dentry->d_inode->i_op->readlink(proc_dentry, buffer,
buflen);
}
static void *hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
{
- struct dentry *proc_dentry;
-
- proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
+ struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
return proc_dentry->d_inode->i_op->follow_link(proc_dentry, nd);
}
+static void hppfs_put_link(struct dentry *dentry, struct nameidata *nd,
+ void *cookie)
+{
+ struct dentry *proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
+
+ if (proc_dentry->d_inode->i_op->put_link)
+ proc_dentry->d_inode->i_op->put_link(proc_dentry, nd, cookie);
+}
+
static const struct inode_operations hppfs_dir_iops = {
.lookup = hppfs_lookup,
};
@@ -669,6 +674,7 @@ static const struct inode_operations hppfs_dir_iops = {
static const struct inode_operations hppfs_link_iops = {
.readlink = hppfs_readlink,
.follow_link = hppfs_follow_link,
+ .put_link = hppfs_put_link,
};
static struct inode *get_inode(struct super_block *sb, struct dentry *dentry)
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 87a1258953b..a0bbd3d1b41 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -30,7 +30,6 @@
#include <linux/dnotify.h>
#include <linux/statfs.h>
#include <linux/security.h>
-#include <linux/ima.h>
#include <linux/magic.h>
#include <asm/uaccess.h>
@@ -922,7 +921,8 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
int error = -ENOMEM;
struct file *file;
struct inode *inode;
- struct dentry *dentry, *root;
+ struct path path;
+ struct dentry *root;
struct qstr quick_string;
*user = NULL;
@@ -944,10 +944,11 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
quick_string.name = name;
quick_string.len = strlen(quick_string.name);
quick_string.hash = 0;
- dentry = d_alloc(root, &quick_string);
- if (!dentry)
+ path.dentry = d_alloc(root, &quick_string);
+ if (!path.dentry)
goto out_shm_unlock;
+ path.mnt = mntget(hugetlbfs_vfsmount);
error = -ENOSPC;
inode = hugetlbfs_get_inode(root->d_sb, current_fsuid(),
current_fsgid(), S_IFREG | S_IRWXUGO, 0);
@@ -960,24 +961,22 @@ struct file *hugetlb_file_setup(const char *name, size_t size, int acctflag,
acctflag))
goto out_inode;
- d_instantiate(dentry, inode);
+ d_instantiate(path.dentry, inode);
inode->i_size = size;
inode->i_nlink = 0;
error = -ENFILE;
- file = alloc_file(hugetlbfs_vfsmount, dentry,
- FMODE_WRITE | FMODE_READ,
+ file = alloc_file(&path, FMODE_WRITE | FMODE_READ,
&hugetlbfs_file_operations);
if (!file)
goto out_dentry; /* inode is already attached */
- ima_counts_get(file);
return file;
out_inode:
iput(inode);
out_dentry:
- dput(dentry);
+ path_put(&path);
out_shm_unlock:
if (*user) {
user_shm_unlock(size, *user);
diff --git a/fs/inode.c b/fs/inode.c
index 06c1f02de61..03dfeb2e392 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -113,7 +113,7 @@ static void wake_up_inode(struct inode *inode)
* Prevent speculative execution through spin_unlock(&inode_lock);
*/
smp_mb();
- wake_up_bit(&inode->i_state, __I_LOCK);
+ wake_up_bit(&inode->i_state, __I_NEW);
}
/**
@@ -690,17 +690,17 @@ void unlock_new_inode(struct inode *inode)
}
#endif
/*
- * This is special! We do not need the spinlock when clearing I_LOCK,
+ * This is special! We do not need the spinlock when clearing I_NEW,
* because we're guaranteed that nobody else tries to do anything about
* the state of the inode when it is locked, as we just created it (so
- * there can be no old holders that haven't tested I_LOCK).
+ * there can be no old holders that haven't tested I_NEW).
* However we must emit the memory barrier so that other CPUs reliably
- * see the clearing of I_LOCK after the other inode initialisation has
+ * see the clearing of I_NEW after the other inode initialisation has
* completed.
*/
smp_mb();
- WARN_ON((inode->i_state & (I_LOCK|I_NEW)) != (I_LOCK|I_NEW));
- inode->i_state &= ~(I_LOCK|I_NEW);
+ WARN_ON(!(inode->i_state & I_NEW));
+ inode->i_state &= ~I_NEW;
wake_up_inode(inode);
}
EXPORT_SYMBOL(unlock_new_inode);
@@ -731,7 +731,7 @@ static struct inode *get_new_inode(struct super_block *sb,
goto set_failed;
__inode_add_to_lists(sb, head, inode);
- inode->i_state = I_LOCK|I_NEW;
+ inode->i_state = I_NEW;
spin_unlock(&inode_lock);
/* Return the locked inode with I_NEW set, the
@@ -778,7 +778,7 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
if (!old) {
inode->i_ino = ino;
__inode_add_to_lists(sb, head, inode);
- inode->i_state = I_LOCK|I_NEW;
+ inode->i_state = I_NEW;
spin_unlock(&inode_lock);
/* Return the locked inode with I_NEW set, the
@@ -1083,7 +1083,7 @@ int insert_inode_locked(struct inode *inode)
ino_t ino = inode->i_ino;
struct hlist_head *head = inode_hashtable + hash(sb, ino);
- inode->i_state |= I_LOCK|I_NEW;
+ inode->i_state |= I_NEW;
while (1) {
struct hlist_node *node;
struct inode *old = NULL;
@@ -1120,7 +1120,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
struct super_block *sb = inode->i_sb;
struct hlist_head *head = inode_hashtable + hash(sb, hashval);
- inode->i_state |= I_LOCK|I_NEW;
+ inode->i_state |= I_NEW;
while (1) {
struct hlist_node *node;
@@ -1510,7 +1510,7 @@ EXPORT_SYMBOL(inode_wait);
* until the deletion _might_ have completed. Callers are responsible
* to recheck inode state.
*
- * It doesn't matter if I_LOCK is not set initially, a call to
+ * It doesn't matter if I_NEW is not set initially, a call to
* wake_up_inode() after removing from the hash list will DTRT.
*
* This is called with inode_lock held.
@@ -1518,8 +1518,8 @@ EXPORT_SYMBOL(inode_wait);
static void __wait_on_freeing_inode(struct inode *inode)
{
wait_queue_head_t *wq;
- DEFINE_WAIT_BIT(wait, &inode->i_state, __I_LOCK);
- wq = bit_waitqueue(&inode->i_state, __I_LOCK);
+ DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
+ wq = bit_waitqueue(&inode->i_state, __I_NEW);
prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
spin_unlock(&inode_lock);
schedule();
diff --git a/fs/internal.h b/fs/internal.h
index 515175b8b72..e96a1667d74 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -79,8 +79,16 @@ extern void chroot_fs_refs(struct path *, struct path *);
* file_table.c
*/
extern void mark_files_ro(struct super_block *);
+extern struct file *get_empty_filp(void);
/*
* super.c
*/
extern int do_remount_sb(struct super_block *, int, void *, int);
+
+/*
+ * open.c
+ */
+struct nameidata;
+extern struct file *nameidata_to_filp(struct nameidata *);
+extern void release_open_intent(struct nameidata *);
diff --git a/fs/isofs/export.c b/fs/isofs/export.c
index e81a30593ba..ed752cb3847 100644
--- a/fs/isofs/export.c
+++ b/fs/isofs/export.c
@@ -9,7 +9,7 @@
*
* The following files are helpful:
*
- * Documentation/filesystems/Exporting
+ * Documentation/filesystems/nfs/Exporting
* fs/exportfs/expfs.c.
*/
diff --git a/fs/jbd/Kconfig b/fs/jbd/Kconfig
index a8408983abd..4e28beeed15 100644
--- a/fs/jbd/Kconfig
+++ b/fs/jbd/Kconfig
@@ -1,6 +1,5 @@
config JBD
tristate
- select FS_JOURNAL_INFO
help
This is a generic journalling layer for block devices. It is
currently used by the ext3 file system, but it could also be
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index 4160afad6d0..bd224eec9b0 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -1913,7 +1913,7 @@ static void __init jbd_create_debugfs_entry(void)
{
jbd_debugfs_dir = debugfs_create_dir("jbd", NULL);
if (jbd_debugfs_dir)
- jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO,
+ jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO | S_IWUSR,
jbd_debugfs_dir,
&journal_enable_debug);
}
diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig
index 0f7d1ceafdf..f32f346f4b0 100644
--- a/fs/jbd2/Kconfig
+++ b/fs/jbd2/Kconfig
@@ -1,7 +1,6 @@
config JBD2
tristate
select CRC32
- select FS_JOURNAL_INFO
help
This is a generic journaling layer for block devices that support
both 32-bit and 64-bit block numbers. It is currently used by
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index ca0f5eb62b2..88684937095 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -22,6 +22,7 @@
#include <linux/jbd2.h>
#include <linux/errno.h>
#include <linux/slab.h>
+#include <linux/blkdev.h>
#include <trace/events/jbd2.h>
/*
@@ -515,6 +516,20 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
journal->j_tail_sequence = first_tid;
journal->j_tail = blocknr;
spin_unlock(&journal->j_state_lock);
+
+ /*
+ * If there is an external journal, we need to make sure that
+ * any data blocks that were recently written out --- perhaps
+ * by jbd2_log_do_checkpoint() --- are flushed out before we
+ * drop the transactions from the external journal. It's
+ * unlikely this will be necessary, especially with a
+ * appropriately sized journal, but we need this to guarantee
+ * correctness. Fortunately jbd2_cleanup_journal_tail()
+ * doesn't get called all that often.
+ */
+ if ((journal->j_fs_dev != journal->j_dev) &&
+ (journal->j_flags & JBD2_BARRIER))
+ blkdev_issue_flush(journal->j_fs_dev, NULL);
if (!(journal->j_flags & JBD2_ABORT))
jbd2_journal_update_superblock(journal, 1);
return 0;
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6a10238d2c6..1bc74b6f26d 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -259,6 +259,7 @@ static int journal_submit_data_buffers(journal_t *journal,
ret = err;
spin_lock(&journal->j_list_lock);
J_ASSERT(jinode->i_transaction == commit_transaction);
+ commit_transaction->t_flushed_data_blocks = 1;
jinode->i_flags &= ~JI_COMMIT_RUNNING;
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
}
@@ -708,8 +709,17 @@ start_journal_io:
}
}
- /* Done it all: now write the commit record asynchronously. */
+ /*
+ * If the journal is not located on the file system device,
+ * then we must flush the file system device before we issue
+ * the commit record
+ */
+ if (commit_transaction->t_flushed_data_blocks &&
+ (journal->j_fs_dev != journal->j_dev) &&
+ (journal->j_flags & JBD2_BARRIER))
+ blkdev_issue_flush(journal->j_fs_dev, NULL);
+ /* Done it all: now write the commit record asynchronously. */
if (JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
err = journal_submit_commit_record(journal, commit_transaction,
@@ -720,13 +730,6 @@ start_journal_io:
blkdev_issue_flush(journal->j_dev, NULL);
}
- /*
- * This is the right place to wait for data buffers both for ASYNC
- * and !ASYNC commit. If commit is ASYNC, we need to wait only after
- * the commit block went to disk (which happens above). If commit is
- * SYNC, we need to wait for data buffers before we start writing
- * commit block, which happens below in such setting.
- */
err = journal_finish_inode_data_buffers(journal, commit_transaction);
if (err) {
printk(KERN_WARNING
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index b7ca3a92a4d..ac0d027595d 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -814,7 +814,7 @@ static journal_t * journal_init_common (void)
journal_t *journal;
int err;
- journal = kzalloc(sizeof(*journal), GFP_KERNEL|__GFP_NOFAIL);
+ journal = kzalloc(sizeof(*journal), GFP_KERNEL);
if (!journal)
goto fail;
@@ -2115,7 +2115,8 @@ static void __init jbd2_create_debugfs_entry(void)
{
jbd2_debugfs_dir = debugfs_create_dir("jbd2", NULL);
if (jbd2_debugfs_dir)
- jbd2_debug = debugfs_create_u8(JBD2_DEBUG_NAME, S_IRUGO,
+ jbd2_debug = debugfs_create_u8(JBD2_DEBUG_NAME,
+ S_IRUGO | S_IWUSR,
jbd2_debugfs_dir,
&jbd2_journal_enable_debug);
}
diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c
index 7edb62e9741..7cdc3196476 100644
--- a/fs/jffs2/acl.c
+++ b/fs/jffs2/acl.c
@@ -350,8 +350,8 @@ int jffs2_acl_chmod(struct inode *inode)
return rc;
}
-static size_t jffs2_acl_access_listxattr(struct inode *inode, char *list, size_t list_size,
- const char *name, size_t name_len)
+static size_t jffs2_acl_access_listxattr(struct dentry *dentry, char *list,
+ size_t list_size, const char *name, size_t name_len, int type)
{
const int retlen = sizeof(POSIX_ACL_XATTR_ACCESS);
@@ -360,8 +360,8 @@ static size_t jffs2_acl_access_listxattr(struct inode *inode, char *list, size_t
return retlen;
}
-static size_t jffs2_acl_default_listxattr(struct inode *inode, char *list, size_t list_size,
- const char *name, size_t name_len)
+static size_t jffs2_acl_default_listxattr(struct dentry *dentry, char *list,
+ size_t list_size, const char *name, size_t name_len, int type)
{
const int retlen = sizeof(POSIX_ACL_XATTR_DEFAULT);
@@ -370,12 +370,16 @@ static size_t jffs2_acl_default_listxattr(struct inode *inode, char *list, size_
return retlen;
}
-static int jffs2_acl_getxattr(struct inode *inode, int type, void *buffer, size_t size)
+static int jffs2_acl_getxattr(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
{
struct posix_acl *acl;
int rc;
- acl = jffs2_get_acl(inode, type);
+ if (name[0] != '\0')
+ return -EINVAL;
+
+ acl = jffs2_get_acl(dentry->d_inode, type);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (!acl)
@@ -386,26 +390,15 @@ static int jffs2_acl_getxattr(struct inode *inode, int type, void *buffer, size_
return rc;
}
-static int jffs2_acl_access_getxattr(struct inode *inode, const char *name, void *buffer, size_t size)
-{
- if (name[0] != '\0')
- return -EINVAL;
- return jffs2_acl_getxattr(inode, ACL_TYPE_ACCESS, buffer, size);
-}
-
-static int jffs2_acl_default_getxattr(struct inode *inode, const char *name, void *buffer, size_t size)
-{
- if (name[0] != '\0')
- return -EINVAL;
- return jffs2_acl_getxattr(inode, ACL_TYPE_DEFAULT, buffer, size);
-}
-
-static int jffs2_acl_setxattr(struct inode *inode, int type, const void *value, size_t size)
+static int jffs2_acl_setxattr(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags, int type)
{
struct posix_acl *acl;
int rc;
- if (!is_owner_or_cap(inode))
+ if (name[0] != '\0')
+ return -EINVAL;
+ if (!is_owner_or_cap(dentry->d_inode))
return -EPERM;
if (value) {
@@ -420,38 +413,24 @@ static int jffs2_acl_setxattr(struct inode *inode, int type, const void *value,
} else {
acl = NULL;
}
- rc = jffs2_set_acl(inode, type, acl);
+ rc = jffs2_set_acl(dentry->d_inode, type, acl);
out:
posix_acl_release(acl);
return rc;
}
-static int jffs2_acl_access_setxattr(struct inode *inode, const char *name,
- const void *buffer, size_t size, int flags)
-{
- if (name[0] != '\0')
- return -EINVAL;
- return jffs2_acl_setxattr(inode, ACL_TYPE_ACCESS, buffer, size);
-}
-
-static int jffs2_acl_default_setxattr(struct inode *inode, const char *name,
- const void *buffer, size_t size, int flags)
-{
- if (name[0] != '\0')
- return -EINVAL;
- return jffs2_acl_setxattr(inode, ACL_TYPE_DEFAULT, buffer, size);
-}
-
struct xattr_handler jffs2_acl_access_xattr_handler = {
.prefix = POSIX_ACL_XATTR_ACCESS,
+ .flags = ACL_TYPE_DEFAULT,
.list = jffs2_acl_access_listxattr,
- .get = jffs2_acl_access_getxattr,
- .set = jffs2_acl_access_setxattr,
+ .get = jffs2_acl_getxattr,
+ .set = jffs2_acl_setxattr,
};
struct xattr_handler jffs2_acl_default_xattr_handler = {
.prefix = POSIX_ACL_XATTR_DEFAULT,
+ .flags = ACL_TYPE_DEFAULT,
.list = jffs2_acl_default_listxattr,
- .get = jffs2_acl_default_getxattr,
- .set = jffs2_acl_default_setxattr,
+ .get = jffs2_acl_getxattr,
+ .set = jffs2_acl_setxattr,
};
diff --git a/fs/jffs2/security.c b/fs/jffs2/security.c
index 02c39c64ecb..eaccee05858 100644
--- a/fs/jffs2/security.c
+++ b/fs/jffs2/security.c
@@ -44,26 +44,28 @@ int jffs2_init_security(struct inode *inode, struct inode *dir)
}
/* ---- XATTR Handler for "security.*" ----------------- */
-static int jffs2_security_getxattr(struct inode *inode, const char *name,
- void *buffer, size_t size)
+static int jffs2_security_getxattr(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
{
if (!strcmp(name, ""))
return -EINVAL;
- return do_jffs2_getxattr(inode, JFFS2_XPREFIX_SECURITY, name, buffer, size);
+ return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_SECURITY,
+ name, buffer, size);
}
-static int jffs2_security_setxattr(struct inode *inode, const char *name, const void *buffer,
- size_t size, int flags)
+static int jffs2_security_setxattr(struct dentry *dentry, const char *name,
+ const void *buffer, size_t size, int flags, int type)
{
if (!strcmp(name, ""))
return -EINVAL;
- return do_jffs2_setxattr(inode, JFFS2_XPREFIX_SECURITY, name, buffer, size, flags);
+ return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_SECURITY,
+ name, buffer, size, flags);
}
-static size_t jffs2_security_listxattr(struct inode *inode, char *list, size_t list_size,
- const char *name, size_t name_len)
+static size_t jffs2_security_listxattr(struct dentry *dentry, char *list,
+ size_t list_size, const char *name, size_t name_len, int type)
{
size_t retlen = XATTR_SECURITY_PREFIX_LEN + name_len + 1;
diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 4b107881acd..9e75c62c85d 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -990,9 +990,11 @@ ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
if (!xhandle)
continue;
if (buffer) {
- rc = xhandle->list(inode, buffer+len, size-len, xd->xname, xd->name_len);
+ rc = xhandle->list(dentry, buffer+len, size-len,
+ xd->xname, xd->name_len, xd->flags);
} else {
- rc = xhandle->list(inode, NULL, 0, xd->xname, xd->name_len);
+ rc = xhandle->list(dentry, NULL, 0, xd->xname,
+ xd->name_len, xd->flags);
}
if (rc < 0)
goto out;
diff --git a/fs/jffs2/xattr_trusted.c b/fs/jffs2/xattr_trusted.c
index 8ec5765ef34..3e5a5e356e0 100644
--- a/fs/jffs2/xattr_trusted.c
+++ b/fs/jffs2/xattr_trusted.c
@@ -16,24 +16,26 @@
#include <linux/mtd/mtd.h>
#include "nodelist.h"
-static int jffs2_trusted_getxattr(struct inode *inode, const char *name,
- void *buffer, size_t size)
+static int jffs2_trusted_getxattr(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
{
if (!strcmp(name, ""))
return -EINVAL;
- return do_jffs2_getxattr(inode, JFFS2_XPREFIX_TRUSTED, name, buffer, size);
+ return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_TRUSTED,
+ name, buffer, size);
}
-static int jffs2_trusted_setxattr(struct inode *inode, const char *name, const void *buffer,
- size_t size, int flags)
+static int jffs2_trusted_setxattr(struct dentry *dentry, const char *name,
+ const void *buffer, size_t size, int flags, int type)
{
if (!strcmp(name, ""))
return -EINVAL;
- return do_jffs2_setxattr(inode, JFFS2_XPREFIX_TRUSTED, name, buffer, size, flags);
+ return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_TRUSTED,
+ name, buffer, size, flags);
}
-static size_t jffs2_trusted_listxattr(struct inode *inode, char *list, size_t list_size,
- const char *name, size_t name_len)
+static size_t jffs2_trusted_listxattr(struct dentry *dentry, char *list,
+ size_t list_size, const char *name, size_t name_len, int type)
{
size_t retlen = XATTR_TRUSTED_PREFIX_LEN + name_len + 1;
diff --git a/fs/jffs2/xattr_user.c b/fs/jffs2/xattr_user.c
index 8bbeab90ada..8544af67dff 100644
--- a/fs/jffs2/xattr_user.c
+++ b/fs/jffs2/xattr_user.c
@@ -16,24 +16,26 @@
#include <linux/mtd/mtd.h>
#include "nodelist.h"
-static int jffs2_user_getxattr(struct inode *inode, const char *name,
- void *buffer, size_t size)
+static int jffs2_user_getxattr(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
{
if (!strcmp(name, ""))
return -EINVAL;
- return do_jffs2_getxattr(inode, JFFS2_XPREFIX_USER, name, buffer, size);
+ return do_jffs2_getxattr(dentry->d_inode, JFFS2_XPREFIX_USER,
+ name, buffer, size);
}
-static int jffs2_user_setxattr(struct inode *inode, const char *name, const void *buffer,
- size_t size, int flags)
+static int jffs2_user_setxattr(struct dentry *dentry, const char *name,
+ const void *buffer, size_t size, int flags, int type)
{
if (!strcmp(name, ""))
return -EINVAL;
- return do_jffs2_setxattr(inode, JFFS2_XPREFIX_USER, name, buffer, size, flags);
+ return do_jffs2_setxattr(dentry->d_inode, JFFS2_XPREFIX_USER,
+ name, buffer, size, flags);
}
-static size_t jffs2_user_listxattr(struct inode *inode, char *list, size_t list_size,
- const char *name, size_t name_len)
+static size_t jffs2_user_listxattr(struct dentry *dentry, char *list,
+ size_t list_size, const char *name, size_t name_len, int type)
{
size_t retlen = XATTR_USER_PREFIX_LEN + name_len + 1;
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index f26e4d03ada..d945ea76b44 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1292,7 +1292,7 @@ int txCommit(tid_t tid, /* transaction identifier */
*/
/*
* I believe this code is no longer needed. Splitting I_LOCK
- * into two bits, I_LOCK and I_SYNC should prevent this
+ * into two bits, I_NEW and I_SYNC should prevent this
* deadlock as well. But since I don't have a JFS testload
* to verify this, only a trivial s/I_LOCK/I_SYNC/ was done.
* Joern
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 2234c73fc57..d929a822a74 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -524,7 +524,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
* Page cache is indexed by long.
* I would use MAX_LFS_FILESIZE, but it's only half as big
*/
- sb->s_maxbytes = min(((u64) PAGE_CACHE_SIZE << 32) - 1, sb->s_maxbytes);
+ sb->s_maxbytes = min(((u64) PAGE_CACHE_SIZE << 32) - 1, (u64)sb->s_maxbytes);
#endif
sb->s_time_gran = 1;
return 0;
diff --git a/fs/libfs.c b/fs/libfs.c
index 219576c52d8..6e8d17e1dc4 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -848,7 +848,6 @@ EXPORT_SYMBOL(simple_write_end);
EXPORT_SYMBOL(simple_dir_inode_operations);
EXPORT_SYMBOL(simple_dir_operations);
EXPORT_SYMBOL(simple_empty);
-EXPORT_SYMBOL(d_alloc_name);
EXPORT_SYMBOL(simple_fill_super);
EXPORT_SYMBOL(simple_getattr);
EXPORT_SYMBOL(simple_link);
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index bd173a6ca3b..a7966eed3c1 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -11,10 +11,6 @@
#include <linux/time.h>
#include <linux/slab.h>
#include <linux/smp_lock.h>
-#include <linux/in.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/nfsd/nfsd.h>
#include <linux/lockd/lockd.h>
#include <linux/lockd/share.h>
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index e1d28ddd216..56c9519d900 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -11,10 +11,6 @@
#include <linux/time.h>
#include <linux/slab.h>
#include <linux/smp_lock.h>
-#include <linux/in.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/nfsd/nfsd.h>
#include <linux/lockd/lockd.h>
#include <linux/lockd/share.h>
diff --git a/fs/namei.c b/fs/namei.c
index 87f97ba90ad..a4855af776a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -35,7 +35,7 @@
#include <linux/fs_struct.h>
#include <asm/uaccess.h>
-#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
+#include "internal.h"
/* [Feb-1997 T. Schoebel-Theuer]
* Fundamental changes in the pathname lookup mechanisms (namei)
@@ -108,8 +108,6 @@
* any extra contention...
*/
-static int __link_path_walk(const char *name, struct nameidata *nd);
-
/* In order to reduce some races, while at the same time doing additional
* checking and hopefully speeding things up, we copy filenames to the
* kernel data space before using them..
@@ -234,6 +232,7 @@ int generic_permission(struct inode *inode, int mask,
/*
* Searching includes executable on directories, else just read.
*/
+ mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
if (capable(CAP_DAC_READ_SEARCH))
return 0;
@@ -414,36 +413,55 @@ do_revalidate(struct dentry *dentry, struct nameidata *nd)
}
/*
- * Internal lookup() using the new generic dcache.
- * SMP-safe
+ * force_reval_path - force revalidation of a dentry
+ *
+ * In some situations the path walking code will trust dentries without
+ * revalidating them. This causes problems for filesystems that depend on
+ * d_revalidate to handle file opens (e.g. NFSv4). When FS_REVAL_DOT is set
+ * (which indicates that it's possible for the dentry to go stale), force
+ * a d_revalidate call before proceeding.
+ *
+ * Returns 0 if the revalidation was successful. If the revalidation fails,
+ * either return the error returned by d_revalidate or -ESTALE if the
+ * revalidation it just returned 0. If d_revalidate returns 0, we attempt to
+ * invalidate the dentry. It's up to the caller to handle putting references
+ * to the path if necessary.
*/
-static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd)
+static int
+force_reval_path(struct path *path, struct nameidata *nd)
{
- struct dentry * dentry = __d_lookup(parent, name);
+ int status;
+ struct dentry *dentry = path->dentry;
- /* lockess __d_lookup may fail due to concurrent d_move()
- * in some unrelated directory, so try with d_lookup
+ /*
+ * only check on filesystems where it's possible for the dentry to
+ * become stale. It's assumed that if this flag is set then the
+ * d_revalidate op will also be defined.
*/
- if (!dentry)
- dentry = d_lookup(parent, name);
+ if (!(dentry->d_sb->s_type->fs_flags & FS_REVAL_DOT))
+ return 0;
- if (dentry && dentry->d_op && dentry->d_op->d_revalidate)
- dentry = do_revalidate(dentry, nd);
+ status = dentry->d_op->d_revalidate(dentry, nd);
+ if (status > 0)
+ return 0;
- return dentry;
+ if (!status) {
+ d_invalidate(dentry);
+ status = -ESTALE;
+ }
+ return status;
}
/*
- * Short-cut version of permission(), for calling by
- * path_walk(), when dcache lock is held. Combines parts
- * of permission() and generic_permission(), and tests ONLY for
- * MAY_EXEC permission.
+ * Short-cut version of permission(), for calling on directories
+ * during pathname resolution. Combines parts of permission()
+ * and generic_permission(), and tests ONLY for MAY_EXEC permission.
*
* If appropriate, check DAC only. If not appropriate, or
- * short-cut DAC fails, then call permission() to do more
+ * short-cut DAC fails, then call ->permission() to do more
* complete permission check.
*/
-static int exec_permission_lite(struct inode *inode)
+static int exec_permission(struct inode *inode)
{
int ret;
@@ -465,99 +483,6 @@ ok:
return security_inode_permission(inode, MAY_EXEC);
}
-/*
- * This is called when everything else fails, and we actually have
- * to go to the low-level filesystem to find out what we should do..
- *
- * We get the directory semaphore, and after getting that we also
- * make sure that nobody added the entry to the dcache in the meantime..
- * SMP-safe
- */
-static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, struct nameidata *nd)
-{
- struct dentry * result;
- struct inode *dir = parent->d_inode;
-
- mutex_lock(&dir->i_mutex);
- /*
- * First re-do the cached lookup just in case it was created
- * while we waited for the directory semaphore..
- *
- * FIXME! This could use version numbering or similar to
- * avoid unnecessary cache lookups.
- *
- * The "dcache_lock" is purely to protect the RCU list walker
- * from concurrent renames at this point (we mustn't get false
- * negatives from the RCU list walk here, unlike the optimistic
- * fast walk).
- *
- * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
- */
- result = d_lookup(parent, name);
- if (!result) {
- struct dentry *dentry;
-
- /* Don't create child dentry for a dead directory. */
- result = ERR_PTR(-ENOENT);
- if (IS_DEADDIR(dir))
- goto out_unlock;
-
- dentry = d_alloc(parent, name);
- result = ERR_PTR(-ENOMEM);
- if (dentry) {
- result = dir->i_op->lookup(dir, dentry, nd);
- if (result)
- dput(dentry);
- else
- result = dentry;
- }
-out_unlock:
- mutex_unlock(&dir->i_mutex);
- return result;
- }
-
- /*
- * Uhhuh! Nasty case: the cache was re-populated while
- * we waited on the semaphore. Need to revalidate.
- */
- mutex_unlock(&dir->i_mutex);
- if (result->d_op && result->d_op->d_revalidate) {
- result = do_revalidate(result, nd);
- if (!result)
- result = ERR_PTR(-ENOENT);
- }
- return result;
-}
-
-/*
- * Wrapper to retry pathname resolution whenever the underlying
- * file system returns an ESTALE.
- *
- * Retry the whole path once, forcing real lookup requests
- * instead of relying on the dcache.
- */
-static __always_inline int link_path_walk(const char *name, struct nameidata *nd)
-{
- struct path save = nd->path;
- int result;
-
- /* make sure the stuff we saved doesn't go away */
- path_get(&save);
-
- result = __link_path_walk(name, nd);
- if (result == -ESTALE) {
- /* nd->path had been dropped */
- nd->path = save;
- path_get(&nd->path);
- nd->flags |= LOOKUP_REVAL;
- result = __link_path_walk(name, nd);
- }
-
- path_put(&save);
-
- return result;
-}
-
static __always_inline void set_root(struct nameidata *nd)
{
if (!nd->root.mnt) {
@@ -569,6 +494,8 @@ static __always_inline void set_root(struct nameidata *nd)
}
}
+static int link_path_walk(const char *, struct nameidata *);
+
static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
{
int res = 0;
@@ -634,6 +561,7 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
dget(dentry);
}
mntget(path->mnt);
+ nd->last_type = LAST_BIND;
cookie = dentry->d_inode->i_op->follow_link(dentry, nd);
error = PTR_ERR(cookie);
if (!IS_ERR(cookie)) {
@@ -641,11 +569,14 @@ static __always_inline int __do_follow_link(struct path *path, struct nameidata
error = 0;
if (s)
error = __vfs_follow_link(nd, s);
+ else if (nd->last_type == LAST_BIND) {
+ error = force_reval_path(&nd->path, nd);
+ if (error)
+ path_put(&nd->path);
+ }
if (dentry->d_inode->i_op->put_link)
dentry->d_inode->i_op->put_link(dentry, nd, cookie);
}
- path_put(path);
-
return error;
}
@@ -672,6 +603,7 @@ static inline int do_follow_link(struct path *path, struct nameidata *nd)
current->total_link_count++;
nd->depth++;
err = __do_follow_link(path, nd);
+ path_put(path);
current->link_count--;
nd->depth--;
return err;
@@ -797,8 +729,19 @@ static int do_lookup(struct nameidata *nd, struct qstr *name,
struct path *path)
{
struct vfsmount *mnt = nd->path.mnt;
- struct dentry *dentry = __d_lookup(nd->path.dentry, name);
+ struct dentry *dentry, *parent;
+ struct inode *dir;
+ /*
+ * See if the low-level filesystem might want
+ * to use its own hash..
+ */
+ if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
+ int err = nd->path.dentry->d_op->d_hash(nd->path.dentry, name);
+ if (err < 0)
+ return err;
+ }
+ dentry = __d_lookup(nd->path.dentry, name);
if (!dentry)
goto need_lookup;
if (dentry->d_op && dentry->d_op->d_revalidate)
@@ -810,7 +753,59 @@ done:
return 0;
need_lookup:
- dentry = real_lookup(nd->path.dentry, name, nd);
+ parent = nd->path.dentry;
+ dir = parent->d_inode;
+
+ mutex_lock(&dir->i_mutex);
+ /*
+ * First re-do the cached lookup just in case it was created
+ * while we waited for the directory semaphore..
+ *
+ * FIXME! This could use version numbering or similar to
+ * avoid unnecessary cache lookups.
+ *
+ * The "dcache_lock" is purely to protect the RCU list walker
+ * from concurrent renames at this point (we mustn't get false
+ * negatives from the RCU list walk here, unlike the optimistic
+ * fast walk).
+ *
+ * so doing d_lookup() (with seqlock), instead of lockfree __d_lookup
+ */
+ dentry = d_lookup(parent, name);
+ if (!dentry) {
+ struct dentry *new;
+
+ /* Don't create child dentry for a dead directory. */
+ dentry = ERR_PTR(-ENOENT);
+ if (IS_DEADDIR(dir))
+ goto out_unlock;
+
+ new = d_alloc(parent, name);
+ dentry = ERR_PTR(-ENOMEM);
+ if (new) {
+ dentry = dir->i_op->lookup(dir, new, nd);
+ if (dentry)
+ dput(new);
+ else
+ dentry = new;
+ }
+out_unlock:
+ mutex_unlock(&dir->i_mutex);
+ if (IS_ERR(dentry))
+ goto fail;
+ goto done;
+ }
+
+ /*
+ * Uhhuh! Nasty case: the cache was re-populated while
+ * we waited on the semaphore. Need to revalidate.
+ */
+ mutex_unlock(&dir->i_mutex);
+ if (dentry->d_op && dentry->d_op->d_revalidate) {
+ dentry = do_revalidate(dentry, nd);
+ if (!dentry)
+ dentry = ERR_PTR(-ENOENT);
+ }
if (IS_ERR(dentry))
goto fail;
goto done;
@@ -828,6 +823,17 @@ fail:
}
/*
+ * This is a temporary kludge to deal with "automount" symlinks; proper
+ * solution is to trigger them on follow_mount(), so that do_lookup()
+ * would DTRT. To be killed before 2.6.34-final.
+ */
+static inline int follow_on_final(struct inode *inode, unsigned lookup_flags)
+{
+ return inode && unlikely(inode->i_op->follow_link) &&
+ ((lookup_flags & LOOKUP_FOLLOW) || S_ISDIR(inode->i_mode));
+}
+
+/*
* Name resolution.
* This is the basic name resolution function, turning a pathname into
* the final dentry. We expect 'base' to be positive and a directory.
@@ -835,7 +841,7 @@ fail:
* Returns 0 and nd will have valid dentry and mnt on success.
* Returns error and drops reference to input namei data on failure.
*/
-static int __link_path_walk(const char *name, struct nameidata *nd)
+static int link_path_walk(const char *name, struct nameidata *nd)
{
struct path next;
struct inode *inode;
@@ -858,7 +864,7 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
unsigned int c;
nd->flags |= LOOKUP_CONTINUE;
- err = exec_permission_lite(inode);
+ err = exec_permission(inode);
if (err)
break;
@@ -898,16 +904,6 @@ static int __link_path_walk(const char *name, struct nameidata *nd)
case 1:
continue;
}
- /*
- * See if the low-level filesystem might want
- * to use its own hash..
- */
- if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
- err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
- &this);
- if (err < 0)
- break;
- }
/* This does the actual lookups.. */
err = do_lookup(nd, &this, &next);
if (err)
@@ -953,18 +949,11 @@ last_component:
case 1:
goto return_reval;
}
- if (nd->path.dentry->d_op && nd->path.dentry->d_op->d_hash) {
- err = nd->path.dentry->d_op->d_hash(nd->path.dentry,
- &this);
- if (err < 0)
- break;
- }
err = do_lookup(nd, &this, &next);
if (err)
break;
inode = next.dentry->d_inode;
- if ((lookup_flags & LOOKUP_FOLLOW)
- && inode && inode->i_op->follow_link) {
+ if (follow_on_final(inode, lookup_flags)) {
err = do_follow_link(&next, nd);
if (err)
goto return_err;
@@ -1017,8 +1006,27 @@ return_err:
static int path_walk(const char *name, struct nameidata *nd)
{
+ struct path save = nd->path;
+ int result;
+
current->total_link_count = 0;
- return link_path_walk(name, nd);
+
+ /* make sure the stuff we saved doesn't go away */
+ path_get(&save);
+
+ result = link_path_walk(name, nd);
+ if (result == -ESTALE) {
+ /* nd->path had been dropped */
+ current->total_link_count = 0;
+ nd->path = save;
+ path_get(&nd->path);
+ nd->flags |= LOOKUP_REVAL;
+ result = link_path_walk(name, nd);
+ }
+
+ path_put(&save);
+
+ return result;
}
static int path_init(int dfd, const char *name, unsigned int flags, struct nameidata *nd)
@@ -1141,36 +1149,6 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
return retval;
}
-/**
- * path_lookup_open - lookup a file path with open intent
- * @dfd: the directory to use as base, or AT_FDCWD
- * @name: pointer to file name
- * @lookup_flags: lookup intent flags
- * @nd: pointer to nameidata
- * @open_flags: open intent flags
- */
-static int path_lookup_open(int dfd, const char *name,
- unsigned int lookup_flags, struct nameidata *nd, int open_flags)
-{
- struct file *filp = get_empty_filp();
- int err;
-
- if (filp == NULL)
- return -ENFILE;
- nd->intent.open.file = filp;
- nd->intent.open.flags = open_flags;
- nd->intent.open.create_mode = 0;
- err = do_path_lookup(dfd, name, lookup_flags|LOOKUP_OPEN, nd);
- if (IS_ERR(nd->intent.open.file)) {
- if (err == 0) {
- err = PTR_ERR(nd->intent.open.file);
- path_put(&nd->path);
- }
- } else if (err != 0)
- release_open_intent(nd);
- return err;
-}
-
static struct dentry *__lookup_hash(struct qstr *name,
struct dentry *base, struct nameidata *nd)
{
@@ -1191,7 +1169,17 @@ static struct dentry *__lookup_hash(struct qstr *name,
goto out;
}
- dentry = cached_lookup(base, name, nd);
+ dentry = __d_lookup(base, name);
+
+ /* lockess __d_lookup may fail due to concurrent d_move()
+ * in some unrelated directory, so try with d_lookup
+ */
+ if (!dentry)
+ dentry = d_lookup(base, name);
+
+ if (dentry && dentry->d_op && dentry->d_op->d_revalidate)
+ dentry = do_revalidate(dentry, nd);
+
if (!dentry) {
struct dentry *new;
@@ -1223,7 +1211,7 @@ static struct dentry *lookup_hash(struct nameidata *nd)
{
int err;
- err = inode_permission(nd->path.dentry->d_inode, MAY_EXEC);
+ err = exec_permission(nd->path.dentry->d_inode);
if (err)
return ERR_PTR(err);
return __lookup_hash(&nd->last, nd->path.dentry, nd);
@@ -1273,7 +1261,7 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
if (err)
return ERR_PTR(err);
- err = inode_permission(base->d_inode, MAY_EXEC);
+ err = exec_permission(base->d_inode);
if (err)
return ERR_PTR(err);
return __lookup_hash(&this, base, NULL);
@@ -1511,69 +1499,45 @@ int may_open(struct path *path, int acc_mode, int flag)
if (error)
return error;
- error = ima_path_check(path, acc_mode ?
- acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) :
- ACC_MODE(flag) & (MAY_READ | MAY_WRITE),
- IMA_COUNT_UPDATE);
-
- if (error)
- return error;
/*
* An append-only file must be opened in append mode for writing.
*/
if (IS_APPEND(inode)) {
- error = -EPERM;
if ((flag & FMODE_WRITE) && !(flag & O_APPEND))
- goto err_out;
+ return -EPERM;
if (flag & O_TRUNC)
- goto err_out;
+ return -EPERM;
}
/* O_NOATIME can only be set by the owner or superuser */
- if (flag & O_NOATIME)
- if (!is_owner_or_cap(inode)) {
- error = -EPERM;
- goto err_out;
- }
+ if (flag & O_NOATIME && !is_owner_or_cap(inode))
+ return -EPERM;
/*
* Ensure there are no outstanding leases on the file.
*/
- error = break_lease(inode, flag);
- if (error)
- goto err_out;
-
- if (flag & O_TRUNC) {
- error = get_write_access(inode);
- if (error)
- goto err_out;
-
- /*
- * Refuse to truncate files with mandatory locks held on them.
- */
- error = locks_verify_locked(inode);
- if (!error)
- error = security_path_truncate(path, 0,
- ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
- if (!error) {
- vfs_dq_init(inode);
-
- error = do_truncate(dentry, 0,
- ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
- NULL);
- }
- put_write_access(inode);
- if (error)
- goto err_out;
- } else
- if (flag & FMODE_WRITE)
- vfs_dq_init(inode);
+ return break_lease(inode, flag);
+}
- return 0;
-err_out:
- ima_counts_put(path, acc_mode ?
- acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC) :
- ACC_MODE(flag) & (MAY_READ | MAY_WRITE));
+static int handle_truncate(struct path *path)
+{
+ struct inode *inode = path->dentry->d_inode;
+ int error = get_write_access(inode);
+ if (error)
+ return error;
+ /*
+ * Refuse to truncate files with mandatory locks held on them.
+ */
+ error = locks_verify_locked(inode);
+ if (!error)
+ error = security_path_truncate(path, 0,
+ ATTR_MTIME|ATTR_CTIME|ATTR_OPEN);
+ if (!error) {
+ error = do_truncate(path->dentry, 0,
+ ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
+ NULL);
+ }
+ put_write_access(inode);
return error;
}
@@ -1628,7 +1592,7 @@ static inline int open_to_namei_flags(int flag)
return flag;
}
-static int open_will_write_to_fs(int flag, struct inode *inode)
+static int open_will_truncate(int flag, struct inode *inode)
{
/*
* We'll never write to the fs underlying
@@ -1653,8 +1617,9 @@ struct file *do_filp_open(int dfd, const char *pathname,
struct path path;
struct dentry *dir;
int count = 0;
- int will_write;
+ int will_truncate;
int flag = open_to_namei_flags(open_flag);
+ int force_reval = 0;
/*
* O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
@@ -1666,7 +1631,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
open_flag |= O_DSYNC;
if (!acc_mode)
- acc_mode = MAY_OPEN | ACC_MODE(flag);
+ acc_mode = MAY_OPEN | ACC_MODE(open_flag);
/* O_TRUNC implies we need access checks for write permissions */
if (flag & O_TRUNC)
@@ -1681,8 +1646,23 @@ struct file *do_filp_open(int dfd, const char *pathname,
* The simplest case - just a plain lookup.
*/
if (!(flag & O_CREAT)) {
- error = path_lookup_open(dfd, pathname, lookup_flags(flag),
- &nd, flag);
+ filp = get_empty_filp();
+
+ if (filp == NULL)
+ return ERR_PTR(-ENFILE);
+ nd.intent.open.file = filp;
+ filp->f_flags = open_flag;
+ nd.intent.open.flags = flag;
+ nd.intent.open.create_mode = 0;
+ error = do_path_lookup(dfd, pathname,
+ lookup_flags(flag)|LOOKUP_OPEN, &nd);
+ if (IS_ERR(nd.intent.open.file)) {
+ if (error == 0) {
+ error = PTR_ERR(nd.intent.open.file);
+ path_put(&nd.path);
+ }
+ } else if (error)
+ release_open_intent(&nd);
if (error)
return ERR_PTR(error);
goto ok;
@@ -1691,9 +1671,12 @@ struct file *do_filp_open(int dfd, const char *pathname,
/*
* Create - we need to know the parent.
*/
+reval:
error = path_init(dfd, pathname, LOOKUP_PARENT, &nd);
if (error)
return ERR_PTR(error);
+ if (force_reval)
+ nd.flags |= LOOKUP_REVAL;
error = path_walk(pathname, &nd);
if (error) {
if (nd.root.mnt)
@@ -1717,6 +1700,7 @@ struct file *do_filp_open(int dfd, const char *pathname,
if (filp == NULL)
goto exit_parent;
nd.intent.open.file = filp;
+ filp->f_flags = open_flag;
nd.intent.open.flags = flag;
nd.intent.open.create_mode = mode;
dir = nd.path.dentry;
@@ -1757,14 +1741,17 @@ do_last:
mnt_drop_write(nd.path.mnt);
goto exit;
}
- filp = nameidata_to_filp(&nd, open_flag);
- if (IS_ERR(filp))
- ima_counts_put(&nd.path,
- acc_mode & (MAY_READ | MAY_WRITE |
- MAY_EXEC));
+ filp = nameidata_to_filp(&nd);
mnt_drop_write(nd.path.mnt);
if (nd.root.mnt)
path_put(&nd.root);
+ if (!IS_ERR(filp)) {
+ error = ima_file_check(filp, acc_mode);
+ if (error) {
+ fput(filp);
+ filp = ERR_PTR(error);
+ }
+ }
return filp;
}
@@ -1792,7 +1779,7 @@ do_last:
path_to_nameidata(&path, &nd);
error = -EISDIR;
- if (path.dentry->d_inode && S_ISDIR(path.dentry->d_inode->i_mode))
+ if (S_ISDIR(path.dentry->d_inode->i_mode))
goto exit;
ok:
/*
@@ -1805,28 +1792,44 @@ ok:
* be avoided. Taking this mnt write here
* ensures that (2) can not occur.
*/
- will_write = open_will_write_to_fs(flag, nd.path.dentry->d_inode);
- if (will_write) {
+ will_truncate = open_will_truncate(flag, nd.path.dentry->d_inode);
+ if (will_truncate) {
error = mnt_want_write(nd.path.mnt);
if (error)
goto exit;
}
error = may_open(&nd.path, acc_mode, flag);
if (error) {
- if (will_write)
+ if (will_truncate)
mnt_drop_write(nd.path.mnt);
goto exit;
}
- filp = nameidata_to_filp(&nd, open_flag);
- if (IS_ERR(filp))
- ima_counts_put(&nd.path,
- acc_mode & (MAY_READ | MAY_WRITE | MAY_EXEC));
+ filp = nameidata_to_filp(&nd);
+ if (!IS_ERR(filp)) {
+ error = ima_file_check(filp, acc_mode);
+ if (error) {
+ fput(filp);
+ filp = ERR_PTR(error);
+ }
+ }
+ if (!IS_ERR(filp)) {
+ if (acc_mode & MAY_WRITE)
+ vfs_dq_init(nd.path.dentry->d_inode);
+
+ if (will_truncate) {
+ error = handle_truncate(&nd.path);
+ if (error) {
+ fput(filp);
+ filp = ERR_PTR(error);
+ }
+ }
+ }
/*
* It is now safe to drop the mnt write
* because the filp has had a write taken
* on its behalf.
*/
- if (will_write)
+ if (will_truncate)
mnt_drop_write(nd.path.mnt);
if (nd.root.mnt)
path_put(&nd.root);
@@ -1864,6 +1867,7 @@ do_link:
if (error)
goto exit_dput;
error = __do_follow_link(&path, &nd);
+ path_put(&path);
if (error) {
/* Does someone understand code flow here? Or it is only
* me so stupid? Anathema to whoever designed this non-sense
@@ -1872,6 +1876,10 @@ do_link:
release_open_intent(&nd);
if (nd.root.mnt)
path_put(&nd.root);
+ if (error == -ESTALE && !force_reval) {
+ force_reval = 1;
+ goto reval;
+ }
return ERR_PTR(error);
}
nd.flags &= ~LOOKUP_PARENT;
diff --git a/fs/namespace.c b/fs/namespace.c
index 7d70d63ceb2..c768f733c8d 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -965,10 +965,12 @@ EXPORT_SYMBOL(may_umount_tree);
int may_umount(struct vfsmount *mnt)
{
int ret = 1;
+ down_read(&namespace_sem);
spin_lock(&vfsmount_lock);
if (propagate_mount_busy(mnt, 2))
ret = 0;
spin_unlock(&vfsmount_lock);
+ up_read(&namespace_sem);
return ret;
}
@@ -1352,12 +1354,12 @@ static int attach_recursive_mnt(struct vfsmount *source_mnt,
if (err)
goto out_cleanup_ids;
+ spin_lock(&vfsmount_lock);
+
if (IS_MNT_SHARED(dest_mnt)) {
for (p = source_mnt; p; p = next_mnt(p, source_mnt))
set_mnt_shared(p);
}
-
- spin_lock(&vfsmount_lock);
if (parent_path) {
detach_mnt(source_mnt, parent_path);
attach_mnt(source_mnt, path);
@@ -1534,8 +1536,12 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
err = change_mount_flags(path->mnt, flags);
else
err = do_remount_sb(sb, flags, data, 0);
- if (!err)
+ if (!err) {
+ spin_lock(&vfsmount_lock);
+ mnt_flags |= path->mnt->mnt_flags & MNT_PNODE_MASK;
path->mnt->mnt_flags = mnt_flags;
+ spin_unlock(&vfsmount_lock);
+ }
up_write(&sb->s_umount);
if (!err) {
security_sb_post_remount(path->mnt, flags, data);
@@ -1665,6 +1671,8 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
{
int err;
+ mnt_flags &= ~(MNT_SHARED | MNT_WRITE_HOLD);
+
down_write(&namespace_sem);
/* Something was mounted here while we slept */
while (d_mountpoint(path->dentry) &&
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 2a77bc25d5a..59e5673b459 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -90,7 +90,7 @@ config ROOT_NFS
If you want your system to mount its root file system via NFS,
choose Y here. This is common practice for managing systems
without local permanent storage. For details, read
- <file:Documentation/filesystems/nfsroot.txt>.
+ <file:Documentation/filesystems/nfs/nfsroot.txt>.
Most people say N here.
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2c5ace4f00a..3c7f03b669f 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1615,6 +1615,7 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
goto out;
new_dentry = dentry;
+ rehash = NULL;
new_inode = NULL;
}
}
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index e1d415e9784..0d289823e85 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -342,6 +342,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_direct_req *dreq,
data->res.fattr = &data->fattr;
data->res.eof = 0;
data->res.count = bytes;
+ nfs_fattr_init(&data->fattr);
msg.rpc_argp = &data->args;
msg.rpc_resp = &data->res;
@@ -575,6 +576,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
data->res.count = 0;
data->res.fattr = &data->fattr;
data->res.verf = &data->verf;
+ nfs_fattr_init(&data->fattr);
NFS_PROTO(data->inode)->commit_setup(data, &msg);
@@ -766,6 +768,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_direct_req *dreq,
data->res.fattr = &data->fattr;
data->res.count = bytes;
data->res.verf = &data->verf;
+ nfs_fattr_init(&data->fattr);
task_setup_data.task = &data->task;
task_setup_data.callback_data = data;
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 6b891328f33..63f2071d644 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -486,6 +486,8 @@ static int nfs_release_page(struct page *page, gfp_t gfp)
{
dfprintk(PAGECACHE, "NFS: release_page(%p)\n", page);
+ if (gfp & __GFP_WAIT)
+ nfs_wb_page(page->mapping->host, page);
/* If PagePrivate() is set, then the page is not freeable */
if (PagePrivate(page))
return 0;
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index fa588006588..237874f1af2 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -354,12 +354,11 @@ void nfs_fscache_reset_inode_cookie(struct inode *inode)
*/
int nfs_fscache_release_page(struct page *page, gfp_t gfp)
{
- struct nfs_inode *nfsi = NFS_I(page->mapping->host);
- struct fscache_cookie *cookie = nfsi->fscache;
-
- BUG_ON(!cookie);
-
if (PageFsCache(page)) {
+ struct nfs_inode *nfsi = NFS_I(page->mapping->host);
+ struct fscache_cookie *cookie = nfsi->fscache;
+
+ BUG_ON(!cookie);
dfprintk(FSCACHE, "NFS: fscache releasepage (0x%p/0x%p/0x%p)\n",
cookie, page, nfsi);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index faa091865ad..f141bde7756 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1261,8 +1261,10 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if (fattr->valid & NFS_ATTR_FATTR_MODE) {
if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)) {
+ umode_t newmode = inode->i_mode & S_IFMT;
+ newmode |= fattr->mode & S_IALLUGO;
+ inode->i_mode = newmode;
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
- inode->i_mode = fattr->mode;
}
} else if (server->caps & NFS_CAP_MODE)
invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 0adefc40cc8..59047f8d7d7 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -120,7 +120,7 @@ static struct {
{ .status = MNT3ERR_INVAL, .errno = -EINVAL, },
{ .status = MNT3ERR_NAMETOOLONG, .errno = -ENAMETOOLONG, },
{ .status = MNT3ERR_NOTSUPP, .errno = -ENOTSUPP, },
- { .status = MNT3ERR_SERVERFAULT, .errno = -ESERVERFAULT, },
+ { .status = MNT3ERR_SERVERFAULT, .errno = -EREMOTEIO, },
};
struct mountres {
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 5e078b222b4..7bc2da8efd4 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -699,7 +699,7 @@ static struct {
{ NFSERR_BAD_COOKIE, -EBADCOOKIE },
{ NFSERR_NOTSUPP, -ENOTSUPP },
{ NFSERR_TOOSMALL, -ETOOSMALL },
- { NFSERR_SERVERFAULT, -ESERVERFAULT },
+ { NFSERR_SERVERFAULT, -EREMOTEIO },
{ NFSERR_BADTYPE, -EBADTYPE },
{ NFSERR_JUKEBOX, -EJUKEBOX },
{ -1, -EIO }
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 7e57b04e401..0c6fda33d66 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -108,6 +108,10 @@ enum {
NFS_OWNER_RECLAIM_NOGRACE
};
+#define NFS_LOCK_NEW 0
+#define NFS_LOCK_RECLAIM 1
+#define NFS_LOCK_EXPIRED 2
+
/*
* struct nfs4_state maintains the client-side state for a given
* (state_owner,inode) tuple (OPEN) or state_owner (LOCK).
@@ -142,6 +146,7 @@ enum {
NFS_O_RDWR_STATE, /* OPEN stateid has read/write state */
NFS_STATE_RECLAIM_REBOOT, /* OPEN stateid server rebooted */
NFS_STATE_RECLAIM_NOGRACE, /* OPEN stateid needs to recover state */
+ NFS_STATE_POSIX_LOCKS, /* Posix locks are supported */
};
struct nfs4_state {
@@ -273,6 +278,7 @@ extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
extern void nfs4_schedule_state_recovery(struct nfs_client *);
extern void nfs4_schedule_state_manager(struct nfs_client *);
extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state);
+extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state);
extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
@@ -282,6 +288,7 @@ extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter);
extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
extern void nfs_increment_open_seqid(int status, struct nfs_seqid *seqid);
extern void nfs_increment_lock_seqid(int status, struct nfs_seqid *seqid);
+extern void nfs_release_seqid(struct nfs_seqid *seqid);
extern void nfs_free_seqid(struct nfs_seqid *seqid);
extern const nfs4_stateid zero_stateid;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 9f5f11ecfd9..375f0fae2c6 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -64,6 +64,7 @@
struct nfs4_opendata;
static int _nfs4_proc_open(struct nfs4_opendata *data);
+static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
@@ -248,19 +249,15 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
if (state == NULL)
break;
nfs4_state_mark_reclaim_nograce(clp, state);
- case -NFS4ERR_STALE_CLIENTID:
+ goto do_state_recovery;
case -NFS4ERR_STALE_STATEID:
- case -NFS4ERR_EXPIRED:
- nfs4_schedule_state_recovery(clp);
- ret = nfs4_wait_clnt_recover(clp);
- if (ret == 0)
- exception->retry = 1;
-#if !defined(CONFIG_NFS_V4_1)
- break;
-#else /* !defined(CONFIG_NFS_V4_1) */
- if (!nfs4_has_session(server->nfs_client))
+ if (state == NULL)
break;
- /* FALLTHROUGH */
+ nfs4_state_mark_reclaim_reboot(clp, state);
+ case -NFS4ERR_STALE_CLIENTID:
+ case -NFS4ERR_EXPIRED:
+ goto do_state_recovery;
+#if defined(CONFIG_NFS_V4_1)
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
case -NFS4ERR_BAD_HIGH_SLOT:
@@ -273,7 +270,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
nfs4_schedule_state_recovery(clp);
exception->retry = 1;
break;
-#endif /* !defined(CONFIG_NFS_V4_1) */
+#endif /* defined(CONFIG_NFS_V4_1) */
case -NFS4ERR_FILE_OPEN:
if (exception->timeout > HZ) {
/* We have retried a decent amount, time to
@@ -292,6 +289,12 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
}
/* We failed to handle the error */
return nfs4_map_errors(ret);
+do_state_recovery:
+ nfs4_schedule_state_recovery(clp);
+ ret = nfs4_wait_clnt_recover(clp);
+ if (ret == 0)
+ exception->retry = 1;
+ return ret;
}
@@ -341,6 +344,27 @@ nfs4_free_slot(struct nfs4_slot_table *tbl, u8 free_slotid)
free_slotid, tbl->highest_used_slotid);
}
+/*
+ * Signal state manager thread if session is drained
+ */
+static void nfs41_check_drain_session_complete(struct nfs4_session *ses)
+{
+ struct rpc_task *task;
+
+ if (!test_bit(NFS4CLNT_SESSION_DRAINING, &ses->clp->cl_state)) {
+ task = rpc_wake_up_next(&ses->fc_slot_table.slot_tbl_waitq);
+ if (task)
+ rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+ return;
+ }
+
+ if (ses->fc_slot_table.highest_used_slotid != -1)
+ return;
+
+ dprintk("%s COMPLETE: Session Drained\n", __func__);
+ complete(&ses->complete);
+}
+
static void nfs41_sequence_free_slot(const struct nfs_client *clp,
struct nfs4_sequence_res *res)
{
@@ -356,15 +380,7 @@ static void nfs41_sequence_free_slot(const struct nfs_client *clp,
spin_lock(&tbl->slot_tbl_lock);
nfs4_free_slot(tbl, res->sr_slotid);
-
- /* Signal state manager thread if session is drained */
- if (test_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) {
- if (tbl->highest_used_slotid == -1) {
- dprintk("%s COMPLETE: Session Drained\n", __func__);
- complete(&clp->cl_session->complete);
- }
- } else
- rpc_wake_up_next(&tbl->slot_tbl_waitq);
+ nfs41_check_drain_session_complete(clp->cl_session);
spin_unlock(&tbl->slot_tbl_lock);
res->sr_slotid = NFS4_MAX_SLOT_TABLE;
}
@@ -421,7 +437,7 @@ out:
* Note: must be called with under the slot_tbl_lock.
*/
static u8
-nfs4_find_slot(struct nfs4_slot_table *tbl, struct rpc_task *task)
+nfs4_find_slot(struct nfs4_slot_table *tbl)
{
int slotid;
u8 ret_id = NFS4_MAX_SLOT_TABLE;
@@ -463,7 +479,8 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
tbl = &session->fc_slot_table;
spin_lock(&tbl->slot_tbl_lock);
- if (test_bit(NFS4CLNT_SESSION_DRAINING, &session->clp->cl_state)) {
+ if (test_bit(NFS4CLNT_SESSION_DRAINING, &session->clp->cl_state) &&
+ !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
/*
* The state manager will wait until the slot table is empty.
* Schedule the reset thread
@@ -474,7 +491,15 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
return -EAGAIN;
}
- slotid = nfs4_find_slot(tbl, task);
+ if (!rpc_queue_empty(&tbl->slot_tbl_waitq) &&
+ !rpc_task_has_priority(task, RPC_PRIORITY_PRIVILEGED)) {
+ rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
+ spin_unlock(&tbl->slot_tbl_lock);
+ dprintk("%s enforce FIFO order\n", __func__);
+ return -EAGAIN;
+ }
+
+ slotid = nfs4_find_slot(tbl);
if (slotid == NFS4_MAX_SLOT_TABLE) {
rpc_sleep_on(&tbl->slot_tbl_waitq, task, NULL);
spin_unlock(&tbl->slot_tbl_lock);
@@ -483,6 +508,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
}
spin_unlock(&tbl->slot_tbl_lock);
+ rpc_task_set_priority(task, RPC_PRIORITY_NORMAL);
slot = tbl->slots + slotid;
args->sa_session = session;
args->sa_slotid = slotid;
@@ -545,6 +571,12 @@ static void nfs41_call_sync_prepare(struct rpc_task *task, void *calldata)
rpc_call_start(task);
}
+static void nfs41_call_priv_sync_prepare(struct rpc_task *task, void *calldata)
+{
+ rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+ nfs41_call_sync_prepare(task, calldata);
+}
+
static void nfs41_call_sync_done(struct rpc_task *task, void *calldata)
{
struct nfs41_call_sync_data *data = calldata;
@@ -557,12 +589,18 @@ struct rpc_call_ops nfs41_call_sync_ops = {
.rpc_call_done = nfs41_call_sync_done,
};
+struct rpc_call_ops nfs41_call_priv_sync_ops = {
+ .rpc_call_prepare = nfs41_call_priv_sync_prepare,
+ .rpc_call_done = nfs41_call_sync_done,
+};
+
static int nfs4_call_sync_sequence(struct nfs_client *clp,
struct rpc_clnt *clnt,
struct rpc_message *msg,
struct nfs4_sequence_args *args,
struct nfs4_sequence_res *res,
- int cache_reply)
+ int cache_reply,
+ int privileged)
{
int ret;
struct rpc_task *task;
@@ -580,6 +618,8 @@ static int nfs4_call_sync_sequence(struct nfs_client *clp,
};
res->sr_slotid = NFS4_MAX_SLOT_TABLE;
+ if (privileged)
+ task_setup.callback_ops = &nfs41_call_priv_sync_ops;
task = rpc_run_task(&task_setup);
if (IS_ERR(task))
ret = PTR_ERR(task);
@@ -597,7 +637,7 @@ int _nfs4_call_sync_session(struct nfs_server *server,
int cache_reply)
{
return nfs4_call_sync_sequence(server->nfs_client, server->client,
- msg, args, res, cache_reply);
+ msg, args, res, cache_reply, 0);
}
#endif /* CONFIG_NFS_V4_1 */
@@ -1035,7 +1075,7 @@ static int nfs4_open_recover_helper(struct nfs4_opendata *opendata, fmode_t fmod
memset(&opendata->o_res, 0, sizeof(opendata->o_res));
memset(&opendata->c_res, 0, sizeof(opendata->c_res));
nfs4_init_opendata_res(opendata);
- ret = _nfs4_proc_open(opendata);
+ ret = _nfs4_recover_proc_open(opendata);
if (ret != 0)
return ret;
newstate = nfs4_opendata_to_nfs4_state(opendata);
@@ -1326,6 +1366,12 @@ out_no_action:
}
+static void nfs4_recover_open_prepare(struct rpc_task *task, void *calldata)
+{
+ rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+ nfs4_open_prepare(task, calldata);
+}
+
static void nfs4_open_done(struct rpc_task *task, void *calldata)
{
struct nfs4_opendata *data = calldata;
@@ -1384,10 +1430,13 @@ static const struct rpc_call_ops nfs4_open_ops = {
.rpc_release = nfs4_open_release,
};
-/*
- * Note: On error, nfs4_proc_open will free the struct nfs4_opendata
- */
-static int _nfs4_proc_open(struct nfs4_opendata *data)
+static const struct rpc_call_ops nfs4_recover_open_ops = {
+ .rpc_call_prepare = nfs4_recover_open_prepare,
+ .rpc_call_done = nfs4_open_done,
+ .rpc_release = nfs4_open_release,
+};
+
+static int nfs4_run_open_task(struct nfs4_opendata *data, int isrecover)
{
struct inode *dir = data->dir->d_inode;
struct nfs_server *server = NFS_SERVER(dir);
@@ -1414,21 +1463,57 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
data->rpc_done = 0;
data->rpc_status = 0;
data->cancelled = 0;
+ if (isrecover)
+ task_setup_data.callback_ops = &nfs4_recover_open_ops;
task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task))
- return PTR_ERR(task);
- status = nfs4_wait_for_completion_rpc_task(task);
- if (status != 0) {
- data->cancelled = 1;
- smp_wmb();
- } else
- status = data->rpc_status;
- rpc_put_task(task);
+ if (IS_ERR(task))
+ return PTR_ERR(task);
+ status = nfs4_wait_for_completion_rpc_task(task);
+ if (status != 0) {
+ data->cancelled = 1;
+ smp_wmb();
+ } else
+ status = data->rpc_status;
+ rpc_put_task(task);
+
+ return status;
+}
+
+static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
+{
+ struct inode *dir = data->dir->d_inode;
+ struct nfs_openres *o_res = &data->o_res;
+ int status;
+
+ status = nfs4_run_open_task(data, 1);
if (status != 0 || !data->rpc_done)
return status;
- if (o_res->fh.size == 0)
- _nfs4_proc_lookup(dir, o_arg->name, &o_res->fh, o_res->f_attr);
+ nfs_refresh_inode(dir, o_res->dir_attr);
+
+ if (o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
+ status = _nfs4_proc_open_confirm(data);
+ if (status != 0)
+ return status;
+ }
+
+ return status;
+}
+
+/*
+ * Note: On error, nfs4_proc_open will free the struct nfs4_opendata
+ */
+static int _nfs4_proc_open(struct nfs4_opendata *data)
+{
+ struct inode *dir = data->dir->d_inode;
+ struct nfs_server *server = NFS_SERVER(dir);
+ struct nfs_openargs *o_arg = &data->o_arg;
+ struct nfs_openres *o_res = &data->o_res;
+ int status;
+
+ status = nfs4_run_open_task(data, 0);
+ if (status != 0 || !data->rpc_done)
+ return status;
if (o_arg->open_flags & O_CREAT) {
update_changeattr(dir, &o_res->cinfo);
@@ -1575,6 +1660,8 @@ static int _nfs4_do_open(struct inode *dir, struct path *path, fmode_t fmode, in
status = PTR_ERR(state);
if (IS_ERR(state))
goto err_opendata_put;
+ if ((opendata->o_res.rflags & NFS4_OPEN_RESULT_LOCKTYPE_POSIX) != 0)
+ set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
nfs4_opendata_put(opendata);
nfs4_put_state_owner(sp);
*res = state;
@@ -1752,11 +1839,10 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
if (calldata->arg.fmode == 0)
break;
default:
- if (nfs4_async_handle_error(task, server, state) == -EAGAIN) {
- nfs_restart_rpc(task, server->nfs_client);
- return;
- }
+ if (nfs4_async_handle_error(task, server, state) == -EAGAIN)
+ rpc_restart_call_prepare(task);
}
+ nfs_release_seqid(calldata->arg.seqid);
nfs_refresh_inode(calldata->inode, calldata->res.fattr);
}
@@ -1848,8 +1934,6 @@ int nfs4_do_close(struct path *path, struct nfs4_state *state, int wait)
calldata->state = state;
calldata->arg.fh = NFS_FH(state->inode);
calldata->arg.stateid = &state->open_stateid;
- if (nfs4_has_session(server->nfs_client))
- memset(calldata->arg.stateid->data, 0, 4); /* clear seqid */
/* Serialization for the sequence id */
calldata->arg.seqid = nfs_alloc_seqid(&state->owner->so_seqid);
if (calldata->arg.seqid == NULL)
@@ -3342,15 +3426,14 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
if (state == NULL)
break;
nfs4_state_mark_reclaim_nograce(clp, state);
- case -NFS4ERR_STALE_CLIENTID:
+ goto do_state_recovery;
case -NFS4ERR_STALE_STATEID:
+ if (state == NULL)
+ break;
+ nfs4_state_mark_reclaim_reboot(clp, state);
+ case -NFS4ERR_STALE_CLIENTID:
case -NFS4ERR_EXPIRED:
- rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
- nfs4_schedule_state_recovery(clp);
- if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
- rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
- task->tk_status = 0;
- return -EAGAIN;
+ goto do_state_recovery;
#if defined(CONFIG_NFS_V4_1)
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
@@ -3378,6 +3461,13 @@ _nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
}
task->tk_status = nfs4_map_errors(task->tk_status);
return 0;
+do_state_recovery:
+ rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
+ nfs4_schedule_state_recovery(clp);
+ if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
+ rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
+ task->tk_status = 0;
+ return -EAGAIN;
}
static int
@@ -3941,6 +4031,12 @@ static void nfs4_lock_prepare(struct rpc_task *task, void *calldata)
dprintk("%s: done!, ret = %d\n", __func__, data->rpc_status);
}
+static void nfs4_recover_lock_prepare(struct rpc_task *task, void *calldata)
+{
+ rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+ nfs4_lock_prepare(task, calldata);
+}
+
static void nfs4_lock_done(struct rpc_task *task, void *calldata)
{
struct nfs4_lockdata *data = calldata;
@@ -3996,7 +4092,35 @@ static const struct rpc_call_ops nfs4_lock_ops = {
.rpc_release = nfs4_lock_release,
};
-static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int reclaim)
+static const struct rpc_call_ops nfs4_recover_lock_ops = {
+ .rpc_call_prepare = nfs4_recover_lock_prepare,
+ .rpc_call_done = nfs4_lock_done,
+ .rpc_release = nfs4_lock_release,
+};
+
+static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error)
+{
+ struct nfs_client *clp = server->nfs_client;
+ struct nfs4_state *state = lsp->ls_state;
+
+ switch (error) {
+ case -NFS4ERR_ADMIN_REVOKED:
+ case -NFS4ERR_BAD_STATEID:
+ case -NFS4ERR_EXPIRED:
+ if (new_lock_owner != 0 ||
+ (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
+ nfs4_state_mark_reclaim_nograce(clp, state);
+ lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
+ break;
+ case -NFS4ERR_STALE_STATEID:
+ if (new_lock_owner != 0 ||
+ (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
+ nfs4_state_mark_reclaim_reboot(clp, state);
+ lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
+ };
+}
+
+static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *fl, int recovery_type)
{
struct nfs4_lockdata *data;
struct rpc_task *task;
@@ -4020,8 +4144,11 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
return -ENOMEM;
if (IS_SETLKW(cmd))
data->arg.block = 1;
- if (reclaim != 0)
- data->arg.reclaim = 1;
+ if (recovery_type > NFS_LOCK_NEW) {
+ if (recovery_type == NFS_LOCK_RECLAIM)
+ data->arg.reclaim = NFS_LOCK_RECLAIM;
+ task_setup_data.callback_ops = &nfs4_recover_lock_ops;
+ }
msg.rpc_argp = &data->arg,
msg.rpc_resp = &data->res,
task_setup_data.callback_data = data;
@@ -4031,6 +4158,9 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
ret = nfs4_wait_for_completion_rpc_task(task);
if (ret == 0) {
ret = data->rpc_status;
+ if (ret)
+ nfs4_handle_setlk_error(data->server, data->lsp,
+ data->arg.new_lock_owner, ret);
} else
data->cancelled = 1;
rpc_put_task(task);
@@ -4048,7 +4178,7 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request
/* Cache the lock if possible... */
if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
return 0;
- err = _nfs4_do_setlk(state, F_SETLK, request, 1);
+ err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_RECLAIM);
if (err != -NFS4ERR_DELAY)
break;
nfs4_handle_exception(server, err, &exception);
@@ -4068,7 +4198,7 @@ static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request
do {
if (test_bit(NFS_DELEGATED_STATE, &state->flags) != 0)
return 0;
- err = _nfs4_do_setlk(state, F_SETLK, request, 0);
+ err = _nfs4_do_setlk(state, F_SETLK, request, NFS_LOCK_EXPIRED);
switch (err) {
default:
goto out;
@@ -4086,8 +4216,11 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
{
struct nfs_inode *nfsi = NFS_I(state->inode);
unsigned char fl_flags = request->fl_flags;
- int status;
+ int status = -ENOLCK;
+ if ((fl_flags & FL_POSIX) &&
+ !test_bit(NFS_STATE_POSIX_LOCKS, &state->flags))
+ goto out;
/* Is this a delegated open? */
status = nfs4_set_lock_state(state, request);
if (status != 0)
@@ -4104,7 +4237,7 @@ static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock
status = do_vfs_lock(request->fl_file, request);
goto out_unlock;
}
- status = _nfs4_do_setlk(state, cmd, request, 0);
+ status = _nfs4_do_setlk(state, cmd, request, NFS_LOCK_NEW);
if (status != 0)
goto out_unlock;
/* Note: we always want to sleep here! */
@@ -4187,7 +4320,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
if (err != 0)
goto out;
do {
- err = _nfs4_do_setlk(state, F_SETLK, fl, 0);
+ err = _nfs4_do_setlk(state, F_SETLK, fl, NFS_LOCK_NEW);
switch (err) {
default:
printk(KERN_ERR "%s: unhandled error %d.\n",
@@ -4395,11 +4528,12 @@ static void nfs4_get_lease_time_prepare(struct rpc_task *task,
(struct nfs4_get_lease_time_data *)calldata;
dprintk("--> %s\n", __func__);
+ rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
/* just setup sequence, do not trigger session recovery
since we're invoked within one */
ret = nfs41_setup_sequence(data->clp->cl_session,
- &data->args->la_seq_args,
- &data->res->lr_seq_res, 0, task);
+ &data->args->la_seq_args,
+ &data->res->lr_seq_res, 0, task);
BUG_ON(ret == -EAGAIN);
rpc_call_start(task);
@@ -4619,7 +4753,7 @@ struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp)
tbl = &session->fc_slot_table;
tbl->highest_used_slotid = -1;
spin_lock_init(&tbl->slot_tbl_lock);
- rpc_init_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table");
+ rpc_init_priority_wait_queue(&tbl->slot_tbl_waitq, "ForeChannel Slot table");
tbl = &session->bc_slot_table;
tbl->highest_used_slotid = -1;
@@ -4838,14 +4972,22 @@ int nfs4_init_session(struct nfs_server *server)
{
struct nfs_client *clp = server->nfs_client;
struct nfs4_session *session;
+ unsigned int rsize, wsize;
int ret;
if (!nfs4_has_session(clp))
return 0;
+ rsize = server->rsize;
+ if (rsize == 0)
+ rsize = NFS_MAX_FILE_IO_SIZE;
+ wsize = server->wsize;
+ if (wsize == 0)
+ wsize = NFS_MAX_FILE_IO_SIZE;
+
session = clp->cl_session;
- session->fc_attrs.max_rqst_sz = server->wsize + nfs41_maxwrite_overhead;
- session->fc_attrs.max_resp_sz = server->rsize + nfs41_maxread_overhead;
+ session->fc_attrs.max_rqst_sz = wsize + nfs41_maxwrite_overhead;
+ session->fc_attrs.max_resp_sz = rsize + nfs41_maxread_overhead;
ret = nfs4_recover_expired_lease(server);
if (!ret)
@@ -4871,7 +5013,7 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
args.sa_cache_this = 0;
return nfs4_call_sync_sequence(clp, clp->cl_rpcclient, &msg, &args,
- &res, 0);
+ &res, args.sa_cache_this, 1);
}
void nfs41_sequence_call_done(struct rpc_task *task, void *data)
@@ -4953,6 +5095,7 @@ static void nfs4_reclaim_complete_prepare(struct rpc_task *task, void *data)
{
struct nfs4_reclaim_complete_data *calldata = data;
+ rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
if (nfs4_setup_sequence(calldata->clp, &calldata->arg.seq_args,
&calldata->res.seq_res, 0, task))
return;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e76427e6346..c1e2733f4fa 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -135,16 +135,30 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
return status;
}
-static void nfs41_end_drain_session(struct nfs_client *clp,
- struct nfs4_session *ses)
+static void nfs4_end_drain_session(struct nfs_client *clp)
{
- if (test_and_clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state))
- rpc_wake_up(&ses->fc_slot_table.slot_tbl_waitq);
+ struct nfs4_session *ses = clp->cl_session;
+ int max_slots;
+
+ if (test_and_clear_bit(NFS4CLNT_SESSION_DRAINING, &clp->cl_state)) {
+ spin_lock(&ses->fc_slot_table.slot_tbl_lock);
+ max_slots = ses->fc_slot_table.max_slots;
+ while (max_slots--) {
+ struct rpc_task *task;
+
+ task = rpc_wake_up_next(&ses->fc_slot_table.
+ slot_tbl_waitq);
+ if (!task)
+ break;
+ rpc_task_set_priority(task, RPC_PRIORITY_PRIVILEGED);
+ }
+ spin_unlock(&ses->fc_slot_table.slot_tbl_lock);
+ }
}
-static int nfs41_begin_drain_session(struct nfs_client *clp,
- struct nfs4_session *ses)
+static int nfs4_begin_drain_session(struct nfs_client *clp)
{
+ struct nfs4_session *ses = clp->cl_session;
struct nfs4_slot_table *tbl = &ses->fc_slot_table;
spin_lock(&tbl->slot_tbl_lock);
@@ -162,16 +176,13 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
{
int status;
- status = nfs41_begin_drain_session(clp, clp->cl_session);
- if (status != 0)
- goto out;
+ nfs4_begin_drain_session(clp);
status = nfs4_proc_exchange_id(clp, cred);
if (status != 0)
goto out;
status = nfs4_proc_create_session(clp);
if (status != 0)
goto out;
- nfs41_end_drain_session(clp, clp->cl_session);
nfs41_setup_state_renewal(clp);
nfs_mark_client_ready(clp, NFS_CS_READY);
out:
@@ -755,16 +766,21 @@ struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter)
return new;
}
-void nfs_free_seqid(struct nfs_seqid *seqid)
+void nfs_release_seqid(struct nfs_seqid *seqid)
{
if (!list_empty(&seqid->list)) {
struct rpc_sequence *sequence = seqid->sequence->sequence;
spin_lock(&sequence->lock);
- list_del(&seqid->list);
+ list_del_init(&seqid->list);
spin_unlock(&sequence->lock);
rpc_wake_up(&sequence->wait);
}
+}
+
+void nfs_free_seqid(struct nfs_seqid *seqid)
+{
+ nfs_release_seqid(seqid);
kfree(seqid);
}
@@ -885,7 +901,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp)
nfs4_schedule_state_manager(clp);
}
-static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
+int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
{
set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1257,13 +1273,9 @@ void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
static int nfs4_reset_session(struct nfs_client *clp)
{
- struct nfs4_session *ses = clp->cl_session;
int status;
- status = nfs41_begin_drain_session(clp, ses);
- if (status != 0)
- return status;
-
+ nfs4_begin_drain_session(clp);
status = nfs4_proc_destroy_session(clp->cl_session);
if (status && status != -NFS4ERR_BADSESSION &&
status != -NFS4ERR_DEADSESSION) {
@@ -1279,19 +1291,17 @@ static int nfs4_reset_session(struct nfs_client *clp)
out:
/*
* Let the state manager reestablish state
- * without waking other tasks yet.
*/
- if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
- /* Wake up the next rpc task */
- nfs41_end_drain_session(clp, ses);
- if (status == 0)
- nfs41_setup_state_renewal(clp);
- }
+ if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
+ status == 0)
+ nfs41_setup_state_renewal(clp);
+
return status;
}
#else /* CONFIG_NFS_V4_1 */
static int nfs4_reset_session(struct nfs_client *clp) { return 0; }
+static int nfs4_end_drain_session(struct nfs_client *clp) { return 0; }
#endif /* CONFIG_NFS_V4_1 */
/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors
@@ -1382,6 +1392,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
goto out_error;
}
+ nfs4_end_drain_session(clp);
if (test_and_clear_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state)) {
nfs_client_return_marked_delegations(clp);
continue;
@@ -1398,6 +1409,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
out_error:
printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s"
" with error %d\n", clp->cl_hostname, -status);
+ nfs4_end_drain_session(clp);
nfs4_clear_state_manager_bit(clp);
}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index e437fd6a819..5cd5184b56d 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4631,7 +4631,7 @@ static int decode_sequence(struct xdr_stream *xdr,
* If the server returns different values for sessionID, slotID or
* sequence number, the server is looney tunes.
*/
- status = -ESERVERFAULT;
+ status = -EREMOTEIO;
if (memcmp(id.data, res->sr_session->sess_id.data,
NFS4_MAX_SESSIONID_LEN)) {
@@ -5774,7 +5774,7 @@ static struct {
{ NFS4ERR_BAD_COOKIE, -EBADCOOKIE },
{ NFS4ERR_NOTSUPP, -ENOTSUPP },
{ NFS4ERR_TOOSMALL, -ETOOSMALL },
- { NFS4ERR_SERVERFAULT, -ESERVERFAULT },
+ { NFS4ERR_SERVERFAULT, -EREMOTEIO },
{ NFS4ERR_BADTYPE, -EBADTYPE },
{ NFS4ERR_LOCKED, -EAGAIN },
{ NFS4ERR_SYMLINK, -ELOOP },
@@ -5801,7 +5801,7 @@ nfs4_stat_to_errno(int stat)
}
if (stat <= 10000 || stat > 10100) {
/* The server is looney tunes. */
- return -ESERVERFAULT;
+ return -EREMOTEIO;
}
/* If we cannot translate the error, the recovery routines should
* handle it.
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index e2975939126..a12c45b65dd 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -176,6 +176,12 @@ void nfs_release_request(struct nfs_page *req)
kref_put(&req->wb_kref, nfs_free_request);
}
+static int nfs_wait_bit_uninterruptible(void *word)
+{
+ io_schedule();
+ return 0;
+}
+
/**
* nfs_wait_on_request - Wait for a request to complete.
* @req: request to wait upon.
@@ -186,14 +192,9 @@ void nfs_release_request(struct nfs_page *req)
int
nfs_wait_on_request(struct nfs_page *req)
{
- int ret = 0;
-
- if (!test_bit(PG_BUSY, &req->wb_flags))
- goto out;
- ret = out_of_line_wait_on_bit(&req->wb_flags, PG_BUSY,
- nfs_wait_bit_killable, TASK_KILLABLE);
-out:
- return ret;
+ return wait_on_bit(&req->wb_flags, PG_BUSY,
+ nfs_wait_bit_uninterruptible,
+ TASK_UNINTERRUPTIBLE);
}
/**
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index ce907efc550..f1afee4eea7 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -243,6 +243,7 @@ static int nfs_show_stats(struct seq_file *, struct vfsmount *);
static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *);
static int nfs_xdev_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt);
+static void nfs_put_super(struct super_block *);
static void nfs_kill_super(struct super_block *);
static int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
@@ -266,6 +267,7 @@ static const struct super_operations nfs_sops = {
.alloc_inode = nfs_alloc_inode,
.destroy_inode = nfs_destroy_inode,
.write_inode = nfs_write_inode,
+ .put_super = nfs_put_super,
.statfs = nfs_statfs,
.clear_inode = nfs_clear_inode,
.umount_begin = nfs_umount_begin,
@@ -335,6 +337,7 @@ static const struct super_operations nfs4_sops = {
.alloc_inode = nfs_alloc_inode,
.destroy_inode = nfs_destroy_inode,
.write_inode = nfs_write_inode,
+ .put_super = nfs_put_super,
.statfs = nfs_statfs,
.clear_inode = nfs4_clear_inode,
.umount_begin = nfs_umount_begin,
@@ -2258,6 +2261,17 @@ error_splat_super:
}
/*
+ * Ensure that we unregister the bdi before kill_anon_super
+ * releases the device name
+ */
+static void nfs_put_super(struct super_block *s)
+{
+ struct nfs_server *server = NFS_SB(s);
+
+ bdi_unregister(&server->backing_dev_info);
+}
+
+/*
* Destroy an NFS2/3 superblock
*/
static void nfs_kill_super(struct super_block *s)
@@ -2265,7 +2279,6 @@ static void nfs_kill_super(struct super_block *s)
struct nfs_server *server = NFS_SB(s);
kill_anon_super(s);
- bdi_unregister(&server->backing_dev_info);
nfs_fscache_release_super_cookie(s);
nfs_free_server(server);
}
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index 70e1fbbaaea..ad4d2e787b2 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -15,8 +15,10 @@
#include "callback.h"
+#ifdef CONFIG_NFS_V4
static const int nfs_set_port_min = 0;
static const int nfs_set_port_max = 65535;
+#endif
static struct ctl_table_header *nfs_callback_sysctl_table;
static ctl_table nfs_cb_sysctls[] = {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index d171696017f..d63d964a039 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1233,7 +1233,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-void nfs_commitdata_release(void *data)
+static void nfs_commitdata_release(void *data)
{
struct nfs_write_data *wdata = data;
@@ -1541,6 +1541,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
break;
}
ret = nfs_wait_on_request(req);
+ nfs_release_request(req);
if (ret < 0)
goto out;
}
@@ -1597,8 +1598,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
struct nfs_page *req;
int ret;
- if (PageFsCache(page))
- nfs_fscache_release_page(page, GFP_KERNEL);
+ nfs_fscache_release_page(page, GFP_KERNEL);
req = nfs_find_and_lock_request(page);
ret = PTR_ERR(req);
diff --git a/fs/nfsctl.c b/fs/nfsctl.c
index 8f9a20556f7..d3854d94b7c 100644
--- a/fs/nfsctl.c
+++ b/fs/nfsctl.c
@@ -7,8 +7,6 @@
#include <linux/types.h>
#include <linux/file.h>
#include <linux/fs.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
#include <linux/nfsd/syscall.h>
#include <linux/cred.h>
#include <linux/sched.h>
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 36fcabbf518..79717a40dab 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -1,15 +1,7 @@
-/*
- * linux/fs/nfsd/auth.c
- *
- * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
- */
+/* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */
-#include <linux/types.h>
#include <linux/sched.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/sunrpc/svcauth.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/export.h>
+#include "nfsd.h"
#include "auth.h"
int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp)
diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h
new file mode 100644
index 00000000000..d892be61016
--- /dev/null
+++ b/fs/nfsd/cache.h
@@ -0,0 +1,83 @@
+/*
+ * Request reply cache. This was heavily inspired by the
+ * implementation in 4.3BSD/4.4BSD.
+ *
+ * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#ifndef NFSCACHE_H
+#define NFSCACHE_H
+
+#include <linux/sunrpc/svc.h>
+
+/*
+ * Representation of a reply cache entry.
+ */
+struct svc_cacherep {
+ struct hlist_node c_hash;
+ struct list_head c_lru;
+
+ unsigned char c_state, /* unused, inprog, done */
+ c_type, /* status, buffer */
+ c_secure : 1; /* req came from port < 1024 */
+ struct sockaddr_in c_addr;
+ __be32 c_xid;
+ u32 c_prot;
+ u32 c_proc;
+ u32 c_vers;
+ unsigned long c_timestamp;
+ union {
+ struct kvec u_vec;
+ __be32 u_status;
+ } c_u;
+};
+
+#define c_replvec c_u.u_vec
+#define c_replstat c_u.u_status
+
+/* cache entry states */
+enum {
+ RC_UNUSED,
+ RC_INPROG,
+ RC_DONE
+};
+
+/* return values */
+enum {
+ RC_DROPIT,
+ RC_REPLY,
+ RC_DOIT,
+ RC_INTR
+};
+
+/*
+ * Cache types.
+ * We may want to add more types one day, e.g. for diropres and
+ * attrstat replies. Using cache entries with fixed length instead
+ * of buffer pointers may be more efficient.
+ */
+enum {
+ RC_NOCACHE,
+ RC_REPLSTAT,
+ RC_REPLBUFF,
+};
+
+/*
+ * If requests are retransmitted within this interval, they're dropped.
+ */
+#define RC_DELAY (HZ/5)
+
+int nfsd_reply_cache_init(void);
+void nfsd_reply_cache_shutdown(void);
+int nfsd_cache_lookup(struct svc_rqst *, int);
+void nfsd_cache_update(struct svc_rqst *, int, __be32 *);
+
+#ifdef CONFIG_NFSD_V4
+void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp);
+#else /* CONFIG_NFSD_V4 */
+static inline void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp)
+{
+}
+#endif /* CONFIG_NFSD_V4 */
+
+#endif /* NFSCACHE_H */
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index c1c9e035d4a..a0c4016413f 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -1,7 +1,5 @@
#define MSNFS /* HACK HACK */
/*
- * linux/fs/nfsd/export.c
- *
* NFS exporting and validation.
*
* We maintain a list of clients, each of which has a list of
@@ -14,29 +12,16 @@
* Copyright (C) 1995, 1996 Olaf Kirch, <okir@monad.swb.de>
*/
-#include <linux/unistd.h>
-#include <linux/slab.h>
-#include <linux/stat.h>
-#include <linux/in.h>
-#include <linux/seq_file.h>
-#include <linux/syscalls.h>
-#include <linux/rwsem.h>
-#include <linux/dcache.h>
#include <linux/namei.h>
-#include <linux/mount.h>
-#include <linux/hash.h>
#include <linux/module.h>
#include <linux/exportfs.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/nfsfh.h>
#include <linux/nfsd/syscall.h>
-#include <linux/lockd/bind.h>
-#include <linux/sunrpc/msg_prot.h>
-#include <linux/sunrpc/gss_api.h>
#include <net/ipv6.h>
+#include "nfsd.h"
+#include "nfsfh.h"
+
#define NFSDDBG_FACILITY NFSDDBG_EXPORT
typedef struct auth_domain svc_client;
@@ -369,16 +354,25 @@ static struct svc_export *svc_export_update(struct svc_export *new,
struct svc_export *old);
static struct svc_export *svc_export_lookup(struct svc_export *);
-static int check_export(struct inode *inode, int flags, unsigned char *uuid)
+static int check_export(struct inode *inode, int *flags, unsigned char *uuid)
{
- /* We currently export only dirs and regular files.
- * This is what umountd does.
+ /*
+ * We currently export only dirs, regular files, and (for v4
+ * pseudoroot) symlinks.
*/
if (!S_ISDIR(inode->i_mode) &&
+ !S_ISLNK(inode->i_mode) &&
!S_ISREG(inode->i_mode))
return -ENOTDIR;
+ /*
+ * Mountd should never pass down a writeable V4ROOT export, but,
+ * just to make sure:
+ */
+ if (*flags & NFSEXP_V4ROOT)
+ *flags |= NFSEXP_READONLY;
+
/* There are two requirements on a filesystem to be exportable.
* 1: We must be able to identify the filesystem from a number.
* either a device number (so FS_REQUIRES_DEV needed)
@@ -387,7 +381,7 @@ static int check_export(struct inode *inode, int flags, unsigned char *uuid)
* This means that s_export_op must be set.
*/
if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) &&
- !(flags & NFSEXP_FSID) &&
+ !(*flags & NFSEXP_FSID) &&
uuid == NULL) {
dprintk("exp_export: export of non-dev fs without fsid\n");
return -EINVAL;
@@ -602,7 +596,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
goto out4;
}
- err = check_export(exp.ex_path.dentry->d_inode, exp.ex_flags,
+ err = check_export(exp.ex_path.dentry->d_inode, &exp.ex_flags,
exp.ex_uuid);
if (err)
goto out4;
@@ -1041,7 +1035,7 @@ exp_export(struct nfsctl_export *nxp)
goto finish;
}
- err = check_export(path.dentry->d_inode, nxp->ex_flags, NULL);
+ err = check_export(path.dentry->d_inode, &nxp->ex_flags, NULL);
if (err) goto finish;
err = -ENOMEM;
@@ -1320,6 +1314,15 @@ rqst_exp_parent(struct svc_rqst *rqstp, struct path *path)
return exp;
}
+static struct svc_export *find_fsidzero_export(struct svc_rqst *rqstp)
+{
+ u32 fsidv[2];
+
+ mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL);
+
+ return rqst_exp_find(rqstp, FSID_NUM, fsidv);
+}
+
/*
* Called when we need the filehandle for the root of the pseudofs,
* for a given NFSv4 client. The root is defined to be the
@@ -1330,11 +1333,8 @@ exp_pseudoroot(struct svc_rqst *rqstp, struct svc_fh *fhp)
{
struct svc_export *exp;
__be32 rv;
- u32 fsidv[2];
-
- mk_fsid(FSID_NUM, fsidv, 0, 0, 0, NULL);
- exp = rqst_exp_find(rqstp, FSID_NUM, fsidv);
+ exp = find_fsidzero_export(rqstp);
if (IS_ERR(exp))
return nfserrno(PTR_ERR(exp));
rv = fh_compose(fhp, exp, exp->ex_path.dentry, NULL);
@@ -1425,6 +1425,7 @@ static struct flags {
{ NFSEXP_CROSSMOUNT, {"crossmnt", ""}},
{ NFSEXP_NOSUBTREECHECK, {"no_subtree_check", ""}},
{ NFSEXP_NOAUTHNLM, {"insecure_locks", ""}},
+ { NFSEXP_V4ROOT, {"v4root", ""}},
#ifdef MSNFS
{ NFSEXP_MSNFS, {"msnfs", ""}},
#endif
diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c
index b2786a5f9af..0c6d8167013 100644
--- a/fs/nfsd/lockd.c
+++ b/fs/nfsd/lockd.c
@@ -1,6 +1,4 @@
/*
- * linux/fs/nfsd/lockd.c
- *
* This file contains all the stubs needed when communicating with lockd.
* This level of indirection is necessary so we can run nfsd+lockd without
* requiring the nfs client to be compiled in/loaded, and vice versa.
@@ -8,14 +6,10 @@
* Copyright (C) 1996, Olaf Kirch <okir@monad.swb.de>
*/
-#include <linux/types.h>
-#include <linux/fs.h>
#include <linux/file.h>
-#include <linux/mount.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
#include <linux/lockd/bind.h>
+#include "nfsd.h"
+#include "vfs.h"
#define NFSDDBG_FACILITY NFSDDBG_LOCKD
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 4e3219e8411..f20589d2ae2 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -1,19 +1,15 @@
/*
- * linux/fs/nfsd/nfs2acl.c
- *
* Process version 2 NFSACL requests.
*
* Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
*/
-#include <linux/sunrpc/svc.h>
-#include <linux/nfs.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
-#include <linux/nfsd/xdr.h>
-#include <linux/nfsd/xdr3.h>
-#include <linux/posix_acl.h>
+#include "nfsd.h"
+/* FIXME: nfsacl.h is a broken header */
#include <linux/nfsacl.h>
+#include "cache.h"
+#include "xdr3.h"
+#include "vfs.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
#define RETURN_STATUS(st) { resp->status = (st); return (st); }
@@ -217,6 +213,16 @@ static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
* XDR encode functions
*/
+/*
+ * There must be an encoding function for void results so svc_process
+ * will work properly.
+ */
+int
+nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
+{
+ return xdr_ressize_check(rqstp, p);
+}
+
/* GETACL */
static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_getaclres *resp)
@@ -308,7 +314,6 @@ static int nfsaclsvc_release_access(struct svc_rqst *rqstp, __be32 *p,
}
#define nfsaclsvc_decode_voidargs NULL
-#define nfsaclsvc_encode_voidres NULL
#define nfsaclsvc_release_void NULL
#define nfsd3_fhandleargs nfsd_fhandle
#define nfsd3_attrstatres nfsd_attrstat
@@ -346,5 +351,5 @@ struct svc_version nfsd_acl_version2 = {
.vs_proc = nfsd_acl_procedures2,
.vs_dispatch = nfsd_dispatch,
.vs_xdrsize = NFS3_SVC_XDRSIZE,
- .vs_hidden = 1,
+ .vs_hidden = 0,
};
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 9981dbb377a..e0c4846bad9 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -1,18 +1,15 @@
/*
- * linux/fs/nfsd/nfs3acl.c
- *
* Process version 3 NFSACL requests.
*
* Copyright (C) 2002-2003 Andreas Gruenbacher <agruen@suse.de>
*/
-#include <linux/sunrpc/svc.h>
-#include <linux/nfs3.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
-#include <linux/nfsd/xdr3.h>
-#include <linux/posix_acl.h>
+#include "nfsd.h"
+/* FIXME: nfsacl.h is a broken header */
#include <linux/nfsacl.h>
+#include "cache.h"
+#include "xdr3.h"
+#include "vfs.h"
#define RETURN_STATUS(st) { resp->status = (st); return (st); }
@@ -264,6 +261,6 @@ struct svc_version nfsd_acl_version3 = {
.vs_proc = nfsd_acl_procedures3,
.vs_dispatch = nfsd_dispatch,
.vs_xdrsize = NFS3_SVC_XDRSIZE,
- .vs_hidden = 1,
+ .vs_hidden = 0,
};
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index a713c418a92..3d68f45a37b 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -1,30 +1,16 @@
/*
- * linux/fs/nfsd/nfs3proc.c
- *
* Process version 3 NFS requests.
*
* Copyright (C) 1996, 1997, 1998 Olaf Kirch <okir@monad.swb.de>
*/
-#include <linux/linkage.h>
-#include <linux/time.h>
-#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/ext2_fs.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include <linux/net.h>
-#include <linux/in.h>
-#include <linux/unistd.h>
-#include <linux/slab.h>
-#include <linux/major.h>
#include <linux/magic.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
-#include <linux/nfsd/xdr3.h>
-#include <linux/nfs3.h>
+#include "cache.h"
+#include "xdr3.h"
+#include "vfs.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index d0a2ce1b432..2a533a0af2a 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -1,6 +1,4 @@
/*
- * linux/fs/nfsd/nfs3xdr.c
- *
* XDR support for nfsd/protocol version 3.
*
* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
@@ -8,19 +6,8 @@
* 2003-08-09 Jamie Lokier: Use htonl() for nanoseconds, not htons()!
*/
-#include <linux/types.h>
-#include <linux/time.h>
-#include <linux/nfs3.h>
-#include <linux/list.h>
-#include <linux/spinlock.h>
-#include <linux/dcache.h>
#include <linux/namei.h>
-#include <linux/mm.h>
-#include <linux/vfs.h>
-#include <linux/sunrpc/xdr.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/xdr3.h>
+#include "xdr3.h"
#include "auth.h"
#define NFSDDBG_FACILITY NFSDDBG_XDR
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index 725d02f210e..88150685df3 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -1,6 +1,4 @@
/*
- * fs/nfs4acl/acl.c
- *
* Common NFSv4 ACL handling code.
*
* Copyright (c) 2002, 2003 The Regents of the University of Michigan.
@@ -36,15 +34,7 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include <linux/string.h>
-#include <linux/slab.h>
-#include <linux/list.h>
-#include <linux/types.h>
-#include <linux/fs.h>
-#include <linux/module.h>
#include <linux/nfs_fs.h>
-#include <linux/posix_acl.h>
-#include <linux/nfs4.h>
#include <linux/nfs4_acl.h>
@@ -389,7 +379,7 @@ sort_pacl(struct posix_acl *pacl)
sort_pacl_range(pacl, 1, i-1);
BUG_ON(pacl->a_entries[i].e_tag != ACL_GROUP_OBJ);
- j = i++;
+ j = ++i;
while (pacl->a_entries[j].e_tag == ACL_GROUP)
j++;
sort_pacl_range(pacl, i, j-1);
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 24e8d78f8dd..c6eed2a3b09 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -1,6 +1,4 @@
/*
- * linux/fs/nfsd/nfs4callback.c
- *
* Copyright (c) 2001 The Regents of the University of Michigan.
* All rights reserved.
*
@@ -33,22 +31,9 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/inet.h>
-#include <linux/errno.h>
-#include <linux/delay.h>
-#include <linux/sched.h>
-#include <linux/kthread.h>
-#include <linux/sunrpc/xdr.h>
-#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/svcsock.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/state.h>
-#include <linux/sunrpc/sched.h>
-#include <linux/nfs4.h>
-#include <linux/sunrpc/xprtsock.h>
+#include "nfsd.h"
+#include "state.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index ba2c199592f..6e2983b27f3 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -1,6 +1,4 @@
/*
- * fs/nfsd/nfs4idmap.c
- *
* Mapping of UID/GIDs to name and vice versa.
*
* Copyright (c) 2002, 2003 The Regents of the University of
@@ -35,22 +33,9 @@
*/
#include <linux/module.h>
-#include <linux/init.h>
-
-#include <linux/mm.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/nfs.h>
-#include <linux/nfs4.h>
-#include <linux/nfs_fs.h>
-#include <linux/nfs_page.h>
-#include <linux/sunrpc/cache.h>
#include <linux/nfsd_idmap.h>
-#include <linux/list.h>
-#include <linux/time.h>
#include <linux/seq_file.h>
-#include <linux/sunrpc/svcauth.h>
+#include <linux/sched.h>
/*
* Cache entry
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index bebc0c2e1b0..37514c46984 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1,6 +1,4 @@
/*
- * fs/nfsd/nfs4proc.c
- *
* Server-side procedures for NFSv4.
*
* Copyright (c) 2002 The Regents of the University of Michigan.
@@ -34,20 +32,11 @@
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-
-#include <linux/param.h>
-#include <linux/major.h>
-#include <linux/slab.h>
#include <linux/file.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
-#include <linux/nfs4.h>
-#include <linux/nfsd/state.h>
-#include <linux/nfsd/xdr4.h>
-#include <linux/nfs4_acl.h>
-#include <linux/sunrpc/gss_api.h>
+#include "cache.h"
+#include "xdr4.h"
+#include "vfs.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
@@ -170,7 +159,7 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs
accmode |= NFSD_MAY_READ;
if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
accmode |= (NFSD_MAY_WRITE | NFSD_MAY_TRUNC);
- if (open->op_share_deny & NFS4_SHARE_DENY_WRITE)
+ if (open->op_share_deny & NFS4_SHARE_DENY_READ)
accmode |= NFSD_MAY_WRITE;
status = fh_verify(rqstp, current_fh, S_IFREG, accmode);
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index b5348405046..5a754f7b71e 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -1,6 +1,4 @@
/*
-* linux/fs/nfsd/nfs4recover.c
-*
* Copyright (c) 2004 The Regents of the University of Michigan.
* All rights reserved.
*
@@ -33,20 +31,14 @@
*
*/
-#include <linux/err.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfs4.h>
-#include <linux/nfsd/state.h>
-#include <linux/nfsd/xdr4.h>
-#include <linux/param.h>
#include <linux/file.h>
#include <linux/namei.h>
-#include <asm/uaccess.h>
-#include <linux/scatterlist.h>
#include <linux/crypto.h>
#include <linux/sched.h>
-#include <linux/mount.h>
+
+#include "nfsd.h"
+#include "state.h"
+#include "vfs.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 2153f9bdbeb..f19ed866c95 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -1,6 +1,4 @@
/*
-* linux/fs/nfsd/nfs4state.c
-*
* Copyright (c) 2001 The Regents of the University of Michigan.
* All rights reserved.
*
@@ -34,28 +32,14 @@
*
*/
-#include <linux/param.h>
-#include <linux/major.h>
-#include <linux/slab.h>
-
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
#include <linux/file.h>
-#include <linux/mount.h>
-#include <linux/workqueue.h>
#include <linux/smp_lock.h>
-#include <linux/kthread.h>
-#include <linux/nfs4.h>
-#include <linux/nfsd/state.h>
-#include <linux/nfsd/xdr4.h>
#include <linux/namei.h>
#include <linux/swap.h>
-#include <linux/mutex.h>
-#include <linux/lockd/bind.h>
-#include <linux/module.h>
#include <linux/sunrpc/svcauth_gss.h>
#include <linux/sunrpc/clnt.h>
+#include "xdr4.h"
+#include "vfs.h"
#define NFSDDBG_FACILITY NFSDDBG_PROC
@@ -477,13 +461,14 @@ static int set_forechannel_drc_size(struct nfsd4_channel_attrs *fchan)
/*
* fchan holds the client values on input, and the server values on output
+ * sv_max_mesg is the maximum payload plus one page for overhead.
*/
static int init_forechannel_attrs(struct svc_rqst *rqstp,
struct nfsd4_channel_attrs *session_fchan,
struct nfsd4_channel_attrs *fchan)
{
int status = 0;
- __u32 maxcount = svc_max_payload(rqstp);
+ __u32 maxcount = nfsd_serv->sv_max_mesg;
/* headerpadsz set to zero in encode routine */
@@ -523,6 +508,15 @@ free_session_slots(struct nfsd4_session *ses)
kfree(ses->se_slots[i]);
}
+/*
+ * We don't actually need to cache the rpc and session headers, so we
+ * can allocate a little less for each slot:
+ */
+static inline int slot_bytes(struct nfsd4_channel_attrs *ca)
+{
+ return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
+}
+
static int
alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
struct nfsd4_create_session *cses)
@@ -554,7 +548,7 @@ alloc_init_session(struct svc_rqst *rqstp, struct nfs4_client *clp,
memcpy(new, &tmp, sizeof(*new));
/* allocate each struct nfsd4_slot and data cache in one piece */
- cachesize = new->se_fchannel.maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
+ cachesize = slot_bytes(&new->se_fchannel);
for (i = 0; i < new->se_fchannel.maxreqs; i++) {
sp = kzalloc(sizeof(*sp) + cachesize, GFP_KERNEL);
if (!sp)
@@ -628,10 +622,12 @@ void
free_session(struct kref *kref)
{
struct nfsd4_session *ses;
+ int mem;
ses = container_of(kref, struct nfsd4_session, se_ref);
spin_lock(&nfsd_drc_lock);
- nfsd_drc_mem_used -= ses->se_fchannel.maxreqs * NFSD_SLOT_CACHE_SIZE;
+ mem = ses->se_fchannel.maxreqs * slot_bytes(&ses->se_fchannel);
+ nfsd_drc_mem_used -= mem;
spin_unlock(&nfsd_drc_lock);
free_session_slots(ses);
kfree(ses);
@@ -2404,11 +2400,8 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open, struct nfs4_sta
memcpy(&open->op_delegate_stateid, &dp->dl_stateid, sizeof(dp->dl_stateid));
- dprintk("NFSD: delegation stateid=(%08x/%08x/%08x/%08x)\n\n",
- dp->dl_stateid.si_boot,
- dp->dl_stateid.si_stateownerid,
- dp->dl_stateid.si_fileid,
- dp->dl_stateid.si_generation);
+ dprintk("NFSD: delegation stateid=" STATEID_FMT "\n",
+ STATEID_VAL(&dp->dl_stateid));
out:
if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS
&& flag == NFS4_OPEN_DELEGATE_NONE
@@ -2498,9 +2491,8 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
status = nfs_ok;
- dprintk("nfs4_process_open2: stateid=(%08x/%08x/%08x/%08x)\n",
- stp->st_stateid.si_boot, stp->st_stateid.si_stateownerid,
- stp->st_stateid.si_fileid, stp->st_stateid.si_generation);
+ dprintk("%s: stateid=" STATEID_FMT "\n", __func__,
+ STATEID_VAL(&stp->st_stateid));
out:
if (fp)
put_nfs4_file(fp);
@@ -2666,9 +2658,8 @@ STALE_STATEID(stateid_t *stateid)
{
if (time_after((unsigned long)boot_time,
(unsigned long)stateid->si_boot)) {
- dprintk("NFSD: stale stateid (%08x/%08x/%08x/%08x)!\n",
- stateid->si_boot, stateid->si_stateownerid,
- stateid->si_fileid, stateid->si_generation);
+ dprintk("NFSD: stale stateid " STATEID_FMT "!\n",
+ STATEID_VAL(stateid));
return 1;
}
return 0;
@@ -2680,9 +2671,8 @@ EXPIRED_STATEID(stateid_t *stateid)
if (time_before((unsigned long)boot_time,
((unsigned long)stateid->si_boot)) &&
time_before((unsigned long)(stateid->si_boot + lease_time), get_seconds())) {
- dprintk("NFSD: expired stateid (%08x/%08x/%08x/%08x)!\n",
- stateid->si_boot, stateid->si_stateownerid,
- stateid->si_fileid, stateid->si_generation);
+ dprintk("NFSD: expired stateid " STATEID_FMT "!\n",
+ STATEID_VAL(stateid));
return 1;
}
return 0;
@@ -2696,9 +2686,8 @@ stateid_error_map(stateid_t *stateid)
if (EXPIRED_STATEID(stateid))
return nfserr_expired;
- dprintk("NFSD: bad stateid (%08x/%08x/%08x/%08x)!\n",
- stateid->si_boot, stateid->si_stateownerid,
- stateid->si_fileid, stateid->si_generation);
+ dprintk("NFSD: bad stateid " STATEID_FMT "!\n",
+ STATEID_VAL(stateid));
return nfserr_bad_stateid;
}
@@ -2884,10 +2873,8 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
struct svc_fh *current_fh = &cstate->current_fh;
__be32 status;
- dprintk("NFSD: preprocess_seqid_op: seqid=%d "
- "stateid = (%08x/%08x/%08x/%08x)\n", seqid,
- stateid->si_boot, stateid->si_stateownerid, stateid->si_fileid,
- stateid->si_generation);
+ dprintk("NFSD: %s: seqid=%d stateid = " STATEID_FMT "\n", __func__,
+ seqid, STATEID_VAL(stateid));
*stpp = NULL;
*sopp = NULL;
@@ -3019,12 +3006,8 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
sop->so_confirmed = 1;
update_stateid(&stp->st_stateid);
memcpy(&oc->oc_resp_stateid, &stp->st_stateid, sizeof(stateid_t));
- dprintk("NFSD: nfsd4_open_confirm: success, seqid=%d "
- "stateid=(%08x/%08x/%08x/%08x)\n", oc->oc_seqid,
- stp->st_stateid.si_boot,
- stp->st_stateid.si_stateownerid,
- stp->st_stateid.si_fileid,
- stp->st_stateid.si_generation);
+ dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
+ __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stateid));
nfsd4_create_clid_dir(sop->so_client);
out:
@@ -3283,9 +3266,8 @@ find_delegation_stateid(struct inode *ino, stateid_t *stid)
struct nfs4_file *fp;
struct nfs4_delegation *dl;
- dprintk("NFSD:find_delegation_stateid stateid=(%08x/%08x/%08x/%08x)\n",
- stid->si_boot, stid->si_stateownerid,
- stid->si_fileid, stid->si_generation);
+ dprintk("NFSD: %s: stateid=" STATEID_FMT "\n", __func__,
+ STATEID_VAL(stid));
fp = find_file(ino);
if (!fp)
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0fbd50cee1f..a8587e90fd5 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -40,24 +40,16 @@
* at the end of nfs4svc_decode_compoundargs.
*/
-#include <linux/param.h>
-#include <linux/smp.h>
-#include <linux/fs.h>
#include <linux/namei.h>
-#include <linux/vfs.h>
+#include <linux/statfs.h>
#include <linux/utsname.h>
-#include <linux/sunrpc/xdr.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/state.h>
-#include <linux/nfsd/xdr4.h>
#include <linux/nfsd_idmap.h>
-#include <linux/nfs4.h>
#include <linux/nfs4_acl.h>
-#include <linux/sunrpc/gss_api.h>
#include <linux/sunrpc/svcauth_gss.h>
+#include "xdr4.h"
+#include "vfs.h"
+
#define NFSDDBG_FACILITY NFSDDBG_XDR
/*
@@ -2204,11 +2196,14 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
* we will not follow the cross mount and will fill the attribtutes
* directly from the mountpoint dentry.
*/
- if (d_mountpoint(dentry) && !attributes_need_mount(cd->rd_bmval))
- ignore_crossmnt = 1;
- else if (d_mountpoint(dentry)) {
+ if (nfsd_mountpoint(dentry, exp)) {
int err;
+ if (!(exp->ex_flags & NFSEXP_V4ROOT)
+ && !attributes_need_mount(cd->rd_bmval)) {
+ ignore_crossmnt = 1;
+ goto out_encode;
+ }
/*
* Why the heck aren't we just using nfsd_lookup??
* Different "."/".." handling? Something else?
@@ -2224,6 +2219,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
goto out_put;
}
+out_encode:
nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval,
cd->rd_rqstp, ignore_crossmnt);
out_put:
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index 4638635c5d8..da08560c481 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -1,6 +1,4 @@
/*
- * linux/fs/nfsd/nfscache.c
- *
* Request reply cache. This is currently a global cache, but this may
* change in the future and be a per-client cache.
*
@@ -10,16 +8,8 @@
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
*/
-#include <linux/kernel.h>
-#include <linux/time.h>
-#include <linux/slab.h>
-#include <linux/string.h>
-#include <linux/spinlock.h>
-#include <linux/list.h>
-
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
+#include "nfsd.h"
+#include "cache.h"
/* Size of reply cache. Common values are:
* 4.3BSD: 128
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 5c01fc148ce..2604c3e70ea 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1,46 +1,20 @@
/*
- * linux/fs/nfsd/nfsctl.c
- *
* Syscall interface to knfsd.
*
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
*/
-#include <linux/module.h>
-
-#include <linux/linkage.h>
-#include <linux/time.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
#include <linux/namei.h>
-#include <linux/fcntl.h>
-#include <linux/net.h>
-#include <linux/in.h>
-#include <linux/syscalls.h>
-#include <linux/unistd.h>
-#include <linux/slab.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/pagemap.h>
-#include <linux/init.h>
-#include <linux/inet.h>
-#include <linux/string.h>
#include <linux/ctype.h>
-#include <linux/nfs.h>
#include <linux/nfsd_idmap.h>
-#include <linux/lockd/bind.h>
-#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svcsock.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
-#include <linux/nfsd/xdr.h>
#include <linux/nfsd/syscall.h>
#include <linux/lockd/lockd.h>
#include <linux/sunrpc/clnt.h>
-#include <asm/uaccess.h>
-#include <net/ipv6.h>
+#include "nfsd.h"
+#include "cache.h"
/*
* We have a single directory with 9 nodes in it.
@@ -55,6 +29,7 @@ enum {
NFSD_Getfd,
NFSD_Getfs,
NFSD_List,
+ NFSD_Export_features,
NFSD_Fh,
NFSD_FO_UnlockIP,
NFSD_FO_UnlockFS,
@@ -173,6 +148,24 @@ static const struct file_operations exports_operations = {
.owner = THIS_MODULE,
};
+static int export_features_show(struct seq_file *m, void *v)
+{
+ seq_printf(m, "0x%x 0x%x\n", NFSEXP_ALLFLAGS, NFSEXP_SECINFO_FLAGS);
+ return 0;
+}
+
+static int export_features_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, export_features_show, NULL);
+}
+
+static struct file_operations export_features_operations = {
+ .open = export_features_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
extern int nfsd_pool_stats_open(struct inode *inode, struct file *file);
extern int nfsd_pool_stats_release(struct inode *inode, struct file *file);
@@ -1330,6 +1323,8 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
[NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_List] = {"exports", &exports_operations, S_IRUGO},
+ [NFSD_Export_features] = {"export_features",
+ &export_features_operations, S_IRUGO},
[NFSD_FO_UnlockIP] = {"unlock_ip",
&transaction_ops, S_IWUSR|S_IRUSR},
[NFSD_FO_UnlockFS] = {"unlock_filesystem",
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
new file mode 100644
index 00000000000..e942a1aaac9
--- /dev/null
+++ b/fs/nfsd/nfsd.h
@@ -0,0 +1,338 @@
+/*
+ * Hodge-podge collection of knfsd-related stuff.
+ * I will sort this out later.
+ *
+ * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#ifndef LINUX_NFSD_NFSD_H
+#define LINUX_NFSD_NFSD_H
+
+#include <linux/types.h>
+#include <linux/mount.h>
+
+#include <linux/nfsd/debug.h>
+#include <linux/nfsd/export.h>
+#include <linux/nfsd/stats.h>
+/*
+ * nfsd version
+ */
+#define NFSD_SUPPORTED_MINOR_VERSION 1
+
+struct readdir_cd {
+ __be32 err; /* 0, nfserr, or nfserr_eof */
+};
+
+
+extern struct svc_program nfsd_program;
+extern struct svc_version nfsd_version2, nfsd_version3,
+ nfsd_version4;
+extern u32 nfsd_supported_minorversion;
+extern struct mutex nfsd_mutex;
+extern struct svc_serv *nfsd_serv;
+extern spinlock_t nfsd_drc_lock;
+extern unsigned int nfsd_drc_max_mem;
+extern unsigned int nfsd_drc_mem_used;
+
+extern const struct seq_operations nfs_exports_op;
+
+/*
+ * Function prototypes.
+ */
+int nfsd_svc(unsigned short port, int nrservs);
+int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp);
+
+int nfsd_nrthreads(void);
+int nfsd_nrpools(void);
+int nfsd_get_nrthreads(int n, int *);
+int nfsd_set_nrthreads(int n, int *);
+
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+#ifdef CONFIG_NFSD_V2_ACL
+extern struct svc_version nfsd_acl_version2;
+#else
+#define nfsd_acl_version2 NULL
+#endif
+#ifdef CONFIG_NFSD_V3_ACL
+extern struct svc_version nfsd_acl_version3;
+#else
+#define nfsd_acl_version3 NULL
+#endif
+#endif
+
+enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL };
+int nfsd_vers(int vers, enum vers_op change);
+int nfsd_minorversion(u32 minorversion, enum vers_op change);
+void nfsd_reset_versions(void);
+int nfsd_create_serv(void);
+
+extern int nfsd_max_blksize;
+
+static inline int nfsd_v4client(struct svc_rqst *rq)
+{
+ return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4;
+}
+
+/*
+ * NFSv4 State
+ */
+#ifdef CONFIG_NFSD_V4
+extern unsigned int max_delegations;
+int nfs4_state_init(void);
+void nfsd4_free_slabs(void);
+int nfs4_state_start(void);
+void nfs4_state_shutdown(void);
+time_t nfs4_lease_time(void);
+void nfs4_reset_lease(time_t leasetime);
+int nfs4_reset_recoverydir(char *recdir);
+#else
+static inline int nfs4_state_init(void) { return 0; }
+static inline void nfsd4_free_slabs(void) { }
+static inline int nfs4_state_start(void) { return 0; }
+static inline void nfs4_state_shutdown(void) { }
+static inline time_t nfs4_lease_time(void) { return 0; }
+static inline void nfs4_reset_lease(time_t leasetime) { }
+static inline int nfs4_reset_recoverydir(char *recdir) { return 0; }
+#endif
+
+/*
+ * lockd binding
+ */
+void nfsd_lockd_init(void);
+void nfsd_lockd_shutdown(void);
+
+
+/*
+ * These macros provide pre-xdr'ed values for faster operation.
+ */
+#define nfs_ok cpu_to_be32(NFS_OK)
+#define nfserr_perm cpu_to_be32(NFSERR_PERM)
+#define nfserr_noent cpu_to_be32(NFSERR_NOENT)
+#define nfserr_io cpu_to_be32(NFSERR_IO)
+#define nfserr_nxio cpu_to_be32(NFSERR_NXIO)
+#define nfserr_eagain cpu_to_be32(NFSERR_EAGAIN)
+#define nfserr_acces cpu_to_be32(NFSERR_ACCES)
+#define nfserr_exist cpu_to_be32(NFSERR_EXIST)
+#define nfserr_xdev cpu_to_be32(NFSERR_XDEV)
+#define nfserr_nodev cpu_to_be32(NFSERR_NODEV)
+#define nfserr_notdir cpu_to_be32(NFSERR_NOTDIR)
+#define nfserr_isdir cpu_to_be32(NFSERR_ISDIR)
+#define nfserr_inval cpu_to_be32(NFSERR_INVAL)
+#define nfserr_fbig cpu_to_be32(NFSERR_FBIG)
+#define nfserr_nospc cpu_to_be32(NFSERR_NOSPC)
+#define nfserr_rofs cpu_to_be32(NFSERR_ROFS)
+#define nfserr_mlink cpu_to_be32(NFSERR_MLINK)
+#define nfserr_opnotsupp cpu_to_be32(NFSERR_OPNOTSUPP)
+#define nfserr_nametoolong cpu_to_be32(NFSERR_NAMETOOLONG)
+#define nfserr_notempty cpu_to_be32(NFSERR_NOTEMPTY)
+#define nfserr_dquot cpu_to_be32(NFSERR_DQUOT)
+#define nfserr_stale cpu_to_be32(NFSERR_STALE)
+#define nfserr_remote cpu_to_be32(NFSERR_REMOTE)
+#define nfserr_wflush cpu_to_be32(NFSERR_WFLUSH)
+#define nfserr_badhandle cpu_to_be32(NFSERR_BADHANDLE)
+#define nfserr_notsync cpu_to_be32(NFSERR_NOT_SYNC)
+#define nfserr_badcookie cpu_to_be32(NFSERR_BAD_COOKIE)
+#define nfserr_notsupp cpu_to_be32(NFSERR_NOTSUPP)
+#define nfserr_toosmall cpu_to_be32(NFSERR_TOOSMALL)
+#define nfserr_serverfault cpu_to_be32(NFSERR_SERVERFAULT)
+#define nfserr_badtype cpu_to_be32(NFSERR_BADTYPE)
+#define nfserr_jukebox cpu_to_be32(NFSERR_JUKEBOX)
+#define nfserr_denied cpu_to_be32(NFSERR_DENIED)
+#define nfserr_deadlock cpu_to_be32(NFSERR_DEADLOCK)
+#define nfserr_expired cpu_to_be32(NFSERR_EXPIRED)
+#define nfserr_bad_cookie cpu_to_be32(NFSERR_BAD_COOKIE)
+#define nfserr_same cpu_to_be32(NFSERR_SAME)
+#define nfserr_clid_inuse cpu_to_be32(NFSERR_CLID_INUSE)
+#define nfserr_stale_clientid cpu_to_be32(NFSERR_STALE_CLIENTID)
+#define nfserr_resource cpu_to_be32(NFSERR_RESOURCE)
+#define nfserr_moved cpu_to_be32(NFSERR_MOVED)
+#define nfserr_nofilehandle cpu_to_be32(NFSERR_NOFILEHANDLE)
+#define nfserr_minor_vers_mismatch cpu_to_be32(NFSERR_MINOR_VERS_MISMATCH)
+#define nfserr_share_denied cpu_to_be32(NFSERR_SHARE_DENIED)
+#define nfserr_stale_stateid cpu_to_be32(NFSERR_STALE_STATEID)
+#define nfserr_old_stateid cpu_to_be32(NFSERR_OLD_STATEID)
+#define nfserr_bad_stateid cpu_to_be32(NFSERR_BAD_STATEID)
+#define nfserr_bad_seqid cpu_to_be32(NFSERR_BAD_SEQID)
+#define nfserr_symlink cpu_to_be32(NFSERR_SYMLINK)
+#define nfserr_not_same cpu_to_be32(NFSERR_NOT_SAME)
+#define nfserr_restorefh cpu_to_be32(NFSERR_RESTOREFH)
+#define nfserr_attrnotsupp cpu_to_be32(NFSERR_ATTRNOTSUPP)
+#define nfserr_bad_xdr cpu_to_be32(NFSERR_BAD_XDR)
+#define nfserr_openmode cpu_to_be32(NFSERR_OPENMODE)
+#define nfserr_locks_held cpu_to_be32(NFSERR_LOCKS_HELD)
+#define nfserr_op_illegal cpu_to_be32(NFSERR_OP_ILLEGAL)
+#define nfserr_grace cpu_to_be32(NFSERR_GRACE)
+#define nfserr_no_grace cpu_to_be32(NFSERR_NO_GRACE)
+#define nfserr_reclaim_bad cpu_to_be32(NFSERR_RECLAIM_BAD)
+#define nfserr_badname cpu_to_be32(NFSERR_BADNAME)
+#define nfserr_cb_path_down cpu_to_be32(NFSERR_CB_PATH_DOWN)
+#define nfserr_locked cpu_to_be32(NFSERR_LOCKED)
+#define nfserr_wrongsec cpu_to_be32(NFSERR_WRONGSEC)
+#define nfserr_badiomode cpu_to_be32(NFS4ERR_BADIOMODE)
+#define nfserr_badlayout cpu_to_be32(NFS4ERR_BADLAYOUT)
+#define nfserr_bad_session_digest cpu_to_be32(NFS4ERR_BAD_SESSION_DIGEST)
+#define nfserr_badsession cpu_to_be32(NFS4ERR_BADSESSION)
+#define nfserr_badslot cpu_to_be32(NFS4ERR_BADSLOT)
+#define nfserr_complete_already cpu_to_be32(NFS4ERR_COMPLETE_ALREADY)
+#define nfserr_conn_not_bound_to_session cpu_to_be32(NFS4ERR_CONN_NOT_BOUND_TO_SESSION)
+#define nfserr_deleg_already_wanted cpu_to_be32(NFS4ERR_DELEG_ALREADY_WANTED)
+#define nfserr_back_chan_busy cpu_to_be32(NFS4ERR_BACK_CHAN_BUSY)
+#define nfserr_layouttrylater cpu_to_be32(NFS4ERR_LAYOUTTRYLATER)
+#define nfserr_layoutunavailable cpu_to_be32(NFS4ERR_LAYOUTUNAVAILABLE)
+#define nfserr_nomatching_layout cpu_to_be32(NFS4ERR_NOMATCHING_LAYOUT)
+#define nfserr_recallconflict cpu_to_be32(NFS4ERR_RECALLCONFLICT)
+#define nfserr_unknown_layouttype cpu_to_be32(NFS4ERR_UNKNOWN_LAYOUTTYPE)
+#define nfserr_seq_misordered cpu_to_be32(NFS4ERR_SEQ_MISORDERED)
+#define nfserr_sequence_pos cpu_to_be32(NFS4ERR_SEQUENCE_POS)
+#define nfserr_req_too_big cpu_to_be32(NFS4ERR_REQ_TOO_BIG)
+#define nfserr_rep_too_big cpu_to_be32(NFS4ERR_REP_TOO_BIG)
+#define nfserr_rep_too_big_to_cache cpu_to_be32(NFS4ERR_REP_TOO_BIG_TO_CACHE)
+#define nfserr_retry_uncached_rep cpu_to_be32(NFS4ERR_RETRY_UNCACHED_REP)
+#define nfserr_unsafe_compound cpu_to_be32(NFS4ERR_UNSAFE_COMPOUND)
+#define nfserr_too_many_ops cpu_to_be32(NFS4ERR_TOO_MANY_OPS)
+#define nfserr_op_not_in_session cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION)
+#define nfserr_hash_alg_unsupp cpu_to_be32(NFS4ERR_HASH_ALG_UNSUPP)
+#define nfserr_clientid_busy cpu_to_be32(NFS4ERR_CLIENTID_BUSY)
+#define nfserr_pnfs_io_hole cpu_to_be32(NFS4ERR_PNFS_IO_HOLE)
+#define nfserr_seq_false_retry cpu_to_be32(NFS4ERR_SEQ_FALSE_RETRY)
+#define nfserr_bad_high_slot cpu_to_be32(NFS4ERR_BAD_HIGH_SLOT)
+#define nfserr_deadsession cpu_to_be32(NFS4ERR_DEADSESSION)
+#define nfserr_encr_alg_unsupp cpu_to_be32(NFS4ERR_ENCR_ALG_UNSUPP)
+#define nfserr_pnfs_no_layout cpu_to_be32(NFS4ERR_PNFS_NO_LAYOUT)
+#define nfserr_not_only_op cpu_to_be32(NFS4ERR_NOT_ONLY_OP)
+#define nfserr_wrong_cred cpu_to_be32(NFS4ERR_WRONG_CRED)
+#define nfserr_wrong_type cpu_to_be32(NFS4ERR_WRONG_TYPE)
+#define nfserr_dirdeleg_unavail cpu_to_be32(NFS4ERR_DIRDELEG_UNAVAIL)
+#define nfserr_reject_deleg cpu_to_be32(NFS4ERR_REJECT_DELEG)
+#define nfserr_returnconflict cpu_to_be32(NFS4ERR_RETURNCONFLICT)
+#define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED)
+
+/* error codes for internal use */
+/* if a request fails due to kmalloc failure, it gets dropped.
+ * Client should resend eventually
+ */
+#define nfserr_dropit cpu_to_be32(30000)
+/* end-of-file indicator in readdir */
+#define nfserr_eof cpu_to_be32(30001)
+/* replay detected */
+#define nfserr_replay_me cpu_to_be32(11001)
+/* nfs41 replay detected */
+#define nfserr_replay_cache cpu_to_be32(11002)
+
+/* Check for dir entries '.' and '..' */
+#define isdotent(n, l) (l < 3 && n[0] == '.' && (l == 1 || n[1] == '.'))
+
+/*
+ * Time of server startup
+ */
+extern struct timeval nfssvc_boot;
+
+#ifdef CONFIG_NFSD_V4
+
+/* before processing a COMPOUND operation, we have to check that there
+ * is enough space in the buffer for XDR encode to succeed. otherwise,
+ * we might process an operation with side effects, and be unable to
+ * tell the client that the operation succeeded.
+ *
+ * COMPOUND_SLACK_SPACE - this is the minimum bytes of buffer space
+ * needed to encode an "ordinary" _successful_ operation. (GETATTR,
+ * READ, READDIR, and READLINK have their own buffer checks.) if we
+ * fall below this level, we fail the next operation with NFS4ERR_RESOURCE.
+ *
+ * COMPOUND_ERR_SLACK_SPACE - this is the minimum bytes of buffer space
+ * needed to encode an operation which has failed with NFS4ERR_RESOURCE.
+ * care is taken to ensure that we never fall below this level for any
+ * reason.
+ */
+#define COMPOUND_SLACK_SPACE 140 /* OP_GETFH */
+#define COMPOUND_ERR_SLACK_SPACE 12 /* OP_SETATTR */
+
+#define NFSD_LEASE_TIME (nfs4_lease_time())
+#define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */
+
+/*
+ * The following attributes are currently not supported by the NFSv4 server:
+ * ARCHIVE (deprecated anyway)
+ * HIDDEN (unlikely to be supported any time soon)
+ * MIMETYPE (unlikely to be supported any time soon)
+ * QUOTA_* (will be supported in a forthcoming patch)
+ * SYSTEM (unlikely to be supported any time soon)
+ * TIME_BACKUP (unlikely to be supported any time soon)
+ * TIME_CREATE (unlikely to be supported any time soon)
+ */
+#define NFSD4_SUPPORTED_ATTRS_WORD0 \
+(FATTR4_WORD0_SUPPORTED_ATTRS | FATTR4_WORD0_TYPE | FATTR4_WORD0_FH_EXPIRE_TYPE \
+ | FATTR4_WORD0_CHANGE | FATTR4_WORD0_SIZE | FATTR4_WORD0_LINK_SUPPORT \
+ | FATTR4_WORD0_SYMLINK_SUPPORT | FATTR4_WORD0_NAMED_ATTR | FATTR4_WORD0_FSID \
+ | FATTR4_WORD0_UNIQUE_HANDLES | FATTR4_WORD0_LEASE_TIME | FATTR4_WORD0_RDATTR_ERROR \
+ | FATTR4_WORD0_ACLSUPPORT | FATTR4_WORD0_CANSETTIME | FATTR4_WORD0_CASE_INSENSITIVE \
+ | FATTR4_WORD0_CASE_PRESERVING | FATTR4_WORD0_CHOWN_RESTRICTED \
+ | FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FILEID | FATTR4_WORD0_FILES_AVAIL \
+ | FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_HOMOGENEOUS \
+ | FATTR4_WORD0_MAXFILESIZE | FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME \
+ | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE | FATTR4_WORD0_ACL)
+
+#define NFSD4_SUPPORTED_ATTRS_WORD1 \
+(FATTR4_WORD1_MODE | FATTR4_WORD1_NO_TRUNC | FATTR4_WORD1_NUMLINKS \
+ | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP | FATTR4_WORD1_RAWDEV \
+ | FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE | FATTR4_WORD1_SPACE_TOTAL \
+ | FATTR4_WORD1_SPACE_USED | FATTR4_WORD1_TIME_ACCESS | FATTR4_WORD1_TIME_ACCESS_SET \
+ | FATTR4_WORD1_TIME_DELTA | FATTR4_WORD1_TIME_METADATA \
+ | FATTR4_WORD1_TIME_MODIFY | FATTR4_WORD1_TIME_MODIFY_SET | FATTR4_WORD1_MOUNTED_ON_FILEID)
+
+#define NFSD4_SUPPORTED_ATTRS_WORD2 0
+
+#define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
+ NFSD4_SUPPORTED_ATTRS_WORD0
+
+#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
+ NFSD4_SUPPORTED_ATTRS_WORD1
+
+#define NFSD4_1_SUPPORTED_ATTRS_WORD2 \
+ (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT)
+
+static inline u32 nfsd_suppattrs0(u32 minorversion)
+{
+ return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD0
+ : NFSD4_SUPPORTED_ATTRS_WORD0;
+}
+
+static inline u32 nfsd_suppattrs1(u32 minorversion)
+{
+ return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD1
+ : NFSD4_SUPPORTED_ATTRS_WORD1;
+}
+
+static inline u32 nfsd_suppattrs2(u32 minorversion)
+{
+ return minorversion ? NFSD4_1_SUPPORTED_ATTRS_WORD2
+ : NFSD4_SUPPORTED_ATTRS_WORD2;
+}
+
+/* These will return ERR_INVAL if specified in GETATTR or READDIR. */
+#define NFSD_WRITEONLY_ATTRS_WORD1 \
+(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
+
+/* These are the only attrs allowed in CREATE/OPEN/SETATTR. */
+#define NFSD_WRITEABLE_ATTRS_WORD0 \
+(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL )
+#define NFSD_WRITEABLE_ATTRS_WORD1 \
+(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \
+ | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
+#define NFSD_WRITEABLE_ATTRS_WORD2 0
+
+#define NFSD_SUPPATTR_EXCLCREAT_WORD0 \
+ NFSD_WRITEABLE_ATTRS_WORD0
+/*
+ * we currently store the exclusive create verifier in the v_{a,m}time
+ * attributes so the client can't set these at create time using EXCLUSIVE4_1
+ */
+#define NFSD_SUPPATTR_EXCLCREAT_WORD1 \
+ (NFSD_WRITEABLE_ATTRS_WORD1 & \
+ ~(FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET))
+#define NFSD_SUPPATTR_EXCLCREAT_WORD2 \
+ NFSD_WRITEABLE_ATTRS_WORD2
+
+#endif /* CONFIG_NFSD_V4 */
+
+#endif /* LINUX_NFSD_NFSD_H */
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 01965b2f3a7..55c8e63af0b 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -1,6 +1,4 @@
/*
- * linux/fs/nfsd/nfsfh.c
- *
* NFS server file handle treatment.
*
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
@@ -9,19 +7,11 @@
* ... and again Southern-Winter 2001 to support export_operations
*/
-#include <linux/slab.h>
-#include <linux/fs.h>
-#include <linux/unistd.h>
-#include <linux/string.h>
-#include <linux/stat.h>
-#include <linux/dcache.h>
#include <linux/exportfs.h>
-#include <linux/mount.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svcauth_gss.h>
-#include <linux/nfsd/nfsd.h>
+#include "nfsd.h"
+#include "vfs.h"
#include "auth.h"
#define NFSDDBG_FACILITY NFSDDBG_FH
@@ -96,8 +86,10 @@ nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int type)
static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
struct svc_export *exp)
{
+ int flags = nfsexp_flags(rqstp, exp);
+
/* Check if the request originated from a secure port. */
- if (!rqstp->rq_secure && EX_SECURE(exp)) {
+ if (!rqstp->rq_secure && !(flags & NFSEXP_INSECURE_PORT)) {
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
dprintk(KERN_WARNING
"nfsd: request from insecure port %s!\n",
@@ -109,6 +101,36 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
return nfserrno(nfsd_setuser(rqstp, exp));
}
+static inline __be32 check_pseudo_root(struct svc_rqst *rqstp,
+ struct dentry *dentry, struct svc_export *exp)
+{
+ if (!(exp->ex_flags & NFSEXP_V4ROOT))
+ return nfs_ok;
+ /*
+ * v2/v3 clients have no need for the V4ROOT export--they use
+ * the mount protocl instead; also, further V4ROOT checks may be
+ * in v4-specific code, in which case v2/v3 clients could bypass
+ * them.
+ */
+ if (!nfsd_v4client(rqstp))
+ return nfserr_stale;
+ /*
+ * We're exposing only the directories and symlinks that have to be
+ * traversed on the way to real exports:
+ */
+ if (unlikely(!S_ISDIR(dentry->d_inode->i_mode) &&
+ !S_ISLNK(dentry->d_inode->i_mode)))
+ return nfserr_stale;
+ /*
+ * A pseudoroot export gives permission to access only one
+ * single directory; the kernel has to make another upcall
+ * before granting access to anything else under it:
+ */
+ if (unlikely(dentry != exp->ex_path.dentry))
+ return nfserr_stale;
+ return nfs_ok;
+}
+
/*
* Use the given filehandle to look up the corresponding export and
* dentry. On success, the results are used to set fh_export and
@@ -232,14 +254,6 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
goto out;
}
- if (exp->ex_flags & NFSEXP_NOSUBTREECHECK) {
- error = nfsd_setuser_and_check_port(rqstp, exp);
- if (error) {
- dput(dentry);
- goto out;
- }
- }
-
if (S_ISDIR(dentry->d_inode->i_mode) &&
(dentry->d_flags & DCACHE_DISCONNECTED)) {
printk("nfsd: find_fh_dentry returned a DISCONNECTED directory: %s/%s\n",
@@ -294,28 +308,32 @@ fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access)
error = nfsd_set_fh_dentry(rqstp, fhp);
if (error)
goto out;
- dentry = fhp->fh_dentry;
- exp = fhp->fh_export;
- } else {
- /*
- * just rechecking permissions
- * (e.g. nfsproc_create calls fh_verify, then nfsd_create
- * does as well)
- */
- dprintk("nfsd: fh_verify - just checking\n");
- dentry = fhp->fh_dentry;
- exp = fhp->fh_export;
- /*
- * Set user creds for this exportpoint; necessary even
- * in the "just checking" case because this may be a
- * filehandle that was created by fh_compose, and that
- * is about to be used in another nfsv4 compound
- * operation.
- */
- error = nfsd_setuser_and_check_port(rqstp, exp);
- if (error)
- goto out;
}
+ dentry = fhp->fh_dentry;
+ exp = fhp->fh_export;
+ /*
+ * We still have to do all these permission checks, even when
+ * fh_dentry is already set:
+ * - fh_verify may be called multiple times with different
+ * "access" arguments (e.g. nfsd_proc_create calls
+ * fh_verify(...,NFSD_MAY_EXEC) first, then later (in
+ * nfsd_create) calls fh_verify(...,NFSD_MAY_CREATE).
+ * - in the NFSv4 case, the filehandle may have been filled
+ * in by fh_compose, and given a dentry, but further
+ * compound operations performed with that filehandle
+ * still need permissions checks. In the worst case, a
+ * mountpoint crossing may have changed the export
+ * options, and we may now need to use a different uid
+ * (for example, if different id-squashing options are in
+ * effect on the new filesystem).
+ */
+ error = check_pseudo_root(rqstp, dentry, exp);
+ if (error)
+ goto out;
+
+ error = nfsd_setuser_and_check_port(rqstp, exp);
+ if (error)
+ goto out;
error = nfsd_mode_check(rqstp, dentry->d_inode->i_mode, type);
if (error)
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
new file mode 100644
index 00000000000..cdfb8c6a420
--- /dev/null
+++ b/fs/nfsd/nfsfh.h
@@ -0,0 +1,208 @@
+/* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> */
+
+#ifndef _LINUX_NFSD_FH_INT_H
+#define _LINUX_NFSD_FH_INT_H
+
+#include <linux/nfsd/nfsfh.h>
+
+enum nfsd_fsid {
+ FSID_DEV = 0,
+ FSID_NUM,
+ FSID_MAJOR_MINOR,
+ FSID_ENCODE_DEV,
+ FSID_UUID4_INUM,
+ FSID_UUID8,
+ FSID_UUID16,
+ FSID_UUID16_INUM,
+};
+
+enum fsid_source {
+ FSIDSOURCE_DEV,
+ FSIDSOURCE_FSID,
+ FSIDSOURCE_UUID,
+};
+extern enum fsid_source fsid_source(struct svc_fh *fhp);
+
+
+/* This might look a little large to "inline" but in all calls except
+ * one, 'vers' is constant so moste of the function disappears.
+ */
+static inline void mk_fsid(int vers, u32 *fsidv, dev_t dev, ino_t ino,
+ u32 fsid, unsigned char *uuid)
+{
+ u32 *up;
+ switch(vers) {
+ case FSID_DEV:
+ fsidv[0] = htonl((MAJOR(dev)<<16) |
+ MINOR(dev));
+ fsidv[1] = ino_t_to_u32(ino);
+ break;
+ case FSID_NUM:
+ fsidv[0] = fsid;
+ break;
+ case FSID_MAJOR_MINOR:
+ fsidv[0] = htonl(MAJOR(dev));
+ fsidv[1] = htonl(MINOR(dev));
+ fsidv[2] = ino_t_to_u32(ino);
+ break;
+
+ case FSID_ENCODE_DEV:
+ fsidv[0] = new_encode_dev(dev);
+ fsidv[1] = ino_t_to_u32(ino);
+ break;
+
+ case FSID_UUID4_INUM:
+ /* 4 byte fsid and inode number */
+ up = (u32*)uuid;
+ fsidv[0] = ino_t_to_u32(ino);
+ fsidv[1] = up[0] ^ up[1] ^ up[2] ^ up[3];
+ break;
+
+ case FSID_UUID8:
+ /* 8 byte fsid */
+ up = (u32*)uuid;
+ fsidv[0] = up[0] ^ up[2];
+ fsidv[1] = up[1] ^ up[3];
+ break;
+
+ case FSID_UUID16:
+ /* 16 byte fsid - NFSv3+ only */
+ memcpy(fsidv, uuid, 16);
+ break;
+
+ case FSID_UUID16_INUM:
+ /* 8 byte inode and 16 byte fsid */
+ *(u64*)fsidv = (u64)ino;
+ memcpy(fsidv+2, uuid, 16);
+ break;
+ default: BUG();
+ }
+}
+
+static inline int key_len(int type)
+{
+ switch(type) {
+ case FSID_DEV: return 8;
+ case FSID_NUM: return 4;
+ case FSID_MAJOR_MINOR: return 12;
+ case FSID_ENCODE_DEV: return 8;
+ case FSID_UUID4_INUM: return 8;
+ case FSID_UUID8: return 8;
+ case FSID_UUID16: return 16;
+ case FSID_UUID16_INUM: return 24;
+ default: return 0;
+ }
+}
+
+/*
+ * Shorthand for dprintk()'s
+ */
+extern char * SVCFH_fmt(struct svc_fh *fhp);
+
+/*
+ * Function prototypes
+ */
+__be32 fh_verify(struct svc_rqst *, struct svc_fh *, int, int);
+__be32 fh_compose(struct svc_fh *, struct svc_export *, struct dentry *, struct svc_fh *);
+__be32 fh_update(struct svc_fh *);
+void fh_put(struct svc_fh *);
+
+static __inline__ struct svc_fh *
+fh_copy(struct svc_fh *dst, struct svc_fh *src)
+{
+ WARN_ON(src->fh_dentry || src->fh_locked);
+
+ *dst = *src;
+ return dst;
+}
+
+static inline void
+fh_copy_shallow(struct knfsd_fh *dst, struct knfsd_fh *src)
+{
+ dst->fh_size = src->fh_size;
+ memcpy(&dst->fh_base, &src->fh_base, src->fh_size);
+}
+
+static __inline__ struct svc_fh *
+fh_init(struct svc_fh *fhp, int maxsize)
+{
+ memset(fhp, 0, sizeof(*fhp));
+ fhp->fh_maxsize = maxsize;
+ return fhp;
+}
+
+#ifdef CONFIG_NFSD_V3
+/*
+ * Fill in the pre_op attr for the wcc data
+ */
+static inline void
+fill_pre_wcc(struct svc_fh *fhp)
+{
+ struct inode *inode;
+
+ inode = fhp->fh_dentry->d_inode;
+ if (!fhp->fh_pre_saved) {
+ fhp->fh_pre_mtime = inode->i_mtime;
+ fhp->fh_pre_ctime = inode->i_ctime;
+ fhp->fh_pre_size = inode->i_size;
+ fhp->fh_pre_change = inode->i_version;
+ fhp->fh_pre_saved = 1;
+ }
+}
+
+extern void fill_post_wcc(struct svc_fh *);
+#else
+#define fill_pre_wcc(ignored)
+#define fill_post_wcc(notused)
+#endif /* CONFIG_NFSD_V3 */
+
+
+/*
+ * Lock a file handle/inode
+ * NOTE: both fh_lock and fh_unlock are done "by hand" in
+ * vfs.c:nfsd_rename as it needs to grab 2 i_mutex's at once
+ * so, any changes here should be reflected there.
+ */
+
+static inline void
+fh_lock_nested(struct svc_fh *fhp, unsigned int subclass)
+{
+ struct dentry *dentry = fhp->fh_dentry;
+ struct inode *inode;
+
+ BUG_ON(!dentry);
+
+ if (fhp->fh_locked) {
+ printk(KERN_WARNING "fh_lock: %s/%s already locked!\n",
+ dentry->d_parent->d_name.name, dentry->d_name.name);
+ return;
+ }
+
+ inode = dentry->d_inode;
+ mutex_lock_nested(&inode->i_mutex, subclass);
+ fill_pre_wcc(fhp);
+ fhp->fh_locked = 1;
+}
+
+static inline void
+fh_lock(struct svc_fh *fhp)
+{
+ fh_lock_nested(fhp, I_MUTEX_NORMAL);
+}
+
+/*
+ * Unlock a file handle/inode
+ */
+static inline void
+fh_unlock(struct svc_fh *fhp)
+{
+ BUG_ON(!fhp->fh_dentry);
+
+ if (fhp->fh_locked) {
+ fill_post_wcc(fhp);
+ mutex_unlock(&fhp->fh_dentry->d_inode->i_mutex);
+ fhp->fh_locked = 0;
+ }
+}
+
+#endif /* _LINUX_NFSD_FH_INT_H */
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 0eb9c820b7a..a047ad6111e 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -1,29 +1,14 @@
/*
- * nfsproc2.c Process version 2 NFS requests.
- * linux/fs/nfsd/nfs2proc.c
- *
* Process version 2 NFS requests.
*
* Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
*/
-#include <linux/linkage.h>
-#include <linux/time.h>
-#include <linux/errno.h>
-#include <linux/fs.h>
-#include <linux/stat.h>
-#include <linux/fcntl.h>
-#include <linux/net.h>
-#include <linux/in.h>
#include <linux/namei.h>
-#include <linux/unistd.h>
-#include <linux/slab.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/cache.h>
-#include <linux/nfsd/xdr.h>
+#include "cache.h"
+#include "xdr.h"
+#include "vfs.h"
typedef struct svc_rqst svc_rqst;
typedef struct svc_buf svc_buf;
@@ -758,6 +743,7 @@ nfserrno (int errno)
{ nfserr_io, -ETXTBSY },
{ nfserr_notsupp, -EOPNOTSUPP },
{ nfserr_toosmall, -ETOOSMALL },
+ { nfserr_serverfault, -ESERVERFAULT },
};
int i;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 67ea83eedd4..171699eb07c 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -1,6 +1,4 @@
/*
- * linux/fs/nfsd/nfssvc.c
- *
* Central processing for nfsd.
*
* Authors: Olaf Kirch (okir@monad.swb.de)
@@ -8,33 +6,19 @@
* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
*/
-#include <linux/module.h>
#include <linux/sched.h>
-#include <linux/time.h>
-#include <linux/errno.h>
-#include <linux/nfs.h>
-#include <linux/in.h>
-#include <linux/uio.h>
-#include <linux/unistd.h>
-#include <linux/slab.h>
-#include <linux/smp.h>
#include <linux/freezer.h>
#include <linux/fs_struct.h>
-#include <linux/kthread.h>
#include <linux/swap.h>
-#include <linux/sunrpc/types.h>
#include <linux/sunrpc/stats.h>
-#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/svcsock.h>
-#include <linux/sunrpc/cache.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/stats.h>
-#include <linux/nfsd/cache.h>
-#include <linux/nfsd/syscall.h>
#include <linux/lockd/bind.h>
#include <linux/nfsacl.h>
#include <linux/seq_file.h>
+#include "nfsd.h"
+#include "cache.h"
+#include "vfs.h"
#define NFSDDBG_FACILITY NFSDDBG_SVC
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index afd08e2c90a..4ce005dbf3e 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -1,20 +1,10 @@
/*
- * linux/fs/nfsd/nfsxdr.c
- *
* XDR support for nfsd
*
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
*/
-#include <linux/types.h>
-#include <linux/time.h>
-#include <linux/nfs.h>
-#include <linux/vfs.h>
-#include <linux/sunrpc/xdr.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/xdr.h>
-#include <linux/mm.h>
+#include "xdr.h"
#include "auth.h"
#define NFSDDBG_FACILITY NFSDDBG_XDR
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
new file mode 100644
index 00000000000..fefeae27f25
--- /dev/null
+++ b/fs/nfsd/state.h
@@ -0,0 +1,408 @@
+/*
+ * Copyright (c) 2001 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Kendrick Smith <kmsmith@umich.edu>
+ * Andy Adamson <andros@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _NFSD4_STATE_H
+#define _NFSD4_STATE_H
+
+#include <linux/nfsd/nfsfh.h>
+#include "nfsfh.h"
+
+typedef struct {
+ u32 cl_boot;
+ u32 cl_id;
+} clientid_t;
+
+typedef struct {
+ u32 so_boot;
+ u32 so_stateownerid;
+ u32 so_fileid;
+} stateid_opaque_t;
+
+typedef struct {
+ u32 si_generation;
+ stateid_opaque_t si_opaque;
+} stateid_t;
+#define si_boot si_opaque.so_boot
+#define si_stateownerid si_opaque.so_stateownerid
+#define si_fileid si_opaque.so_fileid
+
+#define STATEID_FMT "(%08x/%08x/%08x/%08x)"
+#define STATEID_VAL(s) \
+ (s)->si_boot, \
+ (s)->si_stateownerid, \
+ (s)->si_fileid, \
+ (s)->si_generation
+
+struct nfsd4_cb_sequence {
+ /* args/res */
+ u32 cbs_minorversion;
+ struct nfs4_client *cbs_clp;
+};
+
+struct nfs4_delegation {
+ struct list_head dl_perfile;
+ struct list_head dl_perclnt;
+ struct list_head dl_recall_lru; /* delegation recalled */
+ atomic_t dl_count; /* ref count */
+ struct nfs4_client *dl_client;
+ struct nfs4_file *dl_file;
+ struct file_lock *dl_flock;
+ struct file *dl_vfs_file;
+ u32 dl_type;
+ time_t dl_time;
+/* For recall: */
+ u32 dl_ident;
+ stateid_t dl_stateid;
+ struct knfsd_fh dl_fh;
+ int dl_retries;
+};
+
+/* client delegation callback info */
+struct nfs4_cb_conn {
+ /* SETCLIENTID info */
+ struct sockaddr_storage cb_addr;
+ size_t cb_addrlen;
+ u32 cb_prog;
+ u32 cb_minorversion;
+ u32 cb_ident; /* minorversion 0 only */
+ /* RPC client info */
+ atomic_t cb_set; /* successful CB_NULL call */
+ struct rpc_clnt * cb_client;
+};
+
+/* Maximum number of slots per session. 160 is useful for long haul TCP */
+#define NFSD_MAX_SLOTS_PER_SESSION 160
+/* Maximum number of operations per session compound */
+#define NFSD_MAX_OPS_PER_COMPOUND 16
+/* Maximum session per slot cache size */
+#define NFSD_SLOT_CACHE_SIZE 1024
+/* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
+#define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32
+#define NFSD_MAX_MEM_PER_SESSION \
+ (NFSD_CACHE_SIZE_SLOTS_PER_SESSION * NFSD_SLOT_CACHE_SIZE)
+
+struct nfsd4_slot {
+ bool sl_inuse;
+ bool sl_cachethis;
+ u16 sl_opcnt;
+ u32 sl_seqid;
+ __be32 sl_status;
+ u32 sl_datalen;
+ char sl_data[];
+};
+
+struct nfsd4_channel_attrs {
+ u32 headerpadsz;
+ u32 maxreq_sz;
+ u32 maxresp_sz;
+ u32 maxresp_cached;
+ u32 maxops;
+ u32 maxreqs;
+ u32 nr_rdma_attrs;
+ u32 rdma_attrs;
+};
+
+struct nfsd4_create_session {
+ clientid_t clientid;
+ struct nfs4_sessionid sessionid;
+ u32 seqid;
+ u32 flags;
+ struct nfsd4_channel_attrs fore_channel;
+ struct nfsd4_channel_attrs back_channel;
+ u32 callback_prog;
+ u32 uid;
+ u32 gid;
+};
+
+/* The single slot clientid cache structure */
+struct nfsd4_clid_slot {
+ u32 sl_seqid;
+ __be32 sl_status;
+ struct nfsd4_create_session sl_cr_ses;
+};
+
+struct nfsd4_session {
+ struct kref se_ref;
+ struct list_head se_hash; /* hash by sessionid */
+ struct list_head se_perclnt;
+ u32 se_flags;
+ struct nfs4_client *se_client; /* for expire_client */
+ struct nfs4_sessionid se_sessionid;
+ struct nfsd4_channel_attrs se_fchannel;
+ struct nfsd4_channel_attrs se_bchannel;
+ struct nfsd4_slot *se_slots[]; /* forward channel slots */
+};
+
+static inline void
+nfsd4_put_session(struct nfsd4_session *ses)
+{
+ extern void free_session(struct kref *kref);
+ kref_put(&ses->se_ref, free_session);
+}
+
+static inline void
+nfsd4_get_session(struct nfsd4_session *ses)
+{
+ kref_get(&ses->se_ref);
+}
+
+/* formatted contents of nfs4_sessionid */
+struct nfsd4_sessionid {
+ clientid_t clientid;
+ u32 sequence;
+ u32 reserved;
+};
+
+#define HEXDIR_LEN 33 /* hex version of 16 byte md5 of cl_name plus '\0' */
+
+/*
+ * struct nfs4_client - one per client. Clientids live here.
+ * o Each nfs4_client is hashed by clientid.
+ *
+ * o Each nfs4_clients is also hashed by name
+ * (the opaque quantity initially sent by the client to identify itself).
+ *
+ * o cl_perclient list is used to ensure no dangling stateowner references
+ * when we expire the nfs4_client
+ */
+struct nfs4_client {
+ struct list_head cl_idhash; /* hash by cl_clientid.id */
+ struct list_head cl_strhash; /* hash by cl_name */
+ struct list_head cl_openowners;
+ struct list_head cl_delegations;
+ struct list_head cl_lru; /* tail queue */
+ struct xdr_netobj cl_name; /* id generated by client */
+ char cl_recdir[HEXDIR_LEN]; /* recovery dir */
+ nfs4_verifier cl_verifier; /* generated by client */
+ time_t cl_time; /* time of last lease renewal */
+ struct sockaddr_storage cl_addr; /* client ipaddress */
+ u32 cl_flavor; /* setclientid pseudoflavor */
+ char *cl_principal; /* setclientid principal name */
+ struct svc_cred cl_cred; /* setclientid principal */
+ clientid_t cl_clientid; /* generated by server */
+ nfs4_verifier cl_confirm; /* generated by server */
+ struct nfs4_cb_conn cl_cb_conn; /* callback info */
+ atomic_t cl_count; /* ref count */
+ u32 cl_firststate; /* recovery dir creation */
+
+ /* for nfs41 */
+ struct list_head cl_sessions;
+ struct nfsd4_clid_slot cl_cs_slot; /* create_session slot */
+ u32 cl_exchange_flags;
+ struct nfs4_sessionid cl_sessionid;
+
+ /* for nfs41 callbacks */
+ /* We currently support a single back channel with a single slot */
+ unsigned long cl_cb_slot_busy;
+ u32 cl_cb_seq_nr;
+ struct svc_xprt *cl_cb_xprt; /* 4.1 callback transport */
+ struct rpc_wait_queue cl_cb_waitq; /* backchannel callers may */
+ /* wait here for slots */
+};
+
+/* struct nfs4_client_reset
+ * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl
+ * upon lease reset, or from upcall to state_daemon (to read in state
+ * from non-volitile storage) upon reboot.
+ */
+struct nfs4_client_reclaim {
+ struct list_head cr_strhash; /* hash by cr_name */
+ char cr_recdir[HEXDIR_LEN]; /* recover dir */
+};
+
+static inline void
+update_stateid(stateid_t *stateid)
+{
+ stateid->si_generation++;
+}
+
+/* A reasonable value for REPLAY_ISIZE was estimated as follows:
+ * The OPEN response, typically the largest, requires
+ * 4(status) + 8(stateid) + 20(changeinfo) + 4(rflags) + 8(verifier) +
+ * 4(deleg. type) + 8(deleg. stateid) + 4(deleg. recall flag) +
+ * 20(deleg. space limit) + ~32(deleg. ace) = 112 bytes
+ */
+
+#define NFSD4_REPLAY_ISIZE 112
+
+/*
+ * Replay buffer, where the result of the last seqid-mutating operation
+ * is cached.
+ */
+struct nfs4_replay {
+ __be32 rp_status;
+ unsigned int rp_buflen;
+ char *rp_buf;
+ unsigned intrp_allocated;
+ struct knfsd_fh rp_openfh;
+ char rp_ibuf[NFSD4_REPLAY_ISIZE];
+};
+
+/*
+* nfs4_stateowner can either be an open_owner, or a lock_owner
+*
+* so_idhash: stateid_hashtbl[] for open owner, lockstateid_hashtbl[]
+* for lock_owner
+* so_strhash: ownerstr_hashtbl[] for open_owner, lock_ownerstr_hashtbl[]
+* for lock_owner
+* so_perclient: nfs4_client->cl_perclient entry - used when nfs4_client
+* struct is reaped.
+* so_perfilestate: heads the list of nfs4_stateid (either open or lock)
+* and is used to ensure no dangling nfs4_stateid references when we
+* release a stateowner.
+* so_perlockowner: (open) nfs4_stateid->st_perlockowner entry - used when
+* close is called to reap associated byte-range locks
+* so_close_lru: (open) stateowner is placed on this list instead of being
+* reaped (when so_perfilestate is empty) to hold the last close replay.
+* reaped by laundramat thread after lease period.
+*/
+struct nfs4_stateowner {
+ struct kref so_ref;
+ struct list_head so_idhash; /* hash by so_id */
+ struct list_head so_strhash; /* hash by op_name */
+ struct list_head so_perclient;
+ struct list_head so_stateids;
+ struct list_head so_perstateid; /* for lockowners only */
+ struct list_head so_close_lru; /* tail queue */
+ time_t so_time; /* time of placement on so_close_lru */
+ int so_is_open_owner; /* 1=openowner,0=lockowner */
+ u32 so_id;
+ struct nfs4_client * so_client;
+ /* after increment in ENCODE_SEQID_OP_TAIL, represents the next
+ * sequence id expected from the client: */
+ u32 so_seqid;
+ struct xdr_netobj so_owner; /* open owner name */
+ int so_confirmed; /* successful OPEN_CONFIRM? */
+ struct nfs4_replay so_replay;
+};
+
+/*
+* nfs4_file: a file opened by some number of (open) nfs4_stateowners.
+* o fi_perfile list is used to search for conflicting
+* share_acces, share_deny on the file.
+*/
+struct nfs4_file {
+ atomic_t fi_ref;
+ struct list_head fi_hash; /* hash by "struct inode *" */
+ struct list_head fi_stateids;
+ struct list_head fi_delegations;
+ struct inode *fi_inode;
+ u32 fi_id; /* used with stateowner->so_id
+ * for stateid_hashtbl hash */
+ bool fi_had_conflict;
+};
+
+/*
+* nfs4_stateid can either be an open stateid or (eventually) a lock stateid
+*
+* (open)nfs4_stateid: one per (open)nfs4_stateowner, nfs4_file
+*
+* st_hash: stateid_hashtbl[] entry or lockstateid_hashtbl entry
+* st_perfile: file_hashtbl[] entry.
+* st_perfile_state: nfs4_stateowner->so_perfilestate
+* st_perlockowner: (open stateid) list of lock nfs4_stateowners
+* st_access_bmap: used only for open stateid
+* st_deny_bmap: used only for open stateid
+* st_openstp: open stateid lock stateid was derived from
+*
+* XXX: open stateids and lock stateids have diverged sufficiently that
+* we should consider defining separate structs for the two cases.
+*/
+
+struct nfs4_stateid {
+ struct list_head st_hash;
+ struct list_head st_perfile;
+ struct list_head st_perstateowner;
+ struct list_head st_lockowners;
+ struct nfs4_stateowner * st_stateowner;
+ struct nfs4_file * st_file;
+ stateid_t st_stateid;
+ struct file * st_vfs_file;
+ unsigned long st_access_bmap;
+ unsigned long st_deny_bmap;
+ struct nfs4_stateid * st_openstp;
+};
+
+/* flags for preprocess_seqid_op() */
+#define HAS_SESSION 0x00000001
+#define CONFIRM 0x00000002
+#define OPEN_STATE 0x00000004
+#define LOCK_STATE 0x00000008
+#define RD_STATE 0x00000010
+#define WR_STATE 0x00000020
+#define CLOSE_STATE 0x00000040
+
+#define seqid_mutating_err(err) \
+ (((err) != nfserr_stale_clientid) && \
+ ((err) != nfserr_bad_seqid) && \
+ ((err) != nfserr_stale_stateid) && \
+ ((err) != nfserr_bad_stateid))
+
+struct nfsd4_compound_state;
+
+extern __be32 nfs4_preprocess_stateid_op(struct nfsd4_compound_state *cstate,
+ stateid_t *stateid, int flags, struct file **filp);
+extern void nfs4_lock_state(void);
+extern void nfs4_unlock_state(void);
+extern int nfs4_in_grace(void);
+extern __be32 nfs4_check_open_reclaim(clientid_t *clid);
+extern void put_nfs4_client(struct nfs4_client *clp);
+extern void nfs4_free_stateowner(struct kref *kref);
+extern int set_callback_cred(void);
+extern void nfsd4_probe_callback(struct nfs4_client *clp);
+extern void nfsd4_cb_recall(struct nfs4_delegation *dp);
+extern void nfs4_put_delegation(struct nfs4_delegation *dp);
+extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname);
+extern void nfsd4_init_recdir(char *recdir_name);
+extern int nfsd4_recdir_load(void);
+extern void nfsd4_shutdown_recdir(void);
+extern int nfs4_client_to_reclaim(const char *name);
+extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
+extern void nfsd4_recdir_purge_old(void);
+extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
+extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
+
+static inline void
+nfs4_put_stateowner(struct nfs4_stateowner *so)
+{
+ kref_put(&so->so_ref, nfs4_free_stateowner);
+}
+
+static inline void
+nfs4_get_stateowner(struct nfs4_stateowner *so)
+{
+ kref_get(&so->so_ref);
+}
+
+#endif /* NFSD4_STATE_H */
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 71944cddf68..5232d3e8fb2 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -1,6 +1,4 @@
/*
- * linux/fs/nfsd/stats.c
- *
* procfs-based user access to knfsd statistics
*
* /proc/net/rpc/nfsd
@@ -23,18 +21,13 @@
* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
*/
-#include <linux/kernel.h>
-#include <linux/time.h>
-#include <linux/proc_fs.h>
#include <linux/seq_file.h>
-#include <linux/stat.h>
#include <linux/module.h>
-
-#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/stats.h>
-#include <linux/nfsd/nfsd.h>
#include <linux/nfsd/stats.h>
+#include "nfsd.h"
+
struct nfsd_stats nfsdstats;
struct svc_stat nfsd_svcstats = {
.program = &nfsd_program,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index a293f027326..8715d194561 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1,7 +1,5 @@
#define MSNFS /* HACK HACK */
/*
- * linux/fs/nfsd/vfs.c
- *
* File operations used by nfsd. Some of these have been ripped from
* other parts of the kernel because they weren't exported, others
* are partial duplicates with added or changed functionality.
@@ -16,48 +14,31 @@
* Zerocpy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp>
*/
-#include <linux/string.h>
-#include <linux/time.h>
-#include <linux/errno.h>
#include <linux/fs.h>
#include <linux/file.h>
-#include <linux/mount.h>
-#include <linux/major.h>
#include <linux/splice.h>
-#include <linux/proc_fs.h>
-#include <linux/stat.h>
#include <linux/fcntl.h>
-#include <linux/net.h>
-#include <linux/unistd.h>
-#include <linux/slab.h>
-#include <linux/pagemap.h>
-#include <linux/in.h>
-#include <linux/module.h>
#include <linux/namei.h>
-#include <linux/vfs.h>
#include <linux/delay.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
-#ifdef CONFIG_NFSD_V3
-#include <linux/nfs3.h>
-#include <linux/nfsd/xdr3.h>
-#endif /* CONFIG_NFSD_V3 */
-#include <linux/nfsd/nfsfh.h>
#include <linux/quotaops.h>
#include <linux/fsnotify.h>
-#include <linux/posix_acl.h>
#include <linux/posix_acl_xattr.h>
#include <linux/xattr.h>
+#include <linux/jhash.h>
+#include <linux/ima.h>
+#include <asm/uaccess.h>
+
+#ifdef CONFIG_NFSD_V3
+#include "xdr3.h"
+#endif /* CONFIG_NFSD_V3 */
+
#ifdef CONFIG_NFSD_V4
-#include <linux/nfs4.h>
#include <linux/nfs4_acl.h>
#include <linux/nfsd_idmap.h>
-#include <linux/security.h>
#endif /* CONFIG_NFSD_V4 */
-#include <linux/jhash.h>
-#include <linux/ima.h>
-#include <asm/uaccess.h>
+#include "nfsd.h"
+#include "vfs.h"
#define NFSDDBG_FACILITY NFSDDBG_FILEOP
@@ -89,12 +70,6 @@ struct raparm_hbucket {
#define RAPARM_HASH_MASK (RAPARM_HASH_SIZE-1)
static struct raparm_hbucket raparm_hash[RAPARM_HASH_SIZE];
-static inline int
-nfsd_v4client(struct svc_rqst *rq)
-{
- return rq->rq_prog == NFS_PROGRAM && rq->rq_vers == 4;
-}
-
/*
* Called from nfsd_lookup and encode_dirent. Check if we have crossed
* a mount point.
@@ -116,8 +91,16 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
exp2 = rqst_exp_get_by_name(rqstp, &path);
if (IS_ERR(exp2)) {
- if (PTR_ERR(exp2) != -ENOENT)
- err = PTR_ERR(exp2);
+ err = PTR_ERR(exp2);
+ /*
+ * We normally allow NFS clients to continue
+ * "underneath" a mountpoint that is not exported.
+ * The exception is V4ROOT, where no traversal is ever
+ * allowed without an explicit export of the new
+ * directory.
+ */
+ if (err == -ENOENT && !(exp->ex_flags & NFSEXP_V4ROOT))
+ err = 0;
path_put(&path);
goto out;
}
@@ -141,6 +124,53 @@ out:
return err;
}
+static void follow_to_parent(struct path *path)
+{
+ struct dentry *dp;
+
+ while (path->dentry == path->mnt->mnt_root && follow_up(path))
+ ;
+ dp = dget_parent(path->dentry);
+ dput(path->dentry);
+ path->dentry = dp;
+}
+
+static int nfsd_lookup_parent(struct svc_rqst *rqstp, struct dentry *dparent, struct svc_export **exp, struct dentry **dentryp)
+{
+ struct svc_export *exp2;
+ struct path path = {.mnt = mntget((*exp)->ex_path.mnt),
+ .dentry = dget(dparent)};
+
+ follow_to_parent(&path);
+
+ exp2 = rqst_exp_parent(rqstp, &path);
+ if (PTR_ERR(exp2) == -ENOENT) {
+ *dentryp = dget(dparent);
+ } else if (IS_ERR(exp2)) {
+ path_put(&path);
+ return PTR_ERR(exp2);
+ } else {
+ *dentryp = dget(path.dentry);
+ exp_put(*exp);
+ *exp = exp2;
+ }
+ path_put(&path);
+ return 0;
+}
+
+/*
+ * For nfsd purposes, we treat V4ROOT exports as though there was an
+ * export at *every* directory.
+ */
+int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp)
+{
+ if (d_mountpoint(dentry))
+ return 1;
+ if (!(exp->ex_flags & NFSEXP_V4ROOT))
+ return 0;
+ return dentry->d_inode != NULL;
+}
+
__be32
nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
const char *name, unsigned int len,
@@ -169,35 +199,13 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
dentry = dget(dparent);
else if (dparent != exp->ex_path.dentry)
dentry = dget_parent(dparent);
- else if (!EX_NOHIDE(exp))
+ else if (!EX_NOHIDE(exp) && !nfsd_v4client(rqstp))
dentry = dget(dparent); /* .. == . just like at / */
else {
/* checking mountpoint crossing is very different when stepping up */
- struct svc_export *exp2 = NULL;
- struct dentry *dp;
- struct path path = {.mnt = mntget(exp->ex_path.mnt),
- .dentry = dget(dparent)};
-
- while (path.dentry == path.mnt->mnt_root &&
- follow_up(&path))
- ;
- dp = dget_parent(path.dentry);
- dput(path.dentry);
- path.dentry = dp;
-
- exp2 = rqst_exp_parent(rqstp, &path);
- if (PTR_ERR(exp2) == -ENOENT) {
- dentry = dget(dparent);
- } else if (IS_ERR(exp2)) {
- host_err = PTR_ERR(exp2);
- path_put(&path);
+ host_err = nfsd_lookup_parent(rqstp, dparent, &exp, &dentry);
+ if (host_err)
goto out_nfserr;
- } else {
- dentry = dget(path.dentry);
- exp_put(exp);
- exp = exp2;
- }
- path_put(&path);
}
} else {
fh_lock(fhp);
@@ -208,7 +216,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
/*
* check if we have crossed a mount point ...
*/
- if (d_mountpoint(dentry)) {
+ if (nfsd_mountpoint(dentry, exp)) {
if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
dput(dentry);
goto out_nfserr;
@@ -745,7 +753,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
if (IS_ERR(*filp))
host_err = PTR_ERR(*filp);
else
- ima_counts_get(*filp);
+ host_err = ima_file_check(*filp, access);
out_nfserr:
err = nfserrno(host_err);
out:
@@ -774,12 +782,9 @@ static inline int nfsd_dosync(struct file *filp, struct dentry *dp,
int (*fsync) (struct file *, struct dentry *, int);
int err;
- err = filemap_fdatawrite(inode->i_mapping);
+ err = filemap_write_and_wait(inode->i_mapping);
if (err == 0 && fop && (fsync = fop->fsync))
err = fsync(filp, dp, 0);
- if (err == 0)
- err = filemap_fdatawait(inode->i_mapping);
-
return err;
}
@@ -2124,8 +2129,6 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
*/
path.mnt = exp->ex_path.mnt;
path.dentry = dentry;
- err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC),
- IMA_COUNT_LEAVE);
nfsd_out:
return err? nfserrno(err) : 0;
}
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
new file mode 100644
index 00000000000..4b1de0a9ea7
--- /dev/null
+++ b/fs/nfsd/vfs.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
+ */
+
+#ifndef LINUX_NFSD_VFS_H
+#define LINUX_NFSD_VFS_H
+
+#include "nfsfh.h"
+
+/*
+ * Flags for nfsd_permission
+ */
+#define NFSD_MAY_NOP 0
+#define NFSD_MAY_EXEC 1 /* == MAY_EXEC */
+#define NFSD_MAY_WRITE 2 /* == MAY_WRITE */
+#define NFSD_MAY_READ 4 /* == MAY_READ */
+#define NFSD_MAY_SATTR 8
+#define NFSD_MAY_TRUNC 16
+#define NFSD_MAY_LOCK 32
+#define NFSD_MAY_OWNER_OVERRIDE 64
+#define NFSD_MAY_LOCAL_ACCESS 128 /* IRIX doing local access check on device special file*/
+#define NFSD_MAY_BYPASS_GSS_ON_ROOT 256
+
+#define NFSD_MAY_CREATE (NFSD_MAY_EXEC|NFSD_MAY_WRITE)
+#define NFSD_MAY_REMOVE (NFSD_MAY_EXEC|NFSD_MAY_WRITE|NFSD_MAY_TRUNC)
+
+/*
+ * Callback function for readdir
+ */
+typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int);
+
+/* nfsd/vfs.c */
+int fh_lock_parent(struct svc_fh *, struct dentry *);
+int nfsd_racache_init(int);
+void nfsd_racache_shutdown(void);
+int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
+ struct svc_export **expp);
+__be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *,
+ const char *, unsigned int, struct svc_fh *);
+__be32 nfsd_lookup_dentry(struct svc_rqst *, struct svc_fh *,
+ const char *, unsigned int,
+ struct svc_export **, struct dentry **);
+__be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *,
+ struct iattr *, int, time_t);
+int nfsd_mountpoint(struct dentry *, struct svc_export *);
+#ifdef CONFIG_NFSD_V4
+__be32 nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *,
+ struct nfs4_acl *);
+int nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **);
+#endif /* CONFIG_NFSD_V4 */
+__be32 nfsd_create(struct svc_rqst *, struct svc_fh *,
+ char *name, int len, struct iattr *attrs,
+ int type, dev_t rdev, struct svc_fh *res);
+#ifdef CONFIG_NFSD_V3
+__be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *);
+__be32 nfsd_create_v3(struct svc_rqst *, struct svc_fh *,
+ char *name, int len, struct iattr *attrs,
+ struct svc_fh *res, int createmode,
+ u32 *verifier, int *truncp, int *created);
+__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
+ loff_t, unsigned long);
+#endif /* CONFIG_NFSD_V3 */
+__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, int,
+ int, struct file **);
+void nfsd_close(struct file *);
+__be32 nfsd_read(struct svc_rqst *, struct svc_fh *, struct file *,
+ loff_t, struct kvec *, int, unsigned long *);
+__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
+ loff_t, struct kvec *,int, unsigned long *, int *);
+__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
+ char *, int *);
+__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
+ char *name, int len, char *path, int plen,
+ struct svc_fh *res, struct iattr *);
+__be32 nfsd_link(struct svc_rqst *, struct svc_fh *,
+ char *, int, struct svc_fh *);
+__be32 nfsd_rename(struct svc_rqst *,
+ struct svc_fh *, char *, int,
+ struct svc_fh *, char *, int);
+__be32 nfsd_remove(struct svc_rqst *,
+ struct svc_fh *, char *, int);
+__be32 nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type,
+ char *name, int len);
+int nfsd_truncate(struct svc_rqst *, struct svc_fh *,
+ unsigned long size);
+__be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *,
+ loff_t *, struct readdir_cd *, filldir_t);
+__be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *,
+ struct kstatfs *, int access);
+
+int nfsd_notify_change(struct inode *, struct iattr *);
+__be32 nfsd_permission(struct svc_rqst *, struct svc_export *,
+ struct dentry *, int);
+int nfsd_sync_dir(struct dentry *dp);
+
+#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
+struct posix_acl *nfsd_get_posix_acl(struct svc_fh *, int);
+int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *);
+#endif
+
+#endif /* LINUX_NFSD_VFS_H */
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
new file mode 100644
index 00000000000..53b1863dd8f
--- /dev/null
+++ b/fs/nfsd/xdr.h
@@ -0,0 +1,173 @@
+/* XDR types for nfsd. This is mainly a typing exercise. */
+
+#ifndef LINUX_NFSD_H
+#define LINUX_NFSD_H
+
+#include <linux/vfs.h>
+#include "nfsd.h"
+#include "nfsfh.h"
+
+struct nfsd_fhandle {
+ struct svc_fh fh;
+};
+
+struct nfsd_sattrargs {
+ struct svc_fh fh;
+ struct iattr attrs;
+};
+
+struct nfsd_diropargs {
+ struct svc_fh fh;
+ char * name;
+ unsigned int len;
+};
+
+struct nfsd_readargs {
+ struct svc_fh fh;
+ __u32 offset;
+ __u32 count;
+ int vlen;
+};
+
+struct nfsd_writeargs {
+ svc_fh fh;
+ __u32 offset;
+ int len;
+ int vlen;
+};
+
+struct nfsd_createargs {
+ struct svc_fh fh;
+ char * name;
+ unsigned int len;
+ struct iattr attrs;
+};
+
+struct nfsd_renameargs {
+ struct svc_fh ffh;
+ char * fname;
+ unsigned int flen;
+ struct svc_fh tfh;
+ char * tname;
+ unsigned int tlen;
+};
+
+struct nfsd_readlinkargs {
+ struct svc_fh fh;
+ char * buffer;
+};
+
+struct nfsd_linkargs {
+ struct svc_fh ffh;
+ struct svc_fh tfh;
+ char * tname;
+ unsigned int tlen;
+};
+
+struct nfsd_symlinkargs {
+ struct svc_fh ffh;
+ char * fname;
+ unsigned int flen;
+ char * tname;
+ unsigned int tlen;
+ struct iattr attrs;
+};
+
+struct nfsd_readdirargs {
+ struct svc_fh fh;
+ __u32 cookie;
+ __u32 count;
+ __be32 * buffer;
+};
+
+struct nfsd_attrstat {
+ struct svc_fh fh;
+ struct kstat stat;
+};
+
+struct nfsd_diropres {
+ struct svc_fh fh;
+ struct kstat stat;
+};
+
+struct nfsd_readlinkres {
+ int len;
+};
+
+struct nfsd_readres {
+ struct svc_fh fh;
+ unsigned long count;
+ struct kstat stat;
+};
+
+struct nfsd_readdirres {
+ int count;
+
+ struct readdir_cd common;
+ __be32 * buffer;
+ int buflen;
+ __be32 * offset;
+};
+
+struct nfsd_statfsres {
+ struct kstatfs stats;
+};
+
+/*
+ * Storage requirements for XDR arguments and results.
+ */
+union nfsd_xdrstore {
+ struct nfsd_sattrargs sattr;
+ struct nfsd_diropargs dirop;
+ struct nfsd_readargs read;
+ struct nfsd_writeargs write;
+ struct nfsd_createargs create;
+ struct nfsd_renameargs rename;
+ struct nfsd_linkargs link;
+ struct nfsd_symlinkargs symlink;
+ struct nfsd_readdirargs readdir;
+};
+
+#define NFS2_SVC_XDRSIZE sizeof(union nfsd_xdrstore)
+
+
+int nfssvc_decode_void(struct svc_rqst *, __be32 *, void *);
+int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
+int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *,
+ struct nfsd_sattrargs *);
+int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *,
+ struct nfsd_diropargs *);
+int nfssvc_decode_readargs(struct svc_rqst *, __be32 *,
+ struct nfsd_readargs *);
+int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *,
+ struct nfsd_writeargs *);
+int nfssvc_decode_createargs(struct svc_rqst *, __be32 *,
+ struct nfsd_createargs *);
+int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *,
+ struct nfsd_renameargs *);
+int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *,
+ struct nfsd_readlinkargs *);
+int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *,
+ struct nfsd_linkargs *);
+int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *,
+ struct nfsd_symlinkargs *);
+int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *,
+ struct nfsd_readdirargs *);
+int nfssvc_encode_void(struct svc_rqst *, __be32 *, void *);
+int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *, struct nfsd_attrstat *);
+int nfssvc_encode_diropres(struct svc_rqst *, __be32 *, struct nfsd_diropres *);
+int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *, struct nfsd_readlinkres *);
+int nfssvc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd_readres *);
+int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *, struct nfsd_statfsres *);
+int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *, struct nfsd_readdirres *);
+
+int nfssvc_encode_entry(void *, const char *name,
+ int namlen, loff_t offset, u64 ino, unsigned int);
+
+int nfssvc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
+
+/* Helper functions for NFSv2 ACL code */
+__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp);
+__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp);
+
+#endif /* LINUX_NFSD_H */
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
new file mode 100644
index 00000000000..7df980eb056
--- /dev/null
+++ b/fs/nfsd/xdr3.h
@@ -0,0 +1,344 @@
+/*
+ * XDR types for NFSv3 in nfsd.
+ *
+ * Copyright (C) 1996-1998, Olaf Kirch <okir@monad.swb.de>
+ */
+
+#ifndef _LINUX_NFSD_XDR3_H
+#define _LINUX_NFSD_XDR3_H
+
+#include "xdr.h"
+
+struct nfsd3_sattrargs {
+ struct svc_fh fh;
+ struct iattr attrs;
+ int check_guard;
+ time_t guardtime;
+};
+
+struct nfsd3_diropargs {
+ struct svc_fh fh;
+ char * name;
+ unsigned int len;
+};
+
+struct nfsd3_accessargs {
+ struct svc_fh fh;
+ unsigned int access;
+};
+
+struct nfsd3_readargs {
+ struct svc_fh fh;
+ __u64 offset;
+ __u32 count;
+ int vlen;
+};
+
+struct nfsd3_writeargs {
+ svc_fh fh;
+ __u64 offset;
+ __u32 count;
+ int stable;
+ __u32 len;
+ int vlen;
+};
+
+struct nfsd3_createargs {
+ struct svc_fh fh;
+ char * name;
+ unsigned int len;
+ int createmode;
+ struct iattr attrs;
+ __be32 * verf;
+};
+
+struct nfsd3_mknodargs {
+ struct svc_fh fh;
+ char * name;
+ unsigned int len;
+ __u32 ftype;
+ __u32 major, minor;
+ struct iattr attrs;
+};
+
+struct nfsd3_renameargs {
+ struct svc_fh ffh;
+ char * fname;
+ unsigned int flen;
+ struct svc_fh tfh;
+ char * tname;
+ unsigned int tlen;
+};
+
+struct nfsd3_readlinkargs {
+ struct svc_fh fh;
+ char * buffer;
+};
+
+struct nfsd3_linkargs {
+ struct svc_fh ffh;
+ struct svc_fh tfh;
+ char * tname;
+ unsigned int tlen;
+};
+
+struct nfsd3_symlinkargs {
+ struct svc_fh ffh;
+ char * fname;
+ unsigned int flen;
+ char * tname;
+ unsigned int tlen;
+ struct iattr attrs;
+};
+
+struct nfsd3_readdirargs {
+ struct svc_fh fh;
+ __u64 cookie;
+ __u32 dircount;
+ __u32 count;
+ __be32 * verf;
+ __be32 * buffer;
+};
+
+struct nfsd3_commitargs {
+ struct svc_fh fh;
+ __u64 offset;
+ __u32 count;
+};
+
+struct nfsd3_getaclargs {
+ struct svc_fh fh;
+ int mask;
+};
+
+struct posix_acl;
+struct nfsd3_setaclargs {
+ struct svc_fh fh;
+ int mask;
+ struct posix_acl *acl_access;
+ struct posix_acl *acl_default;
+};
+
+struct nfsd3_attrstat {
+ __be32 status;
+ struct svc_fh fh;
+ struct kstat stat;
+};
+
+/* LOOKUP, CREATE, MKDIR, SYMLINK, MKNOD */
+struct nfsd3_diropres {
+ __be32 status;
+ struct svc_fh dirfh;
+ struct svc_fh fh;
+};
+
+struct nfsd3_accessres {
+ __be32 status;
+ struct svc_fh fh;
+ __u32 access;
+};
+
+struct nfsd3_readlinkres {
+ __be32 status;
+ struct svc_fh fh;
+ __u32 len;
+};
+
+struct nfsd3_readres {
+ __be32 status;
+ struct svc_fh fh;
+ unsigned long count;
+ int eof;
+};
+
+struct nfsd3_writeres {
+ __be32 status;
+ struct svc_fh fh;
+ unsigned long count;
+ int committed;
+};
+
+struct nfsd3_renameres {
+ __be32 status;
+ struct svc_fh ffh;
+ struct svc_fh tfh;
+};
+
+struct nfsd3_linkres {
+ __be32 status;
+ struct svc_fh tfh;
+ struct svc_fh fh;
+};
+
+struct nfsd3_readdirres {
+ __be32 status;
+ struct svc_fh fh;
+ int count;
+ __be32 verf[2];
+
+ struct readdir_cd common;
+ __be32 * buffer;
+ int buflen;
+ __be32 * offset;
+ __be32 * offset1;
+ struct svc_rqst * rqstp;
+
+};
+
+struct nfsd3_fsstatres {
+ __be32 status;
+ struct kstatfs stats;
+ __u32 invarsec;
+};
+
+struct nfsd3_fsinfores {
+ __be32 status;
+ __u32 f_rtmax;
+ __u32 f_rtpref;
+ __u32 f_rtmult;
+ __u32 f_wtmax;
+ __u32 f_wtpref;
+ __u32 f_wtmult;
+ __u32 f_dtpref;
+ __u64 f_maxfilesize;
+ __u32 f_properties;
+};
+
+struct nfsd3_pathconfres {
+ __be32 status;
+ __u32 p_link_max;
+ __u32 p_name_max;
+ __u32 p_no_trunc;
+ __u32 p_chown_restricted;
+ __u32 p_case_insensitive;
+ __u32 p_case_preserving;
+};
+
+struct nfsd3_commitres {
+ __be32 status;
+ struct svc_fh fh;
+};
+
+struct nfsd3_getaclres {
+ __be32 status;
+ struct svc_fh fh;
+ int mask;
+ struct posix_acl *acl_access;
+ struct posix_acl *acl_default;
+};
+
+/* dummy type for release */
+struct nfsd3_fhandle_pair {
+ __u32 dummy;
+ struct svc_fh fh1;
+ struct svc_fh fh2;
+};
+
+/*
+ * Storage requirements for XDR arguments and results.
+ */
+union nfsd3_xdrstore {
+ struct nfsd3_sattrargs sattrargs;
+ struct nfsd3_diropargs diropargs;
+ struct nfsd3_readargs readargs;
+ struct nfsd3_writeargs writeargs;
+ struct nfsd3_createargs createargs;
+ struct nfsd3_renameargs renameargs;
+ struct nfsd3_linkargs linkargs;
+ struct nfsd3_symlinkargs symlinkargs;
+ struct nfsd3_readdirargs readdirargs;
+ struct nfsd3_diropres diropres;
+ struct nfsd3_accessres accessres;
+ struct nfsd3_readlinkres readlinkres;
+ struct nfsd3_readres readres;
+ struct nfsd3_writeres writeres;
+ struct nfsd3_renameres renameres;
+ struct nfsd3_linkres linkres;
+ struct nfsd3_readdirres readdirres;
+ struct nfsd3_fsstatres fsstatres;
+ struct nfsd3_fsinfores fsinfores;
+ struct nfsd3_pathconfres pathconfres;
+ struct nfsd3_commitres commitres;
+ struct nfsd3_getaclres getaclres;
+};
+
+#define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore)
+
+int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *);
+int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *,
+ struct nfsd3_sattrargs *);
+int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *,
+ struct nfsd3_diropargs *);
+int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *,
+ struct nfsd3_accessargs *);
+int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *,
+ struct nfsd3_readargs *);
+int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *,
+ struct nfsd3_writeargs *);
+int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *,
+ struct nfsd3_createargs *);
+int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *,
+ struct nfsd3_createargs *);
+int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *,
+ struct nfsd3_mknodargs *);
+int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *,
+ struct nfsd3_renameargs *);
+int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *,
+ struct nfsd3_readlinkargs *);
+int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *,
+ struct nfsd3_linkargs *);
+int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *,
+ struct nfsd3_symlinkargs *);
+int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *,
+ struct nfsd3_readdirargs *);
+int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *,
+ struct nfsd3_readdirargs *);
+int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *,
+ struct nfsd3_commitargs *);
+int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
+int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *,
+ struct nfsd3_attrstat *);
+int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *,
+ struct nfsd3_attrstat *);
+int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *,
+ struct nfsd3_diropres *);
+int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *,
+ struct nfsd3_accessres *);
+int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *,
+ struct nfsd3_readlinkres *);
+int nfs3svc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd3_readres *);
+int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *, struct nfsd3_writeres *);
+int nfs3svc_encode_createres(struct svc_rqst *, __be32 *,
+ struct nfsd3_diropres *);
+int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *,
+ struct nfsd3_renameres *);
+int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *,
+ struct nfsd3_linkres *);
+int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *,
+ struct nfsd3_readdirres *);
+int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *,
+ struct nfsd3_fsstatres *);
+int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *,
+ struct nfsd3_fsinfores *);
+int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *,
+ struct nfsd3_pathconfres *);
+int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *,
+ struct nfsd3_commitres *);
+
+int nfs3svc_release_fhandle(struct svc_rqst *, __be32 *,
+ struct nfsd3_attrstat *);
+int nfs3svc_release_fhandle2(struct svc_rqst *, __be32 *,
+ struct nfsd3_fhandle_pair *);
+int nfs3svc_encode_entry(void *, const char *name,
+ int namlen, loff_t offset, u64 ino,
+ unsigned int);
+int nfs3svc_encode_entry_plus(void *, const char *name,
+ int namlen, loff_t offset, u64 ino,
+ unsigned int);
+/* Helper functions for NFSv3 ACL code */
+__be32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p,
+ struct svc_fh *fhp);
+__be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp);
+
+
+#endif /* _LINUX_NFSD_XDR3_H */
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
new file mode 100644
index 00000000000..efa33773953
--- /dev/null
+++ b/fs/nfsd/xdr4.h
@@ -0,0 +1,562 @@
+/*
+ * Server-side types for NFSv4.
+ *
+ * Copyright (c) 2002 The Regents of the University of Michigan.
+ * All rights reserved.
+ *
+ * Kendrick Smith <kmsmith@umich.edu>
+ * Andy Adamson <andros@umich.edu>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _LINUX_NFSD_XDR4_H
+#define _LINUX_NFSD_XDR4_H
+
+#include "state.h"
+#include "nfsd.h"
+
+#define NFSD4_MAX_TAGLEN 128
+#define XDR_LEN(n) (((n) + 3) & ~3)
+
+struct nfsd4_compound_state {
+ struct svc_fh current_fh;
+ struct svc_fh save_fh;
+ struct nfs4_stateowner *replay_owner;
+ /* For sessions DRC */
+ struct nfsd4_session *session;
+ struct nfsd4_slot *slot;
+ __be32 *datap;
+ size_t iovlen;
+ u32 minorversion;
+ u32 status;
+};
+
+static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs)
+{
+ return cs->slot != NULL;
+}
+
+struct nfsd4_change_info {
+ u32 atomic;
+ bool change_supported;
+ u32 before_ctime_sec;
+ u32 before_ctime_nsec;
+ u64 before_change;
+ u32 after_ctime_sec;
+ u32 after_ctime_nsec;
+ u64 after_change;
+};
+
+struct nfsd4_access {
+ u32 ac_req_access; /* request */
+ u32 ac_supported; /* response */
+ u32 ac_resp_access; /* response */
+};
+
+struct nfsd4_close {
+ u32 cl_seqid; /* request */
+ stateid_t cl_stateid; /* request+response */
+ struct nfs4_stateowner * cl_stateowner; /* response */
+};
+
+struct nfsd4_commit {
+ u64 co_offset; /* request */
+ u32 co_count; /* request */
+ nfs4_verifier co_verf; /* response */
+};
+
+struct nfsd4_create {
+ u32 cr_namelen; /* request */
+ char * cr_name; /* request */
+ u32 cr_type; /* request */
+ union { /* request */
+ struct {
+ u32 namelen;
+ char *name;
+ } link; /* NF4LNK */
+ struct {
+ u32 specdata1;
+ u32 specdata2;
+ } dev; /* NF4BLK, NF4CHR */
+ } u;
+ u32 cr_bmval[3]; /* request */
+ struct iattr cr_iattr; /* request */
+ struct nfsd4_change_info cr_cinfo; /* response */
+ struct nfs4_acl *cr_acl;
+};
+#define cr_linklen u.link.namelen
+#define cr_linkname u.link.name
+#define cr_specdata1 u.dev.specdata1
+#define cr_specdata2 u.dev.specdata2
+
+struct nfsd4_delegreturn {
+ stateid_t dr_stateid;
+};
+
+struct nfsd4_getattr {
+ u32 ga_bmval[3]; /* request */
+ struct svc_fh *ga_fhp; /* response */
+};
+
+struct nfsd4_link {
+ u32 li_namelen; /* request */
+ char * li_name; /* request */
+ struct nfsd4_change_info li_cinfo; /* response */
+};
+
+struct nfsd4_lock_denied {
+ clientid_t ld_clientid;
+ struct nfs4_stateowner *ld_sop;
+ u64 ld_start;
+ u64 ld_length;
+ u32 ld_type;
+};
+
+struct nfsd4_lock {
+ /* request */
+ u32 lk_type;
+ u32 lk_reclaim; /* boolean */
+ u64 lk_offset;
+ u64 lk_length;
+ u32 lk_is_new;
+ union {
+ struct {
+ u32 open_seqid;
+ stateid_t open_stateid;
+ u32 lock_seqid;
+ clientid_t clientid;
+ struct xdr_netobj owner;
+ } new;
+ struct {
+ stateid_t lock_stateid;
+ u32 lock_seqid;
+ } old;
+ } v;
+
+ /* response */
+ union {
+ struct {
+ stateid_t stateid;
+ } ok;
+ struct nfsd4_lock_denied denied;
+ } u;
+ /* The lk_replay_owner is the open owner in the open_to_lock_owner
+ * case and the lock owner otherwise: */
+ struct nfs4_stateowner *lk_replay_owner;
+};
+#define lk_new_open_seqid v.new.open_seqid
+#define lk_new_open_stateid v.new.open_stateid
+#define lk_new_lock_seqid v.new.lock_seqid
+#define lk_new_clientid v.new.clientid
+#define lk_new_owner v.new.owner
+#define lk_old_lock_stateid v.old.lock_stateid
+#define lk_old_lock_seqid v.old.lock_seqid
+
+#define lk_rflags u.ok.rflags
+#define lk_resp_stateid u.ok.stateid
+#define lk_denied u.denied
+
+
+struct nfsd4_lockt {
+ u32 lt_type;
+ clientid_t lt_clientid;
+ struct xdr_netobj lt_owner;
+ u64 lt_offset;
+ u64 lt_length;
+ struct nfs4_stateowner * lt_stateowner;
+ struct nfsd4_lock_denied lt_denied;
+};
+
+
+struct nfsd4_locku {
+ u32 lu_type;
+ u32 lu_seqid;
+ stateid_t lu_stateid;
+ u64 lu_offset;
+ u64 lu_length;
+ struct nfs4_stateowner *lu_stateowner;
+};
+
+
+struct nfsd4_lookup {
+ u32 lo_len; /* request */
+ char * lo_name; /* request */
+};
+
+struct nfsd4_putfh {
+ u32 pf_fhlen; /* request */
+ char *pf_fhval; /* request */
+};
+
+struct nfsd4_open {
+ u32 op_claim_type; /* request */
+ struct xdr_netobj op_fname; /* request - everything but CLAIM_PREV */
+ u32 op_delegate_type; /* request - CLAIM_PREV only */
+ stateid_t op_delegate_stateid; /* request - response */
+ u32 op_create; /* request */
+ u32 op_createmode; /* request */
+ u32 op_bmval[3]; /* request */
+ struct iattr iattr; /* UNCHECKED4, GUARDED4, EXCLUSIVE4_1 */
+ nfs4_verifier verf; /* EXCLUSIVE4 */
+ clientid_t op_clientid; /* request */
+ struct xdr_netobj op_owner; /* request */
+ u32 op_seqid; /* request */
+ u32 op_share_access; /* request */
+ u32 op_share_deny; /* request */
+ stateid_t op_stateid; /* response */
+ u32 op_recall; /* recall */
+ struct nfsd4_change_info op_cinfo; /* response */
+ u32 op_rflags; /* response */
+ int op_truncate; /* used during processing */
+ struct nfs4_stateowner *op_stateowner; /* used during processing */
+ struct nfs4_acl *op_acl;
+};
+#define op_iattr iattr
+#define op_verf verf
+
+struct nfsd4_open_confirm {
+ stateid_t oc_req_stateid /* request */;
+ u32 oc_seqid /* request */;
+ stateid_t oc_resp_stateid /* response */;
+ struct nfs4_stateowner * oc_stateowner; /* response */
+};
+
+struct nfsd4_open_downgrade {
+ stateid_t od_stateid;
+ u32 od_seqid;
+ u32 od_share_access;
+ u32 od_share_deny;
+ struct nfs4_stateowner *od_stateowner;
+};
+
+
+struct nfsd4_read {
+ stateid_t rd_stateid; /* request */
+ u64 rd_offset; /* request */
+ u32 rd_length; /* request */
+ int rd_vlen;
+ struct file *rd_filp;
+
+ struct svc_rqst *rd_rqstp; /* response */
+ struct svc_fh * rd_fhp; /* response */
+};
+
+struct nfsd4_readdir {
+ u64 rd_cookie; /* request */
+ nfs4_verifier rd_verf; /* request */
+ u32 rd_dircount; /* request */
+ u32 rd_maxcount; /* request */
+ u32 rd_bmval[3]; /* request */
+ struct svc_rqst *rd_rqstp; /* response */
+ struct svc_fh * rd_fhp; /* response */
+
+ struct readdir_cd common;
+ __be32 * buffer;
+ int buflen;
+ __be32 * offset;
+};
+
+struct nfsd4_release_lockowner {
+ clientid_t rl_clientid;
+ struct xdr_netobj rl_owner;
+};
+struct nfsd4_readlink {
+ struct svc_rqst *rl_rqstp; /* request */
+ struct svc_fh * rl_fhp; /* request */
+};
+
+struct nfsd4_remove {
+ u32 rm_namelen; /* request */
+ char * rm_name; /* request */
+ struct nfsd4_change_info rm_cinfo; /* response */
+};
+
+struct nfsd4_rename {
+ u32 rn_snamelen; /* request */
+ char * rn_sname; /* request */
+ u32 rn_tnamelen; /* request */
+ char * rn_tname; /* request */
+ struct nfsd4_change_info rn_sinfo; /* response */
+ struct nfsd4_change_info rn_tinfo; /* response */
+};
+
+struct nfsd4_secinfo {
+ u32 si_namelen; /* request */
+ char *si_name; /* request */
+ struct svc_export *si_exp; /* response */
+};
+
+struct nfsd4_setattr {
+ stateid_t sa_stateid; /* request */
+ u32 sa_bmval[3]; /* request */
+ struct iattr sa_iattr; /* request */
+ struct nfs4_acl *sa_acl;
+};
+
+struct nfsd4_setclientid {
+ nfs4_verifier se_verf; /* request */
+ u32 se_namelen; /* request */
+ char * se_name; /* request */
+ u32 se_callback_prog; /* request */
+ u32 se_callback_netid_len; /* request */
+ char * se_callback_netid_val; /* request */
+ u32 se_callback_addr_len; /* request */
+ char * se_callback_addr_val; /* request */
+ u32 se_callback_ident; /* request */
+ clientid_t se_clientid; /* response */
+ nfs4_verifier se_confirm; /* response */
+};
+
+struct nfsd4_setclientid_confirm {
+ clientid_t sc_clientid;
+ nfs4_verifier sc_confirm;
+};
+
+/* also used for NVERIFY */
+struct nfsd4_verify {
+ u32 ve_bmval[3]; /* request */
+ u32 ve_attrlen; /* request */
+ char * ve_attrval; /* request */
+};
+
+struct nfsd4_write {
+ stateid_t wr_stateid; /* request */
+ u64 wr_offset; /* request */
+ u32 wr_stable_how; /* request */
+ u32 wr_buflen; /* request */
+ int wr_vlen;
+
+ u32 wr_bytes_written; /* response */
+ u32 wr_how_written; /* response */
+ nfs4_verifier wr_verifier; /* response */
+};
+
+struct nfsd4_exchange_id {
+ nfs4_verifier verifier;
+ struct xdr_netobj clname;
+ u32 flags;
+ clientid_t clientid;
+ u32 seqid;
+ int spa_how;
+};
+
+struct nfsd4_sequence {
+ struct nfs4_sessionid sessionid; /* request/response */
+ u32 seqid; /* request/response */
+ u32 slotid; /* request/response */
+ u32 maxslots; /* request/response */
+ u32 cachethis; /* request */
+#if 0
+ u32 target_maxslots; /* response */
+ u32 status_flags; /* response */
+#endif /* not yet */
+};
+
+struct nfsd4_destroy_session {
+ struct nfs4_sessionid sessionid;
+};
+
+struct nfsd4_op {
+ int opnum;
+ __be32 status;
+ union {
+ struct nfsd4_access access;
+ struct nfsd4_close close;
+ struct nfsd4_commit commit;
+ struct nfsd4_create create;
+ struct nfsd4_delegreturn delegreturn;
+ struct nfsd4_getattr getattr;
+ struct svc_fh * getfh;
+ struct nfsd4_link link;
+ struct nfsd4_lock lock;
+ struct nfsd4_lockt lockt;
+ struct nfsd4_locku locku;
+ struct nfsd4_lookup lookup;
+ struct nfsd4_verify nverify;
+ struct nfsd4_open open;
+ struct nfsd4_open_confirm open_confirm;
+ struct nfsd4_open_downgrade open_downgrade;
+ struct nfsd4_putfh putfh;
+ struct nfsd4_read read;
+ struct nfsd4_readdir readdir;
+ struct nfsd4_readlink readlink;
+ struct nfsd4_remove remove;
+ struct nfsd4_rename rename;
+ clientid_t renew;
+ struct nfsd4_secinfo secinfo;
+ struct nfsd4_setattr setattr;
+ struct nfsd4_setclientid setclientid;
+ struct nfsd4_setclientid_confirm setclientid_confirm;
+ struct nfsd4_verify verify;
+ struct nfsd4_write write;
+ struct nfsd4_release_lockowner release_lockowner;
+
+ /* NFSv4.1 */
+ struct nfsd4_exchange_id exchange_id;
+ struct nfsd4_create_session create_session;
+ struct nfsd4_destroy_session destroy_session;
+ struct nfsd4_sequence sequence;
+ } u;
+ struct nfs4_replay * replay;
+};
+
+struct nfsd4_compoundargs {
+ /* scratch variables for XDR decode */
+ __be32 * p;
+ __be32 * end;
+ struct page ** pagelist;
+ int pagelen;
+ __be32 tmp[8];
+ __be32 * tmpp;
+ struct tmpbuf {
+ struct tmpbuf *next;
+ void (*release)(const void *);
+ void *buf;
+ } *to_free;
+
+ struct svc_rqst *rqstp;
+
+ u32 taglen;
+ char * tag;
+ u32 minorversion;
+ u32 opcnt;
+ struct nfsd4_op *ops;
+ struct nfsd4_op iops[8];
+};
+
+struct nfsd4_compoundres {
+ /* scratch variables for XDR encode */
+ __be32 * p;
+ __be32 * end;
+ struct xdr_buf * xbuf;
+ struct svc_rqst * rqstp;
+
+ u32 taglen;
+ char * tag;
+ u32 opcnt;
+ __be32 * tagp; /* tag, opcount encode location */
+ struct nfsd4_compound_state cstate;
+};
+
+static inline bool nfsd4_is_solo_sequence(struct nfsd4_compoundres *resp)
+{
+ struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
+ return resp->opcnt == 1 && args->ops[0].opnum == OP_SEQUENCE;
+}
+
+static inline bool nfsd4_not_cached(struct nfsd4_compoundres *resp)
+{
+ return !resp->cstate.slot->sl_cachethis || nfsd4_is_solo_sequence(resp);
+}
+
+#define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs)
+
+static inline void
+set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
+{
+ BUG_ON(!fhp->fh_pre_saved || !fhp->fh_post_saved);
+ cinfo->atomic = 1;
+ cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode);
+ if (cinfo->change_supported) {
+ cinfo->before_change = fhp->fh_pre_change;
+ cinfo->after_change = fhp->fh_post_change;
+ } else {
+ cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
+ cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
+ cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
+ cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
+ }
+}
+
+int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
+int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *,
+ struct nfsd4_compoundargs *);
+int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,
+ struct nfsd4_compoundres *);
+void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
+void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op);
+__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
+ struct dentry *dentry, __be32 *buffer, int *countp,
+ u32 *bmval, struct svc_rqst *, int ignore_crossmnt);
+extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *,
+ struct nfsd4_setclientid *setclid);
+extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *,
+ struct nfsd4_setclientid_confirm *setclientid_confirm);
+extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp);
+extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
+ struct nfsd4_sequence *seq);
+extern __be32 nfsd4_exchange_id(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *,
+struct nfsd4_exchange_id *);
+ extern __be32 nfsd4_create_session(struct svc_rqst *,
+ struct nfsd4_compound_state *,
+ struct nfsd4_create_session *);
+extern __be32 nfsd4_sequence(struct svc_rqst *,
+ struct nfsd4_compound_state *,
+ struct nfsd4_sequence *);
+extern __be32 nfsd4_destroy_session(struct svc_rqst *,
+ struct nfsd4_compound_state *,
+ struct nfsd4_destroy_session *);
+extern __be32 nfsd4_process_open1(struct nfsd4_compound_state *,
+ struct nfsd4_open *open);
+extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp,
+ struct svc_fh *current_fh, struct nfsd4_open *open);
+extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *, struct nfsd4_open_confirm *oc);
+extern __be32 nfsd4_close(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *,
+ struct nfsd4_close *close);
+extern __be32 nfsd4_open_downgrade(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *,
+ struct nfsd4_open_downgrade *od);
+extern __be32 nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *,
+ struct nfsd4_lock *lock);
+extern __be32 nfsd4_lockt(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *,
+ struct nfsd4_lockt *lockt);
+extern __be32 nfsd4_locku(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *,
+ struct nfsd4_locku *locku);
+extern __be32
+nfsd4_release_lockowner(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *,
+ struct nfsd4_release_lockowner *rlockowner);
+extern void nfsd4_release_compoundargs(struct nfsd4_compoundargs *);
+extern __be32 nfsd4_delegreturn(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *, struct nfsd4_delegreturn *dr);
+extern __be32 nfsd4_renew(struct svc_rqst *rqstp,
+ struct nfsd4_compound_state *, clientid_t *clid);
+#endif
+
+/*
+ * Local variables:
+ * c-basic-offset: 8
+ * End:
+ */
diff --git a/fs/nilfs2/Kconfig b/fs/nilfs2/Kconfig
index 1225af7b216..251da07b2a1 100644
--- a/fs/nilfs2/Kconfig
+++ b/fs/nilfs2/Kconfig
@@ -2,7 +2,6 @@ config NILFS2_FS
tristate "NILFS2 file system support (EXPERIMENTAL)"
depends on EXPERIMENTAL
select CRC32
- select FS_JOURNAL_INFO
help
NILFS2 is a log-structured file system (LFS) supporting continuous
snapshotting. In addition to versioning capability of the entire
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index f4a14ea2ed9..effdbdbe6c1 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -417,8 +417,8 @@ __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap,
key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT -
bmap->b_inode->i_blkbits);
- for (pbh = page_buffers(bh->b_page); pbh != bh;
- pbh = pbh->b_this_page, key++);
+ for (pbh = page_buffers(bh->b_page); pbh != bh; pbh = pbh->b_this_page)
+ key++;
return key;
}
diff --git a/fs/nilfs2/cpfile.c b/fs/nilfs2/cpfile.c
index d5ad54e204a..18737818db6 100644
--- a/fs/nilfs2/cpfile.c
+++ b/fs/nilfs2/cpfile.c
@@ -328,19 +328,24 @@ int nilfs_cpfile_delete_checkpoints(struct inode *cpfile,
tnicps += nicps;
nilfs_mdt_mark_buffer_dirty(cp_bh);
nilfs_mdt_mark_dirty(cpfile);
- if (!nilfs_cpfile_is_in_first(cpfile, cno) &&
- (count = nilfs_cpfile_block_sub_valid_checkpoints(
- cpfile, cp_bh, kaddr, nicps)) == 0) {
- /* make hole */
- kunmap_atomic(kaddr, KM_USER0);
- brelse(cp_bh);
- ret = nilfs_cpfile_delete_checkpoint_block(
- cpfile, cno);
- if (ret == 0)
- continue;
- printk(KERN_ERR "%s: cannot delete block\n",
- __func__);
- break;
+ if (!nilfs_cpfile_is_in_first(cpfile, cno)) {
+ count =
+ nilfs_cpfile_block_sub_valid_checkpoints(
+ cpfile, cp_bh, kaddr, nicps);
+ if (count == 0) {
+ /* make hole */
+ kunmap_atomic(kaddr, KM_USER0);
+ brelse(cp_bh);
+ ret =
+ nilfs_cpfile_delete_checkpoint_block(
+ cpfile, cno);
+ if (ret == 0)
+ continue;
+ printk(KERN_ERR
+ "%s: cannot delete block\n",
+ __func__);
+ break;
+ }
}
}
diff --git a/fs/nilfs2/direct.c b/fs/nilfs2/direct.c
index d369ac71827..236753df5cd 100644
--- a/fs/nilfs2/direct.c
+++ b/fs/nilfs2/direct.c
@@ -51,11 +51,11 @@ static int nilfs_direct_lookup(const struct nilfs_bmap *bmap,
struct nilfs_direct *direct;
__u64 ptr;
- direct = (struct nilfs_direct *)bmap;
- if ((key > NILFS_DIRECT_KEY_MAX) ||
- (level != 1) || /* XXX: use macro for level 1 */
- ((ptr = nilfs_direct_get_ptr(direct, key)) ==
- NILFS_BMAP_INVALID_PTR))
+ direct = (struct nilfs_direct *)bmap; /* XXX: use macro for level 1 */
+ if (key > NILFS_DIRECT_KEY_MAX || level != 1)
+ return -ENOENT;
+ ptr = nilfs_direct_get_ptr(direct, key);
+ if (ptr == NILFS_BMAP_INVALID_PTR)
return -ENOENT;
if (ptrp != NULL)
@@ -73,9 +73,10 @@ static int nilfs_direct_lookup_contig(const struct nilfs_bmap *bmap,
sector_t blocknr;
int ret, cnt;
- if (key > NILFS_DIRECT_KEY_MAX ||
- (ptr = nilfs_direct_get_ptr(direct, key)) ==
- NILFS_BMAP_INVALID_PTR)
+ if (key > NILFS_DIRECT_KEY_MAX)
+ return -ENOENT;
+ ptr = nilfs_direct_get_ptr(direct, key);
+ if (ptr == NILFS_BMAP_INVALID_PTR)
return -ENOENT;
if (NILFS_BMAP_USE_VBN(bmap)) {
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index f6af76042d8..d6b2b83de36 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -480,7 +480,7 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
unsigned int cmd, void __user *argp)
{
struct nilfs_argv argv[5];
- const static size_t argsz[5] = {
+ static const size_t argsz[5] = {
sizeof(struct nilfs_vdesc),
sizeof(struct nilfs_period),
sizeof(__u64),
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 17584c52448..105b508b47a 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2829,7 +2829,7 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
|| sci->sc_seq_request != sci->sc_seq_done);
spin_unlock(&sci->sc_state_lock);
- if (flag || nilfs_segctor_confirm(sci))
+ if (flag || !nilfs_segctor_confirm(sci))
nilfs_segctor_write_out(sci);
WARN_ON(!list_empty(&sci->sc_copied_buffers));
diff --git a/fs/nilfs2/super.c b/fs/nilfs2/super.c
index 5403b3ef3a4..8173faee31e 100644
--- a/fs/nilfs2/super.c
+++ b/fs/nilfs2/super.c
@@ -1118,8 +1118,7 @@ nilfs_get_sb(struct file_system_type *fs_type, int flags,
/* Abandoning the newly allocated superblock */
mutex_unlock(&nilfs->ns_mount_mutex);
put_nilfs(nilfs);
- up_write(&s->s_umount);
- deactivate_super(s);
+ deactivate_locked_super(s);
/*
* deactivate_super() invokes close_bdev_exclusive().
* We must finish all post-cleaning before this call;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index c9ee67b442e..1afb0a10229 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -121,7 +121,7 @@ static int idr_callback(int id, void *p, void *data)
if (warned)
return 0;
- warned = false;
+ warned = true;
entry = p;
ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry);
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 5ef5f365a5c..a94e8bd8eb1 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -552,7 +552,7 @@ retry:
spin_lock(&group->inotify_data.idr_lock);
ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry,
- group->inotify_data.last_wd,
+ group->inotify_data.last_wd+1,
&tmp_ientry->wd);
spin_unlock(&group->inotify_data.idr_lock);
if (ret) {
@@ -632,7 +632,7 @@ static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsign
spin_lock_init(&group->inotify_data.idr_lock);
idr_init(&group->inotify_data.idr);
- group->inotify_data.last_wd = 1;
+ group->inotify_data.last_wd = 0;
group->inotify_data.user = user;
group->inotify_data.fa = NULL;
@@ -646,6 +646,7 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
struct fsnotify_group *group;
struct user_struct *user;
struct file *filp;
+ struct path path;
int fd, ret;
/* Check the IN_* constants for consistency. */
@@ -659,12 +660,6 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
if (fd < 0)
return fd;
- filp = get_empty_filp();
- if (!filp) {
- ret = -ENFILE;
- goto out_put_fd;
- }
-
user = get_current_user();
if (unlikely(atomic_read(&user->inotify_devs) >=
inotify_max_user_instances)) {
@@ -679,24 +674,28 @@ SYSCALL_DEFINE1(inotify_init1, int, flags)
goto out_free_uid;
}
- filp->f_op = &inotify_fops;
- filp->f_path.mnt = mntget(inotify_mnt);
- filp->f_path.dentry = dget(inotify_mnt->mnt_root);
- filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
- filp->f_mode = FMODE_READ;
+ atomic_inc(&user->inotify_devs);
+
+ path.mnt = inotify_mnt;
+ path.dentry = inotify_mnt->mnt_root;
+ path_get(&path);
+ filp = alloc_file(&path, FMODE_READ, &inotify_fops);
+ if (!filp)
+ goto Enfile;
+
filp->f_flags = O_RDONLY | (flags & O_NONBLOCK);
filp->private_data = group;
- atomic_inc(&user->inotify_devs);
-
fd_install(fd, filp);
return fd;
+Enfile:
+ ret = -ENFILE;
+ path_put(&path);
+ atomic_dec(&user->inotify_devs);
out_free_uid:
free_uid(user);
- put_filp(filp);
-out_put_fd:
put_unused_fd(fd);
return ret;
}
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index 9938034762c..dc2505abb6d 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -530,7 +530,7 @@ err_corrupt_attr:
* the ntfs inode.
*
* Q: What locks are held when the function is called?
- * A: i_state has I_LOCK set, hence the inode is locked, also
+ * A: i_state has I_NEW set, hence the inode is locked, also
* i_count is set to 1, so it is not going to go away
* i_flags is set to 0 and we have no business touching it. Only an ioctl()
* is allowed to write to them. We should of course be honouring them but
@@ -1207,7 +1207,7 @@ err_out:
* necessary fields in @vi as well as initializing the ntfs inode.
*
* Q: What locks are held when the function is called?
- * A: i_state has I_LOCK set, hence the inode is locked, also
+ * A: i_state has I_NEW set, hence the inode is locked, also
* i_count is set to 1, so it is not going to go away
*
* Return 0 on success and -errno on error. In the error case, the inode will
@@ -1474,7 +1474,7 @@ err_out:
* normal directory inodes.
*
* Q: What locks are held when the function is called?
- * A: i_state has I_LOCK set, hence the inode is locked, also
+ * A: i_state has I_NEW set, hence the inode is locked, also
* i_count is set to 1, so it is not going to go away
*
* Return 0 on success and -errno on error. In the error case, the inode will
diff --git a/fs/ocfs2/Kconfig b/fs/ocfs2/Kconfig
index 701b7a3a872..0d840669698 100644
--- a/fs/ocfs2/Kconfig
+++ b/fs/ocfs2/Kconfig
@@ -6,6 +6,7 @@ config OCFS2_FS
select CRC32
select QUOTA
select QUOTA_TREE
+ select FS_POSIX_ACL
help
OCFS2 is a general purpose extent based shared disk cluster file
system with many similarities to ext3. It supports 64 bit inode
@@ -74,12 +75,3 @@ config OCFS2_DEBUG_FS
This option will enable expensive consistency checks. Enable
this option for debugging only as it is likely to decrease
performance of the filesystem.
-
-config OCFS2_FS_POSIX_ACL
- bool "OCFS2 POSIX Access Control Lists"
- depends on OCFS2_FS
- select FS_POSIX_ACL
- default n
- help
- Posix Access Control Lists (ACLs) support permissions for users and
- groups beyond the owner/group/world scheme.
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index 31f25ce32c9..600d2d2ade1 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -39,11 +39,8 @@ ocfs2-objs := \
ver.o \
quota_local.o \
quota_global.o \
- xattr.o
-
-ifeq ($(CONFIG_OCFS2_FS_POSIX_ACL),y)
-ocfs2-objs += acl.o
-endif
+ xattr.o \
+ acl.o
ocfs2_stackglue-objs := stackglue.o
ocfs2_stack_o2cb-objs := stack_o2cb.o
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index fbeaec76210..0501974bedd 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -98,15 +98,11 @@ static struct posix_acl *ocfs2_get_acl_nolock(struct inode *inode,
int type,
struct buffer_head *di_bh)
{
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
int name_index;
char *value = NULL;
struct posix_acl *acl;
int retval;
- if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
- return NULL;
-
switch (type) {
case ACL_TYPE_ACCESS:
name_index = OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS;
@@ -331,13 +327,14 @@ cleanup:
return ret;
}
-static size_t ocfs2_xattr_list_acl_access(struct inode *inode,
+static size_t ocfs2_xattr_list_acl_access(struct dentry *dentry,
char *list,
size_t list_len,
const char *name,
- size_t name_len)
+ size_t name_len,
+ int type)
{
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
@@ -348,13 +345,14 @@ static size_t ocfs2_xattr_list_acl_access(struct inode *inode,
return size;
}
-static size_t ocfs2_xattr_list_acl_default(struct inode *inode,
+static size_t ocfs2_xattr_list_acl_default(struct dentry *dentry,
char *list,
size_t list_len,
const char *name,
- size_t name_len)
+ size_t name_len,
+ int type)
{
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
@@ -365,19 +363,19 @@ static size_t ocfs2_xattr_list_acl_default(struct inode *inode,
return size;
}
-static int ocfs2_xattr_get_acl(struct inode *inode,
- int type,
- void *buffer,
- size_t size)
+static int ocfs2_xattr_get_acl(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
{
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
struct posix_acl *acl;
int ret;
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
return -EOPNOTSUPP;
- acl = ocfs2_get_acl(inode, type);
+ acl = ocfs2_get_acl(dentry->d_inode, type);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl == NULL)
@@ -388,35 +386,16 @@ static int ocfs2_xattr_get_acl(struct inode *inode,
return ret;
}
-static int ocfs2_xattr_get_acl_access(struct inode *inode,
- const char *name,
- void *buffer,
- size_t size)
-{
- if (strcmp(name, "") != 0)
- return -EINVAL;
- return ocfs2_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
-}
-
-static int ocfs2_xattr_get_acl_default(struct inode *inode,
- const char *name,
- void *buffer,
- size_t size)
-{
- if (strcmp(name, "") != 0)
- return -EINVAL;
- return ocfs2_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
-}
-
-static int ocfs2_xattr_set_acl(struct inode *inode,
- int type,
- const void *value,
- size_t size)
+static int ocfs2_xattr_set_acl(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags, int type)
{
+ struct inode *inode = dentry->d_inode;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct posix_acl *acl;
int ret = 0;
+ if (strcmp(name, "") != 0)
+ return -EINVAL;
if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
return -EOPNOTSUPP;
@@ -442,38 +421,18 @@ cleanup:
return ret;
}
-static int ocfs2_xattr_set_acl_access(struct inode *inode,
- const char *name,
- const void *value,
- size_t size,
- int flags)
-{
- if (strcmp(name, "") != 0)
- return -EINVAL;
- return ocfs2_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
-}
-
-static int ocfs2_xattr_set_acl_default(struct inode *inode,
- const char *name,
- const void *value,
- size_t size,
- int flags)
-{
- if (strcmp(name, "") != 0)
- return -EINVAL;
- return ocfs2_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
-}
-
struct xattr_handler ocfs2_xattr_acl_access_handler = {
.prefix = POSIX_ACL_XATTR_ACCESS,
+ .flags = ACL_TYPE_ACCESS,
.list = ocfs2_xattr_list_acl_access,
- .get = ocfs2_xattr_get_acl_access,
- .set = ocfs2_xattr_set_acl_access,
+ .get = ocfs2_xattr_get_acl,
+ .set = ocfs2_xattr_set_acl,
};
struct xattr_handler ocfs2_xattr_acl_default_handler = {
.prefix = POSIX_ACL_XATTR_DEFAULT,
+ .flags = ACL_TYPE_DEFAULT,
.list = ocfs2_xattr_list_acl_default,
- .get = ocfs2_xattr_get_acl_default,
- .set = ocfs2_xattr_set_acl_default,
+ .get = ocfs2_xattr_get_acl,
+ .set = ocfs2_xattr_set_acl,
};
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index 8f6389ed4da..5c5d31f0585 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -26,8 +26,6 @@ struct ocfs2_acl_entry {
__le32 e_id;
};
-#ifdef CONFIG_OCFS2_FS_POSIX_ACL
-
extern int ocfs2_check_acl(struct inode *, int);
extern int ocfs2_acl_chmod(struct inode *);
extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
@@ -35,24 +33,4 @@ extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
struct ocfs2_alloc_context *,
struct ocfs2_alloc_context *);
-#else /* CONFIG_OCFS2_FS_POSIX_ACL*/
-
-#define ocfs2_check_acl NULL
-static inline int ocfs2_acl_chmod(struct inode *inode)
-{
- return 0;
-}
-static inline int ocfs2_init_acl(handle_t *handle,
- struct inode *inode,
- struct inode *dir,
- struct buffer_head *di_bh,
- struct buffer_head *dir_bh,
- struct ocfs2_alloc_context *meta_ac,
- struct ocfs2_alloc_context *data_ac)
-{
- return 0;
-}
-
-#endif /* CONFIG_OCFS2_FS_POSIX_ACL*/
-
#endif /* OCFS2_ACL_H */
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index 7c7198a5bc9..d17bdc718f7 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -1765,9 +1765,9 @@ set_and_inc:
*
* The array index of the subtree root is passed back.
*/
-static int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
- struct ocfs2_path *left,
- struct ocfs2_path *right)
+int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
+ struct ocfs2_path *left,
+ struct ocfs2_path *right)
{
int i = 0;
@@ -2872,8 +2872,8 @@ out:
* This looks similar, but is subtly different to
* ocfs2_find_cpos_for_left_leaf().
*/
-static int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
- struct ocfs2_path *path, u32 *cpos)
+int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
+ struct ocfs2_path *path, u32 *cpos)
{
int i, j, ret = 0;
u64 blkno;
@@ -7190,8 +7190,8 @@ int ocfs2_zero_range_for_truncate(struct inode *inode, handle_t *handle,
* wait on them - the truncate_inode_pages() call later will
* do that for us.
*/
- ret = do_sync_mapping_range(inode->i_mapping, range_start,
- range_end - 1, SYNC_FILE_RANGE_WRITE);
+ ret = filemap_fdatawrite_range(inode->i_mapping, range_start,
+ range_end - 1);
if (ret)
mlog_errno(ret);
diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
index 9c122d57446..1db4359ccb9 100644
--- a/fs/ocfs2/alloc.h
+++ b/fs/ocfs2/alloc.h
@@ -317,4 +317,9 @@ int ocfs2_path_bh_journal_access(handle_t *handle,
int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
handle_t *handle,
struct ocfs2_path *path);
+int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
+ struct ocfs2_path *path, u32 *cpos);
+int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
+ struct ocfs2_path *left,
+ struct ocfs2_path *right);
#endif /* OCFS2_ALLOC_H */
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 3dae4a13f6e..7e9df11260f 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -599,7 +599,7 @@ bail:
return ret;
}
-/*
+/*
* ocfs2_dio_end_io is called by the dio core when a dio is finished. We're
* particularly interested in the aio/dio case. Like the core uses
* i_alloc_sem, we use the rw_lock DLM lock to protect io on one node from
@@ -670,7 +670,7 @@ static ssize_t ocfs2_direct_IO(int rw,
ret = blockdev_direct_IO_no_locking(rw, iocb, inode,
inode->i_sb->s_bdev, iov, offset,
- nr_segs,
+ nr_segs,
ocfs2_direct_IO_get_blocks,
ocfs2_dio_end_io);
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c
index d43d34a1dd3..21c808f752d 100644
--- a/fs/ocfs2/buffer_head_io.c
+++ b/fs/ocfs2/buffer_head_io.c
@@ -368,7 +368,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr,
}
ocfs2_metadata_cache_io_unlock(ci);
- mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n",
+ mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n",
(unsigned long long)block, nr,
((flags & OCFS2_BH_IGNORE_CACHE) || ignore_cache) ? "no" : "yes",
flags);
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index c452d116b89..5c989000670 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -78,7 +78,7 @@ static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type);
unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD;
-/* Only sets a new threshold if there are no active regions.
+/* Only sets a new threshold if there are no active regions.
*
* No locking or otherwise interesting code is required for reading
* o2hb_dead_threshold as it can't change once regions are active and
@@ -170,13 +170,14 @@ static void o2hb_write_timeout(struct work_struct *work)
mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u "
"milliseconds\n", reg->hr_dev_name,
- jiffies_to_msecs(jiffies - reg->hr_last_timeout_start));
+ jiffies_to_msecs(jiffies - reg->hr_last_timeout_start));
o2quo_disk_timeout();
}
static void o2hb_arm_write_timeout(struct o2hb_region *reg)
{
- mlog(0, "Queue write timeout for %u ms\n", O2HB_MAX_WRITE_TIMEOUT_MS);
+ mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n",
+ O2HB_MAX_WRITE_TIMEOUT_MS);
cancel_delayed_work(&reg->hr_write_timeout_work);
reg->hr_last_timeout_start = jiffies;
@@ -623,7 +624,7 @@ static int o2hb_check_slot(struct o2hb_region *reg,
"seq %llu last %llu changed %u equal %u\n",
slot->ds_node_num, (long long)slot->ds_last_generation,
le32_to_cpu(hb_block->hb_cksum),
- (unsigned long long)le64_to_cpu(hb_block->hb_seq),
+ (unsigned long long)le64_to_cpu(hb_block->hb_seq),
(unsigned long long)slot->ds_last_time, slot->ds_changed_samples,
slot->ds_equal_samples);
@@ -874,7 +875,8 @@ static int o2hb_thread(void *data)
do_gettimeofday(&after_hb);
elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
- mlog(0, "start = %lu.%lu, end = %lu.%lu, msec = %u\n",
+ mlog(ML_HEARTBEAT,
+ "start = %lu.%lu, end = %lu.%lu, msec = %u\n",
before_hb.tv_sec, (unsigned long) before_hb.tv_usec,
after_hb.tv_sec, (unsigned long) after_hb.tv_usec,
elapsed_msec);
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index 7ee6188bc79..c81142e3ef8 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -35,6 +35,10 @@
* cluster references throughout where nodes are looked up */
struct o2nm_cluster *o2nm_single_cluster = NULL;
+char *o2nm_fence_method_desc[O2NM_FENCE_METHODS] = {
+ "reset", /* O2NM_FENCE_RESET */
+ "panic", /* O2NM_FENCE_PANIC */
+};
struct o2nm_node *o2nm_get_node_by_num(u8 node_num)
{
@@ -579,6 +583,43 @@ static ssize_t o2nm_cluster_attr_reconnect_delay_ms_write(
return o2nm_cluster_attr_write(page, count,
&cluster->cl_reconnect_delay_ms);
}
+
+static ssize_t o2nm_cluster_attr_fence_method_read(
+ struct o2nm_cluster *cluster, char *page)
+{
+ ssize_t ret = 0;
+
+ if (cluster)
+ ret = sprintf(page, "%s\n",
+ o2nm_fence_method_desc[cluster->cl_fence_method]);
+ return ret;
+}
+
+static ssize_t o2nm_cluster_attr_fence_method_write(
+ struct o2nm_cluster *cluster, const char *page, size_t count)
+{
+ unsigned int i;
+
+ if (page[count - 1] != '\n')
+ goto bail;
+
+ for (i = 0; i < O2NM_FENCE_METHODS; ++i) {
+ if (count != strlen(o2nm_fence_method_desc[i]) + 1)
+ continue;
+ if (strncasecmp(page, o2nm_fence_method_desc[i], count - 1))
+ continue;
+ if (cluster->cl_fence_method != i) {
+ printk(KERN_INFO "ocfs2: Changing fence method to %s\n",
+ o2nm_fence_method_desc[i]);
+ cluster->cl_fence_method = i;
+ }
+ return count;
+ }
+
+bail:
+ return -EINVAL;
+}
+
static struct o2nm_cluster_attribute o2nm_cluster_attr_idle_timeout_ms = {
.attr = { .ca_owner = THIS_MODULE,
.ca_name = "idle_timeout_ms",
@@ -603,10 +644,19 @@ static struct o2nm_cluster_attribute o2nm_cluster_attr_reconnect_delay_ms = {
.store = o2nm_cluster_attr_reconnect_delay_ms_write,
};
+static struct o2nm_cluster_attribute o2nm_cluster_attr_fence_method = {
+ .attr = { .ca_owner = THIS_MODULE,
+ .ca_name = "fence_method",
+ .ca_mode = S_IRUGO | S_IWUSR },
+ .show = o2nm_cluster_attr_fence_method_read,
+ .store = o2nm_cluster_attr_fence_method_write,
+};
+
static struct configfs_attribute *o2nm_cluster_attrs[] = {
&o2nm_cluster_attr_idle_timeout_ms.attr,
&o2nm_cluster_attr_keepalive_delay_ms.attr,
&o2nm_cluster_attr_reconnect_delay_ms.attr,
+ &o2nm_cluster_attr_fence_method.attr,
NULL,
};
static ssize_t o2nm_cluster_show(struct config_item *item,
@@ -778,6 +828,7 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g
cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT;
cluster->cl_idle_timeout_ms = O2NET_IDLE_TIMEOUT_MS_DEFAULT;
cluster->cl_keepalive_delay_ms = O2NET_KEEPALIVE_DELAY_MS_DEFAULT;
+ cluster->cl_fence_method = O2NM_FENCE_RESET;
ret = &cluster->cl_group;
o2nm_single_cluster = cluster;
diff --git a/fs/ocfs2/cluster/nodemanager.h b/fs/ocfs2/cluster/nodemanager.h
index c992ea0da4a..09ea2d388bb 100644
--- a/fs/ocfs2/cluster/nodemanager.h
+++ b/fs/ocfs2/cluster/nodemanager.h
@@ -33,6 +33,12 @@
#include <linux/configfs.h>
#include <linux/rbtree.h>
+enum o2nm_fence_method {
+ O2NM_FENCE_RESET = 0,
+ O2NM_FENCE_PANIC,
+ O2NM_FENCE_METHODS, /* Number of fence methods */
+};
+
struct o2nm_node {
spinlock_t nd_lock;
struct config_item nd_item;
@@ -58,6 +64,7 @@ struct o2nm_cluster {
unsigned int cl_idle_timeout_ms;
unsigned int cl_keepalive_delay_ms;
unsigned int cl_reconnect_delay_ms;
+ enum o2nm_fence_method cl_fence_method;
/* this bitmap is part of a hack for disk bitmap.. will go eventually. - zab */
unsigned long cl_nodes_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
diff --git a/fs/ocfs2/cluster/quorum.c b/fs/ocfs2/cluster/quorum.c
index bbacf7da48a..639024033fc 100644
--- a/fs/ocfs2/cluster/quorum.c
+++ b/fs/ocfs2/cluster/quorum.c
@@ -74,8 +74,20 @@ static void o2quo_fence_self(void)
* threads can still schedule, etc, etc */
o2hb_stop_all_regions();
- printk("ocfs2 is very sorry to be fencing this system by restarting\n");
- emergency_restart();
+ switch (o2nm_single_cluster->cl_fence_method) {
+ case O2NM_FENCE_PANIC:
+ panic("*** ocfs2 is very sorry to be fencing this system by "
+ "panicing ***\n");
+ break;
+ default:
+ WARN_ON(o2nm_single_cluster->cl_fence_method >=
+ O2NM_FENCE_METHODS);
+ case O2NM_FENCE_RESET:
+ printk(KERN_ERR "*** ocfs2 is very sorry to be fencing this "
+ "system by restarting ***\n");
+ emergency_restart();
+ break;
+ };
}
/* Indicate that a timeout occured on a hearbeat region write. The
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 334f231a422..d8d0c65ac03 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -485,7 +485,7 @@ static void o2net_set_nn_state(struct o2net_node *nn,
}
if (was_valid && !valid) {
- printk(KERN_INFO "o2net: no longer connected to "
+ printk(KERN_NOTICE "o2net: no longer connected to "
SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc));
o2net_complete_nodes_nsw(nn);
}
@@ -493,7 +493,7 @@ static void o2net_set_nn_state(struct o2net_node *nn,
if (!was_valid && valid) {
o2quo_conn_up(o2net_num_from_nn(nn));
cancel_delayed_work(&nn->nn_connect_expired);
- printk(KERN_INFO "o2net: %s " SC_NODEF_FMT "\n",
+ printk(KERN_NOTICE "o2net: %s " SC_NODEF_FMT "\n",
o2nm_this_node() > sc->sc_node->nd_num ?
"connected to" : "accepted connection from",
SC_NODEF_ARGS(sc));
@@ -930,7 +930,7 @@ static void o2net_sendpage(struct o2net_sock_container *sc,
cond_resched();
continue;
}
- mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT
+ mlog(ML_ERROR, "sendpage of size %zu to " SC_NODEF_FMT
" failed with %zd\n", size, SC_NODEF_ARGS(sc), ret);
o2net_ensure_shutdown(nn, sc, 0);
break;
@@ -1476,14 +1476,14 @@ static void o2net_idle_timer(unsigned long data)
do_gettimeofday(&now);
- printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
+ printk(KERN_NOTICE "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u "
"seconds, shutting it down.\n", SC_NODEF_ARGS(sc),
o2net_idle_timeout() / 1000,
o2net_idle_timeout() % 1000);
mlog(ML_NOTICE, "here are some times that might help debug the "
"situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv "
"%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n",
- sc->sc_tv_timer.tv_sec, (long) sc->sc_tv_timer.tv_usec,
+ sc->sc_tv_timer.tv_sec, (long) sc->sc_tv_timer.tv_usec,
now.tv_sec, (long) now.tv_usec,
sc->sc_tv_data_ready.tv_sec, (long) sc->sc_tv_data_ready.tv_usec,
sc->sc_tv_advance_start.tv_sec,
diff --git a/fs/ocfs2/cluster/tcp_internal.h b/fs/ocfs2/cluster/tcp_internal.h
index 8d58cfe410b..96fa7ebc530 100644
--- a/fs/ocfs2/cluster/tcp_internal.h
+++ b/fs/ocfs2/cluster/tcp_internal.h
@@ -32,10 +32,10 @@
* on their number */
#define O2NET_QUORUM_DELAY_MS ((o2hb_dead_threshold + 2) * O2HB_REGION_TIMEOUT_MS)
-/*
+/*
* This version number represents quite a lot, unfortunately. It not
* only represents the raw network message protocol on the wire but also
- * locking semantics of the file system using the protocol. It should
+ * locking semantics of the file system using the protocol. It should
* be somewhere else, I'm sure, but right now it isn't.
*
* With version 11, we separate out the filesystem locking portion. The
diff --git a/fs/ocfs2/dlm/dlmapi.h b/fs/ocfs2/dlm/dlmapi.h
index b5786a787fa..3cfa114aa39 100644
--- a/fs/ocfs2/dlm/dlmapi.h
+++ b/fs/ocfs2/dlm/dlmapi.h
@@ -95,7 +95,7 @@ const char *dlm_errname(enum dlm_status err);
mlog(ML_ERROR, "dlm status = %s\n", dlm_errname((st))); \
} while (0)
-#define DLM_LKSB_UNUSED1 0x01
+#define DLM_LKSB_UNUSED1 0x01
#define DLM_LKSB_PUT_LVB 0x02
#define DLM_LKSB_GET_LVB 0x04
#define DLM_LKSB_UNUSED2 0x08
diff --git a/fs/ocfs2/dlm/dlmast.c b/fs/ocfs2/dlm/dlmast.c
index 01cf8cc3d28..dccc439fa08 100644
--- a/fs/ocfs2/dlm/dlmast.c
+++ b/fs/ocfs2/dlm/dlmast.c
@@ -123,7 +123,7 @@ static void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
dlm_lock_put(lock);
/* free up the reserved bast that we are cancelling.
* guaranteed that this will not be the last reserved
- * ast because *both* an ast and a bast were reserved
+ * ast because *both* an ast and a bast were reserved
* to get to this point. the res->spinlock will not be
* taken here */
dlm_lockres_release_ast(dlm, res);
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index ca96bce50e1..f283bce776b 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -396,7 +396,7 @@ static enum dlm_status dlm_send_remote_convert_request(struct dlm_ctxt *dlm,
/* instead of logging the same network error over
* and over, sleep here and wait for the heartbeat
* to notice the node is dead. times out after 5s. */
- dlm_wait_for_node_death(dlm, res->owner,
+ dlm_wait_for_node_death(dlm, res->owner,
DLM_NODE_DEATH_WAIT_MAX);
ret = DLM_RECOVERING;
mlog(0, "node %u died so returning DLM_RECOVERING "
diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c
index 42b0bad7a61..0cd24cf5439 100644
--- a/fs/ocfs2/dlm/dlmdebug.c
+++ b/fs/ocfs2/dlm/dlmdebug.c
@@ -102,7 +102,7 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
assert_spin_locked(&res->spinlock);
stringify_lockname(res->lockname.name, res->lockname.len,
- buf, sizeof(buf) - 1);
+ buf, sizeof(buf));
printk("lockres: %s, owner=%u, state=%u\n",
buf, res->owner, res->state);
printk(" last used: %lu, refcnt: %u, on purge list: %s\n",
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 0334000676d..988c9055fd4 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -816,7 +816,7 @@ static int dlm_query_join_handler(struct o2net_msg *msg, u32 len, void *data,
}
/* Once the dlm ctxt is marked as leaving then we don't want
- * to be put in someone's domain map.
+ * to be put in someone's domain map.
* Also, explicitly disallow joining at certain troublesome
* times (ie. during recovery). */
if (dlm && dlm->dlm_state != DLM_CTXT_LEAVING) {
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 437698e9465..73333777267 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -269,7 +269,7 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
}
dlm_revert_pending_lock(res, lock);
dlm_lock_put(lock);
- } else if (dlm_is_recovery_lock(res->lockname.name,
+ } else if (dlm_is_recovery_lock(res->lockname.name,
res->lockname.len)) {
/* special case for the $RECOVERY lock.
* there will never be an AST delivered to put
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 03ccf9a7b1f..a659606dcb9 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -366,7 +366,7 @@ void dlm_hb_event_notify_attached(struct dlm_ctxt *dlm, int idx, int node_up)
struct dlm_master_list_entry *mle;
assert_spin_locked(&dlm->spinlock);
-
+
list_for_each_entry(mle, &dlm->mle_hb_events, hb_events) {
if (node_up)
dlm_mle_node_up(dlm, mle, NULL, idx);
@@ -833,7 +833,7 @@ lookup:
__dlm_insert_mle(dlm, mle);
/* still holding the dlm spinlock, check the recovery map
- * to see if there are any nodes that still need to be
+ * to see if there are any nodes that still need to be
* considered. these will not appear in the mle nodemap
* but they might own this lockres. wait on them. */
bit = find_next_bit(dlm->recovery_map, O2NM_MAX_NODES, 0);
@@ -883,7 +883,7 @@ redo_request:
msleep(500);
}
continue;
- }
+ }
dlm_kick_recovery_thread(dlm);
msleep(1000);
@@ -939,8 +939,8 @@ wait:
res->lockname.name, blocked);
if (++tries > 20) {
mlog(ML_ERROR, "%s:%.*s: spinning on "
- "dlm_wait_for_lock_mastery, blocked=%d\n",
- dlm->name, res->lockname.len,
+ "dlm_wait_for_lock_mastery, blocked=%d\n",
+ dlm->name, res->lockname.len,
res->lockname.name, blocked);
dlm_print_one_lock_resource(res);
dlm_print_one_mle(mle);
@@ -1029,7 +1029,7 @@ recheck:
ret = dlm_restart_lock_mastery(dlm, res, mle, *blocked);
b = (mle->type == DLM_MLE_BLOCK);
if ((*blocked && !b) || (!*blocked && b)) {
- mlog(0, "%s:%.*s: status change: old=%d new=%d\n",
+ mlog(0, "%s:%.*s: status change: old=%d new=%d\n",
dlm->name, res->lockname.len, res->lockname.name,
*blocked, b);
*blocked = b;
@@ -1602,7 +1602,7 @@ send_response:
}
mlog(0, "%u is the owner of %.*s, cleaning everyone else\n",
dlm->node_num, res->lockname.len, res->lockname.name);
- ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx,
+ ret = dlm_dispatch_assert_master(dlm, res, 0, request->node_idx,
DLM_ASSERT_MASTER_MLE_CLEANUP);
if (ret < 0) {
mlog(ML_ERROR, "failed to dispatch assert master work\n");
@@ -1701,7 +1701,7 @@ again:
if (r & DLM_ASSERT_RESPONSE_REASSERT) {
mlog(0, "%.*s: node %u create mles on other "
- "nodes and requests a re-assert\n",
+ "nodes and requests a re-assert\n",
namelen, lockname, to);
reassert = 1;
}
@@ -1812,7 +1812,7 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
spin_unlock(&dlm->master_lock);
spin_unlock(&dlm->spinlock);
goto done;
- }
+ }
}
}
spin_unlock(&dlm->master_lock);
@@ -1883,7 +1883,7 @@ ok:
int extra_ref = 0;
int nn = -1;
int rr, err = 0;
-
+
spin_lock(&mle->spinlock);
if (mle->type == DLM_MLE_BLOCK || mle->type == DLM_MLE_MIGRATION)
extra_ref = 1;
@@ -1891,7 +1891,7 @@ ok:
/* MASTER mle: if any bits set in the response map
* then the calling node needs to re-assert to clear
* up nodes that this node contacted */
- while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES,
+ while ((nn = find_next_bit (mle->response_map, O2NM_MAX_NODES,
nn+1)) < O2NM_MAX_NODES) {
if (nn != dlm->node_num && nn != assert->node_idx)
master_request = 1;
@@ -2002,7 +2002,7 @@ kill:
__dlm_print_one_lock_resource(res);
spin_unlock(&res->spinlock);
spin_unlock(&dlm->spinlock);
- *ret_data = (void *)res;
+ *ret_data = (void *)res;
dlm_put(dlm);
return -EINVAL;
}
@@ -2040,10 +2040,10 @@ int dlm_dispatch_assert_master(struct dlm_ctxt *dlm,
item->u.am.request_from = request_from;
item->u.am.flags = flags;
- if (ignore_higher)
- mlog(0, "IGNORE HIGHER: %.*s\n", res->lockname.len,
+ if (ignore_higher)
+ mlog(0, "IGNORE HIGHER: %.*s\n", res->lockname.len,
res->lockname.name);
-
+
spin_lock(&dlm->work_lock);
list_add_tail(&item->list, &dlm->work_list);
spin_unlock(&dlm->work_lock);
@@ -2133,7 +2133,7 @@ put:
* think that $RECOVERY is currently mastered by a dead node. If so,
* we wait a short time to allow that node to get notified by its own
* heartbeat stack, then check again. All $RECOVERY lock resources
- * mastered by dead nodes are purged when the hearbeat callback is
+ * mastered by dead nodes are purged when the hearbeat callback is
* fired, so we can know for sure that it is safe to continue once
* the node returns a live node or no node. */
static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
@@ -2174,7 +2174,7 @@ static int dlm_pre_master_reco_lockres(struct dlm_ctxt *dlm,
ret = -EAGAIN;
}
spin_unlock(&dlm->spinlock);
- mlog(0, "%s: reco lock master is %u\n", dlm->name,
+ mlog(0, "%s: reco lock master is %u\n", dlm->name,
master);
break;
}
@@ -2602,7 +2602,7 @@ fail:
mlog(0, "%s:%.*s: timed out during migration\n",
dlm->name, res->lockname.len, res->lockname.name);
- /* avoid hang during shutdown when migrating lockres
+ /* avoid hang during shutdown when migrating lockres
* to a node which also goes down */
if (dlm_is_node_dead(dlm, target)) {
mlog(0, "%s:%.*s: expected migration "
@@ -2738,7 +2738,7 @@ static int dlm_migration_can_proceed(struct dlm_ctxt *dlm,
can_proceed = !!(res->state & DLM_LOCK_RES_MIGRATING);
spin_unlock(&res->spinlock);
- /* target has died, so make the caller break out of the
+ /* target has died, so make the caller break out of the
* wait_event, but caller must recheck the domain_map */
spin_lock(&dlm->spinlock);
if (!test_bit(mig_target, dlm->domain_map))
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index d9fa3d22e17..344bcf90cbf 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1050,7 +1050,7 @@ static void dlm_move_reco_locks_to_list(struct dlm_ctxt *dlm,
if (lock->ml.node == dead_node) {
mlog(0, "AHA! there was "
"a $RECOVERY lock for dead "
- "node %u (%s)!\n",
+ "node %u (%s)!\n",
dead_node, dlm->name);
list_del_init(&lock->list);
dlm_lock_put(lock);
@@ -1164,6 +1164,39 @@ static void dlm_init_migratable_lockres(struct dlm_migratable_lockres *mres,
mres->master = master;
}
+static void dlm_prepare_lvb_for_migration(struct dlm_lock *lock,
+ struct dlm_migratable_lockres *mres,
+ int queue)
+{
+ if (!lock->lksb)
+ return;
+
+ /* Ignore lvb in all locks in the blocked list */
+ if (queue == DLM_BLOCKED_LIST)
+ return;
+
+ /* Only consider lvbs in locks with granted EX or PR lock levels */
+ if (lock->ml.type != LKM_EXMODE && lock->ml.type != LKM_PRMODE)
+ return;
+
+ if (dlm_lvb_is_empty(mres->lvb)) {
+ memcpy(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN);
+ return;
+ }
+
+ /* Ensure the lvb copied for migration matches in other valid locks */
+ if (!memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))
+ return;
+
+ mlog(ML_ERROR, "Mismatched lvb in lock cookie=%u:%llu, name=%.*s, "
+ "node=%u\n",
+ dlm_get_lock_cookie_node(be64_to_cpu(lock->ml.cookie)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(lock->ml.cookie)),
+ lock->lockres->lockname.len, lock->lockres->lockname.name,
+ lock->ml.node);
+ dlm_print_one_lock_resource(lock->lockres);
+ BUG();
+}
/* returns 1 if this lock fills the network structure,
* 0 otherwise */
@@ -1181,20 +1214,7 @@ static int dlm_add_lock_to_array(struct dlm_lock *lock,
ml->list = queue;
if (lock->lksb) {
ml->flags = lock->lksb->flags;
- /* send our current lvb */
- if (ml->type == LKM_EXMODE ||
- ml->type == LKM_PRMODE) {
- /* if it is already set, this had better be a PR
- * and it has to match */
- if (!dlm_lvb_is_empty(mres->lvb) &&
- (ml->type == LKM_EXMODE ||
- memcmp(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN))) {
- mlog(ML_ERROR, "mismatched lvbs!\n");
- dlm_print_one_lock_resource(lock->lockres);
- BUG();
- }
- memcpy(mres->lvb, lock->lksb->lvb, DLM_LVB_LEN);
- }
+ dlm_prepare_lvb_for_migration(lock, mres, queue);
}
ml->node = lock->ml.node;
mres->num_locks++;
@@ -1730,6 +1750,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
struct dlm_lock *lock = NULL;
u8 from = O2NM_MAX_NODES;
unsigned int added = 0;
+ __be64 c;
mlog(0, "running %d locks for this lockres\n", mres->num_locks);
for (i=0; i<mres->num_locks; i++) {
@@ -1777,19 +1798,48 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
/* lock is always created locally first, and
* destroyed locally last. it must be on the list */
if (!lock) {
- __be64 c = ml->cookie;
- mlog(ML_ERROR, "could not find local lock "
- "with cookie %u:%llu!\n",
+ c = ml->cookie;
+ mlog(ML_ERROR, "Could not find local lock "
+ "with cookie %u:%llu, node %u, "
+ "list %u, flags 0x%x, type %d, "
+ "conv %d, highest blocked %d\n",
dlm_get_lock_cookie_node(be64_to_cpu(c)),
- dlm_get_lock_cookie_seq(be64_to_cpu(c)));
+ dlm_get_lock_cookie_seq(be64_to_cpu(c)),
+ ml->node, ml->list, ml->flags, ml->type,
+ ml->convert_type, ml->highest_blocked);
+ __dlm_print_one_lock_resource(res);
+ BUG();
+ }
+
+ if (lock->ml.node != ml->node) {
+ c = lock->ml.cookie;
+ mlog(ML_ERROR, "Mismatched node# in lock "
+ "cookie %u:%llu, name %.*s, node %u\n",
+ dlm_get_lock_cookie_node(be64_to_cpu(c)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(c)),
+ res->lockname.len, res->lockname.name,
+ lock->ml.node);
+ c = ml->cookie;
+ mlog(ML_ERROR, "Migrate lock cookie %u:%llu, "
+ "node %u, list %u, flags 0x%x, type %d, "
+ "conv %d, highest blocked %d\n",
+ dlm_get_lock_cookie_node(be64_to_cpu(c)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(c)),
+ ml->node, ml->list, ml->flags, ml->type,
+ ml->convert_type, ml->highest_blocked);
__dlm_print_one_lock_resource(res);
BUG();
}
- BUG_ON(lock->ml.node != ml->node);
if (tmpq != queue) {
- mlog(0, "lock was on %u instead of %u for %.*s\n",
- j, ml->list, res->lockname.len, res->lockname.name);
+ c = ml->cookie;
+ mlog(0, "Lock cookie %u:%llu was on list %u "
+ "instead of list %u for %.*s\n",
+ dlm_get_lock_cookie_node(be64_to_cpu(c)),
+ dlm_get_lock_cookie_seq(be64_to_cpu(c)),
+ j, ml->list, res->lockname.len,
+ res->lockname.name);
+ __dlm_print_one_lock_resource(res);
spin_unlock(&res->spinlock);
continue;
}
@@ -1839,7 +1889,7 @@ static int dlm_process_recovery_data(struct dlm_ctxt *dlm,
* the lvb. */
memcpy(res->lvb, mres->lvb, DLM_LVB_LEN);
} else {
- /* otherwise, the node is sending its
+ /* otherwise, the node is sending its
* most recent valid lvb info */
BUG_ON(ml->type != LKM_EXMODE &&
ml->type != LKM_PRMODE);
@@ -1886,7 +1936,7 @@ skip_lvb:
spin_lock(&res->spinlock);
list_for_each_entry(lock, queue, list) {
if (lock->ml.cookie == ml->cookie) {
- __be64 c = lock->ml.cookie;
+ c = lock->ml.cookie;
mlog(ML_ERROR, "%s:%.*s: %u:%llu: lock already "
"exists on this lockres!\n", dlm->name,
res->lockname.len, res->lockname.name,
@@ -2114,7 +2164,7 @@ static void dlm_revalidate_lvb(struct dlm_ctxt *dlm,
assert_spin_locked(&res->spinlock);
if (res->owner == dlm->node_num)
- /* if this node owned the lockres, and if the dead node
+ /* if this node owned the lockres, and if the dead node
* had an EX when he died, blank out the lvb */
search_node = dead_node;
else {
@@ -2152,7 +2202,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
/* this node is the lockres master:
* 1) remove any stale locks for the dead node
- * 2) if the dead node had an EX when he died, blank out the lvb
+ * 2) if the dead node had an EX when he died, blank out the lvb
*/
assert_spin_locked(&dlm->spinlock);
assert_spin_locked(&res->spinlock);
@@ -2193,7 +2243,12 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
mlog(0, "%s:%.*s: freed %u locks for dead node %u, "
"dropping ref from lockres\n", dlm->name,
res->lockname.len, res->lockname.name, freed, dead_node);
- BUG_ON(!test_bit(dead_node, res->refmap));
+ if(!test_bit(dead_node, res->refmap)) {
+ mlog(ML_ERROR, "%s:%.*s: freed %u locks for dead node %u, "
+ "but ref was not set\n", dlm->name,
+ res->lockname.len, res->lockname.name, freed, dead_node);
+ __dlm_print_one_lock_resource(res);
+ }
dlm_lockres_clear_refmap_bit(dead_node, res);
} else if (test_bit(dead_node, res->refmap)) {
mlog(0, "%s:%.*s: dead node %u had a ref, but had "
@@ -2260,7 +2315,7 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
}
spin_unlock(&res->spinlock);
continue;
- }
+ }
spin_lock(&res->spinlock);
/* zero the lvb if necessary */
dlm_revalidate_lvb(dlm, res, dead_node);
@@ -2411,7 +2466,7 @@ static void dlm_reco_unlock_ast(void *astdata, enum dlm_status st)
* this function on each node racing to become the recovery
* master will not stop attempting this until either:
* a) this node gets the EX (and becomes the recovery master),
- * or b) dlm->reco.new_master gets set to some nodenum
+ * or b) dlm->reco.new_master gets set to some nodenum
* != O2NM_INVALID_NODE_NUM (another node will do the reco).
* so each time a recovery master is needed, the entire cluster
* will sync at this point. if the new master dies, that will
@@ -2424,7 +2479,7 @@ static int dlm_pick_recovery_master(struct dlm_ctxt *dlm)
mlog(0, "starting recovery of %s at %lu, dead=%u, this=%u\n",
dlm->name, jiffies, dlm->reco.dead_node, dlm->node_num);
-again:
+again:
memset(&lksb, 0, sizeof(lksb));
ret = dlmlock(dlm, LKM_EXMODE, &lksb, LKM_NOQUEUE|LKM_RECOVERY,
@@ -2437,8 +2492,8 @@ again:
if (ret == DLM_NORMAL) {
mlog(0, "dlm=%s dlmlock says I got it (this=%u)\n",
dlm->name, dlm->node_num);
-
- /* got the EX lock. check to see if another node
+
+ /* got the EX lock. check to see if another node
* just became the reco master */
if (dlm_reco_master_ready(dlm)) {
mlog(0, "%s: got reco EX lock, but %u will "
@@ -2451,12 +2506,12 @@ again:
/* see if recovery was already finished elsewhere */
spin_lock(&dlm->spinlock);
if (dlm->reco.dead_node == O2NM_INVALID_NODE_NUM) {
- status = -EINVAL;
+ status = -EINVAL;
mlog(0, "%s: got reco EX lock, but "
"node got recovered already\n", dlm->name);
if (dlm->reco.new_master != O2NM_INVALID_NODE_NUM) {
mlog(ML_ERROR, "%s: new master is %u "
- "but no dead node!\n",
+ "but no dead node!\n",
dlm->name, dlm->reco.new_master);
BUG();
}
@@ -2468,7 +2523,7 @@ again:
* set the master and send the messages to begin recovery */
if (!status) {
mlog(0, "%s: dead=%u, this=%u, sending "
- "begin_reco now\n", dlm->name,
+ "begin_reco now\n", dlm->name,
dlm->reco.dead_node, dlm->node_num);
status = dlm_send_begin_reco_message(dlm,
dlm->reco.dead_node);
@@ -2501,7 +2556,7 @@ again:
mlog(0, "dlm=%s dlmlock says another node got it (this=%u)\n",
dlm->name, dlm->node_num);
/* another node is master. wait on
- * reco.new_master != O2NM_INVALID_NODE_NUM
+ * reco.new_master != O2NM_INVALID_NODE_NUM
* for at most one second */
wait_event_timeout(dlm->dlm_reco_thread_wq,
dlm_reco_master_ready(dlm),
@@ -2589,9 +2644,23 @@ retry:
"begin reco msg (%d)\n", dlm->name, nodenum, ret);
ret = 0;
}
+
+ /*
+ * Prior to commit aad1b15310b9bcd59fa81ab8f2b1513b59553ea8,
+ * dlm_begin_reco_handler() returned EAGAIN and not -EAGAIN.
+ * We are handling both for compatibility reasons.
+ */
+ if (ret == -EAGAIN || ret == EAGAIN) {
+ mlog(0, "%s: trying to start recovery of node "
+ "%u, but node %u is waiting for last recovery "
+ "to complete, backoff for a bit\n", dlm->name,
+ dead_node, nodenum);
+ msleep(100);
+ goto retry;
+ }
if (ret < 0) {
struct dlm_lock_resource *res;
- /* this is now a serious problem, possibly ENOMEM
+ /* this is now a serious problem, possibly ENOMEM
* in the network stack. must retry */
mlog_errno(ret);
mlog(ML_ERROR, "begin reco of dlm %s to node %u "
@@ -2604,18 +2673,10 @@ retry:
} else {
mlog(ML_ERROR, "recovery lock not found\n");
}
- /* sleep for a bit in hopes that we can avoid
+ /* sleep for a bit in hopes that we can avoid
* another ENOMEM */
msleep(100);
goto retry;
- } else if (ret == EAGAIN) {
- mlog(0, "%s: trying to start recovery of node "
- "%u, but node %u is waiting for last recovery "
- "to complete, backoff for a bit\n", dlm->name,
- dead_node, nodenum);
- /* TODO Look into replacing msleep with cond_resched() */
- msleep(100);
- goto retry;
}
}
@@ -2639,7 +2700,7 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data,
dlm->name, br->node_idx, br->dead_node,
dlm->reco.dead_node, dlm->reco.new_master);
spin_unlock(&dlm->spinlock);
- return EAGAIN;
+ return -EAGAIN;
}
spin_unlock(&dlm->spinlock);
@@ -2664,7 +2725,7 @@ int dlm_begin_reco_handler(struct o2net_msg *msg, u32 len, void *data,
}
if (dlm->reco.dead_node != O2NM_INVALID_NODE_NUM) {
mlog(ML_NOTICE, "%s: dead_node previously set to %u, "
- "node %u changing it to %u\n", dlm->name,
+ "node %u changing it to %u\n", dlm->name,
dlm->reco.dead_node, br->node_idx, br->dead_node);
}
dlm_set_reco_master(dlm, br->node_idx);
@@ -2730,8 +2791,8 @@ stage2:
if (ret < 0) {
mlog_errno(ret);
if (dlm_is_host_down(ret)) {
- /* this has no effect on this recovery
- * session, so set the status to zero to
+ /* this has no effect on this recovery
+ * session, so set the status to zero to
* finish out the last recovery */
mlog(ML_ERROR, "node %u went down after this "
"node finished recovery.\n", nodenum);
@@ -2768,7 +2829,7 @@ int dlm_finalize_reco_handler(struct o2net_msg *msg, u32 len, void *data,
mlog(0, "%s: node %u finalizing recovery stage%d of "
"node %u (%u:%u)\n", dlm->name, fr->node_idx, stage,
fr->dead_node, dlm->reco.dead_node, dlm->reco.new_master);
-
+
spin_lock(&dlm->spinlock);
if (dlm->reco.new_master != fr->node_idx) {
diff --git a/fs/ocfs2/dlm/dlmunlock.c b/fs/ocfs2/dlm/dlmunlock.c
index 00f53b2aea7..49e29ecd020 100644
--- a/fs/ocfs2/dlm/dlmunlock.c
+++ b/fs/ocfs2/dlm/dlmunlock.c
@@ -190,8 +190,8 @@ static enum dlm_status dlmunlock_common(struct dlm_ctxt *dlm,
actions &= ~(DLM_UNLOCK_REMOVE_LOCK|
DLM_UNLOCK_REGRANT_LOCK|
DLM_UNLOCK_CLEAR_CONVERT_TYPE);
- } else if (status == DLM_RECOVERING ||
- status == DLM_MIGRATING ||
+ } else if (status == DLM_RECOVERING ||
+ status == DLM_MIGRATING ||
status == DLM_FORWARD) {
/* must clear the actions because this unlock
* is about to be retried. cannot free or do
@@ -661,14 +661,14 @@ retry:
if (call_ast) {
mlog(0, "calling unlockast(%p, %d)\n", data, status);
if (is_master) {
- /* it is possible that there is one last bast
+ /* it is possible that there is one last bast
* pending. make sure it is flushed, then
* call the unlockast.
* not an issue if this is a mastered remotely,
* since this lock has been removed from the
* lockres queues and cannot be found. */
dlm_kick_thread(dlm, NULL);
- wait_event(dlm->ast_wq,
+ wait_event(dlm->ast_wq,
dlm_lock_basts_flushed(dlm, lock));
}
(*unlockast)(data, status);
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index c5e4a49e3a1..e044019cb3b 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -875,6 +875,14 @@ static inline void ocfs2_generic_handle_convert_action(struct ocfs2_lock_res *lo
lockres_or_flags(lockres, OCFS2_LOCK_NEEDS_REFRESH);
lockres->l_level = lockres->l_requested;
+
+ /*
+ * We set the OCFS2_LOCK_UPCONVERT_FINISHING flag before clearing
+ * the OCFS2_LOCK_BUSY flag to prevent the dc thread from
+ * downconverting the lock before the upconvert has fully completed.
+ */
+ lockres_or_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
+
lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
mlog_exit_void();
@@ -907,8 +915,6 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
assert_spin_locked(&lockres->l_lock);
- lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
-
if (level > lockres->l_blocking) {
/* only schedule a downconvert if we haven't already scheduled
* one that goes low enough to satisfy the level we're
@@ -921,6 +927,9 @@ static int ocfs2_generic_handle_bast(struct ocfs2_lock_res *lockres,
lockres->l_blocking = level;
}
+ if (needs_downconvert)
+ lockres_or_flags(lockres, OCFS2_LOCK_BLOCKED);
+
mlog_exit(needs_downconvert);
return needs_downconvert;
}
@@ -1133,6 +1142,7 @@ static inline void ocfs2_recover_from_dlm_error(struct ocfs2_lock_res *lockres,
mlog_entry_void();
spin_lock_irqsave(&lockres->l_lock, flags);
lockres_clear_flags(lockres, OCFS2_LOCK_BUSY);
+ lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
if (convert)
lockres->l_action = OCFS2_AST_INVALID;
else
@@ -1323,13 +1333,13 @@ static int __ocfs2_cluster_lock(struct ocfs2_super *osb,
again:
wait = 0;
+ spin_lock_irqsave(&lockres->l_lock, flags);
+
if (catch_signals && signal_pending(current)) {
ret = -ERESTARTSYS;
- goto out;
+ goto unlock;
}
- spin_lock_irqsave(&lockres->l_lock, flags);
-
mlog_bug_on_msg(lockres->l_flags & OCFS2_LOCK_FREEING,
"Cluster lock called on freeing lockres %s! flags "
"0x%lx\n", lockres->l_name, lockres->l_flags);
@@ -1346,6 +1356,25 @@ again:
goto unlock;
}
+ if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING) {
+ /*
+ * We've upconverted. If the lock now has a level we can
+ * work with, we take it. If, however, the lock is not at the
+ * required level, we go thru the full cycle. One way this could
+ * happen is if a process requesting an upconvert to PR is
+ * closely followed by another requesting upconvert to an EX.
+ * If the process requesting EX lands here, we want it to
+ * continue attempting to upconvert and let the process
+ * requesting PR take the lock.
+ * If multiple processes request upconvert to PR, the first one
+ * here will take the lock. The others will have to go thru the
+ * OCFS2_LOCK_BLOCKED check to ensure that there is no pending
+ * downconvert request.
+ */
+ if (level <= lockres->l_level)
+ goto update_holders;
+ }
+
if (lockres->l_flags & OCFS2_LOCK_BLOCKED &&
!ocfs2_may_continue_on_blocked_lock(lockres, level)) {
/* is the lock is currently blocked on behalf of
@@ -1416,11 +1445,14 @@ again:
goto again;
}
+update_holders:
/* Ok, if we get here then we're good to go. */
ocfs2_inc_holders(lockres, level);
ret = 0;
unlock:
+ lockres_clear_flags(lockres, OCFS2_LOCK_UPCONVERT_FINISHING);
+
spin_unlock_irqrestore(&lockres->l_lock, flags);
out:
/*
@@ -3155,7 +3187,7 @@ out:
/* Mark the lockres as being dropped. It will no longer be
* queued if blocking, but we still may have to wait on it
* being dequeued from the downconvert thread before we can consider
- * it safe to drop.
+ * it safe to drop.
*
* You can *not* attempt to call cluster_lock on this lockres anymore. */
void ocfs2_mark_lockres_freeing(struct ocfs2_lock_res *lockres)
@@ -3352,6 +3384,7 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb,
unsigned long flags;
int blocking;
int new_level;
+ int level;
int ret = 0;
int set_lvb = 0;
unsigned int gen;
@@ -3360,9 +3393,17 @@ static int ocfs2_unblock_lock(struct ocfs2_super *osb,
spin_lock_irqsave(&lockres->l_lock, flags);
- BUG_ON(!(lockres->l_flags & OCFS2_LOCK_BLOCKED));
-
recheck:
+ /*
+ * Is it still blocking? If not, we have no more work to do.
+ */
+ if (!(lockres->l_flags & OCFS2_LOCK_BLOCKED)) {
+ BUG_ON(lockres->l_blocking != DLM_LOCK_NL);
+ spin_unlock_irqrestore(&lockres->l_lock, flags);
+ ret = 0;
+ goto leave;
+ }
+
if (lockres->l_flags & OCFS2_LOCK_BUSY) {
/* XXX
* This is a *big* race. The OCFS2_LOCK_PENDING flag
@@ -3401,6 +3442,31 @@ recheck:
goto leave;
}
+ /*
+ * This prevents livelocks. OCFS2_LOCK_UPCONVERT_FINISHING flag is
+ * set when the ast is received for an upconvert just before the
+ * OCFS2_LOCK_BUSY flag is cleared. Now if the fs received a bast
+ * on the heels of the ast, we want to delay the downconvert just
+ * enough to allow the up requestor to do its task. Because this
+ * lock is in the blocked queue, the lock will be downconverted
+ * as soon as the requestor is done with the lock.
+ */
+ if (lockres->l_flags & OCFS2_LOCK_UPCONVERT_FINISHING)
+ goto leave_requeue;
+
+ /*
+ * How can we block and yet be at NL? We were trying to upconvert
+ * from NL and got canceled. The code comes back here, and now
+ * we notice and clear BLOCKING.
+ */
+ if (lockres->l_level == DLM_LOCK_NL) {
+ BUG_ON(lockres->l_ex_holders || lockres->l_ro_holders);
+ lockres->l_blocking = DLM_LOCK_NL;
+ lockres_clear_flags(lockres, OCFS2_LOCK_BLOCKED);
+ spin_unlock_irqrestore(&lockres->l_lock, flags);
+ goto leave;
+ }
+
/* if we're blocking an exclusive and we have *any* holders,
* then requeue. */
if ((lockres->l_blocking == DLM_LOCK_EX)
@@ -3438,6 +3504,7 @@ recheck:
* may sleep, so we save off a copy of what we're blocking as
* it may change while we're not holding the spin lock. */
blocking = lockres->l_blocking;
+ level = lockres->l_level;
spin_unlock_irqrestore(&lockres->l_lock, flags);
ctl->unblock_action = lockres->l_ops->downconvert_worker(lockres, blocking);
@@ -3446,7 +3513,7 @@ recheck:
goto leave;
spin_lock_irqsave(&lockres->l_lock, flags);
- if (blocking != lockres->l_blocking) {
+ if ((blocking != lockres->l_blocking) || (level != lockres->l_level)) {
/* If this changed underneath us, then we can't drop
* it just yet. */
goto recheck;
diff --git a/fs/ocfs2/export.c b/fs/ocfs2/export.c
index 15713cbb865..19ad145d2af 100644
--- a/fs/ocfs2/export.c
+++ b/fs/ocfs2/export.c
@@ -239,7 +239,7 @@ static int ocfs2_encode_fh(struct dentry *dentry, u32 *fh_in, int *max_len,
mlog(0, "Encoding parent: blkno: %llu, generation: %u\n",
(unsigned long long)blkno, generation);
}
-
+
*max_len = len;
bail:
diff --git a/fs/ocfs2/extent_map.c b/fs/ocfs2/extent_map.c
index 843db64e9d4..5328529e7fd 100644
--- a/fs/ocfs2/extent_map.c
+++ b/fs/ocfs2/extent_map.c
@@ -37,6 +37,7 @@
#include "extent_map.h"
#include "inode.h"
#include "super.h"
+#include "symlink.h"
#include "buffer_head_io.h"
@@ -191,7 +192,7 @@ static int ocfs2_try_to_merge_extent_map(struct ocfs2_extent_map_item *emi,
emi->ei_clusters += ins->ei_clusters;
return 1;
} else if ((ins->ei_phys + ins->ei_clusters) == emi->ei_phys &&
- (ins->ei_cpos + ins->ei_clusters) == emi->ei_phys &&
+ (ins->ei_cpos + ins->ei_clusters) == emi->ei_cpos &&
ins->ei_flags == emi->ei_flags) {
emi->ei_phys = ins->ei_phys;
emi->ei_cpos = ins->ei_cpos;
@@ -703,6 +704,12 @@ out:
return ret;
}
+/*
+ * The ocfs2_fiemap_inline() may be a little bit misleading, since
+ * it not only handles the fiemap for inlined files, but also deals
+ * with the fast symlink, cause they have no difference for extent
+ * mapping per se.
+ */
static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
struct fiemap_extent_info *fieinfo,
u64 map_start)
@@ -715,11 +722,18 @@ static int ocfs2_fiemap_inline(struct inode *inode, struct buffer_head *di_bh,
struct ocfs2_inode_info *oi = OCFS2_I(inode);
di = (struct ocfs2_dinode *)di_bh->b_data;
- id_count = le16_to_cpu(di->id2.i_data.id_count);
+ if (ocfs2_inode_is_fast_symlink(inode))
+ id_count = ocfs2_fast_symlink_chars(inode->i_sb);
+ else
+ id_count = le16_to_cpu(di->id2.i_data.id_count);
if (map_start < id_count) {
phys = oi->ip_blkno << inode->i_sb->s_blocksize_bits;
- phys += offsetof(struct ocfs2_dinode, id2.i_data.id_data);
+ if (ocfs2_inode_is_fast_symlink(inode))
+ phys += offsetof(struct ocfs2_dinode, id2.i_symlink);
+ else
+ phys += offsetof(struct ocfs2_dinode,
+ id2.i_data.id_data);
ret = fiemap_fill_next_extent(fieinfo, 0, phys, id_count,
flags);
@@ -756,9 +770,10 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
down_read(&OCFS2_I(inode)->ip_alloc_sem);
/*
- * Handle inline-data separately.
+ * Handle inline-data and fast symlink separately.
*/
- if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
+ if ((OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) ||
+ ocfs2_inode_is_fast_symlink(inode)) {
ret = ocfs2_fiemap_inline(inode, di_bh, fieinfo, map_start);
goto out_unlock;
}
@@ -786,6 +801,8 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
fe_flags = 0;
if (rec.e_flags & OCFS2_EXT_UNWRITTEN)
fe_flags |= FIEMAP_EXTENT_UNWRITTEN;
+ if (rec.e_flags & OCFS2_EXT_REFCOUNTED)
+ fe_flags |= FIEMAP_EXTENT_SHARED;
if (is_last)
fe_flags |= FIEMAP_EXTENT_LAST;
len_bytes = (u64)le16_to_cpu(rec.e_leaf_clusters) << osb->s_clustersize_bits;
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 3d30a1c974a..558ce031242 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -749,7 +749,7 @@ static int ocfs2_write_zero_page(struct inode *inode,
int ret;
offset = (size & (PAGE_CACHE_SIZE-1)); /* Within page */
- /* ugh. in prepare/commit_write, if from==to==start of block, we
+ /* ugh. in prepare/commit_write, if from==to==start of block, we
** skip the prepare. make sure we never send an offset for the start
** of a block
*/
@@ -1772,13 +1772,14 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
loff_t *ppos,
size_t count,
int appending,
- int *direct_io)
+ int *direct_io,
+ int *has_refcount)
{
int ret = 0, meta_level = 0;
struct inode *inode = dentry->d_inode;
loff_t saved_pos, end;
- /*
+ /*
* We start with a read level meta lock and only jump to an ex
* if we need to make modifications here.
*/
@@ -1833,6 +1834,8 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
saved_pos,
count,
&meta_level);
+ if (has_refcount)
+ *has_refcount = 1;
}
if (ret < 0) {
@@ -1856,6 +1859,10 @@ static int ocfs2_prepare_inode_for_write(struct dentry *dentry,
break;
}
+ if (has_refcount && *has_refcount == 1) {
+ *direct_io = 0;
+ break;
+ }
/*
* Allowing concurrent direct writes means
* i_size changes wouldn't be synchronized, so
@@ -1899,7 +1906,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
loff_t pos)
{
int ret, direct_io, appending, rw_level, have_alloc_sem = 0;
- int can_do_direct;
+ int can_do_direct, has_refcount = 0;
ssize_t written = 0;
size_t ocount; /* original count */
size_t count; /* after file limit checks */
@@ -1942,7 +1949,7 @@ relock:
can_do_direct = direct_io;
ret = ocfs2_prepare_inode_for_write(file->f_path.dentry, ppos,
iocb->ki_left, appending,
- &can_do_direct);
+ &can_do_direct, &has_refcount);
if (ret < 0) {
mlog_errno(ret);
goto out;
@@ -2006,14 +2013,16 @@ out_dio:
/* buffered aio wouldn't have proper lock coverage today */
BUG_ON(ret == -EIOCBQUEUED && !(file->f_flags & O_DIRECT));
- if ((file->f_flags & O_DSYNC && !direct_io) || IS_SYNC(inode)) {
+ if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
+ ((file->f_flags & O_DIRECT) && has_refcount)) {
ret = filemap_fdatawrite_range(file->f_mapping, pos,
pos + count - 1);
if (ret < 0)
written = ret;
if (!ret && (old_size != i_size_read(inode) ||
- old_clusters != OCFS2_I(inode)->ip_clusters)) {
+ old_clusters != OCFS2_I(inode)->ip_clusters ||
+ has_refcount)) {
ret = jbd2_journal_force_commit(osb->journal->j_journal);
if (ret < 0)
written = ret;
@@ -2024,7 +2033,7 @@ out_dio:
pos + count - 1);
}
- /*
+ /*
* deep in g_f_a_w_n()->ocfs2_direct_IO we pass in a ocfs2_dio_end_io
* function pointer which is called when o_direct io completes so that
* it can unlock our rw lock. (it's the clustered equivalent of
@@ -2062,7 +2071,7 @@ static int ocfs2_splice_to_file(struct pipe_inode_info *pipe,
int ret;
ret = ocfs2_prepare_inode_for_write(out->f_path.dentry, &sd->pos,
- sd->total_len, 0, NULL);
+ sd->total_len, 0, NULL, NULL);
if (ret < 0) {
mlog_errno(ret);
return ret;
@@ -2189,7 +2198,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
goto bail;
}
- /*
+ /*
* buffered reads protect themselves in ->readpage(). O_DIRECT reads
* need locks to protect pending reads from racing with truncate.
*/
@@ -2211,10 +2220,10 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
* We're fine letting folks race truncates and extending
* writes with read across the cluster, just like they can
* locally. Hence no rw_lock during read.
- *
+ *
* Take and drop the meta data lock to update inode fields
* like i_size. This allows the checks down below
- * generic_file_aio_read() a chance of actually working.
+ * generic_file_aio_read() a chance of actually working.
*/
ret = ocfs2_inode_lock_atime(inode, filp->f_vfsmnt, &lock_level);
if (ret < 0) {
@@ -2239,7 +2248,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb,
bail:
if (have_alloc_sem)
up_read(&inode->i_alloc_sem);
- if (rw_level != -1)
+ if (rw_level != -1)
ocfs2_rw_unlock(inode, rw_level);
mlog_exit(ret);
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 0297fb8982b..88459bdd1ff 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -475,7 +475,7 @@ static int ocfs2_read_locked_inode(struct inode *inode,
if (args->fi_flags & OCFS2_FI_FLAG_ORPHAN_RECOVERY) {
status = ocfs2_try_open_lock(inode, 0);
if (status) {
- make_bad_inode(inode);
+ make_bad_inode(inode);
return status;
}
}
@@ -684,7 +684,7 @@ bail:
return status;
}
-/*
+/*
* Serialize with orphan dir recovery. If the process doing
* recovery on this orphan dir does an iget() with the dir
* i_mutex held, we'll deadlock here. Instead we detect this
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 31fbb061951..7d9d9c132ce 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -7,6 +7,7 @@
#include <linux/fs.h>
#include <linux/mount.h>
+#include <linux/compat.h>
#define MLOG_MASK_PREFIX ML_INODE
#include <cluster/masklog.h>
@@ -181,6 +182,10 @@ long ocfs2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
#ifdef CONFIG_COMPAT
long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
{
+ bool preserve;
+ struct reflink_arguments args;
+ struct inode *inode = file->f_path.dentry->d_inode;
+
switch (cmd) {
case OCFS2_IOC32_GETFLAGS:
cmd = OCFS2_IOC_GETFLAGS;
@@ -195,8 +200,15 @@ long ocfs2_compat_ioctl(struct file *file, unsigned cmd, unsigned long arg)
case OCFS2_IOC_GROUP_EXTEND:
case OCFS2_IOC_GROUP_ADD:
case OCFS2_IOC_GROUP_ADD64:
- case OCFS2_IOC_REFLINK:
break;
+ case OCFS2_IOC_REFLINK:
+ if (copy_from_user(&args, (struct reflink_arguments *)arg,
+ sizeof(args)))
+ return -EFAULT;
+ preserve = (args.preserve != 0);
+
+ return ocfs2_reflink_ioctl(inode, compat_ptr(args.old_path),
+ compat_ptr(args.new_path), preserve);
default:
return -ENOIOCTLCMD;
}
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index bf34c491ae9..9336c60e3a3 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -2034,7 +2034,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb,
status = -ENOENT;
mlog_errno(status);
return status;
- }
+ }
mutex_lock(&orphan_dir_inode->i_mutex);
status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index f010b22b1c4..50fb26a6a5f 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -2108,6 +2108,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
}
did_quota_inode = 1;
+ inode->i_nlink = 0;
/* do the real work now. */
status = ocfs2_mknod_locked(osb, dir, inode,
0, &new_di_bh, parent_di_bh, handle,
@@ -2136,6 +2137,7 @@ int ocfs2_create_inode_in_orphan(struct inode *dir,
if (status < 0)
mlog_errno(status);
+ insert_inode_hash(inode);
leave:
if (status < 0 && did_quota_inode)
vfs_dq_free_inode(inode);
@@ -2267,6 +2269,8 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
di = (struct ocfs2_dinode *)di_bh->b_data;
le32_add_cpu(&di->i_flags, -OCFS2_ORPHANED_FL);
di->i_orphaned_slot = 0;
+ inode->i_nlink = 1;
+ ocfs2_set_links_count(di, inode->i_nlink);
ocfs2_journal_dirty(handle, di_bh);
status = ocfs2_add_entry(handle, dentry, inode,
@@ -2284,7 +2288,6 @@ int ocfs2_mv_orphaned_inode_to_new(struct inode *dir,
goto out_commit;
}
- insert_inode_hash(inode);
dentry->d_op = &ocfs2_dentry_ops;
d_instantiate(dentry, inode);
status = 0;
@@ -2326,4 +2329,5 @@ const struct inode_operations ocfs2_dir_iops = {
.getxattr = generic_getxattr,
.listxattr = ocfs2_listxattr,
.removexattr = generic_removexattr,
+ .fiemap = ocfs2_fiemap,
};
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index d963d863870..740f448041e 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -136,6 +136,10 @@ enum ocfs2_unlock_action {
#define OCFS2_LOCK_PENDING (0x00000400) /* This lockres is pending a
call to dlm_lock. Only
exists with BUSY set. */
+#define OCFS2_LOCK_UPCONVERT_FINISHING (0x00000800) /* blocks the dc thread
+ * from downconverting
+ * before the upconvert
+ * has completed */
struct ocfs2_lock_res_ops;
@@ -245,9 +249,11 @@ enum ocfs2_mount_options
OCFS2_MOUNT_LOCALFLOCKS = 1 << 5, /* No cluster aware user file locks */
OCFS2_MOUNT_NOUSERXATTR = 1 << 6, /* No user xattr */
OCFS2_MOUNT_INODE64 = 1 << 7, /* Allow inode numbers > 2^32 */
- OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* POSIX access control lists */
- OCFS2_MOUNT_USRQUOTA = 1 << 9, /* We support user quotas */
- OCFS2_MOUNT_GRPQUOTA = 1 << 10, /* We support group quotas */
+ OCFS2_MOUNT_POSIX_ACL = 1 << 8, /* Force POSIX access control lists */
+ OCFS2_MOUNT_NO_POSIX_ACL = 1 << 9, /* Disable POSIX access
+ control lists */
+ OCFS2_MOUNT_USRQUOTA = 1 << 10, /* We support user quotas */
+ OCFS2_MOUNT_GRPQUOTA = 1 << 11, /* We support group quotas */
};
#define OCFS2_OSB_SOFT_RO 0x0001
diff --git a/fs/ocfs2/ocfs2_fs.h b/fs/ocfs2/ocfs2_fs.h
index e9431e4a5e7..7638a38c32b 100644
--- a/fs/ocfs2/ocfs2_fs.h
+++ b/fs/ocfs2/ocfs2_fs.h
@@ -1202,7 +1202,7 @@ struct ocfs2_local_disk_dqinfo {
/* Header of one chunk of a quota file */
struct ocfs2_local_disk_chunk {
__le32 dqc_free; /* Number of free entries in the bitmap */
- u8 dqc_bitmap[0]; /* Bitmap of entries in the corresponding
+ __u8 dqc_bitmap[0]; /* Bitmap of entries in the corresponding
* chunk of quota file */
};
@@ -1417,9 +1417,16 @@ static inline int ocfs2_fast_symlink_chars(int blocksize)
return blocksize - offsetof(struct ocfs2_dinode, id2.i_symlink);
}
-static inline int ocfs2_max_inline_data(int blocksize)
+static inline int ocfs2_max_inline_data_with_xattr(int blocksize,
+ struct ocfs2_dinode *di)
{
- return blocksize - offsetof(struct ocfs2_dinode, id2.i_data.id_data);
+ if (di && (di->i_dyn_features & OCFS2_INLINE_XATTR_FL))
+ return blocksize -
+ offsetof(struct ocfs2_dinode, id2.i_data.id_data) -
+ di->i_xattr_inline_size;
+ else
+ return blocksize -
+ offsetof(struct ocfs2_dinode, id2.i_data.id_data);
}
static inline int ocfs2_extent_recs_per_inode(int blocksize)
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 30967e3f5e4..8ae65c9c020 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -276,7 +276,7 @@ static void ocfs2_erase_refcount_tree_from_list(struct ocfs2_super *osb,
spin_unlock(&osb->osb_lock);
}
-void ocfs2_kref_remove_refcount_tree(struct kref *kref)
+static void ocfs2_kref_remove_refcount_tree(struct kref *kref)
{
struct ocfs2_refcount_tree *tree =
container_of(kref, struct ocfs2_refcount_tree, rf_getcnt);
@@ -524,23 +524,6 @@ out:
return ret;
}
-int ocfs2_lock_refcount_tree_by_inode(struct inode *inode, int rw,
- struct ocfs2_refcount_tree **ret_tree,
- struct buffer_head **ref_bh)
-{
- int ret;
- u64 ref_blkno;
-
- ret = ocfs2_get_refcount_block(inode, &ref_blkno);
- if (ret) {
- mlog_errno(ret);
- return ret;
- }
-
- return ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb), ref_blkno,
- rw, ret_tree, ref_bh);
-}
-
void ocfs2_unlock_refcount_tree(struct ocfs2_super *osb,
struct ocfs2_refcount_tree *tree, int rw)
{
@@ -969,6 +952,103 @@ out:
}
/*
+ * Find the end range for a leaf refcount block indicated by
+ * el->l_recs[index].e_blkno.
+ */
+static int ocfs2_get_refcount_cpos_end(struct ocfs2_caching_info *ci,
+ struct buffer_head *ref_root_bh,
+ struct ocfs2_extent_block *eb,
+ struct ocfs2_extent_list *el,
+ int index, u32 *cpos_end)
+{
+ int ret, i, subtree_root;
+ u32 cpos;
+ u64 blkno;
+ struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
+ struct ocfs2_path *left_path = NULL, *right_path = NULL;
+ struct ocfs2_extent_tree et;
+ struct ocfs2_extent_list *tmp_el;
+
+ if (index < le16_to_cpu(el->l_next_free_rec) - 1) {
+ /*
+ * We have a extent rec after index, so just use the e_cpos
+ * of the next extent rec.
+ */
+ *cpos_end = le32_to_cpu(el->l_recs[index+1].e_cpos);
+ return 0;
+ }
+
+ if (!eb || (eb && !eb->h_next_leaf_blk)) {
+ /*
+ * We are the last extent rec, so any high cpos should
+ * be stored in this leaf refcount block.
+ */
+ *cpos_end = UINT_MAX;
+ return 0;
+ }
+
+ /*
+ * If the extent block isn't the last one, we have to find
+ * the subtree root between this extent block and the next
+ * leaf extent block and get the corresponding e_cpos from
+ * the subroot. Otherwise we may corrupt the b-tree.
+ */
+ ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
+
+ left_path = ocfs2_new_path_from_et(&et);
+ if (!left_path) {
+ ret = -ENOMEM;
+ mlog_errno(ret);
+ goto out;
+ }
+
+ cpos = le32_to_cpu(eb->h_list.l_recs[index].e_cpos);
+ ret = ocfs2_find_path(ci, left_path, cpos);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ right_path = ocfs2_new_path_from_path(left_path);
+ if (!right_path) {
+ ret = -ENOMEM;
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_find_cpos_for_right_leaf(sb, left_path, &cpos);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ ret = ocfs2_find_path(ci, right_path, cpos);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ subtree_root = ocfs2_find_subtree_root(&et, left_path,
+ right_path);
+
+ tmp_el = left_path->p_node[subtree_root].el;
+ blkno = left_path->p_node[subtree_root+1].bh->b_blocknr;
+ for (i = 0; i < le32_to_cpu(tmp_el->l_next_free_rec); i++) {
+ if (le64_to_cpu(tmp_el->l_recs[i].e_blkno) == blkno) {
+ *cpos_end = le32_to_cpu(tmp_el->l_recs[i+1].e_cpos);
+ break;
+ }
+ }
+
+ BUG_ON(i == le32_to_cpu(tmp_el->l_next_free_rec));
+
+out:
+ ocfs2_free_path(left_path);
+ ocfs2_free_path(right_path);
+ return ret;
+}
+
+/*
* Given a cpos and len, try to find the refcount record which contains cpos.
* 1. If cpos can be found in one refcount record, return the record.
* 2. If cpos can't be found, return a fake record which start from cpos
@@ -983,10 +1063,10 @@ static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci,
struct buffer_head **ret_bh)
{
int ret = 0, i, found;
- u32 low_cpos;
+ u32 low_cpos, uninitialized_var(cpos_end);
struct ocfs2_extent_list *el;
- struct ocfs2_extent_rec *tmp, *rec = NULL;
- struct ocfs2_extent_block *eb;
+ struct ocfs2_extent_rec *rec = NULL;
+ struct ocfs2_extent_block *eb = NULL;
struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL;
struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
struct ocfs2_refcount_block *rb =
@@ -1034,12 +1114,16 @@ static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci,
}
}
- /* adjust len when we have ocfs2_extent_rec after it. */
- if (found && i < le16_to_cpu(el->l_next_free_rec) - 1) {
- tmp = &el->l_recs[i+1];
+ if (found) {
+ ret = ocfs2_get_refcount_cpos_end(ci, ref_root_bh,
+ eb, el, i, &cpos_end);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
- if (le32_to_cpu(tmp->e_cpos) < cpos + len)
- len = le32_to_cpu(tmp->e_cpos) - cpos;
+ if (cpos_end < low_cpos + len)
+ len = cpos_end - low_cpos;
}
ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno),
@@ -1418,7 +1502,7 @@ static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh,
/* change old and new rl_used accordingly. */
le16_add_cpu(&rl->rl_used, -num_moved);
- new_rl->rl_used = cpu_to_le32(num_moved);
+ new_rl->rl_used = cpu_to_le16(num_moved);
sort(&rl->rl_recs, le16_to_cpu(rl->rl_used),
sizeof(struct ocfs2_refcount_rec),
@@ -1797,7 +1881,8 @@ static int ocfs2_split_refcount_rec(handle_t *handle,
recs_need++;
/* If the leaf block don't have enough record, expand it. */
- if (le16_to_cpu(rf_list->rl_used) + recs_need > rf_list->rl_count) {
+ if (le16_to_cpu(rf_list->rl_used) + recs_need >
+ le16_to_cpu(rf_list->rl_count)) {
struct ocfs2_refcount_rec tmp_rec;
u64 cpos = le64_to_cpu(orig_rec->r_cpos);
len = le32_to_cpu(orig_rec->r_clusters);
@@ -1859,7 +1944,7 @@ static int ocfs2_split_refcount_rec(handle_t *handle,
memcpy(tail_rec, orig_rec, sizeof(struct ocfs2_refcount_rec));
le64_add_cpu(&tail_rec->r_cpos,
le32_to_cpu(tail_rec->r_clusters) - len);
- tail_rec->r_clusters = le32_to_cpu(len);
+ tail_rec->r_clusters = cpu_to_le32(len);
}
/*
@@ -2860,7 +2945,7 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
while (offset < end) {
page_index = offset >> PAGE_CACHE_SHIFT;
- map_end = (page_index + 1) << PAGE_CACHE_SHIFT;
+ map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT;
if (map_end > end)
map_end = end;
@@ -2872,8 +2957,12 @@ static int ocfs2_duplicate_clusters_by_page(handle_t *handle,
page = grab_cache_page(mapping, page_index);
- /* This page can't be dirtied before we CoW it out. */
- BUG_ON(PageDirty(page));
+ /*
+ * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page
+ * can't be dirtied before we CoW it out.
+ */
+ if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize)
+ BUG_ON(PageDirty(page));
if (!PageUptodate(page)) {
ret = block_read_full_page(page, ocfs2_get_block);
@@ -3085,7 +3174,7 @@ static int ocfs2_cow_sync_writeback(struct super_block *sb,
while (offset < end) {
page_index = offset >> PAGE_CACHE_SHIFT;
- map_end = (page_index + 1) << PAGE_CACHE_SHIFT;
+ map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT;
if (map_end > end)
map_end = end;
@@ -3840,8 +3929,7 @@ static int ocfs2_add_refcounted_extent(struct inode *inode,
}
ret = ocfs2_insert_extent(handle, et, cpos,
- cpu_to_le64(ocfs2_clusters_to_blocks(inode->i_sb,
- p_cluster)),
+ ocfs2_clusters_to_blocks(inode->i_sb, p_cluster),
num_clusters, ext_flags, meta_ac);
if (ret) {
mlog_errno(ret);
@@ -4253,8 +4341,8 @@ static int ocfs2_user_path_parent(const char __user *path,
* @new_dentry: target dentry
* @preserve: if true, preserve all file attributes
*/
-int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
- struct dentry *new_dentry, bool preserve)
+static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
+ struct dentry *new_dentry, bool preserve)
{
struct inode *inode = old_dentry->d_inode;
int error;
diff --git a/fs/ocfs2/stack_o2cb.c b/fs/ocfs2/stack_o2cb.c
index e49c4105026..3038c92af49 100644
--- a/fs/ocfs2/stack_o2cb.c
+++ b/fs/ocfs2/stack_o2cb.c
@@ -277,7 +277,7 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn)
u32 dlm_key;
struct dlm_ctxt *dlm;
struct o2dlm_private *priv;
- struct dlm_protocol_version dlm_version;
+ struct dlm_protocol_version fs_version;
BUG_ON(conn == NULL);
BUG_ON(o2cb_stack.sp_proto == NULL);
@@ -304,18 +304,18 @@ static int o2cb_cluster_connect(struct ocfs2_cluster_connection *conn)
/* used by the dlm code to make message headers unique, each
* node in this domain must agree on this. */
dlm_key = crc32_le(0, conn->cc_name, conn->cc_namelen);
- dlm_version.pv_major = conn->cc_version.pv_major;
- dlm_version.pv_minor = conn->cc_version.pv_minor;
+ fs_version.pv_major = conn->cc_version.pv_major;
+ fs_version.pv_minor = conn->cc_version.pv_minor;
- dlm = dlm_register_domain(conn->cc_name, dlm_key, &dlm_version);
+ dlm = dlm_register_domain(conn->cc_name, dlm_key, &fs_version);
if (IS_ERR(dlm)) {
rc = PTR_ERR(dlm);
mlog_errno(rc);
goto out_free;
}
- conn->cc_version.pv_major = dlm_version.pv_major;
- conn->cc_version.pv_minor = dlm_version.pv_minor;
+ conn->cc_version.pv_major = fs_version.pv_major;
+ conn->cc_version.pv_minor = fs_version.pv_minor;
conn->cc_lockspace = dlm;
dlm_register_eviction_cb(dlm, &priv->op_eviction_cb);
diff --git a/fs/ocfs2/stack_user.c b/fs/ocfs2/stack_user.c
index ff4c798a563..da78a2a334f 100644
--- a/fs/ocfs2/stack_user.c
+++ b/fs/ocfs2/stack_user.c
@@ -814,7 +814,7 @@ static int fs_protocol_compare(struct ocfs2_protocol_version *existing,
static int user_cluster_connect(struct ocfs2_cluster_connection *conn)
{
dlm_lockspace_t *fsdlm;
- struct ocfs2_live_connection *control;
+ struct ocfs2_live_connection *uninitialized_var(control);
int rc = 0;
BUG_ON(conn == NULL);
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index 14f47d2bfe0..755cd49a5ef 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -100,6 +100,8 @@ struct mount_options
static int ocfs2_parse_options(struct super_block *sb, char *options,
struct mount_options *mopt,
int is_remount);
+static int ocfs2_check_set_options(struct super_block *sb,
+ struct mount_options *options);
static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt);
static void ocfs2_put_super(struct super_block *sb);
static int ocfs2_mount_volume(struct super_block *sb);
@@ -600,7 +602,8 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data)
lock_kernel();
- if (!ocfs2_parse_options(sb, data, &parsed_options, 1)) {
+ if (!ocfs2_parse_options(sb, data, &parsed_options, 1) ||
+ !ocfs2_check_set_options(sb, &parsed_options)) {
ret = -EINVAL;
goto out;
}
@@ -691,8 +694,6 @@ unlock_osb:
if (!ret) {
/* Only save off the new mount options in case of a successful
* remount. */
- if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR))
- parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
osb->s_mount_opt = parsed_options.mount_opt;
osb->s_atime_quantum = parsed_options.atime_quantum;
osb->preferred_slot = parsed_options.slot;
@@ -701,6 +702,10 @@ unlock_osb:
if (!ocfs2_is_hard_readonly(osb))
ocfs2_set_journal_params(osb);
+
+ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
+ ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ?
+ MS_POSIXACL : 0);
}
out:
unlock_kernel();
@@ -1011,31 +1016,16 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
brelse(bh);
bh = NULL;
- if (!(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_XATTR))
- parsed_options.mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
-
+ if (!ocfs2_check_set_options(sb, &parsed_options)) {
+ status = -EINVAL;
+ goto read_super_error;
+ }
osb->s_mount_opt = parsed_options.mount_opt;
osb->s_atime_quantum = parsed_options.atime_quantum;
osb->preferred_slot = parsed_options.slot;
osb->osb_commit_interval = parsed_options.commit_interval;
osb->local_alloc_default_bits = ocfs2_megabytes_to_clusters(sb, parsed_options.localalloc_opt);
osb->local_alloc_bits = osb->local_alloc_default_bits;
- if (osb->s_mount_opt & OCFS2_MOUNT_USRQUOTA &&
- !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
- OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
- status = -EINVAL;
- mlog(ML_ERROR, "User quotas were requested, but this "
- "filesystem does not have the feature enabled.\n");
- goto read_super_error;
- }
- if (osb->s_mount_opt & OCFS2_MOUNT_GRPQUOTA &&
- !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
- OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
- status = -EINVAL;
- mlog(ML_ERROR, "Group quotas were requested, but this "
- "filesystem does not have the feature enabled.\n");
- goto read_super_error;
- }
status = ocfs2_verify_userspace_stack(osb, &parsed_options);
if (status)
@@ -1072,7 +1062,7 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
"file system, but write access is "
"unavailable.\n");
else
- mlog_errno(status);
+ mlog_errno(status);
goto read_super_error;
}
@@ -1245,6 +1235,40 @@ static struct file_system_type ocfs2_fs_type = {
.next = NULL
};
+static int ocfs2_check_set_options(struct super_block *sb,
+ struct mount_options *options)
+{
+ if (options->mount_opt & OCFS2_MOUNT_USRQUOTA &&
+ !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
+ OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
+ mlog(ML_ERROR, "User quotas were requested, but this "
+ "filesystem does not have the feature enabled.\n");
+ return 0;
+ }
+ if (options->mount_opt & OCFS2_MOUNT_GRPQUOTA &&
+ !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
+ OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
+ mlog(ML_ERROR, "Group quotas were requested, but this "
+ "filesystem does not have the feature enabled.\n");
+ return 0;
+ }
+ if (options->mount_opt & OCFS2_MOUNT_POSIX_ACL &&
+ !OCFS2_HAS_INCOMPAT_FEATURE(sb, OCFS2_FEATURE_INCOMPAT_XATTR)) {
+ mlog(ML_ERROR, "ACL support requested but extended attributes "
+ "feature is not enabled\n");
+ return 0;
+ }
+ /* No ACL setting specified? Use XATTR feature... */
+ if (!(options->mount_opt & (OCFS2_MOUNT_POSIX_ACL |
+ OCFS2_MOUNT_NO_POSIX_ACL))) {
+ if (OCFS2_HAS_INCOMPAT_FEATURE(sb, OCFS2_FEATURE_INCOMPAT_XATTR))
+ options->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
+ else
+ options->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL;
+ }
+ return 1;
+}
+
static int ocfs2_parse_options(struct super_block *sb,
char *options,
struct mount_options *mopt,
@@ -1392,40 +1416,19 @@ static int ocfs2_parse_options(struct super_block *sb,
mopt->mount_opt |= OCFS2_MOUNT_INODE64;
break;
case Opt_usrquota:
- /* We check only on remount, otherwise features
- * aren't yet initialized. */
- if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
- OCFS2_FEATURE_RO_COMPAT_USRQUOTA)) {
- mlog(ML_ERROR, "User quota requested but "
- "filesystem feature is not set\n");
- status = 0;
- goto bail;
- }
mopt->mount_opt |= OCFS2_MOUNT_USRQUOTA;
break;
case Opt_grpquota:
- if (is_remount && !OCFS2_HAS_RO_COMPAT_FEATURE(sb,
- OCFS2_FEATURE_RO_COMPAT_GRPQUOTA)) {
- mlog(ML_ERROR, "Group quota requested but "
- "filesystem feature is not set\n");
- status = 0;
- goto bail;
- }
mopt->mount_opt |= OCFS2_MOUNT_GRPQUOTA;
break;
-#ifdef CONFIG_OCFS2_FS_POSIX_ACL
case Opt_acl:
mopt->mount_opt |= OCFS2_MOUNT_POSIX_ACL;
+ mopt->mount_opt &= ~OCFS2_MOUNT_NO_POSIX_ACL;
break;
case Opt_noacl:
+ mopt->mount_opt |= OCFS2_MOUNT_NO_POSIX_ACL;
mopt->mount_opt &= ~OCFS2_MOUNT_POSIX_ACL;
break;
-#else
- case Opt_acl:
- case Opt_noacl:
- printk(KERN_INFO "ocfs2 (no)acl options not supported\n");
- break;
-#endif
default:
mlog(ML_ERROR,
"Unrecognized mount option \"%s\" "
@@ -1502,12 +1505,10 @@ static int ocfs2_show_options(struct seq_file *s, struct vfsmount *mnt)
if (opts & OCFS2_MOUNT_INODE64)
seq_printf(s, ",inode64");
-#ifdef CONFIG_OCFS2_FS_POSIX_ACL
if (opts & OCFS2_MOUNT_POSIX_ACL)
seq_printf(s, ",acl");
else
seq_printf(s, ",noacl");
-#endif
return 0;
}
diff --git a/fs/ocfs2/symlink.c b/fs/ocfs2/symlink.c
index e3421030a69..32499d213fc 100644
--- a/fs/ocfs2/symlink.c
+++ b/fs/ocfs2/symlink.c
@@ -137,20 +137,20 @@ static void *ocfs2_fast_follow_link(struct dentry *dentry,
}
memcpy(link, target, len);
- nd_set_link(nd, link);
bail:
+ nd_set_link(nd, status ? ERR_PTR(status) : link);
brelse(bh);
mlog_exit(status);
- return status ? ERR_PTR(status) : link;
+ return NULL;
}
static void ocfs2_fast_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie)
{
- char *link = cookie;
-
- kfree(link);
+ char *link = nd_get_link(nd);
+ if (!IS_ERR(link))
+ kfree(link);
}
const struct inode_operations ocfs2_symlink_inode_operations = {
@@ -163,6 +163,7 @@ const struct inode_operations ocfs2_symlink_inode_operations = {
.getxattr = generic_getxattr,
.listxattr = ocfs2_listxattr,
.removexattr = generic_removexattr,
+ .fiemap = ocfs2_fiemap,
};
const struct inode_operations ocfs2_fast_symlink_inode_operations = {
.readlink = ocfs2_readlink,
@@ -174,4 +175,5 @@ const struct inode_operations ocfs2_fast_symlink_inode_operations = {
.getxattr = generic_getxattr,
.listxattr = ocfs2_listxattr,
.removexattr = generic_removexattr,
+ .fiemap = ocfs2_fiemap,
};
diff --git a/fs/ocfs2/uptodate.c b/fs/ocfs2/uptodate.c
index c61369342a2..a0a120e82b9 100644
--- a/fs/ocfs2/uptodate.c
+++ b/fs/ocfs2/uptodate.c
@@ -267,8 +267,8 @@ static int ocfs2_buffer_cached(struct ocfs2_caching_info *ci,
}
/* Warning: even if it returns true, this does *not* guarantee that
- * the block is stored in our inode metadata cache.
- *
+ * the block is stored in our inode metadata cache.
+ *
* This can be called under lock_buffer()
*/
int ocfs2_buffer_uptodate(struct ocfs2_caching_info *ci,
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index fe3419068df..8fc6fb071c6 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -98,10 +98,8 @@ static struct ocfs2_xattr_def_value_root def_xv = {
struct xattr_handler *ocfs2_xattr_handlers[] = {
&ocfs2_xattr_user_handler,
-#ifdef CONFIG_OCFS2_FS_POSIX_ACL
&ocfs2_xattr_acl_access_handler,
&ocfs2_xattr_acl_default_handler,
-#endif
&ocfs2_xattr_trusted_handler,
&ocfs2_xattr_security_handler,
NULL
@@ -109,12 +107,10 @@ struct xattr_handler *ocfs2_xattr_handlers[] = {
static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
[OCFS2_XATTR_INDEX_USER] = &ocfs2_xattr_user_handler,
-#ifdef CONFIG_OCFS2_FS_POSIX_ACL
[OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
= &ocfs2_xattr_acl_access_handler,
[OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
= &ocfs2_xattr_acl_default_handler,
-#endif
[OCFS2_XATTR_INDEX_TRUSTED] = &ocfs2_xattr_trusted_handler,
[OCFS2_XATTR_INDEX_SECURITY] = &ocfs2_xattr_security_handler,
};
@@ -205,8 +201,6 @@ static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
int offset,
struct ocfs2_xattr_value_root **xv,
struct buffer_head **bh);
-static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags);
static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
{
@@ -6066,7 +6060,7 @@ static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
* to the extent block, so just calculate a maximum record num.
*/
if (!xv->xr_list.l_tree_depth)
- *num_recs += xv->xr_list.l_next_free_rec;
+ *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec);
else
*num_recs += ocfs2_clusters_for_bytes(sb,
XATTR_SIZE_MAX);
@@ -6978,9 +6972,9 @@ int ocfs2_init_security_and_acl(struct inode *dir,
ret = ocfs2_init_security_get(inode, dir, &si);
if (!ret) {
- ret = ocfs2_xattr_security_set(inode, si.name,
- si.value, si.value_len,
- XATTR_CREATE);
+ ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
+ si.name, si.value, si.value_len,
+ XATTR_CREATE);
if (ret) {
mlog_errno(ret);
goto leave;
@@ -7008,9 +7002,9 @@ leave:
/*
* 'security' attributes support
*/
-static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
+static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list,
size_t list_size, const char *name,
- size_t name_len)
+ size_t name_len, int type)
{
const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
@@ -7023,23 +7017,23 @@ static size_t ocfs2_xattr_security_list(struct inode *inode, char *list,
return total_len;
}
-static int ocfs2_xattr_security_get(struct inode *inode, const char *name,
- void *buffer, size_t size)
+static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
{
if (strcmp(name, "") == 0)
return -EINVAL;
- return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_SECURITY, name,
- buffer, size);
+ return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
+ name, buffer, size);
}
-static int ocfs2_xattr_security_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
+static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags, int type)
{
if (strcmp(name, "") == 0)
return -EINVAL;
- return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY, name, value,
- size, flags);
+ return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
+ name, value, size, flags);
}
int ocfs2_init_security_get(struct inode *inode,
@@ -7076,9 +7070,9 @@ struct xattr_handler ocfs2_xattr_security_handler = {
/*
* 'trusted' attributes support
*/
-static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
+static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list,
size_t list_size, const char *name,
- size_t name_len)
+ size_t name_len, int type)
{
const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
@@ -7091,23 +7085,23 @@ static size_t ocfs2_xattr_trusted_list(struct inode *inode, char *list,
return total_len;
}
-static int ocfs2_xattr_trusted_get(struct inode *inode, const char *name,
- void *buffer, size_t size)
+static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
{
if (strcmp(name, "") == 0)
return -EINVAL;
- return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_TRUSTED, name,
- buffer, size);
+ return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
+ name, buffer, size);
}
-static int ocfs2_xattr_trusted_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
+static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags, int type)
{
if (strcmp(name, "") == 0)
return -EINVAL;
- return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_TRUSTED, name, value,
- size, flags);
+ return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
+ name, value, size, flags);
}
struct xattr_handler ocfs2_xattr_trusted_handler = {
@@ -7120,13 +7114,13 @@ struct xattr_handler ocfs2_xattr_trusted_handler = {
/*
* 'user' attributes support
*/
-static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
+static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list,
size_t list_size, const char *name,
- size_t name_len)
+ size_t name_len, int type)
{
const size_t prefix_len = XATTR_USER_PREFIX_LEN;
const size_t total_len = prefix_len + name_len + 1;
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
return 0;
@@ -7139,31 +7133,31 @@ static size_t ocfs2_xattr_user_list(struct inode *inode, char *list,
return total_len;
}
-static int ocfs2_xattr_user_get(struct inode *inode, const char *name,
- void *buffer, size_t size)
+static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name,
+ void *buffer, size_t size, int type)
{
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
if (strcmp(name, "") == 0)
return -EINVAL;
if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
return -EOPNOTSUPP;
- return ocfs2_xattr_get(inode, OCFS2_XATTR_INDEX_USER, name,
+ return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_USER, name,
buffer, size);
}
-static int ocfs2_xattr_user_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
+static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags, int type)
{
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
if (strcmp(name, "") == 0)
return -EINVAL;
if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
return -EOPNOTSUPP;
- return ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_USER, name, value,
- size, flags);
+ return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_USER,
+ name, value, size, flags);
}
struct xattr_handler ocfs2_xattr_user_handler = {
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index 08e36389f56..abd72a47f52 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -40,10 +40,8 @@ struct ocfs2_security_xattr_info {
extern struct xattr_handler ocfs2_xattr_user_handler;
extern struct xattr_handler ocfs2_xattr_trusted_handler;
extern struct xattr_handler ocfs2_xattr_security_handler;
-#ifdef CONFIG_OCFS2_FS_POSIX_ACL
extern struct xattr_handler ocfs2_xattr_acl_access_handler;
extern struct xattr_handler ocfs2_xattr_acl_default_handler;
-#endif
extern struct xattr_handler *ocfs2_xattr_handlers[];
ssize_t ocfs2_listxattr(struct dentry *, char *, size_t);
diff --git a/fs/open.c b/fs/open.c
index b4b31d277f3..040cef72bc0 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -30,6 +30,9 @@
#include <linux/audit.h>
#include <linux/falloc.h>
#include <linux/fs_struct.h>
+#include <linux/ima.h>
+
+#include "internal.h"
int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
@@ -818,15 +821,14 @@ static inline int __get_file_write_access(struct inode *inode,
}
static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
- int flags, struct file *f,
+ struct file *f,
int (*open)(struct inode *, struct file *),
const struct cred *cred)
{
struct inode *inode;
int error;
- f->f_flags = flags;
- f->f_mode = (__force fmode_t)((flags+1) & O_ACCMODE) | FMODE_LSEEK |
+ f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
FMODE_PREAD | FMODE_PWRITE;
inode = dentry->d_inode;
if (f->f_mode & FMODE_WRITE) {
@@ -855,6 +857,7 @@ static struct file *__dentry_open(struct dentry *dentry, struct vfsmount *mnt,
if (error)
goto cleanup_all;
}
+ ima_counts_get(f);
f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
@@ -926,7 +929,6 @@ struct file *lookup_instantiate_filp(struct nameidata *nd, struct dentry *dentry
if (IS_ERR(dentry))
goto out_err;
nd->intent.open.file = __dentry_open(dget(dentry), mntget(nd->path.mnt),
- nd->intent.open.flags - 1,
nd->intent.open.file,
open, cred);
out:
@@ -945,7 +947,7 @@ EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
*
* Note that this function destroys the original nameidata
*/
-struct file *nameidata_to_filp(struct nameidata *nd, int flags)
+struct file *nameidata_to_filp(struct nameidata *nd)
{
const struct cred *cred = current_cred();
struct file *filp;
@@ -954,7 +956,7 @@ struct file *nameidata_to_filp(struct nameidata *nd, int flags)
filp = nd->intent.open.file;
/* Has the filesystem initialised the file for us? */
if (filp->f_path.dentry == NULL)
- filp = __dentry_open(nd->path.dentry, nd->path.mnt, flags, filp,
+ filp = __dentry_open(nd->path.dentry, nd->path.mnt, filp,
NULL, cred);
else
path_put(&nd->path);
@@ -993,7 +995,8 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
return ERR_PTR(error);
}
- return __dentry_open(dentry, mnt, flags, f, NULL, cred);
+ f->f_flags = flags;
+ return __dentry_open(dentry, mnt, f, NULL, cred);
}
EXPORT_SYMBOL(dentry_open);
diff --git a/fs/pipe.c b/fs/pipe.c
index ae17d026aaa..37ba29ff315 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -906,17 +906,6 @@ void free_pipe_info(struct inode *inode)
}
static struct vfsmount *pipe_mnt __read_mostly;
-static int pipefs_delete_dentry(struct dentry *dentry)
-{
- /*
- * At creation time, we pretended this dentry was hashed
- * (by clearing DCACHE_UNHASHED bit in d_flags)
- * At delete time, we restore the truth : not hashed.
- * (so that dput() can proceed correctly)
- */
- dentry->d_flags |= DCACHE_UNHASHED;
- return 0;
-}
/*
* pipefs_dname() is called from d_path().
@@ -928,7 +917,6 @@ static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
}
static const struct dentry_operations pipefs_dentry_operations = {
- .d_delete = pipefs_delete_dentry,
.d_dname = pipefs_dname,
};
@@ -974,7 +962,7 @@ struct file *create_write_pipe(int flags)
int err;
struct inode *inode;
struct file *f;
- struct dentry *dentry;
+ struct path path;
struct qstr name = { .name = "" };
err = -ENFILE;
@@ -983,21 +971,16 @@ struct file *create_write_pipe(int flags)
goto err;
err = -ENOMEM;
- dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name);
- if (!dentry)
+ path.dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &name);
+ if (!path.dentry)
goto err_inode;
+ path.mnt = mntget(pipe_mnt);
- dentry->d_op = &pipefs_dentry_operations;
- /*
- * We dont want to publish this dentry into global dentry hash table.
- * We pretend dentry is already hashed, by unsetting DCACHE_UNHASHED
- * This permits a working /proc/$pid/fd/XXX on pipes
- */
- dentry->d_flags &= ~DCACHE_UNHASHED;
- d_instantiate(dentry, inode);
+ path.dentry->d_op = &pipefs_dentry_operations;
+ d_instantiate(path.dentry, inode);
err = -ENFILE;
- f = alloc_file(pipe_mnt, dentry, FMODE_WRITE, &write_pipefifo_fops);
+ f = alloc_file(&path, FMODE_WRITE, &write_pipefifo_fops);
if (!f)
goto err_dentry;
f->f_mapping = inode->i_mapping;
@@ -1009,7 +992,7 @@ struct file *create_write_pipe(int flags)
err_dentry:
free_pipe_info(inode);
- dput(dentry);
+ path_put(&path);
return ERR_PTR(err);
err_inode:
@@ -1028,20 +1011,14 @@ void free_write_pipe(struct file *f)
struct file *create_read_pipe(struct file *wrf, int flags)
{
- struct file *f = get_empty_filp();
+ /* Grab pipe from the writer */
+ struct file *f = alloc_file(&wrf->f_path, FMODE_READ,
+ &read_pipefifo_fops);
if (!f)
return ERR_PTR(-ENFILE);
- /* Grab pipe from the writer */
- f->f_path = wrf->f_path;
path_get(&wrf->f_path);
- f->f_mapping = wrf->f_path.dentry->d_inode->i_mapping;
-
- f->f_pos = 0;
f->f_flags = O_RDONLY | (flags & O_NONBLOCK);
- f->f_op = &read_pipefifo_fops;
- f->f_mode = FMODE_READ;
- f->f_version = 0;
return f;
}
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 4badde179b1..13b5d070817 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -134,13 +134,16 @@ static inline void task_name(struct seq_file *m, struct task_struct *p)
* simple bit tests.
*/
static const char *task_state_array[] = {
- "R (running)", /* 0 */
- "S (sleeping)", /* 1 */
- "D (disk sleep)", /* 2 */
- "T (stopped)", /* 4 */
- "T (tracing stop)", /* 8 */
- "Z (zombie)", /* 16 */
- "X (dead)" /* 32 */
+ "R (running)", /* 0 */
+ "S (sleeping)", /* 1 */
+ "D (disk sleep)", /* 2 */
+ "T (stopped)", /* 4 */
+ "t (tracing stop)", /* 8 */
+ "Z (zombie)", /* 16 */
+ "X (dead)", /* 32 */
+ "x (dead)", /* 64 */
+ "K (wakekill)", /* 128 */
+ "W (waking)", /* 256 */
};
static inline const char *get_task_state(struct task_struct *tsk)
@@ -148,6 +151,8 @@ static inline const char *get_task_state(struct task_struct *tsk)
unsigned int state = (tsk->state & TASK_REPORT) | tsk->exit_state;
const char **p = &task_state_array[0];
+ BUILD_BUG_ON(1 + ilog2(TASK_STATE_MAX) != ARRAY_SIZE(task_state_array));
+
while (state) {
p++;
state >>= 1;
@@ -322,94 +327,6 @@ static inline void task_context_switch_counts(struct seq_file *m,
p->nivcsw);
}
-#ifdef CONFIG_MMU
-
-struct stack_stats {
- struct vm_area_struct *vma;
- unsigned long startpage;
- unsigned long usage;
-};
-
-static int stack_usage_pte_range(pmd_t *pmd, unsigned long addr,
- unsigned long end, struct mm_walk *walk)
-{
- struct stack_stats *ss = walk->private;
- struct vm_area_struct *vma = ss->vma;
- pte_t *pte, ptent;
- spinlock_t *ptl;
- int ret = 0;
-
- pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
- for (; addr != end; pte++, addr += PAGE_SIZE) {
- ptent = *pte;
-
-#ifdef CONFIG_STACK_GROWSUP
- if (pte_present(ptent) || is_swap_pte(ptent))
- ss->usage = addr - ss->startpage + PAGE_SIZE;
-#else
- if (pte_present(ptent) || is_swap_pte(ptent)) {
- ss->usage = ss->startpage - addr + PAGE_SIZE;
- pte++;
- ret = 1;
- break;
- }
-#endif
- }
- pte_unmap_unlock(pte - 1, ptl);
- cond_resched();
- return ret;
-}
-
-static inline unsigned long get_stack_usage_in_bytes(struct vm_area_struct *vma,
- struct task_struct *task)
-{
- struct stack_stats ss;
- struct mm_walk stack_walk = {
- .pmd_entry = stack_usage_pte_range,
- .mm = vma->vm_mm,
- .private = &ss,
- };
-
- if (!vma->vm_mm || is_vm_hugetlb_page(vma))
- return 0;
-
- ss.vma = vma;
- ss.startpage = task->stack_start & PAGE_MASK;
- ss.usage = 0;
-
-#ifdef CONFIG_STACK_GROWSUP
- walk_page_range(KSTK_ESP(task) & PAGE_MASK, vma->vm_end,
- &stack_walk);
-#else
- walk_page_range(vma->vm_start, (KSTK_ESP(task) & PAGE_MASK) + PAGE_SIZE,
- &stack_walk);
-#endif
- return ss.usage;
-}
-
-static inline void task_show_stack_usage(struct seq_file *m,
- struct task_struct *task)
-{
- struct vm_area_struct *vma;
- struct mm_struct *mm = get_task_mm(task);
-
- if (mm) {
- down_read(&mm->mmap_sem);
- vma = find_vma(mm, task->stack_start);
- if (vma)
- seq_printf(m, "Stack usage:\t%lu kB\n",
- get_stack_usage_in_bytes(vma, task) >> 10);
-
- up_read(&mm->mmap_sem);
- mmput(mm);
- }
-}
-#else
-static void task_show_stack_usage(struct seq_file *m, struct task_struct *task)
-{
-}
-#endif /* CONFIG_MMU */
-
static void task_cpus_allowed(struct seq_file *m, struct task_struct *task)
{
seq_printf(m, "Cpus_allowed:\t");
@@ -440,7 +357,6 @@ int proc_pid_status(struct seq_file *m, struct pid_namespace *ns,
task_show_regs(m, task);
#endif
task_context_switch_counts(m, task);
- task_show_stack_usage(m, task);
return 0;
}
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 18d5cc62d8e..58324c29916 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1419,7 +1419,6 @@ static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
goto out;
error = PROC_I(inode)->op.proc_get_link(inode, &nd->path);
- nd->last_type = LAST_BIND;
out:
return ERR_PTR(error);
}
@@ -2370,16 +2369,30 @@ static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd)
{
struct pid_namespace *ns = dentry->d_sb->s_fs_info;
pid_t tgid = task_tgid_nr_ns(current, ns);
- char tmp[PROC_NUMBUF];
- if (!tgid)
- return ERR_PTR(-ENOENT);
- sprintf(tmp, "%d", task_tgid_nr_ns(current, ns));
- return ERR_PTR(vfs_follow_link(nd,tmp));
+ char *name = ERR_PTR(-ENOENT);
+ if (tgid) {
+ name = __getname();
+ if (!name)
+ name = ERR_PTR(-ENOMEM);
+ else
+ sprintf(name, "%d", tgid);
+ }
+ nd_set_link(nd, name);
+ return NULL;
+}
+
+static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd,
+ void *cookie)
+{
+ char *s = nd_get_link(nd);
+ if (!IS_ERR(s))
+ __putname(s);
}
static const struct inode_operations proc_self_inode_operations = {
.readlink = proc_self_readlink,
.follow_link = proc_self_follow_link,
+ .put_link = proc_self_put_link,
};
/*
diff --git a/fs/proc/page.c b/fs/proc/page.c
index 5033ce0d254..180cf5a0bd6 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -8,6 +8,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/hugetlb.h>
+#include <linux/kernel-page-flags.h>
#include <asm/uaccess.h>
#include "internal.h"
@@ -71,52 +72,12 @@ static const struct file_operations proc_kpagecount_operations = {
* physical page flags.
*/
-/* These macros are used to decouple internal flags from exported ones */
-
-#define KPF_LOCKED 0
-#define KPF_ERROR 1
-#define KPF_REFERENCED 2
-#define KPF_UPTODATE 3
-#define KPF_DIRTY 4
-#define KPF_LRU 5
-#define KPF_ACTIVE 6
-#define KPF_SLAB 7
-#define KPF_WRITEBACK 8
-#define KPF_RECLAIM 9
-#define KPF_BUDDY 10
-
-/* 11-20: new additions in 2.6.31 */
-#define KPF_MMAP 11
-#define KPF_ANON 12
-#define KPF_SWAPCACHE 13
-#define KPF_SWAPBACKED 14
-#define KPF_COMPOUND_HEAD 15
-#define KPF_COMPOUND_TAIL 16
-#define KPF_HUGE 17
-#define KPF_UNEVICTABLE 18
-#define KPF_HWPOISON 19
-#define KPF_NOPAGE 20
-
-#define KPF_KSM 21
-
-/* kernel hacking assistances
- * WARNING: subject to change, never rely on them!
- */
-#define KPF_RESERVED 32
-#define KPF_MLOCKED 33
-#define KPF_MAPPEDTODISK 34
-#define KPF_PRIVATE 35
-#define KPF_PRIVATE_2 36
-#define KPF_OWNER_PRIVATE 37
-#define KPF_ARCH 38
-#define KPF_UNCACHED 39
-
static inline u64 kpf_copy_bit(u64 kflags, int ubit, int kbit)
{
return ((kflags >> kbit) & 1) << ubit;
}
-static u64 get_uflags(struct page *page)
+u64 stable_page_flags(struct page *page)
{
u64 k;
u64 u;
@@ -219,7 +180,7 @@ static ssize_t kpageflags_read(struct file *file, char __user *buf,
else
ppage = NULL;
- if (put_user(get_uflags(ppage), out)) {
+ if (put_user(stable_page_flags(ppage), out)) {
ret = -EFAULT;
break;
}
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 47c03f4336b..f277c4a111c 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -361,12 +361,11 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
if (!pte_present(ptent))
continue;
- mss->resident += PAGE_SIZE;
-
page = vm_normal_page(vma, addr, ptent);
if (!page)
continue;
+ mss->resident += PAGE_SIZE;
/* Accumulate the size in pages that have been accessed. */
if (pte_young(ptent) || PageReferenced(page))
mss->referenced += PAGE_SIZE;
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index cd6bb9a33c1..3fc62b097be 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -323,6 +323,30 @@ int dquot_mark_dquot_dirty(struct dquot *dquot)
}
EXPORT_SYMBOL(dquot_mark_dquot_dirty);
+/* Dirtify all the dquots - this can block when journalling */
+static inline int mark_all_dquot_dirty(struct dquot * const *dquot)
+{
+ int ret, err, cnt;
+
+ ret = err = 0;
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+ if (dquot[cnt])
+ /* Even in case of error we have to continue */
+ ret = mark_dquot_dirty(dquot[cnt]);
+ if (!err)
+ err = ret;
+ }
+ return err;
+}
+
+static inline void dqput_all(struct dquot **dquot)
+{
+ unsigned int cnt;
+
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ dqput(dquot[cnt]);
+}
+
/* This function needs dq_list_lock */
static inline int clear_dquot_dirty(struct dquot *dquot)
{
@@ -1268,8 +1292,7 @@ int dquot_initialize(struct inode *inode, int type)
out_err:
up_write(&sb_dqopt(sb)->dqptr_sem);
/* Drop unused references */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- dqput(got[cnt]);
+ dqput_all(got);
return ret;
}
EXPORT_SYMBOL(dquot_initialize);
@@ -1288,9 +1311,7 @@ int dquot_drop(struct inode *inode)
inode->i_dquot[cnt] = NULL;
}
up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
-
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- dqput(put[cnt]);
+ dqput_all(put);
return 0;
}
EXPORT_SYMBOL(dquot_drop);
@@ -1319,6 +1340,70 @@ void vfs_dq_drop(struct inode *inode)
EXPORT_SYMBOL(vfs_dq_drop);
/*
+ * inode_reserved_space is managed internally by quota, and protected by
+ * i_lock similar to i_blocks+i_bytes.
+ */
+static qsize_t *inode_reserved_space(struct inode * inode)
+{
+ /* Filesystem must explicitly define it's own method in order to use
+ * quota reservation interface */
+ BUG_ON(!inode->i_sb->dq_op->get_reserved_space);
+ return inode->i_sb->dq_op->get_reserved_space(inode);
+}
+
+static void inode_add_rsv_space(struct inode *inode, qsize_t number)
+{
+ spin_lock(&inode->i_lock);
+ *inode_reserved_space(inode) += number;
+ spin_unlock(&inode->i_lock);
+}
+
+
+static void inode_claim_rsv_space(struct inode *inode, qsize_t number)
+{
+ spin_lock(&inode->i_lock);
+ *inode_reserved_space(inode) -= number;
+ __inode_add_bytes(inode, number);
+ spin_unlock(&inode->i_lock);
+}
+
+static void inode_sub_rsv_space(struct inode *inode, qsize_t number)
+{
+ spin_lock(&inode->i_lock);
+ *inode_reserved_space(inode) -= number;
+ spin_unlock(&inode->i_lock);
+}
+
+static qsize_t inode_get_rsv_space(struct inode *inode)
+{
+ qsize_t ret;
+
+ if (!inode->i_sb->dq_op->get_reserved_space)
+ return 0;
+ spin_lock(&inode->i_lock);
+ ret = *inode_reserved_space(inode);
+ spin_unlock(&inode->i_lock);
+ return ret;
+}
+
+static void inode_incr_space(struct inode *inode, qsize_t number,
+ int reserve)
+{
+ if (reserve)
+ inode_add_rsv_space(inode, number);
+ else
+ inode_add_bytes(inode, number);
+}
+
+static void inode_decr_space(struct inode *inode, qsize_t number, int reserve)
+{
+ if (reserve)
+ inode_sub_rsv_space(inode, number);
+ else
+ inode_sub_bytes(inode, number);
+}
+
+/*
* Following four functions update i_blocks+i_bytes fields and
* quota information (together with appropriate checks)
* NOTE: We absolutely rely on the fact that caller dirties
@@ -1336,6 +1421,21 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
int cnt, ret = QUOTA_OK;
char warntype[MAXQUOTAS];
+ /*
+ * First test before acquiring mutex - solves deadlocks when we
+ * re-enter the quota code and are already holding the mutex
+ */
+ if (IS_NOQUOTA(inode)) {
+ inode_incr_space(inode, number, reserve);
+ goto out;
+ }
+
+ down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+ if (IS_NOQUOTA(inode)) {
+ inode_incr_space(inode, number, reserve);
+ goto out_unlock;
+ }
+
for (cnt = 0; cnt < MAXQUOTAS; cnt++)
warntype[cnt] = QUOTA_NL_NOWARN;
@@ -1346,7 +1446,8 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt)
== NO_QUOTA) {
ret = NO_QUOTA;
- goto out_unlock;
+ spin_unlock(&dq_data_lock);
+ goto out_flush_warn;
}
}
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1357,64 +1458,29 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
else
dquot_incr_space(inode->i_dquot[cnt], number);
}
- if (!reserve)
- inode_add_bytes(inode, number);
-out_unlock:
+ inode_incr_space(inode, number, reserve);
spin_unlock(&dq_data_lock);
+
+ if (reserve)
+ goto out_flush_warn;
+ mark_all_dquot_dirty(inode->i_dquot);
+out_flush_warn:
flush_warnings(inode->i_dquot, warntype);
+out_unlock:
+ up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
+out:
return ret;
}
int dquot_alloc_space(struct inode *inode, qsize_t number, int warn)
{
- int cnt, ret = QUOTA_OK;
-
- /*
- * First test before acquiring mutex - solves deadlocks when we
- * re-enter the quota code and are already holding the mutex
- */
- if (IS_NOQUOTA(inode)) {
- inode_add_bytes(inode, number);
- goto out;
- }
-
- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- if (IS_NOQUOTA(inode)) {
- inode_add_bytes(inode, number);
- goto out_unlock;
- }
-
- ret = __dquot_alloc_space(inode, number, warn, 0);
- if (ret == NO_QUOTA)
- goto out_unlock;
-
- /* Dirtify all the dquots - this can block when journalling */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- if (inode->i_dquot[cnt])
- mark_dquot_dirty(inode->i_dquot[cnt]);
-out_unlock:
- up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-out:
- return ret;
+ return __dquot_alloc_space(inode, number, warn, 0);
}
EXPORT_SYMBOL(dquot_alloc_space);
int dquot_reserve_space(struct inode *inode, qsize_t number, int warn)
{
- int ret = QUOTA_OK;
-
- if (IS_NOQUOTA(inode))
- goto out;
-
- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- if (IS_NOQUOTA(inode))
- goto out_unlock;
-
- ret = __dquot_alloc_space(inode, number, warn, 1);
-out_unlock:
- up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-out:
- return ret;
+ return __dquot_alloc_space(inode, number, warn, 1);
}
EXPORT_SYMBOL(dquot_reserve_space);
@@ -1455,10 +1521,7 @@ int dquot_alloc_inode(const struct inode *inode, qsize_t number)
warn_put_all:
spin_unlock(&dq_data_lock);
if (ret == QUOTA_OK)
- /* Dirtify all the dquots - this can block when journalling */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- if (inode->i_dquot[cnt])
- mark_dquot_dirty(inode->i_dquot[cnt]);
+ mark_all_dquot_dirty(inode->i_dquot);
flush_warnings(inode->i_dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return ret;
@@ -1471,14 +1534,14 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
int ret = QUOTA_OK;
if (IS_NOQUOTA(inode)) {
- inode_add_bytes(inode, number);
+ inode_claim_rsv_space(inode, number);
goto out;
}
down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
if (IS_NOQUOTA(inode)) {
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- inode_add_bytes(inode, number);
+ inode_claim_rsv_space(inode, number);
goto out;
}
@@ -1490,12 +1553,9 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
number);
}
/* Update inode bytes */
- inode_add_bytes(inode, number);
+ inode_claim_rsv_space(inode, number);
spin_unlock(&dq_data_lock);
- /* Dirtify all the dquots - this can block when journalling */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- if (inode->i_dquot[cnt])
- mark_dquot_dirty(inode->i_dquot[cnt]);
+ mark_all_dquot_dirty(inode->i_dquot);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
out:
return ret;
@@ -1503,38 +1563,9 @@ out:
EXPORT_SYMBOL(dquot_claim_space);
/*
- * Release reserved quota space
- */
-void dquot_release_reserved_space(struct inode *inode, qsize_t number)
-{
- int cnt;
-
- if (IS_NOQUOTA(inode))
- goto out;
-
- down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
- if (IS_NOQUOTA(inode))
- goto out_unlock;
-
- spin_lock(&dq_data_lock);
- /* Release reserved dquots */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (inode->i_dquot[cnt])
- dquot_free_reserved_space(inode->i_dquot[cnt], number);
- }
- spin_unlock(&dq_data_lock);
-
-out_unlock:
- up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
-out:
- return;
-}
-EXPORT_SYMBOL(dquot_release_reserved_space);
-
-/*
* This operation can block, but only after everything is updated
*/
-int dquot_free_space(struct inode *inode, qsize_t number)
+int __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
{
unsigned int cnt;
char warntype[MAXQUOTAS];
@@ -1543,7 +1574,7 @@ int dquot_free_space(struct inode *inode, qsize_t number)
* re-enter the quota code and are already holding the mutex */
if (IS_NOQUOTA(inode)) {
out_sub:
- inode_sub_bytes(inode, number);
+ inode_decr_space(inode, number, reserve);
return QUOTA_OK;
}
@@ -1558,21 +1589,40 @@ out_sub:
if (!inode->i_dquot[cnt])
continue;
warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
- dquot_decr_space(inode->i_dquot[cnt], number);
+ if (reserve)
+ dquot_free_reserved_space(inode->i_dquot[cnt], number);
+ else
+ dquot_decr_space(inode->i_dquot[cnt], number);
}
- inode_sub_bytes(inode, number);
+ inode_decr_space(inode, number, reserve);
spin_unlock(&dq_data_lock);
- /* Dirtify all the dquots - this can block when journalling */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- if (inode->i_dquot[cnt])
- mark_dquot_dirty(inode->i_dquot[cnt]);
+
+ if (reserve)
+ goto out_unlock;
+ mark_all_dquot_dirty(inode->i_dquot);
+out_unlock:
flush_warnings(inode->i_dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return QUOTA_OK;
}
+
+int dquot_free_space(struct inode *inode, qsize_t number)
+{
+ return __dquot_free_space(inode, number, 0);
+}
EXPORT_SYMBOL(dquot_free_space);
/*
+ * Release reserved quota space
+ */
+void dquot_release_reserved_space(struct inode *inode, qsize_t number)
+{
+ __dquot_free_space(inode, number, 1);
+
+}
+EXPORT_SYMBOL(dquot_release_reserved_space);
+
+/*
* This operation can block, but only after everything is updated
*/
int dquot_free_inode(const struct inode *inode, qsize_t number)
@@ -1599,10 +1649,7 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
dquot_decr_inodes(inode->i_dquot[cnt], number);
}
spin_unlock(&dq_data_lock);
- /* Dirtify all the dquots - this can block when journalling */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++)
- if (inode->i_dquot[cnt])
- mark_dquot_dirty(inode->i_dquot[cnt]);
+ mark_all_dquot_dirty(inode->i_dquot);
flush_warnings(inode->i_dquot, warntype);
up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
return QUOTA_OK;
@@ -1610,19 +1657,6 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
EXPORT_SYMBOL(dquot_free_inode);
/*
- * call back function, get reserved quota space from underlying fs
- */
-qsize_t dquot_get_reserved_space(struct inode *inode)
-{
- qsize_t reserved_space = 0;
-
- if (sb_any_quota_active(inode->i_sb) &&
- inode->i_sb->dq_op->get_reserved_space)
- reserved_space = inode->i_sb->dq_op->get_reserved_space(inode);
- return reserved_space;
-}
-
-/*
* Transfer the number of inode and blocks from one diskquota to an other.
*
* This operation can block, but only after everything is updated
@@ -1665,7 +1699,7 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
}
spin_lock(&dq_data_lock);
cur_space = inode_get_bytes(inode);
- rsv_space = dquot_get_reserved_space(inode);
+ rsv_space = inode_get_rsv_space(inode);
space = cur_space + rsv_space;
/* Build the transfer_from list and check the limits */
for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1709,25 +1743,18 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
spin_unlock(&dq_data_lock);
up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
- /* Dirtify all the dquots - this can block when journalling */
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- if (transfer_from[cnt])
- mark_dquot_dirty(transfer_from[cnt]);
- if (transfer_to[cnt]) {
- mark_dquot_dirty(transfer_to[cnt]);
- /* The reference we got is transferred to the inode */
- transfer_to[cnt] = NULL;
- }
- }
+ mark_all_dquot_dirty(transfer_from);
+ mark_all_dquot_dirty(transfer_to);
+ /* The reference we got is transferred to the inode */
+ for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+ transfer_to[cnt] = NULL;
warn_put_all:
flush_warnings(transfer_to, warntype_to);
flush_warnings(transfer_from, warntype_from_inodes);
flush_warnings(transfer_from, warntype_from_space);
put_all:
- for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
- dqput(transfer_from[cnt]);
- dqput(transfer_to[cnt]);
- }
+ dqput_all(transfer_from);
+ dqput_all(transfer_to);
return ret;
over_quota:
spin_unlock(&dq_data_lock);
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index 3dfc23e0213..e3da02f4986 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -97,8 +97,11 @@ static int v2_read_file_info(struct super_block *sb, int type)
unsigned int version;
if (!v2_read_header(sb, type, &dqhead))
- return 0;
+ return -1;
version = le32_to_cpu(dqhead.dqh_version);
+ if ((info->dqi_fmt_id == QFMT_VFS_V0 && version != 0) ||
+ (info->dqi_fmt_id == QFMT_VFS_V1 && version != 1))
+ return -1;
size = sb->s_op->quota_read(sb, type, (char *)&dinfo,
sizeof(struct v2_disk_dqinfo), V2_DQINFOOFF);
@@ -120,8 +123,8 @@ static int v2_read_file_info(struct super_block *sb, int type)
info->dqi_maxilimit = 0xffffffff;
} else {
/* used space is stored as unsigned 64-bit value */
- info->dqi_maxblimit = 0xffffffffffffffff; /* 2^64-1 */
- info->dqi_maxilimit = 0xffffffffffffffff;
+ info->dqi_maxblimit = 0xffffffffffffffffULL; /* 2^64-1 */
+ info->dqi_maxilimit = 0xffffffffffffffffULL;
}
info->dqi_bgrace = le32_to_cpu(dinfo.dqi_bgrace);
info->dqi_igrace = le32_to_cpu(dinfo.dqi_igrace);
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
index 32fae4040eb..1739a4aba25 100644
--- a/fs/ramfs/file-nommu.c
+++ b/fs/ramfs/file-nommu.c
@@ -60,7 +60,7 @@ const struct inode_operations ramfs_file_inode_operations = {
*/
int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
{
- unsigned long npages, xpages, loop, limit;
+ unsigned long npages, xpages, loop;
struct page *pages;
unsigned order;
void *data;
@@ -123,30 +123,6 @@ add_error:
/*****************************************************************************/
/*
- * check that file shrinkage doesn't leave any VMAs dangling in midair
- */
-static int ramfs_nommu_check_mappings(struct inode *inode,
- size_t newsize, size_t size)
-{
- struct vm_area_struct *vma;
- struct prio_tree_iter iter;
-
- /* search for VMAs that fall within the dead zone */
- vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
- newsize >> PAGE_SHIFT,
- (size + PAGE_SIZE - 1) >> PAGE_SHIFT
- ) {
- /* found one - only interested if it's shared out of the page
- * cache */
- if (vma->vm_flags & VM_SHARED)
- return -ETXTBSY; /* not quite true, but near enough */
- }
-
- return 0;
-}
-
-/*****************************************************************************/
-/*
*
*/
static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
@@ -164,7 +140,7 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
/* check that a decrease in size doesn't cut off any shared mappings */
if (newsize < size) {
- ret = ramfs_nommu_check_mappings(inode, newsize, size);
+ ret = nommu_shrink_inode_mappings(inode, size, newsize);
if (ret < 0)
return ret;
}
diff --git a/fs/reiserfs/Kconfig b/fs/reiserfs/Kconfig
index ac7cd75c86f..513f431038f 100644
--- a/fs/reiserfs/Kconfig
+++ b/fs/reiserfs/Kconfig
@@ -1,7 +1,6 @@
config REISERFS_FS
tristate "Reiserfs support"
select CRC32
- select FS_JOURNAL_INFO
help
Stores not just filenames but the files themselves in a balanced
tree. Uses journalling.
diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index 68549570718..65c87276117 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -1277,7 +1277,10 @@ int reiserfs_init_bitmap_cache(struct super_block *sb)
struct reiserfs_bitmap_info *bitmap;
unsigned int bmap_nr = reiserfs_bmap_count(sb);
+ /* Avoid lock recursion in fault case */
+ reiserfs_write_unlock(sb);
bitmap = vmalloc(sizeof(*bitmap) * bmap_nr);
+ reiserfs_write_lock(sb);
if (bitmap == NULL)
return -ENOMEM;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 3a28e7751b3..2df0f5c7c60 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -31,11 +31,12 @@ void reiserfs_delete_inode(struct inode *inode)
JOURNAL_PER_BALANCE_CNT * 2 +
2 * REISERFS_QUOTA_INIT_BLOCKS(inode->i_sb);
struct reiserfs_transaction_handle th;
+ int depth;
int err;
truncate_inode_pages(&inode->i_data, 0);
- reiserfs_write_lock(inode->i_sb);
+ depth = reiserfs_write_lock_once(inode->i_sb);
/* The = 0 happens when we abort creating a new inode for some reason like lack of space.. */
if (!(inode->i_state & I_NEW) && INODE_PKEY(inode)->k_objectid != 0) { /* also handles bad_inode case */
@@ -74,7 +75,7 @@ void reiserfs_delete_inode(struct inode *inode)
out:
clear_inode(inode); /* note this must go after the journal_end to prevent deadlock */
inode->i_blocks = 0;
- reiserfs_write_unlock(inode->i_sb);
+ reiserfs_write_unlock_once(inode->i_sb, depth);
}
static void _make_cpu_key(struct cpu_key *key, int version, __u32 dirid,
@@ -1496,9 +1497,11 @@ struct inode *reiserfs_iget(struct super_block *s, const struct cpu_key *key)
args.objectid = key->on_disk_key.k_objectid;
args.dirid = key->on_disk_key.k_dir_id;
+ reiserfs_write_unlock(s);
inode = iget5_locked(s, key->on_disk_key.k_objectid,
reiserfs_find_actor, reiserfs_init_locked_inode,
(void *)(&args));
+ reiserfs_write_lock(s);
if (!inode)
return ERR_PTR(-ENOMEM);
@@ -2538,6 +2541,12 @@ static int reiserfs_writepage(struct page *page, struct writeback_control *wbc)
return reiserfs_write_full_page(page, wbc);
}
+static void reiserfs_truncate_failed_write(struct inode *inode)
+{
+ truncate_inode_pages(inode->i_mapping, inode->i_size);
+ reiserfs_truncate_file(inode, 0);
+}
+
static int reiserfs_write_begin(struct file *file,
struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
@@ -2604,6 +2613,8 @@ static int reiserfs_write_begin(struct file *file,
if (ret) {
unlock_page(page);
page_cache_release(page);
+ /* Truncate allocated blocks */
+ reiserfs_truncate_failed_write(inode);
}
return ret;
}
@@ -2701,9 +2712,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
** transaction tracking stuff when the size changes. So, we have
** to do the i_size updates here.
*/
- pos += copied;
-
- if (pos > inode->i_size) {
+ if (pos + copied > inode->i_size) {
struct reiserfs_transaction_handle myth;
lock_depth = reiserfs_write_lock_once(inode->i_sb);
locked = true;
@@ -2721,7 +2730,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
goto journal_error;
reiserfs_update_inode_transaction(inode);
- inode->i_size = pos;
+ inode->i_size = pos + copied;
/*
* this will just nest into our transaction. It's important
* to use mark_inode_dirty so the inode gets pushed around on the
@@ -2751,6 +2760,10 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
reiserfs_write_unlock_once(inode->i_sb, lock_depth);
unlock_page(page);
page_cache_release(page);
+
+ if (pos + len > inode->i_size)
+ reiserfs_truncate_failed_write(inode);
+
return ret == 0 ? copied : ret;
journal_error:
@@ -3051,13 +3064,14 @@ static ssize_t reiserfs_direct_IO(int rw, struct kiocb *iocb,
int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
{
struct inode *inode = dentry->d_inode;
- int error;
unsigned int ia_valid;
+ int depth;
+ int error;
/* must be turned off for recursive notify_change calls */
ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
- reiserfs_write_lock(inode->i_sb);
+ depth = reiserfs_write_lock_once(inode->i_sb);
if (attr->ia_valid & ATTR_SIZE) {
/* version 2 items will be caught by the s_maxbytes check
** done for us in vmtruncate
@@ -3138,8 +3152,17 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
journal_end(&th, inode->i_sb, jbegin_count);
}
}
- if (!error)
+ if (!error) {
+ /*
+ * Relax the lock here, as it might truncate the
+ * inode pages and wait for inode pages locks.
+ * To release such page lock, the owner needs the
+ * reiserfs lock
+ */
+ reiserfs_write_unlock_once(inode->i_sb, depth);
error = inode_setattr(inode, attr);
+ depth = reiserfs_write_lock_once(inode->i_sb);
+ }
}
if (!error && reiserfs_posixacl(inode->i_sb)) {
@@ -3148,7 +3171,8 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
}
out:
- reiserfs_write_unlock(inode->i_sb);
+ reiserfs_write_unlock_once(inode->i_sb, depth);
+
return error;
}
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index ace77451ceb..f53505de071 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -104,9 +104,10 @@ setflags_out:
err = put_user(inode->i_generation, (int __user *)arg);
break;
case REISERFS_IOC_SETVERSION:
- if (!is_owner_or_cap(inode))
+ if (!is_owner_or_cap(inode)) {
err = -EPERM;
break;
+ }
err = mnt_want_write(filp->f_path.mnt);
if (err)
break;
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 2f8a7e7b8da..ba98546fabb 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2009,10 +2009,11 @@ static int do_journal_release(struct reiserfs_transaction_handle *th,
destroy_workqueue(commit_wq);
commit_wq = NULL;
}
- reiserfs_write_lock(sb);
free_journal_ram(sb);
+ reiserfs_write_lock(sb);
+
return 0;
}
@@ -2758,11 +2759,18 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
struct reiserfs_journal *journal;
struct reiserfs_journal_list *jl;
char b[BDEVNAME_SIZE];
+ int ret;
+ /*
+ * Unlock here to avoid various RECLAIM-FS-ON <-> IN-RECLAIM-FS
+ * dependency inversion warnings.
+ */
+ reiserfs_write_unlock(sb);
journal = SB_JOURNAL(sb) = vmalloc(sizeof(struct reiserfs_journal));
if (!journal) {
reiserfs_warning(sb, "journal-1256",
"unable to get memory for journal structure");
+ reiserfs_write_lock(sb);
return 1;
}
memset(journal, 0, sizeof(struct reiserfs_journal));
@@ -2771,10 +2779,12 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
INIT_LIST_HEAD(&journal->j_working_list);
INIT_LIST_HEAD(&journal->j_journal_list);
journal->j_persistent_trans = 0;
- if (reiserfs_allocate_list_bitmaps(sb,
- journal->j_list_bitmap,
- reiserfs_bmap_count(sb)))
+ ret = reiserfs_allocate_list_bitmaps(sb, journal->j_list_bitmap,
+ reiserfs_bmap_count(sb));
+ reiserfs_write_lock(sb);
+ if (ret)
goto free_and_return;
+
allocate_bitmap_nodes(sb);
/* reserved for journal area support */
@@ -2903,7 +2913,9 @@ int journal_init(struct super_block *sb, const char *j_dev_name,
journal->j_mount_id = 10;
journal->j_state = 0;
atomic_set(&(journal->j_jlock), 0);
+ reiserfs_write_unlock(sb);
journal->j_cnode_free_list = allocate_cnodes(num_cnodes);
+ reiserfs_write_lock(sb);
journal->j_cnode_free_orig = journal->j_cnode_free_list;
journal->j_cnode_free = journal->j_cnode_free_list ? num_cnodes : 0;
journal->j_cnode_used = 0;
diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c
index ee2cfc0fd8a..b87aa2c1afc 100644
--- a/fs/reiserfs/lock.c
+++ b/fs/reiserfs/lock.c
@@ -86,3 +86,12 @@ void reiserfs_check_lock_depth(struct super_block *sb, char *caller)
reiserfs_panic(sb, "%s called without kernel lock held %d",
caller);
}
+
+#ifdef CONFIG_REISERFS_CHECK
+void reiserfs_lock_check_recursive(struct super_block *sb)
+{
+ struct reiserfs_sb_info *sb_i = REISERFS_SB(sb);
+
+ WARN_ONCE((sb_i->lock_depth > 0), "Unwanted recursive reiserfs lock!\n");
+}
+#endif
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c
index e296ff72a6c..9d4dcf0b07c 100644
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -921,6 +921,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
struct reiserfs_transaction_handle th;
int jbegin_count;
unsigned long savelink;
+ int depth;
inode = dentry->d_inode;
@@ -932,7 +933,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
JOURNAL_PER_BALANCE_CNT * 2 + 2 +
4 * REISERFS_QUOTA_TRANS_BLOCKS(dir->i_sb);
- reiserfs_write_lock(dir->i_sb);
+ depth = reiserfs_write_lock_once(dir->i_sb);
retval = journal_begin(&th, dir->i_sb, jbegin_count);
if (retval)
goto out_unlink;
@@ -993,7 +994,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
retval = journal_end(&th, dir->i_sb, jbegin_count);
reiserfs_check_path(&path);
- reiserfs_write_unlock(dir->i_sb);
+ reiserfs_write_unlock_once(dir->i_sb, depth);
return retval;
end_unlink:
@@ -1003,7 +1004,7 @@ static int reiserfs_unlink(struct inode *dir, struct dentry *dentry)
if (err)
retval = err;
out_unlink:
- reiserfs_write_unlock(dir->i_sb);
+ reiserfs_write_unlock_once(dir->i_sb, depth);
return retval;
}
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 58aa8e75f7f..81f09fab8ae 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -48,6 +48,7 @@
#include <net/checksum.h>
#include <linux/stat.h>
#include <linux/quotaops.h>
+#include <linux/security.h>
#define PRIVROOT_NAME ".reiserfs_priv"
#define XAROOT_NAME "xattrs"
@@ -82,7 +83,8 @@ static int xattr_unlink(struct inode *dir, struct dentry *dentry)
BUG_ON(!mutex_is_locked(&dir->i_mutex));
vfs_dq_init(dir);
- mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
+ reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
+ I_MUTEX_CHILD, dir->i_sb);
error = dir->i_op->unlink(dir, dentry);
mutex_unlock(&dentry->d_inode->i_mutex);
@@ -97,7 +99,8 @@ static int xattr_rmdir(struct inode *dir, struct dentry *dentry)
BUG_ON(!mutex_is_locked(&dir->i_mutex));
vfs_dq_init(dir);
- mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_CHILD);
+ reiserfs_mutex_lock_nested_safe(&dentry->d_inode->i_mutex,
+ I_MUTEX_CHILD, dir->i_sb);
dentry_unhash(dentry);
error = dir->i_op->rmdir(dir, dentry);
if (!error)
@@ -234,16 +237,22 @@ static int reiserfs_for_each_xattr(struct inode *inode,
if (IS_PRIVATE(inode) || get_inode_sd_version(inode) == STAT_DATA_V1)
return 0;
+ reiserfs_write_unlock(inode->i_sb);
dir = open_xa_dir(inode, XATTR_REPLACE);
if (IS_ERR(dir)) {
err = PTR_ERR(dir);
+ reiserfs_write_lock(inode->i_sb);
goto out;
} else if (!dir->d_inode) {
err = 0;
+ reiserfs_write_lock(inode->i_sb);
goto out_dir;
}
mutex_lock_nested(&dir->d_inode->i_mutex, I_MUTEX_XATTR);
+
+ reiserfs_write_lock(inode->i_sb);
+
buf.xadir = dir;
err = reiserfs_readdir_dentry(dir, &buf, fill_with_dentries, &pos);
while ((err == 0 || err == -ENOSPC) && buf.count) {
@@ -282,8 +291,9 @@ static int reiserfs_for_each_xattr(struct inode *inode,
err = journal_begin(&th, inode->i_sb, blocks);
if (!err) {
int jerror;
- mutex_lock_nested(&dir->d_parent->d_inode->i_mutex,
- I_MUTEX_XATTR);
+ reiserfs_mutex_lock_nested_safe(
+ &dir->d_parent->d_inode->i_mutex,
+ I_MUTEX_XATTR, inode->i_sb);
err = action(dir, data);
jerror = journal_end(&th, inode->i_sb, blocks);
mutex_unlock(&dir->d_parent->d_inode->i_mutex);
@@ -442,7 +452,9 @@ static int lookup_and_delete_xattr(struct inode *inode, const char *name)
}
if (dentry->d_inode) {
+ reiserfs_write_lock(inode->i_sb);
err = xattr_unlink(xadir->d_inode, dentry);
+ reiserfs_write_unlock(inode->i_sb);
update_ctime(inode);
}
@@ -476,15 +488,24 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
if (get_inode_sd_version(inode) == STAT_DATA_V1)
return -EOPNOTSUPP;
- if (!buffer)
- return lookup_and_delete_xattr(inode, name);
+ reiserfs_write_unlock(inode->i_sb);
+
+ if (!buffer) {
+ err = lookup_and_delete_xattr(inode, name);
+ reiserfs_write_lock(inode->i_sb);
+ return err;
+ }
dentry = xattr_lookup(inode, name, flags);
- if (IS_ERR(dentry))
+ if (IS_ERR(dentry)) {
+ reiserfs_write_lock(inode->i_sb);
return PTR_ERR(dentry);
+ }
down_write(&REISERFS_I(inode)->i_xattr_sem);
+ reiserfs_write_lock(inode->i_sb);
+
xahash = xattr_hash(buffer, buffer_size);
while (buffer_pos < buffer_size || buffer_pos == 0) {
size_t chunk;
@@ -539,8 +560,12 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
.ia_size = buffer_size,
.ia_valid = ATTR_SIZE | ATTR_CTIME,
};
+
+ reiserfs_write_unlock(inode->i_sb);
mutex_lock_nested(&dentry->d_inode->i_mutex, I_MUTEX_XATTR);
down_write(&dentry->d_inode->i_alloc_sem);
+ reiserfs_write_lock(inode->i_sb);
+
err = reiserfs_setattr(dentry, &newattrs);
up_write(&dentry->d_inode->i_alloc_sem);
mutex_unlock(&dentry->d_inode->i_mutex);
@@ -726,15 +751,14 @@ ssize_t
reiserfs_getxattr(struct dentry * dentry, const char *name, void *buffer,
size_t size)
{
- struct inode *inode = dentry->d_inode;
struct xattr_handler *handler;
- handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name);
+ handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
- if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1)
+ if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
return -EOPNOTSUPP;
- return handler->get(inode, name, buffer, size);
+ return handler->get(dentry, name, buffer, size, handler->flags);
}
/*
@@ -746,15 +770,14 @@ int
reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
size_t size, int flags)
{
- struct inode *inode = dentry->d_inode;
struct xattr_handler *handler;
- handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name);
+ handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
- if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1)
+ if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
return -EOPNOTSUPP;
- return handler->set(inode, name, value, size, flags);
+ return handler->set(dentry, name, value, size, flags, handler->flags);
}
/*
@@ -764,21 +787,20 @@ reiserfs_setxattr(struct dentry *dentry, const char *name, const void *value,
*/
int reiserfs_removexattr(struct dentry *dentry, const char *name)
{
- struct inode *inode = dentry->d_inode;
struct xattr_handler *handler;
- handler = find_xattr_handler_prefix(inode->i_sb->s_xattr, name);
+ handler = find_xattr_handler_prefix(dentry->d_sb->s_xattr, name);
- if (!handler || get_inode_sd_version(inode) == STAT_DATA_V1)
+ if (!handler || get_inode_sd_version(dentry->d_inode) == STAT_DATA_V1)
return -EOPNOTSUPP;
- return handler->set(inode, name, NULL, 0, XATTR_REPLACE);
+ return handler->set(dentry, name, NULL, 0, XATTR_REPLACE, handler->flags);
}
struct listxattr_buf {
size_t size;
size_t pos;
char *buf;
- struct inode *inode;
+ struct dentry *dentry;
};
static int listxattr_filler(void *buf, const char *name, int namelen,
@@ -789,17 +811,19 @@ static int listxattr_filler(void *buf, const char *name, int namelen,
if (name[0] != '.' ||
(namelen != 1 && (name[1] != '.' || namelen != 2))) {
struct xattr_handler *handler;
- handler = find_xattr_handler_prefix(b->inode->i_sb->s_xattr,
+ handler = find_xattr_handler_prefix(b->dentry->d_sb->s_xattr,
name);
if (!handler) /* Unsupported xattr name */
return 0;
if (b->buf) {
- size = handler->list(b->inode, b->buf + b->pos,
- b->size, name, namelen);
+ size = handler->list(b->dentry, b->buf + b->pos,
+ b->size, name, namelen,
+ handler->flags);
if (size > b->size)
return -ERANGE;
} else {
- size = handler->list(b->inode, NULL, 0, name, namelen);
+ size = handler->list(b->dentry, NULL, 0, name,
+ namelen, handler->flags);
}
b->pos += size;
@@ -820,7 +844,7 @@ ssize_t reiserfs_listxattr(struct dentry * dentry, char *buffer, size_t size)
int err = 0;
loff_t pos = 0;
struct listxattr_buf buf = {
- .inode = dentry->d_inode,
+ .dentry = dentry,
.buf = buffer,
.size = buffer ? size : 0,
};
diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c
index 35d6e672a27..dd20a7883f0 100644
--- a/fs/reiserfs/xattr_acl.c
+++ b/fs/reiserfs/xattr_acl.c
@@ -15,8 +15,10 @@ static int reiserfs_set_acl(struct reiserfs_transaction_handle *th,
struct posix_acl *acl);
static int
-xattr_set_acl(struct inode *inode, int type, const void *value, size_t size)
+posix_acl_set(struct dentry *dentry, const char *name, const void *value,
+ size_t size, int flags, int type)
{
+ struct inode *inode = dentry->d_inode;
struct posix_acl *acl;
int error, error2;
struct reiserfs_transaction_handle th;
@@ -60,15 +62,16 @@ xattr_set_acl(struct inode *inode, int type, const void *value, size_t size)
}
static int
-xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size)
+posix_acl_get(struct dentry *dentry, const char *name, void *buffer,
+ size_t size, int type)
{
struct posix_acl *acl;
int error;
- if (!reiserfs_posixacl(inode->i_sb))
+ if (!reiserfs_posixacl(dentry->d_sb))
return -EOPNOTSUPP;
- acl = reiserfs_get_acl(inode, type);
+ acl = reiserfs_get_acl(dentry->d_inode, type);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl == NULL)
@@ -452,7 +455,9 @@ int reiserfs_acl_chmod(struct inode *inode)
return 0;
}
+ reiserfs_write_unlock(inode->i_sb);
acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS);
+ reiserfs_write_lock(inode->i_sb);
if (!acl)
return 0;
if (IS_ERR(acl))
@@ -482,30 +487,12 @@ int reiserfs_acl_chmod(struct inode *inode)
return error;
}
-static int
-posix_acl_access_get(struct inode *inode, const char *name,
- void *buffer, size_t size)
-{
- if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1)
- return -EINVAL;
- return xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size);
-}
-
-static int
-posix_acl_access_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
-{
- if (strlen(name) != sizeof(POSIX_ACL_XATTR_ACCESS) - 1)
- return -EINVAL;
- return xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size);
-}
-
-static size_t posix_acl_access_list(struct inode *inode, char *list,
+static size_t posix_acl_access_list(struct dentry *dentry, char *list,
size_t list_size, const char *name,
- size_t name_len)
+ size_t name_len, int type)
{
const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS);
- if (!reiserfs_posixacl(inode->i_sb))
+ if (!reiserfs_posixacl(dentry->d_sb))
return 0;
if (list && size <= list_size)
memcpy(list, POSIX_ACL_XATTR_ACCESS, size);
@@ -514,35 +501,18 @@ static size_t posix_acl_access_list(struct inode *inode, char *list,
struct xattr_handler reiserfs_posix_acl_access_handler = {
.prefix = POSIX_ACL_XATTR_ACCESS,
- .get = posix_acl_access_get,
- .set = posix_acl_access_set,
+ .flags = ACL_TYPE_ACCESS,
+ .get = posix_acl_get,
+ .set = posix_acl_set,
.list = posix_acl_access_list,
};
-static int
-posix_acl_default_get(struct inode *inode, const char *name,
- void *buffer, size_t size)
-{
- if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1)
- return -EINVAL;
- return xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size);
-}
-
-static int
-posix_acl_default_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
-{
- if (strlen(name) != sizeof(POSIX_ACL_XATTR_DEFAULT) - 1)
- return -EINVAL;
- return xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size);
-}
-
-static size_t posix_acl_default_list(struct inode *inode, char *list,
+static size_t posix_acl_default_list(struct dentry *dentry, char *list,
size_t list_size, const char *name,
- size_t name_len)
+ size_t name_len, int type)
{
const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT);
- if (!reiserfs_posixacl(inode->i_sb))
+ if (!reiserfs_posixacl(dentry->d_sb))
return 0;
if (list && size <= list_size)
memcpy(list, POSIX_ACL_XATTR_DEFAULT, size);
@@ -551,7 +521,8 @@ static size_t posix_acl_default_list(struct inode *inode, char *list,
struct xattr_handler reiserfs_posix_acl_default_handler = {
.prefix = POSIX_ACL_XATTR_DEFAULT,
- .get = posix_acl_default_get,
- .set = posix_acl_default_set,
+ .flags = ACL_TYPE_DEFAULT,
+ .get = posix_acl_get,
+ .set = posix_acl_set,
.list = posix_acl_default_list,
};
diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
index a92c8792c0f..d8b5bfcbdd3 100644
--- a/fs/reiserfs/xattr_security.c
+++ b/fs/reiserfs/xattr_security.c
@@ -8,36 +8,37 @@
#include <asm/uaccess.h>
static int
-security_get(struct inode *inode, const char *name, void *buffer, size_t size)
+security_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
+ int handler_flags)
{
if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
return -EINVAL;
- if (IS_PRIVATE(inode))
+ if (IS_PRIVATE(dentry->d_inode))
return -EPERM;
- return reiserfs_xattr_get(inode, name, buffer, size);
+ return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
}
static int
-security_set(struct inode *inode, const char *name, const void *buffer,
- size_t size, int flags)
+security_set(struct dentry *dentry, const char *name, const void *buffer,
+ size_t size, int flags, int handler_flags)
{
if (strlen(name) < sizeof(XATTR_SECURITY_PREFIX))
return -EINVAL;
- if (IS_PRIVATE(inode))
+ if (IS_PRIVATE(dentry->d_inode))
return -EPERM;
- return reiserfs_xattr_set(inode, name, buffer, size, flags);
+ return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
}
-static size_t security_list(struct inode *inode, char *list, size_t list_len,
- const char *name, size_t namelen)
+static size_t security_list(struct dentry *dentry, char *list, size_t list_len,
+ const char *name, size_t namelen, int handler_flags)
{
const size_t len = namelen + 1;
- if (IS_PRIVATE(inode))
+ if (IS_PRIVATE(dentry->d_inode))
return 0;
if (list && len <= list_len) {
diff --git a/fs/reiserfs/xattr_trusted.c b/fs/reiserfs/xattr_trusted.c
index a865042f75e..5b08aaca3da 100644
--- a/fs/reiserfs/xattr_trusted.c
+++ b/fs/reiserfs/xattr_trusted.c
@@ -8,36 +8,37 @@
#include <asm/uaccess.h>
static int
-trusted_get(struct inode *inode, const char *name, void *buffer, size_t size)
+trusted_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
+ int handler_flags)
{
if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
return -EINVAL;
- if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode))
+ if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
return -EPERM;
- return reiserfs_xattr_get(inode, name, buffer, size);
+ return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
}
static int
-trusted_set(struct inode *inode, const char *name, const void *buffer,
- size_t size, int flags)
+trusted_set(struct dentry *dentry, const char *name, const void *buffer,
+ size_t size, int flags, int handler_flags)
{
if (strlen(name) < sizeof(XATTR_TRUSTED_PREFIX))
return -EINVAL;
- if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode))
+ if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
return -EPERM;
- return reiserfs_xattr_set(inode, name, buffer, size, flags);
+ return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
}
-static size_t trusted_list(struct inode *inode, char *list, size_t list_size,
- const char *name, size_t name_len)
+static size_t trusted_list(struct dentry *dentry, char *list, size_t list_size,
+ const char *name, size_t name_len, int handler_flags)
{
const size_t len = name_len + 1;
- if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(inode))
+ if (!capable(CAP_SYS_ADMIN) || IS_PRIVATE(dentry->d_inode))
return 0;
if (list && len <= list_size) {
diff --git a/fs/reiserfs/xattr_user.c b/fs/reiserfs/xattr_user.c
index e3238dc4f3d..75d59c49b91 100644
--- a/fs/reiserfs/xattr_user.c
+++ b/fs/reiserfs/xattr_user.c
@@ -7,34 +7,35 @@
#include <asm/uaccess.h>
static int
-user_get(struct inode *inode, const char *name, void *buffer, size_t size)
+user_get(struct dentry *dentry, const char *name, void *buffer, size_t size,
+ int handler_flags)
{
if (strlen(name) < sizeof(XATTR_USER_PREFIX))
return -EINVAL;
- if (!reiserfs_xattrs_user(inode->i_sb))
+ if (!reiserfs_xattrs_user(dentry->d_sb))
return -EOPNOTSUPP;
- return reiserfs_xattr_get(inode, name, buffer, size);
+ return reiserfs_xattr_get(dentry->d_inode, name, buffer, size);
}
static int
-user_set(struct inode *inode, const char *name, const void *buffer,
- size_t size, int flags)
+user_set(struct dentry *dentry, const char *name, const void *buffer,
+ size_t size, int flags, int handler_flags)
{
if (strlen(name) < sizeof(XATTR_USER_PREFIX))
return -EINVAL;
- if (!reiserfs_xattrs_user(inode->i_sb))
+ if (!reiserfs_xattrs_user(dentry->d_sb))
return -EOPNOTSUPP;
- return reiserfs_xattr_set(inode, name, buffer, size, flags);
+ return reiserfs_xattr_set(dentry->d_inode, name, buffer, size, flags);
}
-static size_t user_list(struct inode *inode, char *list, size_t list_size,
- const char *name, size_t name_len)
+static size_t user_list(struct dentry *dentry, char *list, size_t list_size,
+ const char *name, size_t name_len, int handler_flags)
{
const size_t len = name_len + 1;
- if (!reiserfs_xattrs_user(inode->i_sb))
+ if (!reiserfs_xattrs_user(dentry->d_sb))
return 0;
if (list && len <= list_size) {
memcpy(list, name, name_len);
diff --git a/fs/romfs/super.c b/fs/romfs/super.c
index c117fa80d1e..42d21354689 100644
--- a/fs/romfs/super.c
+++ b/fs/romfs/super.c
@@ -544,6 +544,7 @@ error:
error_rsb_inval:
ret = -EINVAL;
error_rsb:
+ kfree(rsb);
return ret;
}
diff --git a/fs/signalfd.c b/fs/signalfd.c
index b07565c9438..1dabe4ee02f 100644
--- a/fs/signalfd.c
+++ b/fs/signalfd.c
@@ -236,7 +236,7 @@ SYSCALL_DEFINE4(signalfd4, int, ufd, sigset_t __user *, user_mask,
* anon_inode_getfd() will install the fd.
*/
ufd = anon_inode_getfd("[signalfd]", &signalfd_fops, ctx,
- flags & (O_CLOEXEC | O_NONBLOCK));
+ O_RDWR | (flags & (O_CLOEXEC | O_NONBLOCK)));
if (ufd < 0)
kfree(ctx);
} else {
diff --git a/fs/stack.c b/fs/stack.c
index 67716f6a1a4..4a6f7f44065 100644
--- a/fs/stack.c
+++ b/fs/stack.c
@@ -7,18 +7,63 @@
* This function cannot be inlined since i_size_{read,write} is rather
* heavy-weight on 32-bit systems
*/
-void fsstack_copy_inode_size(struct inode *dst, const struct inode *src)
+void fsstack_copy_inode_size(struct inode *dst, struct inode *src)
{
- i_size_write(dst, i_size_read((struct inode *)src));
- dst->i_blocks = src->i_blocks;
+ loff_t i_size;
+ blkcnt_t i_blocks;
+
+ /*
+ * i_size_read() includes its own seqlocking and protection from
+ * preemption (see include/linux/fs.h): we need nothing extra for
+ * that here, and prefer to avoid nesting locks than attempt to keep
+ * i_size and i_blocks in sync together.
+ */
+ i_size = i_size_read(src);
+
+ /*
+ * But if CONFIG_LBDAF (on 32-bit), we ought to make an effort to
+ * keep the two halves of i_blocks in sync despite SMP or PREEMPT -
+ * though stat's generic_fillattr() doesn't bother, and we won't be
+ * applying quotas (where i_blocks does become important) at the
+ * upper level.
+ *
+ * We don't actually know what locking is used at the lower level;
+ * but if it's a filesystem that supports quotas, it will be using
+ * i_lock as in inode_add_bytes(). tmpfs uses other locking, and
+ * its 32-bit is (just) able to exceed 2TB i_size with the aid of
+ * holes; but its i_blocks cannot carry into the upper long without
+ * almost 2TB swap - let's ignore that case.
+ */
+ if (sizeof(i_blocks) > sizeof(long))
+ spin_lock(&src->i_lock);
+ i_blocks = src->i_blocks;
+ if (sizeof(i_blocks) > sizeof(long))
+ spin_unlock(&src->i_lock);
+
+ /*
+ * If CONFIG_SMP or CONFIG_PREEMPT on 32-bit, it's vital for
+ * fsstack_copy_inode_size() to hold some lock around
+ * i_size_write(), otherwise i_size_read() may spin forever (see
+ * include/linux/fs.h). We don't necessarily hold i_mutex when this
+ * is called, so take i_lock for that case.
+ *
+ * And if CONFIG_LBADF (on 32-bit), continue our effort to keep the
+ * two halves of i_blocks in sync despite SMP or PREEMPT: use i_lock
+ * for that case too, and do both at once by combining the tests.
+ *
+ * There is none of this locking overhead in the 64-bit case.
+ */
+ if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
+ spin_lock(&dst->i_lock);
+ i_size_write(dst, i_size);
+ dst->i_blocks = i_blocks;
+ if (sizeof(i_size) > sizeof(long) || sizeof(i_blocks) > sizeof(long))
+ spin_unlock(&dst->i_lock);
}
EXPORT_SYMBOL_GPL(fsstack_copy_inode_size);
-/* copy all attributes; get_nlinks is optional way to override the i_nlink
- * copying
- */
-void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
- int (*get_nlinks)(struct inode *))
+/* copy all attributes */
+void fsstack_copy_attr_all(struct inode *dest, const struct inode *src)
{
dest->i_mode = src->i_mode;
dest->i_uid = src->i_uid;
@@ -29,14 +74,6 @@ void fsstack_copy_attr_all(struct inode *dest, const struct inode *src,
dest->i_ctime = src->i_ctime;
dest->i_blkbits = src->i_blkbits;
dest->i_flags = src->i_flags;
-
- /*
- * Update the nlinks AFTER updating the above fields, because the
- * get_links callback may depend on them.
- */
- if (!get_nlinks)
- dest->i_nlink = src->i_nlink;
- else
- dest->i_nlink = (*get_nlinks)(dest);
+ dest->i_nlink = src->i_nlink;
}
EXPORT_SYMBOL_GPL(fsstack_copy_attr_all);
diff --git a/fs/stat.c b/fs/stat.c
index 075694e31d8..c4ecd52c573 100644
--- a/fs/stat.c
+++ b/fs/stat.c
@@ -401,9 +401,9 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, char __user *, filename,
}
#endif /* __ARCH_WANT_STAT64 */
-void inode_add_bytes(struct inode *inode, loff_t bytes)
+/* Caller is here responsible for sufficient locking (ie. inode->i_lock) */
+void __inode_add_bytes(struct inode *inode, loff_t bytes)
{
- spin_lock(&inode->i_lock);
inode->i_blocks += bytes >> 9;
bytes &= 511;
inode->i_bytes += bytes;
@@ -411,6 +411,12 @@ void inode_add_bytes(struct inode *inode, loff_t bytes)
inode->i_blocks++;
inode->i_bytes -= 512;
}
+}
+
+void inode_add_bytes(struct inode *inode, loff_t bytes)
+{
+ spin_lock(&inode->i_lock);
+ __inode_add_bytes(inode, bytes);
spin_unlock(&inode->i_lock);
}
diff --git a/fs/super.c b/fs/super.c
index 19eb70b374b..aff046b0fe7 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -901,8 +901,9 @@ int get_sb_single(struct file_system_type *fs_type,
return error;
}
s->s_flags |= MS_ACTIVE;
+ } else {
+ do_remount_sb(s, flags, data, 0);
}
- do_remount_sb(s, flags, data, 0);
simple_set_mnt(mnt, s);
return 0;
}
diff --git a/fs/sync.c b/fs/sync.c
index 36752a68348..418727a2a23 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -355,6 +355,7 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
{
int ret;
struct file *file;
+ struct address_space *mapping;
loff_t endbyte; /* inclusive */
int fput_needed;
umode_t i_mode;
@@ -405,7 +406,28 @@ SYSCALL_DEFINE(sync_file_range)(int fd, loff_t offset, loff_t nbytes,
!S_ISLNK(i_mode))
goto out_put;
- ret = do_sync_mapping_range(file->f_mapping, offset, endbyte, flags);
+ mapping = file->f_mapping;
+ if (!mapping) {
+ ret = -EINVAL;
+ goto out_put;
+ }
+
+ ret = 0;
+ if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
+ ret = filemap_fdatawait_range(mapping, offset, endbyte);
+ if (ret < 0)
+ goto out_put;
+ }
+
+ if (flags & SYNC_FILE_RANGE_WRITE) {
+ ret = filemap_fdatawrite_range(mapping, offset, endbyte);
+ if (ret < 0)
+ goto out_put;
+ }
+
+ if (flags & SYNC_FILE_RANGE_WAIT_AFTER)
+ ret = filemap_fdatawait_range(mapping, offset, endbyte);
+
out_put:
fput_light(file, fput_needed);
out:
@@ -437,38 +459,3 @@ asmlinkage long SyS_sync_file_range2(long fd, long flags,
}
SYSCALL_ALIAS(sys_sync_file_range2, SyS_sync_file_range2);
#endif
-
-/*
- * `endbyte' is inclusive
- */
-int do_sync_mapping_range(struct address_space *mapping, loff_t offset,
- loff_t endbyte, unsigned int flags)
-{
- int ret;
-
- if (!mapping) {
- ret = -EINVAL;
- goto out;
- }
-
- ret = 0;
- if (flags & SYNC_FILE_RANGE_WAIT_BEFORE) {
- ret = filemap_fdatawait_range(mapping, offset, endbyte);
- if (ret < 0)
- goto out;
- }
-
- if (flags & SYNC_FILE_RANGE_WRITE) {
- ret = __filemap_fdatawrite_range(mapping, offset, endbyte,
- WB_SYNC_ALL);
- if (ret < 0)
- goto out;
- }
-
- if (flags & SYNC_FILE_RANGE_WAIT_AFTER) {
- ret = filemap_fdatawait_range(mapping, offset, endbyte);
- }
-out:
- return ret;
-}
-EXPORT_SYMBOL_GPL(do_sync_mapping_range);
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 60c702bc10a..a0a500af24a 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -483,7 +483,8 @@ void unmap_bin_file(struct sysfs_dirent *attr_sd)
* @attr: attribute descriptor.
*/
-int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
+int sysfs_create_bin_file(struct kobject *kobj,
+ const struct bin_attribute *attr)
{
BUG_ON(!kobj || !kobj->sd || !attr);
@@ -497,7 +498,8 @@ int sysfs_create_bin_file(struct kobject * kobj, struct bin_attribute * attr)
* @attr: attribute descriptor.
*/
-void sysfs_remove_bin_file(struct kobject * kobj, struct bin_attribute * attr)
+void sysfs_remove_bin_file(struct kobject *kobj,
+ const struct bin_attribute *attr)
{
sysfs_hash_and_remove(kobj->sd, attr->attr.name);
}
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index f05f2303a8b..699f371b9f1 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -106,8 +106,10 @@ static struct sysfs_dirent *sysfs_get_active(struct sysfs_dirent *sd)
return NULL;
t = atomic_cmpxchg(&sd->s_active, v, v + 1);
- if (likely(t == v))
+ if (likely(t == v)) {
+ rwsem_acquire_read(&sd->dep_map, 0, 1, _RET_IP_);
return sd;
+ }
if (t < 0)
return NULL;
@@ -130,6 +132,7 @@ static void sysfs_put_active(struct sysfs_dirent *sd)
if (unlikely(!sd))
return;
+ rwsem_release(&sd->dep_map, 1, _RET_IP_);
v = atomic_dec_return(&sd->s_active);
if (likely(v != SD_DEACTIVATED_BIAS))
return;
@@ -194,15 +197,21 @@ static void sysfs_deactivate(struct sysfs_dirent *sd)
BUG_ON(sd->s_sibling || !(sd->s_flags & SYSFS_FLAG_REMOVED));
sd->s_sibling = (void *)&wait;
+ rwsem_acquire(&sd->dep_map, 0, 0, _RET_IP_);
/* atomic_add_return() is a mb(), put_active() will always see
* the updated sd->s_sibling.
*/
v = atomic_add_return(SD_DEACTIVATED_BIAS, &sd->s_active);
- if (v != SD_DEACTIVATED_BIAS)
+ if (v != SD_DEACTIVATED_BIAS) {
+ lock_contended(&sd->dep_map, _RET_IP_);
wait_for_completion(&wait);
+ }
sd->s_sibling = NULL;
+
+ lock_acquired(&sd->dep_map, _RET_IP_);
+ rwsem_release(&sd->dep_map, 1, _RET_IP_);
}
static int sysfs_alloc_ino(ino_t *pino)
@@ -345,6 +354,7 @@ struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type)
atomic_set(&sd->s_count, 1);
atomic_set(&sd->s_active, 0);
+ sysfs_dirent_init_lockdep(sd);
sd->s_name = name;
sd->s_mode = mode;
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index 220b758523a..6a06a1d1ea7 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -81,24 +81,23 @@ int sysfs_sd_setattr(struct sysfs_dirent *sd, struct iattr * iattr)
if (!sd_attrs)
return -ENOMEM;
sd->s_iattr = sd_attrs;
- } else {
- /* attributes were changed at least once in past */
- iattrs = &sd_attrs->ia_iattr;
-
- if (ia_valid & ATTR_UID)
- iattrs->ia_uid = iattr->ia_uid;
- if (ia_valid & ATTR_GID)
- iattrs->ia_gid = iattr->ia_gid;
- if (ia_valid & ATTR_ATIME)
- iattrs->ia_atime = iattr->ia_atime;
- if (ia_valid & ATTR_MTIME)
- iattrs->ia_mtime = iattr->ia_mtime;
- if (ia_valid & ATTR_CTIME)
- iattrs->ia_ctime = iattr->ia_ctime;
- if (ia_valid & ATTR_MODE) {
- umode_t mode = iattr->ia_mode;
- iattrs->ia_mode = sd->s_mode = mode;
- }
+ }
+ /* attributes were changed at least once in past */
+ iattrs = &sd_attrs->ia_iattr;
+
+ if (ia_valid & ATTR_UID)
+ iattrs->ia_uid = iattr->ia_uid;
+ if (ia_valid & ATTR_GID)
+ iattrs->ia_gid = iattr->ia_gid;
+ if (ia_valid & ATTR_ATIME)
+ iattrs->ia_atime = iattr->ia_atime;
+ if (ia_valid & ATTR_MTIME)
+ iattrs->ia_mtime = iattr->ia_mtime;
+ if (ia_valid & ATTR_CTIME)
+ iattrs->ia_ctime = iattr->ia_ctime;
+ if (ia_valid & ATTR_MODE) {
+ umode_t mode = iattr->ia_mode;
+ iattrs->ia_mode = sd->s_mode = mode;
}
return 0;
}
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index ca52e7b9d8f..cdd9377a6e0 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -8,6 +8,7 @@
* This file is released under the GPLv2.
*/
+#include <linux/lockdep.h>
#include <linux/fs.h>
struct sysfs_open_dirent;
@@ -50,6 +51,9 @@ struct sysfs_inode_attrs {
struct sysfs_dirent {
atomic_t s_count;
atomic_t s_active;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
struct sysfs_dirent *s_parent;
struct sysfs_dirent *s_sibling;
const char *s_name;
@@ -84,6 +88,17 @@ static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
return sd->s_flags & SYSFS_TYPE_MASK;
}
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#define sysfs_dirent_init_lockdep(sd) \
+do { \
+ static struct lock_class_key __key; \
+ \
+ lockdep_init_map(&sd->dep_map, "s_active", &__key, 0); \
+} while(0)
+#else
+#define sysfs_dirent_init_lockdep(sd) do {} while(0)
+#endif
+
/*
* Context structure to be used while adding/removing nodes.
*/
diff --git a/fs/timerfd.c b/fs/timerfd.c
index b042bd7034b..1bfc95ad5f7 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -200,7 +200,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
hrtimer_init(&ctx->tmr, clockid, HRTIMER_MODE_ABS);
ufd = anon_inode_getfd("[timerfd]", &timerfd_fops, ctx,
- flags & TFD_SHARED_FCNTL_FLAGS);
+ O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
if (ufd < 0)
kfree(ctx);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 39849f887e7..16a6444330e 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -45,7 +45,7 @@
*
* Similarly, @i_mutex is not always locked in 'ubifs_readpage()', e.g., the
* read-ahead path does not lock it ("sys_read -> generic_file_aio_read ->
- * ondemand_readahead -> readpage"). In case of readahead, @I_LOCK flag is not
+ * ondemand_readahead -> readpage"). In case of readahead, @I_SYNC flag is not
* set as well. However, UBIFS disables readahead.
*/
diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c
index 618c2701d3a..e5a3d8e96bb 100644
--- a/fs/ubifs/gc.c
+++ b/fs/ubifs/gc.c
@@ -54,6 +54,7 @@
*/
#include <linux/pagemap.h>
+#include <linux/list_sort.h>
#include "ubifs.h"
/*
@@ -108,101 +109,6 @@ static int switch_gc_head(struct ubifs_info *c)
}
/**
- * list_sort - sort a list.
- * @priv: private data, passed to @cmp
- * @head: the list to sort
- * @cmp: the elements comparison function
- *
- * This function has been implemented by Mark J Roberts <mjr@znex.org>. It
- * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted
- * in ascending order.
- *
- * The comparison function @cmp is supposed to return a negative value if @a is
- * than @b, and a positive value if @a is greater than @b. If @a and @b are
- * equivalent, then it does not matter what this function returns.
- */
-static void list_sort(void *priv, struct list_head *head,
- int (*cmp)(void *priv, struct list_head *a,
- struct list_head *b))
-{
- struct list_head *p, *q, *e, *list, *tail, *oldhead;
- int insize, nmerges, psize, qsize, i;
-
- if (list_empty(head))
- return;
-
- list = head->next;
- list_del(head);
- insize = 1;
- for (;;) {
- p = oldhead = list;
- list = tail = NULL;
- nmerges = 0;
-
- while (p) {
- nmerges++;
- q = p;
- psize = 0;
- for (i = 0; i < insize; i++) {
- psize++;
- q = q->next == oldhead ? NULL : q->next;
- if (!q)
- break;
- }
-
- qsize = insize;
- while (psize > 0 || (qsize > 0 && q)) {
- if (!psize) {
- e = q;
- q = q->next;
- qsize--;
- if (q == oldhead)
- q = NULL;
- } else if (!qsize || !q) {
- e = p;
- p = p->next;
- psize--;
- if (p == oldhead)
- p = NULL;
- } else if (cmp(priv, p, q) <= 0) {
- e = p;
- p = p->next;
- psize--;
- if (p == oldhead)
- p = NULL;
- } else {
- e = q;
- q = q->next;
- qsize--;
- if (q == oldhead)
- q = NULL;
- }
- if (tail)
- tail->next = e;
- else
- list = e;
- e->prev = tail;
- tail = e;
- }
- p = q;
- }
-
- tail->next = list;
- list->prev = tail;
-
- if (nmerges <= 1)
- break;
-
- insize *= 2;
- }
-
- head->next = list;
- head->prev = list->prev;
- list->prev->next = head;
- list->prev = head;
-}
-
-/**
* data_nodes_cmp - compare 2 data nodes.
* @priv: UBIFS file-system description object
* @a: first data node
diff --git a/fs/xattr.c b/fs/xattr.c
index 6d4f6d3449f..46f87e828b4 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -615,12 +615,11 @@ ssize_t
generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size)
{
struct xattr_handler *handler;
- struct inode *inode = dentry->d_inode;
- handler = xattr_resolve_name(inode->i_sb->s_xattr, &name);
+ handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
if (!handler)
return -EOPNOTSUPP;
- return handler->get(inode, name, buffer, size);
+ return handler->get(dentry, name, buffer, size, handler->flags);
}
/*
@@ -630,18 +629,20 @@ generic_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t s
ssize_t
generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
{
- struct inode *inode = dentry->d_inode;
- struct xattr_handler *handler, **handlers = inode->i_sb->s_xattr;
+ struct xattr_handler *handler, **handlers = dentry->d_sb->s_xattr;
unsigned int size = 0;
if (!buffer) {
- for_each_xattr_handler(handlers, handler)
- size += handler->list(inode, NULL, 0, NULL, 0);
+ for_each_xattr_handler(handlers, handler) {
+ size += handler->list(dentry, NULL, 0, NULL, 0,
+ handler->flags);
+ }
} else {
char *buf = buffer;
for_each_xattr_handler(handlers, handler) {
- size = handler->list(inode, buf, buffer_size, NULL, 0);
+ size = handler->list(dentry, buf, buffer_size,
+ NULL, 0, handler->flags);
if (size > buffer_size)
return -ERANGE;
buf += size;
@@ -659,14 +660,13 @@ int
generic_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags)
{
struct xattr_handler *handler;
- struct inode *inode = dentry->d_inode;
if (size == 0)
value = ""; /* empty EA, do not remove */
- handler = xattr_resolve_name(inode->i_sb->s_xattr, &name);
+ handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
if (!handler)
return -EOPNOTSUPP;
- return handler->set(inode, name, value, size, flags);
+ return handler->set(dentry, name, value, size, 0, handler->flags);
}
/*
@@ -677,12 +677,12 @@ int
generic_removexattr(struct dentry *dentry, const char *name)
{
struct xattr_handler *handler;
- struct inode *inode = dentry->d_inode;
- handler = xattr_resolve_name(inode->i_sb->s_xattr, &name);
+ handler = xattr_resolve_name(dentry->d_sb->s_xattr, &name);
if (!handler)
return -EOPNOTSUPP;
- return handler->set(inode, name, NULL, 0, XATTR_REPLACE);
+ return handler->set(dentry, name, NULL, 0,
+ XATTR_REPLACE, handler->flags);
}
EXPORT_SYMBOL(generic_getxattr);
diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c
index 69e598b6986..883ca5ab8af 100644
--- a/fs/xfs/linux-2.6/xfs_acl.c
+++ b/fs/xfs/linux-2.6/xfs_acl.c
@@ -251,8 +251,9 @@ xfs_set_mode(struct inode *inode, mode_t mode)
if (mode != inode->i_mode) {
struct iattr iattr;
- iattr.ia_valid = ATTR_MODE;
+ iattr.ia_valid = ATTR_MODE | ATTR_CTIME;
iattr.ia_mode = mode;
+ iattr.ia_ctime = current_fs_time(inode->i_sb);
error = -xfs_setattr(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
}
@@ -354,37 +355,14 @@ xfs_acl_chmod(struct inode *inode)
return error;
}
-/*
- * System xattr handlers.
- *
- * Currently Posix ACLs are the only system namespace extended attribute
- * handlers supported by XFS, so we just implement the handlers here.
- * If we ever support other system extended attributes this will need
- * some refactoring.
- */
-
static int
-xfs_decode_acl(const char *name)
-{
- if (strcmp(name, "posix_acl_access") == 0)
- return ACL_TYPE_ACCESS;
- else if (strcmp(name, "posix_acl_default") == 0)
- return ACL_TYPE_DEFAULT;
- return -EINVAL;
-}
-
-static int
-xfs_xattr_system_get(struct inode *inode, const char *name,
- void *value, size_t size)
+xfs_xattr_acl_get(struct dentry *dentry, const char *name,
+ void *value, size_t size, int type)
{
struct posix_acl *acl;
- int type, error;
-
- type = xfs_decode_acl(name);
- if (type < 0)
- return type;
+ int error;
- acl = xfs_get_acl(inode, type);
+ acl = xfs_get_acl(dentry->d_inode, type);
if (IS_ERR(acl))
return PTR_ERR(acl);
if (acl == NULL)
@@ -397,15 +375,13 @@ xfs_xattr_system_get(struct inode *inode, const char *name,
}
static int
-xfs_xattr_system_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
+xfs_xattr_acl_set(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags, int type)
{
+ struct inode *inode = dentry->d_inode;
struct posix_acl *acl = NULL;
- int error = 0, type;
+ int error = 0;
- type = xfs_decode_acl(name);
- if (type < 0)
- return type;
if (flags & XATTR_CREATE)
return -EINVAL;
if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
@@ -462,8 +438,16 @@ xfs_xattr_system_set(struct inode *inode, const char *name,
return error;
}
-struct xattr_handler xfs_xattr_system_handler = {
- .prefix = XATTR_SYSTEM_PREFIX,
- .get = xfs_xattr_system_get,
- .set = xfs_xattr_system_set,
+struct xattr_handler xfs_xattr_acl_access_handler = {
+ .prefix = POSIX_ACL_XATTR_ACCESS,
+ .flags = ACL_TYPE_ACCESS,
+ .get = xfs_xattr_acl_get,
+ .set = xfs_xattr_acl_set,
+};
+
+struct xattr_handler xfs_xattr_acl_default_handler = {
+ .prefix = POSIX_ACL_XATTR_DEFAULT,
+ .flags = ACL_TYPE_DEFAULT,
+ .get = xfs_xattr_acl_get,
+ .set = xfs_xattr_acl_set,
};
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c
index b4c7d4248aa..77b8be81c76 100644
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -292,6 +292,7 @@ _xfs_buf_free_pages(
{
if (bp->b_pages != bp->b_page_array) {
kmem_free(bp->b_pages);
+ bp->b_pages = NULL;
}
}
@@ -323,9 +324,8 @@ xfs_buf_free(
ASSERT(!PagePrivate(page));
page_cache_release(page);
}
- _xfs_buf_free_pages(bp);
}
-
+ _xfs_buf_free_pages(bp);
xfs_buf_deallocate(bp);
}
@@ -1149,10 +1149,14 @@ _xfs_buf_ioapply(
if (bp->b_flags & XBF_ORDERED) {
ASSERT(!(bp->b_flags & XBF_READ));
rw = WRITE_BARRIER;
- } else if (bp->b_flags & _XBF_RUN_QUEUES) {
+ } else if (bp->b_flags & XBF_LOG_BUFFER) {
ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
bp->b_flags &= ~_XBF_RUN_QUEUES;
rw = (bp->b_flags & XBF_WRITE) ? WRITE_SYNC : READ_SYNC;
+ } else if (bp->b_flags & _XBF_RUN_QUEUES) {
+ ASSERT(!(bp->b_flags & XBF_READ_AHEAD));
+ bp->b_flags &= ~_XBF_RUN_QUEUES;
+ rw = (bp->b_flags & XBF_WRITE) ? WRITE_META : READ_META;
} else {
rw = (bp->b_flags & XBF_WRITE) ? WRITE :
(bp->b_flags & XBF_READ_AHEAD) ? READA : READ;
diff --git a/fs/xfs/linux-2.6/xfs_buf.h b/fs/xfs/linux-2.6/xfs_buf.h
index a509f4addc2..a34c7b54822 100644
--- a/fs/xfs/linux-2.6/xfs_buf.h
+++ b/fs/xfs/linux-2.6/xfs_buf.h
@@ -55,6 +55,7 @@ typedef enum {
XBF_FS_MANAGED = (1 << 8), /* filesystem controls freeing memory */
XBF_ORDERED = (1 << 11), /* use ordered writes */
XBF_READ_AHEAD = (1 << 12), /* asynchronous read-ahead */
+ XBF_LOG_BUFFER = (1 << 13), /* this is a buffer used for the log */
/* flags used only as arguments to access routines */
XBF_LOCK = (1 << 14), /* lock requested */
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index 1d5b298ba8b..225946012d0 100644
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -794,7 +794,7 @@ xfs_setup_inode(
struct inode *inode = &ip->i_vnode;
inode->i_ino = ip->i_ino;
- inode->i_state = I_NEW|I_LOCK;
+ inode->i_state = I_NEW;
inode_add_to_lists(ip->i_mount->m_super, inode);
inode->i_mode = ip->i_d.di_mode;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 09783cc444a..77414db10dc 100644
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -954,16 +954,14 @@ xfs_fs_destroy_inode(
ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
/*
- * If we have nothing to flush with this inode then complete the
- * teardown now, otherwise delay the flush operation.
+ * We always use background reclaim here because even if the
+ * inode is clean, it still may be under IO and hence we have
+ * to take the flush lock. The background reclaim path handles
+ * this more efficiently than we can here, so simply let background
+ * reclaim tear down all inodes.
*/
- if (!xfs_inode_clean(ip)) {
- xfs_inode_set_reclaim_tag(ip);
- return;
- }
-
out_reclaim:
- xfs_ireclaim(ip);
+ xfs_inode_set_reclaim_tag(ip);
}
/*
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index 6fed97a8cd3..1f5e4bb5e97 100644
--- a/fs/xfs/linux-2.6/xfs_sync.c
+++ b/fs/xfs/linux-2.6/xfs_sync.c
@@ -65,7 +65,6 @@ xfs_inode_ag_lookup(
* as the tree is sparse and a gang lookup walks to find
* the number of objects requested.
*/
- read_lock(&pag->pag_ici_lock);
if (tag == XFS_ICI_NO_TAG) {
nr_found = radix_tree_gang_lookup(&pag->pag_ici_root,
(void **)&ip, *first_index, 1);
@@ -74,7 +73,7 @@ xfs_inode_ag_lookup(
(void **)&ip, *first_index, 1, tag);
}
if (!nr_found)
- goto unlock;
+ return NULL;
/*
* Update the index for the next lookup. Catch overflows
@@ -84,13 +83,8 @@ xfs_inode_ag_lookup(
*/
*first_index = XFS_INO_TO_AGINO(mp, ip->i_ino + 1);
if (*first_index < XFS_INO_TO_AGINO(mp, ip->i_ino))
- goto unlock;
-
+ return NULL;
return ip;
-
-unlock:
- read_unlock(&pag->pag_ici_lock);
- return NULL;
}
STATIC int
@@ -100,7 +94,8 @@ xfs_inode_ag_walk(
int (*execute)(struct xfs_inode *ip,
struct xfs_perag *pag, int flags),
int flags,
- int tag)
+ int tag,
+ int exclusive)
{
struct xfs_perag *pag = &mp->m_perag[ag];
uint32_t first_index;
@@ -114,10 +109,20 @@ restart:
int error = 0;
xfs_inode_t *ip;
+ if (exclusive)
+ write_lock(&pag->pag_ici_lock);
+ else
+ read_lock(&pag->pag_ici_lock);
ip = xfs_inode_ag_lookup(mp, pag, &first_index, tag);
- if (!ip)
+ if (!ip) {
+ if (exclusive)
+ write_unlock(&pag->pag_ici_lock);
+ else
+ read_unlock(&pag->pag_ici_lock);
break;
+ }
+ /* execute releases pag->pag_ici_lock */
error = execute(ip, pag, flags);
if (error == EAGAIN) {
skipped++;
@@ -125,9 +130,8 @@ restart:
}
if (error)
last_error = error;
- /*
- * bail out if the filesystem is corrupted.
- */
+
+ /* bail out if the filesystem is corrupted. */
if (error == EFSCORRUPTED)
break;
@@ -148,7 +152,8 @@ xfs_inode_ag_iterator(
int (*execute)(struct xfs_inode *ip,
struct xfs_perag *pag, int flags),
int flags,
- int tag)
+ int tag,
+ int exclusive)
{
int error = 0;
int last_error = 0;
@@ -157,7 +162,8 @@ xfs_inode_ag_iterator(
for (ag = 0; ag < mp->m_sb.sb_agcount; ag++) {
if (!mp->m_perag[ag].pag_ici_init)
continue;
- error = xfs_inode_ag_walk(mp, ag, execute, flags, tag);
+ error = xfs_inode_ag_walk(mp, ag, execute, flags, tag,
+ exclusive);
if (error) {
last_error = error;
if (error == EFSCORRUPTED)
@@ -174,30 +180,31 @@ xfs_sync_inode_valid(
struct xfs_perag *pag)
{
struct inode *inode = VFS_I(ip);
+ int error = EFSCORRUPTED;
/* nothing to sync during shutdown */
- if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
- read_unlock(&pag->pag_ici_lock);
- return EFSCORRUPTED;
- }
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount))
+ goto out_unlock;
- /*
- * If we can't get a reference on the inode, it must be in reclaim.
- * Leave it for the reclaim code to flush. Also avoid inodes that
- * haven't been fully initialised.
- */
- if (!igrab(inode)) {
- read_unlock(&pag->pag_ici_lock);
- return ENOENT;
- }
- read_unlock(&pag->pag_ici_lock);
+ /* avoid new or reclaimable inodes. Leave for reclaim code to flush */
+ error = ENOENT;
+ if (xfs_iflags_test(ip, XFS_INEW | XFS_IRECLAIMABLE | XFS_IRECLAIM))
+ goto out_unlock;
- if (is_bad_inode(inode) || xfs_iflags_test(ip, XFS_INEW)) {
+ /* If we can't grab the inode, it must on it's way to reclaim. */
+ if (!igrab(inode))
+ goto out_unlock;
+
+ if (is_bad_inode(inode)) {
IRELE(ip);
- return ENOENT;
+ goto out_unlock;
}
- return 0;
+ /* inode is valid */
+ error = 0;
+out_unlock:
+ read_unlock(&pag->pag_ici_lock);
+ return error;
}
STATIC int
@@ -282,7 +289,7 @@ xfs_sync_data(
ASSERT((flags & ~(SYNC_TRYLOCK|SYNC_WAIT)) == 0);
error = xfs_inode_ag_iterator(mp, xfs_sync_inode_data, flags,
- XFS_ICI_NO_TAG);
+ XFS_ICI_NO_TAG, 0);
if (error)
return XFS_ERROR(error);
@@ -304,7 +311,7 @@ xfs_sync_attr(
ASSERT((flags & ~SYNC_WAIT) == 0);
return xfs_inode_ag_iterator(mp, xfs_sync_inode_attr, flags,
- XFS_ICI_NO_TAG);
+ XFS_ICI_NO_TAG, 0);
}
STATIC int
@@ -664,60 +671,6 @@ xfs_syncd_stop(
kthread_stop(mp->m_sync_task);
}
-STATIC int
-xfs_reclaim_inode(
- xfs_inode_t *ip,
- int sync_mode)
-{
- xfs_perag_t *pag = xfs_get_perag(ip->i_mount, ip->i_ino);
-
- /* The hash lock here protects a thread in xfs_iget_core from
- * racing with us on linking the inode back with a vnode.
- * Once we have the XFS_IRECLAIM flag set it will not touch
- * us.
- */
- write_lock(&pag->pag_ici_lock);
- spin_lock(&ip->i_flags_lock);
- if (__xfs_iflags_test(ip, XFS_IRECLAIM) ||
- !__xfs_iflags_test(ip, XFS_IRECLAIMABLE)) {
- spin_unlock(&ip->i_flags_lock);
- write_unlock(&pag->pag_ici_lock);
- return -EAGAIN;
- }
- __xfs_iflags_set(ip, XFS_IRECLAIM);
- spin_unlock(&ip->i_flags_lock);
- write_unlock(&pag->pag_ici_lock);
- xfs_put_perag(ip->i_mount, pag);
-
- /*
- * If the inode is still dirty, then flush it out. If the inode
- * is not in the AIL, then it will be OK to flush it delwri as
- * long as xfs_iflush() does not keep any references to the inode.
- * We leave that decision up to xfs_iflush() since it has the
- * knowledge of whether it's OK to simply do a delwri flush of
- * the inode or whether we need to wait until the inode is
- * pulled from the AIL.
- * We get the flush lock regardless, though, just to make sure
- * we don't free it while it is being flushed.
- */
- xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_iflock(ip);
-
- /*
- * In the case of a forced shutdown we rely on xfs_iflush() to
- * wait for the inode to be unpinned before returning an error.
- */
- if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
- /* synchronize with xfs_iflush_done */
- xfs_iflock(ip);
- xfs_ifunlock(ip);
- }
-
- xfs_iunlock(ip, XFS_ILOCK_EXCL);
- xfs_ireclaim(ip);
- return 0;
-}
-
void
__xfs_inode_set_reclaim_tag(
struct xfs_perag *pag,
@@ -760,19 +713,55 @@ __xfs_inode_clear_reclaim_tag(
}
STATIC int
-xfs_reclaim_inode_now(
+xfs_reclaim_inode(
struct xfs_inode *ip,
struct xfs_perag *pag,
- int flags)
+ int sync_mode)
{
- /* ignore if already under reclaim */
- if (xfs_iflags_test(ip, XFS_IRECLAIM)) {
- read_unlock(&pag->pag_ici_lock);
+ /*
+ * The radix tree lock here protects a thread in xfs_iget from racing
+ * with us starting reclaim on the inode. Once we have the
+ * XFS_IRECLAIM flag set it will not touch us.
+ */
+ spin_lock(&ip->i_flags_lock);
+ ASSERT_ALWAYS(__xfs_iflags_test(ip, XFS_IRECLAIMABLE));
+ if (__xfs_iflags_test(ip, XFS_IRECLAIM)) {
+ /* ignore as it is already under reclaim */
+ spin_unlock(&ip->i_flags_lock);
+ write_unlock(&pag->pag_ici_lock);
return 0;
}
- read_unlock(&pag->pag_ici_lock);
+ __xfs_iflags_set(ip, XFS_IRECLAIM);
+ spin_unlock(&ip->i_flags_lock);
+ write_unlock(&pag->pag_ici_lock);
- return xfs_reclaim_inode(ip, flags);
+ /*
+ * If the inode is still dirty, then flush it out. If the inode
+ * is not in the AIL, then it will be OK to flush it delwri as
+ * long as xfs_iflush() does not keep any references to the inode.
+ * We leave that decision up to xfs_iflush() since it has the
+ * knowledge of whether it's OK to simply do a delwri flush of
+ * the inode or whether we need to wait until the inode is
+ * pulled from the AIL.
+ * We get the flush lock regardless, though, just to make sure
+ * we don't free it while it is being flushed.
+ */
+ xfs_ilock(ip, XFS_ILOCK_EXCL);
+ xfs_iflock(ip);
+
+ /*
+ * In the case of a forced shutdown we rely on xfs_iflush() to
+ * wait for the inode to be unpinned before returning an error.
+ */
+ if (!is_bad_inode(VFS_I(ip)) && xfs_iflush(ip, sync_mode) == 0) {
+ /* synchronize with xfs_iflush_done */
+ xfs_iflock(ip);
+ xfs_ifunlock(ip);
+ }
+
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_ireclaim(ip);
+ return 0;
}
int
@@ -780,6 +769,6 @@ xfs_reclaim_inodes(
xfs_mount_t *mp,
int mode)
{
- return xfs_inode_ag_iterator(mp, xfs_reclaim_inode_now, mode,
- XFS_ICI_RECLAIM_TAG);
+ return xfs_inode_ag_iterator(mp, xfs_reclaim_inode, mode,
+ XFS_ICI_RECLAIM_TAG, 1);
}
diff --git a/fs/xfs/linux-2.6/xfs_sync.h b/fs/xfs/linux-2.6/xfs_sync.h
index a500b4d9183..ea932b43335 100644
--- a/fs/xfs/linux-2.6/xfs_sync.h
+++ b/fs/xfs/linux-2.6/xfs_sync.h
@@ -54,6 +54,6 @@ void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp, struct xfs_perag *pag,
int xfs_sync_inode_valid(struct xfs_inode *ip, struct xfs_perag *pag);
int xfs_inode_ag_iterator(struct xfs_mount *mp,
int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, int flags),
- int flags, int tag);
+ int flags, int tag, int write_lock);
#endif
diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h
index c40834bdee5..c22a608321a 100644
--- a/fs/xfs/linux-2.6/xfs_trace.h
+++ b/fs/xfs/linux-2.6/xfs_trace.h
@@ -33,51 +33,55 @@ struct xfs_dquot;
struct xlog_ticket;
struct log;
+DECLARE_EVENT_CLASS(xfs_attr_list_class,
+ TP_PROTO(struct xfs_attr_list_context *ctx),
+ TP_ARGS(ctx),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(u32, hashval)
+ __field(u32, blkno)
+ __field(u32, offset)
+ __field(void *, alist)
+ __field(int, bufsize)
+ __field(int, count)
+ __field(int, firstu)
+ __field(int, dupcnt)
+ __field(int, flags)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev;
+ __entry->ino = ctx->dp->i_ino;
+ __entry->hashval = ctx->cursor->hashval;
+ __entry->blkno = ctx->cursor->blkno;
+ __entry->offset = ctx->cursor->offset;
+ __entry->alist = ctx->alist;
+ __entry->bufsize = ctx->bufsize;
+ __entry->count = ctx->count;
+ __entry->firstu = ctx->firstu;
+ __entry->flags = ctx->flags;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u "
+ "alist 0x%p size %u count %u firstu %u flags %d %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->hashval,
+ __entry->blkno,
+ __entry->offset,
+ __entry->dupcnt,
+ __entry->alist,
+ __entry->bufsize,
+ __entry->count,
+ __entry->firstu,
+ __entry->flags,
+ __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS)
+ )
+)
+
#define DEFINE_ATTR_LIST_EVENT(name) \
-TRACE_EVENT(name, \
+DEFINE_EVENT(xfs_attr_list_class, name, \
TP_PROTO(struct xfs_attr_list_context *ctx), \
- TP_ARGS(ctx), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_ino_t, ino) \
- __field(u32, hashval) \
- __field(u32, blkno) \
- __field(u32, offset) \
- __field(void *, alist) \
- __field(int, bufsize) \
- __field(int, count) \
- __field(int, firstu) \
- __field(int, dupcnt) \
- __field(int, flags) \
- ), \
- TP_fast_assign( \
- __entry->dev = VFS_I(ctx->dp)->i_sb->s_dev; \
- __entry->ino = ctx->dp->i_ino; \
- __entry->hashval = ctx->cursor->hashval; \
- __entry->blkno = ctx->cursor->blkno; \
- __entry->offset = ctx->cursor->offset; \
- __entry->alist = ctx->alist; \
- __entry->bufsize = ctx->bufsize; \
- __entry->count = ctx->count; \
- __entry->firstu = ctx->firstu; \
- __entry->flags = ctx->flags; \
- ), \
- TP_printk("dev %d:%d ino 0x%llx cursor h/b/o 0x%x/0x%x/%u dupcnt %u " \
- "alist 0x%p size %u count %u firstu %u flags %d %s", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->ino, \
- __entry->hashval, \
- __entry->blkno, \
- __entry->offset, \
- __entry->dupcnt, \
- __entry->alist, \
- __entry->bufsize, \
- __entry->count, \
- __entry->firstu, \
- __entry->flags, \
- __print_flags(__entry->flags, "|", XFS_ATTR_FLAGS) \
- ) \
-)
+ TP_ARGS(ctx))
DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf);
DEFINE_ATTR_LIST_EVENT(xfs_attr_list_sf_all);
DEFINE_ATTR_LIST_EVENT(xfs_attr_list_leaf);
@@ -178,91 +182,99 @@ TRACE_EVENT(xfs_iext_insert,
(char *)__entry->caller_ip)
);
+DECLARE_EVENT_CLASS(xfs_bmap_class,
+ TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state,
+ unsigned long caller_ip),
+ TP_ARGS(ip, idx, state, caller_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(xfs_extnum_t, idx)
+ __field(xfs_fileoff_t, startoff)
+ __field(xfs_fsblock_t, startblock)
+ __field(xfs_filblks_t, blockcount)
+ __field(xfs_exntst_t, state)
+ __field(int, bmap_state)
+ __field(unsigned long, caller_ip)
+ ),
+ TP_fast_assign(
+ struct xfs_ifork *ifp = (state & BMAP_ATTRFORK) ?
+ ip->i_afp : &ip->i_df;
+ struct xfs_bmbt_irec r;
+
+ xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r);
+ __entry->dev = VFS_I(ip)->i_sb->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->idx = idx;
+ __entry->startoff = r.br_startoff;
+ __entry->startblock = r.br_startblock;
+ __entry->blockcount = r.br_blockcount;
+ __entry->state = r.br_state;
+ __entry->bmap_state = state;
+ __entry->caller_ip = caller_ip;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx state %s idx %ld "
+ "offset %lld block %s count %lld flag %d caller %pf",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS),
+ (long)__entry->idx,
+ __entry->startoff,
+ xfs_fmtfsblock(__entry->startblock),
+ __entry->blockcount,
+ __entry->state,
+ (char *)__entry->caller_ip)
+)
+
#define DEFINE_BMAP_EVENT(name) \
-TRACE_EVENT(name, \
+DEFINE_EVENT(xfs_bmap_class, name, \
TP_PROTO(struct xfs_inode *ip, xfs_extnum_t idx, int state, \
unsigned long caller_ip), \
- TP_ARGS(ip, idx, state, caller_ip), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_ino_t, ino) \
- __field(xfs_extnum_t, idx) \
- __field(xfs_fileoff_t, startoff) \
- __field(xfs_fsblock_t, startblock) \
- __field(xfs_filblks_t, blockcount) \
- __field(xfs_exntst_t, state) \
- __field(int, bmap_state) \
- __field(unsigned long, caller_ip) \
- ), \
- TP_fast_assign( \
- struct xfs_ifork *ifp = (state & BMAP_ATTRFORK) ? \
- ip->i_afp : &ip->i_df; \
- struct xfs_bmbt_irec r; \
- \
- xfs_bmbt_get_all(xfs_iext_get_ext(ifp, idx), &r); \
- __entry->dev = VFS_I(ip)->i_sb->s_dev; \
- __entry->ino = ip->i_ino; \
- __entry->idx = idx; \
- __entry->startoff = r.br_startoff; \
- __entry->startblock = r.br_startblock; \
- __entry->blockcount = r.br_blockcount; \
- __entry->state = r.br_state; \
- __entry->bmap_state = state; \
- __entry->caller_ip = caller_ip; \
- ), \
- TP_printk("dev %d:%d ino 0x%llx state %s idx %ld " \
- "offset %lld block %s count %lld flag %d caller %pf", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->ino, \
- __print_flags(__entry->bmap_state, "|", XFS_BMAP_EXT_FLAGS), \
- (long)__entry->idx, \
- __entry->startoff, \
- xfs_fmtfsblock(__entry->startblock), \
- __entry->blockcount, \
- __entry->state, \
- (char *)__entry->caller_ip) \
-)
-
+ TP_ARGS(ip, idx, state, caller_ip))
DEFINE_BMAP_EVENT(xfs_iext_remove);
DEFINE_BMAP_EVENT(xfs_bmap_pre_update);
DEFINE_BMAP_EVENT(xfs_bmap_post_update);
DEFINE_BMAP_EVENT(xfs_extlist);
-#define DEFINE_BUF_EVENT(tname) \
-TRACE_EVENT(tname, \
- TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \
- TP_ARGS(bp, caller_ip), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_daddr_t, bno) \
- __field(size_t, buffer_length) \
- __field(int, hold) \
- __field(int, pincount) \
- __field(unsigned, lockval) \
- __field(unsigned, flags) \
- __field(unsigned long, caller_ip) \
- ), \
- TP_fast_assign( \
- __entry->dev = bp->b_target->bt_dev; \
- __entry->bno = bp->b_bn; \
- __entry->buffer_length = bp->b_buffer_length; \
- __entry->hold = atomic_read(&bp->b_hold); \
- __entry->pincount = atomic_read(&bp->b_pin_count); \
- __entry->lockval = xfs_buf_lock_value(bp); \
- __entry->flags = bp->b_flags; \
- __entry->caller_ip = caller_ip; \
- ), \
- TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " \
- "lock %d flags %s caller %pf", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- (unsigned long long)__entry->bno, \
- __entry->buffer_length, \
- __entry->hold, \
- __entry->pincount, \
- __entry->lockval, \
- __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), \
- (void *)__entry->caller_ip) \
+DECLARE_EVENT_CLASS(xfs_buf_class,
+ TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip),
+ TP_ARGS(bp, caller_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_daddr_t, bno)
+ __field(size_t, buffer_length)
+ __field(int, hold)
+ __field(int, pincount)
+ __field(unsigned, lockval)
+ __field(unsigned, flags)
+ __field(unsigned long, caller_ip)
+ ),
+ TP_fast_assign(
+ __entry->dev = bp->b_target->bt_dev;
+ __entry->bno = bp->b_bn;
+ __entry->buffer_length = bp->b_buffer_length;
+ __entry->hold = atomic_read(&bp->b_hold);
+ __entry->pincount = atomic_read(&bp->b_pin_count);
+ __entry->lockval = xfs_buf_lock_value(bp);
+ __entry->flags = bp->b_flags;
+ __entry->caller_ip = caller_ip;
+ ),
+ TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
+ "lock %d flags %s caller %pf",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->bno,
+ __entry->buffer_length,
+ __entry->hold,
+ __entry->pincount,
+ __entry->lockval,
+ __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
+ (void *)__entry->caller_ip)
)
+
+#define DEFINE_BUF_EVENT(name) \
+DEFINE_EVENT(xfs_buf_class, name, \
+ TP_PROTO(struct xfs_buf *bp, unsigned long caller_ip), \
+ TP_ARGS(bp, caller_ip))
DEFINE_BUF_EVENT(xfs_buf_init);
DEFINE_BUF_EVENT(xfs_buf_free);
DEFINE_BUF_EVENT(xfs_buf_hold);
@@ -299,41 +311,45 @@ DEFINE_BUF_EVENT(xfs_reset_dqcounts);
DEFINE_BUF_EVENT(xfs_inode_item_push);
/* pass flags explicitly */
-#define DEFINE_BUF_FLAGS_EVENT(tname) \
-TRACE_EVENT(tname, \
- TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \
- TP_ARGS(bp, flags, caller_ip), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_daddr_t, bno) \
- __field(size_t, buffer_length) \
- __field(int, hold) \
- __field(int, pincount) \
- __field(unsigned, lockval) \
- __field(unsigned, flags) \
- __field(unsigned long, caller_ip) \
- ), \
- TP_fast_assign( \
- __entry->dev = bp->b_target->bt_dev; \
- __entry->bno = bp->b_bn; \
- __entry->buffer_length = bp->b_buffer_length; \
- __entry->flags = flags; \
- __entry->hold = atomic_read(&bp->b_hold); \
- __entry->pincount = atomic_read(&bp->b_pin_count); \
- __entry->lockval = xfs_buf_lock_value(bp); \
- __entry->caller_ip = caller_ip; \
- ), \
- TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " \
- "lock %d flags %s caller %pf", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- (unsigned long long)__entry->bno, \
- __entry->buffer_length, \
- __entry->hold, \
- __entry->pincount, \
- __entry->lockval, \
- __print_flags(__entry->flags, "|", XFS_BUF_FLAGS), \
- (void *)__entry->caller_ip) \
+DECLARE_EVENT_CLASS(xfs_buf_flags_class,
+ TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip),
+ TP_ARGS(bp, flags, caller_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_daddr_t, bno)
+ __field(size_t, buffer_length)
+ __field(int, hold)
+ __field(int, pincount)
+ __field(unsigned, lockval)
+ __field(unsigned, flags)
+ __field(unsigned long, caller_ip)
+ ),
+ TP_fast_assign(
+ __entry->dev = bp->b_target->bt_dev;
+ __entry->bno = bp->b_bn;
+ __entry->buffer_length = bp->b_buffer_length;
+ __entry->flags = flags;
+ __entry->hold = atomic_read(&bp->b_hold);
+ __entry->pincount = atomic_read(&bp->b_pin_count);
+ __entry->lockval = xfs_buf_lock_value(bp);
+ __entry->caller_ip = caller_ip;
+ ),
+ TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
+ "lock %d flags %s caller %pf",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->bno,
+ __entry->buffer_length,
+ __entry->hold,
+ __entry->pincount,
+ __entry->lockval,
+ __print_flags(__entry->flags, "|", XFS_BUF_FLAGS),
+ (void *)__entry->caller_ip)
)
+
+#define DEFINE_BUF_FLAGS_EVENT(name) \
+DEFINE_EVENT(xfs_buf_flags_class, name, \
+ TP_PROTO(struct xfs_buf *bp, unsigned flags, unsigned long caller_ip), \
+ TP_ARGS(bp, flags, caller_ip))
DEFINE_BUF_FLAGS_EVENT(xfs_buf_find);
DEFINE_BUF_FLAGS_EVENT(xfs_buf_get);
DEFINE_BUF_FLAGS_EVENT(xfs_buf_read);
@@ -376,55 +392,58 @@ TRACE_EVENT(xfs_buf_ioerror,
(void *)__entry->caller_ip)
);
-#define DEFINE_BUF_ITEM_EVENT(tname) \
-TRACE_EVENT(tname, \
- TP_PROTO(struct xfs_buf_log_item *bip), \
- TP_ARGS(bip), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_daddr_t, buf_bno) \
- __field(size_t, buf_len) \
- __field(int, buf_hold) \
- __field(int, buf_pincount) \
- __field(int, buf_lockval) \
- __field(unsigned, buf_flags) \
- __field(unsigned, bli_recur) \
- __field(int, bli_refcount) \
- __field(unsigned, bli_flags) \
- __field(void *, li_desc) \
- __field(unsigned, li_flags) \
- ), \
- TP_fast_assign( \
- __entry->dev = bip->bli_buf->b_target->bt_dev; \
- __entry->bli_flags = bip->bli_flags; \
- __entry->bli_recur = bip->bli_recur; \
- __entry->bli_refcount = atomic_read(&bip->bli_refcount); \
- __entry->buf_bno = bip->bli_buf->b_bn; \
- __entry->buf_len = bip->bli_buf->b_buffer_length; \
- __entry->buf_flags = bip->bli_buf->b_flags; \
- __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold); \
- __entry->buf_pincount = \
- atomic_read(&bip->bli_buf->b_pin_count); \
- __entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf); \
- __entry->li_desc = bip->bli_item.li_desc; \
- __entry->li_flags = bip->bli_item.li_flags; \
- ), \
- TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d " \
- "lock %d flags %s recur %d refcount %d bliflags %s " \
- "lidesc 0x%p liflags %s", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- (unsigned long long)__entry->buf_bno, \
- __entry->buf_len, \
- __entry->buf_hold, \
- __entry->buf_pincount, \
- __entry->buf_lockval, \
- __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS), \
- __entry->bli_recur, \
- __entry->bli_refcount, \
- __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS), \
- __entry->li_desc, \
- __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS)) \
+DECLARE_EVENT_CLASS(xfs_buf_item_class,
+ TP_PROTO(struct xfs_buf_log_item *bip),
+ TP_ARGS(bip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_daddr_t, buf_bno)
+ __field(size_t, buf_len)
+ __field(int, buf_hold)
+ __field(int, buf_pincount)
+ __field(int, buf_lockval)
+ __field(unsigned, buf_flags)
+ __field(unsigned, bli_recur)
+ __field(int, bli_refcount)
+ __field(unsigned, bli_flags)
+ __field(void *, li_desc)
+ __field(unsigned, li_flags)
+ ),
+ TP_fast_assign(
+ __entry->dev = bip->bli_buf->b_target->bt_dev;
+ __entry->bli_flags = bip->bli_flags;
+ __entry->bli_recur = bip->bli_recur;
+ __entry->bli_refcount = atomic_read(&bip->bli_refcount);
+ __entry->buf_bno = bip->bli_buf->b_bn;
+ __entry->buf_len = bip->bli_buf->b_buffer_length;
+ __entry->buf_flags = bip->bli_buf->b_flags;
+ __entry->buf_hold = atomic_read(&bip->bli_buf->b_hold);
+ __entry->buf_pincount = atomic_read(&bip->bli_buf->b_pin_count);
+ __entry->buf_lockval = xfs_buf_lock_value(bip->bli_buf);
+ __entry->li_desc = bip->bli_item.li_desc;
+ __entry->li_flags = bip->bli_item.li_flags;
+ ),
+ TP_printk("dev %d:%d bno 0x%llx len 0x%zx hold %d pincount %d "
+ "lock %d flags %s recur %d refcount %d bliflags %s "
+ "lidesc 0x%p liflags %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ (unsigned long long)__entry->buf_bno,
+ __entry->buf_len,
+ __entry->buf_hold,
+ __entry->buf_pincount,
+ __entry->buf_lockval,
+ __print_flags(__entry->buf_flags, "|", XFS_BUF_FLAGS),
+ __entry->bli_recur,
+ __entry->bli_refcount,
+ __print_flags(__entry->bli_flags, "|", XFS_BLI_FLAGS),
+ __entry->li_desc,
+ __print_flags(__entry->li_flags, "|", XFS_LI_FLAGS))
)
+
+#define DEFINE_BUF_ITEM_EVENT(name) \
+DEFINE_EVENT(xfs_buf_item_class, name, \
+ TP_PROTO(struct xfs_buf_log_item *bip), \
+ TP_ARGS(bip))
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_size_stale);
DEFINE_BUF_ITEM_EVENT(xfs_buf_item_format);
@@ -450,78 +469,90 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold);
DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);
DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);
+DECLARE_EVENT_CLASS(xfs_lock_class,
+ TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,
+ unsigned long caller_ip),
+ TP_ARGS(ip, lock_flags, caller_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(int, lock_flags)
+ __field(unsigned long, caller_ip)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(ip)->i_sb->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->lock_flags = lock_flags;
+ __entry->caller_ip = caller_ip;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS),
+ (void *)__entry->caller_ip)
+)
+
#define DEFINE_LOCK_EVENT(name) \
-TRACE_EVENT(name, \
+DEFINE_EVENT(xfs_lock_class, name, \
TP_PROTO(struct xfs_inode *ip, unsigned lock_flags, \
unsigned long caller_ip), \
- TP_ARGS(ip, lock_flags, caller_ip), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_ino_t, ino) \
- __field(int, lock_flags) \
- __field(unsigned long, caller_ip) \
- ), \
- TP_fast_assign( \
- __entry->dev = VFS_I(ip)->i_sb->s_dev; \
- __entry->ino = ip->i_ino; \
- __entry->lock_flags = lock_flags; \
- __entry->caller_ip = caller_ip; \
- ), \
- TP_printk("dev %d:%d ino 0x%llx flags %s caller %pf", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->ino, \
- __print_flags(__entry->lock_flags, "|", XFS_LOCK_FLAGS), \
- (void *)__entry->caller_ip) \
-)
-
+ TP_ARGS(ip, lock_flags, caller_ip))
DEFINE_LOCK_EVENT(xfs_ilock);
DEFINE_LOCK_EVENT(xfs_ilock_nowait);
DEFINE_LOCK_EVENT(xfs_ilock_demote);
DEFINE_LOCK_EVENT(xfs_iunlock);
+DECLARE_EVENT_CLASS(xfs_iget_class,
+ TP_PROTO(struct xfs_inode *ip),
+ TP_ARGS(ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(ip)->i_sb->s_dev;
+ __entry->ino = ip->i_ino;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino)
+)
+
#define DEFINE_IGET_EVENT(name) \
-TRACE_EVENT(name, \
+DEFINE_EVENT(xfs_iget_class, name, \
TP_PROTO(struct xfs_inode *ip), \
- TP_ARGS(ip), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_ino_t, ino) \
- ), \
- TP_fast_assign( \
- __entry->dev = VFS_I(ip)->i_sb->s_dev; \
- __entry->ino = ip->i_ino; \
- ), \
- TP_printk("dev %d:%d ino 0x%llx", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->ino) \
-)
+ TP_ARGS(ip))
DEFINE_IGET_EVENT(xfs_iget_skip);
DEFINE_IGET_EVENT(xfs_iget_reclaim);
DEFINE_IGET_EVENT(xfs_iget_found);
DEFINE_IGET_EVENT(xfs_iget_alloc);
+DECLARE_EVENT_CLASS(xfs_inode_class,
+ TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip),
+ TP_ARGS(ip, caller_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(int, count)
+ __field(unsigned long, caller_ip)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(ip)->i_sb->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->count = atomic_read(&VFS_I(ip)->i_count);
+ __entry->caller_ip = caller_ip;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx count %d caller %pf",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->count,
+ (char *)__entry->caller_ip)
+)
+
#define DEFINE_INODE_EVENT(name) \
-TRACE_EVENT(name, \
+DEFINE_EVENT(xfs_inode_class, name, \
TP_PROTO(struct xfs_inode *ip, unsigned long caller_ip), \
- TP_ARGS(ip, caller_ip), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_ino_t, ino) \
- __field(int, count) \
- __field(unsigned long, caller_ip) \
- ), \
- TP_fast_assign( \
- __entry->dev = VFS_I(ip)->i_sb->s_dev; \
- __entry->ino = ip->i_ino; \
- __entry->count = atomic_read(&VFS_I(ip)->i_count); \
- __entry->caller_ip = caller_ip; \
- ), \
- TP_printk("dev %d:%d ino 0x%llx count %d caller %pf", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->ino, \
- __entry->count, \
- (char *)__entry->caller_ip) \
-)
+ TP_ARGS(ip, caller_ip))
DEFINE_INODE_EVENT(xfs_ihold);
DEFINE_INODE_EVENT(xfs_irele);
/* the old xfs_itrace_entry tracer - to be replaced by s.th. in the VFS */
@@ -529,55 +560,59 @@ DEFINE_INODE_EVENT(xfs_inode);
#define xfs_itrace_entry(ip) \
trace_xfs_inode(ip, _THIS_IP_)
-#define DEFINE_DQUOT_EVENT(tname) \
-TRACE_EVENT(tname, \
- TP_PROTO(struct xfs_dquot *dqp), \
- TP_ARGS(dqp), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(__be32, id) \
- __field(unsigned, flags) \
- __field(unsigned, nrefs) \
- __field(unsigned long long, res_bcount) \
- __field(unsigned long long, bcount) \
- __field(unsigned long long, icount) \
- __field(unsigned long long, blk_hardlimit) \
- __field(unsigned long long, blk_softlimit) \
- __field(unsigned long long, ino_hardlimit) \
- __field(unsigned long long, ino_softlimit) \
- ), \
- TP_fast_assign( \
- __entry->dev = dqp->q_mount->m_super->s_dev; \
- __entry->id = dqp->q_core.d_id; \
- __entry->flags = dqp->dq_flags; \
- __entry->nrefs = dqp->q_nrefs; \
- __entry->res_bcount = dqp->q_res_bcount; \
- __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount); \
- __entry->icount = be64_to_cpu(dqp->q_core.d_icount); \
- __entry->blk_hardlimit = \
- be64_to_cpu(dqp->q_core.d_blk_hardlimit); \
- __entry->blk_softlimit = \
- be64_to_cpu(dqp->q_core.d_blk_softlimit); \
- __entry->ino_hardlimit = \
- be64_to_cpu(dqp->q_core.d_ino_hardlimit); \
- __entry->ino_softlimit = \
- be64_to_cpu(dqp->q_core.d_ino_softlimit); \
+DECLARE_EVENT_CLASS(xfs_dquot_class,
+ TP_PROTO(struct xfs_dquot *dqp),
+ TP_ARGS(dqp),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(__be32, id)
+ __field(unsigned, flags)
+ __field(unsigned, nrefs)
+ __field(unsigned long long, res_bcount)
+ __field(unsigned long long, bcount)
+ __field(unsigned long long, icount)
+ __field(unsigned long long, blk_hardlimit)
+ __field(unsigned long long, blk_softlimit)
+ __field(unsigned long long, ino_hardlimit)
+ __field(unsigned long long, ino_softlimit)
), \
- TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx " \
- "bcnt 0x%llx [hard 0x%llx | soft 0x%llx] " \
- "icnt 0x%llx [hard 0x%llx | soft 0x%llx]", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- be32_to_cpu(__entry->id), \
- __print_flags(__entry->flags, "|", XFS_DQ_FLAGS), \
- __entry->nrefs, \
- __entry->res_bcount, \
- __entry->bcount, \
- __entry->blk_hardlimit, \
- __entry->blk_softlimit, \
- __entry->icount, \
- __entry->ino_hardlimit, \
- __entry->ino_softlimit) \
+ TP_fast_assign(
+ __entry->dev = dqp->q_mount->m_super->s_dev;
+ __entry->id = dqp->q_core.d_id;
+ __entry->flags = dqp->dq_flags;
+ __entry->nrefs = dqp->q_nrefs;
+ __entry->res_bcount = dqp->q_res_bcount;
+ __entry->bcount = be64_to_cpu(dqp->q_core.d_bcount);
+ __entry->icount = be64_to_cpu(dqp->q_core.d_icount);
+ __entry->blk_hardlimit =
+ be64_to_cpu(dqp->q_core.d_blk_hardlimit);
+ __entry->blk_softlimit =
+ be64_to_cpu(dqp->q_core.d_blk_softlimit);
+ __entry->ino_hardlimit =
+ be64_to_cpu(dqp->q_core.d_ino_hardlimit);
+ __entry->ino_softlimit =
+ be64_to_cpu(dqp->q_core.d_ino_softlimit);
+ ),
+ TP_printk("dev %d:%d id 0x%x flags %s nrefs %u res_bc 0x%llx "
+ "bcnt 0x%llx [hard 0x%llx | soft 0x%llx] "
+ "icnt 0x%llx [hard 0x%llx | soft 0x%llx]",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ be32_to_cpu(__entry->id),
+ __print_flags(__entry->flags, "|", XFS_DQ_FLAGS),
+ __entry->nrefs,
+ __entry->res_bcount,
+ __entry->bcount,
+ __entry->blk_hardlimit,
+ __entry->blk_softlimit,
+ __entry->icount,
+ __entry->ino_hardlimit,
+ __entry->ino_softlimit)
)
+
+#define DEFINE_DQUOT_EVENT(name) \
+DEFINE_EVENT(xfs_dquot_class, name, \
+ TP_PROTO(struct xfs_dquot *dqp), \
+ TP_ARGS(dqp))
DEFINE_DQUOT_EVENT(xfs_dqadjust);
DEFINE_DQUOT_EVENT(xfs_dqshake_dirty);
DEFINE_DQUOT_EVENT(xfs_dqshake_unlink);
@@ -610,72 +645,75 @@ DEFINE_DQUOT_EVENT(xfs_dqflush_done);
DEFINE_IGET_EVENT(xfs_dquot_dqalloc);
DEFINE_IGET_EVENT(xfs_dquot_dqdetach);
+DECLARE_EVENT_CLASS(xfs_loggrant_class,
+ TP_PROTO(struct log *log, struct xlog_ticket *tic),
+ TP_ARGS(log, tic),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(unsigned, trans_type)
+ __field(char, ocnt)
+ __field(char, cnt)
+ __field(int, curr_res)
+ __field(int, unit_res)
+ __field(unsigned int, flags)
+ __field(void *, reserve_headq)
+ __field(void *, write_headq)
+ __field(int, grant_reserve_cycle)
+ __field(int, grant_reserve_bytes)
+ __field(int, grant_write_cycle)
+ __field(int, grant_write_bytes)
+ __field(int, curr_cycle)
+ __field(int, curr_block)
+ __field(xfs_lsn_t, tail_lsn)
+ ),
+ TP_fast_assign(
+ __entry->dev = log->l_mp->m_super->s_dev;
+ __entry->trans_type = tic->t_trans_type;
+ __entry->ocnt = tic->t_ocnt;
+ __entry->cnt = tic->t_cnt;
+ __entry->curr_res = tic->t_curr_res;
+ __entry->unit_res = tic->t_unit_res;
+ __entry->flags = tic->t_flags;
+ __entry->reserve_headq = log->l_reserve_headq;
+ __entry->write_headq = log->l_write_headq;
+ __entry->grant_reserve_cycle = log->l_grant_reserve_cycle;
+ __entry->grant_reserve_bytes = log->l_grant_reserve_bytes;
+ __entry->grant_write_cycle = log->l_grant_write_cycle;
+ __entry->grant_write_bytes = log->l_grant_write_bytes;
+ __entry->curr_cycle = log->l_curr_cycle;
+ __entry->curr_block = log->l_curr_block;
+ __entry->tail_lsn = log->l_tail_lsn;
+ ),
+ TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u "
+ "t_unit_res %u t_flags %s reserve_headq 0x%p "
+ "write_headq 0x%p grant_reserve_cycle %d "
+ "grant_reserve_bytes %d grant_write_cycle %d "
+ "grant_write_bytes %d curr_cycle %d curr_block %d "
+ "tail_cycle %d tail_block %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES),
+ __entry->ocnt,
+ __entry->cnt,
+ __entry->curr_res,
+ __entry->unit_res,
+ __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS),
+ __entry->reserve_headq,
+ __entry->write_headq,
+ __entry->grant_reserve_cycle,
+ __entry->grant_reserve_bytes,
+ __entry->grant_write_cycle,
+ __entry->grant_write_bytes,
+ __entry->curr_cycle,
+ __entry->curr_block,
+ CYCLE_LSN(__entry->tail_lsn),
+ BLOCK_LSN(__entry->tail_lsn)
+ )
+)
-#define DEFINE_LOGGRANT_EVENT(tname) \
-TRACE_EVENT(tname, \
+#define DEFINE_LOGGRANT_EVENT(name) \
+DEFINE_EVENT(xfs_loggrant_class, name, \
TP_PROTO(struct log *log, struct xlog_ticket *tic), \
- TP_ARGS(log, tic), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(unsigned, trans_type) \
- __field(char, ocnt) \
- __field(char, cnt) \
- __field(int, curr_res) \
- __field(int, unit_res) \
- __field(unsigned int, flags) \
- __field(void *, reserve_headq) \
- __field(void *, write_headq) \
- __field(int, grant_reserve_cycle) \
- __field(int, grant_reserve_bytes) \
- __field(int, grant_write_cycle) \
- __field(int, grant_write_bytes) \
- __field(int, curr_cycle) \
- __field(int, curr_block) \
- __field(xfs_lsn_t, tail_lsn) \
- ), \
- TP_fast_assign( \
- __entry->dev = log->l_mp->m_super->s_dev; \
- __entry->trans_type = tic->t_trans_type; \
- __entry->ocnt = tic->t_ocnt; \
- __entry->cnt = tic->t_cnt; \
- __entry->curr_res = tic->t_curr_res; \
- __entry->unit_res = tic->t_unit_res; \
- __entry->flags = tic->t_flags; \
- __entry->reserve_headq = log->l_reserve_headq; \
- __entry->write_headq = log->l_write_headq; \
- __entry->grant_reserve_cycle = log->l_grant_reserve_cycle; \
- __entry->grant_reserve_bytes = log->l_grant_reserve_bytes; \
- __entry->grant_write_cycle = log->l_grant_write_cycle; \
- __entry->grant_write_bytes = log->l_grant_write_bytes; \
- __entry->curr_cycle = log->l_curr_cycle; \
- __entry->curr_block = log->l_curr_block; \
- __entry->tail_lsn = log->l_tail_lsn; \
- ), \
- TP_printk("dev %d:%d type %s t_ocnt %u t_cnt %u t_curr_res %u " \
- "t_unit_res %u t_flags %s reserve_headq 0x%p " \
- "write_headq 0x%p grant_reserve_cycle %d " \
- "grant_reserve_bytes %d grant_write_cycle %d " \
- "grant_write_bytes %d curr_cycle %d curr_block %d " \
- "tail_cycle %d tail_block %d", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __print_symbolic(__entry->trans_type, XFS_TRANS_TYPES), \
- __entry->ocnt, \
- __entry->cnt, \
- __entry->curr_res, \
- __entry->unit_res, \
- __print_flags(__entry->flags, "|", XLOG_TIC_FLAGS), \
- __entry->reserve_headq, \
- __entry->write_headq, \
- __entry->grant_reserve_cycle, \
- __entry->grant_reserve_bytes, \
- __entry->grant_write_cycle, \
- __entry->grant_write_bytes, \
- __entry->curr_cycle, \
- __entry->curr_block, \
- CYCLE_LSN(__entry->tail_lsn), \
- BLOCK_LSN(__entry->tail_lsn) \
- ) \
-)
+ TP_ARGS(log, tic))
DEFINE_LOGGRANT_EVENT(xfs_log_done_nonperm);
DEFINE_LOGGRANT_EVENT(xfs_log_done_perm);
DEFINE_LOGGRANT_EVENT(xfs_log_reserve);
@@ -815,7 +853,7 @@ TRACE_EVENT(name, \
), \
TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx " \
"offset 0x%llx count %zd flags %s " \
- "startoff 0x%llx startblock 0x%llx blockcount 0x%llx", \
+ "startoff 0x%llx startblock %s blockcount 0x%llx", \
MAJOR(__entry->dev), MINOR(__entry->dev), \
__entry->ino, \
__entry->size, \
@@ -824,7 +862,7 @@ TRACE_EVENT(name, \
__entry->count, \
__print_flags(__entry->flags, "|", BMAPI_FLAGS), \
__entry->startoff, \
- __entry->startblock, \
+ xfs_fmtfsblock(__entry->startblock), \
__entry->blockcount) \
)
DEFINE_IOMAP_EVENT(xfs_iomap_enter);
@@ -897,28 +935,32 @@ TRACE_EVENT(xfs_itruncate_start,
__entry->toss_finish)
);
+DECLARE_EVENT_CLASS(xfs_itrunc_class,
+ TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
+ TP_ARGS(ip, new_size),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(xfs_fsize_t, size)
+ __field(xfs_fsize_t, new_size)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(ip)->i_sb->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->size = ip->i_d.di_size;
+ __entry->new_size = new_size;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->size,
+ __entry->new_size)
+)
+
#define DEFINE_ITRUNC_EVENT(name) \
-TRACE_EVENT(name, \
+DEFINE_EVENT(xfs_itrunc_class, name, \
TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), \
- TP_ARGS(ip, new_size), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_ino_t, ino) \
- __field(xfs_fsize_t, size) \
- __field(xfs_fsize_t, new_size) \
- ), \
- TP_fast_assign( \
- __entry->dev = VFS_I(ip)->i_sb->s_dev; \
- __entry->ino = ip->i_ino; \
- __entry->size = ip->i_d.di_size; \
- __entry->new_size = new_size; \
- ), \
- TP_printk("dev %d:%d ino 0x%llx size 0x%llx new_size 0x%llx", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->ino, \
- __entry->size, \
- __entry->new_size) \
-)
+ TP_ARGS(ip, new_size))
DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_start);
DEFINE_ITRUNC_EVENT(xfs_itruncate_finish_end);
@@ -1037,28 +1079,28 @@ TRACE_EVENT(xfs_alloc_unbusy,
TRACE_EVENT(xfs_alloc_busysearch,
TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno,
- xfs_extlen_t len, int found),
- TP_ARGS(mp, agno, agbno, len, found),
+ xfs_extlen_t len, xfs_lsn_t lsn),
+ TP_ARGS(mp, agno, agbno, len, lsn),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_agnumber_t, agno)
__field(xfs_agblock_t, agbno)
__field(xfs_extlen_t, len)
- __field(int, found)
+ __field(xfs_lsn_t, lsn)
),
TP_fast_assign(
__entry->dev = mp->m_super->s_dev;
__entry->agno = agno;
__entry->agbno = agbno;
__entry->len = len;
- __entry->found = found;
+ __entry->lsn = lsn;
),
- TP_printk("dev %d:%d agno %u agbno %u len %u %s",
+ TP_printk("dev %d:%d agno %u agbno %u len %u force lsn 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->agno,
__entry->agbno,
__entry->len,
- __print_symbolic(__entry->found, XFS_BUSY_STATES))
+ __entry->lsn)
);
TRACE_EVENT(xfs_agf,
@@ -1152,77 +1194,80 @@ TRACE_EVENT(xfs_free_extent,
);
-#define DEFINE_ALLOC_EVENT(name) \
-TRACE_EVENT(name, \
- TP_PROTO(struct xfs_alloc_arg *args), \
- TP_ARGS(args), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_agnumber_t, agno) \
- __field(xfs_agblock_t, agbno) \
- __field(xfs_extlen_t, minlen) \
- __field(xfs_extlen_t, maxlen) \
- __field(xfs_extlen_t, mod) \
- __field(xfs_extlen_t, prod) \
- __field(xfs_extlen_t, minleft) \
- __field(xfs_extlen_t, total) \
- __field(xfs_extlen_t, alignment) \
- __field(xfs_extlen_t, minalignslop) \
- __field(xfs_extlen_t, len) \
- __field(short, type) \
- __field(short, otype) \
- __field(char, wasdel) \
- __field(char, wasfromfl) \
- __field(char, isfl) \
- __field(char, userdata) \
- __field(xfs_fsblock_t, firstblock) \
- ), \
- TP_fast_assign( \
- __entry->dev = args->mp->m_super->s_dev; \
- __entry->agno = args->agno; \
- __entry->agbno = args->agbno; \
- __entry->minlen = args->minlen; \
- __entry->maxlen = args->maxlen; \
- __entry->mod = args->mod; \
- __entry->prod = args->prod; \
- __entry->minleft = args->minleft; \
- __entry->total = args->total; \
- __entry->alignment = args->alignment; \
- __entry->minalignslop = args->minalignslop; \
- __entry->len = args->len; \
- __entry->type = args->type; \
- __entry->otype = args->otype; \
- __entry->wasdel = args->wasdel; \
- __entry->wasfromfl = args->wasfromfl; \
- __entry->isfl = args->isfl; \
- __entry->userdata = args->userdata; \
- __entry->firstblock = args->firstblock; \
- ), \
- TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u " \
- "prod %u minleft %u total %u alignment %u minalignslop %u " \
- "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d " \
- "userdata %d firstblock 0x%llx", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->agno, \
- __entry->agbno, \
- __entry->minlen, \
- __entry->maxlen, \
- __entry->mod, \
- __entry->prod, \
- __entry->minleft, \
- __entry->total, \
- __entry->alignment, \
- __entry->minalignslop, \
- __entry->len, \
- __print_symbolic(__entry->type, XFS_ALLOC_TYPES), \
- __print_symbolic(__entry->otype, XFS_ALLOC_TYPES), \
- __entry->wasdel, \
- __entry->wasfromfl, \
- __entry->isfl, \
- __entry->userdata, \
- __entry->firstblock) \
+DECLARE_EVENT_CLASS(xfs_alloc_class,
+ TP_PROTO(struct xfs_alloc_arg *args),
+ TP_ARGS(args),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_agblock_t, agbno)
+ __field(xfs_extlen_t, minlen)
+ __field(xfs_extlen_t, maxlen)
+ __field(xfs_extlen_t, mod)
+ __field(xfs_extlen_t, prod)
+ __field(xfs_extlen_t, minleft)
+ __field(xfs_extlen_t, total)
+ __field(xfs_extlen_t, alignment)
+ __field(xfs_extlen_t, minalignslop)
+ __field(xfs_extlen_t, len)
+ __field(short, type)
+ __field(short, otype)
+ __field(char, wasdel)
+ __field(char, wasfromfl)
+ __field(char, isfl)
+ __field(char, userdata)
+ __field(xfs_fsblock_t, firstblock)
+ ),
+ TP_fast_assign(
+ __entry->dev = args->mp->m_super->s_dev;
+ __entry->agno = args->agno;
+ __entry->agbno = args->agbno;
+ __entry->minlen = args->minlen;
+ __entry->maxlen = args->maxlen;
+ __entry->mod = args->mod;
+ __entry->prod = args->prod;
+ __entry->minleft = args->minleft;
+ __entry->total = args->total;
+ __entry->alignment = args->alignment;
+ __entry->minalignslop = args->minalignslop;
+ __entry->len = args->len;
+ __entry->type = args->type;
+ __entry->otype = args->otype;
+ __entry->wasdel = args->wasdel;
+ __entry->wasfromfl = args->wasfromfl;
+ __entry->isfl = args->isfl;
+ __entry->userdata = args->userdata;
+ __entry->firstblock = args->firstblock;
+ ),
+ TP_printk("dev %d:%d agno %u agbno %u minlen %u maxlen %u mod %u "
+ "prod %u minleft %u total %u alignment %u minalignslop %u "
+ "len %u type %s otype %s wasdel %d wasfromfl %d isfl %d "
+ "userdata %d firstblock 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->agno,
+ __entry->agbno,
+ __entry->minlen,
+ __entry->maxlen,
+ __entry->mod,
+ __entry->prod,
+ __entry->minleft,
+ __entry->total,
+ __entry->alignment,
+ __entry->minalignslop,
+ __entry->len,
+ __print_symbolic(__entry->type, XFS_ALLOC_TYPES),
+ __print_symbolic(__entry->otype, XFS_ALLOC_TYPES),
+ __entry->wasdel,
+ __entry->wasfromfl,
+ __entry->isfl,
+ __entry->userdata,
+ __entry->firstblock)
)
+#define DEFINE_ALLOC_EVENT(name) \
+DEFINE_EVENT(xfs_alloc_class, name, \
+ TP_PROTO(struct xfs_alloc_arg *args), \
+ TP_ARGS(args))
DEFINE_ALLOC_EVENT(xfs_alloc_exact_done);
DEFINE_ALLOC_EVENT(xfs_alloc_exact_error);
DEFINE_ALLOC_EVENT(xfs_alloc_near_nominleft);
@@ -1245,92 +1290,100 @@ DEFINE_ALLOC_EVENT(xfs_alloc_vextent_noagbp);
DEFINE_ALLOC_EVENT(xfs_alloc_vextent_loopfailed);
DEFINE_ALLOC_EVENT(xfs_alloc_vextent_allfailed);
-#define DEFINE_DIR2_TRACE(tname) \
-TRACE_EVENT(tname, \
+DECLARE_EVENT_CLASS(xfs_dir2_class,
+ TP_PROTO(struct xfs_da_args *args),
+ TP_ARGS(args),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __dynamic_array(char, name, args->namelen)
+ __field(int, namelen)
+ __field(xfs_dahash_t, hashval)
+ __field(xfs_ino_t, inumber)
+ __field(int, op_flags)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
+ __entry->ino = args->dp->i_ino;
+ if (args->namelen)
+ memcpy(__get_str(name), args->name, args->namelen);
+ __entry->namelen = args->namelen;
+ __entry->hashval = args->hashval;
+ __entry->inumber = args->inumber;
+ __entry->op_flags = args->op_flags;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x "
+ "inumber 0x%llx op_flags %s",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->namelen,
+ __entry->namelen ? __get_str(name) : NULL,
+ __entry->namelen,
+ __entry->hashval,
+ __entry->inumber,
+ __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS))
+)
+
+#define DEFINE_DIR2_EVENT(name) \
+DEFINE_EVENT(xfs_dir2_class, name, \
TP_PROTO(struct xfs_da_args *args), \
- TP_ARGS(args), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_ino_t, ino) \
- __dynamic_array(char, name, args->namelen) \
- __field(int, namelen) \
- __field(xfs_dahash_t, hashval) \
- __field(xfs_ino_t, inumber) \
- __field(int, op_flags) \
- ), \
- TP_fast_assign( \
- __entry->dev = VFS_I(args->dp)->i_sb->s_dev; \
- __entry->ino = args->dp->i_ino; \
- if (args->namelen) \
- memcpy(__get_str(name), args->name, args->namelen); \
- __entry->namelen = args->namelen; \
- __entry->hashval = args->hashval; \
- __entry->inumber = args->inumber; \
- __entry->op_flags = args->op_flags; \
- ), \
- TP_printk("dev %d:%d ino 0x%llx name %.*s namelen %d hashval 0x%x " \
- "inumber 0x%llx op_flags %s", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->ino, \
- __entry->namelen, \
- __entry->namelen ? __get_str(name) : NULL, \
- __entry->namelen, \
- __entry->hashval, \
- __entry->inumber, \
- __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS)) \
+ TP_ARGS(args))
+DEFINE_DIR2_EVENT(xfs_dir2_sf_addname);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_create);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_lookup);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_replace);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_removename);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_toino4);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_toino8);
+DEFINE_DIR2_EVENT(xfs_dir2_sf_to_block);
+DEFINE_DIR2_EVENT(xfs_dir2_block_addname);
+DEFINE_DIR2_EVENT(xfs_dir2_block_lookup);
+DEFINE_DIR2_EVENT(xfs_dir2_block_replace);
+DEFINE_DIR2_EVENT(xfs_dir2_block_removename);
+DEFINE_DIR2_EVENT(xfs_dir2_block_to_sf);
+DEFINE_DIR2_EVENT(xfs_dir2_block_to_leaf);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_addname);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_lookup);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_replace);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_removename);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_block);
+DEFINE_DIR2_EVENT(xfs_dir2_leaf_to_node);
+DEFINE_DIR2_EVENT(xfs_dir2_node_addname);
+DEFINE_DIR2_EVENT(xfs_dir2_node_lookup);
+DEFINE_DIR2_EVENT(xfs_dir2_node_replace);
+DEFINE_DIR2_EVENT(xfs_dir2_node_removename);
+DEFINE_DIR2_EVENT(xfs_dir2_node_to_leaf);
+
+DECLARE_EVENT_CLASS(xfs_dir2_space_class,
+ TP_PROTO(struct xfs_da_args *args, int idx),
+ TP_ARGS(args, idx),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(int, op_flags)
+ __field(int, idx)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(args->dp)->i_sb->s_dev;
+ __entry->ino = args->dp->i_ino;
+ __entry->op_flags = args->op_flags;
+ __entry->idx = idx;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS),
+ __entry->idx)
)
-DEFINE_DIR2_TRACE(xfs_dir2_sf_addname);
-DEFINE_DIR2_TRACE(xfs_dir2_sf_create);
-DEFINE_DIR2_TRACE(xfs_dir2_sf_lookup);
-DEFINE_DIR2_TRACE(xfs_dir2_sf_replace);
-DEFINE_DIR2_TRACE(xfs_dir2_sf_removename);
-DEFINE_DIR2_TRACE(xfs_dir2_sf_toino4);
-DEFINE_DIR2_TRACE(xfs_dir2_sf_toino8);
-DEFINE_DIR2_TRACE(xfs_dir2_sf_to_block);
-DEFINE_DIR2_TRACE(xfs_dir2_block_addname);
-DEFINE_DIR2_TRACE(xfs_dir2_block_lookup);
-DEFINE_DIR2_TRACE(xfs_dir2_block_replace);
-DEFINE_DIR2_TRACE(xfs_dir2_block_removename);
-DEFINE_DIR2_TRACE(xfs_dir2_block_to_sf);
-DEFINE_DIR2_TRACE(xfs_dir2_block_to_leaf);
-DEFINE_DIR2_TRACE(xfs_dir2_leaf_addname);
-DEFINE_DIR2_TRACE(xfs_dir2_leaf_lookup);
-DEFINE_DIR2_TRACE(xfs_dir2_leaf_replace);
-DEFINE_DIR2_TRACE(xfs_dir2_leaf_removename);
-DEFINE_DIR2_TRACE(xfs_dir2_leaf_to_block);
-DEFINE_DIR2_TRACE(xfs_dir2_leaf_to_node);
-DEFINE_DIR2_TRACE(xfs_dir2_node_addname);
-DEFINE_DIR2_TRACE(xfs_dir2_node_lookup);
-DEFINE_DIR2_TRACE(xfs_dir2_node_replace);
-DEFINE_DIR2_TRACE(xfs_dir2_node_removename);
-DEFINE_DIR2_TRACE(xfs_dir2_node_to_leaf);
-#define DEFINE_DIR2_SPACE_TRACE(tname) \
-TRACE_EVENT(tname, \
+#define DEFINE_DIR2_SPACE_EVENT(name) \
+DEFINE_EVENT(xfs_dir2_space_class, name, \
TP_PROTO(struct xfs_da_args *args, int idx), \
- TP_ARGS(args, idx), \
- TP_STRUCT__entry( \
- __field(dev_t, dev) \
- __field(xfs_ino_t, ino) \
- __field(int, op_flags) \
- __field(int, idx) \
- ), \
- TP_fast_assign( \
- __entry->dev = VFS_I(args->dp)->i_sb->s_dev; \
- __entry->ino = args->dp->i_ino; \
- __entry->op_flags = args->op_flags; \
- __entry->idx = idx; \
- ), \
- TP_printk("dev %d:%d ino 0x%llx op_flags %s index %d", \
- MAJOR(__entry->dev), MINOR(__entry->dev), \
- __entry->ino, \
- __print_flags(__entry->op_flags, "|", XFS_DA_OP_FLAGS), \
- __entry->idx) \
-)
-DEFINE_DIR2_SPACE_TRACE(xfs_dir2_leafn_add);
-DEFINE_DIR2_SPACE_TRACE(xfs_dir2_leafn_remove);
-DEFINE_DIR2_SPACE_TRACE(xfs_dir2_grow_inode);
-DEFINE_DIR2_SPACE_TRACE(xfs_dir2_shrink_inode);
+ TP_ARGS(args, idx))
+DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_add);
+DEFINE_DIR2_SPACE_EVENT(xfs_dir2_leafn_remove);
+DEFINE_DIR2_SPACE_EVENT(xfs_dir2_grow_inode);
+DEFINE_DIR2_SPACE_EVENT(xfs_dir2_shrink_inode);
TRACE_EVENT(xfs_dir2_leafn_moveents,
TP_PROTO(struct xfs_da_args *args, int src_idx, int dst_idx, int count),
diff --git a/fs/xfs/linux-2.6/xfs_xattr.c b/fs/xfs/linux-2.6/xfs_xattr.c
index 497c7fb75cc..0b1878857fc 100644
--- a/fs/xfs/linux-2.6/xfs_xattr.c
+++ b/fs/xfs/linux-2.6/xfs_xattr.c
@@ -30,10 +30,10 @@
static int
-__xfs_xattr_get(struct inode *inode, const char *name,
+xfs_xattr_get(struct dentry *dentry, const char *name,
void *value, size_t size, int xflags)
{
- struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_inode *ip = XFS_I(dentry->d_inode);
int error, asize = size;
if (strcmp(name, "") == 0)
@@ -52,10 +52,10 @@ __xfs_xattr_get(struct inode *inode, const char *name,
}
static int
-__xfs_xattr_set(struct inode *inode, const char *name, const void *value,
+xfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
size_t size, int flags, int xflags)
{
- struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_inode *ip = XFS_I(dentry->d_inode);
if (strcmp(name, "") == 0)
return -EINVAL;
@@ -71,75 +71,34 @@ __xfs_xattr_set(struct inode *inode, const char *name, const void *value,
return -xfs_attr_set(ip, name, (void *)value, size, xflags);
}
-static int
-xfs_xattr_user_get(struct inode *inode, const char *name,
- void *value, size_t size)
-{
- return __xfs_xattr_get(inode, name, value, size, 0);
-}
-
-static int
-xfs_xattr_user_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
-{
- return __xfs_xattr_set(inode, name, value, size, flags, 0);
-}
-
static struct xattr_handler xfs_xattr_user_handler = {
.prefix = XATTR_USER_PREFIX,
- .get = xfs_xattr_user_get,
- .set = xfs_xattr_user_set,
+ .flags = 0, /* no flags implies user namespace */
+ .get = xfs_xattr_get,
+ .set = xfs_xattr_set,
};
-
-static int
-xfs_xattr_trusted_get(struct inode *inode, const char *name,
- void *value, size_t size)
-{
- return __xfs_xattr_get(inode, name, value, size, ATTR_ROOT);
-}
-
-static int
-xfs_xattr_trusted_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
-{
- return __xfs_xattr_set(inode, name, value, size, flags, ATTR_ROOT);
-}
-
static struct xattr_handler xfs_xattr_trusted_handler = {
.prefix = XATTR_TRUSTED_PREFIX,
- .get = xfs_xattr_trusted_get,
- .set = xfs_xattr_trusted_set,
+ .flags = ATTR_ROOT,
+ .get = xfs_xattr_get,
+ .set = xfs_xattr_set,
};
-
-static int
-xfs_xattr_secure_get(struct inode *inode, const char *name,
- void *value, size_t size)
-{
- return __xfs_xattr_get(inode, name, value, size, ATTR_SECURE);
-}
-
-static int
-xfs_xattr_secure_set(struct inode *inode, const char *name,
- const void *value, size_t size, int flags)
-{
- return __xfs_xattr_set(inode, name, value, size, flags, ATTR_SECURE);
-}
-
static struct xattr_handler xfs_xattr_security_handler = {
.prefix = XATTR_SECURITY_PREFIX,
- .get = xfs_xattr_secure_get,
- .set = xfs_xattr_secure_set,
+ .flags = ATTR_SECURE,
+ .get = xfs_xattr_get,
+ .set = xfs_xattr_set,
};
-
struct xattr_handler *xfs_xattr_handlers[] = {
&xfs_xattr_user_handler,
&xfs_xattr_trusted_handler,
&xfs_xattr_security_handler,
#ifdef CONFIG_XFS_POSIX_ACL
- &xfs_xattr_system_handler,
+ &xfs_xattr_acl_access_handler,
+ &xfs_xattr_acl_default_handler,
#endif
NULL
};
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 71af76fe8a2..873e07e2907 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -891,7 +891,7 @@ xfs_qm_dqrele_all_inodes(
uint flags)
{
ASSERT(mp->m_quotainfo);
- xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG);
+ xfs_inode_ag_iterator(mp, xfs_dqrele_inode, flags, XFS_ICI_NO_TAG, 0);
}
/*------------------------------------------------------------------------*/
diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h
index 947b150df8e..00fd357c3e4 100644
--- a/fs/xfs/xfs_acl.h
+++ b/fs/xfs/xfs_acl.h
@@ -49,7 +49,8 @@ extern int xfs_acl_chmod(struct inode *inode);
extern int posix_acl_access_exists(struct inode *inode);
extern int posix_acl_default_exists(struct inode *inode);
-extern struct xattr_handler xfs_xattr_system_handler;
+extern struct xattr_handler xfs_xattr_acl_access_handler;
+extern struct xattr_handler xfs_xattr_acl_default_handler;
#else
# define xfs_check_acl NULL
# define xfs_get_acl(inode, type) NULL
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c
index a1c65fc6d9c..275b1f4f943 100644
--- a/fs/xfs/xfs_alloc.c
+++ b/fs/xfs/xfs_alloc.c
@@ -2563,43 +2563,41 @@ xfs_alloc_search_busy(xfs_trans_t *tp,
xfs_mount_t *mp;
xfs_perag_busy_t *bsy;
xfs_agblock_t uend, bend;
- xfs_lsn_t lsn;
+ xfs_lsn_t lsn = 0;
int cnt;
mp = tp->t_mountp;
spin_lock(&mp->m_perag[agno].pagb_lock);
- cnt = mp->m_perag[agno].pagb_count;
uend = bno + len - 1;
- /* search pagb_list for this slot, skipping open slots */
- for (bsy = mp->m_perag[agno].pagb_list; cnt; bsy++) {
+ /*
+ * search pagb_list for this slot, skipping open slots. We have to
+ * search the entire array as there may be multiple overlaps and
+ * we have to get the most recent LSN for the log force to push out
+ * all the transactions that span the range.
+ */
+ for (cnt = 0; cnt < mp->m_perag[agno].pagb_count; cnt++) {
+ bsy = &mp->m_perag[agno].pagb_list[cnt];
+ if (!bsy->busy_tp)
+ continue;
- /*
- * (start1,length1) within (start2, length2)
- */
- if (bsy->busy_tp != NULL) {
- bend = bsy->busy_start + bsy->busy_length - 1;
- if ((bno > bend) || (uend < bsy->busy_start)) {
- cnt--;
- } else {
- break;
- }
- }
- }
+ bend = bsy->busy_start + bsy->busy_length - 1;
+ if (bno > bend || uend < bsy->busy_start)
+ continue;
- trace_xfs_alloc_busysearch(mp, agno, bno, len, !!cnt);
+ /* (start1,length1) within (start2, length2) */
+ if (XFS_LSN_CMP(bsy->busy_tp->t_commit_lsn, lsn) > 0)
+ lsn = bsy->busy_tp->t_commit_lsn;
+ }
+ spin_unlock(&mp->m_perag[agno].pagb_lock);
+ trace_xfs_alloc_busysearch(tp->t_mountp, agno, bno, len, lsn);
/*
* If a block was found, force the log through the LSN of the
* transaction that freed the block
*/
- if (cnt) {
- lsn = bsy->busy_tp->t_commit_lsn;
- spin_unlock(&mp->m_perag[agno].pagb_lock);
+ if (lsn)
xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC);
- } else {
- spin_unlock(&mp->m_perag[agno].pagb_lock);
- }
}
diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h
index 5549d495947..cf07ca7c22e 100644
--- a/fs/xfs/xfs_bmap_btree.h
+++ b/fs/xfs/xfs_bmap_btree.h
@@ -46,20 +46,12 @@ typedef struct xfs_bmdr_block {
#define BMBT_STARTBLOCK_BITLEN 52
#define BMBT_BLOCKCOUNT_BITLEN 21
-
-#define BMBT_USE_64 1
-
-typedef struct xfs_bmbt_rec_32
-{
- __uint32_t l0, l1, l2, l3;
-} xfs_bmbt_rec_32_t;
-typedef struct xfs_bmbt_rec_64
-{
+typedef struct xfs_bmbt_rec {
__be64 l0, l1;
-} xfs_bmbt_rec_64_t;
+} xfs_bmbt_rec_t;
typedef __uint64_t xfs_bmbt_rec_base_t; /* use this for casts */
-typedef xfs_bmbt_rec_64_t xfs_bmbt_rec_t, xfs_bmdr_rec_t;
+typedef xfs_bmbt_rec_t xfs_bmdr_rec_t;
typedef struct xfs_bmbt_rec_host {
__uint64_t l0, l1;
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index d1483a4f71b..84ca1cf16a1 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -114,10 +114,82 @@ xfs_swapext(
return error;
}
+/*
+ * We need to check that the format of the data fork in the temporary inode is
+ * valid for the target inode before doing the swap. This is not a problem with
+ * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
+ * data fork depending on the space the attribute fork is taking so we can get
+ * invalid formats on the target inode.
+ *
+ * E.g. target has space for 7 extents in extent format, temp inode only has
+ * space for 6. If we defragment down to 7 extents, then the tmp format is a
+ * btree, but when swapped it needs to be in extent format. Hence we can't just
+ * blindly swap data forks on attr2 filesystems.
+ *
+ * Note that we check the swap in both directions so that we don't end up with
+ * a corrupt temporary inode, either.
+ *
+ * Note that fixing the way xfs_fsr sets up the attribute fork in the source
+ * inode will prevent this situation from occurring, so all we do here is
+ * reject and log the attempt. basically we are putting the responsibility on
+ * userspace to get this right.
+ */
+static int
+xfs_swap_extents_check_format(
+ xfs_inode_t *ip, /* target inode */
+ xfs_inode_t *tip) /* tmp inode */
+{
+
+ /* Should never get a local format */
+ if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
+ tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
+ return EINVAL;
+
+ /*
+ * if the target inode has less extents that then temporary inode then
+ * why did userspace call us?
+ */
+ if (ip->i_d.di_nextents < tip->i_d.di_nextents)
+ return EINVAL;
+
+ /*
+ * if the target inode is in extent form and the temp inode is in btree
+ * form then we will end up with the target inode in the wrong format
+ * as we already know there are less extents in the temp inode.
+ */
+ if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
+ tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
+ return EINVAL;
+
+ /* Check temp in extent form to max in target */
+ if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
+ XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) > ip->i_df.if_ext_max)
+ return EINVAL;
+
+ /* Check target in extent form to max in temp */
+ if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
+ XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) > tip->i_df.if_ext_max)
+ return EINVAL;
+
+ /* Check root block of temp in btree form to max in target */
+ if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
+ XFS_IFORK_BOFF(ip) &&
+ tip->i_df.if_broot_bytes > XFS_IFORK_BOFF(ip))
+ return EINVAL;
+
+ /* Check root block of target in btree form to max in temp */
+ if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE &&
+ XFS_IFORK_BOFF(tip) &&
+ ip->i_df.if_broot_bytes > XFS_IFORK_BOFF(tip))
+ return EINVAL;
+
+ return 0;
+}
+
int
xfs_swap_extents(
- xfs_inode_t *ip,
- xfs_inode_t *tip,
+ xfs_inode_t *ip, /* target inode */
+ xfs_inode_t *tip, /* tmp inode */
xfs_swapext_t *sxp)
{
xfs_mount_t *mp;
@@ -161,13 +233,6 @@ xfs_swap_extents(
goto out_unlock;
}
- /* Should never get a local format */
- if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
- tip->i_d.di_format == XFS_DINODE_FMT_LOCAL) {
- error = XFS_ERROR(EINVAL);
- goto out_unlock;
- }
-
if (VN_CACHED(VFS_I(tip)) != 0) {
error = xfs_flushinval_pages(tip, 0, -1,
FI_REMAPF_LOCKED);
@@ -189,13 +254,12 @@ xfs_swap_extents(
goto out_unlock;
}
- /*
- * If the target has extended attributes, the tmp file
- * must also in order to ensure the correct data fork
- * format.
- */
- if ( XFS_IFORK_Q(ip) != XFS_IFORK_Q(tip) ) {
- error = XFS_ERROR(EINVAL);
+ /* check inode formats now that data is flushed */
+ error = xfs_swap_extents_check_format(ip, tip);
+ if (error) {
+ xfs_fs_cmn_err(CE_NOTE, mp,
+ "%s: inode 0x%llx format is incompatible for exchanging.",
+ __FILE__, ip->i_ino);
goto out_unlock;
}
@@ -276,6 +340,16 @@ xfs_swap_extents(
*tifp = *tempifp; /* struct copy */
/*
+ * Fix the in-memory data fork values that are dependent on the fork
+ * offset in the inode. We can't assume they remain the same as attr2
+ * has dynamic fork offsets.
+ */
+ ifp->if_ext_max = XFS_IFORK_SIZE(ip, XFS_DATA_FORK) /
+ (uint)sizeof(xfs_bmbt_rec_t);
+ tifp->if_ext_max = XFS_IFORK_SIZE(tip, XFS_DATA_FORK) /
+ (uint)sizeof(xfs_bmbt_rec_t);
+
+ /*
* Fix the on-disk inode values
*/
tmp = (__uint64_t)ip->i_d.di_nblocks;
diff --git a/fs/xfs/xfs_iget.c b/fs/xfs/xfs_iget.c
index f5c904a10c1..155e798f30a 100644
--- a/fs/xfs/xfs_iget.c
+++ b/fs/xfs/xfs_iget.c
@@ -73,7 +73,6 @@ xfs_inode_alloc(
ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!spin_is_locked(&ip->i_flags_lock));
ASSERT(completion_done(&ip->i_flush));
- ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
mrlock_init(&ip->i_iolock, MRLOCK_BARRIER, "xfsio", ip->i_ino);
@@ -91,7 +90,7 @@ xfs_inode_alloc(
ip->i_new_size = 0;
/* prevent anyone from using this yet */
- VFS_I(ip)->i_state = I_NEW|I_LOCK;
+ VFS_I(ip)->i_state = I_NEW;
return ip;
}
@@ -217,7 +216,7 @@ xfs_iget_cache_hit(
trace_xfs_iget_reclaim(ip);
goto out_error;
}
- inode->i_state = I_LOCK|I_NEW;
+ inode->i_state = I_NEW;
} else {
/* If the VFS inode is being torn down, pause and try again. */
if (!igrab(inode)) {
@@ -478,17 +477,21 @@ xfs_ireclaim(
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_perag *pag;
+ xfs_agino_t agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
XFS_STATS_INC(xs_ig_reclaims);
/*
- * Remove the inode from the per-AG radix tree. It doesn't matter
- * if it was never added to it because radix_tree_delete can deal
- * with that case just fine.
+ * Remove the inode from the per-AG radix tree.
+ *
+ * Because radix_tree_delete won't complain even if the item was never
+ * added to the tree assert that it's been there before to catch
+ * problems with the inode life time early on.
*/
pag = xfs_get_perag(mp, ip->i_ino);
write_lock(&pag->pag_ici_lock);
- radix_tree_delete(&pag->pag_ici_root, XFS_INO_TO_AGINO(mp, ip->i_ino));
+ if (!radix_tree_delete(&pag->pag_ici_root, agino))
+ ASSERT(0);
write_unlock(&pag->pag_ici_lock);
xfs_put_perag(mp, pag);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ce278b3ae7f..ef77fd88c8e 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2841,8 +2841,8 @@ xfs_iflush(
mp = ip->i_mount;
/*
- * If the inode isn't dirty, then just release the inode
- * flush lock and do nothing.
+ * If the inode isn't dirty, then just release the inode flush lock and
+ * do nothing.
*/
if (xfs_inode_clean(ip)) {
xfs_ifunlock(ip);
@@ -2868,6 +2868,19 @@ xfs_iflush(
xfs_iunpin_wait(ip);
/*
+ * For stale inodes we cannot rely on the backing buffer remaining
+ * stale in cache for the remaining life of the stale inode and so
+ * xfs_itobp() below may give us a buffer that no longer contains
+ * inodes below. We have to check this after ensuring the inode is
+ * unpinned so that it is safe to reclaim the stale inode after the
+ * flush call.
+ */
+ if (xfs_iflags_test(ip, XFS_ISTALE)) {
+ xfs_ifunlock(ip);
+ return 0;
+ }
+
+ /*
* This may have been unpinned because the filesystem is shutting
* down forcibly. If that's the case we must not write this inode
* to disk, because the log record didn't make it to disk!
diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h
index 65bae4c9b8b..cc8df1ac778 100644
--- a/fs/xfs/xfs_inode_item.h
+++ b/fs/xfs/xfs_inode_item.h
@@ -127,7 +127,7 @@ static inline int xfs_ilog_fdata(int w)
#ifdef __KERNEL__
struct xfs_buf;
-struct xfs_bmbt_rec_64;
+struct xfs_bmbt_rec;
struct xfs_inode;
struct xfs_mount;
@@ -140,9 +140,9 @@ typedef struct xfs_inode_log_item {
unsigned short ili_flags; /* misc flags */
unsigned short ili_logged; /* flushed logged data */
unsigned int ili_last_fields; /* fields when flushed */
- struct xfs_bmbt_rec_64 *ili_extents_buf; /* array of logged
+ struct xfs_bmbt_rec *ili_extents_buf; /* array of logged
data exts */
- struct xfs_bmbt_rec_64 *ili_aextents_buf; /* array of logged
+ struct xfs_bmbt_rec *ili_aextents_buf; /* array of logged
attr exts */
unsigned int ili_pushbuf_flag; /* one bit used in push_ail */
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 4cb1792040e..600b5b06aae 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1441,6 +1441,7 @@ xlog_sync(xlog_t *log,
XFS_BUF_ZEROFLAGS(bp);
XFS_BUF_BUSY(bp);
XFS_BUF_ASYNC(bp);
+ bp->b_flags |= XBF_LOG_BUFFER;
/*
* Do an ordered write for the log block.
* Its unnecessary to flush the first split block in the log wrap case.
@@ -1478,6 +1479,7 @@ xlog_sync(xlog_t *log,
XFS_BUF_ZEROFLAGS(bp);
XFS_BUF_BUSY(bp);
XFS_BUF_ASYNC(bp);
+ bp->b_flags |= XBF_LOG_BUFFER;
if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
XFS_BUF_ORDERED(bp);
dptr = XFS_BUF_PTR(bp);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 9e15a118536..6be05f756d5 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1517,6 +1517,8 @@ xfs_rtfree_range(
*/
error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1,
&postblock);
+ if (error)
+ return error;
/*
* If there are blocks not being freed at the front of the
* old extent, add summary data for them to be allocated.
diff --git a/fs/xfs/xfs_vnodeops.c b/fs/xfs/xfs_vnodeops.c
index 6558ffd8d14..6f268756bf3 100644
--- a/fs/xfs/xfs_vnodeops.c
+++ b/fs/xfs/xfs_vnodeops.c
@@ -70,7 +70,6 @@ xfs_setattr(
uint commit_flags=0;
uid_t uid=0, iuid=0;
gid_t gid=0, igid=0;
- int timeflags = 0;
struct xfs_dquot *udqp, *gdqp, *olddquot1, *olddquot2;
int need_iolock = 1;
@@ -135,16 +134,13 @@ xfs_setattr(
if (flags & XFS_ATTR_NOLOCK)
need_iolock = 0;
if (!(mask & ATTR_SIZE)) {
- if ((mask != (ATTR_CTIME|ATTR_ATIME|ATTR_MTIME)) ||
- (mp->m_flags & XFS_MOUNT_WSYNC)) {
- tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
- commit_flags = 0;
- if ((code = xfs_trans_reserve(tp, 0,
- XFS_ICHANGE_LOG_RES(mp), 0,
- 0, 0))) {
- lock_flags = 0;
- goto error_return;
- }
+ tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
+ commit_flags = 0;
+ code = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp),
+ 0, 0, 0);
+ if (code) {
+ lock_flags = 0;
+ goto error_return;
}
} else {
if (DM_EVENT_ENABLED(ip, DM_EVENT_TRUNCATE) &&
@@ -295,15 +291,23 @@ xfs_setattr(
* or we are explicitly asked to change it. This handles
* the semantic difference between truncate() and ftruncate()
* as implemented in the VFS.
+ *
+ * The regular truncate() case without ATTR_CTIME and ATTR_MTIME
+ * is a special case where we need to update the times despite
+ * not having these flags set. For all other operations the
+ * VFS set these flags explicitly if it wants a timestamp
+ * update.
*/
- if (iattr->ia_size != ip->i_size || (mask & ATTR_CTIME))
- timeflags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
+ if (iattr->ia_size != ip->i_size &&
+ (!(mask & (ATTR_CTIME | ATTR_MTIME)))) {
+ iattr->ia_ctime = iattr->ia_mtime =
+ current_fs_time(inode->i_sb);
+ mask |= ATTR_CTIME | ATTR_MTIME;
+ }
if (iattr->ia_size > ip->i_size) {
ip->i_d.di_size = iattr->ia_size;
ip->i_size = iattr->ia_size;
- if (!(flags & XFS_ATTR_DMI))
- xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
} else if (iattr->ia_size <= ip->i_size ||
(iattr->ia_size == 0 && ip->i_d.di_nextents)) {
@@ -374,9 +378,6 @@ xfs_setattr(
ip->i_d.di_gid = gid;
inode->i_gid = gid;
}
-
- xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
- timeflags |= XFS_ICHGTIME_CHG;
}
/*
@@ -393,51 +394,37 @@ xfs_setattr(
inode->i_mode &= S_IFMT;
inode->i_mode |= mode & ~S_IFMT;
-
- xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- timeflags |= XFS_ICHGTIME_CHG;
}
/*
* Change file access or modified times.
*/
- if (mask & (ATTR_ATIME|ATTR_MTIME)) {
- if (mask & ATTR_ATIME) {
- inode->i_atime = iattr->ia_atime;
- ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
- ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
- ip->i_update_core = 1;
- }
- if (mask & ATTR_MTIME) {
- inode->i_mtime = iattr->ia_mtime;
- ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
- ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
- timeflags &= ~XFS_ICHGTIME_MOD;
- timeflags |= XFS_ICHGTIME_CHG;
- }
- if (tp && (mask & (ATTR_MTIME_SET|ATTR_ATIME_SET)))
- xfs_trans_log_inode (tp, ip, XFS_ILOG_CORE);
+ if (mask & ATTR_ATIME) {
+ inode->i_atime = iattr->ia_atime;
+ ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
+ ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
+ ip->i_update_core = 1;
}
-
- /*
- * Change file inode change time only if ATTR_CTIME set
- * AND we have been called by a DMI function.
- */
-
- if ((flags & XFS_ATTR_DMI) && (mask & ATTR_CTIME)) {
+ if (mask & ATTR_CTIME) {
inode->i_ctime = iattr->ia_ctime;
ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
ip->i_update_core = 1;
- timeflags &= ~XFS_ICHGTIME_CHG;
+ }
+ if (mask & ATTR_MTIME) {
+ inode->i_mtime = iattr->ia_mtime;
+ ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
+ ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
+ ip->i_update_core = 1;
}
/*
- * Send out timestamp changes that need to be set to the
- * current time. Not done when called by a DMI function.
+ * And finally, log the inode core if any attribute in it
+ * has been changed.
*/
- if (timeflags && !(flags & XFS_ATTR_DMI))
- xfs_ichgtime(ip, timeflags);
+ if (mask & (ATTR_UID|ATTR_GID|ATTR_MODE|
+ ATTR_ATIME|ATTR_CTIME|ATTR_MTIME))
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
XFS_STATS_INC(xs_ig_attrchg);
@@ -452,12 +439,10 @@ xfs_setattr(
* mix so this probably isn't worth the trouble to optimize.
*/
code = 0;
- if (tp) {
- if (mp->m_flags & XFS_MOUNT_WSYNC)
- xfs_trans_set_sync(tp);
+ if (mp->m_flags & XFS_MOUNT_WSYNC)
+ xfs_trans_set_sync(tp);
- code = xfs_trans_commit(tp, commit_flags);
- }
+ code = xfs_trans_commit(tp, commit_flags);
xfs_iunlock(ip, lock_flags);