diff options
Diffstat (limited to 'fs')
144 files changed, 2155 insertions, 2937 deletions
diff --git a/fs/9p/acl.c b/fs/9p/acl.c index e98f56d3787..e9cb57f0754 100644 --- a/fs/9p/acl.c +++ b/fs/9p/acl.c @@ -96,14 +96,10 @@ static struct posix_acl *v9fs_get_cached_acl(struct inode *inode, int type) return acl; } -int v9fs_check_acl(struct inode *inode, int mask) +struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type) { - struct posix_acl *acl; struct v9fs_session_info *v9ses; - if (mask & MAY_NOT_BLOCK) - return -ECHILD; - v9ses = v9fs_inode2v9ses(inode); if (((v9ses->flags & V9FS_ACCESS_MASK) != V9FS_ACCESS_CLIENT) || ((v9ses->flags & V9FS_ACL_MASK) != V9FS_POSIX_ACL)) { @@ -111,18 +107,10 @@ int v9fs_check_acl(struct inode *inode, int mask) * On access = client and acl = on mode get the acl * values from the server */ - return 0; + return NULL; } - acl = v9fs_get_cached_acl(inode, ACL_TYPE_ACCESS); + return v9fs_get_cached_acl(inode, type); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl) { - int error = posix_acl_permission(inode, acl, mask); - posix_acl_release(acl); - return error; - } - return -EAGAIN; } static int v9fs_set_acl(struct dentry *dentry, int type, struct posix_acl *acl) @@ -165,32 +153,32 @@ err_free_out: int v9fs_acl_chmod(struct dentry *dentry) { int retval = 0; - struct posix_acl *acl, *clone; + struct posix_acl *acl; struct inode *inode = dentry->d_inode; if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; acl = v9fs_get_cached_acl(inode, ACL_TYPE_ACCESS); if (acl) { - clone = posix_acl_clone(acl, GFP_KERNEL); + retval = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); + if (retval) + return retval; + retval = v9fs_set_acl(dentry, ACL_TYPE_ACCESS, acl); posix_acl_release(acl); - if (!clone) - return -ENOMEM; - retval = posix_acl_chmod_masq(clone, inode->i_mode); - if (!retval) - retval = v9fs_set_acl(dentry, ACL_TYPE_ACCESS, clone); - posix_acl_release(clone); } return retval; } int v9fs_set_create_acl(struct dentry *dentry, - struct posix_acl *dpacl, struct posix_acl *pacl) + struct posix_acl **dpacl, struct posix_acl **pacl) { - v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, dpacl); - v9fs_set_acl(dentry, ACL_TYPE_ACCESS, pacl); - posix_acl_release(dpacl); - posix_acl_release(pacl); + if (dentry) { + v9fs_set_acl(dentry, ACL_TYPE_DEFAULT, *dpacl); + v9fs_set_acl(dentry, ACL_TYPE_ACCESS, *pacl); + } + posix_acl_release(*dpacl); + posix_acl_release(*pacl); + *dpacl = *pacl = NULL; return 0; } @@ -209,29 +197,18 @@ int v9fs_acl_mode(struct inode *dir, mode_t *modep, mode &= ~current_umask(); } if (acl) { - struct posix_acl *clone; - if (S_ISDIR(mode)) - *dpacl = acl; - clone = posix_acl_clone(acl, GFP_NOFS); - retval = -ENOMEM; - if (!clone) - goto cleanup; - - retval = posix_acl_create_masq(clone, &mode); - if (retval < 0) { - posix_acl_release(clone); - goto cleanup; - } + *dpacl = posix_acl_dup(acl); + retval = posix_acl_create(&acl, GFP_NOFS, &mode); + if (retval < 0) + return retval; if (retval > 0) - *pacl = clone; + *pacl = acl; + else + posix_acl_release(acl); } *modep = mode; return 0; -cleanup: - posix_acl_release(acl); - return retval; - } static int v9fs_remote_get_acl(struct dentry *dentry, const char *name, diff --git a/fs/9p/acl.h b/fs/9p/acl.h index 59e18c2e8c7..ddb7ae19d97 100644 --- a/fs/9p/acl.h +++ b/fs/9p/acl.h @@ -16,14 +16,14 @@ #ifdef CONFIG_9P_FS_POSIX_ACL extern int v9fs_get_acl(struct inode *, struct p9_fid *); -extern int v9fs_check_acl(struct inode *inode, int mask); +extern struct posix_acl *v9fs_iop_get_acl(struct inode *inode, int type); extern int v9fs_acl_chmod(struct dentry *); extern int v9fs_set_create_acl(struct dentry *, - struct posix_acl *, struct posix_acl *); + struct posix_acl **, struct posix_acl **); extern int v9fs_acl_mode(struct inode *dir, mode_t *modep, struct posix_acl **dpacl, struct posix_acl **pacl); #else -#define v9fs_check_acl NULL +#define v9fs_iop_get_acl NULL static inline int v9fs_get_acl(struct inode *inode, struct p9_fid *fid) { return 0; @@ -33,8 +33,8 @@ static inline int v9fs_acl_chmod(struct dentry *dentry) return 0; } static inline int v9fs_set_create_acl(struct dentry *dentry, - struct posix_acl *dpacl, - struct posix_acl *pacl) + struct posix_acl **dpacl, + struct posix_acl **pacl) { return 0; } diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 276f4a69ecd..9a26dce5a99 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -287,7 +287,7 @@ v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, int omode, goto error; /* Now set the ACL based on the default value */ - v9fs_set_create_acl(dentry, dacl, pacl); + v9fs_set_create_acl(dentry, &dacl, &pacl); v9inode = V9FS_I(inode); mutex_lock(&v9inode->v_mutex); @@ -328,6 +328,7 @@ error: err_clunk_old_fid: if (ofid) p9_client_clunk(ofid); + v9fs_set_create_acl(NULL, &dacl, &pacl); return err; } @@ -421,12 +422,13 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir, d_instantiate(dentry, inode); } /* Now set the ACL based on the default value */ - v9fs_set_create_acl(dentry, dacl, pacl); + v9fs_set_create_acl(dentry, &dacl, &pacl); inc_nlink(dir); v9fs_invalidate_inode_attr(dir); error: if (fid) p9_client_clunk(fid); + v9fs_set_create_acl(NULL, &dacl, &pacl); return err; } @@ -826,10 +828,11 @@ v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, int omode, d_instantiate(dentry, inode); } /* Now set the ACL based on the default value */ - v9fs_set_create_acl(dentry, dacl, pacl); + v9fs_set_create_acl(dentry, &dacl, &pacl); error: if (fid) p9_client_clunk(fid); + v9fs_set_create_acl(NULL, &dacl, &pacl); return err; } @@ -914,7 +917,7 @@ const struct inode_operations v9fs_dir_inode_operations_dotl = { .getxattr = generic_getxattr, .removexattr = generic_removexattr, .listxattr = v9fs_listxattr, - .check_acl = v9fs_check_acl, + .get_acl = v9fs_iop_get_acl, }; const struct inode_operations v9fs_file_inode_operations_dotl = { @@ -924,7 +927,7 @@ const struct inode_operations v9fs_file_inode_operations_dotl = { .getxattr = generic_getxattr, .removexattr = generic_removexattr, .listxattr = v9fs_listxattr, - .check_acl = v9fs_check_acl, + .get_acl = v9fs_iop_get_acl, }; const struct inode_operations v9fs_symlink_inode_operations_dotl = { diff --git a/fs/Makefile b/fs/Makefile index fb68c2b8cf8..afc109691a9 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -29,7 +29,6 @@ obj-$(CONFIG_EVENTFD) += eventfd.o obj-$(CONFIG_AIO) += aio.o obj-$(CONFIG_FILE_LOCKING) += locks.o obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o -obj-$(CONFIG_NFSD_DEPRECATED) += nfsctl.o obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o obj-$(CONFIG_BINFMT_EM86) += binfmt_em86.o obj-$(CONFIG_BINFMT_MISC) += binfmt_misc.o diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index c5567cb7843..4d433d34736 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -233,7 +233,7 @@ static int __init anon_inode_init(void) return 0; err_mntput: - mntput(anon_inode_mnt); + kern_unmount(anon_inode_mnt); err_unregister_filesystem: unregister_filesystem(&anon_inode_fs_type); err_exit: diff --git a/fs/block_dev.c b/fs/block_dev.c index 9fb0b15331d..f55aad4d161 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -44,24 +44,28 @@ inline struct block_device *I_BDEV(struct inode *inode) { return &BDEV_I(inode)->bdev; } - EXPORT_SYMBOL(I_BDEV); /* - * move the inode from it's current bdi to the a new bdi. if the inode is dirty - * we need to move it onto the dirty list of @dst so that the inode is always - * on the right list. + * Move the inode from its current bdi to a new bdi. If the inode is dirty we + * need to move it onto the dirty list of @dst so that the inode is always on + * the right list. */ static void bdev_inode_switch_bdi(struct inode *inode, struct backing_dev_info *dst) { - spin_lock(&inode_wb_list_lock); + struct backing_dev_info *old = inode->i_data.backing_dev_info; + + if (unlikely(dst == old)) /* deadlock avoidance */ + return; + bdi_lock_two(&old->wb, &dst->wb); spin_lock(&inode->i_lock); inode->i_data.backing_dev_info = dst; if (inode->i_state & I_DIRTY) list_move(&inode->i_wb_list, &dst->wb.b_dirty); spin_unlock(&inode->i_lock); - spin_unlock(&inode_wb_list_lock); + spin_unlock(&old->wb.list_lock); + spin_unlock(&dst->wb.list_lock); } static sector_t max_block(struct block_device *bdev) @@ -1448,6 +1452,8 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) int blkdev_put(struct block_device *bdev, fmode_t mode) { + mutex_lock(&bdev->bd_mutex); + if (mode & FMODE_EXCL) { bool bdev_free; @@ -1456,7 +1462,6 @@ int blkdev_put(struct block_device *bdev, fmode_t mode) * are protected with bdev_lock. bd_mutex is to * synchronize disk_holder unlinking. */ - mutex_lock(&bdev->bd_mutex); spin_lock(&bdev_lock); WARN_ON_ONCE(--bdev->bd_holders < 0); @@ -1474,17 +1479,21 @@ int blkdev_put(struct block_device *bdev, fmode_t mode) * If this was the last claim, remove holder link and * unblock evpoll if it was a write holder. */ - if (bdev_free) { - if (bdev->bd_write_holder) { - disk_unblock_events(bdev->bd_disk); - disk_check_events(bdev->bd_disk); - bdev->bd_write_holder = false; - } + if (bdev_free && bdev->bd_write_holder) { + disk_unblock_events(bdev->bd_disk); + bdev->bd_write_holder = false; } - - mutex_unlock(&bdev->bd_mutex); } + /* + * Trigger event checking and tell drivers to flush MEDIA_CHANGE + * event. This is to ensure detection of media removal commanded + * from userland - e.g. eject(1). + */ + disk_flush_events(bdev->bd_disk, DISK_EVENT_MEDIA_CHANGE); + + mutex_unlock(&bdev->bd_mutex); + return __blkdev_put(bdev, mode, 0); } EXPORT_SYMBOL(blkdev_put); diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c index 9f62ab2a728..65a735d8f6e 100644 --- a/fs/btrfs/acl.c +++ b/fs/btrfs/acl.c @@ -30,7 +30,7 @@ #ifdef CONFIG_BTRFS_FS_POSIX_ACL -static struct posix_acl *btrfs_get_acl(struct inode *inode, int type) +struct posix_acl *btrfs_get_acl(struct inode *inode, int type) { int size; const char *name; @@ -195,27 +195,6 @@ out: return ret; } -int btrfs_check_acl(struct inode *inode, int mask) -{ - int error = -EAGAIN; - - if (mask & MAY_NOT_BLOCK) { - if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) - error = -ECHILD; - } else { - struct posix_acl *acl; - acl = btrfs_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl) { - error = posix_acl_permission(inode, acl, mask); - posix_acl_release(acl); - } - } - - return error; -} - /* * btrfs_init_acl is already generally called under fs_mutex, so the locking * stuff has been fixed to work with that. If the locking stuff changes, we @@ -243,8 +222,7 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans, } if (IS_POSIXACL(dir) && acl) { - struct posix_acl *clone; - mode_t mode; + mode_t mode = inode->i_mode; if (S_ISDIR(inode->i_mode)) { ret = btrfs_set_acl(trans, inode, acl, @@ -252,22 +230,15 @@ int btrfs_init_acl(struct btrfs_trans_handle *trans, if (ret) goto failed; } - clone = posix_acl_clone(acl, GFP_NOFS); - ret = -ENOMEM; - if (!clone) - goto failed; + ret = posix_acl_create(&acl, GFP_NOFS, &mode); + if (ret < 0) + return ret; - mode = inode->i_mode; - ret = posix_acl_create_masq(clone, &mode); - if (ret >= 0) { - inode->i_mode = mode; - if (ret > 0) { - /* we need an acl */ - ret = btrfs_set_acl(trans, inode, clone, - ACL_TYPE_ACCESS); - } + inode->i_mode = mode; + if (ret > 0) { + /* we need an acl */ + ret = btrfs_set_acl(trans, inode, acl, ACL_TYPE_ACCESS); } - posix_acl_release(clone); } failed: posix_acl_release(acl); @@ -277,7 +248,7 @@ failed: int btrfs_acl_chmod(struct inode *inode) { - struct posix_acl *acl, *clone; + struct posix_acl *acl; int ret = 0; if (S_ISLNK(inode->i_mode)) @@ -290,17 +261,11 @@ int btrfs_acl_chmod(struct inode *inode) if (IS_ERR_OR_NULL(acl)) return PTR_ERR(acl); - clone = posix_acl_clone(acl, GFP_KERNEL); + ret = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); + if (ret) + return ret; + ret = btrfs_set_acl(NULL, inode, acl, ACL_TYPE_ACCESS); posix_acl_release(acl); - if (!clone) - return -ENOMEM; - - ret = posix_acl_chmod_masq(clone, inode->i_mode); - if (!ret) - ret = btrfs_set_acl(NULL, inode, clone, ACL_TYPE_ACCESS); - - posix_acl_release(clone); - return ret; } diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 82be74efbb2..fe9287b0649 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2645,9 +2645,9 @@ do { \ /* acl.c */ #ifdef CONFIG_BTRFS_FS_POSIX_ACL -int btrfs_check_acl(struct inode *inode, int mask); +struct posix_acl *btrfs_get_acl(struct inode *inode, int type); #else -#define btrfs_check_acl NULL +#define btrfs_get_acl NULL #endif int btrfs_init_acl(struct btrfs_trans_handle *trans, struct inode *inode, struct inode *dir); diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h index 8d27af4bd8b..7083d08b2a2 100644 --- a/fs/btrfs/delayed-inode.h +++ b/fs/btrfs/delayed-inode.h @@ -25,7 +25,7 @@ #include <linux/mutex.h> #include <linux/list.h> #include <linux/wait.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include "ctree.h" diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 7055d11c1ef..561262d3568 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2551,7 +2551,6 @@ int extent_write_full_page(struct extent_io_tree *tree, struct page *page, }; struct writeback_control wbc_writepages = { .sync_mode = wbc->sync_mode, - .older_than_this = NULL, .nr_to_write = 64, .range_start = page_offset(page) + PAGE_CACHE_SIZE, .range_end = (loff_t)-1, @@ -2584,7 +2583,6 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode, }; struct writeback_control wbc_writepages = { .sync_mode = mode, - .older_than_this = NULL, .nr_to_write = nr_pages * 2, .range_start = start, .range_end = end + 1, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 2548a04a023..e91b097e725 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7351,12 +7351,12 @@ static const struct inode_operations btrfs_dir_inode_operations = { .listxattr = btrfs_listxattr, .removexattr = btrfs_removexattr, .permission = btrfs_permission, - .check_acl = btrfs_check_acl, + .get_acl = btrfs_get_acl, }; static const struct inode_operations btrfs_dir_ro_inode_operations = { .lookup = btrfs_lookup, .permission = btrfs_permission, - .check_acl = btrfs_check_acl, + .get_acl = btrfs_get_acl, }; static const struct file_operations btrfs_dir_file_operations = { @@ -7425,7 +7425,7 @@ static const struct inode_operations btrfs_file_inode_operations = { .removexattr = btrfs_removexattr, .permission = btrfs_permission, .fiemap = btrfs_fiemap, - .check_acl = btrfs_check_acl, + .get_acl = btrfs_get_acl, }; static const struct inode_operations btrfs_special_inode_operations = { .getattr = btrfs_getattr, @@ -7435,7 +7435,7 @@ static const struct inode_operations btrfs_special_inode_operations = { .getxattr = btrfs_getxattr, .listxattr = btrfs_listxattr, .removexattr = btrfs_removexattr, - .check_acl = btrfs_check_acl, + .get_acl = btrfs_get_acl, }; static const struct inode_operations btrfs_symlink_inode_operations = { .readlink = generic_readlink, @@ -7447,7 +7447,7 @@ static const struct inode_operations btrfs_symlink_inode_operations = { .getxattr = btrfs_getxattr, .listxattr = btrfs_listxattr, .removexattr = btrfs_removexattr, - .check_acl = btrfs_check_acl, + .get_acl = btrfs_get_acl, }; const struct dentry_operations btrfs_dentry_operations = { diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 0dba6915712..fb962efdace 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -102,7 +102,7 @@ static int mdsc_show(struct seq_file *s, void *p) path = NULL; spin_lock(&req->r_old_dentry->d_lock); seq_printf(s, " #%llx/%.*s (%s)", - ceph_ino(req->r_old_dentry->d_parent->d_inode), + ceph_ino(req->r_old_dentry_dir), req->r_old_dentry->d_name.len, req->r_old_dentry->d_name.name, path ? path : ""); diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 1065ac77984..382abc9a6a5 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -40,14 +40,6 @@ int ceph_init_dentry(struct dentry *dentry) if (dentry->d_fsdata) return 0; - if (dentry->d_parent == NULL || /* nfs fh_to_dentry */ - ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) - d_set_d_op(dentry, &ceph_dentry_ops); - else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) - d_set_d_op(dentry, &ceph_snapdir_dentry_ops); - else - d_set_d_op(dentry, &ceph_snap_dentry_ops); - di = kmem_cache_alloc(ceph_dentry_cachep, GFP_NOFS | __GFP_ZERO); if (!di) return -ENOMEM; /* oh well */ @@ -58,16 +50,42 @@ int ceph_init_dentry(struct dentry *dentry) kmem_cache_free(ceph_dentry_cachep, di); goto out_unlock; } + + if (dentry->d_parent == NULL || /* nfs fh_to_dentry */ + ceph_snap(dentry->d_parent->d_inode) == CEPH_NOSNAP) + d_set_d_op(dentry, &ceph_dentry_ops); + else if (ceph_snap(dentry->d_parent->d_inode) == CEPH_SNAPDIR) + d_set_d_op(dentry, &ceph_snapdir_dentry_ops); + else + d_set_d_op(dentry, &ceph_snap_dentry_ops); + di->dentry = dentry; di->lease_session = NULL; - dentry->d_fsdata = di; dentry->d_time = jiffies; + /* avoid reordering d_fsdata setup so that the check above is safe */ + smp_mb(); + dentry->d_fsdata = di; ceph_dentry_lru_add(dentry); out_unlock: spin_unlock(&dentry->d_lock); return 0; } +struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry) +{ + struct inode *inode = NULL; + + if (!dentry) + return NULL; + + spin_lock(&dentry->d_lock); + if (dentry->d_parent) { + inode = dentry->d_parent->d_inode; + ihold(inode); + } + spin_unlock(&dentry->d_lock); + return inode; +} /* @@ -133,7 +151,7 @@ more: d_unhashed(dentry) ? "!hashed" : "hashed", parent->d_subdirs.prev, parent->d_subdirs.next); if (p == &parent->d_subdirs) { - fi->at_end = 1; + fi->flags |= CEPH_F_ATEND; goto out_unlock; } spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); @@ -234,7 +252,7 @@ static int ceph_readdir(struct file *filp, void *dirent, filldir_t filldir) const int max_bytes = fsc->mount_options->max_readdir_bytes; dout("readdir %p filp %p frag %u off %u\n", inode, filp, frag, off); - if (fi->at_end) + if (fi->flags & CEPH_F_ATEND) return 0; /* always start with . and .. */ @@ -403,7 +421,7 @@ more: dout("readdir next frag is %x\n", frag); goto more; } - fi->at_end = 1; + fi->flags |= CEPH_F_ATEND; /* * if dir_release_count still matches the dir, no dentries @@ -435,7 +453,7 @@ static void reset_readdir(struct ceph_file_info *fi) dput(fi->dentry); fi->dentry = NULL; } - fi->at_end = 0; + fi->flags &= ~CEPH_F_ATEND; } static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) @@ -463,7 +481,7 @@ static loff_t ceph_dir_llseek(struct file *file, loff_t offset, int origin) if (offset != file->f_pos) { file->f_pos = offset; file->f_version = 0; - fi->at_end = 0; + fi->flags &= ~CEPH_F_ATEND; } retval = offset; @@ -488,21 +506,13 @@ out: } /* - * Process result of a lookup/open request. - * - * Mainly, make sure we return the final req->r_dentry (if it already - * existed) in place of the original VFS-provided dentry when they - * differ. - * - * Gracefully handle the case where the MDS replies with -ENOENT and - * no trace (which it may do, at its discretion, e.g., if it doesn't - * care to issue a lease on the negative dentry). + * Handle lookups for the hidden .snap directory. */ -struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, - struct dentry *dentry, int err) +int ceph_handle_snapdir(struct ceph_mds_request *req, + struct dentry *dentry, int err) { struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); - struct inode *parent = dentry->d_parent->d_inode; + struct inode *parent = dentry->d_parent->d_inode; /* we hold i_mutex */ /* .snap dir? */ if (err == -ENOENT && @@ -516,7 +526,23 @@ struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, d_add(dentry, inode); err = 0; } + return err; +} +/* + * Figure out final result of a lookup/open request. + * + * Mainly, make sure we return the final req->r_dentry (if it already + * existed) in place of the original VFS-provided dentry when they + * differ. + * + * Gracefully handle the case where the MDS replies with -ENOENT and + * no trace (which it may do, at its discretion, e.g., if it doesn't + * care to issue a lease on the negative dentry). + */ +struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, + struct dentry *dentry, int err) +{ if (err == -ENOENT) { /* no trace? */ err = 0; @@ -610,6 +636,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry, req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE); req->r_locked_dir = dir; err = ceph_mdsc_do_request(mdsc, NULL, req); + err = ceph_handle_snapdir(req, dentry, err); dentry = ceph_finish_lookup(req, dentry, err); ceph_mdsc_put_request(req); /* will dput(dentry) */ dout("lookup result=%p\n", dentry); @@ -789,6 +816,7 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir, req->r_dentry = dget(dentry); req->r_num_caps = 2; req->r_old_dentry = dget(old_dentry); /* or inode? hrm. */ + req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry); req->r_locked_dir = dir; req->r_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_dentry_unless = CEPH_CAP_FILE_EXCL; @@ -887,6 +915,7 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry, req->r_dentry = dget(new_dentry); req->r_num_caps = 2; req->r_old_dentry = dget(old_dentry); + req->r_old_dentry_dir = ceph_get_dentry_parent_inode(old_dentry); req->r_locked_dir = new_dir; req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED; req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL; @@ -1002,36 +1031,38 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry) */ static int ceph_d_revalidate(struct dentry *dentry, struct nameidata *nd) { + int valid = 0; struct inode *dir; if (nd && nd->flags & LOOKUP_RCU) return -ECHILD; - dir = dentry->d_parent->d_inode; - dout("d_revalidate %p '%.*s' inode %p offset %lld\n", dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode, ceph_dentry(dentry)->offset); + dir = ceph_get_dentry_parent_inode(dentry); + /* always trust cached snapped dentries, snapdir dentry */ if (ceph_snap(dir) != CEPH_NOSNAP) { dout("d_revalidate %p '%.*s' inode %p is SNAPPED\n", dentry, dentry->d_name.len, dentry->d_name.name, dentry->d_inode); - goto out_touch; + valid = 1; + } else if (dentry->d_inode && + ceph_snap(dentry->d_inode) == CEPH_SNAPDIR) { + valid = 1; + } else if (dentry_lease_is_valid(dentry) || + dir_lease_is_valid(dir, dentry)) { + valid = 1; } - if (dentry->d_inode && ceph_snap(dentry->d_inode) == CEPH_SNAPDIR) - goto out_touch; - - if (dentry_lease_is_valid(dentry) || - dir_lease_is_valid(dir, dentry)) - goto out_touch; - dout("d_revalidate %p invalid\n", dentry); - d_drop(dentry); - return 0; -out_touch: - ceph_dentry_lru_touch(dentry); - return 1; + dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid"); + if (valid) + ceph_dentry_lru_touch(dentry); + else + d_drop(dentry); + iput(dir); + return valid; } /* @@ -1228,9 +1259,8 @@ void ceph_dentry_lru_del(struct dentry *dn) * Return name hash for a given dentry. This is dependent on * the parent directory's hash function. */ -unsigned ceph_dentry_hash(struct dentry *dn) +unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn) { - struct inode *dir = dn->d_parent->d_inode; struct ceph_inode_info *dci = ceph_inode(dir); switch (dci->i_dir_layout.dl_dir_hash) { diff --git a/fs/ceph/export.c b/fs/ceph/export.c index f67b687550d..9fbcdecaacc 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -46,7 +46,7 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, int type; struct ceph_nfs_fh *fh = (void *)rawfh; struct ceph_nfs_confh *cfh = (void *)rawfh; - struct dentry *parent = dentry->d_parent; + struct dentry *parent; struct inode *inode = dentry->d_inode; int connected_handle_length = sizeof(*cfh)/4; int handle_length = sizeof(*fh)/4; @@ -55,26 +55,33 @@ static int ceph_encode_fh(struct dentry *dentry, u32 *rawfh, int *max_len, if (ceph_snap(inode) != CEPH_NOSNAP) return -EINVAL; + spin_lock(&dentry->d_lock); + parent = dget(dentry->d_parent); + spin_unlock(&dentry->d_lock); + if (*max_len >= connected_handle_length) { dout("encode_fh %p connectable\n", dentry); cfh->ino = ceph_ino(dentry->d_inode); cfh->parent_ino = ceph_ino(parent->d_inode); - cfh->parent_name_hash = ceph_dentry_hash(parent); + cfh->parent_name_hash = ceph_dentry_hash(parent->d_inode, + dentry); *max_len = connected_handle_length; type = 2; } else if (*max_len >= handle_length) { if (connectable) { *max_len = connected_handle_length; - return 255; + type = 255; + } else { + dout("encode_fh %p\n", dentry); + fh->ino = ceph_ino(dentry->d_inode); + *max_len = handle_length; + type = 1; } - dout("encode_fh %p\n", dentry); - fh->ino = ceph_ino(dentry->d_inode); - *max_len = handle_length; - type = 1; } else { *max_len = handle_length; - return 255; + type = 255; } + dput(parent); return type; } @@ -123,7 +130,6 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, return dentry; } err = ceph_init_dentry(dentry); - if (err < 0) { iput(inode); return ERR_PTR(err); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 0d0eae05598..ce549d31eeb 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -122,7 +122,7 @@ int ceph_open(struct inode *inode, struct file *file) struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; struct ceph_file_info *cf = file->private_data; - struct inode *parent_inode = file->f_dentry->d_parent->d_inode; + struct inode *parent_inode = NULL; int err; int flags, fmode, wanted; @@ -194,7 +194,10 @@ int ceph_open(struct inode *inode, struct file *file) req->r_inode = inode; ihold(inode); req->r_num_caps = 1; + if (flags & (O_CREAT|O_TRUNC)) + parent_inode = ceph_get_dentry_parent_inode(file->f_dentry); err = ceph_mdsc_do_request(mdsc, parent_inode, req); + iput(parent_inode); if (!err) err = ceph_init_file(inode, file, req->r_fmode); ceph_mdsc_put_request(req); @@ -222,9 +225,9 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, { struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb); struct ceph_mds_client *mdsc = fsc->mdsc; - struct file *file = nd->intent.open.file; - struct inode *parent_inode = get_dentry_parent_inode(file->f_dentry); + struct file *file; struct ceph_mds_request *req; + struct dentry *ret; int err; int flags = nd->intent.open.flags; @@ -242,16 +245,24 @@ struct dentry *ceph_lookup_open(struct inode *dir, struct dentry *dentry, req->r_dentry_unless = CEPH_CAP_FILE_EXCL; } req->r_locked_dir = dir; /* caller holds dir->i_mutex */ - err = ceph_mdsc_do_request(mdsc, parent_inode, req); - dentry = ceph_finish_lookup(req, dentry, err); - if (!err && (flags & O_CREAT) && !req->r_reply_info.head->is_dentry) + err = ceph_mdsc_do_request(mdsc, + (flags & (O_CREAT|O_TRUNC)) ? dir : NULL, + req); + err = ceph_handle_snapdir(req, dentry, err); + if (err) + goto out; + if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry) err = ceph_handle_notrace_create(dir, dentry); - if (!err) - err = ceph_init_file(req->r_dentry->d_inode, file, - req->r_fmode); + if (err) + goto out; + file = lookup_instantiate_filp(nd, req->r_dentry, ceph_open); + if (IS_ERR(file)) + err = PTR_ERR(file); +out: + ret = ceph_finish_lookup(req, dentry, err); ceph_mdsc_put_request(req); - dout("ceph_lookup_open result=%p\n", dentry); - return dentry; + dout("ceph_lookup_open result=%p\n", ret); + return ret; } int ceph_release(struct inode *inode, struct file *file) @@ -643,7 +654,8 @@ again: if ((got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0 || (iocb->ki_filp->f_flags & O_DIRECT) || - (inode->i_sb->s_flags & MS_SYNCHRONOUS)) + (inode->i_sb->s_flags & MS_SYNCHRONOUS) || + (fi->flags & CEPH_F_SYNC)) /* hmm, this isn't really async... */ ret = ceph_sync_read(filp, base, len, ppos, &checkeof); else @@ -712,7 +724,7 @@ retry_snap: want = CEPH_CAP_FILE_BUFFER; ret = ceph_get_caps(ci, CEPH_CAP_FILE_WR, want, &got, endoff); if (ret < 0) - goto out; + goto out_put; dout("aio_write %p %llx.%llx %llu~%u got cap refs on %s\n", inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, @@ -720,12 +732,23 @@ retry_snap: if ((got & (CEPH_CAP_FILE_BUFFER|CEPH_CAP_FILE_LAZYIO)) == 0 || (iocb->ki_filp->f_flags & O_DIRECT) || - (inode->i_sb->s_flags & MS_SYNCHRONOUS)) { + (inode->i_sb->s_flags & MS_SYNCHRONOUS) || + (fi->flags & CEPH_F_SYNC)) { ret = ceph_sync_write(file, iov->iov_base, iov->iov_len, &iocb->ki_pos); } else { - ret = generic_file_aio_write(iocb, iov, nr_segs, pos); + /* + * buffered write; drop Fw early to avoid slow + * revocation if we get stuck on balance_dirty_pages + */ + int dirty; + spin_lock(&inode->i_lock); + dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_FILE_WR); + spin_unlock(&inode->i_lock); + ceph_put_cap_refs(ci, got); + + ret = generic_file_aio_write(iocb, iov, nr_segs, pos); if ((ret >= 0 || ret == -EIOCBQUEUED) && ((file->f_flags & O_SYNC) || IS_SYNC(file->f_mapping->host) || ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_NEARFULL))) { @@ -733,7 +756,12 @@ retry_snap: if (err < 0) ret = err; } + + if (dirty) + __mark_inode_dirty(inode, dirty); + goto out; } + if (ret >= 0) { int dirty; spin_lock(&inode->i_lock); @@ -743,12 +771,13 @@ retry_snap: __mark_inode_dirty(inode, dirty); } -out: +out_put: dout("aio_write %p %llx.%llx %llu~%u dropping cap refs on %s\n", inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len, ceph_cap_string(got)); ceph_put_cap_refs(ci, got); +out: if (ret == -EOLDSNAPC) { dout("aio_write %p %llx.%llx %llu~%u got EOLDSNAPC, retrying\n", inode, ceph_vinop(inode), pos, (unsigned)iov->iov_len); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index dfb2831d8d8..095799ba9dd 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -560,7 +560,8 @@ static int fill_inode(struct inode *inode, struct ceph_mds_reply_inode *info = iinfo->in; struct ceph_inode_info *ci = ceph_inode(inode); int i; - int issued, implemented; + int issued = 0, implemented; + int updating_inode = 0; struct timespec mtime, atime, ctime; u32 nsplits; struct ceph_buffer *xattr_blob = NULL; @@ -599,7 +600,8 @@ static int fill_inode(struct inode *inode, if (le64_to_cpu(info->version) > 0 && (ci->i_version & ~1) >= le64_to_cpu(info->version)) goto no_change; - + + updating_inode = 1; issued = __ceph_caps_issued(ci, &implemented); issued |= implemented | __ceph_caps_dirty(ci); @@ -707,17 +709,6 @@ static int fill_inode(struct inode *inode, ci->i_rfiles = le64_to_cpu(info->rfiles); ci->i_rsubdirs = le64_to_cpu(info->rsubdirs); ceph_decode_timespec(&ci->i_rctime, &info->rctime); - - /* set dir completion flag? */ - if (ci->i_files == 0 && ci->i_subdirs == 0 && - ceph_snap(inode) == CEPH_NOSNAP && - (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) && - (issued & CEPH_CAP_FILE_EXCL) == 0 && - (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { - dout(" marking %p complete (empty)\n", inode); - /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */ - ci->i_max_offset = 2; - } break; default: pr_err("fill_inode %llx.%llx BAD mode 0%o\n", @@ -774,6 +765,19 @@ no_change: __ceph_get_fmode(ci, cap_fmode); } + /* set dir completion flag? */ + if (S_ISDIR(inode->i_mode) && + updating_inode && /* didn't jump to no_change */ + ci->i_files == 0 && ci->i_subdirs == 0 && + ceph_snap(inode) == CEPH_NOSNAP && + (le32_to_cpu(info->cap.caps) & CEPH_CAP_FILE_SHARED) && + (issued & CEPH_CAP_FILE_EXCL) == 0 && + (ci->i_ceph_flags & CEPH_I_COMPLETE) == 0) { + dout(" marking %p complete (empty)\n", inode); + /* ci->i_ceph_flags |= CEPH_I_COMPLETE; */ + ci->i_max_offset = 2; + } + /* update delegation info? */ if (dirinfo) ceph_fill_dirfrag(inode, dirinfo); @@ -805,14 +809,14 @@ static void update_dentry_lease(struct dentry *dentry, return; spin_lock(&dentry->d_lock); - dout("update_dentry_lease %p mask %d duration %lu ms ttl %lu\n", - dentry, le16_to_cpu(lease->mask), duration, ttl); + dout("update_dentry_lease %p duration %lu ms ttl %lu\n", + dentry, duration, ttl); /* make lease_rdcache_gen match directory */ dir = dentry->d_parent->d_inode; di->lease_shared_gen = ceph_inode(dir)->i_shared_gen; - if (lease->mask == 0) + if (duration == 0) goto out_unlock; if (di->lease_gen == session->s_cap_gen && @@ -839,11 +843,13 @@ out_unlock: /* * Set dentry's directory position based on the current dir's max, and * order it in d_subdirs, so that dcache_readdir behaves. + * + * Always called under directory's i_mutex. */ static void ceph_set_dentry_offset(struct dentry *dn) { struct dentry *dir = dn->d_parent; - struct inode *inode = dn->d_parent->d_inode; + struct inode *inode = dir->d_inode; struct ceph_dentry_info *di; BUG_ON(!inode); @@ -1022,9 +1028,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req, /* do we have a dn lease? */ have_lease = have_dir_cap || - (le16_to_cpu(rinfo->dlease->mask) & - CEPH_LOCK_DN); - + le32_to_cpu(rinfo->dlease->duration_ms); if (!have_lease) dout("fill_trace no dentry lease or dir cap\n"); @@ -1560,7 +1564,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; struct ceph_inode_info *ci = ceph_inode(inode); - struct inode *parent_inode = dentry->d_parent->d_inode; + struct inode *parent_inode; const unsigned int ia_valid = attr->ia_valid; struct ceph_mds_request *req; struct ceph_mds_client *mdsc = ceph_sb_to_client(dentry->d_sb)->mdsc; @@ -1743,7 +1747,9 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr) req->r_inode_drop = release; req->r_args.setattr.mask = cpu_to_le32(mask); req->r_num_caps = 1; + parent_inode = ceph_get_dentry_parent_inode(dentry); err = ceph_mdsc_do_request(mdsc, parent_inode, req); + iput(parent_inode); } dout("setattr %p result=%d (%s locally, %d remote)\n", inode, err, ceph_cap_string(dirtied), mask); diff --git a/fs/ceph/ioctl.c b/fs/ceph/ioctl.c index ef0b5f48e13..3b256b50f7d 100644 --- a/fs/ceph/ioctl.c +++ b/fs/ceph/ioctl.c @@ -38,7 +38,7 @@ static long ceph_ioctl_get_layout(struct file *file, void __user *arg) static long ceph_ioctl_set_layout(struct file *file, void __user *arg) { struct inode *inode = file->f_dentry->d_inode; - struct inode *parent_inode = file->f_dentry->d_parent->d_inode; + struct inode *parent_inode; struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc; struct ceph_mds_request *req; struct ceph_ioctl_layout l; @@ -87,7 +87,9 @@ static long ceph_ioctl_set_layout(struct file *file, void __user *arg) req->r_args.setlayout.layout.fl_pg_preferred = cpu_to_le32(l.preferred_osd); + parent_inode = ceph_get_dentry_parent_inode(file->f_dentry); err = ceph_mdsc_do_request(mdsc, parent_inode, req); + iput(parent_inode); ceph_mdsc_put_request(req); return err; } @@ -231,6 +233,14 @@ static long ceph_ioctl_lazyio(struct file *file) return 0; } +static long ceph_ioctl_syncio(struct file *file) +{ + struct ceph_file_info *fi = file->private_data; + + fi->flags |= CEPH_F_SYNC; + return 0; +} + long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { dout("ioctl file %p cmd %u arg %lu\n", file, cmd, arg); @@ -249,6 +259,9 @@ long ceph_ioctl(struct file *file, unsigned int cmd, unsigned long arg) case CEPH_IOC_LAZYIO: return ceph_ioctl_lazyio(file); + + case CEPH_IOC_SYNCIO: + return ceph_ioctl_syncio(file); } return -ENOTTY; diff --git a/fs/ceph/ioctl.h b/fs/ceph/ioctl.h index 52e8fd74d45..0c5167e4318 100644 --- a/fs/ceph/ioctl.h +++ b/fs/ceph/ioctl.h @@ -40,5 +40,6 @@ struct ceph_ioctl_dataloc { struct ceph_ioctl_dataloc) #define CEPH_IOC_LAZYIO _IO(CEPH_IOCTL_MAGIC, 4) +#define CEPH_IOC_SYNCIO _IO(CEPH_IOCTL_MAGIC, 5) #endif diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 0c1d9175652..fee028b5332 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -483,22 +483,26 @@ void ceph_mdsc_release_request(struct kref *kref) destroy_reply_info(&req->r_reply_info); } if (req->r_inode) { - ceph_put_cap_refs(ceph_inode(req->r_inode), - CEPH_CAP_PIN); + ceph_put_cap_refs(ceph_inode(req->r_inode), CEPH_CAP_PIN); iput(req->r_inode); } if (req->r_locked_dir) - ceph_put_cap_refs(ceph_inode(req->r_locked_dir), - CEPH_CAP_PIN); + ceph_put_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN); if (req->r_target_inode) iput(req->r_target_inode); if (req->r_dentry) dput(req->r_dentry); if (req->r_old_dentry) { - ceph_put_cap_refs( - ceph_inode(req->r_old_dentry->d_parent->d_inode), - CEPH_CAP_PIN); + /* + * track (and drop pins for) r_old_dentry_dir + * separately, since r_old_dentry's d_parent may have + * changed between the dir mutex being dropped and + * this request being freed. + */ + ceph_put_cap_refs(ceph_inode(req->r_old_dentry_dir), + CEPH_CAP_PIN); dput(req->r_old_dentry); + iput(req->r_old_dentry_dir); } kfree(req->r_path1); kfree(req->r_path2); @@ -617,6 +621,12 @@ static void __unregister_request(struct ceph_mds_client *mdsc, */ struct dentry *get_nonsnap_parent(struct dentry *dentry) { + /* + * we don't need to worry about protecting the d_parent access + * here because we never renaming inside the snapped namespace + * except to resplice to another snapdir, and either the old or new + * result is a valid result. + */ while (!IS_ROOT(dentry) && ceph_snap(dentry->d_inode) != CEPH_NOSNAP) dentry = dentry->d_parent; return dentry; @@ -652,7 +662,9 @@ static int __choose_mds(struct ceph_mds_client *mdsc, if (req->r_inode) { inode = req->r_inode; } else if (req->r_dentry) { - struct inode *dir = req->r_dentry->d_parent->d_inode; + /* ignore race with rename; old or new d_parent is okay */ + struct dentry *parent = req->r_dentry->d_parent; + struct inode *dir = parent->d_inode; if (dir->i_sb != mdsc->fsc->sb) { /* not this fs! */ @@ -660,8 +672,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, } else if (ceph_snap(dir) != CEPH_NOSNAP) { /* direct snapped/virtual snapdir requests * based on parent dir inode */ - struct dentry *dn = - get_nonsnap_parent(req->r_dentry->d_parent); + struct dentry *dn = get_nonsnap_parent(parent); inode = dn->d_inode; dout("__choose_mds using nonsnap parent %p\n", inode); } else if (req->r_dentry->d_inode) { @@ -670,7 +681,7 @@ static int __choose_mds(struct ceph_mds_client *mdsc, } else { /* dir + name */ inode = dir; - hash = ceph_dentry_hash(req->r_dentry); + hash = ceph_dentry_hash(dir, req->r_dentry); is_hash = true; } } @@ -1931,9 +1942,8 @@ int ceph_mdsc_do_request(struct ceph_mds_client *mdsc, if (req->r_locked_dir) ceph_get_cap_refs(ceph_inode(req->r_locked_dir), CEPH_CAP_PIN); if (req->r_old_dentry) - ceph_get_cap_refs( - ceph_inode(req->r_old_dentry->d_parent->d_inode), - CEPH_CAP_PIN); + ceph_get_cap_refs(ceph_inode(req->r_old_dentry_dir), + CEPH_CAP_PIN); /* issue */ mutex_lock(&mdsc->mutex); @@ -2714,7 +2724,6 @@ static void handle_lease(struct ceph_mds_client *mdsc, struct ceph_mds_lease *h = msg->front.iov_base; u32 seq; struct ceph_vino vino; - int mask; struct qstr dname; int release = 0; @@ -2725,7 +2734,6 @@ static void handle_lease(struct ceph_mds_client *mdsc, goto bad; vino.ino = le64_to_cpu(h->ino); vino.snap = CEPH_NOSNAP; - mask = le16_to_cpu(h->mask); seq = le32_to_cpu(h->seq); dname.name = (void *)h + sizeof(*h) + sizeof(u32); dname.len = msg->front.iov_len - sizeof(*h) - sizeof(u32); @@ -2737,8 +2745,8 @@ static void handle_lease(struct ceph_mds_client *mdsc, /* lookup inode */ inode = ceph_find_inode(sb, vino); - dout("handle_lease %s, mask %d, ino %llx %p %.*s\n", - ceph_lease_op_name(h->action), mask, vino.ino, inode, + dout("handle_lease %s, ino %llx %p %.*s\n", + ceph_lease_op_name(h->action), vino.ino, inode, dname.len, dname.name); if (inode == NULL) { dout("handle_lease no inode %llx\n", vino.ino); @@ -2828,7 +2836,6 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, return; lease = msg->front.iov_base; lease->action = action; - lease->mask = cpu_to_le16(1); lease->ino = cpu_to_le64(ceph_vino(inode).ino); lease->first = lease->last = cpu_to_le64(ceph_vino(inode).snap); lease->seq = cpu_to_le32(seq); @@ -2850,7 +2857,7 @@ void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, * Pass @inode always, @dentry is optional. */ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode, - struct dentry *dentry, int mask) + struct dentry *dentry) { struct ceph_dentry_info *di; struct ceph_mds_session *session; @@ -2858,7 +2865,6 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode, BUG_ON(inode == NULL); BUG_ON(dentry == NULL); - BUG_ON(mask == 0); /* is dentry lease valid? */ spin_lock(&dentry->d_lock); @@ -2868,8 +2874,8 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode, di->lease_gen != di->lease_session->s_cap_gen || !time_before(jiffies, dentry->d_time)) { dout("lease_release inode %p dentry %p -- " - "no lease on %d\n", - inode, dentry, mask); + "no lease\n", + inode, dentry); spin_unlock(&dentry->d_lock); return; } @@ -2880,8 +2886,8 @@ void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode, __ceph_mdsc_drop_dentry_lease(dentry); spin_unlock(&dentry->d_lock); - dout("lease_release inode %p dentry %p mask %d to mds%d\n", - inode, dentry, mask, session->s_mds); + dout("lease_release inode %p dentry %p to mds%d\n", + inode, dentry, session->s_mds); ceph_mdsc_lease_send_msg(session, inode, dentry, CEPH_MDS_LEASE_RELEASE, seq); ceph_put_mds_session(session); diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 7d8a0d662d5..4bb239921db 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -171,6 +171,7 @@ struct ceph_mds_request { struct inode *r_inode; /* arg1 */ struct dentry *r_dentry; /* arg1 */ struct dentry *r_old_dentry; /* arg2: rename from or link from */ + struct inode *r_old_dentry_dir; /* arg2: old dentry's parent dir */ char *r_path1, *r_path2; struct ceph_vino r_ino1, r_ino2; @@ -333,7 +334,7 @@ extern void ceph_mdsc_sync(struct ceph_mds_client *mdsc); extern void ceph_mdsc_lease_release(struct ceph_mds_client *mdsc, struct inode *inode, - struct dentry *dn, int mask); + struct dentry *dn); extern void ceph_invalidate_dir_request(struct ceph_mds_request *req); diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c index 54b14de2e72..e2643719133 100644 --- a/fs/ceph/snap.c +++ b/fs/ceph/snap.c @@ -449,6 +449,15 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) spin_lock(&inode->i_lock); used = __ceph_caps_used(ci); dirty = __ceph_caps_dirty(ci); + + /* + * If there is a write in progress, treat that as a dirty Fw, + * even though it hasn't completed yet; by the time we finish + * up this capsnap it will be. + */ + if (used & CEPH_CAP_FILE_WR) + dirty |= CEPH_CAP_FILE_WR; + if (__ceph_have_pending_cap_snap(ci)) { /* there is no point in queuing multiple "pending" cap_snaps, as no new writes are allowed to start when pending, so any @@ -456,13 +465,19 @@ void ceph_queue_cap_snap(struct ceph_inode_info *ci) cap_snap. lucky us. */ dout("queue_cap_snap %p already pending\n", inode); kfree(capsnap); - } else if (ci->i_wrbuffer_ref_head || (used & CEPH_CAP_FILE_WR) || - (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL| - CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR))) { + } else if (dirty & (CEPH_CAP_AUTH_EXCL|CEPH_CAP_XATTR_EXCL| + CEPH_CAP_FILE_EXCL|CEPH_CAP_FILE_WR)) { struct ceph_snap_context *snapc = ci->i_head_snapc; - dout("queue_cap_snap %p cap_snap %p queuing under %p\n", inode, - capsnap, snapc); + /* + * if we are a sync write, we may need to go to the snaprealm + * to get the current snapc. + */ + if (!snapc) + snapc = ci->i_snap_realm->cached_context; + + dout("queue_cap_snap %p cap_snap %p queuing under %p %s\n", + inode, capsnap, snapc, ceph_cap_string(dirty)); ihold(inode); atomic_set(&capsnap->nref, 1); diff --git a/fs/ceph/super.c b/fs/ceph/super.c index f2f77fd3c14..d47c5ec7fb1 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -73,8 +73,7 @@ static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf) */ buf->f_bsize = 1 << CEPH_BLOCK_SHIFT; buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10); - buf->f_bfree = (le64_to_cpu(st.kb) - le64_to_cpu(st.kb_used)) >> - (CEPH_BLOCK_SHIFT-10); + buf->f_bfree = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10); buf->f_files = le64_to_cpu(st.num_objects); @@ -780,6 +779,10 @@ static int ceph_register_bdi(struct super_block *sb, fsc->backing_dev_info.ra_pages = (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_SHIFT; + else + fsc->backing_dev_info.ra_pages = + default_backing_dev_info.ra_pages; + err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%d", atomic_long_inc_return(&bdi_seq)); if (!err) diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 30446b144e3..a23eed526f0 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -543,13 +543,16 @@ extern void ceph_reservation_status(struct ceph_fs_client *client, /* * we keep buffered readdir results attached to file->private_data */ +#define CEPH_F_SYNC 1 +#define CEPH_F_ATEND 2 + struct ceph_file_info { - int fmode; /* initialized on open */ + short fmode; /* initialized on open */ + short flags; /* CEPH_F_* */ /* readdir: position within the dir */ u32 frag; struct ceph_mds_request *last_readdir; - int at_end; /* readdir: position within a frag */ unsigned offset; /* offset of last chunk, adjusted for . and .. */ @@ -789,6 +792,8 @@ extern const struct dentry_operations ceph_dentry_ops, ceph_snap_dentry_ops, ceph_snapdir_dentry_ops; extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry); +extern int ceph_handle_snapdir(struct ceph_mds_request *req, + struct dentry *dentry, int err); extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req, struct dentry *dentry, int err); @@ -796,7 +801,8 @@ extern void ceph_dentry_lru_add(struct dentry *dn); extern void ceph_dentry_lru_touch(struct dentry *dn); extern void ceph_dentry_lru_del(struct dentry *dn); extern void ceph_invalidate_dentry_lease(struct dentry *dentry); -extern unsigned ceph_dentry_hash(struct dentry *dn); +extern unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn); +extern struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry); /* * our d_ops vary depending on whether the inode is live, @@ -819,14 +825,6 @@ extern int ceph_encode_locks(struct inode *i, struct ceph_pagelist *p, int p_locks, int f_locks); extern int lock_to_ceph_filelock(struct file_lock *fl, struct ceph_filelock *c); -static inline struct inode *get_dentry_parent_inode(struct dentry *dentry) -{ - if (dentry && dentry->d_parent) - return dentry->d_parent->d_inode; - - return NULL; -} - /* debugfs.c */ extern int ceph_fs_debugfs_init(struct ceph_fs_client *client); extern void ceph_fs_debugfs_cleanup(struct ceph_fs_client *client); diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c index f42d730f1b6..96c6739a028 100644 --- a/fs/ceph/xattr.c +++ b/fs/ceph/xattr.c @@ -629,7 +629,7 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name, struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); struct inode *inode = dentry->d_inode; struct ceph_inode_info *ci = ceph_inode(inode); - struct inode *parent_inode = dentry->d_parent->d_inode; + struct inode *parent_inode; struct ceph_mds_request *req; struct ceph_mds_client *mdsc = fsc->mdsc; int err; @@ -677,7 +677,9 @@ static int ceph_sync_setxattr(struct dentry *dentry, const char *name, req->r_data_len = size; dout("xattr.ver (before): %lld\n", ci->i_xattrs.version); + parent_inode = ceph_get_dentry_parent_inode(dentry); err = ceph_mdsc_do_request(mdsc, parent_inode, req); + iput(parent_inode); ceph_mdsc_put_request(req); dout("xattr.ver (after): %lld\n", ci->i_xattrs.version); @@ -788,7 +790,7 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name) struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb); struct ceph_mds_client *mdsc = fsc->mdsc; struct inode *inode = dentry->d_inode; - struct inode *parent_inode = dentry->d_parent->d_inode; + struct inode *parent_inode; struct ceph_mds_request *req; int err; @@ -802,7 +804,9 @@ static int ceph_send_removexattr(struct dentry *dentry, const char *name) req->r_num_caps = 1; req->r_path2 = kstrdup(name, GFP_NOFS); + parent_inode = ceph_get_dentry_parent_inode(dentry); err = ceph_mdsc_do_request(mdsc, parent_inode, req); + iput(parent_inode); ceph_mdsc_put_request(req); return err; } diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index 5a0ee7f2af0..259991bd211 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -52,19 +52,29 @@ static int cifs_calculate_signature(const struct smb_hdr *cifs_pdu, rc = crypto_shash_init(&server->secmech.sdescmd5->shash); if (rc) { - cERROR(1, "%s: Oould not init md5\n", __func__); + cERROR(1, "%s: Could not init md5\n", __func__); return rc; } - crypto_shash_update(&server->secmech.sdescmd5->shash, + rc = crypto_shash_update(&server->secmech.sdescmd5->shash, server->session_key.response, server->session_key.len); + if (rc) { + cERROR(1, "%s: Could not update with response\n", __func__); + return rc; + } - crypto_shash_update(&server->secmech.sdescmd5->shash, + rc = crypto_shash_update(&server->secmech.sdescmd5->shash, cifs_pdu->Protocol, be32_to_cpu(cifs_pdu->smb_buf_length)); + if (rc) { + cERROR(1, "%s: Could not update with payload\n", __func__); + return rc; + } rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature); + if (rc) + cERROR(1, "%s: Could not generate md5 hash\n", __func__); - return 0; + return rc; } /* must be called with server->srv_mutex held */ @@ -112,12 +122,16 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec, rc = crypto_shash_init(&server->secmech.sdescmd5->shash); if (rc) { - cERROR(1, "%s: Oould not init md5\n", __func__); + cERROR(1, "%s: Could not init md5\n", __func__); return rc; } - crypto_shash_update(&server->secmech.sdescmd5->shash, + rc = crypto_shash_update(&server->secmech.sdescmd5->shash, server->session_key.response, server->session_key.len); + if (rc) { + cERROR(1, "%s: Could not update with response\n", __func__); + return rc; + } for (i = 0; i < n_vec; i++) { if (iov[i].iov_len == 0) @@ -131,14 +145,24 @@ static int cifs_calc_signature2(const struct kvec *iov, int n_vec, if (i == 0) { if (iov[0].iov_len <= 8) /* cmd field at offset 9 */ break; /* nothing to sign or corrupt header */ + rc = crypto_shash_update(&server->secmech.sdescmd5->shash, iov[i].iov_base + 4, iov[i].iov_len - 4); - } else + } else { + rc = crypto_shash_update(&server->secmech.sdescmd5->shash, iov[i].iov_base, iov[i].iov_len); + } + if (rc) { + cERROR(1, "%s: Could not update with payload\n", + __func__); + return rc; + } } rc = crypto_shash_final(&server->secmech.sdescmd5->shash, signature); + if (rc) + cERROR(1, "%s: Could not generate md5 hash\n", __func__); return rc; } @@ -463,8 +487,12 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, /* calculate md4 hash of password */ E_md4hash(ses->password, nt_hash); - crypto_shash_setkey(ses->server->secmech.hmacmd5, nt_hash, + rc = crypto_shash_setkey(ses->server->secmech.hmacmd5, nt_hash, CIFS_NTHASH_SIZE); + if (rc) { + cERROR(1, "%s: Could not set NT Hash as a key", __func__); + return rc; + } rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash); if (rc) { @@ -478,13 +506,18 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, if (user == NULL) { cERROR(1, "calc_ntlmv2_hash: user mem alloc failure\n"); rc = -ENOMEM; - goto calc_exit_2; + return rc; } len = cifs_strtoUCS((__le16 *)user, ses->user_name, len, nls_cp); UniStrupr(user); - crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, + rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, (char *)user, 2 * len); + kfree(user); + if (rc) { + cERROR(1, "%s: Could not update with user\n", __func__); + return rc; + } /* convert ses->domainName to unicode and uppercase */ if (ses->domainName) { @@ -494,13 +527,19 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, if (domain == NULL) { cERROR(1, "calc_ntlmv2_hash: domain mem alloc failure"); rc = -ENOMEM; - goto calc_exit_1; + return rc; } len = cifs_strtoUCS((__le16 *)domain, ses->domainName, len, nls_cp); + rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, (char *)domain, 2 * len); kfree(domain); + if (rc) { + cERROR(1, "%s: Could not update with domain\n", + __func__); + return rc; + } } else if (ses->serverName) { len = strlen(ses->serverName); @@ -508,21 +547,26 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash, if (server == NULL) { cERROR(1, "calc_ntlmv2_hash: server mem alloc failure"); rc = -ENOMEM; - goto calc_exit_1; + return rc; } len = cifs_strtoUCS((__le16 *)server, ses->serverName, len, nls_cp); + rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, (char *)server, 2 * len); kfree(server); + if (rc) { + cERROR(1, "%s: Could not update with server\n", + __func__); + return rc; + } } rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash, ntlmv2_hash); + if (rc) + cERROR(1, "%s: Could not generate md5 hash\n", __func__); -calc_exit_1: - kfree(user); -calc_exit_2: return rc; } @@ -537,8 +581,12 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) return -1; } - crypto_shash_setkey(ses->server->secmech.hmacmd5, + rc = crypto_shash_setkey(ses->server->secmech.hmacmd5, ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); + if (rc) { + cERROR(1, "%s: Could not set NTLMV2 Hash as a key", __func__); + return rc; + } rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash); if (rc) { @@ -552,11 +600,17 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash) else memcpy(ses->auth_key.response + offset, ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE); - crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, + rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, ses->auth_key.response + offset, ses->auth_key.len - offset); + if (rc) { + cERROR(1, "%s: Could not update with response\n", __func__); + return rc; + } rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash, ses->auth_key.response + CIFS_SESS_KEY_SIZE); + if (rc) + cERROR(1, "%s: Could not generate md5 hash\n", __func__); return rc; } @@ -626,8 +680,12 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) } /* now calculate the session key for NTLMv2 */ - crypto_shash_setkey(ses->server->secmech.hmacmd5, + rc = crypto_shash_setkey(ses->server->secmech.hmacmd5, ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE); + if (rc) { + cERROR(1, "%s: Could not set NTLMV2 Hash as a key", __func__); + goto setup_ntlmv2_rsp_ret; + } rc = crypto_shash_init(&ses->server->secmech.sdeschmacmd5->shash); if (rc) { @@ -635,12 +693,18 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) goto setup_ntlmv2_rsp_ret; } - crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, + rc = crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash, ses->auth_key.response + CIFS_SESS_KEY_SIZE, CIFS_HMAC_MD5_HASH_SIZE); + if (rc) { + cERROR(1, "%s: Could not update with response\n", __func__); + goto setup_ntlmv2_rsp_ret; + } rc = crypto_shash_final(&ses->server->secmech.sdeschmacmd5->shash, ses->auth_key.response); + if (rc) + cERROR(1, "%s: Could not generate md5 hash\n", __func__); setup_ntlmv2_rsp_ret: kfree(tiblob); @@ -668,8 +732,12 @@ calc_seckey(struct cifs_ses *ses) desc.tfm = tfm_arc4; - crypto_blkcipher_setkey(tfm_arc4, ses->auth_key.response, + rc = crypto_blkcipher_setkey(tfm_arc4, ses->auth_key.response, CIFS_SESS_KEY_SIZE); + if (rc) { + cERROR(1, "%s: Could not set response as a key", __func__); + return rc; + } sg_init_one(&sgin, sec_key, CIFS_SESS_KEY_SIZE); sg_init_one(&sgout, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE); @@ -688,7 +756,7 @@ calc_seckey(struct cifs_ses *ses) crypto_free_blkcipher(tfm_arc4); - return 0; + return rc; } void diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 6255fa812c7..1fcf4e5b311 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -501,7 +501,7 @@ struct cifs_search_info { char *ntwrk_buf_start; char *srch_entries_start; char *last_entry; - char *presume_name; + const char *presume_name; unsigned int resume_name_len; bool endOfSearch:1; bool emptyDir:1; diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 14d602f178c..ae576fbb514 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -57,11 +57,6 @@ build_path_from_dentry(struct dentry *direntry) struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); unsigned seq; - if (direntry == NULL) - return NULL; /* not much we can do if dentry is freed and - we need to reopen the file after it was closed implicitly - when the server crashed */ - dirsep = CIFS_DIR_SEP(cifs_sb); if (tcon->Flags & SMB_SHARE_IS_IN_DFS) dfsplen = strnlen(tcon->treeName, MAX_TREE_SIZE + 1); @@ -641,7 +636,7 @@ lookup_out: static int cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) { - if (nd->flags & LOOKUP_RCU) + if (nd && (nd->flags & LOOKUP_RCU)) return -ECHILD; if (direntry->d_inode) { diff --git a/fs/cifs/link.c b/fs/cifs/link.c index 556b1a0b54d..db3f18cdf02 100644 --- a/fs/cifs/link.c +++ b/fs/cifs/link.c @@ -74,8 +74,14 @@ symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash) cERROR(1, "%s: Could not init md5 shash\n", __func__); goto symlink_hash_err; } - crypto_shash_update(&sdescmd5->shash, link_str, link_len); + rc = crypto_shash_update(&sdescmd5->shash, link_str, link_len); + if (rc) { + cERROR(1, "%s: Could not update iwth link_str\n", __func__); + goto symlink_hash_err; + } rc = crypto_shash_final(&sdescmd5->shash, md5_hash); + if (rc) + cERROR(1, "%s: Could not generate md5 hash\n", __func__); symlink_hash_err: crypto_free_shash(md5); diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c index 965a3af186a..5de03ec2014 100644 --- a/fs/cifs/readdir.c +++ b/fs/cifs/readdir.c @@ -4,6 +4,7 @@ * Directory search handling * * Copyright (C) International Business Machines Corp., 2004, 2008 + * Copyright (C) Red Hat, Inc., 2011 * Author(s): Steve French (sfrench@us.ibm.com) * * This library is free software; you can redistribute it and/or modify @@ -290,10 +291,10 @@ error_exit: } /* return length of unicode string in bytes */ -static int cifs_unicode_bytelen(char *str) +static int cifs_unicode_bytelen(const char *str) { int len; - __le16 *ustr = (__le16 *)str; + const __le16 *ustr = (const __le16 *)str; for (len = 0; len <= PATH_MAX; len++) { if (ustr[len] == 0) @@ -334,78 +335,128 @@ static char *nxt_dir_entry(char *old_entry, char *end_of_smb, int level) } +struct cifs_dirent { + const char *name; + size_t namelen; + u32 resume_key; + u64 ino; +}; + +static void cifs_fill_dirent_unix(struct cifs_dirent *de, + const FILE_UNIX_INFO *info, bool is_unicode) +{ + de->name = &info->FileName[0]; + if (is_unicode) + de->namelen = cifs_unicode_bytelen(de->name); + else + de->namelen = strnlen(de->name, PATH_MAX); + de->resume_key = info->ResumeKey; + de->ino = le64_to_cpu(info->basic.UniqueId); +} + +static void cifs_fill_dirent_dir(struct cifs_dirent *de, + const FILE_DIRECTORY_INFO *info) +{ + de->name = &info->FileName[0]; + de->namelen = le32_to_cpu(info->FileNameLength); + de->resume_key = info->FileIndex; +} + +static void cifs_fill_dirent_full(struct cifs_dirent *de, + const FILE_FULL_DIRECTORY_INFO *info) +{ + de->name = &info->FileName[0]; + de->namelen = le32_to_cpu(info->FileNameLength); + de->resume_key = info->FileIndex; +} + +static void cifs_fill_dirent_search(struct cifs_dirent *de, + const SEARCH_ID_FULL_DIR_INFO *info) +{ + de->name = &info->FileName[0]; + de->namelen = le32_to_cpu(info->FileNameLength); + de->resume_key = info->FileIndex; + de->ino = le64_to_cpu(info->UniqueId); +} + +static void cifs_fill_dirent_both(struct cifs_dirent *de, + const FILE_BOTH_DIRECTORY_INFO *info) +{ + de->name = &info->FileName[0]; + de->namelen = le32_to_cpu(info->FileNameLength); + de->resume_key = info->FileIndex; +} + +static void cifs_fill_dirent_std(struct cifs_dirent *de, + const FIND_FILE_STANDARD_INFO *info) +{ + de->name = &info->FileName[0]; + /* one byte length, no endianess conversion */ + de->namelen = info->FileNameLength; + de->resume_key = info->ResumeKey; +} + +static int cifs_fill_dirent(struct cifs_dirent *de, const void *info, + u16 level, bool is_unicode) +{ + memset(de, 0, sizeof(*de)); + + switch (level) { + case SMB_FIND_FILE_UNIX: + cifs_fill_dirent_unix(de, info, is_unicode); + break; + case SMB_FIND_FILE_DIRECTORY_INFO: + cifs_fill_dirent_dir(de, info); + break; + case SMB_FIND_FILE_FULL_DIRECTORY_INFO: + cifs_fill_dirent_full(de, info); + break; + case SMB_FIND_FILE_ID_FULL_DIR_INFO: + cifs_fill_dirent_search(de, info); + break; + case SMB_FIND_FILE_BOTH_DIRECTORY_INFO: + cifs_fill_dirent_both(de, info); + break; + case SMB_FIND_FILE_INFO_STANDARD: + cifs_fill_dirent_std(de, info); + break; + default: + cFYI(1, "Unknown findfirst level %d", level); + return -EINVAL; + } + + return 0; +} + #define UNICODE_DOT cpu_to_le16(0x2e) /* return 0 if no match and 1 for . (current directory) and 2 for .. (parent) */ -static int cifs_entry_is_dot(char *current_entry, struct cifsFileInfo *cfile) +static int cifs_entry_is_dot(struct cifs_dirent *de, bool is_unicode) { int rc = 0; - char *filename = NULL; - int len = 0; - - if (cfile->srch_inf.info_level == SMB_FIND_FILE_UNIX) { - FILE_UNIX_INFO *pFindData = (FILE_UNIX_INFO *)current_entry; - filename = &pFindData->FileName[0]; - if (cfile->srch_inf.unicode) { - len = cifs_unicode_bytelen(filename); - } else { - /* BB should we make this strnlen of PATH_MAX? */ - len = strnlen(filename, 5); - } - } else if (cfile->srch_inf.info_level == SMB_FIND_FILE_DIRECTORY_INFO) { - FILE_DIRECTORY_INFO *pFindData = - (FILE_DIRECTORY_INFO *)current_entry; - filename = &pFindData->FileName[0]; - len = le32_to_cpu(pFindData->FileNameLength); - } else if (cfile->srch_inf.info_level == - SMB_FIND_FILE_FULL_DIRECTORY_INFO) { - FILE_FULL_DIRECTORY_INFO *pFindData = - (FILE_FULL_DIRECTORY_INFO *)current_entry; - filename = &pFindData->FileName[0]; - len = le32_to_cpu(pFindData->FileNameLength); - } else if (cfile->srch_inf.info_level == - SMB_FIND_FILE_ID_FULL_DIR_INFO) { - SEARCH_ID_FULL_DIR_INFO *pFindData = - (SEARCH_ID_FULL_DIR_INFO *)current_entry; - filename = &pFindData->FileName[0]; - len = le32_to_cpu(pFindData->FileNameLength); - } else if (cfile->srch_inf.info_level == - SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { - FILE_BOTH_DIRECTORY_INFO *pFindData = - (FILE_BOTH_DIRECTORY_INFO *)current_entry; - filename = &pFindData->FileName[0]; - len = le32_to_cpu(pFindData->FileNameLength); - } else if (cfile->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD) { - FIND_FILE_STANDARD_INFO *pFindData = - (FIND_FILE_STANDARD_INFO *)current_entry; - filename = &pFindData->FileName[0]; - len = pFindData->FileNameLength; - } else { - cFYI(1, "Unknown findfirst level %d", - cfile->srch_inf.info_level); - } - if (filename) { - if (cfile->srch_inf.unicode) { - __le16 *ufilename = (__le16 *)filename; - if (len == 2) { - /* check for . */ - if (ufilename[0] == UNICODE_DOT) - rc = 1; - } else if (len == 4) { - /* check for .. */ - if ((ufilename[0] == UNICODE_DOT) - && (ufilename[1] == UNICODE_DOT)) - rc = 2; - } - } else /* ASCII */ { - if (len == 1) { - if (filename[0] == '.') - rc = 1; - } else if (len == 2) { - if ((filename[0] == '.') && (filename[1] == '.')) - rc = 2; - } + if (!de->name) + return 0; + + if (is_unicode) { + __le16 *ufilename = (__le16 *)de->name; + if (de->namelen == 2) { + /* check for . */ + if (ufilename[0] == UNICODE_DOT) + rc = 1; + } else if (de->namelen == 4) { + /* check for .. */ + if (ufilename[0] == UNICODE_DOT && + ufilename[1] == UNICODE_DOT) + rc = 2; + } + } else /* ASCII */ { + if (de->namelen == 1) { + if (de->name[0] == '.') + rc = 1; + } else if (de->namelen == 2) { + if (de->name[0] == '.' && de->name[1] == '.') + rc = 2; } } @@ -427,66 +478,18 @@ static int is_dir_changed(struct file *file) } static int cifs_save_resume_key(const char *current_entry, - struct cifsFileInfo *cifsFile) + struct cifsFileInfo *file_info) { - int rc = 0; - unsigned int len = 0; - __u16 level; - char *filename; - - if ((cifsFile == NULL) || (current_entry == NULL)) - return -EINVAL; - - level = cifsFile->srch_inf.info_level; - - if (level == SMB_FIND_FILE_UNIX) { - FILE_UNIX_INFO *pFindData = (FILE_UNIX_INFO *)current_entry; + struct cifs_dirent de; + int rc; - filename = &pFindData->FileName[0]; - if (cifsFile->srch_inf.unicode) { - len = cifs_unicode_bytelen(filename); - } else { - /* BB should we make this strnlen of PATH_MAX? */ - len = strnlen(filename, PATH_MAX); - } - cifsFile->srch_inf.resume_key = pFindData->ResumeKey; - } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) { - FILE_DIRECTORY_INFO *pFindData = - (FILE_DIRECTORY_INFO *)current_entry; - filename = &pFindData->FileName[0]; - len = le32_to_cpu(pFindData->FileNameLength); - cifsFile->srch_inf.resume_key = pFindData->FileIndex; - } else if (level == SMB_FIND_FILE_FULL_DIRECTORY_INFO) { - FILE_FULL_DIRECTORY_INFO *pFindData = - (FILE_FULL_DIRECTORY_INFO *)current_entry; - filename = &pFindData->FileName[0]; - len = le32_to_cpu(pFindData->FileNameLength); - cifsFile->srch_inf.resume_key = pFindData->FileIndex; - } else if (level == SMB_FIND_FILE_ID_FULL_DIR_INFO) { - SEARCH_ID_FULL_DIR_INFO *pFindData = - (SEARCH_ID_FULL_DIR_INFO *)current_entry; - filename = &pFindData->FileName[0]; - len = le32_to_cpu(pFindData->FileNameLength); - cifsFile->srch_inf.resume_key = pFindData->FileIndex; - } else if (level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { - FILE_BOTH_DIRECTORY_INFO *pFindData = - (FILE_BOTH_DIRECTORY_INFO *)current_entry; - filename = &pFindData->FileName[0]; - len = le32_to_cpu(pFindData->FileNameLength); - cifsFile->srch_inf.resume_key = pFindData->FileIndex; - } else if (level == SMB_FIND_FILE_INFO_STANDARD) { - FIND_FILE_STANDARD_INFO *pFindData = - (FIND_FILE_STANDARD_INFO *)current_entry; - filename = &pFindData->FileName[0]; - /* one byte length, no name conversion */ - len = (unsigned int)pFindData->FileNameLength; - cifsFile->srch_inf.resume_key = pFindData->ResumeKey; - } else { - cFYI(1, "Unknown findfirst level %d", level); - return -EINVAL; + rc = cifs_fill_dirent(&de, current_entry, file_info->srch_inf.info_level, + file_info->srch_inf.unicode); + if (!rc) { + file_info->srch_inf.presume_name = de.name; + file_info->srch_inf.resume_name_len = de.namelen; + file_info->srch_inf.resume_key = de.resume_key; } - cifsFile->srch_inf.resume_name_len = len; - cifsFile->srch_inf.presume_name = filename; return rc; } @@ -605,136 +608,70 @@ static int find_cifs_entry(const int xid, struct cifs_tcon *pTcon, return rc; } -/* inode num, inode type and filename returned */ -static int cifs_get_name_from_search_buf(struct qstr *pqst, - char *current_entry, __u16 level, unsigned int unicode, - struct cifs_sb_info *cifs_sb, unsigned int max_len, __u64 *pinum) +static int cifs_filldir(char *find_entry, struct file *file, filldir_t filldir, + void *dirent, char *scratch_buf, unsigned int max_len) { + struct cifsFileInfo *file_info = file->private_data; + struct super_block *sb = file->f_path.dentry->d_sb; + struct cifs_sb_info *cifs_sb = CIFS_SB(sb); + struct cifs_dirent de = { NULL, }; + struct cifs_fattr fattr; + struct dentry *dentry; + struct qstr name; int rc = 0; - unsigned int len = 0; - char *filename; - struct nls_table *nlt = cifs_sb->local_nls; - - *pinum = 0; - - if (level == SMB_FIND_FILE_UNIX) { - FILE_UNIX_INFO *pFindData = (FILE_UNIX_INFO *)current_entry; - - filename = &pFindData->FileName[0]; - if (unicode) { - len = cifs_unicode_bytelen(filename); - } else { - /* BB should we make this strnlen of PATH_MAX? */ - len = strnlen(filename, PATH_MAX); - } + ino_t ino; - *pinum = le64_to_cpu(pFindData->basic.UniqueId); - } else if (level == SMB_FIND_FILE_DIRECTORY_INFO) { - FILE_DIRECTORY_INFO *pFindData = - (FILE_DIRECTORY_INFO *)current_entry; - filename = &pFindData->FileName[0]; - len = le32_to_cpu(pFindData->FileNameLength); - } else if (level == SMB_FIND_FILE_FULL_DIRECTORY_INFO) { - FILE_FULL_DIRECTORY_INFO *pFindData = - (FILE_FULL_DIRECTORY_INFO *)current_entry; - filename = &pFindData->FileName[0]; - len = le32_to_cpu(pFindData->FileNameLength); - } else if (level == SMB_FIND_FILE_ID_FULL_DIR_INFO) { - SEARCH_ID_FULL_DIR_INFO *pFindData = - (SEARCH_ID_FULL_DIR_INFO *)current_entry; - filename = &pFindData->FileName[0]; - len = le32_to_cpu(pFindData->FileNameLength); - *pinum = le64_to_cpu(pFindData->UniqueId); - } else if (level == SMB_FIND_FILE_BOTH_DIRECTORY_INFO) { - FILE_BOTH_DIRECTORY_INFO *pFindData = - (FILE_BOTH_DIRECTORY_INFO *)current_entry; - filename = &pFindData->FileName[0]; - len = le32_to_cpu(pFindData->FileNameLength); - } else if (level == SMB_FIND_FILE_INFO_STANDARD) { - FIND_FILE_STANDARD_INFO *pFindData = - (FIND_FILE_STANDARD_INFO *)current_entry; - filename = &pFindData->FileName[0]; - /* one byte length, no name conversion */ - len = (unsigned int)pFindData->FileNameLength; - } else { - cFYI(1, "Unknown findfirst level %d", level); - return -EINVAL; - } + rc = cifs_fill_dirent(&de, find_entry, file_info->srch_inf.info_level, + file_info->srch_inf.unicode); + if (rc) + return rc; - if (len > max_len) { - cERROR(1, "bad search response length %d past smb end", len); + if (de.namelen > max_len) { + cERROR(1, "bad search response length %zd past smb end", + de.namelen); return -EINVAL; } - if (unicode) { - pqst->len = cifs_from_ucs2((char *) pqst->name, - (__le16 *) filename, - UNICODE_NAME_MAX, - min(len, max_len), nlt, - cifs_sb->mnt_cifs_flags & - CIFS_MOUNT_MAP_SPECIAL_CHR); - pqst->len -= nls_nullsize(nlt); - } else { - pqst->name = filename; - pqst->len = len; - } - return rc; -} - -static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir, - void *direntry, char *scratch_buf, unsigned int max_len) -{ - int rc = 0; - struct qstr qstring; - struct cifsFileInfo *pCifsF; - u64 inum; - ino_t ino; - struct super_block *sb; - struct cifs_sb_info *cifs_sb; - struct dentry *tmp_dentry; - struct cifs_fattr fattr; - - /* get filename and len into qstring */ - /* get dentry */ - /* decide whether to create and populate ionde */ - if ((direntry == NULL) || (file == NULL)) - return -EINVAL; - - pCifsF = file->private_data; - - if ((scratch_buf == NULL) || (pfindEntry == NULL) || (pCifsF == NULL)) - return -ENOENT; - - rc = cifs_entry_is_dot(pfindEntry, pCifsF); /* skip . and .. since we added them first */ - if (rc != 0) + if (cifs_entry_is_dot(&de, file_info->srch_inf.unicode)) return 0; - sb = file->f_path.dentry->d_sb; - cifs_sb = CIFS_SB(sb); - - qstring.name = scratch_buf; - rc = cifs_get_name_from_search_buf(&qstring, pfindEntry, - pCifsF->srch_inf.info_level, - pCifsF->srch_inf.unicode, cifs_sb, - max_len, &inum /* returned */); + if (file_info->srch_inf.unicode) { + struct nls_table *nlt = cifs_sb->local_nls; - if (rc) - return rc; + name.name = scratch_buf; + name.len = + cifs_from_ucs2((char *)name.name, (__le16 *)de.name, + UNICODE_NAME_MAX, + min(de.namelen, (size_t)max_len), nlt, + cifs_sb->mnt_cifs_flags & + CIFS_MOUNT_MAP_SPECIAL_CHR); + name.len -= nls_nullsize(nlt); + } else { + name.name = de.name; + name.len = de.namelen; + } - if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_UNIX) + switch (file_info->srch_inf.info_level) { + case SMB_FIND_FILE_UNIX: cifs_unix_basic_to_fattr(&fattr, - &((FILE_UNIX_INFO *) pfindEntry)->basic, - cifs_sb); - else if (pCifsF->srch_inf.info_level == SMB_FIND_FILE_INFO_STANDARD) - cifs_std_info_to_fattr(&fattr, (FIND_FILE_STANDARD_INFO *) - pfindEntry, cifs_sb); - else - cifs_dir_info_to_fattr(&fattr, (FILE_DIRECTORY_INFO *) - pfindEntry, cifs_sb); + &((FILE_UNIX_INFO *)find_entry)->basic, + cifs_sb); + break; + case SMB_FIND_FILE_INFO_STANDARD: + cifs_std_info_to_fattr(&fattr, + (FIND_FILE_STANDARD_INFO *)find_entry, + cifs_sb); + break; + default: + cifs_dir_info_to_fattr(&fattr, + (FILE_DIRECTORY_INFO *)find_entry, + cifs_sb); + break; + } - if (inum && (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) { - fattr.cf_uniqueid = inum; + if (de.ino && (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)) { + fattr.cf_uniqueid = de.ino; } else { fattr.cf_uniqueid = iunique(sb, ROOT_I); cifs_autodisable_serverino(cifs_sb); @@ -750,12 +687,12 @@ static int cifs_filldir(char *pfindEntry, struct file *file, filldir_t filldir, fattr.cf_flags |= CIFS_FATTR_NEED_REVAL; ino = cifs_uniqueid_to_ino_t(fattr.cf_uniqueid); - tmp_dentry = cifs_readdir_lookup(file->f_dentry, &qstring, &fattr); + dentry = cifs_readdir_lookup(file->f_dentry, &name, &fattr); - rc = filldir(direntry, qstring.name, qstring.len, file->f_pos, - ino, fattr.cf_dtype); + rc = filldir(dirent, name.name, name.len, file->f_pos, ino, + fattr.cf_dtype); - dput(tmp_dentry); + dput(dentry); return rc; } diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c index 1c5b770c314..42b9fff4875 100644 --- a/fs/cifs/smbencrypt.c +++ b/fs/cifs/smbencrypt.c @@ -157,8 +157,14 @@ mdfour(unsigned char *md4_hash, unsigned char *link_str, int link_len) cERROR(1, "%s: Could not init md4 shash\n", __func__); goto mdfour_err; } - crypto_shash_update(&sdescmd4->shash, link_str, link_len); + rc = crypto_shash_update(&sdescmd4->shash, link_str, link_len); + if (rc) { + cERROR(1, "%s: Could not update with link_str\n", __func__); + goto mdfour_err; + } rc = crypto_shash_final(&sdescmd4->shash, md4_hash); + if (rc) + cERROR(1, "%s: Could not genereate md4 hash\n", __func__); mdfour_err: crypto_free_shash(md4); diff --git a/fs/compat.c b/fs/compat.c index 0ea00832de2..0b48d018e38 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -1675,256 +1675,10 @@ asmlinkage long compat_sys_ppoll(struct pollfd __user *ufds, } #endif /* HAVE_SET_RESTORE_SIGMASK */ -#if (defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)) && !defined(CONFIG_NFSD_DEPRECATED) -/* Stuff for NFS server syscalls... */ -struct compat_nfsctl_svc { - u16 svc32_port; - s32 svc32_nthreads; -}; - -struct compat_nfsctl_client { - s8 cl32_ident[NFSCLNT_IDMAX+1]; - s32 cl32_naddr; - struct in_addr cl32_addrlist[NFSCLNT_ADDRMAX]; - s32 cl32_fhkeytype; - s32 cl32_fhkeylen; - u8 cl32_fhkey[NFSCLNT_KEYMAX]; -}; - -struct compat_nfsctl_export { - char ex32_client[NFSCLNT_IDMAX+1]; - char ex32_path[NFS_MAXPATHLEN+1]; - compat_dev_t ex32_dev; - compat_ino_t ex32_ino; - compat_int_t ex32_flags; - __compat_uid_t ex32_anon_uid; - __compat_gid_t ex32_anon_gid; -}; - -struct compat_nfsctl_fdparm { - struct sockaddr gd32_addr; - s8 gd32_path[NFS_MAXPATHLEN+1]; - compat_int_t gd32_version; -}; - -struct compat_nfsctl_fsparm { - struct sockaddr gd32_addr; - s8 gd32_path[NFS_MAXPATHLEN+1]; - compat_int_t gd32_maxlen; -}; - -struct compat_nfsctl_arg { - compat_int_t ca32_version; /* safeguard */ - union { - struct compat_nfsctl_svc u32_svc; - struct compat_nfsctl_client u32_client; - struct compat_nfsctl_export u32_export; - struct compat_nfsctl_fdparm u32_getfd; - struct compat_nfsctl_fsparm u32_getfs; - } u; -#define ca32_svc u.u32_svc -#define ca32_client u.u32_client -#define ca32_export u.u32_export -#define ca32_getfd u.u32_getfd -#define ca32_getfs u.u32_getfs -}; - -union compat_nfsctl_res { - __u8 cr32_getfh[NFS_FHSIZE]; - struct knfsd_fh cr32_getfs; -}; - -static int compat_nfs_svc_trans(struct nfsctl_arg *karg, - struct compat_nfsctl_arg __user *arg) -{ - if (!access_ok(VERIFY_READ, &arg->ca32_svc, sizeof(arg->ca32_svc)) || - get_user(karg->ca_version, &arg->ca32_version) || - __get_user(karg->ca_svc.svc_port, &arg->ca32_svc.svc32_port) || - __get_user(karg->ca_svc.svc_nthreads, - &arg->ca32_svc.svc32_nthreads)) - return -EFAULT; - return 0; -} - -static int compat_nfs_clnt_trans(struct nfsctl_arg *karg, - struct compat_nfsctl_arg __user *arg) -{ - if (!access_ok(VERIFY_READ, &arg->ca32_client, - sizeof(arg->ca32_client)) || - get_user(karg->ca_version, &arg->ca32_version) || - __copy_from_user(&karg->ca_client.cl_ident[0], - &arg->ca32_client.cl32_ident[0], - NFSCLNT_IDMAX) || - __get_user(karg->ca_client.cl_naddr, - &arg->ca32_client.cl32_naddr) || - __copy_from_user(&karg->ca_client.cl_addrlist[0], - &arg->ca32_client.cl32_addrlist[0], - (sizeof(struct in_addr) * NFSCLNT_ADDRMAX)) || - __get_user(karg->ca_client.cl_fhkeytype, - &arg->ca32_client.cl32_fhkeytype) || - __get_user(karg->ca_client.cl_fhkeylen, - &arg->ca32_client.cl32_fhkeylen) || - __copy_from_user(&karg->ca_client.cl_fhkey[0], - &arg->ca32_client.cl32_fhkey[0], - NFSCLNT_KEYMAX)) - return -EFAULT; - - return 0; -} - -static int compat_nfs_exp_trans(struct nfsctl_arg *karg, - struct compat_nfsctl_arg __user *arg) -{ - if (!access_ok(VERIFY_READ, &arg->ca32_export, - sizeof(arg->ca32_export)) || - get_user(karg->ca_version, &arg->ca32_version) || - __copy_from_user(&karg->ca_export.ex_client[0], - &arg->ca32_export.ex32_client[0], - NFSCLNT_IDMAX) || - __copy_from_user(&karg->ca_export.ex_path[0], - &arg->ca32_export.ex32_path[0], - NFS_MAXPATHLEN) || - __get_user(karg->ca_export.ex_dev, - &arg->ca32_export.ex32_dev) || - __get_user(karg->ca_export.ex_ino, - &arg->ca32_export.ex32_ino) || - __get_user(karg->ca_export.ex_flags, - &arg->ca32_export.ex32_flags) || - __get_user(karg->ca_export.ex_anon_uid, - &arg->ca32_export.ex32_anon_uid) || - __get_user(karg->ca_export.ex_anon_gid, - &arg->ca32_export.ex32_anon_gid)) - return -EFAULT; - SET_UID(karg->ca_export.ex_anon_uid, karg->ca_export.ex_anon_uid); - SET_GID(karg->ca_export.ex_anon_gid, karg->ca_export.ex_anon_gid); - - return 0; -} - -static int compat_nfs_getfd_trans(struct nfsctl_arg *karg, - struct compat_nfsctl_arg __user *arg) -{ - if (!access_ok(VERIFY_READ, &arg->ca32_getfd, - sizeof(arg->ca32_getfd)) || - get_user(karg->ca_version, &arg->ca32_version) || - __copy_from_user(&karg->ca_getfd.gd_addr, - &arg->ca32_getfd.gd32_addr, - (sizeof(struct sockaddr))) || - __copy_from_user(&karg->ca_getfd.gd_path, - &arg->ca32_getfd.gd32_path, - (NFS_MAXPATHLEN+1)) || - __get_user(karg->ca_getfd.gd_version, - &arg->ca32_getfd.gd32_version)) - return -EFAULT; - - return 0; -} - -static int compat_nfs_getfs_trans(struct nfsctl_arg *karg, - struct compat_nfsctl_arg __user *arg) -{ - if (!access_ok(VERIFY_READ,&arg->ca32_getfs,sizeof(arg->ca32_getfs)) || - get_user(karg->ca_version, &arg->ca32_version) || - __copy_from_user(&karg->ca_getfs.gd_addr, - &arg->ca32_getfs.gd32_addr, - (sizeof(struct sockaddr))) || - __copy_from_user(&karg->ca_getfs.gd_path, - &arg->ca32_getfs.gd32_path, - (NFS_MAXPATHLEN+1)) || - __get_user(karg->ca_getfs.gd_maxlen, - &arg->ca32_getfs.gd32_maxlen)) - return -EFAULT; - - return 0; -} - -/* This really doesn't need translations, we are only passing - * back a union which contains opaque nfs file handle data. - */ -static int compat_nfs_getfh_res_trans(union nfsctl_res *kres, - union compat_nfsctl_res __user *res) -{ - int err; - - err = copy_to_user(res, kres, sizeof(*res)); - - return (err) ? -EFAULT : 0; -} - -asmlinkage long compat_sys_nfsservctl(int cmd, - struct compat_nfsctl_arg __user *arg, - union compat_nfsctl_res __user *res) -{ - struct nfsctl_arg *karg; - union nfsctl_res *kres; - mm_segment_t oldfs; - int err; - - karg = kmalloc(sizeof(*karg), GFP_USER); - kres = kmalloc(sizeof(*kres), GFP_USER); - if(!karg || !kres) { - err = -ENOMEM; - goto done; - } - - switch(cmd) { - case NFSCTL_SVC: - err = compat_nfs_svc_trans(karg, arg); - break; - - case NFSCTL_ADDCLIENT: - err = compat_nfs_clnt_trans(karg, arg); - break; - - case NFSCTL_DELCLIENT: - err = compat_nfs_clnt_trans(karg, arg); - break; - - case NFSCTL_EXPORT: - case NFSCTL_UNEXPORT: - err = compat_nfs_exp_trans(karg, arg); - break; - - case NFSCTL_GETFD: - err = compat_nfs_getfd_trans(karg, arg); - break; - - case NFSCTL_GETFS: - err = compat_nfs_getfs_trans(karg, arg); - break; - - default: - err = -EINVAL; - break; - } - - if (err) - goto done; - - oldfs = get_fs(); - set_fs(KERNEL_DS); - /* The __user pointer casts are valid because of the set_fs() */ - err = sys_nfsservctl(cmd, (void __user *) karg, (void __user *) kres); - set_fs(oldfs); - - if (err) - goto done; - - if((cmd == NFSCTL_GETFD) || - (cmd == NFSCTL_GETFS)) - err = compat_nfs_getfh_res_trans(kres, res); - -done: - kfree(karg); - kfree(kres); - return err; -} -#else /* !NFSD */ long asmlinkage compat_sys_nfsservctl(int cmd, void *notused, void *notused2) { return sys_ni_syscall(); } -#endif #ifdef CONFIG_EPOLL diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 61abb638b4b..8be086e9abe 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -68,6 +68,8 @@ #ifdef CONFIG_BLOCK #include <linux/loop.h> +#include <linux/cdrom.h> +#include <linux/fd.h> #include <scsi/scsi.h> #include <scsi/scsi_ioctl.h> #include <scsi/sg.h> @@ -944,6 +946,9 @@ COMPATIBLE_IOCTL(FIOQSIZE) IGNORE_IOCTL(LOOP_CLR_FD) /* md calls this on random blockdevs */ IGNORE_IOCTL(RAID_VERSION) +/* qemu/qemu-img might call these two on plain files for probing */ +IGNORE_IOCTL(CDROM_DRIVE_STATUS) +IGNORE_IOCTL(FDGETPRM32) /* SG stuff */ COMPATIBLE_IOCTL(SG_SET_TIMEOUT) COMPATIBLE_IOCTL(SG_GET_TIMEOUT) diff --git a/fs/direct-io.c b/fs/direct-io.c index 01d2d9ef609..44a360ca804 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -35,7 +35,7 @@ #include <linux/buffer_head.h> #include <linux/rwsem.h> #include <linux/uio.h> -#include <asm/atomic.h> +#include <linux/atomic.h> /* * How many user pages to map in one call to get_user_pages(). This determines diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index e2b87800436..01fd5c11a7f 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -92,7 +92,7 @@ static void do_unlock_close(struct dlm_ls *ls, u64 number, op->info.number = number; op->info.start = 0; op->info.end = OFFSET_MAX; - if (fl->fl_lmops && fl->fl_lmops->fl_grant) + if (fl->fl_lmops && fl->fl_lmops->lm_grant) op->info.owner = (__u64) fl->fl_pid; else op->info.owner = (__u64)(long) fl->fl_owner; @@ -128,11 +128,11 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, op->info.number = number; op->info.start = fl->fl_start; op->info.end = fl->fl_end; - if (fl->fl_lmops && fl->fl_lmops->fl_grant) { + if (fl->fl_lmops && fl->fl_lmops->lm_grant) { /* fl_owner is lockd which doesn't distinguish processes on the nfs client */ op->info.owner = (__u64) fl->fl_pid; - xop->callback = fl->fl_lmops->fl_grant; + xop->callback = fl->fl_lmops->lm_grant; locks_init_lock(&xop->flc); locks_copy_lock(&xop->flc, fl); xop->fl = fl; @@ -268,7 +268,7 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, op->info.number = number; op->info.start = fl->fl_start; op->info.end = fl->fl_end; - if (fl->fl_lmops && fl->fl_lmops->fl_grant) + if (fl->fl_lmops && fl->fl_lmops->lm_grant) op->info.owner = (__u64) fl->fl_pid; else op->info.owner = (__u64)(long) fl->fl_owner; @@ -327,7 +327,7 @@ int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, op->info.number = number; op->info.start = fl->fl_start; op->info.end = fl->fl_end; - if (fl->fl_lmops && fl->fl_lmops->fl_grant) + if (fl->fl_lmops && fl->fl_lmops->lm_grant) op->info.owner = (__u64) fl->fl_pid; else op->info.owner = (__u64)(long) fl->fl_owner; diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 27a7fefb83e..fa8049ecdc6 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -2248,7 +2248,7 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes, auth_tok->token.password.session_key_encryption_key, crypt_stat->key_size); ecryptfs_printk(KERN_DEBUG, - "Cached session key " "encryption key: \n"); + "Cached session key encryption key:\n"); if (ecryptfs_verbosity > 0) ecryptfs_dump_hex(session_key_encryption_key, 16); } diff --git a/fs/eventpoll.c b/fs/eventpoll.c index f9cfd168fbe..fe047d966dc 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -37,7 +37,7 @@ #include <asm/system.h> #include <asm/io.h> #include <asm/mman.h> -#include <asm/atomic.h> +#include <linux/atomic.h> /* * LOCKING: diff --git a/fs/exec.c b/fs/exec.c index 842d5700c15..da80612a35f 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -181,14 +181,7 @@ static void acct_arg_size(struct linux_binprm *bprm, unsigned long pages) return; bprm->vma_pages = pages; - -#ifdef SPLIT_RSS_COUNTING - add_mm_counter(mm, MM_ANONPAGES, diff); -#else - spin_lock(&mm->page_table_lock); add_mm_counter(mm, MM_ANONPAGES, diff); - spin_unlock(&mm->page_table_lock); -#endif } static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos, @@ -277,7 +270,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) * use STACK_TOP because that can depend on attributes which aren't * configured yet. */ - BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); + BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP); vma->vm_end = STACK_TOP_MAX; vma->vm_start = vma->vm_end - PAGE_SIZE; vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP; @@ -1430,9 +1423,9 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) } } read_unlock(&binfmt_lock); +#ifdef CONFIG_MODULES if (retval != -ENOEXEC || bprm->mm == NULL) { break; -#ifdef CONFIG_MODULES } else { #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e)) if (printable(bprm->buf[0]) && @@ -1440,9 +1433,13 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) printable(bprm->buf[2]) && printable(bprm->buf[3])) break; /* -ENOEXEC */ + if (try) + break; /* -ENOEXEC */ request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2])); -#endif } +#else + break; +#endif } return retval; } @@ -1649,15 +1646,26 @@ expand_fail: return ret; } +static void cn_escape(char *str) +{ + for (; *str; str++) + if (*str == '/') + *str = '!'; +} + static int cn_print_exe_file(struct core_name *cn) { struct file *exe_file; - char *pathbuf, *path, *p; + char *pathbuf, *path; int ret; exe_file = get_mm_exe_file(current->mm); - if (!exe_file) - return cn_printf(cn, "(unknown)"); + if (!exe_file) { + char *commstart = cn->corename + cn->used; + ret = cn_printf(cn, "%s (path unknown)", current->comm); + cn_escape(commstart); + return ret; + } pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY); if (!pathbuf) { @@ -1671,9 +1679,7 @@ static int cn_print_exe_file(struct core_name *cn) goto free_buf; } - for (p = path; *p; p++) - if (*p == '/') - *p = '!'; + cn_escape(path); ret = cn_printf(cn, "%s", path); @@ -1745,16 +1751,22 @@ static int format_corename(struct core_name *cn, long signr) break; } /* hostname */ - case 'h': + case 'h': { + char *namestart = cn->corename + cn->used; down_read(&uts_sem); err = cn_printf(cn, "%s", utsname()->nodename); up_read(&uts_sem); + cn_escape(namestart); break; + } /* executable */ - case 'e': + case 'e': { + char *commstart = cn->corename + cn->used; err = cn_printf(cn, "%s", current->comm); + cn_escape(commstart); break; + } case 'E': err = cn_print_exe_file(cn); break; @@ -2118,16 +2130,16 @@ void do_coredump(long signr, int exit_code, struct pt_regs *regs) ispipe = format_corename(&cn, signr); - if (ispipe == -ENOMEM) { - printk(KERN_WARNING "format_corename failed\n"); - printk(KERN_WARNING "Aborting core\n"); - goto fail_corename; - } - if (ispipe) { int dump_count; char **helper_argv; + if (ispipe < 0) { + printk(KERN_WARNING "format_corename failed\n"); + printk(KERN_WARNING "Aborting core\n"); + goto fail_corename; + } + if (cprm.limit == 1) { /* * Normally core limits are irrelevant to pipes, since diff --git a/fs/ext2/acl.c b/fs/ext2/acl.c index bfe651f9ae1..52c05376394 100644 --- a/fs/ext2/acl.c +++ b/fs/ext2/acl.c @@ -128,7 +128,7 @@ fail: /* * inode->i_mutex: don't care */ -static struct posix_acl * +struct posix_acl * ext2_get_acl(struct inode *inode, int type) { int name_index; @@ -231,29 +231,6 @@ ext2_set_acl(struct inode *inode, int type, struct posix_acl *acl) return error; } -int -ext2_check_acl(struct inode *inode, int mask) -{ - struct posix_acl *acl; - - if (mask & MAY_NOT_BLOCK) { - if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) - return -ECHILD; - return -EAGAIN; - } - - acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl) { - int error = posix_acl_permission(inode, acl, mask); - posix_acl_release(acl); - return error; - } - - return -EAGAIN; -} - /* * Initialize the ACLs of a new inode. Called from ext2_new_inode. * @@ -276,29 +253,20 @@ ext2_init_acl(struct inode *inode, struct inode *dir) inode->i_mode &= ~current_umask(); } if (test_opt(inode->i_sb, POSIX_ACL) && acl) { - struct posix_acl *clone; - mode_t mode; - + mode_t mode = inode->i_mode; if (S_ISDIR(inode->i_mode)) { error = ext2_set_acl(inode, ACL_TYPE_DEFAULT, acl); if (error) goto cleanup; } - clone = posix_acl_clone(acl, GFP_KERNEL); - error = -ENOMEM; - if (!clone) - goto cleanup; - mode = inode->i_mode; - error = posix_acl_create_masq(clone, &mode); - if (error >= 0) { - inode->i_mode = mode; - if (error > 0) { - /* This is an extended ACL */ - error = ext2_set_acl(inode, - ACL_TYPE_ACCESS, clone); - } + error = posix_acl_create(&acl, GFP_KERNEL, &mode); + if (error < 0) + return error; + inode->i_mode = mode; + if (error > 0) { + /* This is an extended ACL */ + error = ext2_set_acl(inode, ACL_TYPE_ACCESS, acl); } - posix_acl_release(clone); } cleanup: posix_acl_release(acl); @@ -322,7 +290,7 @@ cleanup: int ext2_acl_chmod(struct inode *inode) { - struct posix_acl *acl, *clone; + struct posix_acl *acl; int error; if (!test_opt(inode->i_sb, POSIX_ACL)) @@ -332,14 +300,11 @@ ext2_acl_chmod(struct inode *inode) acl = ext2_get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl) || !acl) return PTR_ERR(acl); - clone = posix_acl_clone(acl, GFP_KERNEL); + error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); + if (error) + return error; + error = ext2_set_acl(inode, ACL_TYPE_ACCESS, acl); posix_acl_release(acl); - if (!clone) - return -ENOMEM; - error = posix_acl_chmod_masq(clone, inode->i_mode); - if (!error) - error = ext2_set_acl(inode, ACL_TYPE_ACCESS, clone); - posix_acl_release(clone); return error; } diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h index 3ff6cbb9ac4..5c0a6a4fb05 100644 --- a/fs/ext2/acl.h +++ b/fs/ext2/acl.h @@ -54,13 +54,13 @@ static inline int ext2_acl_count(size_t size) #ifdef CONFIG_EXT2_FS_POSIX_ACL /* acl.c */ -extern int ext2_check_acl (struct inode *, int); +extern struct posix_acl *ext2_get_acl(struct inode *inode, int type); extern int ext2_acl_chmod (struct inode *); extern int ext2_init_acl (struct inode *, struct inode *); #else #include <linux/sched.h> -#define ext2_check_acl NULL +#define ext2_get_acl NULL #define ext2_get_acl NULL #define ext2_set_acl NULL diff --git a/fs/ext2/file.c b/fs/ext2/file.c index 82e06321de3..a5b3a5db312 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -102,6 +102,6 @@ const struct inode_operations ext2_file_inode_operations = { .removexattr = generic_removexattr, #endif .setattr = ext2_setattr, - .check_acl = ext2_check_acl, + .get_acl = ext2_get_acl, .fiemap = ext2_fiemap, }; diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index d60b7099e2d..761fde807fc 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -408,7 +408,7 @@ const struct inode_operations ext2_dir_inode_operations = { .removexattr = generic_removexattr, #endif .setattr = ext2_setattr, - .check_acl = ext2_check_acl, + .get_acl = ext2_get_acl, }; const struct inode_operations ext2_special_inode_operations = { @@ -419,5 +419,5 @@ const struct inode_operations ext2_special_inode_operations = { .removexattr = generic_removexattr, #endif .setattr = ext2_setattr, - .check_acl = ext2_check_acl, + .get_acl = ext2_get_acl, }; diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 529970617a2..d27b71f1d18 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -161,6 +161,10 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name, if (name == NULL) return -EINVAL; + name_len = strlen(name); + if (name_len > 255) + return -ERANGE; + down_read(&EXT2_I(inode)->xattr_sem); error = -ENODATA; if (!EXT2_I(inode)->i_file_acl) @@ -181,12 +185,8 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get", error = -EIO; goto cleanup; } - /* find named attribute */ - name_len = strlen(name); - error = -ERANGE; - if (name_len > 255) - goto cleanup; + /* find named attribute */ entry = FIRST_ENTRY(bh); while (!IS_LAST_ENTRY(entry)) { struct ext2_xattr_entry *next = diff --git a/fs/ext3/acl.c b/fs/ext3/acl.c index edfeb293d4c..6c29bf0df04 100644 --- a/fs/ext3/acl.c +++ b/fs/ext3/acl.c @@ -131,7 +131,7 @@ fail: * * inode->i_mutex: don't care */ -static struct posix_acl * +struct posix_acl * ext3_get_acl(struct inode *inode, int type) { int name_index; @@ -239,29 +239,6 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, return error; } -int -ext3_check_acl(struct inode *inode, int mask) -{ - struct posix_acl *acl; - - if (mask & MAY_NOT_BLOCK) { - if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) - return -ECHILD; - return -EAGAIN; - } - - acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl) { - int error = posix_acl_permission(inode, acl, mask); - posix_acl_release(acl); - return error; - } - - return -EAGAIN; -} - /* * Initialize the ACLs of a new inode. Called from ext3_new_inode. * @@ -284,8 +261,7 @@ ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) inode->i_mode &= ~current_umask(); } if (test_opt(inode->i_sb, POSIX_ACL) && acl) { - struct posix_acl *clone; - mode_t mode; + mode_t mode = inode->i_mode; if (S_ISDIR(inode->i_mode)) { error = ext3_set_acl(handle, inode, @@ -293,22 +269,15 @@ ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) if (error) goto cleanup; } - clone = posix_acl_clone(acl, GFP_NOFS); - error = -ENOMEM; - if (!clone) - goto cleanup; - - mode = inode->i_mode; - error = posix_acl_create_masq(clone, &mode); - if (error >= 0) { - inode->i_mode = mode; - if (error > 0) { - /* This is an extended ACL */ - error = ext3_set_acl(handle, inode, - ACL_TYPE_ACCESS, clone); - } + error = posix_acl_create(&acl, GFP_NOFS, &mode); + if (error < 0) + return error; + + inode->i_mode = mode; + if (error > 0) { + /* This is an extended ACL */ + error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, acl); } - posix_acl_release(clone); } cleanup: posix_acl_release(acl); @@ -332,7 +301,9 @@ cleanup: int ext3_acl_chmod(struct inode *inode) { - struct posix_acl *acl, *clone; + struct posix_acl *acl; + handle_t *handle; + int retries = 0; int error; if (S_ISLNK(inode->i_mode)) @@ -342,31 +313,24 @@ ext3_acl_chmod(struct inode *inode) acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl) || !acl) return PTR_ERR(acl); - clone = posix_acl_clone(acl, GFP_KERNEL); - posix_acl_release(acl); - if (!clone) - return -ENOMEM; - error = posix_acl_chmod_masq(clone, inode->i_mode); - if (!error) { - handle_t *handle; - int retries = 0; - - retry: - handle = ext3_journal_start(inode, - EXT3_DATA_TRANS_BLOCKS(inode->i_sb)); - if (IS_ERR(handle)) { - error = PTR_ERR(handle); - ext3_std_error(inode->i_sb, error); - goto out; - } - error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, clone); - ext3_journal_stop(handle); - if (error == -ENOSPC && - ext3_should_retry_alloc(inode->i_sb, &retries)) - goto retry; + error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); + if (error) + return error; +retry: + handle = ext3_journal_start(inode, + EXT3_DATA_TRANS_BLOCKS(inode->i_sb)); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + ext3_std_error(inode->i_sb, error); + goto out; } + error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, acl); + ext3_journal_stop(handle); + if (error == -ENOSPC && + ext3_should_retry_alloc(inode->i_sb, &retries)) + goto retry; out: - posix_acl_release(clone); + posix_acl_release(acl); return error; } diff --git a/fs/ext3/acl.h b/fs/ext3/acl.h index 597334626de..dbc921e458c 100644 --- a/fs/ext3/acl.h +++ b/fs/ext3/acl.h @@ -54,13 +54,13 @@ static inline int ext3_acl_count(size_t size) #ifdef CONFIG_EXT3_FS_POSIX_ACL /* acl.c */ -extern int ext3_check_acl (struct inode *, int); +extern struct posix_acl *ext3_get_acl(struct inode *inode, int type); extern int ext3_acl_chmod (struct inode *); extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); #else /* CONFIG_EXT3_FS_POSIX_ACL */ #include <linux/sched.h> -#define ext3_check_acl NULL +#define ext3_get_acl NULL static inline int ext3_acl_chmod(struct inode *inode) diff --git a/fs/ext3/balloc.c b/fs/ext3/balloc.c index fe52297e31a..6386d76f44a 100644 --- a/fs/ext3/balloc.c +++ b/fs/ext3/balloc.c @@ -21,6 +21,7 @@ #include <linux/quotaops.h> #include <linux/buffer_head.h> #include <linux/blkdev.h> +#include <trace/events/ext3.h> /* * balloc.c contains the blocks allocation and deallocation routines @@ -161,6 +162,7 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group) desc = ext3_get_group_desc(sb, block_group, NULL); if (!desc) return NULL; + trace_ext3_read_block_bitmap(sb, block_group); bitmap_blk = le32_to_cpu(desc->bg_block_bitmap); bh = sb_getblk(sb, bitmap_blk); if (unlikely(!bh)) { @@ -351,6 +353,7 @@ void ext3_rsv_window_add(struct super_block *sb, struct rb_node * parent = NULL; struct ext3_reserve_window_node *this; + trace_ext3_rsv_window_add(sb, rsv); while (*p) { parent = *p; @@ -476,8 +479,10 @@ void ext3_discard_reservation(struct inode *inode) rsv = &block_i->rsv_window_node; if (!rsv_is_empty(&rsv->rsv_window)) { spin_lock(rsv_lock); - if (!rsv_is_empty(&rsv->rsv_window)) + if (!rsv_is_empty(&rsv->rsv_window)) { + trace_ext3_discard_reservation(inode, rsv); rsv_window_remove(inode->i_sb, rsv); + } spin_unlock(rsv_lock); } } @@ -683,14 +688,10 @@ error_return: void ext3_free_blocks(handle_t *handle, struct inode *inode, ext3_fsblk_t block, unsigned long count) { - struct super_block * sb; + struct super_block *sb = inode->i_sb; unsigned long dquot_freed_blocks; - sb = inode->i_sb; - if (!sb) { - printk ("ext3_free_blocks: nonexistent device"); - return; - } + trace_ext3_free_blocks(inode, block, count); ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); if (dquot_freed_blocks) dquot_free_block(inode, dquot_freed_blocks); @@ -1136,6 +1137,7 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, else start_block = grp_goal + group_first_block; + trace_ext3_alloc_new_reservation(sb, start_block); size = my_rsv->rsv_goal_size; if (!rsv_is_empty(&my_rsv->rsv_window)) { @@ -1230,8 +1232,11 @@ retry: * check if the first free block is within the * free space we just reserved */ - if (start_block >= my_rsv->rsv_start && start_block <= my_rsv->rsv_end) + if (start_block >= my_rsv->rsv_start && + start_block <= my_rsv->rsv_end) { + trace_ext3_reserved(sb, start_block, my_rsv); return 0; /* success */ + } /* * if the first free bit we found is out of the reservable space * continue search for next reservable space, @@ -1514,10 +1519,6 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, *errp = -ENOSPC; sb = inode->i_sb; - if (!sb) { - printk("ext3_new_block: nonexistent device"); - return 0; - } /* * Check quota for allocation of this block. @@ -1528,8 +1529,10 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, return 0; } + trace_ext3_request_blocks(inode, goal, num); + sbi = EXT3_SB(sb); - es = EXT3_SB(sb)->s_es; + es = sbi->s_es; ext3_debug("goal=%lu.\n", goal); /* * Allocate a block from reservation only when @@ -1742,6 +1745,10 @@ allocated: brelse(bitmap_bh); dquot_free_block(inode, *count-num); *count = num; + + trace_ext3_allocate_blocks(inode, goal, num, + (unsigned long long)ret_block); + return ret_block; io_error: @@ -1996,6 +2003,7 @@ ext3_grpblk_t ext3_trim_all_free(struct super_block *sb, unsigned int group, if ((next - start) < minblocks) goto free_extent; + trace_ext3_discard_blocks(sb, discard_block, next - start); /* Send the TRIM command down to the device */ err = sb_issue_discard(sb, discard_block, next - start, GFP_NOFS, 0); @@ -2100,7 +2108,7 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range) if (unlikely(minlen > EXT3_BLOCKS_PER_GROUP(sb))) return -EINVAL; if (start >= max_blks) - goto out; + return -EINVAL; if (start + len > max_blks) len = max_blks - start; @@ -2148,8 +2156,6 @@ int ext3_trim_fs(struct super_block *sb, struct fstrim_range *range) if (ret >= 0) ret = 0; - -out: range->len = trimmed * sb->s_blocksize; return ret; diff --git a/fs/ext3/file.c b/fs/ext3/file.c index f55df0e61cb..724df69847d 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -71,7 +71,6 @@ const struct file_operations ext3_file_operations = { }; const struct inode_operations ext3_file_inode_operations = { - .truncate = ext3_truncate, .setattr = ext3_setattr, #ifdef CONFIG_EXT3_FS_XATTR .setxattr = generic_setxattr, @@ -79,7 +78,7 @@ const struct inode_operations ext3_file_inode_operations = { .listxattr = ext3_listxattr, .removexattr = generic_removexattr, #endif - .check_acl = ext3_check_acl, + .get_acl = ext3_get_acl, .fiemap = ext3_fiemap, }; diff --git a/fs/ext3/fsync.c b/fs/ext3/fsync.c index 0bcf63adb80..d494c554c6e 100644 --- a/fs/ext3/fsync.c +++ b/fs/ext3/fsync.c @@ -30,6 +30,7 @@ #include <linux/jbd.h> #include <linux/ext3_fs.h> #include <linux/ext3_jbd.h> +#include <trace/events/ext3.h> /* * akpm: A new design for ext3_sync_file(). @@ -51,12 +52,14 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync) int ret, needs_barrier = 0; tid_t commit_tid; + trace_ext3_sync_file_enter(file, datasync); + if (inode->i_sb->s_flags & MS_RDONLY) return 0; ret = filemap_write_and_wait_range(inode->i_mapping, start, end); if (ret) - return ret; + goto out; /* * Taking the mutex here just to keep consistent with how fsync was @@ -83,7 +86,8 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync) */ if (ext3_should_journal_data(inode)) { mutex_unlock(&inode->i_mutex); - return ext3_force_commit(inode->i_sb); + ret = ext3_force_commit(inode->i_sb); + goto out; } if (datasync) @@ -104,6 +108,9 @@ int ext3_sync_file(struct file *file, loff_t start, loff_t end, int datasync) */ if (needs_barrier) blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); + mutex_unlock(&inode->i_mutex); +out: + trace_ext3_sync_file_exit(inode, ret); return ret; } diff --git a/fs/ext3/ialloc.c b/fs/ext3/ialloc.c index bfc2dc43681..bf09cbf938c 100644 --- a/fs/ext3/ialloc.c +++ b/fs/ext3/ialloc.c @@ -23,6 +23,7 @@ #include <linux/buffer_head.h> #include <linux/random.h> #include <linux/bitops.h> +#include <trace/events/ext3.h> #include <asm/byteorder.h> @@ -118,6 +119,7 @@ void ext3_free_inode (handle_t *handle, struct inode * inode) ino = inode->i_ino; ext3_debug ("freeing inode %lu\n", ino); + trace_ext3_free_inode(inode); is_directory = S_ISDIR(inode->i_mode); @@ -426,6 +428,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, return ERR_PTR(-EPERM); sb = dir->i_sb; + trace_ext3_request_inode(dir, mode); inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); @@ -601,6 +604,7 @@ got: } ext3_debug("allocating inode %lu\n", inode->i_ino); + trace_ext3_allocate_inode(inode, dir, mode); goto really_out; fail: ext3_std_error(sb, err); diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 2978a2a17a5..04da6acde85 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -38,10 +38,12 @@ #include <linux/bio.h> #include <linux/fiemap.h> #include <linux/namei.h> +#include <trace/events/ext3.h> #include "xattr.h" #include "acl.h" static int ext3_writepage_trans_blocks(struct inode *inode); +static int ext3_block_truncate_page(struct inode *inode, loff_t from); /* * Test whether an inode is a fast symlink. @@ -70,6 +72,7 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode, might_sleep(); + trace_ext3_forget(inode, is_metadata, blocknr); BUFFER_TRACE(bh, "enter"); jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " @@ -194,20 +197,47 @@ static int truncate_restart_transaction(handle_t *handle, struct inode *inode) */ void ext3_evict_inode (struct inode *inode) { + struct ext3_inode_info *ei = EXT3_I(inode); struct ext3_block_alloc_info *rsv; handle_t *handle; int want_delete = 0; + trace_ext3_evict_inode(inode); if (!inode->i_nlink && !is_bad_inode(inode)) { dquot_initialize(inode); want_delete = 1; } + /* + * When journalling data dirty buffers are tracked only in the journal. + * So although mm thinks everything is clean and ready for reaping the + * inode might still have some pages to write in the running + * transaction or waiting to be checkpointed. Thus calling + * journal_invalidatepage() (via truncate_inode_pages()) to discard + * these buffers can cause data loss. Also even if we did not discard + * these buffers, we would have no way to find them after the inode + * is reaped and thus user could see stale data if he tries to read + * them before the transaction is checkpointed. So be careful and + * force everything to disk here... We use ei->i_datasync_tid to + * store the newest transaction containing inode's data. + * + * Note that directories do not have this problem because they don't + * use page cache. + */ + if (inode->i_nlink && ext3_should_journal_data(inode) && + (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { + tid_t commit_tid = atomic_read(&ei->i_datasync_tid); + journal_t *journal = EXT3_SB(inode->i_sb)->s_journal; + + log_start_commit(journal, commit_tid); + log_wait_commit(journal, commit_tid); + filemap_write_and_wait(&inode->i_data); + } truncate_inode_pages(&inode->i_data, 0); ext3_discard_reservation(inode); - rsv = EXT3_I(inode)->i_block_alloc_info; - EXT3_I(inode)->i_block_alloc_info = NULL; + rsv = ei->i_block_alloc_info; + ei->i_block_alloc_info = NULL; if (unlikely(rsv)) kfree(rsv); @@ -231,15 +261,13 @@ void ext3_evict_inode (struct inode *inode) if (inode->i_blocks) ext3_truncate(inode); /* - * Kill off the orphan record which ext3_truncate created. - * AKPM: I think this can be inside the above `if'. - * Note that ext3_orphan_del() has to be able to cope with the - * deletion of a non-existent orphan - this is because we don't - * know if ext3_truncate() actually created an orphan record. - * (Well, we could do this if we need to, but heck - it works) + * Kill off the orphan record created when the inode lost the last + * link. Note that ext3_orphan_del() has to be able to cope with the + * deletion of a non-existent orphan - ext3_truncate() could + * have removed the record. */ ext3_orphan_del(handle, inode); - EXT3_I(inode)->i_dtime = get_seconds(); + ei->i_dtime = get_seconds(); /* * One subtle ordering requirement: if anything has gone wrong @@ -842,6 +870,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, ext3_fsblk_t first_block = 0; + trace_ext3_get_blocks_enter(inode, iblock, maxblocks, create); J_ASSERT(handle != NULL || create == 0); depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary); @@ -886,6 +915,9 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, if (!create || err == -EIO) goto cleanup; + /* + * Block out ext3_truncate while we alter the tree + */ mutex_lock(&ei->truncate_mutex); /* @@ -934,9 +966,6 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, */ count = ext3_blks_to_allocate(partial, indirect_blks, maxblocks, blocks_to_boundary); - /* - * Block out ext3_truncate while we alter the tree - */ err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal, offsets + (partial - chain), partial); @@ -970,6 +999,9 @@ cleanup: } BUFFER_TRACE(bh_result, "returned"); out: + trace_ext3_get_blocks_exit(inode, iblock, + depth ? le32_to_cpu(chain[depth-1].key) : 0, + count, err); return err; } @@ -1202,6 +1234,16 @@ static void ext3_truncate_failed_write(struct inode *inode) ext3_truncate(inode); } +/* + * Truncate blocks that were not used by direct IO write. We have to zero out + * the last file block as well because direct IO might have written to it. + */ +static void ext3_truncate_failed_direct_write(struct inode *inode) +{ + ext3_block_truncate_page(inode, inode->i_size); + ext3_truncate(inode); +} + static int ext3_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, void **fsdata) @@ -1217,6 +1259,8 @@ static int ext3_write_begin(struct file *file, struct address_space *mapping, * we allocate blocks but write fails for some reason */ int needed_blocks = ext3_writepage_trans_blocks(inode) + 1; + trace_ext3_write_begin(inode, pos, len, flags); + index = pos >> PAGE_CACHE_SHIFT; from = pos & (PAGE_CACHE_SIZE - 1); to = from + len; @@ -1332,6 +1376,7 @@ static int ext3_ordered_write_end(struct file *file, unsigned from, to; int ret = 0, ret2; + trace_ext3_ordered_write_end(inode, pos, len, copied); copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); from = pos & (PAGE_CACHE_SIZE - 1); @@ -1367,6 +1412,7 @@ static int ext3_writeback_write_end(struct file *file, struct inode *inode = file->f_mapping->host; int ret; + trace_ext3_writeback_write_end(inode, pos, len, copied); copied = block_write_end(file, mapping, pos, len, copied, page, fsdata); update_file_sizes(inode, pos, copied); /* @@ -1391,10 +1437,12 @@ static int ext3_journalled_write_end(struct file *file, { handle_t *handle = ext3_journal_current_handle(); struct inode *inode = mapping->host; + struct ext3_inode_info *ei = EXT3_I(inode); int ret = 0, ret2; int partial = 0; unsigned from, to; + trace_ext3_journalled_write_end(inode, pos, len, copied); from = pos & (PAGE_CACHE_SIZE - 1); to = from + len; @@ -1419,8 +1467,9 @@ static int ext3_journalled_write_end(struct file *file, if (pos + len > inode->i_size && ext3_can_truncate(inode)) ext3_orphan_add(handle, inode); ext3_set_inode_state(inode, EXT3_STATE_JDATA); - if (inode->i_size > EXT3_I(inode)->i_disksize) { - EXT3_I(inode)->i_disksize = inode->i_size; + atomic_set(&ei->i_datasync_tid, handle->h_transaction->t_tid); + if (inode->i_size > ei->i_disksize) { + ei->i_disksize = inode->i_size; ret2 = ext3_mark_inode_dirty(handle, inode); if (!ret) ret = ret2; @@ -1577,6 +1626,7 @@ static int ext3_ordered_writepage(struct page *page, if (ext3_journal_current_handle()) goto out_fail; + trace_ext3_ordered_writepage(page); if (!page_has_buffers(page)) { create_empty_buffers(page, inode->i_sb->s_blocksize, (1 << BH_Dirty)|(1 << BH_Uptodate)); @@ -1647,6 +1697,7 @@ static int ext3_writeback_writepage(struct page *page, if (ext3_journal_current_handle()) goto out_fail; + trace_ext3_writeback_writepage(page); if (page_has_buffers(page)) { if (!walk_page_buffers(NULL, page_buffers(page), 0, PAGE_CACHE_SIZE, NULL, buffer_unmapped)) { @@ -1689,6 +1740,7 @@ static int ext3_journalled_writepage(struct page *page, if (ext3_journal_current_handle()) goto no_write; + trace_ext3_journalled_writepage(page); handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); if (IS_ERR(handle)) { ret = PTR_ERR(handle); @@ -1715,6 +1767,8 @@ static int ext3_journalled_writepage(struct page *page, if (ret == 0) ret = err; ext3_set_inode_state(inode, EXT3_STATE_JDATA); + atomic_set(&EXT3_I(inode)->i_datasync_tid, + handle->h_transaction->t_tid); unlock_page(page); } else { /* @@ -1739,6 +1793,7 @@ out_unlock: static int ext3_readpage(struct file *file, struct page *page) { + trace_ext3_readpage(page); return mpage_readpage(page, ext3_get_block); } @@ -1753,6 +1808,8 @@ static void ext3_invalidatepage(struct page *page, unsigned long offset) { journal_t *journal = EXT3_JOURNAL(page->mapping->host); + trace_ext3_invalidatepage(page, offset); + /* * If it's a full truncate we just forget about the pending dirtying */ @@ -1766,6 +1823,7 @@ static int ext3_releasepage(struct page *page, gfp_t wait) { journal_t *journal = EXT3_JOURNAL(page->mapping->host); + trace_ext3_releasepage(page); WARN_ON(PageChecked(page)); if (!page_has_buffers(page)) return 0; @@ -1794,6 +1852,8 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, size_t count = iov_length(iov, nr_segs); int retries = 0; + trace_ext3_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); + if (rw == WRITE) { loff_t final_size = offset + count; @@ -1827,7 +1887,7 @@ retry: loff_t end = offset + iov_length(iov, nr_segs); if (end > isize) - vmtruncate(inode, isize); + ext3_truncate_failed_direct_write(inode); } if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) goto retry; @@ -1841,7 +1901,7 @@ retry: /* This is really bad luck. We've written the data * but cannot extend i_size. Truncate allocated blocks * and pretend the write failed... */ - ext3_truncate(inode); + ext3_truncate_failed_direct_write(inode); ret = PTR_ERR(handle); goto out; } @@ -1867,6 +1927,8 @@ retry: ret = err; } out: + trace_ext3_direct_IO_exit(inode, offset, + iov_length(iov, nr_segs), rw, ret); return ret; } @@ -1949,17 +2011,24 @@ void ext3_set_aops(struct inode *inode) * This required during truncate. We need to physically zero the tail end * of that block so it doesn't yield old data if the file is later grown. */ -static int ext3_block_truncate_page(handle_t *handle, struct page *page, - struct address_space *mapping, loff_t from) +static int ext3_block_truncate_page(struct inode *inode, loff_t from) { ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT; - unsigned offset = from & (PAGE_CACHE_SIZE-1); + unsigned offset = from & (PAGE_CACHE_SIZE - 1); unsigned blocksize, iblock, length, pos; - struct inode *inode = mapping->host; + struct page *page; + handle_t *handle = NULL; struct buffer_head *bh; int err = 0; + /* Truncated on block boundary - nothing to do */ blocksize = inode->i_sb->s_blocksize; + if ((from & (blocksize - 1)) == 0) + return 0; + + page = grab_cache_page(inode->i_mapping, index); + if (!page) + return -ENOMEM; length = blocksize - (offset & (blocksize - 1)); iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); @@ -2004,11 +2073,23 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, goto unlock; } + /* data=writeback mode doesn't need transaction to zero-out data */ + if (!ext3_should_writeback_data(inode)) { + /* We journal at most one block */ + handle = ext3_journal_start(inode, 1); + if (IS_ERR(handle)) { + clear_highpage(page); + flush_dcache_page(page); + err = PTR_ERR(handle); + goto unlock; + } + } + if (ext3_should_journal_data(inode)) { BUFFER_TRACE(bh, "get write access"); err = ext3_journal_get_write_access(handle, bh); if (err) - goto unlock; + goto stop; } zero_user(page, offset, length); @@ -2022,6 +2103,9 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, err = ext3_journal_dirty_data(handle, bh); mark_buffer_dirty(bh); } +stop: + if (handle) + ext3_journal_stop(handle); unlock: unlock_page(page); @@ -2390,8 +2474,6 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, int ext3_can_truncate(struct inode *inode) { - if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) - return 0; if (S_ISREG(inode->i_mode)) return 1; if (S_ISDIR(inode->i_mode)) @@ -2435,7 +2517,6 @@ void ext3_truncate(struct inode *inode) struct ext3_inode_info *ei = EXT3_I(inode); __le32 *i_data = ei->i_data; int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); - struct address_space *mapping = inode->i_mapping; int offsets[4]; Indirect chain[4]; Indirect *partial; @@ -2443,7 +2524,8 @@ void ext3_truncate(struct inode *inode) int n; long last_block; unsigned blocksize = inode->i_sb->s_blocksize; - struct page *page; + + trace_ext3_truncate_enter(inode); if (!ext3_can_truncate(inode)) goto out_notrans; @@ -2451,37 +2533,12 @@ void ext3_truncate(struct inode *inode) if (inode->i_size == 0 && ext3_should_writeback_data(inode)) ext3_set_inode_state(inode, EXT3_STATE_FLUSH_ON_CLOSE); - /* - * We have to lock the EOF page here, because lock_page() nests - * outside journal_start(). - */ - if ((inode->i_size & (blocksize - 1)) == 0) { - /* Block boundary? Nothing to do */ - page = NULL; - } else { - page = grab_cache_page(mapping, - inode->i_size >> PAGE_CACHE_SHIFT); - if (!page) - goto out_notrans; - } - handle = start_transaction(inode); - if (IS_ERR(handle)) { - if (page) { - clear_highpage(page); - flush_dcache_page(page); - unlock_page(page); - page_cache_release(page); - } + if (IS_ERR(handle)) goto out_notrans; - } last_block = (inode->i_size + blocksize-1) >> EXT3_BLOCK_SIZE_BITS(inode->i_sb); - - if (page) - ext3_block_truncate_page(handle, page, mapping, inode->i_size); - n = ext3_block_to_path(inode, last_block, offsets, NULL); if (n == 0) goto out_stop; /* error */ @@ -2596,6 +2653,7 @@ out_stop: ext3_orphan_del(handle, inode); ext3_journal_stop(handle); + trace_ext3_truncate_exit(inode); return; out_notrans: /* @@ -2604,6 +2662,7 @@ out_notrans: */ if (inode->i_nlink) ext3_orphan_del(NULL, inode); + trace_ext3_truncate_exit(inode); } static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, @@ -2745,6 +2804,7 @@ make_io: * has in-inode xattrs, or we don't have this inode in memory. * Read the block from disk. */ + trace_ext3_load_inode(inode); get_bh(bh); bh->b_end_io = end_buffer_read_sync; submit_bh(READ_META, bh); @@ -3229,18 +3289,36 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) } error = ext3_orphan_add(handle, inode); + if (error) { + ext3_journal_stop(handle); + goto err_out; + } EXT3_I(inode)->i_disksize = attr->ia_size; - rc = ext3_mark_inode_dirty(handle, inode); - if (!error) - error = rc; + error = ext3_mark_inode_dirty(handle, inode); ext3_journal_stop(handle); + if (error) { + /* Some hard fs error must have happened. Bail out. */ + ext3_orphan_del(NULL, inode); + goto err_out; + } + rc = ext3_block_truncate_page(inode, attr->ia_size); + if (rc) { + /* Cleanup orphan list and exit */ + handle = ext3_journal_start(inode, 3); + if (IS_ERR(handle)) { + ext3_orphan_del(NULL, inode); + goto err_out; + } + ext3_orphan_del(handle, inode); + ext3_journal_stop(handle); + goto err_out; + } } if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size != i_size_read(inode)) { - rc = vmtruncate(inode, attr->ia_size); - if (rc) - goto err_out; + truncate_setsize(inode, attr->ia_size); + ext3_truncate(inode); } setattr_copy(inode, attr); @@ -3374,6 +3452,7 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) int err; might_sleep(); + trace_ext3_mark_inode_dirty(inode, _RET_IP_); err = ext3_reserve_inode_write(handle, inode, &iloc); if (!err) err = ext3_mark_iloc_dirty(handle, inode, &iloc); diff --git a/fs/ext3/ioctl.c b/fs/ext3/ioctl.c index f4090bd2f34..c7f43944f16 100644 --- a/fs/ext3/ioctl.c +++ b/fs/ext3/ioctl.c @@ -285,7 +285,7 @@ group_add_out: if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (copy_from_user(&range, (struct fstrim_range *)arg, + if (copy_from_user(&range, (struct fstrim_range __user *)arg, sizeof(range))) return -EFAULT; @@ -293,7 +293,7 @@ group_add_out: if (ret < 0) return ret; - if (copy_to_user((struct fstrim_range *)arg, &range, + if (copy_to_user((struct fstrim_range __user *)arg, &range, sizeof(range))) return -EFAULT; diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index c095cf5640c..6e18a0b7750 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -36,6 +36,7 @@ #include <linux/quotaops.h> #include <linux/buffer_head.h> #include <linux/bio.h> +#include <trace/events/ext3.h> #include "namei.h" #include "xattr.h" @@ -287,7 +288,7 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent while (len--) printk("%c", *name++); ext3fs_dirhash(de->name, de->name_len, &h); printk(":%x.%u ", h.hash, - ((char *) de - base)); + (unsigned) ((char *) de - base)); } space += EXT3_DIR_REC_LEN(de->name_len); names++; @@ -1013,7 +1014,7 @@ static struct buffer_head * ext3_dx_find_entry(struct inode *dir, *err = -ENOENT; errout: - dxtrace(printk("%s not found\n", name)); + dxtrace(printk("%s not found\n", entry->name)); dx_release (frames); return NULL; } @@ -2140,6 +2141,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry) struct ext3_dir_entry_2 * de; handle_t *handle; + trace_ext3_unlink_enter(dir, dentry); /* Initialize quotas before so that eventual writes go * in separate transaction */ dquot_initialize(dir); @@ -2185,6 +2187,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry) end_unlink: ext3_journal_stop(handle); brelse (bh); + trace_ext3_unlink_exit(dentry, retval); return retval; } @@ -2529,7 +2532,7 @@ const struct inode_operations ext3_dir_inode_operations = { .listxattr = ext3_listxattr, .removexattr = generic_removexattr, #endif - .check_acl = ext3_check_acl, + .get_acl = ext3_get_acl, }; const struct inode_operations ext3_special_inode_operations = { @@ -2540,5 +2543,5 @@ const struct inode_operations ext3_special_inode_operations = { .listxattr = ext3_listxattr, .removexattr = generic_removexattr, #endif - .check_acl = ext3_check_acl, + .get_acl = ext3_get_acl, }; diff --git a/fs/ext3/super.c b/fs/ext3/super.c index b57ea2f9126..7beb69ae001 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -44,6 +44,9 @@ #include "acl.h" #include "namei.h" +#define CREATE_TRACE_POINTS +#include <trace/events/ext3.h> + #ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA #else @@ -497,6 +500,14 @@ static struct inode *ext3_alloc_inode(struct super_block *sb) return &ei->vfs_inode; } +static int ext3_drop_inode(struct inode *inode) +{ + int drop = generic_drop_inode(inode); + + trace_ext3_drop_inode(inode, drop); + return drop; +} + static void ext3_i_callback(struct rcu_head *head) { struct inode *inode = container_of(head, struct inode, i_rcu); @@ -788,6 +799,7 @@ static const struct super_operations ext3_sops = { .destroy_inode = ext3_destroy_inode, .write_inode = ext3_write_inode, .dirty_inode = ext3_dirty_inode, + .drop_inode = ext3_drop_inode, .evict_inode = ext3_evict_inode, .put_super = ext3_put_super, .sync_fs = ext3_sync_fs, @@ -2509,6 +2521,7 @@ static int ext3_sync_fs(struct super_block *sb, int wait) { tid_t target; + trace_ext3_sync_fs(sb, wait); if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { if (wait) log_wait_commit(EXT3_SB(sb)->s_journal, target); diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index 32e6cc23bd9..d565759d82e 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -803,8 +803,16 @@ inserted: /* We need to allocate a new block */ ext3_fsblk_t goal = ext3_group_first_block_no(sb, EXT3_I(inode)->i_block_group); - ext3_fsblk_t block = ext3_new_block(handle, inode, - goal, &error); + ext3_fsblk_t block; + + /* + * Protect us agaist concurrent allocations to the + * same inode from ext3_..._writepage(). Reservation + * code does not expect racing allocations. + */ + mutex_lock(&EXT3_I(inode)->truncate_mutex); + block = ext3_new_block(handle, inode, goal, &error); + mutex_unlock(&EXT3_I(inode)->truncate_mutex); if (error) goto cleanup; ea_idebug(inode, "creating block %d", block); diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 60d900fcc3d..dca2d1ded93 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -131,7 +131,7 @@ fail: * * inode->i_mutex: don't care */ -static struct posix_acl * +struct posix_acl * ext4_get_acl(struct inode *inode, int type) { int name_index; @@ -237,29 +237,6 @@ ext4_set_acl(handle_t *handle, struct inode *inode, int type, return error; } -int -ext4_check_acl(struct inode *inode, int mask) -{ - struct posix_acl *acl; - - if (mask & MAY_NOT_BLOCK) { - if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) - return -ECHILD; - return -EAGAIN; - } - - acl = ext4_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl) { - int error = posix_acl_permission(inode, acl, mask); - posix_acl_release(acl); - return error; - } - - return -EAGAIN; -} - /* * Initialize the ACLs of a new inode. Called from ext4_new_inode. * @@ -282,8 +259,7 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) inode->i_mode &= ~current_umask(); } if (test_opt(inode->i_sb, POSIX_ACL) && acl) { - struct posix_acl *clone; - mode_t mode; + mode_t mode = inode->i_mode; if (S_ISDIR(inode->i_mode)) { error = ext4_set_acl(handle, inode, @@ -291,22 +267,15 @@ ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) if (error) goto cleanup; } - clone = posix_acl_clone(acl, GFP_NOFS); - error = -ENOMEM; - if (!clone) - goto cleanup; - - mode = inode->i_mode; - error = posix_acl_create_masq(clone, &mode); - if (error >= 0) { - inode->i_mode = mode; - if (error > 0) { - /* This is an extended ACL */ - error = ext4_set_acl(handle, inode, - ACL_TYPE_ACCESS, clone); - } + error = posix_acl_create(&acl, GFP_NOFS, &mode); + if (error < 0) + return error; + + inode->i_mode = mode; + if (error > 0) { + /* This is an extended ACL */ + error = ext4_set_acl(handle, inode, ACL_TYPE_ACCESS, acl); } - posix_acl_release(clone); } cleanup: posix_acl_release(acl); @@ -330,9 +299,12 @@ cleanup: int ext4_acl_chmod(struct inode *inode) { - struct posix_acl *acl, *clone; + struct posix_acl *acl; + handle_t *handle; + int retries = 0; int error; + if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; if (!test_opt(inode->i_sb, POSIX_ACL)) @@ -340,31 +312,24 @@ ext4_acl_chmod(struct inode *inode) acl = ext4_get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl) || !acl) return PTR_ERR(acl); - clone = posix_acl_clone(acl, GFP_KERNEL); - posix_acl_release(acl); - if (!clone) - return -ENOMEM; - error = posix_acl_chmod_masq(clone, inode->i_mode); - if (!error) { - handle_t *handle; - int retries = 0; - - retry: - handle = ext4_journal_start(inode, - EXT4_DATA_TRANS_BLOCKS(inode->i_sb)); - if (IS_ERR(handle)) { - error = PTR_ERR(handle); - ext4_std_error(inode->i_sb, error); - goto out; - } - error = ext4_set_acl(handle, inode, ACL_TYPE_ACCESS, clone); - ext4_journal_stop(handle); - if (error == -ENOSPC && - ext4_should_retry_alloc(inode->i_sb, &retries)) - goto retry; + error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); + if (error) + return error; +retry: + handle = ext4_journal_start(inode, + EXT4_DATA_TRANS_BLOCKS(inode->i_sb)); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + ext4_std_error(inode->i_sb, error); + goto out; } + error = ext4_set_acl(handle, inode, ACL_TYPE_ACCESS, acl); + ext4_journal_stop(handle); + if (error == -ENOSPC && + ext4_should_retry_alloc(inode->i_sb, &retries)) + goto retry; out: - posix_acl_release(clone); + posix_acl_release(acl); return error; } diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index 9d843d5deac..18cb39ed7c7 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h @@ -54,13 +54,13 @@ static inline int ext4_acl_count(size_t size) #ifdef CONFIG_EXT4_FS_POSIX_ACL /* acl.c */ -extern int ext4_check_acl(struct inode *, int); +struct posix_acl *ext4_get_acl(struct inode *inode, int type); extern int ext4_acl_chmod(struct inode *); extern int ext4_init_acl(handle_t *, struct inode *, struct inode *); #else /* CONFIG_EXT4_FS_POSIX_ACL */ #include <linux/sched.h> -#define ext4_check_acl NULL +#define ext4_get_acl NULL static inline int ext4_acl_chmod(struct inode *inode) diff --git a/fs/ext4/file.c b/fs/ext4/file.c index ce766f974b1..e4095e988eb 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -301,7 +301,7 @@ const struct inode_operations ext4_file_inode_operations = { .listxattr = ext4_listxattr, .removexattr = generic_removexattr, #endif - .check_acl = ext4_check_acl, + .get_acl = ext4_get_acl, .fiemap = ext4_fiemap, }; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 678cde834f1..3e5191f9f39 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2741,7 +2741,7 @@ static int write_cache_pages_da(struct address_space *mapping, index = wbc->range_start >> PAGE_CACHE_SHIFT; end = wbc->range_end >> PAGE_CACHE_SHIFT; - if (wbc->sync_mode == WB_SYNC_ALL) + if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) tag = PAGECACHE_TAG_TOWRITE; else tag = PAGECACHE_TAG_DIRTY; @@ -2973,7 +2973,7 @@ static int ext4_da_writepages(struct address_space *mapping, } retry: - if (wbc->sync_mode == WB_SYNC_ALL) + if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages) tag_pages_for_writeback(mapping, index, end); while (!ret && wbc->nr_to_write > 0) { diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 707d605bf76..8c9babac43d 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2590,7 +2590,7 @@ const struct inode_operations ext4_dir_inode_operations = { .listxattr = ext4_listxattr, .removexattr = generic_removexattr, #endif - .check_acl = ext4_check_acl, + .get_acl = ext4_get_acl, .fiemap = ext4_fiemap, }; @@ -2602,5 +2602,5 @@ const struct inode_operations ext4_special_inode_operations = { .listxattr = ext4_listxattr, .removexattr = generic_removexattr, #endif - .check_acl = ext4_check_acl, + .get_acl = ext4_get_acl, }; diff --git a/fs/file_table.c b/fs/file_table.c index 01e4c1e8e6b..c322794f736 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -25,7 +25,7 @@ #include <linux/percpu.h> #include <linux/ima.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include "internal.h" diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index b8c507ca42f..1599aa985fe 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -35,7 +35,9 @@ struct wb_writeback_work { long nr_pages; struct super_block *sb; + unsigned long *older_than_this; enum writeback_sync_modes sync_mode; + unsigned int tagged_writepages:1; unsigned int for_kupdate:1; unsigned int range_cyclic:1; unsigned int for_background:1; @@ -180,12 +182,13 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi) */ void inode_wb_list_del(struct inode *inode) { - spin_lock(&inode_wb_list_lock); + struct backing_dev_info *bdi = inode_to_bdi(inode); + + spin_lock(&bdi->wb.list_lock); list_del_init(&inode->i_wb_list); - spin_unlock(&inode_wb_list_lock); + spin_unlock(&bdi->wb.list_lock); } - /* * Redirty an inode: set its when-it-was dirtied timestamp and move it to the * furthest end of its superblock's dirty-inode list. @@ -195,11 +198,9 @@ void inode_wb_list_del(struct inode *inode) * the case then the inode must have been redirtied while it was being written * out and we don't reset its dirtied_when. */ -static void redirty_tail(struct inode *inode) +static void redirty_tail(struct inode *inode, struct bdi_writeback *wb) { - struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; - - assert_spin_locked(&inode_wb_list_lock); + assert_spin_locked(&wb->list_lock); if (!list_empty(&wb->b_dirty)) { struct inode *tail; @@ -213,11 +214,9 @@ static void redirty_tail(struct inode *inode) /* * requeue inode for re-scanning after bdi->b_io list is exhausted. */ -static void requeue_io(struct inode *inode) +static void requeue_io(struct inode *inode, struct bdi_writeback *wb) { - struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; - - assert_spin_locked(&inode_wb_list_lock); + assert_spin_locked(&wb->list_lock); list_move(&inode->i_wb_list, &wb->b_more_io); } @@ -225,7 +224,7 @@ static void inode_sync_complete(struct inode *inode) { /* * Prevent speculative execution through - * spin_unlock(&inode_wb_list_lock); + * spin_unlock(&wb->list_lock); */ smp_mb(); @@ -250,15 +249,16 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t) /* * Move expired dirty inodes from @delaying_queue to @dispatch_queue. */ -static void move_expired_inodes(struct list_head *delaying_queue, +static int move_expired_inodes(struct list_head *delaying_queue, struct list_head *dispatch_queue, - unsigned long *older_than_this) + unsigned long *older_than_this) { LIST_HEAD(tmp); struct list_head *pos, *node; struct super_block *sb = NULL; struct inode *inode; int do_sb_sort = 0; + int moved = 0; while (!list_empty(delaying_queue)) { inode = wb_inode(delaying_queue->prev); @@ -269,12 +269,13 @@ static void move_expired_inodes(struct list_head *delaying_queue, do_sb_sort = 1; sb = inode->i_sb; list_move(&inode->i_wb_list, &tmp); + moved++; } /* just one sb in list, splice to dispatch_queue and we're done */ if (!do_sb_sort) { list_splice(&tmp, dispatch_queue); - return; + goto out; } /* Move inodes from one superblock together */ @@ -286,6 +287,8 @@ static void move_expired_inodes(struct list_head *delaying_queue, list_move(&inode->i_wb_list, dispatch_queue); } } +out: + return moved; } /* @@ -301,9 +304,11 @@ static void move_expired_inodes(struct list_head *delaying_queue, */ static void queue_io(struct bdi_writeback *wb, unsigned long *older_than_this) { - assert_spin_locked(&inode_wb_list_lock); + int moved; + assert_spin_locked(&wb->list_lock); list_splice_init(&wb->b_more_io, &wb->b_io); - move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); + moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, older_than_this); + trace_writeback_queue_io(wb, older_than_this, moved); } static int write_inode(struct inode *inode, struct writeback_control *wbc) @@ -316,7 +321,8 @@ static int write_inode(struct inode *inode, struct writeback_control *wbc) /* * Wait for writeback on an inode to complete. */ -static void inode_wait_for_writeback(struct inode *inode) +static void inode_wait_for_writeback(struct inode *inode, + struct bdi_writeback *wb) { DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); wait_queue_head_t *wqh; @@ -324,15 +330,15 @@ static void inode_wait_for_writeback(struct inode *inode) wqh = bit_waitqueue(&inode->i_state, __I_SYNC); while (inode->i_state & I_SYNC) { spin_unlock(&inode->i_lock); - spin_unlock(&inode_wb_list_lock); + spin_unlock(&wb->list_lock); __wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE); - spin_lock(&inode_wb_list_lock); + spin_lock(&wb->list_lock); spin_lock(&inode->i_lock); } } /* - * Write out an inode's dirty pages. Called under inode_wb_list_lock and + * Write out an inode's dirty pages. Called under wb->list_lock and * inode->i_lock. Either the caller has an active reference on the inode or * the inode has I_WILL_FREE set. * @@ -343,13 +349,15 @@ static void inode_wait_for_writeback(struct inode *inode) * livelocks, etc. */ static int -writeback_single_inode(struct inode *inode, struct writeback_control *wbc) +writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, + struct writeback_control *wbc) { struct address_space *mapping = inode->i_mapping; + long nr_to_write = wbc->nr_to_write; unsigned dirty; int ret; - assert_spin_locked(&inode_wb_list_lock); + assert_spin_locked(&wb->list_lock); assert_spin_locked(&inode->i_lock); if (!atomic_read(&inode->i_count)) @@ -367,14 +375,16 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * completed a full scan of b_io. */ if (wbc->sync_mode != WB_SYNC_ALL) { - requeue_io(inode); + requeue_io(inode, wb); + trace_writeback_single_inode_requeue(inode, wbc, + nr_to_write); return 0; } /* * It's a data-integrity sync. We must wait. */ - inode_wait_for_writeback(inode); + inode_wait_for_writeback(inode, wb); } BUG_ON(inode->i_state & I_SYNC); @@ -383,7 +393,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) inode->i_state |= I_SYNC; inode->i_state &= ~I_DIRTY_PAGES; spin_unlock(&inode->i_lock); - spin_unlock(&inode_wb_list_lock); + spin_unlock(&wb->list_lock); ret = do_writepages(mapping, wbc); @@ -414,10 +424,19 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) ret = err; } - spin_lock(&inode_wb_list_lock); + spin_lock(&wb->list_lock); spin_lock(&inode->i_lock); inode->i_state &= ~I_SYNC; if (!(inode->i_state & I_FREEING)) { + /* + * Sync livelock prevention. Each inode is tagged and synced in + * one shot. If still dirty, it will be redirty_tail()'ed below. + * Update the dirty time to prevent enqueue and sync it again. + */ + if ((inode->i_state & I_DIRTY) && + (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)) + inode->dirtied_when = jiffies; + if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) { /* * We didn't write back all the pages. nfs_writepages() @@ -428,7 +447,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) /* * slice used up: queue for next turn */ - requeue_io(inode); + requeue_io(inode, wb); } else { /* * Writeback blocked by something other than @@ -437,7 +456,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * retrying writeback of the dirty page/inode * that cannot be performed immediately. */ - redirty_tail(inode); + redirty_tail(inode, wb); } } else if (inode->i_state & I_DIRTY) { /* @@ -446,7 +465,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * submission or metadata updates after data IO * completion. */ - redirty_tail(inode); + redirty_tail(inode, wb); } else { /* * The inode is clean. At this point we either have @@ -457,9 +476,41 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) } } inode_sync_complete(inode); + trace_writeback_single_inode(inode, wbc, nr_to_write); return ret; } +static long writeback_chunk_size(struct backing_dev_info *bdi, + struct wb_writeback_work *work) +{ + long pages; + + /* + * WB_SYNC_ALL mode does livelock avoidance by syncing dirty + * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX + * here avoids calling into writeback_inodes_wb() more than once. + * + * The intended call sequence for WB_SYNC_ALL writeback is: + * + * wb_writeback() + * writeback_sb_inodes() <== called only once + * write_cache_pages() <== called once for each inode + * (quickly) tag currently dirty pages + * (maybe slowly) sync all tagged pages + */ + if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages) + pages = LONG_MAX; + else { + pages = min(bdi->avg_write_bandwidth / 2, + global_dirty_limit / DIRTY_SCOPE); + pages = min(pages, work->nr_pages); + pages = round_down(pages + MIN_WRITEBACK_PAGES, + MIN_WRITEBACK_PAGES); + } + + return pages; +} + /* * Write a portion of b_io inodes which belong to @sb. * @@ -467,24 +518,36 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * inodes. Otherwise write only ones which go sequentially * in reverse order. * - * Return 1, if the caller writeback routine should be - * interrupted. Otherwise return 0. + * Return the number of pages and/or inodes written. */ -static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, - struct writeback_control *wbc, bool only_this_sb) +static long writeback_sb_inodes(struct super_block *sb, + struct bdi_writeback *wb, + struct wb_writeback_work *work) { + struct writeback_control wbc = { + .sync_mode = work->sync_mode, + .tagged_writepages = work->tagged_writepages, + .for_kupdate = work->for_kupdate, + .for_background = work->for_background, + .range_cyclic = work->range_cyclic, + .range_start = 0, + .range_end = LLONG_MAX, + }; + unsigned long start_time = jiffies; + long write_chunk; + long wrote = 0; /* count both pages and inodes */ + while (!list_empty(&wb->b_io)) { - long pages_skipped; struct inode *inode = wb_inode(wb->b_io.prev); if (inode->i_sb != sb) { - if (only_this_sb) { + if (work->sb) { /* * We only want to write back data for this * superblock, move all inodes not belonging * to it back onto the dirty list. */ - redirty_tail(inode); + redirty_tail(inode, wb); continue; } @@ -493,7 +556,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, * Bounce back to the caller to unpin this and * pin the next superblock. */ - return 0; + break; } /* @@ -504,95 +567,91 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, spin_lock(&inode->i_lock); if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { spin_unlock(&inode->i_lock); - requeue_io(inode); + redirty_tail(inode, wb); continue; } - - /* - * Was this inode dirtied after sync_sb_inodes was called? - * This keeps sync from extra jobs and livelock. - */ - if (inode_dirtied_after(inode, wbc->wb_start)) { - spin_unlock(&inode->i_lock); - return 1; - } - __iget(inode); + write_chunk = writeback_chunk_size(wb->bdi, work); + wbc.nr_to_write = write_chunk; + wbc.pages_skipped = 0; - pages_skipped = wbc->pages_skipped; - writeback_single_inode(inode, wbc); - if (wbc->pages_skipped != pages_skipped) { + writeback_single_inode(inode, wb, &wbc); + + work->nr_pages -= write_chunk - wbc.nr_to_write; + wrote += write_chunk - wbc.nr_to_write; + if (!(inode->i_state & I_DIRTY)) + wrote++; + if (wbc.pages_skipped) { /* * writeback is not making progress due to locked * buffers. Skip this inode for now. */ - redirty_tail(inode); + redirty_tail(inode, wb); } spin_unlock(&inode->i_lock); - spin_unlock(&inode_wb_list_lock); + spin_unlock(&wb->list_lock); iput(inode); cond_resched(); - spin_lock(&inode_wb_list_lock); - if (wbc->nr_to_write <= 0) { - wbc->more_io = 1; - return 1; + spin_lock(&wb->list_lock); + /* + * bail out to wb_writeback() often enough to check + * background threshold and other termination conditions. + */ + if (wrote) { + if (time_is_before_jiffies(start_time + HZ / 10UL)) + break; + if (work->nr_pages <= 0) + break; } - if (!list_empty(&wb->b_more_io)) - wbc->more_io = 1; } - /* b_io is empty */ - return 1; + return wrote; } -void writeback_inodes_wb(struct bdi_writeback *wb, - struct writeback_control *wbc) +static long __writeback_inodes_wb(struct bdi_writeback *wb, + struct wb_writeback_work *work) { - int ret = 0; - - if (!wbc->wb_start) - wbc->wb_start = jiffies; /* livelock avoidance */ - spin_lock(&inode_wb_list_lock); - if (!wbc->for_kupdate || list_empty(&wb->b_io)) - queue_io(wb, wbc->older_than_this); + unsigned long start_time = jiffies; + long wrote = 0; while (!list_empty(&wb->b_io)) { struct inode *inode = wb_inode(wb->b_io.prev); struct super_block *sb = inode->i_sb; if (!grab_super_passive(sb)) { - requeue_io(inode); + requeue_io(inode, wb); continue; } - ret = writeback_sb_inodes(sb, wb, wbc, false); + wrote += writeback_sb_inodes(sb, wb, work); drop_super(sb); - if (ret) - break; + /* refer to the same tests at the end of writeback_sb_inodes */ + if (wrote) { + if (time_is_before_jiffies(start_time + HZ / 10UL)) + break; + if (work->nr_pages <= 0) + break; + } } - spin_unlock(&inode_wb_list_lock); /* Leave any unwritten inodes on b_io */ + return wrote; } -static void __writeback_inodes_sb(struct super_block *sb, - struct bdi_writeback *wb, struct writeback_control *wbc) +long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages) { - WARN_ON(!rwsem_is_locked(&sb->s_umount)); + struct wb_writeback_work work = { + .nr_pages = nr_pages, + .sync_mode = WB_SYNC_NONE, + .range_cyclic = 1, + }; - spin_lock(&inode_wb_list_lock); - if (!wbc->for_kupdate || list_empty(&wb->b_io)) - queue_io(wb, wbc->older_than_this); - writeback_sb_inodes(sb, wb, wbc, true); - spin_unlock(&inode_wb_list_lock); -} + spin_lock(&wb->list_lock); + if (list_empty(&wb->b_io)) + queue_io(wb, NULL); + __writeback_inodes_wb(wb, &work); + spin_unlock(&wb->list_lock); -/* - * The maximum number of pages to writeout in a single bdi flush/kupdate - * operation. We do this so we don't hold I_SYNC against an inode for - * enormous amounts of time, which would block a userspace task which has - * been forced to throttle against that inode. Also, the code reevaluates - * the dirty each time it has written this many pages. - */ -#define MAX_WRITEBACK_PAGES 1024 + return nr_pages - work.nr_pages; +} static inline bool over_bground_thresh(void) { @@ -605,6 +664,16 @@ static inline bool over_bground_thresh(void) } /* + * Called under wb->list_lock. If there are multiple wb per bdi, + * only the flusher working on the first wb should do it. + */ +static void wb_update_bandwidth(struct bdi_writeback *wb, + unsigned long start_time) +{ + __bdi_update_bandwidth(wb->bdi, 0, 0, 0, 0, start_time); +} + +/* * Explicit flushing or periodic writeback of "old" data. * * Define "old": the first time one of an inode's pages is dirtied, we mark the @@ -622,47 +691,16 @@ static inline bool over_bground_thresh(void) static long wb_writeback(struct bdi_writeback *wb, struct wb_writeback_work *work) { - struct writeback_control wbc = { - .sync_mode = work->sync_mode, - .older_than_this = NULL, - .for_kupdate = work->for_kupdate, - .for_background = work->for_background, - .range_cyclic = work->range_cyclic, - }; + unsigned long wb_start = jiffies; + long nr_pages = work->nr_pages; unsigned long oldest_jif; - long wrote = 0; - long write_chunk; struct inode *inode; + long progress; - if (wbc.for_kupdate) { - wbc.older_than_this = &oldest_jif; - oldest_jif = jiffies - - msecs_to_jiffies(dirty_expire_interval * 10); - } - if (!wbc.range_cyclic) { - wbc.range_start = 0; - wbc.range_end = LLONG_MAX; - } + oldest_jif = jiffies; + work->older_than_this = &oldest_jif; - /* - * WB_SYNC_ALL mode does livelock avoidance by syncing dirty - * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX - * here avoids calling into writeback_inodes_wb() more than once. - * - * The intended call sequence for WB_SYNC_ALL writeback is: - * - * wb_writeback() - * __writeback_inodes_sb() <== called only once - * write_cache_pages() <== called once for each inode - * (quickly) tag currently dirty pages - * (maybe slowly) sync all tagged pages - */ - if (wbc.sync_mode == WB_SYNC_NONE) - write_chunk = MAX_WRITEBACK_PAGES; - else - write_chunk = LONG_MAX; - - wbc.wb_start = jiffies; /* livelock avoidance */ + spin_lock(&wb->list_lock); for (;;) { /* * Stop writeback when nr_pages has been consumed @@ -687,52 +725,54 @@ static long wb_writeback(struct bdi_writeback *wb, if (work->for_background && !over_bground_thresh()) break; - wbc.more_io = 0; - wbc.nr_to_write = write_chunk; - wbc.pages_skipped = 0; + if (work->for_kupdate) { + oldest_jif = jiffies - + msecs_to_jiffies(dirty_expire_interval * 10); + work->older_than_this = &oldest_jif; + } - trace_wbc_writeback_start(&wbc, wb->bdi); + trace_writeback_start(wb->bdi, work); + if (list_empty(&wb->b_io)) + queue_io(wb, work->older_than_this); if (work->sb) - __writeback_inodes_sb(work->sb, wb, &wbc); + progress = writeback_sb_inodes(work->sb, wb, work); else - writeback_inodes_wb(wb, &wbc); - trace_wbc_writeback_written(&wbc, wb->bdi); + progress = __writeback_inodes_wb(wb, work); + trace_writeback_written(wb->bdi, work); - work->nr_pages -= write_chunk - wbc.nr_to_write; - wrote += write_chunk - wbc.nr_to_write; + wb_update_bandwidth(wb, wb_start); /* - * If we consumed everything, see if we have more + * Did we write something? Try for more + * + * Dirty inodes are moved to b_io for writeback in batches. + * The completion of the current batch does not necessarily + * mean the overall work is done. So we keep looping as long + * as made some progress on cleaning pages or inodes. */ - if (wbc.nr_to_write <= 0) + if (progress) continue; /* - * Didn't write everything and we don't have more IO, bail + * No more inodes for IO, bail */ - if (!wbc.more_io) + if (list_empty(&wb->b_more_io)) break; /* - * Did we write something? Try for more - */ - if (wbc.nr_to_write < write_chunk) - continue; - /* * Nothing written. Wait for some inode to * become available for writeback. Otherwise * we'll just busyloop. */ - spin_lock(&inode_wb_list_lock); if (!list_empty(&wb->b_more_io)) { + trace_writeback_wait(wb->bdi, work); inode = wb_inode(wb->b_more_io.prev); - trace_wbc_writeback_wait(&wbc, wb->bdi); spin_lock(&inode->i_lock); - inode_wait_for_writeback(inode); + inode_wait_for_writeback(inode, wb); spin_unlock(&inode->i_lock); } - spin_unlock(&inode_wb_list_lock); } + spin_unlock(&wb->list_lock); - return wrote; + return nr_pages - work->nr_pages; } /* @@ -1063,10 +1103,10 @@ void __mark_inode_dirty(struct inode *inode, int flags) } spin_unlock(&inode->i_lock); - spin_lock(&inode_wb_list_lock); + spin_lock(&bdi->wb.list_lock); inode->dirtied_when = jiffies; list_move(&inode->i_wb_list, &bdi->wb.b_dirty); - spin_unlock(&inode_wb_list_lock); + spin_unlock(&bdi->wb.list_lock); if (wakeup_bdi) bdi_wakeup_thread_delayed(bdi); @@ -1162,10 +1202,11 @@ void writeback_inodes_sb_nr(struct super_block *sb, unsigned long nr) { DECLARE_COMPLETION_ONSTACK(done); struct wb_writeback_work work = { - .sb = sb, - .sync_mode = WB_SYNC_NONE, - .done = &done, - .nr_pages = nr, + .sb = sb, + .sync_mode = WB_SYNC_NONE, + .tagged_writepages = 1, + .done = &done, + .nr_pages = nr, }; WARN_ON(!rwsem_is_locked(&sb->s_umount)); @@ -1267,6 +1308,7 @@ EXPORT_SYMBOL(sync_inodes_sb); */ int write_inode_now(struct inode *inode, int sync) { + struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; int ret; struct writeback_control wbc = { .nr_to_write = LONG_MAX, @@ -1279,11 +1321,11 @@ int write_inode_now(struct inode *inode, int sync) wbc.nr_to_write = 0; might_sleep(); - spin_lock(&inode_wb_list_lock); + spin_lock(&wb->list_lock); spin_lock(&inode->i_lock); - ret = writeback_single_inode(inode, &wbc); + ret = writeback_single_inode(inode, wb, &wbc); spin_unlock(&inode->i_lock); - spin_unlock(&inode_wb_list_lock); + spin_unlock(&wb->list_lock); if (sync) inode_sync_wait(inode); return ret; @@ -1303,13 +1345,14 @@ EXPORT_SYMBOL(write_inode_now); */ int sync_inode(struct inode *inode, struct writeback_control *wbc) { + struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; int ret; - spin_lock(&inode_wb_list_lock); + spin_lock(&wb->list_lock); spin_lock(&inode->i_lock); - ret = writeback_single_inode(inode, wbc); + ret = writeback_single_inode(inode, wb, wbc); spin_unlock(&inode->i_lock); - spin_unlock(&inode_wb_list_lock); + spin_unlock(&wb->list_lock); return ret; } EXPORT_SYMBOL(sync_inode); diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 7bb685cdd00..d480d9af46c 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1507,7 +1507,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock) pid_t pid = fl->fl_type != F_UNLCK ? current->tgid : 0; int err; - if (fl->fl_lmops && fl->fl_lmops->fl_grant) { + if (fl->fl_lmops && fl->fl_lmops->lm_grant) { /* NLM needs asynchronous locks, which we don't support yet */ return -ENOLCK; } diff --git a/fs/generic_acl.c b/fs/generic_acl.c index 70e90b4974c..d5e33a077a6 100644 --- a/fs/generic_acl.c +++ b/fs/generic_acl.c @@ -132,31 +132,17 @@ generic_acl_init(struct inode *inode, struct inode *dir) if (!S_ISLNK(inode->i_mode)) acl = get_cached_acl(dir, ACL_TYPE_DEFAULT); if (acl) { - struct posix_acl *clone; - - if (S_ISDIR(inode->i_mode)) { - clone = posix_acl_clone(acl, GFP_KERNEL); - error = -ENOMEM; - if (!clone) - goto cleanup; - set_cached_acl(inode, ACL_TYPE_DEFAULT, clone); - posix_acl_release(clone); - } - clone = posix_acl_clone(acl, GFP_KERNEL); - error = -ENOMEM; - if (!clone) - goto cleanup; - error = posix_acl_create_masq(clone, &mode); - if (error >= 0) { - inode->i_mode = mode; - if (error > 0) - set_cached_acl(inode, ACL_TYPE_ACCESS, clone); - } - posix_acl_release(clone); + if (S_ISDIR(inode->i_mode)) + set_cached_acl(inode, ACL_TYPE_DEFAULT, acl); + error = posix_acl_create(&acl, GFP_KERNEL, &mode); + if (error < 0) + return error; + inode->i_mode = mode; + if (error > 0) + set_cached_acl(inode, ACL_TYPE_ACCESS, acl); } error = 0; -cleanup: posix_acl_release(acl); return error; } @@ -170,44 +156,22 @@ cleanup: int generic_acl_chmod(struct inode *inode) { - struct posix_acl *acl, *clone; + struct posix_acl *acl; int error = 0; if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; acl = get_cached_acl(inode, ACL_TYPE_ACCESS); if (acl) { - clone = posix_acl_clone(acl, GFP_KERNEL); + error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); + if (error) + return error; + set_cached_acl(inode, ACL_TYPE_ACCESS, acl); posix_acl_release(acl); - if (!clone) - return -ENOMEM; - error = posix_acl_chmod_masq(clone, inode->i_mode); - if (!error) - set_cached_acl(inode, ACL_TYPE_ACCESS, clone); - posix_acl_release(clone); } return error; } -int -generic_check_acl(struct inode *inode, int mask) -{ - if (mask & MAY_NOT_BLOCK) { - if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) - return -ECHILD; - } else { - struct posix_acl *acl; - - acl = get_cached_acl(inode, ACL_TYPE_ACCESS); - if (acl) { - int error = posix_acl_permission(inode, acl, mask); - posix_acl_release(acl); - return error; - } - } - return -EAGAIN; -} - const struct xattr_handler generic_acl_access_handler = { .prefix = POSIX_ACL_XATTR_ACCESS, .flags = ACL_TYPE_ACCESS, diff --git a/fs/gfs2/acl.c b/fs/gfs2/acl.c index 8ef1079f166..884c9af0542 100644 --- a/fs/gfs2/acl.c +++ b/fs/gfs2/acl.c @@ -67,36 +67,9 @@ static struct posix_acl *gfs2_acl_get(struct gfs2_inode *ip, int type) return acl; } -/** - * gfs2_check_acl - Check an ACL to see if we're allowed to do something - * @inode: the file we want to do something to - * @mask: what we want to do - * - * Returns: errno - */ - -int gfs2_check_acl(struct inode *inode, int mask) +struct posix_acl *gfs2_get_acl(struct inode *inode, int type) { - struct posix_acl *acl; - int error; - - if (mask & MAY_NOT_BLOCK) { - if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) - return -ECHILD; - return -EAGAIN; - } - - acl = gfs2_acl_get(GFS2_I(inode), ACL_TYPE_ACCESS); - if (IS_ERR(acl)) - return PTR_ERR(acl); - - if (acl) { - error = posix_acl_permission(inode, acl, mask); - posix_acl_release(acl); - return error; - } - - return -EAGAIN; + return gfs2_acl_get(GFS2_I(inode), type); } static int gfs2_set_mode(struct inode *inode, mode_t mode) @@ -143,7 +116,7 @@ out: int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode) { struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct posix_acl *acl, *clone; + struct posix_acl *acl; mode_t mode = inode->i_mode; int error = 0; @@ -168,16 +141,10 @@ int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode) goto out; } - clone = posix_acl_clone(acl, GFP_NOFS); - error = -ENOMEM; - if (!clone) - goto out; - posix_acl_release(acl); - acl = clone; - - error = posix_acl_create_masq(acl, &mode); + error = posix_acl_create(&acl, GFP_NOFS, &mode); if (error < 0) - goto out; + return error; + if (error == 0) goto munge; @@ -193,7 +160,7 @@ out: int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) { - struct posix_acl *acl, *clone; + struct posix_acl *acl; char *data; unsigned int len; int error; @@ -204,25 +171,19 @@ int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr) if (!acl) return gfs2_setattr_simple(ip, attr); - clone = posix_acl_clone(acl, GFP_NOFS); + error = posix_acl_chmod(&acl, GFP_NOFS, attr->ia_mode); + if (error) + return error; + + len = posix_acl_to_xattr(acl, NULL, 0); + data = kmalloc(len, GFP_NOFS); error = -ENOMEM; - if (!clone) + if (data == NULL) goto out; - posix_acl_release(acl); - acl = clone; - - error = posix_acl_chmod_masq(acl, attr->ia_mode); - if (!error) { - len = posix_acl_to_xattr(acl, NULL, 0); - data = kmalloc(len, GFP_NOFS); - error = -ENOMEM; - if (data == NULL) - goto out; - posix_acl_to_xattr(acl, data, len); - error = gfs2_xattr_acl_chmod(ip, attr, data); - kfree(data); - set_cached_acl(&ip->i_inode, ACL_TYPE_ACCESS, acl); - } + posix_acl_to_xattr(acl, data, len); + error = gfs2_xattr_acl_chmod(ip, attr, data); + kfree(data); + set_cached_acl(&ip->i_inode, ACL_TYPE_ACCESS, acl); out: posix_acl_release(acl); diff --git a/fs/gfs2/acl.h b/fs/gfs2/acl.h index b522b0cb39e..0da38dc7efe 100644 --- a/fs/gfs2/acl.h +++ b/fs/gfs2/acl.h @@ -16,7 +16,7 @@ #define GFS2_POSIX_ACL_DEFAULT "posix_acl_default" #define GFS2_ACL_MAX_ENTRIES 25 -extern int gfs2_check_acl(struct inode *inode, int mask); +extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type); extern int gfs2_acl_create(struct gfs2_inode *dip, struct inode *inode); extern int gfs2_acl_chmod(struct gfs2_inode *ip, struct iattr *attr); extern const struct xattr_handler gfs2_xattr_system_handler; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 0fb51a96eff..900cf986aad 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1846,7 +1846,7 @@ const struct inode_operations gfs2_file_iops = { .listxattr = gfs2_listxattr, .removexattr = gfs2_removexattr, .fiemap = gfs2_fiemap, - .check_acl = gfs2_check_acl, + .get_acl = gfs2_get_acl, }; const struct inode_operations gfs2_dir_iops = { @@ -1867,7 +1867,7 @@ const struct inode_operations gfs2_dir_iops = { .listxattr = gfs2_listxattr, .removexattr = gfs2_removexattr, .fiemap = gfs2_fiemap, - .check_acl = gfs2_check_acl, + .get_acl = gfs2_get_acl, }; const struct inode_operations gfs2_symlink_iops = { @@ -1882,6 +1882,6 @@ const struct inode_operations gfs2_symlink_iops = { .listxattr = gfs2_listxattr, .removexattr = gfs2_removexattr, .fiemap = gfs2_fiemap, - .check_acl = gfs2_check_acl, + .get_acl = gfs2_get_acl, }; diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index 29e1ace7953..8a139ff1919 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -16,7 +16,7 @@ #include <linux/gfs2_ondisk.h> #include <linux/rcupdate.h> #include <linux/rculist_bl.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include "gfs2.h" #include "incore.h" diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 7aafeb8fa30..87b6e0421c1 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -94,7 +94,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) vma->vm_flags |= VM_HUGETLB | VM_RESERVED; vma->vm_ops = &hugetlb_vm_ops; - if (vma->vm_pgoff & ~(huge_page_mask(h) >> PAGE_SHIFT)) + if (vma->vm_pgoff & (~huge_page_mask(h) >> PAGE_SHIFT)) return -EINVAL; vma_len = (loff_t)(vma->vm_end - vma->vm_start); @@ -1030,6 +1030,7 @@ static int __init init_hugetlbfs_fs(void) static void __exit exit_hugetlbfs_fs(void) { kmem_cache_destroy(hugetlbfs_inode_cachep); + kern_unmount(hugetlbfs_vfsmount); unregister_filesystem(&hugetlbfs_fs_type); bdi_destroy(&hugetlbfs_backing_dev_info); } diff --git a/fs/inode.c b/fs/inode.c index 96c77b81167..a48fa5355fb 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -37,7 +37,7 @@ * inode->i_sb->s_inode_lru, inode->i_lru * inode_sb_list_lock protects: * sb->s_inodes, inode->i_sb_list - * inode_wb_list_lock protects: + * bdi->wb.list_lock protects: * bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list * inode_hash_lock protects: * inode_hashtable, inode->i_hash @@ -48,7 +48,7 @@ * inode->i_lock * inode->i_sb->s_inode_lru_lock * - * inode_wb_list_lock + * bdi->wb.list_lock * inode->i_lock * * inode_hash_lock @@ -65,7 +65,6 @@ static struct hlist_head *inode_hashtable __read_mostly; static __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_hash_lock); __cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_sb_list_lock); -__cacheline_aligned_in_smp DEFINE_SPINLOCK(inode_wb_list_lock); /* * Empty aops. Can be used for the cases where the user does not diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index e4b87bc1fa5..f94fc48ff3a 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -22,6 +22,8 @@ #include <linux/jbd.h> #include <linux/errno.h> #include <linux/slab.h> +#include <linux/blkdev.h> +#include <trace/events/jbd.h> /* * Unlink a buffer from a transaction checkpoint list. @@ -95,10 +97,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh) if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh) && !buffer_write_io_error(bh)) { + /* + * Get our reference so that bh cannot be freed before + * we unlock it + */ + get_bh(bh); JBUFFER_TRACE(jh, "remove from checkpoint list"); ret = __journal_remove_checkpoint(jh) + 1; jbd_unlock_bh_state(bh); - journal_remove_journal_head(bh); BUFFER_TRACE(bh, "release"); __brelse(bh); } else { @@ -220,8 +226,8 @@ restart: spin_lock(&journal->j_list_lock); goto restart; } + get_bh(bh); if (buffer_locked(bh)) { - get_bh(bh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); wait_on_buffer(bh); @@ -240,7 +246,6 @@ restart: */ released = __journal_remove_checkpoint(jh); jbd_unlock_bh_state(bh); - journal_remove_journal_head(bh); __brelse(bh); } @@ -253,9 +258,12 @@ static void __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) { int i; + struct blk_plug plug; + blk_start_plug(&plug); for (i = 0; i < *batch_count; i++) - write_dirty_buffer(bhs[i], WRITE); + write_dirty_buffer(bhs[i], WRITE_SYNC); + blk_finish_plug(&plug); for (i = 0; i < *batch_count; i++) { struct buffer_head *bh = bhs[i]; @@ -304,12 +312,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, ret = 1; if (unlikely(buffer_write_io_error(bh))) ret = -EIO; + get_bh(bh); J_ASSERT_JH(jh, !buffer_jbddirty(bh)); BUFFER_TRACE(bh, "remove from checkpoint"); __journal_remove_checkpoint(jh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); - journal_remove_journal_head(bh); __brelse(bh); } else { /* @@ -358,6 +366,7 @@ int log_do_checkpoint(journal_t *journal) * journal straight away. */ result = cleanup_journal_tail(journal); + trace_jbd_checkpoint(journal, result); jbd_debug(1, "cleanup_journal_tail returned %d\n", result); if (result <= 0) return result; @@ -503,6 +512,7 @@ int cleanup_journal_tail(journal_t *journal) if (blocknr < journal->j_tail) freed = freed + journal->j_last - journal->j_first; + trace_jbd_cleanup_journal_tail(journal, first_tid, blocknr, freed); jbd_debug(1, "Cleaning journal tail from %d to %d (offset %u), " "freeing %u\n", @@ -523,9 +533,9 @@ int cleanup_journal_tail(journal_t *journal) /* * journal_clean_one_cp_list * - * Find all the written-back checkpoint buffers in the given list and release them. + * Find all the written-back checkpoint buffers in the given list and release + * them. * - * Called with the journal locked. * Called with j_list_lock held. * Returns number of bufers reaped (for debug) */ @@ -632,8 +642,8 @@ out: * checkpoint lists. * * The function returns 1 if it frees the transaction, 0 otherwise. + * The function can free jh and bh. * - * This function is called with the journal locked. * This function is called with j_list_lock held. * This function is called with jbd_lock_bh_state(jh2bh(jh)) */ @@ -652,13 +662,14 @@ int __journal_remove_checkpoint(struct journal_head *jh) } journal = transaction->t_journal; + JBUFFER_TRACE(jh, "removing from transaction"); __buffer_unlink(jh); jh->b_cp_transaction = NULL; + journal_put_journal_head(jh); if (transaction->t_checkpoint_list != NULL || transaction->t_checkpoint_io_list != NULL) goto out; - JBUFFER_TRACE(jh, "transaction has no more buffers"); /* * There is one special case to worry about: if we have just pulled the @@ -669,10 +680,8 @@ int __journal_remove_checkpoint(struct journal_head *jh) * The locking here around t_state is a bit sleazy. * See the comment at the end of journal_commit_transaction(). */ - if (transaction->t_state != T_FINISHED) { - JBUFFER_TRACE(jh, "belongs to running/committing transaction"); + if (transaction->t_state != T_FINISHED) goto out; - } /* OK, that was the last buffer for the transaction: we can now safely remove this transaction from the log */ @@ -684,7 +693,6 @@ int __journal_remove_checkpoint(struct journal_head *jh) wake_up(&journal->j_wait_logspace); ret = 1; out: - JBUFFER_TRACE(jh, "exit"); return ret; } @@ -703,6 +711,8 @@ void __journal_insert_checkpoint(struct journal_head *jh, J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh))); J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); + /* Get reference for checkpointing transaction */ + journal_grab_journal_head(jh2bh(jh)); jh->b_cp_transaction = transaction; if (!transaction->t_checkpoint_list) { @@ -752,6 +762,7 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction) J_ASSERT(journal->j_committing_transaction != transaction); J_ASSERT(journal->j_running_transaction != transaction); + trace_jbd_drop_transaction(journal, transaction); jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid); kfree(transaction); } diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 72ffa974b0b..8799207df05 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -21,6 +21,7 @@ #include <linux/pagemap.h> #include <linux/bio.h> #include <linux/blkdev.h> +#include <trace/events/jbd.h> /* * Default IO end handler for temporary BJ_IO buffer_heads. @@ -204,6 +205,8 @@ write_out_data: if (!trylock_buffer(bh)) { BUFFER_TRACE(bh, "needs blocking lock"); spin_unlock(&journal->j_list_lock); + trace_jbd_do_submit_data(journal, + commit_transaction); /* Write out all data to prevent deadlocks */ journal_do_submit_data(wbuf, bufs, write_op); bufs = 0; @@ -236,6 +239,8 @@ write_out_data: jbd_unlock_bh_state(bh); if (bufs == journal->j_wbufsize) { spin_unlock(&journal->j_list_lock); + trace_jbd_do_submit_data(journal, + commit_transaction); journal_do_submit_data(wbuf, bufs, write_op); bufs = 0; goto write_out_data; @@ -253,10 +258,6 @@ write_out_data: jbd_unlock_bh_state(bh); if (locked) unlock_buffer(bh); - journal_remove_journal_head(bh); - /* One for our safety reference, other for - * journal_remove_journal_head() */ - put_bh(bh); release_data_buffer(bh); } @@ -266,6 +267,7 @@ write_out_data: } } spin_unlock(&journal->j_list_lock); + trace_jbd_do_submit_data(journal, commit_transaction); journal_do_submit_data(wbuf, bufs, write_op); return err; @@ -316,12 +318,14 @@ void journal_commit_transaction(journal_t *journal) commit_transaction = journal->j_running_transaction; J_ASSERT(commit_transaction->t_state == T_RUNNING); + trace_jbd_start_commit(journal, commit_transaction); jbd_debug(1, "JBD: starting commit of transaction %d\n", commit_transaction->t_tid); spin_lock(&journal->j_state_lock); commit_transaction->t_state = T_LOCKED; + trace_jbd_commit_locking(journal, commit_transaction); spin_lock(&commit_transaction->t_handle_lock); while (commit_transaction->t_updates) { DEFINE_WAIT(wait); @@ -392,6 +396,7 @@ void journal_commit_transaction(journal_t *journal) */ journal_switch_revoke_table(journal); + trace_jbd_commit_flushing(journal, commit_transaction); commit_transaction->t_state = T_FLUSH; journal->j_committing_transaction = commit_transaction; journal->j_running_transaction = NULL; @@ -446,14 +451,9 @@ void journal_commit_transaction(journal_t *journal) } if (buffer_jbd(bh) && bh2jh(bh) == jh && jh->b_transaction == commit_transaction && - jh->b_jlist == BJ_Locked) { + jh->b_jlist == BJ_Locked) __journal_unfile_buffer(jh); - jbd_unlock_bh_state(bh); - journal_remove_journal_head(bh); - put_bh(bh); - } else { - jbd_unlock_bh_state(bh); - } + jbd_unlock_bh_state(bh); release_data_buffer(bh); cond_resched_lock(&journal->j_list_lock); } @@ -493,6 +493,7 @@ void journal_commit_transaction(journal_t *journal) commit_transaction->t_state = T_COMMIT; spin_unlock(&journal->j_state_lock); + trace_jbd_commit_logging(journal, commit_transaction); J_ASSERT(commit_transaction->t_nr_buffers <= commit_transaction->t_outstanding_credits); @@ -797,10 +798,16 @@ restart_loop: while (commit_transaction->t_forget) { transaction_t *cp_transaction; struct buffer_head *bh; + int try_to_free = 0; jh = commit_transaction->t_forget; spin_unlock(&journal->j_list_lock); bh = jh2bh(jh); + /* + * Get a reference so that bh cannot be freed before we are + * done with it. + */ + get_bh(bh); jbd_lock_bh_state(bh); J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || jh->b_transaction == journal->j_running_transaction); @@ -858,28 +865,27 @@ restart_loop: __journal_insert_checkpoint(jh, commit_transaction); if (is_journal_aborted(journal)) clear_buffer_jbddirty(bh); - JBUFFER_TRACE(jh, "refile for checkpoint writeback"); - __journal_refile_buffer(jh); - jbd_unlock_bh_state(bh); } else { J_ASSERT_BH(bh, !buffer_dirty(bh)); - /* The buffer on BJ_Forget list and not jbddirty means + /* + * The buffer on BJ_Forget list and not jbddirty means * it has been freed by this transaction and hence it * could not have been reallocated until this * transaction has committed. *BUT* it could be * reallocated once we have written all the data to * disk and before we process the buffer on BJ_Forget - * list. */ - JBUFFER_TRACE(jh, "refile or unfile freed buffer"); - __journal_refile_buffer(jh); - if (!jh->b_transaction) { - jbd_unlock_bh_state(bh); - /* needs a brelse */ - journal_remove_journal_head(bh); - release_buffer_page(bh); - } else - jbd_unlock_bh_state(bh); + * list. + */ + if (!jh->b_next_transaction) + try_to_free = 1; } + JBUFFER_TRACE(jh, "refile or unfile freed buffer"); + __journal_refile_buffer(jh); + jbd_unlock_bh_state(bh); + if (try_to_free) + release_buffer_page(bh); + else + __brelse(bh); cond_resched_lock(&journal->j_list_lock); } spin_unlock(&journal->j_list_lock); @@ -946,6 +952,7 @@ restart_loop: } spin_unlock(&journal->j_list_lock); + trace_jbd_end_commit(journal, commit_transaction); jbd_debug(1, "JBD: commit %d complete, head %d\n", journal->j_commit_sequence, journal->j_tail_sequence); diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index e2d4285fbe9..9fe061fb877 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -38,6 +38,9 @@ #include <linux/debugfs.h> #include <linux/ratelimit.h> +#define CREATE_TRACE_POINTS +#include <trace/events/jbd.h> + #include <asm/uaccess.h> #include <asm/page.h> @@ -1065,6 +1068,7 @@ void journal_update_superblock(journal_t *journal, int wait) } else write_dirty_buffer(bh, WRITE); + trace_jbd_update_superblock_end(journal, wait); out: /* If we have just flushed the log (by marking s_start==0), then * any future commit will have to be careful to update the @@ -1799,10 +1803,9 @@ static void journal_free_journal_head(struct journal_head *jh) * When a buffer has its BH_JBD bit set it is immune from being released by * core kernel code, mainly via ->b_count. * - * A journal_head may be detached from its buffer_head when the journal_head's - * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. - * Various places in JBD call journal_remove_journal_head() to indicate that the - * journal_head can be dropped if needed. + * A journal_head is detached from its buffer_head when the journal_head's + * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint + * transaction (b_cp_transaction) hold their references to b_jcount. * * Various places in the kernel want to attach a journal_head to a buffer_head * _before_ attaching the journal_head to a transaction. To protect the @@ -1815,17 +1818,16 @@ static void journal_free_journal_head(struct journal_head *jh) * (Attach a journal_head if needed. Increments b_jcount) * struct journal_head *jh = journal_add_journal_head(bh); * ... - * jh->b_transaction = xxx; - * journal_put_journal_head(jh); - * - * Now, the journal_head's b_jcount is zero, but it is safe from being released - * because it has a non-zero b_transaction. + * (Get another reference for transaction) + * journal_grab_journal_head(bh); + * jh->b_transaction = xxx; + * (Put original reference) + * journal_put_journal_head(jh); */ /* * Give a buffer_head a journal_head. * - * Doesn't need the journal lock. * May sleep. */ struct journal_head *journal_add_journal_head(struct buffer_head *bh) @@ -1889,61 +1891,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh) struct journal_head *jh = bh2jh(bh); J_ASSERT_JH(jh, jh->b_jcount >= 0); - - get_bh(bh); - if (jh->b_jcount == 0) { - if (jh->b_transaction == NULL && - jh->b_next_transaction == NULL && - jh->b_cp_transaction == NULL) { - J_ASSERT_JH(jh, jh->b_jlist == BJ_None); - J_ASSERT_BH(bh, buffer_jbd(bh)); - J_ASSERT_BH(bh, jh2bh(jh) == bh); - BUFFER_TRACE(bh, "remove journal_head"); - if (jh->b_frozen_data) { - printk(KERN_WARNING "%s: freeing " - "b_frozen_data\n", - __func__); - jbd_free(jh->b_frozen_data, bh->b_size); - } - if (jh->b_committed_data) { - printk(KERN_WARNING "%s: freeing " - "b_committed_data\n", - __func__); - jbd_free(jh->b_committed_data, bh->b_size); - } - bh->b_private = NULL; - jh->b_bh = NULL; /* debug, really */ - clear_buffer_jbd(bh); - __brelse(bh); - journal_free_journal_head(jh); - } else { - BUFFER_TRACE(bh, "journal_head was locked"); - } + J_ASSERT_JH(jh, jh->b_transaction == NULL); + J_ASSERT_JH(jh, jh->b_next_transaction == NULL); + J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); + J_ASSERT_JH(jh, jh->b_jlist == BJ_None); + J_ASSERT_BH(bh, buffer_jbd(bh)); + J_ASSERT_BH(bh, jh2bh(jh) == bh); + BUFFER_TRACE(bh, "remove journal_head"); + if (jh->b_frozen_data) { + printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__); + jbd_free(jh->b_frozen_data, bh->b_size); } + if (jh->b_committed_data) { + printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__); + jbd_free(jh->b_committed_data, bh->b_size); + } + bh->b_private = NULL; + jh->b_bh = NULL; /* debug, really */ + clear_buffer_jbd(bh); + journal_free_journal_head(jh); } /* - * journal_remove_journal_head(): if the buffer isn't attached to a transaction - * and has a zero b_jcount then remove and release its journal_head. If we did - * see that the buffer is not used by any transaction we also "logically" - * decrement ->b_count. - * - * We in fact take an additional increment on ->b_count as a convenience, - * because the caller usually wants to do additional things with the bh - * after calling here. - * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some - * time. Once the caller has run __brelse(), the buffer is eligible for - * reaping by try_to_free_buffers(). - */ -void journal_remove_journal_head(struct buffer_head *bh) -{ - jbd_lock_bh_journal_head(bh); - __journal_remove_journal_head(bh); - jbd_unlock_bh_journal_head(bh); -} - -/* - * Drop a reference on the passed journal_head. If it fell to zero then try to + * Drop a reference on the passed journal_head. If it fell to zero then * release the journal_head from the buffer_head. */ void journal_put_journal_head(struct journal_head *jh) @@ -1953,11 +1923,12 @@ void journal_put_journal_head(struct journal_head *jh) jbd_lock_bh_journal_head(bh); J_ASSERT_JH(jh, jh->b_jcount > 0); --jh->b_jcount; - if (!jh->b_jcount && !jh->b_transaction) { + if (!jh->b_jcount) { __journal_remove_journal_head(bh); + jbd_unlock_bh_journal_head(bh); __brelse(bh); - } - jbd_unlock_bh_journal_head(bh); + } else + jbd_unlock_bh_journal_head(bh); } /* diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index f7ee81a065d..7e59c6e66f9 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -26,6 +26,7 @@ #include <linux/mm.h> #include <linux/highmem.h> #include <linux/hrtimer.h> +#include <linux/backing-dev.h> static void __journal_temp_unlink_buffer(struct journal_head *jh); @@ -99,11 +100,10 @@ static int start_this_handle(journal_t *journal, handle_t *handle) alloc_transaction: if (!journal->j_running_transaction) { - new_transaction = kzalloc(sizeof(*new_transaction), - GFP_NOFS|__GFP_NOFAIL); + new_transaction = kzalloc(sizeof(*new_transaction), GFP_NOFS); if (!new_transaction) { - ret = -ENOMEM; - goto out; + congestion_wait(BLK_RW_ASYNC, HZ/50); + goto alloc_transaction; } } @@ -696,7 +696,6 @@ repeat: if (!jh->b_transaction) { JBUFFER_TRACE(jh, "no transaction"); J_ASSERT_JH(jh, !jh->b_next_transaction); - jh->b_transaction = transaction; JBUFFER_TRACE(jh, "file as BJ_Reserved"); spin_lock(&journal->j_list_lock); __journal_file_buffer(jh, transaction, BJ_Reserved); @@ -818,7 +817,6 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh) * committed and so it's safe to clear the dirty bit. */ clear_buffer_dirty(jh2bh(jh)); - jh->b_transaction = transaction; /* first access by this transaction */ jh->b_modified = 0; @@ -844,8 +842,8 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh) */ JBUFFER_TRACE(jh, "cancelling revoke"); journal_cancel_revoke(handle, jh); - journal_put_journal_head(jh); out: + journal_put_journal_head(jh); return err; } @@ -1069,8 +1067,9 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) ret = -EIO; goto no_journal; } - - if (jh->b_transaction != NULL) { + /* We might have slept so buffer could be refiled now */ + if (jh->b_transaction != NULL && + jh->b_transaction != handle->h_transaction) { JBUFFER_TRACE(jh, "unfile from commit"); __journal_temp_unlink_buffer(jh); /* It still points to the committing @@ -1091,8 +1090,6 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { JBUFFER_TRACE(jh, "not on correct data list: unfile"); J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); - __journal_temp_unlink_buffer(jh); - jh->b_transaction = handle->h_transaction; JBUFFER_TRACE(jh, "file as data"); __journal_file_buffer(jh, handle->h_transaction, BJ_SyncData); @@ -1300,8 +1297,6 @@ int journal_forget (handle_t *handle, struct buffer_head *bh) __journal_file_buffer(jh, transaction, BJ_Forget); } else { __journal_unfile_buffer(jh); - journal_remove_journal_head(bh); - __brelse(bh); if (!buffer_jbd(bh)) { spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); @@ -1622,19 +1617,32 @@ static void __journal_temp_unlink_buffer(struct journal_head *jh) mark_buffer_dirty(bh); /* Expose it to the VM */ } +/* + * Remove buffer from all transactions. + * + * Called with bh_state lock and j_list_lock + * + * jh and bh may be already freed when this function returns. + */ void __journal_unfile_buffer(struct journal_head *jh) { __journal_temp_unlink_buffer(jh); jh->b_transaction = NULL; + journal_put_journal_head(jh); } void journal_unfile_buffer(journal_t *journal, struct journal_head *jh) { - jbd_lock_bh_state(jh2bh(jh)); + struct buffer_head *bh = jh2bh(jh); + + /* Get reference so that buffer cannot be freed before we unlock it */ + get_bh(bh); + jbd_lock_bh_state(bh); spin_lock(&journal->j_list_lock); __journal_unfile_buffer(jh); spin_unlock(&journal->j_list_lock); - jbd_unlock_bh_state(jh2bh(jh)); + jbd_unlock_bh_state(bh); + __brelse(bh); } /* @@ -1661,16 +1669,12 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) /* A written-back ordered data buffer */ JBUFFER_TRACE(jh, "release data"); __journal_unfile_buffer(jh); - journal_remove_journal_head(bh); - __brelse(bh); } } else if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { /* written-back checkpointed metadata buffer */ if (jh->b_jlist == BJ_None) { JBUFFER_TRACE(jh, "remove from checkpoint list"); __journal_remove_checkpoint(jh); - journal_remove_journal_head(bh); - __brelse(bh); } } spin_unlock(&journal->j_list_lock); @@ -1733,7 +1737,7 @@ int journal_try_to_free_buffers(journal_t *journal, /* * We take our own ref against the journal_head here to avoid * having to add tons of locking around each instance of - * journal_remove_journal_head() and journal_put_journal_head(). + * journal_put_journal_head(). */ jh = journal_grab_journal_head(bh); if (!jh) @@ -1770,10 +1774,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) int may_free = 1; struct buffer_head *bh = jh2bh(jh); - __journal_unfile_buffer(jh); - if (jh->b_cp_transaction) { JBUFFER_TRACE(jh, "on running+cp transaction"); + __journal_temp_unlink_buffer(jh); /* * We don't want to write the buffer anymore, clear the * bit so that we don't confuse checks in @@ -1784,8 +1787,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) may_free = 0; } else { JBUFFER_TRACE(jh, "on running transaction"); - journal_remove_journal_head(bh); - __brelse(bh); + __journal_unfile_buffer(jh); } return may_free; } @@ -2070,6 +2072,8 @@ void __journal_file_buffer(struct journal_head *jh, if (jh->b_transaction) __journal_temp_unlink_buffer(jh); + else + journal_grab_journal_head(bh); jh->b_transaction = transaction; switch (jlist) { @@ -2127,9 +2131,10 @@ void journal_file_buffer(struct journal_head *jh, * already started to be used by a subsequent transaction, refile the * buffer on that transaction's metadata list. * - * Called under journal->j_list_lock - * + * Called under j_list_lock * Called under jbd_lock_bh_state(jh2bh(jh)) + * + * jh and bh may be already free when this function returns */ void __journal_refile_buffer(struct journal_head *jh) { @@ -2153,6 +2158,11 @@ void __journal_refile_buffer(struct journal_head *jh) was_dirty = test_clear_buffer_jbddirty(bh); __journal_temp_unlink_buffer(jh); + /* + * We set b_transaction here because b_next_transaction will inherit + * our jh reference and thus __journal_file_buffer() must not take a + * new one. + */ jh->b_transaction = jh->b_next_transaction; jh->b_next_transaction = NULL; if (buffer_freed(bh)) @@ -2169,30 +2179,21 @@ void __journal_refile_buffer(struct journal_head *jh) } /* - * For the unlocked version of this call, also make sure that any - * hanging journal_head is cleaned up if necessary. - * - * __journal_refile_buffer is usually called as part of a single locked - * operation on a buffer_head, in which the caller is probably going to - * be hooking the journal_head onto other lists. In that case it is up - * to the caller to remove the journal_head if necessary. For the - * unlocked journal_refile_buffer call, the caller isn't going to be - * doing anything else to the buffer so we need to do the cleanup - * ourselves to avoid a jh leak. - * - * *** The journal_head may be freed by this call! *** + * __journal_refile_buffer() with necessary locking added. We take our bh + * reference so that we can safely unlock bh. + * + * The jh and bh may be freed by this call. */ void journal_refile_buffer(journal_t *journal, struct journal_head *jh) { struct buffer_head *bh = jh2bh(jh); + /* Get reference so that buffer cannot be freed before we unlock it */ + get_bh(bh); jbd_lock_bh_state(bh); spin_lock(&journal->j_list_lock); - __journal_refile_buffer(jh); jbd_unlock_bh_state(bh); - journal_remove_journal_head(bh); - spin_unlock(&journal->j_list_lock); __brelse(bh); } diff --git a/fs/jffs2/acl.c b/fs/jffs2/acl.c index 3675b3cdee8..27c511a1cf0 100644 --- a/fs/jffs2/acl.c +++ b/fs/jffs2/acl.c @@ -156,7 +156,7 @@ static void *jffs2_acl_to_medium(const struct posix_acl *acl, size_t *size) return ERR_PTR(-EINVAL); } -static struct posix_acl *jffs2_get_acl(struct inode *inode, int type) +struct posix_acl *jffs2_get_acl(struct inode *inode, int type) { struct posix_acl *acl; char *value = NULL; @@ -259,30 +259,11 @@ static int jffs2_set_acl(struct inode *inode, int type, struct posix_acl *acl) return rc; } -int jffs2_check_acl(struct inode *inode, int mask) +int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, mode_t *i_mode) { struct posix_acl *acl; int rc; - if (mask & MAY_NOT_BLOCK) - return -ECHILD; - - acl = jffs2_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl) { - rc = posix_acl_permission(inode, acl, mask); - posix_acl_release(acl); - return rc; - } - return -EAGAIN; -} - -int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) -{ - struct posix_acl *acl, *clone; - int rc; - cache_no_acl(inode); if (S_ISLNK(*i_mode)) @@ -298,18 +279,13 @@ int jffs2_init_acl_pre(struct inode *dir_i, struct inode *inode, int *i_mode) if (S_ISDIR(*i_mode)) set_cached_acl(inode, ACL_TYPE_DEFAULT, acl); - clone = posix_acl_clone(acl, GFP_KERNEL); - if (!clone) - return -ENOMEM; - rc = posix_acl_create_masq(clone, (mode_t *)i_mode); - if (rc < 0) { - posix_acl_release(clone); + rc = posix_acl_create(&acl, GFP_KERNEL, i_mode); + if (rc < 0) return rc; - } if (rc > 0) - set_cached_acl(inode, ACL_TYPE_ACCESS, clone); + set_cached_acl(inode, ACL_TYPE_ACCESS, acl); - posix_acl_release(clone); + posix_acl_release(acl); } return 0; } @@ -335,7 +311,7 @@ int jffs2_init_acl_post(struct inode *inode) int jffs2_acl_chmod(struct inode *inode) { - struct posix_acl *acl, *clone; + struct posix_acl *acl; int rc; if (S_ISLNK(inode->i_mode)) @@ -343,14 +319,11 @@ int jffs2_acl_chmod(struct inode *inode) acl = jffs2_get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl) || !acl) return PTR_ERR(acl); - clone = posix_acl_clone(acl, GFP_KERNEL); + rc = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); + if (rc) + return rc; + rc = jffs2_set_acl(inode, ACL_TYPE_ACCESS, acl); posix_acl_release(acl); - if (!clone) - return -ENOMEM; - rc = posix_acl_chmod_masq(clone, inode->i_mode); - if (!rc) - rc = jffs2_set_acl(inode, ACL_TYPE_ACCESS, clone); - posix_acl_release(clone); return rc; } diff --git a/fs/jffs2/acl.h b/fs/jffs2/acl.h index 5e42de8d954..b3421c78d9f 100644 --- a/fs/jffs2/acl.h +++ b/fs/jffs2/acl.h @@ -26,9 +26,9 @@ struct jffs2_acl_header { #ifdef CONFIG_JFFS2_FS_POSIX_ACL -extern int jffs2_check_acl(struct inode *, int); +struct posix_acl *jffs2_get_acl(struct inode *inode, int type); extern int jffs2_acl_chmod(struct inode *); -extern int jffs2_init_acl_pre(struct inode *, struct inode *, int *); +extern int jffs2_init_acl_pre(struct inode *, struct inode *, mode_t *); extern int jffs2_init_acl_post(struct inode *); extern const struct xattr_handler jffs2_acl_access_xattr_handler; @@ -36,7 +36,7 @@ extern const struct xattr_handler jffs2_acl_default_xattr_handler; #else -#define jffs2_check_acl (NULL) +#define jffs2_get_acl (NULL) #define jffs2_acl_chmod(inode) (0) #define jffs2_init_acl_pre(dir_i,inode,mode) (0) #define jffs2_init_acl_post(inode) (0) diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index 5f243cd63af..9659b7c0046 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -56,7 +56,7 @@ const struct inode_operations jffs2_dir_inode_operations = .rmdir = jffs2_rmdir, .mknod = jffs2_mknod, .rename = jffs2_rename, - .check_acl = jffs2_check_acl, + .get_acl = jffs2_get_acl, .setattr = jffs2_setattr, .setxattr = jffs2_setxattr, .getxattr = jffs2_getxattr, diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c index 3989f7e09f7..61e6723535b 100644 --- a/fs/jffs2/file.c +++ b/fs/jffs2/file.c @@ -63,7 +63,7 @@ const struct file_operations jffs2_file_operations = const struct inode_operations jffs2_file_inode_operations = { - .check_acl = jffs2_check_acl, + .get_acl = jffs2_get_acl, .setattr = jffs2_setattr, .setxattr = jffs2_setxattr, .getxattr = jffs2_getxattr, diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 46ad619b612..eeead33d8ef 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -406,7 +406,7 @@ int jffs2_remount_fs (struct super_block *sb, int *flags, char *data) /* jffs2_new_inode: allocate a new inode and inocache, add it to the hash, fill in the raw_inode while you're at it. */ -struct inode *jffs2_new_inode (struct inode *dir_i, int mode, struct jffs2_raw_inode *ri) +struct inode *jffs2_new_inode (struct inode *dir_i, mode_t mode, struct jffs2_raw_inode *ri) { struct inode *inode; struct super_block *sb = dir_i->i_sb; diff --git a/fs/jffs2/os-linux.h b/fs/jffs2/os-linux.h index 9c252835e8e..526979c607b 100644 --- a/fs/jffs2/os-linux.h +++ b/fs/jffs2/os-linux.h @@ -173,7 +173,7 @@ int jffs2_do_setattr (struct inode *, struct iattr *); struct inode *jffs2_iget(struct super_block *, unsigned long); void jffs2_evict_inode (struct inode *); void jffs2_dirty_inode(struct inode *inode, int flags); -struct inode *jffs2_new_inode (struct inode *dir_i, int mode, +struct inode *jffs2_new_inode (struct inode *dir_i, mode_t mode, struct jffs2_raw_inode *ri); int jffs2_statfs (struct dentry *, struct kstatfs *); int jffs2_remount_fs (struct super_block *, int *, char *); diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 2ab1a0d9121..ee57bac1ba6 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -1041,7 +1041,7 @@ static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_inf /* FIXME: point() */ err = jffs2_flash_read(c, ref_offset(ref), len, &retlen, buf); if (err) { - JFFS2_ERROR("can not read %d bytes from 0x%08x, " "error code: %d.\n", len, ref_offset(ref), err); + JFFS2_ERROR("can not read %d bytes from 0x%08x, error code: %d.\n", len, ref_offset(ref), err); goto free_out; } diff --git a/fs/jffs2/symlink.c b/fs/jffs2/symlink.c index b955626071c..e3035afb181 100644 --- a/fs/jffs2/symlink.c +++ b/fs/jffs2/symlink.c @@ -20,7 +20,7 @@ const struct inode_operations jffs2_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = jffs2_follow_link, - .check_acl = jffs2_check_acl, + .get_acl = jffs2_get_acl, .setattr = jffs2_setattr, .setxattr = jffs2_setxattr, .getxattr = jffs2_getxattr, diff --git a/fs/jfs/acl.c b/fs/jfs/acl.c index 8a0a0666d5a..b3a32caf2b4 100644 --- a/fs/jfs/acl.c +++ b/fs/jfs/acl.c @@ -27,7 +27,7 @@ #include "jfs_xattr.h" #include "jfs_acl.h" -static struct posix_acl *jfs_get_acl(struct inode *inode, int type) +struct posix_acl *jfs_get_acl(struct inode *inode, int type) { struct posix_acl *acl; char *ea_name; @@ -114,30 +114,9 @@ out: return rc; } -int jfs_check_acl(struct inode *inode, int mask) -{ - struct posix_acl *acl; - - if (mask & MAY_NOT_BLOCK) - return -ECHILD; - - acl = jfs_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl) { - int error = posix_acl_permission(inode, acl, mask); - posix_acl_release(acl); - return error; - } - - return -EAGAIN; -} - int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir) { struct posix_acl *acl = NULL; - struct posix_acl *clone; - mode_t mode; int rc = 0; if (S_ISLNK(inode->i_mode)) @@ -148,25 +127,18 @@ int jfs_init_acl(tid_t tid, struct inode *inode, struct inode *dir) return PTR_ERR(acl); if (acl) { + mode_t mode = inode->i_mode; if (S_ISDIR(inode->i_mode)) { rc = jfs_set_acl(tid, inode, ACL_TYPE_DEFAULT, acl); if (rc) goto cleanup; } - clone = posix_acl_clone(acl, GFP_KERNEL); - if (!clone) { - rc = -ENOMEM; - goto cleanup; - } - mode = inode->i_mode; - rc = posix_acl_create_masq(clone, &mode); - if (rc >= 0) { - inode->i_mode = mode; - if (rc > 0) - rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, - clone); - } - posix_acl_release(clone); + rc = posix_acl_create(&acl, GFP_KERNEL, &mode); + if (rc < 0) + goto cleanup; /* posix_acl_release(NULL) is no-op */ + inode->i_mode = mode; + if (rc > 0) + rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, acl); cleanup: posix_acl_release(acl); } else @@ -180,8 +152,9 @@ cleanup: int jfs_acl_chmod(struct inode *inode) { - struct posix_acl *acl, *clone; + struct posix_acl *acl; int rc; + tid_t tid; if (S_ISLNK(inode->i_mode)) return -EOPNOTSUPP; @@ -190,22 +163,18 @@ int jfs_acl_chmod(struct inode *inode) if (IS_ERR(acl) || !acl) return PTR_ERR(acl); - clone = posix_acl_clone(acl, GFP_KERNEL); - posix_acl_release(acl); - if (!clone) - return -ENOMEM; - - rc = posix_acl_chmod_masq(clone, inode->i_mode); - if (!rc) { - tid_t tid = txBegin(inode->i_sb, 0); - mutex_lock(&JFS_IP(inode)->commit_mutex); - rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, clone); - if (!rc) - rc = txCommit(tid, 1, &inode, 0); - txEnd(tid); - mutex_unlock(&JFS_IP(inode)->commit_mutex); - } + rc = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); + if (rc) + return rc; - posix_acl_release(clone); + tid = txBegin(inode->i_sb, 0); + mutex_lock(&JFS_IP(inode)->commit_mutex); + rc = jfs_set_acl(tid, inode, ACL_TYPE_ACCESS, acl); + if (!rc) + rc = txCommit(tid, 1, &inode, 0); + txEnd(tid); + mutex_unlock(&JFS_IP(inode)->commit_mutex); + + posix_acl_release(acl); return rc; } diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 7527855b5cc..844f9460cb1 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -140,7 +140,7 @@ const struct inode_operations jfs_file_inode_operations = { .removexattr = jfs_removexattr, .setattr = jfs_setattr, #ifdef CONFIG_JFS_POSIX_ACL - .check_acl = jfs_check_acl, + .get_acl = jfs_get_acl, #endif }; diff --git a/fs/jfs/jfs_acl.h b/fs/jfs/jfs_acl.h index 54e07559878..ad84fe50ca9 100644 --- a/fs/jfs/jfs_acl.h +++ b/fs/jfs/jfs_acl.h @@ -20,7 +20,7 @@ #ifdef CONFIG_JFS_POSIX_ACL -int jfs_check_acl(struct inode *, int); +struct posix_acl *jfs_get_acl(struct inode *inode, int type); int jfs_init_acl(tid_t, struct inode *, struct inode *); int jfs_acl_chmod(struct inode *inode); diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 03787ef6a11..29b1f1a2114 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -1537,7 +1537,7 @@ const struct inode_operations jfs_dir_inode_operations = { .removexattr = jfs_removexattr, .setattr = jfs_setattr, #ifdef CONFIG_JFS_POSIX_ACL - .check_acl = jfs_check_acl, + .get_acl = jfs_get_acl, #endif }; diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 6e31695d046..f0179c3745d 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -632,7 +632,7 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock) /* * This is a callback from the filesystem for VFS file lock requests. - * It will be used if fl_grant is defined and the filesystem can not + * It will be used if lm_grant is defined and the filesystem can not * respond to the request immediately. * For GETLK request it will copy the reply to the nlm_block. * For SETLK or SETLKW request it will get the local posix lock. @@ -719,9 +719,9 @@ static int nlmsvc_same_owner(struct file_lock *fl1, struct file_lock *fl2) } const struct lock_manager_operations nlmsvc_lock_operations = { - .fl_compare_owner = nlmsvc_same_owner, - .fl_notify = nlmsvc_notify_blocked, - .fl_grant = nlmsvc_grant_deferred, + .lm_compare_owner = nlmsvc_same_owner, + .lm_notify = nlmsvc_notify_blocked, + .lm_grant = nlmsvc_grant_deferred, }; /* diff --git a/fs/locks.c b/fs/locks.c index b286539d547..703f545097d 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -160,26 +160,20 @@ EXPORT_SYMBOL_GPL(unlock_flocks); static struct kmem_cache *filelock_cache __read_mostly; -static void locks_init_lock_always(struct file_lock *fl) +static void locks_init_lock_heads(struct file_lock *fl) { - fl->fl_next = NULL; - fl->fl_fasync = NULL; - fl->fl_owner = NULL; - fl->fl_pid = 0; - fl->fl_nspid = NULL; - fl->fl_file = NULL; - fl->fl_flags = 0; - fl->fl_type = 0; - fl->fl_start = fl->fl_end = 0; + INIT_LIST_HEAD(&fl->fl_link); + INIT_LIST_HEAD(&fl->fl_block); + init_waitqueue_head(&fl->fl_wait); } /* Allocate an empty lock structure. */ struct file_lock *locks_alloc_lock(void) { - struct file_lock *fl = kmem_cache_alloc(filelock_cache, GFP_KERNEL); + struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL); if (fl) - locks_init_lock_always(fl); + locks_init_lock_heads(fl); return fl; } @@ -193,8 +187,8 @@ void locks_release_private(struct file_lock *fl) fl->fl_ops = NULL; } if (fl->fl_lmops) { - if (fl->fl_lmops->fl_release_private) - fl->fl_lmops->fl_release_private(fl); + if (fl->fl_lmops->lm_release_private) + fl->fl_lmops->lm_release_private(fl); fl->fl_lmops = NULL; } @@ -215,27 +209,12 @@ EXPORT_SYMBOL(locks_free_lock); void locks_init_lock(struct file_lock *fl) { - INIT_LIST_HEAD(&fl->fl_link); - INIT_LIST_HEAD(&fl->fl_block); - init_waitqueue_head(&fl->fl_wait); - fl->fl_ops = NULL; - fl->fl_lmops = NULL; - locks_init_lock_always(fl); + memset(fl, 0, sizeof(struct file_lock)); + locks_init_lock_heads(fl); } EXPORT_SYMBOL(locks_init_lock); -/* - * Initialises the fields of the file lock which are invariant for - * free file_locks. - */ -static void init_once(void *foo) -{ - struct file_lock *lock = (struct file_lock *) foo; - - locks_init_lock(lock); -} - static void locks_copy_private(struct file_lock *new, struct file_lock *fl) { if (fl->fl_ops) { @@ -444,9 +423,9 @@ static void lease_release_private_callback(struct file_lock *fl) } static const struct lock_manager_operations lease_manager_ops = { - .fl_break = lease_break_callback, - .fl_release_private = lease_release_private_callback, - .fl_change = lease_modify, + .lm_break = lease_break_callback, + .lm_release_private = lease_release_private_callback, + .lm_change = lease_modify, }; /* @@ -499,9 +478,9 @@ static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2) */ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) { - if (fl1->fl_lmops && fl1->fl_lmops->fl_compare_owner) + if (fl1->fl_lmops && fl1->fl_lmops->lm_compare_owner) return fl2->fl_lmops == fl1->fl_lmops && - fl1->fl_lmops->fl_compare_owner(fl1, fl2); + fl1->fl_lmops->lm_compare_owner(fl1, fl2); return fl1->fl_owner == fl2->fl_owner; } @@ -551,8 +530,8 @@ static void locks_wake_up_blocks(struct file_lock *blocker) waiter = list_first_entry(&blocker->fl_block, struct file_lock, fl_block); __locks_delete_block(waiter); - if (waiter->fl_lmops && waiter->fl_lmops->fl_notify) - waiter->fl_lmops->fl_notify(waiter); + if (waiter->fl_lmops && waiter->fl_lmops->lm_notify) + waiter->fl_lmops->lm_notify(waiter); else wake_up(&waiter->fl_wait); } @@ -1239,7 +1218,7 @@ int __break_lease(struct inode *inode, unsigned int mode) fl->fl_type = future; fl->fl_break_time = break_time; /* lease must have lmops break callback */ - fl->fl_lmops->fl_break(fl); + fl->fl_lmops->lm_break(fl); } } @@ -1349,7 +1328,7 @@ int fcntl_getlease(struct file *filp) * @arg: type of lease to obtain * @flp: input - file_lock to use, output - file_lock inserted * - * The (input) flp->fl_lmops->fl_break function is required + * The (input) flp->fl_lmops->lm_break function is required * by break_lease(). * * Called with file_lock_lock held. @@ -1375,7 +1354,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) time_out_leases(inode); - BUG_ON(!(*flp)->fl_lmops->fl_break); + BUG_ON(!(*flp)->fl_lmops->lm_break); if (arg != F_UNLCK) { error = -EAGAIN; @@ -1417,7 +1396,7 @@ int generic_setlease(struct file *filp, long arg, struct file_lock **flp) goto out; if (my_before != NULL) { - error = lease->fl_lmops->fl_change(my_before, arg); + error = lease->fl_lmops->lm_change(my_before, arg); if (!error) *flp = *my_before; goto out; @@ -1453,7 +1432,7 @@ static int __vfs_setlease(struct file *filp, long arg, struct file_lock **lease) * @lease: file_lock to use * * Call this to establish a lease on the file. - * The (*lease)->fl_lmops->fl_break operation must be set; if not, + * The (*lease)->fl_lmops->lm_break operation must be set; if not, * break_lease will oops! * * This will call the filesystem's setlease file method, if @@ -1751,10 +1730,10 @@ out: * To avoid blocking kernel daemons, such as lockd, that need to acquire POSIX * locks, the ->lock() interface may return asynchronously, before the lock has * been granted or denied by the underlying filesystem, if (and only if) - * fl_grant is set. Callers expecting ->lock() to return asynchronously + * lm_grant is set. Callers expecting ->lock() to return asynchronously * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if) * the request is for a blocking lock. When ->lock() does return asynchronously, - * it must return FILE_LOCK_DEFERRED, and call ->fl_grant() when the lock + * it must return FILE_LOCK_DEFERRED, and call ->lm_grant() when the lock * request completes. * If the request is for non-blocking lock the file system should return * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine @@ -1764,7 +1743,7 @@ out: * grants a lock so the VFS can find out which locks are locally held and do * the correct lock cleanup when required. * The underlying filesystem must not drop the kernel lock or call - * ->fl_grant() before returning to the caller with a FILE_LOCK_DEFERRED + * ->lm_grant() before returning to the caller with a FILE_LOCK_DEFERRED * return code. */ int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf) @@ -2333,8 +2312,8 @@ EXPORT_SYMBOL(lock_may_write); static int __init filelock_init(void) { filelock_cache = kmem_cache_create("file_lock_cache", - sizeof(struct file_lock), 0, SLAB_PANIC, - init_once); + sizeof(struct file_lock), 0, SLAB_PANIC, NULL); + return 0; } diff --git a/fs/namei.c b/fs/namei.c index b7fad009bbf..f8c69d37379 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -32,6 +32,7 @@ #include <linux/fcntl.h> #include <linux/device_cgroup.h> #include <linux/fs_struct.h> +#include <linux/posix_acl.h> #include <asm/uaccess.h> #include "internal.h" @@ -173,12 +174,62 @@ void putname(const char *name) EXPORT_SYMBOL(putname); #endif +static int check_acl(struct inode *inode, int mask) +{ +#ifdef CONFIG_FS_POSIX_ACL + struct posix_acl *acl; + + /* + * Under RCU walk, we cannot even do a "get_cached_acl()", + * because that involves locking and getting a refcount on + * a cached ACL. + * + * So the only case we handle during RCU walking is the + * case of a cached "no ACL at all", which needs no locks + * or refcounts. + */ + if (mask & MAY_NOT_BLOCK) { + if (negative_cached_acl(inode, ACL_TYPE_ACCESS)) + return -EAGAIN; + return -ECHILD; + } + + acl = get_cached_acl(inode, ACL_TYPE_ACCESS); + + /* + * A filesystem can force a ACL callback by just never filling the + * ACL cache. But normally you'd fill the cache either at inode + * instantiation time, or on the first ->get_acl call. + * + * If the filesystem doesn't have a get_acl() function at all, we'll + * just create the negative cache entry. + */ + if (acl == ACL_NOT_CACHED) { + if (inode->i_op->get_acl) { + acl = inode->i_op->get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } else { + set_cached_acl(inode, ACL_TYPE_ACCESS, NULL); + return -EAGAIN; + } + } + + if (acl) { + int error = posix_acl_permission(inode, acl, mask); + posix_acl_release(acl); + return error; + } +#endif + + return -EAGAIN; +} + /* * This does basic POSIX ACL permission checking */ static int acl_permission_check(struct inode *inode, int mask) { - int (*check_acl)(struct inode *inode, int mask); unsigned int mode = inode->i_mode; mask &= MAY_READ | MAY_WRITE | MAY_EXEC | MAY_NOT_BLOCK; @@ -186,11 +237,10 @@ static int acl_permission_check(struct inode *inode, int mask) if (current_user_ns() != inode_userns(inode)) goto other_perms; - if (current_fsuid() == inode->i_uid) + if (likely(current_fsuid() == inode->i_uid)) mode >>= 6; else { - check_acl = inode->i_op->check_acl; - if (IS_POSIXACL(inode) && (mode & S_IRWXG) && check_acl) { + if (IS_POSIXACL(inode) && (mode & S_IRWXG)) { int error = check_acl(inode, mask); if (error != -EAGAIN) return error; diff --git a/fs/namespace.c b/fs/namespace.c index cda50fe9250..22bfe8273c6 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -2721,6 +2721,25 @@ EXPORT_SYMBOL(put_mnt_ns); struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) { - return vfs_kern_mount(type, MS_KERNMOUNT, type->name, data); + struct vfsmount *mnt; + mnt = vfs_kern_mount(type, MS_KERNMOUNT, type->name, data); + if (!IS_ERR(mnt)) { + /* + * it is a longterm mount, don't release mnt until + * we unmount before file sys is unregistered + */ + mnt_make_longterm(mnt); + } + return mnt; } EXPORT_SYMBOL_GPL(kern_mount_data); + +void kern_unmount(struct vfsmount *mnt) +{ + /* release long term mount so mount point can be released */ + if (!IS_ERR_OR_NULL(mnt)) { + mnt_make_shortterm(mnt); + mntput(mnt); + } +} +EXPORT_SYMBOL(kern_unmount); diff --git a/fs/nfs/cache_lib.h b/fs/nfs/cache_lib.h index 76f856e284e..7cf6cafcc00 100644 --- a/fs/nfs/cache_lib.h +++ b/fs/nfs/cache_lib.h @@ -6,7 +6,7 @@ #include <linux/completion.h> #include <linux/sunrpc/cache.h> -#include <asm/atomic.h> +#include <linux/atomic.h> /* * Deferred request handling diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index b35d25b98da..1940f1a56a5 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -53,7 +53,7 @@ #include <asm/system.h> #include <asm/uaccess.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include "internal.h" #include "iostat.h" diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 79664a1025a..f20801ae0a1 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -36,6 +36,8 @@ #include <linux/types.h> #include <linux/string.h> #include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/nfs_idmap.h> static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res) { @@ -59,12 +61,10 @@ static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen) #ifdef CONFIG_NFS_USE_NEW_IDMAPPER -#include <linux/slab.h> #include <linux/cred.h> #include <linux/sunrpc/sched.h> #include <linux/nfs4.h> #include <linux/nfs_fs_sb.h> -#include <linux/nfs_idmap.h> #include <linux/keyctl.h> #include <linux/key-type.h> #include <linux/rcupdate.h> @@ -284,18 +284,15 @@ int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, #include <linux/module.h> #include <linux/mutex.h> #include <linux/init.h> -#include <linux/slab.h> #include <linux/socket.h> #include <linux/in.h> #include <linux/sched.h> - #include <linux/sunrpc/clnt.h> #include <linux/workqueue.h> #include <linux/sunrpc/rpc_pipe_fs.h> #include <linux/nfs_fs.h> -#include <linux/nfs_idmap.h> #include "nfs4_fs.h" #define IDMAP_HASH_SZ 128 diff --git a/fs/nfs/nfs3acl.c b/fs/nfs/nfs3acl.c index 27434277165..e49e73107e6 100644 --- a/fs/nfs/nfs3acl.c +++ b/fs/nfs/nfs3acl.c @@ -427,16 +427,12 @@ int nfs3_proc_set_default_acl(struct inode *dir, struct inode *inode, } if (!dfacl) return 0; - acl = posix_acl_clone(dfacl, GFP_KERNEL); - error = -ENOMEM; - if (!acl) - goto out_release_dfacl; - error = posix_acl_create_masq(acl, &mode); + acl = posix_acl_dup(dfacl); + error = posix_acl_create(&acl, GFP_KERNEL, &mode); if (error < 0) - goto out_release_acl; + goto out_release_dfacl; error = nfs3_proc_setacls(inode, acl, S_ISDIR(inode->i_mode) ? dfacl : NULL); -out_release_acl: posix_acl_release(acl); out_release_dfacl: posix_acl_release(dfacl); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 08579312c57..00e37501fa3 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1566,8 +1566,7 @@ int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) int status; bool sync = true; - if (wbc->sync_mode == WB_SYNC_NONE || wbc->nonblocking || - wbc->for_background) + if (wbc->sync_mode == WB_SYNC_NONE) sync = false; status = pnfs_layoutcommit_inode(inode, sync); diff --git a/fs/nfsctl.c b/fs/nfsctl.c deleted file mode 100644 index 124e8fcb0dd..00000000000 --- a/fs/nfsctl.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * fs/nfsctl.c - * - * This should eventually move to userland. - * - */ -#include <linux/types.h> -#include <linux/file.h> -#include <linux/fs.h> -#include <linux/nfsd/syscall.h> -#include <linux/cred.h> -#include <linux/sched.h> -#include <linux/linkage.h> -#include <linux/namei.h> -#include <linux/mount.h> -#include <linux/syscalls.h> -#include <asm/uaccess.h> - -/* - * open a file on nfsd fs - */ - -static struct file *do_open(char *name, int flags) -{ - struct vfsmount *mnt; - struct file *file; - - mnt = do_kern_mount("nfsd", 0, "nfsd", NULL); - if (IS_ERR(mnt)) - return (struct file *)mnt; - - file = file_open_root(mnt->mnt_root, mnt, name, flags); - - mntput(mnt); /* drop do_kern_mount reference */ - return file; -} - -static struct { - char *name; int wsize; int rsize; -} map[] = { - [NFSCTL_SVC] = { - .name = ".svc", - .wsize = sizeof(struct nfsctl_svc) - }, - [NFSCTL_ADDCLIENT] = { - .name = ".add", - .wsize = sizeof(struct nfsctl_client) - }, - [NFSCTL_DELCLIENT] = { - .name = ".del", - .wsize = sizeof(struct nfsctl_client) - }, - [NFSCTL_EXPORT] = { - .name = ".export", - .wsize = sizeof(struct nfsctl_export) - }, - [NFSCTL_UNEXPORT] = { - .name = ".unexport", - .wsize = sizeof(struct nfsctl_export) - }, - [NFSCTL_GETFD] = { - .name = ".getfd", - .wsize = sizeof(struct nfsctl_fdparm), - .rsize = NFS_FHSIZE - }, - [NFSCTL_GETFS] = { - .name = ".getfs", - .wsize = sizeof(struct nfsctl_fsparm), - .rsize = sizeof(struct knfsd_fh) - }, -}; - -SYSCALL_DEFINE3(nfsservctl, int, cmd, struct nfsctl_arg __user *, arg, - void __user *, res) -{ - struct file *file; - void __user *p = &arg->u; - int version; - int err; - - if (copy_from_user(&version, &arg->ca_version, sizeof(int))) - return -EFAULT; - - if (version != NFSCTL_VERSION) - return -EINVAL; - - if (cmd < 0 || cmd >= ARRAY_SIZE(map) || !map[cmd].name) - return -EINVAL; - - file = do_open(map[cmd].name, map[cmd].rsize ? O_RDWR : O_WRONLY); - if (IS_ERR(file)) - return PTR_ERR(file); - err = file->f_op->write(file, p, map[cmd].wsize, &file->f_pos); - if (err >= 0 && map[cmd].rsize) - err = file->f_op->read(file, res, map[cmd].rsize, &file->f_pos); - if (err >= 0) - err = 0; - fput(file); - return err; -} diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig index fbb2a5ef581..10e6366608f 100644 --- a/fs/nfsd/Kconfig +++ b/fs/nfsd/Kconfig @@ -28,18 +28,6 @@ config NFSD If unsure, say N. -config NFSD_DEPRECATED - bool "Include support for deprecated syscall interface to NFSD" - depends on NFSD - default y - help - The syscall interface to nfsd was obsoleted in 2.6.0 by a new - filesystem based interface. The old interface is due for removal - in 2.6.40. If you wish to remove the interface before then - say N. - - In unsure, say Y. - config NFSD_V2_ACL bool depends on NFSD diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index d892be61016..93cc9d34c45 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -69,7 +69,7 @@ enum { int nfsd_reply_cache_init(void); void nfsd_reply_cache_shutdown(void); -int nfsd_cache_lookup(struct svc_rqst *, int); +int nfsd_cache_lookup(struct svc_rqst *); void nfsd_cache_update(struct svc_rqst *, int, __be32 *); #ifdef CONFIG_NFSD_V4 diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index b9566e46219..f4cc1e2bfc5 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -797,58 +797,6 @@ exp_find_key(svc_client *clp, int fsid_type, u32 *fsidv, struct cache_req *reqp) return ek; } -#ifdef CONFIG_NFSD_DEPRECATED -static int exp_set_key(svc_client *clp, int fsid_type, u32 *fsidv, - struct svc_export *exp) -{ - struct svc_expkey key, *ek; - - key.ek_client = clp; - key.ek_fsidtype = fsid_type; - memcpy(key.ek_fsid, fsidv, key_len(fsid_type)); - key.ek_path = exp->ex_path; - key.h.expiry_time = NEVER; - key.h.flags = 0; - - ek = svc_expkey_lookup(&key); - if (ek) - ek = svc_expkey_update(&key,ek); - if (ek) { - cache_put(&ek->h, &svc_expkey_cache); - return 0; - } - return -ENOMEM; -} - -/* - * Find the client's export entry matching xdev/xino. - */ -static inline struct svc_expkey * -exp_get_key(svc_client *clp, dev_t dev, ino_t ino) -{ - u32 fsidv[3]; - - if (old_valid_dev(dev)) { - mk_fsid(FSID_DEV, fsidv, dev, ino, 0, NULL); - return exp_find_key(clp, FSID_DEV, fsidv, NULL); - } - mk_fsid(FSID_ENCODE_DEV, fsidv, dev, ino, 0, NULL); - return exp_find_key(clp, FSID_ENCODE_DEV, fsidv, NULL); -} - -/* - * Find the client's export entry matching fsid - */ -static inline struct svc_expkey * -exp_get_fsid_key(svc_client *clp, int fsid) -{ - u32 fsidv[2]; - - mk_fsid(FSID_NUM, fsidv, 0, 0, fsid, NULL); - - return exp_find_key(clp, FSID_NUM, fsidv, NULL); -} -#endif static svc_export *exp_get_by_name(svc_client *clp, const struct path *path, struct cache_req *reqp) @@ -890,275 +838,7 @@ static struct svc_export *exp_parent(svc_client *clp, struct path *path) return exp; } -#ifdef CONFIG_NFSD_DEPRECATED -/* - * Hashtable locking. Write locks are placed only by user processes - * wanting to modify export information. - * Write locking only done in this file. Read locking - * needed externally. - */ - -static DECLARE_RWSEM(hash_sem); - -void -exp_readlock(void) -{ - down_read(&hash_sem); -} - -static inline void -exp_writelock(void) -{ - down_write(&hash_sem); -} - -void -exp_readunlock(void) -{ - up_read(&hash_sem); -} - -static inline void -exp_writeunlock(void) -{ - up_write(&hash_sem); -} -#else - -/* hash_sem not needed once deprecated interface is removed */ -void exp_readlock(void) {} -static inline void exp_writelock(void){} -void exp_readunlock(void) {} -static inline void exp_writeunlock(void){} - -#endif - -#ifdef CONFIG_NFSD_DEPRECATED -static void exp_do_unexport(svc_export *unexp); -static int exp_verify_string(char *cp, int max); - -static void exp_fsid_unhash(struct svc_export *exp) -{ - struct svc_expkey *ek; - - if ((exp->ex_flags & NFSEXP_FSID) == 0) - return; - - ek = exp_get_fsid_key(exp->ex_client, exp->ex_fsid); - if (!IS_ERR(ek)) { - sunrpc_invalidate(&ek->h, &svc_expkey_cache); - cache_put(&ek->h, &svc_expkey_cache); - } -} - -static int exp_fsid_hash(svc_client *clp, struct svc_export *exp) -{ - u32 fsid[2]; - - if ((exp->ex_flags & NFSEXP_FSID) == 0) - return 0; - - mk_fsid(FSID_NUM, fsid, 0, 0, exp->ex_fsid, NULL); - return exp_set_key(clp, FSID_NUM, fsid, exp); -} - -static int exp_hash(struct auth_domain *clp, struct svc_export *exp) -{ - u32 fsid[2]; - struct inode *inode = exp->ex_path.dentry->d_inode; - dev_t dev = inode->i_sb->s_dev; - - if (old_valid_dev(dev)) { - mk_fsid(FSID_DEV, fsid, dev, inode->i_ino, 0, NULL); - return exp_set_key(clp, FSID_DEV, fsid, exp); - } - mk_fsid(FSID_ENCODE_DEV, fsid, dev, inode->i_ino, 0, NULL); - return exp_set_key(clp, FSID_ENCODE_DEV, fsid, exp); -} -static void exp_unhash(struct svc_export *exp) -{ - struct svc_expkey *ek; - struct inode *inode = exp->ex_path.dentry->d_inode; - - ek = exp_get_key(exp->ex_client, inode->i_sb->s_dev, inode->i_ino); - if (!IS_ERR(ek)) { - sunrpc_invalidate(&ek->h, &svc_expkey_cache); - cache_put(&ek->h, &svc_expkey_cache); - } -} - -/* - * Export a file system. - */ -int -exp_export(struct nfsctl_export *nxp) -{ - svc_client *clp; - struct svc_export *exp = NULL; - struct svc_export new; - struct svc_expkey *fsid_key = NULL; - struct path path; - int err; - - /* Consistency check */ - err = -EINVAL; - if (!exp_verify_string(nxp->ex_path, NFS_MAXPATHLEN) || - !exp_verify_string(nxp->ex_client, NFSCLNT_IDMAX)) - goto out; - - dprintk("exp_export called for %s:%s (%x/%ld fl %x).\n", - nxp->ex_client, nxp->ex_path, - (unsigned)nxp->ex_dev, (long)nxp->ex_ino, - nxp->ex_flags); - - /* Try to lock the export table for update */ - exp_writelock(); - - /* Look up client info */ - if (!(clp = auth_domain_find(nxp->ex_client))) - goto out_unlock; - - - /* Look up the dentry */ - err = kern_path(nxp->ex_path, 0, &path); - if (err) - goto out_put_clp; - err = -EINVAL; - - exp = exp_get_by_name(clp, &path, NULL); - - memset(&new, 0, sizeof(new)); - - /* must make sure there won't be an ex_fsid clash */ - if ((nxp->ex_flags & NFSEXP_FSID) && - (!IS_ERR(fsid_key = exp_get_fsid_key(clp, nxp->ex_dev))) && - fsid_key->ek_path.mnt && - (fsid_key->ek_path.mnt != path.mnt || - fsid_key->ek_path.dentry != path.dentry)) - goto finish; - - if (!IS_ERR(exp)) { - /* just a flags/id/fsid update */ - - exp_fsid_unhash(exp); - exp->ex_flags = nxp->ex_flags; - exp->ex_anon_uid = nxp->ex_anon_uid; - exp->ex_anon_gid = nxp->ex_anon_gid; - exp->ex_fsid = nxp->ex_dev; - - err = exp_fsid_hash(clp, exp); - goto finish; - } - - err = check_export(path.dentry->d_inode, &nxp->ex_flags, NULL); - if (err) goto finish; - - err = -ENOMEM; - - dprintk("nfsd: creating export entry %p for client %p\n", exp, clp); - - new.h.expiry_time = NEVER; - new.h.flags = 0; - new.ex_pathname = kstrdup(nxp->ex_path, GFP_KERNEL); - if (!new.ex_pathname) - goto finish; - new.ex_client = clp; - new.ex_path = path; - new.ex_flags = nxp->ex_flags; - new.ex_anon_uid = nxp->ex_anon_uid; - new.ex_anon_gid = nxp->ex_anon_gid; - new.ex_fsid = nxp->ex_dev; - - exp = svc_export_lookup(&new); - if (exp) - exp = svc_export_update(&new, exp); - - if (!exp) - goto finish; - - if (exp_hash(clp, exp) || - exp_fsid_hash(clp, exp)) { - /* failed to create at least one index */ - exp_do_unexport(exp); - cache_flush(); - } else - err = 0; -finish: - kfree(new.ex_pathname); - if (!IS_ERR_OR_NULL(exp)) - exp_put(exp); - if (!IS_ERR_OR_NULL(fsid_key)) - cache_put(&fsid_key->h, &svc_expkey_cache); - path_put(&path); -out_put_clp: - auth_domain_put(clp); -out_unlock: - exp_writeunlock(); -out: - return err; -} - -/* - * Unexport a file system. The export entry has already - * been removed from the client's list of exported fs's. - */ -static void -exp_do_unexport(svc_export *unexp) -{ - sunrpc_invalidate(&unexp->h, &svc_export_cache); - exp_unhash(unexp); - exp_fsid_unhash(unexp); -} - - -/* - * unexport syscall. - */ -int -exp_unexport(struct nfsctl_export *nxp) -{ - struct auth_domain *dom; - svc_export *exp; - struct path path; - int err; - - /* Consistency check */ - if (!exp_verify_string(nxp->ex_path, NFS_MAXPATHLEN) || - !exp_verify_string(nxp->ex_client, NFSCLNT_IDMAX)) - return -EINVAL; - - exp_writelock(); - - err = -EINVAL; - dom = auth_domain_find(nxp->ex_client); - if (!dom) { - dprintk("nfsd: unexport couldn't find %s\n", nxp->ex_client); - goto out_unlock; - } - - err = kern_path(nxp->ex_path, 0, &path); - if (err) - goto out_domain; - - err = -EINVAL; - exp = exp_get_by_name(dom, &path, NULL); - path_put(&path); - if (IS_ERR(exp)) - goto out_domain; - - exp_do_unexport(exp); - exp_put(exp); - err = 0; - -out_domain: - auth_domain_put(dom); - cache_flush(); -out_unlock: - exp_writeunlock(); - return err; -} -#endif /* CONFIG_NFSD_DEPRECATED */ /* * Obtain the root fh on behalf of a client. @@ -1367,7 +1047,6 @@ static void *e_start(struct seq_file *m, loff_t *pos) unsigned hash, export; struct cache_head *ch; - exp_readlock(); read_lock(&svc_export_cache.hash_lock); if (!n--) return SEQ_START_TOKEN; @@ -1418,7 +1097,6 @@ static void e_stop(struct seq_file *m, void *p) __releases(svc_export_cache.hash_lock) { read_unlock(&svc_export_cache.hash_lock); - exp_readunlock(); } static struct flags { @@ -1550,97 +1228,6 @@ const struct seq_operations nfs_exports_op = { .show = e_show, }; -#ifdef CONFIG_NFSD_DEPRECATED -/* - * Add or modify a client. - * Change requests may involve the list of host addresses. The list of - * exports and possibly existing uid maps are left untouched. - */ -int -exp_addclient(struct nfsctl_client *ncp) -{ - struct auth_domain *dom; - int i, err; - struct in6_addr addr6; - - /* First, consistency check. */ - err = -EINVAL; - if (! exp_verify_string(ncp->cl_ident, NFSCLNT_IDMAX)) - goto out; - if (ncp->cl_naddr > NFSCLNT_ADDRMAX) - goto out; - - /* Lock the hashtable */ - exp_writelock(); - - dom = unix_domain_find(ncp->cl_ident); - - err = -ENOMEM; - if (!dom) - goto out_unlock; - - /* Insert client into hashtable. */ - for (i = 0; i < ncp->cl_naddr; i++) { - ipv6_addr_set_v4mapped(ncp->cl_addrlist[i].s_addr, &addr6); - auth_unix_add_addr(&init_net, &addr6, dom); - } - auth_unix_forget_old(dom); - auth_domain_put(dom); - - err = 0; - -out_unlock: - exp_writeunlock(); -out: - return err; -} - -/* - * Delete a client given an identifier. - */ -int -exp_delclient(struct nfsctl_client *ncp) -{ - int err; - struct auth_domain *dom; - - err = -EINVAL; - if (!exp_verify_string(ncp->cl_ident, NFSCLNT_IDMAX)) - goto out; - - /* Lock the hashtable */ - exp_writelock(); - - dom = auth_domain_find(ncp->cl_ident); - /* just make sure that no addresses work - * and that it will expire soon - */ - if (dom) { - err = auth_unix_forget_old(dom); - auth_domain_put(dom); - } - - exp_writeunlock(); -out: - return err; -} - -/* - * Verify that string is non-empty and does not exceed max length. - */ -static int -exp_verify_string(char *cp, int max) -{ - int i; - - for (i = 0; i < max; i++) - if (!cp[i]) - return i; - cp[i] = 0; - printk(KERN_NOTICE "nfsd: couldn't validate string %s\n", cp); - return 0; -} -#endif /* CONFIG_NFSD_DEPRECATED */ /* * Initialize the exports module. @@ -1667,10 +1254,8 @@ nfsd_export_init(void) void nfsd_export_flush(void) { - exp_writelock(); cache_purge(&svc_expkey_cache); cache_purge(&svc_export_cache); - exp_writeunlock(); } /* @@ -1682,12 +1267,9 @@ nfsd_export_shutdown(void) dprintk("nfsd: shutting down export module.\n"); - exp_writelock(); - cache_unregister(&svc_expkey_cache); cache_unregister(&svc_export_cache); svcauth_unix_purge(); - exp_writeunlock(); dprintk("nfsd: export shutdown complete.\n"); } diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 7c831a2731f..77e7a5cca88 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -35,10 +35,8 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp) memcpy((char*)&fh.fh_handle.fh_base, f->data, f->size); fh.fh_export = NULL; - exp_readlock(); nfserr = nfsd_open(rqstp, &fh, S_IFREG, NFSD_MAY_LOCK, filp); fh_put(&fh); - exp_readunlock(); /* We return nlm error codes as nlm doesn't know * about nfsd, but nfsd does know about nlm.. */ diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 3a6dbd70b34..e8077766661 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -291,6 +291,15 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) return nfserr_inval; + /* + * RFC5661 18.51.3 + * Before RECLAIM_COMPLETE done, server should deny new lock + */ + if (nfsd4_has_session(cstate) && + !cstate->session->se_client->cl_firststate && + open->op_claim_type != NFS4_OPEN_CLAIM_PREVIOUS) + return nfserr_grace; + if (nfsd4_has_session(cstate)) copy_clientid(&open->op_clientid, cstate->session); @@ -998,6 +1007,15 @@ struct nfsd4_operation { nfsd4op_func op_func; u32 op_flags; char *op_name; + /* + * We use the DRC for compounds containing non-idempotent + * operations, *except* those that are 4.1-specific (since + * sessions provide their own EOS), and except for stateful + * operations other than setclientid and setclientid_confirm + * (since sequence numbers provide EOS for open, lock, etc in + * the v4.0 case). + */ + bool op_cacheresult; }; static struct nfsd4_operation nfsd4_ops[]; @@ -1042,6 +1060,11 @@ static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op) return &nfsd4_ops[op->opnum]; } +bool nfsd4_cache_this_op(struct nfsd4_op *op) +{ + return OPDESC(op)->op_cacheresult; +} + static bool need_wrongsec_check(struct svc_rqst *rqstp) { struct nfsd4_compoundres *resp = rqstp->rq_resp; @@ -1209,7 +1232,6 @@ encode_op: fh_put(&resp->cstate.save_fh); BUG_ON(resp->cstate.replay_owner); out: - nfsd4_release_compoundargs(args); /* Reset deferral mechanism for RPC deferrals */ rqstp->rq_usedeferral = 1; dprintk("nfsv4 compound returned %d\n", ntohl(status)); @@ -1232,6 +1254,7 @@ static struct nfsd4_operation nfsd4_ops[] = { [OP_CREATE] = { .op_func = (nfsd4op_func)nfsd4_create, .op_name = "OP_CREATE", + .op_cacheresult = true, }, [OP_DELEGRETURN] = { .op_func = (nfsd4op_func)nfsd4_delegreturn, @@ -1249,6 +1272,7 @@ static struct nfsd4_operation nfsd4_ops[] = { [OP_LINK] = { .op_func = (nfsd4op_func)nfsd4_link, .op_name = "OP_LINK", + .op_cacheresult = true, }, [OP_LOCK] = { .op_func = (nfsd4op_func)nfsd4_lock, @@ -1322,10 +1346,12 @@ static struct nfsd4_operation nfsd4_ops[] = { [OP_REMOVE] = { .op_func = (nfsd4op_func)nfsd4_remove, .op_name = "OP_REMOVE", + .op_cacheresult = true, }, [OP_RENAME] = { .op_name = "OP_RENAME", .op_func = (nfsd4op_func)nfsd4_rename, + .op_cacheresult = true, }, [OP_RENEW] = { .op_func = (nfsd4op_func)nfsd4_renew, @@ -1351,16 +1377,19 @@ static struct nfsd4_operation nfsd4_ops[] = { [OP_SETATTR] = { .op_func = (nfsd4op_func)nfsd4_setattr, .op_name = "OP_SETATTR", + .op_cacheresult = true, }, [OP_SETCLIENTID] = { .op_func = (nfsd4op_func)nfsd4_setclientid, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, .op_name = "OP_SETCLIENTID", + .op_cacheresult = true, }, [OP_SETCLIENTID_CONFIRM] = { .op_func = (nfsd4op_func)nfsd4_setclientid_confirm, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS, .op_name = "OP_SETCLIENTID_CONFIRM", + .op_cacheresult = true, }, [OP_VERIFY] = { .op_func = (nfsd4op_func)nfsd4_verify, @@ -1369,6 +1398,7 @@ static struct nfsd4_operation nfsd4_ops[] = { [OP_WRITE] = { .op_func = (nfsd4op_func)nfsd4_write, .op_name = "OP_WRITE", + .op_cacheresult = true, }, [OP_RELEASE_LOCKOWNER] = { .op_func = (nfsd4op_func)nfsd4_release_lockowner, @@ -1402,6 +1432,11 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, .op_name = "OP_SEQUENCE", }, + [OP_DESTROY_CLIENTID] = { + .op_func = NULL, + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP, + .op_name = "OP_DESTROY_CLIENTID", + }, [OP_RECLAIM_COMPLETE] = { .op_func = (nfsd4op_func)nfsd4_reclaim_complete, .op_flags = ALLOWED_WITHOUT_FH, @@ -1412,6 +1447,16 @@ static struct nfsd4_operation nfsd4_ops[] = { .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_SECINFO_NO_NAME", }, + [OP_TEST_STATEID] = { + .op_func = (nfsd4op_func)nfsd4_test_stateid, + .op_flags = ALLOWED_WITHOUT_FH, + .op_name = "OP_TEST_STATEID", + }, + [OP_FREE_STATEID] = { + .op_func = (nfsd4op_func)nfsd4_free_stateid, + .op_flags = ALLOWED_WITHOUT_FH, + .op_name = "OP_FREE_STATEID", + }, }; static const char *nfsd4_op_name(unsigned opnum) @@ -1424,16 +1469,6 @@ static const char *nfsd4_op_name(unsigned opnum) #define nfsd4_voidres nfsd4_voidargs struct nfsd4_voidargs { int dummy; }; -/* - * TODO: At the present time, the NFSv4 server does not do XID caching - * of requests. Implementing XID caching would not be a serious problem, - * although it would require a mild change in interfaces since one - * doesn't know whether an NFSv4 request is idempotent until after the - * XDR decode. However, XID caching totally confuses pynfs (Peter - * Astrand's regression testsuite for NFSv4 servers), which reuses - * XID's liberally, so I've left it unimplemented until pynfs generates - * better XID's. - */ static struct svc_procedure nfsd_procedures4[2] = { [NFSPROC4_NULL] = { .pc_func = (svc_procfunc) nfsd4_proc_null, @@ -1449,6 +1484,7 @@ static struct svc_procedure nfsd_procedures4[2] = { .pc_encode = (kxdrproc_t) nfs4svc_encode_compoundres, .pc_argsize = sizeof(struct nfsd4_compoundargs), .pc_ressize = sizeof(struct nfsd4_compoundres), + .pc_release = nfsd4_release_compoundargs, .pc_cachetype = RC_NOCACHE, .pc_xdrressize = NFSD_BUFSIZE/4, }, diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index e98f3c2e949..3787ec11740 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -37,6 +37,7 @@ #include <linux/slab.h> #include <linux/namei.h> #include <linux/swap.h> +#include <linux/pagemap.h> #include <linux/sunrpc/svcauth_gss.h> #include <linux/sunrpc/clnt.h> #include "xdr4.h" @@ -60,9 +61,12 @@ static u64 current_sessionid = 1; /* forward declarations */ static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags); +static struct nfs4_stateid * search_for_stateid(stateid_t *stid); +static struct nfs4_delegation * search_for_delegation(stateid_t *stid); static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid); static char user_recovery_dirname[PATH_MAX] = "/var/lib/nfs/v4recovery"; static void nfs4_set_recdir(char *recdir); +static int check_for_locks(struct nfs4_file *filp, struct nfs4_stateowner *lowner); /* Locking: */ @@ -381,14 +385,6 @@ static int nfs4_access_to_omode(u32 access) BUG(); } -static int nfs4_access_bmap_to_omode(struct nfs4_stateid *stp) -{ - unsigned int access; - - set_access(&access, stp->st_access_bmap); - return nfs4_access_to_omode(access); -} - static void unhash_generic_stateid(struct nfs4_stateid *stp) { list_del(&stp->st_hash); @@ -398,11 +394,14 @@ static void unhash_generic_stateid(struct nfs4_stateid *stp) static void free_generic_stateid(struct nfs4_stateid *stp) { - int oflag; + int i; if (stp->st_access_bmap) { - oflag = nfs4_access_bmap_to_omode(stp); - nfs4_file_put_access(stp->st_file, oflag); + for (i = 1; i < 4; i++) { + if (test_bit(i, &stp->st_access_bmap)) + nfs4_file_put_access(stp->st_file, + nfs4_access_to_omode(i)); + } } put_nfs4_file(stp->st_file); kmem_cache_free(stateid_slab, stp); @@ -1507,6 +1506,29 @@ nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses, return slot->sl_status; } +#define NFSD_MIN_REQ_HDR_SEQ_SZ ((\ + 2 * 2 + /* credential,verifier: AUTH_NULL, length 0 */ \ + 1 + /* MIN tag is length with zero, only length */ \ + 3 + /* version, opcount, opcode */ \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \ + /* seqid, slotID, slotID, cache */ \ + 4 ) * sizeof(__be32)) + +#define NFSD_MIN_RESP_HDR_SEQ_SZ ((\ + 2 + /* verifier: AUTH_NULL, length 0 */\ + 1 + /* status */ \ + 1 + /* MIN tag is length with zero, only length */ \ + 3 + /* opcount, opcode, opstatus*/ \ + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \ + /* seqid, slotID, slotID, slotID, status */ \ + 5 ) * sizeof(__be32)) + +static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs fchannel) +{ + return fchannel.maxreq_sz < NFSD_MIN_REQ_HDR_SEQ_SZ + || fchannel.maxresp_sz < NFSD_MIN_RESP_HDR_SEQ_SZ; +} + __be32 nfsd4_create_session(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, @@ -1575,6 +1597,10 @@ nfsd4_create_session(struct svc_rqst *rqstp, cr_ses->flags &= ~SESSION4_PERSIST; cr_ses->flags &= ~SESSION4_RDMA; + status = nfserr_toosmall; + if (check_forechannel_attrs(cr_ses->fore_channel)) + goto out; + status = nfserr_jukebox; new = alloc_init_session(rqstp, conf, cr_ses); if (!new) @@ -1736,6 +1762,14 @@ static bool nfsd4_session_too_many_ops(struct svc_rqst *rqstp, struct nfsd4_sess return args->opcnt > session->se_fchannel.maxops; } +static bool nfsd4_request_too_big(struct svc_rqst *rqstp, + struct nfsd4_session *session) +{ + struct xdr_buf *xb = &rqstp->rq_arg; + + return xb->len > session->se_fchannel.maxreq_sz; +} + __be32 nfsd4_sequence(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, @@ -1768,6 +1802,10 @@ nfsd4_sequence(struct svc_rqst *rqstp, if (nfsd4_session_too_many_ops(rqstp, session)) goto out; + status = nfserr_req_too_big; + if (nfsd4_request_too_big(rqstp, session)) + goto out; + status = nfserr_badslot; if (seq->slotid >= session->se_fchannel.maxreqs) goto out; @@ -2337,15 +2375,6 @@ out: return ret; } -static inline void -nfs4_file_downgrade(struct nfs4_file *fp, unsigned int share_access) -{ - if (share_access & NFS4_SHARE_ACCESS_WRITE) - nfs4_file_put_access(fp, O_WRONLY); - if (share_access & NFS4_SHARE_ACCESS_READ) - nfs4_file_put_access(fp, O_RDONLY); -} - static void nfsd_break_one_deleg(struct nfs4_delegation *dp) { /* We're assuming the state code never drops its reference @@ -2396,8 +2425,8 @@ int nfsd_change_deleg_cb(struct file_lock **onlist, int arg) } static const struct lock_manager_operations nfsd_lease_mng_ops = { - .fl_break = nfsd_break_deleg_cb, - .fl_change = nfsd_change_deleg_cb, + .lm_break = nfsd_break_deleg_cb, + .lm_change = nfsd_change_deleg_cb, }; @@ -2556,12 +2585,18 @@ static inline int nfs4_access_to_access(u32 nfs4_access) return flags; } -static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file -*fp, struct svc_fh *cur_fh, u32 nfs4_access) +static __be32 nfs4_get_vfs_file(struct svc_rqst *rqstp, struct nfs4_file *fp, + struct svc_fh *cur_fh, struct nfsd4_open *open) { __be32 status; - int oflag = nfs4_access_to_omode(nfs4_access); - int access = nfs4_access_to_access(nfs4_access); + int oflag = nfs4_access_to_omode(open->op_share_access); + int access = nfs4_access_to_access(open->op_share_access); + + /* CLAIM_DELEGATE_CUR is used in response to a broken lease; + * allowing it to break the lease and return EAGAIN leaves the + * client unable to make progress in returning the delegation */ + if (open->op_claim_type == NFS4_OPEN_CLAIM_DELEGATE_CUR) + access |= NFSD_MAY_NOT_BREAK_LEASE; if (!fp->fi_fds[oflag]) { status = nfsd_open(rqstp, cur_fh, S_IFREG, access, @@ -2586,7 +2621,7 @@ nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp, if (stp == NULL) return nfserr_resource; - status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open->op_share_access); + status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open); if (status) { kmem_cache_free(stateid_slab, stp); return status; @@ -2619,14 +2654,14 @@ nfs4_upgrade_open(struct svc_rqst *rqstp, struct nfs4_file *fp, struct svc_fh *c new_access = !test_bit(op_share_access, &stp->st_access_bmap); if (new_access) { - status = nfs4_get_vfs_file(rqstp, fp, cur_fh, op_share_access); + status = nfs4_get_vfs_file(rqstp, fp, cur_fh, open); if (status) return status; } status = nfsd4_truncate(rqstp, cur_fh, open); if (status) { if (new_access) { - int oflag = nfs4_access_to_omode(new_access); + int oflag = nfs4_access_to_omode(op_share_access); nfs4_file_put_access(fp, oflag); } return status; @@ -3137,6 +3172,37 @@ static int is_delegation_stateid(stateid_t *stateid) return stateid->si_fileid == 0; } +static int is_open_stateid(struct nfs4_stateid *stateid) +{ + return stateid->st_openstp == NULL; +} + +__be32 nfs4_validate_stateid(stateid_t *stateid, int flags) +{ + struct nfs4_stateid *stp = NULL; + __be32 status = nfserr_stale_stateid; + + if (STALE_STATEID(stateid)) + goto out; + + status = nfserr_expired; + stp = search_for_stateid(stateid); + if (!stp) + goto out; + status = nfserr_bad_stateid; + + if (!stp->st_stateowner->so_confirmed) + goto out; + + status = check_stateid_generation(stateid, &stp->st_stateid, flags); + if (status) + goto out; + + status = nfs_ok; +out: + return status; +} + /* * Checks for stateid operations */ @@ -3216,6 +3282,81 @@ out: return status; } +static __be32 +nfsd4_free_delegation_stateid(stateid_t *stateid) +{ + struct nfs4_delegation *dp = search_for_delegation(stateid); + if (dp) + return nfserr_locks_held; + return nfserr_bad_stateid; +} + +static __be32 +nfsd4_free_lock_stateid(struct nfs4_stateid *stp) +{ + if (check_for_locks(stp->st_file, stp->st_stateowner)) + return nfserr_locks_held; + release_lock_stateid(stp); + return nfs_ok; +} + +/* + * Test if the stateid is valid + */ +__be32 +nfsd4_test_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_test_stateid *test_stateid) +{ + test_stateid->ts_has_session = nfsd4_has_session(cstate); + return nfs_ok; +} + +/* + * Free a state id + */ +__be32 +nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, + struct nfsd4_free_stateid *free_stateid) +{ + stateid_t *stateid = &free_stateid->fr_stateid; + struct nfs4_stateid *stp; + __be32 ret; + + nfs4_lock_state(); + if (is_delegation_stateid(stateid)) { + ret = nfsd4_free_delegation_stateid(stateid); + goto out; + } + + stp = search_for_stateid(stateid); + if (!stp) { + ret = nfserr_bad_stateid; + goto out; + } + if (stateid->si_generation != 0) { + if (stateid->si_generation < stp->st_stateid.si_generation) { + ret = nfserr_old_stateid; + goto out; + } + if (stateid->si_generation > stp->st_stateid.si_generation) { + ret = nfserr_bad_stateid; + goto out; + } + } + + if (is_open_stateid(stp)) { + ret = nfserr_locks_held; + goto out; + } else { + ret = nfsd4_free_lock_stateid(stp); + goto out; + } + +out: + nfs4_unlock_state(); + return ret; +} + static inline int setlkflg (int type) { @@ -3384,18 +3525,15 @@ out: return status; } - -/* - * unset all bits in union bitmap (bmap) that - * do not exist in share (from successful OPEN_DOWNGRADE) - */ -static void -reset_union_bmap_access(unsigned long access, unsigned long *bmap) +static inline void nfs4_file_downgrade(struct nfs4_stateid *stp, unsigned int to_access) { int i; + for (i = 1; i < 4; i++) { - if ((i & access) != i) - __clear_bit(i, bmap); + if (test_bit(i, &stp->st_access_bmap) && !(i & to_access)) { + nfs4_file_put_access(stp->st_file, i); + __clear_bit(i, &stp->st_access_bmap); + } } } @@ -3416,7 +3554,6 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, { __be32 status; struct nfs4_stateid *stp; - unsigned int share_access; dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", (int)cstate->current_fh.fh_dentry->d_name.len, @@ -3445,10 +3582,8 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp, stp->st_deny_bmap, od->od_share_deny); goto out; } - set_access(&share_access, stp->st_access_bmap); - nfs4_file_downgrade(stp->st_file, share_access & ~od->od_share_access); + nfs4_file_downgrade(stp, od->od_share_access); - reset_union_bmap_access(od->od_share_access, &stp->st_access_bmap); reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap); update_stateid(&stp->st_stateid); @@ -3594,6 +3729,14 @@ static struct list_head lock_ownerid_hashtbl[LOCK_HASH_SIZE]; static struct list_head lock_ownerstr_hashtbl[LOCK_HASH_SIZE]; static struct list_head lockstateid_hashtbl[STATEID_HASH_SIZE]; +static int +same_stateid(stateid_t *id_one, stateid_t *id_two) +{ + if (id_one->si_stateownerid != id_two->si_stateownerid) + return 0; + return id_one->si_fileid == id_two->si_fileid; +} + static struct nfs4_stateid * find_stateid(stateid_t *stid, int flags) { @@ -3623,6 +3766,44 @@ find_stateid(stateid_t *stid, int flags) return NULL; } +static struct nfs4_stateid * +search_for_stateid(stateid_t *stid) +{ + struct nfs4_stateid *local; + unsigned int hashval = stateid_hashval(stid->si_stateownerid, stid->si_fileid); + + list_for_each_entry(local, &lockstateid_hashtbl[hashval], st_hash) { + if (same_stateid(&local->st_stateid, stid)) + return local; + } + + list_for_each_entry(local, &stateid_hashtbl[hashval], st_hash) { + if (same_stateid(&local->st_stateid, stid)) + return local; + } + return NULL; +} + +static struct nfs4_delegation * +search_for_delegation(stateid_t *stid) +{ + struct nfs4_file *fp; + struct nfs4_delegation *dp; + struct list_head *pos; + int i; + + for (i = 0; i < FILE_HASH_SIZE; i++) { + list_for_each_entry(fp, &file_hashtbl[i], fi_hash) { + list_for_each(pos, &fp->fi_delegations) { + dp = list_entry(pos, struct nfs4_delegation, dl_perfile); + if (same_stateid(&dp->dl_stateid, stid)) + return dp; + } + } + } + return NULL; +} + static struct nfs4_delegation * find_delegation_stateid(struct inode *ino, stateid_t *stid) { diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 99018110321..c8bf405d19d 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -44,13 +44,15 @@ #include <linux/namei.h> #include <linux/statfs.h> #include <linux/utsname.h> +#include <linux/pagemap.h> #include <linux/sunrpc/svcauth_gss.h> #include "idmap.h" #include "acl.h" #include "xdr4.h" #include "vfs.h" - +#include "state.h" +#include "cache.h" #define NFSDDBG_FACILITY NFSDDBG_XDR @@ -131,6 +133,22 @@ xdr_error: \ } \ } while (0) +static void save_buf(struct nfsd4_compoundargs *argp, struct nfsd4_saved_compoundargs *savep) +{ + savep->p = argp->p; + savep->end = argp->end; + savep->pagelen = argp->pagelen; + savep->pagelist = argp->pagelist; +} + +static void restore_buf(struct nfsd4_compoundargs *argp, struct nfsd4_saved_compoundargs *savep) +{ + argp->p = savep->p; + argp->end = savep->end; + argp->pagelen = savep->pagelen; + argp->pagelist = savep->pagelist; +} + static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) { /* We want more bytes than seem to be available. @@ -1246,6 +1264,19 @@ nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp, } static __be32 +nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp, + struct nfsd4_free_stateid *free_stateid) +{ + DECODE_HEAD; + + READ_BUF(sizeof(stateid_t)); + READ32(free_stateid->fr_stateid.si_generation); + COPYMEM(&free_stateid->fr_stateid.si_opaque, sizeof(stateid_opaque_t)); + + DECODE_TAIL; +} + +static __be32 nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, struct nfsd4_sequence *seq) { @@ -1261,6 +1292,40 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp, DECODE_TAIL; } +static __be32 +nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid) +{ + unsigned int nbytes; + stateid_t si; + int i; + __be32 *p; + __be32 status; + + READ_BUF(4); + test_stateid->ts_num_ids = ntohl(*p++); + + nbytes = test_stateid->ts_num_ids * sizeof(stateid_t); + if (nbytes > (u32)((char *)argp->end - (char *)argp->p)) + goto xdr_error; + + test_stateid->ts_saved_args = argp; + save_buf(argp, &test_stateid->ts_savedp); + + for (i = 0; i < test_stateid->ts_num_ids; i++) { + status = nfsd4_decode_stateid(argp, &si); + if (status) + return status; + } + + status = 0; +out: + return status; +xdr_error: + dprintk("NFSD: xdr error (%s:%d)\n", __FILE__, __LINE__); + status = nfserr_bad_xdr; + goto out; +} + static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, struct nfsd4_reclaim_complete *rc) { DECODE_HEAD; @@ -1370,7 +1435,7 @@ static nfsd4_dec nfsd41_dec_ops[] = { [OP_EXCHANGE_ID] = (nfsd4_dec)nfsd4_decode_exchange_id, [OP_CREATE_SESSION] = (nfsd4_dec)nfsd4_decode_create_session, [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session, - [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid, [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, @@ -1380,7 +1445,7 @@ static nfsd4_dec nfsd41_dec_ops[] = { [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name, [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence, [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, - [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_notsupp, + [OP_TEST_STATEID] = (nfsd4_dec)nfsd4_decode_test_stateid, [OP_WANT_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_DESTROY_CLIENTID] = (nfsd4_dec)nfsd4_decode_notsupp, [OP_RECLAIM_COMPLETE] = (nfsd4_dec)nfsd4_decode_reclaim_complete, @@ -1402,6 +1467,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) DECODE_HEAD; struct nfsd4_op *op; struct nfsd4_minorversion_ops *ops; + bool cachethis = false; int i; /* @@ -1483,7 +1549,16 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) argp->opcnt = i+1; break; } + /* + * We'll try to cache the result in the DRC if any one + * op in the compound wants to be cached: + */ + cachethis |= nfsd4_cache_this_op(op); } + /* Sessions make the DRC unnecessary: */ + if (argp->minorversion) + cachethis = false; + argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE; DECODE_TAIL; } @@ -3116,6 +3191,21 @@ nfsd4_encode_destroy_session(struct nfsd4_compoundres *resp, int nfserr, } static __be32 +nfsd4_encode_free_stateid(struct nfsd4_compoundres *resp, int nfserr, + struct nfsd4_free_stateid *free_stateid) +{ + __be32 *p; + + if (nfserr) + return nfserr; + + RESERVE_SPACE(4); + WRITE32(nfserr); + ADJUST_ARGS(); + return nfserr; +} + +static __be32 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_sequence *seq) { @@ -3138,6 +3228,36 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr, return 0; } +__be32 +nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, int nfserr, + struct nfsd4_test_stateid *test_stateid) +{ + struct nfsd4_compoundargs *argp; + stateid_t si; + __be32 *p; + int i; + int valid; + + restore_buf(test_stateid->ts_saved_args, &test_stateid->ts_savedp); + argp = test_stateid->ts_saved_args; + + RESERVE_SPACE(4); + *p++ = htonl(test_stateid->ts_num_ids); + resp->p = p; + + nfs4_lock_state(); + for (i = 0; i < test_stateid->ts_num_ids; i++) { + nfsd4_decode_stateid(argp, &si); + valid = nfs4_validate_stateid(&si, test_stateid->ts_has_session); + RESERVE_SPACE(4); + *p++ = htonl(valid); + resp->p = p; + } + nfs4_unlock_state(); + + return nfserr; +} + static __be32 nfsd4_encode_noop(struct nfsd4_compoundres *resp, __be32 nfserr, void *p) { @@ -3196,7 +3316,7 @@ static nfsd4_enc nfsd4_enc_ops[] = { [OP_EXCHANGE_ID] = (nfsd4_enc)nfsd4_encode_exchange_id, [OP_CREATE_SESSION] = (nfsd4_enc)nfsd4_encode_create_session, [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_destroy_session, - [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop, + [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_free_stateid, [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop, [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, @@ -3206,7 +3326,7 @@ static nfsd4_enc nfsd4_enc_ops[] = { [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name, [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, - [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_noop, + [OP_TEST_STATEID] = (nfsd4_enc)nfsd4_encode_test_stateid, [OP_WANT_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, [OP_DESTROY_CLIENTID] = (nfsd4_enc)nfsd4_encode_noop, [OP_RECLAIM_COMPLETE] = (nfsd4_enc)nfsd4_encode_noop, @@ -3319,8 +3439,11 @@ nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy) return xdr_ressize_check(rqstp, p); } -void nfsd4_release_compoundargs(struct nfsd4_compoundargs *args) +int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp) { + struct svc_rqst *rqstp = rq; + struct nfsd4_compoundargs *args = rqstp->rq_argp; + if (args->ops != args->iops) { kfree(args->ops); args->ops = args->iops; @@ -3333,13 +3456,12 @@ void nfsd4_release_compoundargs(struct nfsd4_compoundargs *args) tb->release(tb->buf); kfree(tb); } + return 1; } int nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundargs *args) { - __be32 status; - args->p = p; args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len; args->pagelist = rqstp->rq_arg.pages; @@ -3349,11 +3471,7 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_comp args->ops = args->iops; args->rqstp = rqstp; - status = nfsd4_decode_compound(args); - if (status) { - nfsd4_release_compoundargs(args); - } - return !status; + return !nfsd4_decode_compound(args); } int diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 4666a209678..2cbac34a55d 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -118,7 +118,7 @@ hash_refile(struct svc_cacherep *rp) * Note that no operation within the loop may sleep. */ int -nfsd_cache_lookup(struct svc_rqst *rqstp, int type) +nfsd_cache_lookup(struct svc_rqst *rqstp) { struct hlist_node *hn; struct hlist_head *rh; @@ -128,6 +128,7 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type) vers = rqstp->rq_vers, proc = rqstp->rq_proc; unsigned long age; + int type = rqstp->rq_cachetype; int rtn; rqstp->rq_cacherep = NULL; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 2b1449dd2f4..c7716143cbd 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -24,15 +24,6 @@ */ enum { NFSD_Root = 1, -#ifdef CONFIG_NFSD_DEPRECATED - NFSD_Svc, - NFSD_Add, - NFSD_Del, - NFSD_Export, - NFSD_Unexport, - NFSD_Getfd, - NFSD_Getfs, -#endif NFSD_List, NFSD_Export_features, NFSD_Fh, @@ -59,15 +50,6 @@ enum { /* * write() for these nodes. */ -#ifdef CONFIG_NFSD_DEPRECATED -static ssize_t write_svc(struct file *file, char *buf, size_t size); -static ssize_t write_add(struct file *file, char *buf, size_t size); -static ssize_t write_del(struct file *file, char *buf, size_t size); -static ssize_t write_export(struct file *file, char *buf, size_t size); -static ssize_t write_unexport(struct file *file, char *buf, size_t size); -static ssize_t write_getfd(struct file *file, char *buf, size_t size); -static ssize_t write_getfs(struct file *file, char *buf, size_t size); -#endif static ssize_t write_filehandle(struct file *file, char *buf, size_t size); static ssize_t write_unlock_ip(struct file *file, char *buf, size_t size); static ssize_t write_unlock_fs(struct file *file, char *buf, size_t size); @@ -83,15 +65,6 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size); #endif static ssize_t (*write_op[])(struct file *, char *, size_t) = { -#ifdef CONFIG_NFSD_DEPRECATED - [NFSD_Svc] = write_svc, - [NFSD_Add] = write_add, - [NFSD_Del] = write_del, - [NFSD_Export] = write_export, - [NFSD_Unexport] = write_unexport, - [NFSD_Getfd] = write_getfd, - [NFSD_Getfs] = write_getfs, -#endif [NFSD_Fh] = write_filehandle, [NFSD_FO_UnlockIP] = write_unlock_ip, [NFSD_FO_UnlockFS] = write_unlock_fs, @@ -130,16 +103,6 @@ static ssize_t nfsctl_transaction_write(struct file *file, const char __user *bu static ssize_t nfsctl_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos) { -#ifdef CONFIG_NFSD_DEPRECATED - static int warned; - if (file->f_dentry->d_name.name[0] == '.' && !warned) { - printk(KERN_INFO - "Warning: \"%s\" uses deprecated NFSD interface: %s." - " This will be removed in 2.6.40\n", - current->comm, file->f_dentry->d_name.name); - warned = 1; - } -#endif if (! file->private_data) { /* An attempt to read a transaction file without writing * causes a 0-byte write so that the file can return @@ -226,303 +189,6 @@ static const struct file_operations pool_stats_operations = { * payload - write methods */ -#ifdef CONFIG_NFSD_DEPRECATED -/** - * write_svc - Start kernel's NFSD server - * - * Deprecated. /proc/fs/nfsd/threads is preferred. - * Function remains to support old versions of nfs-utils. - * - * Input: - * buf: struct nfsctl_svc - * svc_port: port number of this - * server's listener - * svc_nthreads: number of threads to start - * size: size in bytes of passed in nfsctl_svc - * Output: - * On success: returns zero - * On error: return code is negative errno value - */ -static ssize_t write_svc(struct file *file, char *buf, size_t size) -{ - struct nfsctl_svc *data; - int err; - if (size < sizeof(*data)) - return -EINVAL; - data = (struct nfsctl_svc*) buf; - err = nfsd_svc(data->svc_port, data->svc_nthreads); - if (err < 0) - return err; - return 0; -} - -/** - * write_add - Add or modify client entry in auth unix cache - * - * Deprecated. /proc/net/rpc/auth.unix.ip is preferred. - * Function remains to support old versions of nfs-utils. - * - * Input: - * buf: struct nfsctl_client - * cl_ident: '\0'-terminated C string - * containing domain name - * of client - * cl_naddr: no. of items in cl_addrlist - * cl_addrlist: array of client addresses - * cl_fhkeytype: ignored - * cl_fhkeylen: ignored - * cl_fhkey: ignored - * size: size in bytes of passed in nfsctl_client - * Output: - * On success: returns zero - * On error: return code is negative errno value - * - * Note: Only AF_INET client addresses are passed in, since - * nfsctl_client.cl_addrlist contains only in_addr fields for addresses. - */ -static ssize_t write_add(struct file *file, char *buf, size_t size) -{ - struct nfsctl_client *data; - if (size < sizeof(*data)) - return -EINVAL; - data = (struct nfsctl_client *)buf; - return exp_addclient(data); -} - -/** - * write_del - Remove client from auth unix cache - * - * Deprecated. /proc/net/rpc/auth.unix.ip is preferred. - * Function remains to support old versions of nfs-utils. - * - * Input: - * buf: struct nfsctl_client - * cl_ident: '\0'-terminated C string - * containing domain name - * of client - * cl_naddr: ignored - * cl_addrlist: ignored - * cl_fhkeytype: ignored - * cl_fhkeylen: ignored - * cl_fhkey: ignored - * size: size in bytes of passed in nfsctl_client - * Output: - * On success: returns zero - * On error: return code is negative errno value - * - * Note: Only AF_INET client addresses are passed in, since - * nfsctl_client.cl_addrlist contains only in_addr fields for addresses. - */ -static ssize_t write_del(struct file *file, char *buf, size_t size) -{ - struct nfsctl_client *data; - if (size < sizeof(*data)) - return -EINVAL; - data = (struct nfsctl_client *)buf; - return exp_delclient(data); -} - -/** - * write_export - Export part or all of a local file system - * - * Deprecated. /proc/net/rpc/{nfsd.export,nfsd.fh} are preferred. - * Function remains to support old versions of nfs-utils. - * - * Input: - * buf: struct nfsctl_export - * ex_client: '\0'-terminated C string - * containing domain name - * of client allowed to access - * this export - * ex_path: '\0'-terminated C string - * containing pathname of - * directory in local file system - * ex_dev: fsid to use for this export - * ex_ino: ignored - * ex_flags: export flags for this export - * ex_anon_uid: UID to use for anonymous - * requests - * ex_anon_gid: GID to use for anonymous - * requests - * size: size in bytes of passed in nfsctl_export - * Output: - * On success: returns zero - * On error: return code is negative errno value - */ -static ssize_t write_export(struct file *file, char *buf, size_t size) -{ - struct nfsctl_export *data; - if (size < sizeof(*data)) - return -EINVAL; - data = (struct nfsctl_export*)buf; - return exp_export(data); -} - -/** - * write_unexport - Unexport a previously exported file system - * - * Deprecated. /proc/net/rpc/{nfsd.export,nfsd.fh} are preferred. - * Function remains to support old versions of nfs-utils. - * - * Input: - * buf: struct nfsctl_export - * ex_client: '\0'-terminated C string - * containing domain name - * of client no longer allowed - * to access this export - * ex_path: '\0'-terminated C string - * containing pathname of - * directory in local file system - * ex_dev: ignored - * ex_ino: ignored - * ex_flags: ignored - * ex_anon_uid: ignored - * ex_anon_gid: ignored - * size: size in bytes of passed in nfsctl_export - * Output: - * On success: returns zero - * On error: return code is negative errno value - */ -static ssize_t write_unexport(struct file *file, char *buf, size_t size) -{ - struct nfsctl_export *data; - - if (size < sizeof(*data)) - return -EINVAL; - data = (struct nfsctl_export*)buf; - return exp_unexport(data); -} - -/** - * write_getfs - Get a variable-length NFS file handle by path - * - * Deprecated. /proc/fs/nfsd/filehandle is preferred. - * Function remains to support old versions of nfs-utils. - * - * Input: - * buf: struct nfsctl_fsparm - * gd_addr: socket address of client - * gd_path: '\0'-terminated C string - * containing pathname of - * directory in local file system - * gd_maxlen: maximum size of returned file - * handle - * size: size in bytes of passed in nfsctl_fsparm - * Output: - * On success: passed-in buffer filled with a knfsd_fh structure - * (a variable-length raw NFS file handle); - * return code is the size in bytes of the file handle - * On error: return code is negative errno value - * - * Note: Only AF_INET client addresses are passed in, since gd_addr - * is the same size as a struct sockaddr_in. - */ -static ssize_t write_getfs(struct file *file, char *buf, size_t size) -{ - struct nfsctl_fsparm *data; - struct sockaddr_in *sin; - struct auth_domain *clp; - int err = 0; - struct knfsd_fh *res; - struct in6_addr in6; - - if (size < sizeof(*data)) - return -EINVAL; - data = (struct nfsctl_fsparm*)buf; - err = -EPROTONOSUPPORT; - if (data->gd_addr.sa_family != AF_INET) - goto out; - sin = (struct sockaddr_in *)&data->gd_addr; - if (data->gd_maxlen > NFS3_FHSIZE) - data->gd_maxlen = NFS3_FHSIZE; - - res = (struct knfsd_fh*)buf; - - exp_readlock(); - - ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6); - - clp = auth_unix_lookup(&init_net, &in6); - if (!clp) - err = -EPERM; - else { - err = exp_rootfh(clp, data->gd_path, res, data->gd_maxlen); - auth_domain_put(clp); - } - exp_readunlock(); - if (err == 0) - err = res->fh_size + offsetof(struct knfsd_fh, fh_base); - out: - return err; -} - -/** - * write_getfd - Get a fixed-length NFS file handle by path (used by mountd) - * - * Deprecated. /proc/fs/nfsd/filehandle is preferred. - * Function remains to support old versions of nfs-utils. - * - * Input: - * buf: struct nfsctl_fdparm - * gd_addr: socket address of client - * gd_path: '\0'-terminated C string - * containing pathname of - * directory in local file system - * gd_version: fdparm structure version - * size: size in bytes of passed in nfsctl_fdparm - * Output: - * On success: passed-in buffer filled with nfsctl_res - * (a fixed-length raw NFS file handle); - * return code is the size in bytes of the file handle - * On error: return code is negative errno value - * - * Note: Only AF_INET client addresses are passed in, since gd_addr - * is the same size as a struct sockaddr_in. - */ -static ssize_t write_getfd(struct file *file, char *buf, size_t size) -{ - struct nfsctl_fdparm *data; - struct sockaddr_in *sin; - struct auth_domain *clp; - int err = 0; - struct knfsd_fh fh; - char *res; - struct in6_addr in6; - - if (size < sizeof(*data)) - return -EINVAL; - data = (struct nfsctl_fdparm*)buf; - err = -EPROTONOSUPPORT; - if (data->gd_addr.sa_family != AF_INET) - goto out; - err = -EINVAL; - if (data->gd_version < 2 || data->gd_version > NFSSVC_MAXVERS) - goto out; - - res = buf; - sin = (struct sockaddr_in *)&data->gd_addr; - exp_readlock(); - - ipv6_addr_set_v4mapped(sin->sin_addr.s_addr, &in6); - - clp = auth_unix_lookup(&init_net, &in6); - if (!clp) - err = -EPERM; - else { - err = exp_rootfh(clp, data->gd_path, &fh, NFS_FHSIZE); - auth_domain_put(clp); - } - exp_readunlock(); - - if (err == 0) { - memset(res,0, NFS_FHSIZE); - memcpy(res, &fh.fh_base, fh.fh_size); - err = NFS_FHSIZE; - } - out: - return err; -} -#endif /* CONFIG_NFSD_DEPRECATED */ /** * write_unlock_ip - Release all locks used by a client @@ -1397,15 +1063,6 @@ static ssize_t write_recoverydir(struct file *file, char *buf, size_t size) static int nfsd_fill_super(struct super_block * sb, void * data, int silent) { static struct tree_descr nfsd_files[] = { -#ifdef CONFIG_NFSD_DEPRECATED - [NFSD_Svc] = {".svc", &transaction_ops, S_IWUSR}, - [NFSD_Add] = {".add", &transaction_ops, S_IWUSR}, - [NFSD_Del] = {".del", &transaction_ops, S_IWUSR}, - [NFSD_Export] = {".export", &transaction_ops, S_IWUSR}, - [NFSD_Unexport] = {".unexport", &transaction_ops, S_IWUSR}, - [NFSD_Getfd] = {".getfd", &transaction_ops, S_IWUSR|S_IRUSR}, - [NFSD_Getfs] = {".getfs", &transaction_ops, S_IWUSR|S_IRUSR}, -#endif [NFSD_List] = {"exports", &exports_operations, S_IRUGO}, [NFSD_Export_features] = {"export_features", &export_features_operations, S_IRUGO}, diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 18743c4d8bc..dc5a1bf476b 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -528,16 +528,9 @@ nfsd(void *vrqstp) continue; } - - /* Lock the export hash tables for reading. */ - exp_readlock(); - validate_process_creds(); svc_process(rqstp); validate_process_creds(); - - /* Unlock export hash tables */ - exp_readunlock(); } /* Clear signals before calling svc_exit_thread() */ @@ -577,8 +570,22 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) rqstp->rq_vers, rqstp->rq_proc); proc = rqstp->rq_procinfo; + /* + * Give the xdr decoder a chance to change this if it wants + * (necessary in the NFSv4.0 compound case) + */ + rqstp->rq_cachetype = proc->pc_cachetype; + /* Decode arguments */ + xdr = proc->pc_decode; + if (xdr && !xdr(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base, + rqstp->rq_argp)) { + dprintk("nfsd: failed to decode arguments!\n"); + *statp = rpc_garbage_args; + return 1; + } + /* Check whether we have this call in the cache. */ - switch (nfsd_cache_lookup(rqstp, proc->pc_cachetype)) { + switch (nfsd_cache_lookup(rqstp)) { case RC_INTR: case RC_DROPIT: return 0; @@ -588,16 +595,6 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) /* do it */ } - /* Decode arguments */ - xdr = proc->pc_decode; - if (xdr && !xdr(rqstp, (__be32*)rqstp->rq_arg.head[0].iov_base, - rqstp->rq_argp)) { - dprintk("nfsd: failed to decode arguments!\n"); - nfsd_cache_update(rqstp, RC_NOCACHE, NULL); - *statp = rpc_garbage_args; - return 1; - } - /* need to grab the location to store the status, as * nfsv4 does some encoding while processing */ diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 6bd2f3c21f2..4eefaf1b42e 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -482,6 +482,7 @@ extern void nfsd4_recdir_purge_old(void); extern int nfsd4_create_clid_dir(struct nfs4_client *clp); extern void nfsd4_remove_clid_dir(struct nfs4_client *clp); extern void release_session_client(struct nfsd4_session *); +extern __be32 nfs4_validate_stateid(stateid_t *, int); static inline void nfs4_put_stateowner(struct nfs4_stateowner *so) diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 366401e1a53..d2a8d04428c 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -342,6 +342,25 @@ struct nfsd4_setclientid_confirm { nfs4_verifier sc_confirm; }; +struct nfsd4_saved_compoundargs { + __be32 *p; + __be32 *end; + int pagelen; + struct page **pagelist; +}; + +struct nfsd4_test_stateid { + __be32 ts_num_ids; + __be32 ts_has_session; + struct nfsd4_compoundargs *ts_saved_args; + struct nfsd4_saved_compoundargs ts_savedp; +}; + +struct nfsd4_free_stateid { + stateid_t fr_stateid; /* request */ + __be32 fr_status; /* response */ +}; + /* also used for NVERIFY */ struct nfsd4_verify { u32 ve_bmval[3]; /* request */ @@ -432,10 +451,14 @@ struct nfsd4_op { struct nfsd4_destroy_session destroy_session; struct nfsd4_sequence sequence; struct nfsd4_reclaim_complete reclaim_complete; + struct nfsd4_test_stateid test_stateid; + struct nfsd4_free_stateid free_stateid; } u; struct nfs4_replay * replay; }; +bool nfsd4_cache_this_op(struct nfsd4_op *); + struct nfsd4_compoundargs { /* scratch variables for XDR decode */ __be32 * p; @@ -458,6 +481,7 @@ struct nfsd4_compoundargs { u32 opcnt; struct nfsd4_op *ops; struct nfsd4_op iops[8]; + int cachetype; }; struct nfsd4_compoundres { @@ -559,11 +583,15 @@ extern __be32 nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_release_lockowner *rlockowner); -extern void nfsd4_release_compoundargs(struct nfsd4_compoundargs *); +extern int nfsd4_release_compoundargs(void *rq, __be32 *p, void *resp); extern __be32 nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_delegreturn *dr); extern __be32 nfsd4_renew(struct svc_rqst *rqstp, struct nfsd4_compound_state *, clientid_t *clid); +extern __be32 nfsd4_test_stateid(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, struct nfsd4_test_stateid *test_stateid); +extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp, + struct nfsd4_compound_state *, struct nfsd4_free_stateid *free_stateid); #endif /* diff --git a/fs/notify/group.c b/fs/notify/group.c index d309f38449c..63fc294a469 100644 --- a/fs/notify/group.c +++ b/fs/notify/group.c @@ -26,7 +26,7 @@ #include <linux/fsnotify_backend.h> #include "fsnotify.h" -#include <asm/atomic.h> +#include <linux/atomic.h> /* * Final freeing of a group diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 07ea8d3e6ea..b13c00ac48e 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -23,7 +23,7 @@ #include <linux/mutex.h> #include <linux/spinlock.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <linux/fsnotify_backend.h> #include "fsnotify.h" diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 252ab1f6452..e14587d5568 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -92,7 +92,7 @@ #include <linux/spinlock.h> #include <linux/srcu.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <linux/fsnotify_backend.h> #include "fsnotify.h" diff --git a/fs/notify/notification.c b/fs/notify/notification.c index f39260f8f86..ee188158a22 100644 --- a/fs/notify/notification.c +++ b/fs/notify/notification.c @@ -43,7 +43,7 @@ #include <linux/slab.h> #include <linux/spinlock.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <linux/fsnotify_backend.h> #include "fsnotify.h" diff --git a/fs/notify/vfsmount_mark.c b/fs/notify/vfsmount_mark.c index e86577d6c5c..778fe6cae3b 100644 --- a/fs/notify/vfsmount_mark.c +++ b/fs/notify/vfsmount_mark.c @@ -24,7 +24,7 @@ #include <linux/mutex.h> #include <linux/spinlock.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <linux/fsnotify_backend.h> #include "fsnotify.h" diff --git a/fs/ntfs/inode.h b/fs/ntfs/inode.h index 2dabf813456..fe8e7e92888 100644 --- a/fs/ntfs/inode.h +++ b/fs/ntfs/inode.h @@ -24,7 +24,7 @@ #ifndef _LINUX_NTFS_INODE_H #define _LINUX_NTFS_INODE_H -#include <asm/atomic.h> +#include <linux/atomic.h> #include <linux/fs.h> #include <linux/list.h> diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c index 1cee970eb55..783c58d9daf 100644 --- a/fs/ocfs2/acl.c +++ b/fs/ocfs2/acl.c @@ -290,47 +290,32 @@ static int ocfs2_set_acl(handle_t *handle, return ret; } -int ocfs2_check_acl(struct inode *inode, int mask) +struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type) { struct ocfs2_super *osb; struct buffer_head *di_bh = NULL; struct posix_acl *acl; int ret = -EAGAIN; - if (mask & MAY_NOT_BLOCK) - return -ECHILD; - osb = OCFS2_SB(inode->i_sb); if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL)) - return ret; + return NULL; ret = ocfs2_read_inode_block(inode, &di_bh); - if (ret < 0) { - mlog_errno(ret); - return ret; - } + if (ret < 0) + return ERR_PTR(ret); - acl = ocfs2_get_acl_nolock(inode, ACL_TYPE_ACCESS, di_bh); + acl = ocfs2_get_acl_nolock(inode, type, di_bh); brelse(di_bh); - if (IS_ERR(acl)) { - mlog_errno(PTR_ERR(acl)); - return PTR_ERR(acl); - } - if (acl) { - ret = posix_acl_permission(inode, acl, mask); - posix_acl_release(acl); - return ret; - } - - return -EAGAIN; + return acl; } int ocfs2_acl_chmod(struct inode *inode) { struct ocfs2_super *osb = OCFS2_SB(inode->i_sb); - struct posix_acl *acl, *clone; + struct posix_acl *acl; int ret; if (S_ISLNK(inode->i_mode)) @@ -342,15 +327,12 @@ int ocfs2_acl_chmod(struct inode *inode) acl = ocfs2_get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl) || !acl) return PTR_ERR(acl); - clone = posix_acl_clone(acl, GFP_KERNEL); + ret = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); + if (ret) + return ret; + ret = ocfs2_set_acl(NULL, inode, NULL, ACL_TYPE_ACCESS, + acl, NULL, NULL); posix_acl_release(acl); - if (!clone) - return -ENOMEM; - ret = posix_acl_chmod_masq(clone, inode->i_mode); - if (!ret) - ret = ocfs2_set_acl(NULL, inode, NULL, ACL_TYPE_ACCESS, - clone, NULL, NULL); - posix_acl_release(clone); return ret; } @@ -388,8 +370,6 @@ int ocfs2_init_acl(handle_t *handle, } } if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) { - struct posix_acl *clone; - if (S_ISDIR(inode->i_mode)) { ret = ocfs2_set_acl(handle, inode, di_bh, ACL_TYPE_DEFAULT, acl, @@ -397,27 +377,22 @@ int ocfs2_init_acl(handle_t *handle, if (ret) goto cleanup; } - clone = posix_acl_clone(acl, GFP_NOFS); - ret = -ENOMEM; - if (!clone) - goto cleanup; - mode = inode->i_mode; - ret = posix_acl_create_masq(clone, &mode); - if (ret >= 0) { - ret2 = ocfs2_acl_set_mode(inode, di_bh, handle, mode); - if (ret2) { - mlog_errno(ret2); - ret = ret2; - goto cleanup; - } - if (ret > 0) { - ret = ocfs2_set_acl(handle, inode, - di_bh, ACL_TYPE_ACCESS, - clone, meta_ac, data_ac); - } + ret = posix_acl_create(&acl, GFP_NOFS, &mode); + if (ret < 0) + return ret; + + ret2 = ocfs2_acl_set_mode(inode, di_bh, handle, mode); + if (ret2) { + mlog_errno(ret2); + ret = ret2; + goto cleanup; + } + if (ret > 0) { + ret = ocfs2_set_acl(handle, inode, + di_bh, ACL_TYPE_ACCESS, + acl, meta_ac, data_ac); } - posix_acl_release(clone); } cleanup: posix_acl_release(acl); diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h index 5c5d31f0585..071fbd380f2 100644 --- a/fs/ocfs2/acl.h +++ b/fs/ocfs2/acl.h @@ -26,7 +26,7 @@ struct ocfs2_acl_entry { __le32 e_id; }; -extern int ocfs2_check_acl(struct inode *, int); +struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type); extern int ocfs2_acl_chmod(struct inode *); extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *, struct buffer_head *, struct buffer_head *, diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 0fc2bd34039..de4ea1af041 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -2600,14 +2600,14 @@ const struct inode_operations ocfs2_file_iops = { .listxattr = ocfs2_listxattr, .removexattr = generic_removexattr, .fiemap = ocfs2_fiemap, - .check_acl = ocfs2_check_acl, + .get_acl = ocfs2_iop_get_acl, }; const struct inode_operations ocfs2_special_file_iops = { .setattr = ocfs2_setattr, .getattr = ocfs2_getattr, .permission = ocfs2_permission, - .check_acl = ocfs2_check_acl, + .get_acl = ocfs2_iop_get_acl, }; /* diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c index cd9427023d2..d53cb706f14 100644 --- a/fs/ocfs2/move_extents.c +++ b/fs/ocfs2/move_extents.c @@ -36,7 +36,6 @@ #include "dir.h" #include "buffer_head_io.h" #include "sysfile.h" -#include "suballoc.h" #include "refcounttree.h" #include "move_extents.h" diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 33889dc52dd..53aa41ed7bf 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -2498,5 +2498,5 @@ const struct inode_operations ocfs2_dir_iops = { .listxattr = ocfs2_listxattr, .removexattr = generic_removexattr, .fiemap = ocfs2_fiemap, - .check_acl = ocfs2_check_acl, + .get_acl = ocfs2_iop_get_acl, }; diff --git a/fs/partitions/check.c b/fs/partitions/check.c index d545e97d99c..e3c63d1c5e1 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -237,22 +237,22 @@ ssize_t part_size_show(struct device *dev, return sprintf(buf, "%llu\n",(unsigned long long)p->nr_sects); } -ssize_t part_ro_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t part_ro_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); return sprintf(buf, "%d\n", p->policy ? 1 : 0); } -ssize_t part_alignment_offset_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t part_alignment_offset_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); return sprintf(buf, "%llu\n", (unsigned long long)p->alignment_offset); } -ssize_t part_discard_alignment_show(struct device *dev, - struct device_attribute *attr, char *buf) +static ssize_t part_discard_alignment_show(struct device *dev, + struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); return sprintf(buf, "%u\n", p->discard_alignment); diff --git a/fs/pipe.c b/fs/pipe.c index da42f7db50d..1b7f9af67cc 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1291,8 +1291,8 @@ static int __init init_pipe_fs(void) static void __exit exit_pipe_fs(void) { + kern_unmount(pipe_mnt); unregister_filesystem(&pipe_fs_type); - mntput(pipe_mnt); } fs_initcall(init_pipe_fs); diff --git a/fs/posix_acl.c b/fs/posix_acl.c index b1cf6bf4b41..d43729a760e 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -14,7 +14,7 @@ #include <linux/kernel.h> #include <linux/slab.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <linux/fs.h> #include <linux/sched.h> #include <linux/posix_acl.h> @@ -24,13 +24,9 @@ EXPORT_SYMBOL(posix_acl_init); EXPORT_SYMBOL(posix_acl_alloc); -EXPORT_SYMBOL(posix_acl_clone); EXPORT_SYMBOL(posix_acl_valid); EXPORT_SYMBOL(posix_acl_equiv_mode); EXPORT_SYMBOL(posix_acl_from_mode); -EXPORT_SYMBOL(posix_acl_create_masq); -EXPORT_SYMBOL(posix_acl_chmod_masq); -EXPORT_SYMBOL(posix_acl_permission); /* * Init a fresh posix_acl @@ -59,7 +55,7 @@ posix_acl_alloc(int count, gfp_t flags) /* * Clone an ACL. */ -struct posix_acl * +static struct posix_acl * posix_acl_clone(const struct posix_acl *acl, gfp_t flags) { struct posix_acl *clone = NULL; @@ -283,8 +279,7 @@ check_perm: * system calls. All permissions that are not granted by the acl are removed. * The permissions in the acl are changed to reflect the mode_p parameter. */ -int -posix_acl_create_masq(struct posix_acl *acl, mode_t *mode_p) +static int posix_acl_create_masq(struct posix_acl *acl, mode_t *mode_p) { struct posix_acl_entry *pa, *pe; struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL; @@ -341,8 +336,7 @@ posix_acl_create_masq(struct posix_acl *acl, mode_t *mode_p) /* * Modify the ACL for the chmod syscall. */ -int -posix_acl_chmod_masq(struct posix_acl *acl, mode_t mode) +static int posix_acl_chmod_masq(struct posix_acl *acl, mode_t mode) { struct posix_acl_entry *group_obj = NULL, *mask_obj = NULL; struct posix_acl_entry *pa, *pe; @@ -386,3 +380,39 @@ posix_acl_chmod_masq(struct posix_acl *acl, mode_t mode) return 0; } + +int +posix_acl_create(struct posix_acl **acl, gfp_t gfp, mode_t *mode_p) +{ + struct posix_acl *clone = posix_acl_clone(*acl, gfp); + int err = -ENOMEM; + if (clone) { + err = posix_acl_create_masq(clone, mode_p); + if (err < 0) { + posix_acl_release(clone); + clone = NULL; + } + } + posix_acl_release(*acl); + *acl = clone; + return err; +} +EXPORT_SYMBOL(posix_acl_create); + +int +posix_acl_chmod(struct posix_acl **acl, gfp_t gfp, mode_t mode) +{ + struct posix_acl *clone = posix_acl_clone(*acl, gfp); + int err = -ENOMEM; + if (clone) { + err = posix_acl_chmod_masq(clone, mode); + if (err) { + posix_acl_release(clone); + clone = NULL; + } + } + posix_acl_release(*acl); + *acl = clone; + return err; +} +EXPORT_SYMBOL(posix_acl_chmod); diff --git a/fs/proc/base.c b/fs/proc/base.c index 91fb655a5cb..08e3eccf9a1 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1118,10 +1118,9 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, * Warn that /proc/pid/oom_adj is deprecated, see * Documentation/feature-removal-schedule.txt. */ - printk_once(KERN_WARNING "%s (%d): /proc/%d/oom_adj is deprecated, " - "please use /proc/%d/oom_score_adj instead.\n", - current->comm, task_pid_nr(current), - task_pid_nr(task), task_pid_nr(task)); + WARN_ONCE(1, "%s (%d): /proc/%d/oom_adj is deprecated, please use /proc/%d/oom_score_adj instead.\n", + current->comm, task_pid_nr(current), task_pid_nr(task), + task_pid_nr(task)); task->signal->oom_adj = oom_adjust; /* * Scale /proc/pid/oom_score_adj appropriately ensuring that a maximum @@ -2707,9 +2706,16 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole) { struct task_io_accounting acct = task->ioac; unsigned long flags; + int result; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) - return -EACCES; + result = mutex_lock_killable(&task->signal->cred_guard_mutex); + if (result) + return result; + + if (!ptrace_may_access(task, PTRACE_MODE_READ)) { + result = -EACCES; + goto out_unlock; + } if (whole && lock_task_sighand(task, &flags)) { struct task_struct *t = task; @@ -2720,7 +2726,7 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole) unlock_task_sighand(task, &flags); } - return sprintf(buffer, + result = sprintf(buffer, "rchar: %llu\n" "wchar: %llu\n" "syscr: %llu\n" @@ -2735,6 +2741,9 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole) (unsigned long long)acct.read_bytes, (unsigned long long)acct.write_bytes, (unsigned long long)acct.cancelled_write_bytes); +out_unlock: + mutex_unlock(&task->signal->cred_guard_mutex); + return result; } static int proc_tid_io_accounting(struct task_struct *task, char *buffer) diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 74b48cfa1bb..7ed72d6c1c6 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -319,7 +319,7 @@ static int proc_reg_open(struct inode *inode, struct file *file) if (!pde->proc_fops) { spin_unlock(&pde->pde_unload_lock); kfree(pdeo); - return -EINVAL; + return -ENOENT; } pde->pde_users++; open = pde->proc_fops->open; diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index ed257d14156..586174168e2 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -10,7 +10,7 @@ #include <linux/seq_file.h> #include <linux/swap.h> #include <linux/vmstat.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <asm/page.h> #include <asm/pgtable.h> #include "internal.h" diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 483442e66ed..d1aca1df4f9 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -214,7 +214,7 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th, } /* otherwise we clear all bit were set ... */ while (--i >= *beg) - reiserfs_test_and_clear_le_bit + reiserfs_clear_le_bit (i, bh->b_data); reiserfs_restore_prepared_buffer(s, bh); *beg = org; @@ -1222,15 +1222,11 @@ void reiserfs_cache_bitmap_metadata(struct super_block *sb, info->free_count = 0; while (--cur >= (unsigned long *)bh->b_data) { - int i; - /* 0 and ~0 are special, we can optimize for them */ if (*cur == 0) info->free_count += BITS_PER_LONG; else if (*cur != ~0L) /* A mix, investigate */ - for (i = BITS_PER_LONG - 1; i >= 0; i--) - if (!reiserfs_test_le_bit(i, cur)) - info->free_count++; + info->free_count += BITS_PER_LONG - hweight_long(*cur); } } diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index c7156dc39ce..ace635053a3 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -319,5 +319,5 @@ const struct inode_operations reiserfs_file_inode_operations = { .listxattr = reiserfs_listxattr, .removexattr = reiserfs_removexattr, .permission = reiserfs_permission, - .check_acl = reiserfs_check_acl, + .get_acl = reiserfs_get_acl, }; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 2922b90ceac..9b0d4b78b4f 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1475,6 +1475,11 @@ void reiserfs_read_locked_inode(struct inode *inode, reiserfs_check_path(&path_to_sd); /* init inode should be relsing */ + /* + * Stat data v1 doesn't support ACLs. + */ + if (get_inode_sd_version(inode) == STAT_DATA_V1) + cache_no_acl(inode); } /** diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index c5e82ece7c6..a159ba5a35e 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -678,23 +678,19 @@ struct buffer_chunk { static void write_chunk(struct buffer_chunk *chunk) { int i; - get_fs_excl(); for (i = 0; i < chunk->nr; i++) { submit_logged_buffer(chunk->bh[i]); } chunk->nr = 0; - put_fs_excl(); } static void write_ordered_chunk(struct buffer_chunk *chunk) { int i; - get_fs_excl(); for (i = 0; i < chunk->nr; i++) { submit_ordered_buffer(chunk->bh[i]); } chunk->nr = 0; - put_fs_excl(); } static int add_to_chunk(struct buffer_chunk *chunk, struct buffer_head *bh, @@ -986,8 +982,6 @@ static int flush_commit_list(struct super_block *s, return 0; } - get_fs_excl(); - /* before we can put our commit blocks on disk, we have to make sure everyone older than ** us is on disk too */ @@ -1145,7 +1139,6 @@ static int flush_commit_list(struct super_block *s, if (retval) reiserfs_abort(s, retval, "Journal write error in %s", __func__); - put_fs_excl(); return retval; } @@ -1374,8 +1367,6 @@ static int flush_journal_list(struct super_block *s, return 0; } - get_fs_excl(); - /* if all the work is already done, get out of here */ if (atomic_read(&(jl->j_nonzerolen)) <= 0 && atomic_read(&(jl->j_commit_left)) <= 0) { @@ -1597,7 +1588,6 @@ static int flush_journal_list(struct super_block *s, put_journal_list(s, jl); if (flushall) mutex_unlock(&journal->j_flush_mutex); - put_fs_excl(); return err; } @@ -3108,7 +3098,6 @@ static int do_journal_begin_r(struct reiserfs_transaction_handle *th, th->t_trans_id = journal->j_trans_id; unlock_journal(sb); INIT_LIST_HEAD(&th->t_list); - get_fs_excl(); return 0; out_fail: @@ -3964,7 +3953,6 @@ static int do_journal_end(struct reiserfs_transaction_handle *th, flush = flags & FLUSH_ALL; wait_on_commit = flags & WAIT; - put_fs_excl(); current->journal_info = th->t_handle_save; reiserfs_check_lock_depth(sb, "journal end"); if (journal->j_len == 0) { @@ -4316,4 +4304,3 @@ void reiserfs_abort_journal(struct super_block *sb, int errno) dump_stack(); #endif } - diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index 551f1b79dbc..ef392324bbf 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -1529,7 +1529,7 @@ const struct inode_operations reiserfs_dir_inode_operations = { .listxattr = reiserfs_listxattr, .removexattr = reiserfs_removexattr, .permission = reiserfs_permission, - .check_acl = reiserfs_check_acl, + .get_acl = reiserfs_get_acl, }; /* @@ -1546,7 +1546,7 @@ const struct inode_operations reiserfs_symlink_inode_operations = { .listxattr = reiserfs_listxattr, .removexattr = reiserfs_removexattr, .permission = reiserfs_permission, - .check_acl = reiserfs_check_acl, + .get_acl = reiserfs_get_acl, }; @@ -1560,5 +1560,5 @@ const struct inode_operations reiserfs_special_inode_operations = { .listxattr = reiserfs_listxattr, .removexattr = reiserfs_removexattr, .permission = reiserfs_permission, - .check_acl = reiserfs_check_acl, + .get_acl = reiserfs_get_acl, }; diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c index b3a94d20f0f..b6b9b1fe33b 100644 --- a/fs/reiserfs/resize.c +++ b/fs/reiserfs/resize.c @@ -136,7 +136,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) return -EIO; } memset(bh->b_data, 0, sb_blocksize(sb)); - reiserfs_test_and_set_le_bit(0, bh->b_data); + reiserfs_set_le_bit(0, bh->b_data); reiserfs_cache_bitmap_metadata(s, bh, bitmap + i); set_buffer_uptodate(bh); @@ -172,7 +172,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) reiserfs_prepare_for_journal(s, bh, 1); for (i = block_r; i < s->s_blocksize * 8; i++) - reiserfs_test_and_clear_le_bit(i, bh->b_data); + reiserfs_clear_le_bit(i, bh->b_data); info->free_count += s->s_blocksize * 8 - block_r; journal_mark_dirty(&th, s, bh); @@ -190,7 +190,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) reiserfs_prepare_for_journal(s, bh, 1); for (i = block_r_new; i < s->s_blocksize * 8; i++) - reiserfs_test_and_set_le_bit(i, bh->b_data); + reiserfs_set_le_bit(i, bh->b_data); journal_mark_dirty(&th, s, bh); brelse(bh); diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 6938d8c68d6..6bc346c160e 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -867,33 +867,6 @@ out: return err; } -int reiserfs_check_acl(struct inode *inode, int mask) -{ - struct posix_acl *acl; - int error = -EAGAIN; /* do regular unix permission checks by default */ - - /* - * Stat data v1 doesn't support ACLs. - */ - if (get_inode_sd_version(inode) == STAT_DATA_V1) - return -EAGAIN; - - if (mask & MAY_NOT_BLOCK) - return -ECHILD; - - acl = reiserfs_get_acl(inode, ACL_TYPE_ACCESS); - - if (acl) { - if (!IS_ERR(acl)) { - error = posix_acl_permission(inode, acl, mask); - posix_acl_release(acl); - } else if (PTR_ERR(acl) != -ENODATA) - error = PTR_ERR(acl); - } - - return error; -} - static int create_privroot(struct dentry *dentry) { int err; diff --git a/fs/reiserfs/xattr_acl.c b/fs/reiserfs/xattr_acl.c index 3dc38f1206f..7362cf4c946 100644 --- a/fs/reiserfs/xattr_acl.c +++ b/fs/reiserfs/xattr_acl.c @@ -354,9 +354,7 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, return PTR_ERR(acl); if (acl) { - struct posix_acl *acl_copy; mode_t mode = inode->i_mode; - int need_acl; /* Copy the default ACL to the default ACL of a new directory */ if (S_ISDIR(inode->i_mode)) { @@ -368,29 +366,15 @@ reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th, /* Now we reconcile the new ACL and the mode, potentially modifying both */ - acl_copy = posix_acl_clone(acl, GFP_NOFS); - if (!acl_copy) { - err = -ENOMEM; - goto cleanup; - } + err = posix_acl_create(&acl, GFP_NOFS, &mode); + if (err < 0) + return err; - need_acl = posix_acl_create_masq(acl_copy, &mode); - if (need_acl >= 0) { - if (mode != inode->i_mode) { - inode->i_mode = mode; - } + inode->i_mode = mode; - /* If we need an ACL.. */ - if (need_acl > 0) { - err = reiserfs_set_acl(th, inode, - ACL_TYPE_ACCESS, - acl_copy); - if (err) - goto cleanup_copy; - } - } - cleanup_copy: - posix_acl_release(acl_copy); + /* If we need an ACL.. */ + if (err > 0) + err = reiserfs_set_acl(th, inode, ACL_TYPE_ACCESS, acl); cleanup: posix_acl_release(acl); } else { @@ -445,7 +429,10 @@ int reiserfs_cache_default_acl(struct inode *inode) int reiserfs_acl_chmod(struct inode *inode) { - struct posix_acl *acl, *clone; + struct reiserfs_transaction_handle th; + struct posix_acl *acl; + size_t size; + int depth; int error; if (S_ISLNK(inode->i_mode)) @@ -463,30 +450,22 @@ int reiserfs_acl_chmod(struct inode *inode) return 0; if (IS_ERR(acl)) return PTR_ERR(acl); - clone = posix_acl_clone(acl, GFP_NOFS); - posix_acl_release(acl); - if (!clone) - return -ENOMEM; - error = posix_acl_chmod_masq(clone, inode->i_mode); + error = posix_acl_chmod(&acl, GFP_NOFS, inode->i_mode); + if (error) + return error; + + size = reiserfs_xattr_nblocks(inode, reiserfs_acl_size(acl->a_count)); + depth = reiserfs_write_lock_once(inode->i_sb); + error = journal_begin(&th, inode->i_sb, size * 2); if (!error) { - struct reiserfs_transaction_handle th; - size_t size = reiserfs_xattr_nblocks(inode, - reiserfs_acl_size(clone->a_count)); - int depth; - - depth = reiserfs_write_lock_once(inode->i_sb); - error = journal_begin(&th, inode->i_sb, size * 2); - if (!error) { - int error2; - error = reiserfs_set_acl(&th, inode, ACL_TYPE_ACCESS, - clone); - error2 = journal_end(&th, inode->i_sb, size * 2); - if (error2) - error = error2; - } - reiserfs_write_unlock_once(inode->i_sb, depth); + int error2; + error = reiserfs_set_acl(&th, inode, ACL_TYPE_ACCESS, acl); + error2 = journal_end(&th, inode->i_sb, size * 2); + if (error2) + error = error2; } - posix_acl_release(clone); + reiserfs_write_unlock_once(inode->i_sb, depth); + posix_acl_release(acl); return error; } diff --git a/fs/splice.c b/fs/splice.c index aa866d30969..fa2defa8afc 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -132,7 +132,7 @@ error: return err; } -static const struct pipe_buf_operations page_cache_pipe_buf_ops = { +const struct pipe_buf_operations page_cache_pipe_buf_ops = { .can_merge = 0, .map = generic_pipe_buf_map, .unmap = generic_pipe_buf_unmap, @@ -264,7 +264,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, return ret; } -static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) +void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) { page_cache_release(spd->pages[i]); } diff --git a/fs/squashfs/Kconfig b/fs/squashfs/Kconfig index 7797218d0b3..1360d4f88f4 100644 --- a/fs/squashfs/Kconfig +++ b/fs/squashfs/Kconfig @@ -1,7 +1,6 @@ config SQUASHFS tristate "SquashFS 4.0 - Squashed file system support" depends on BLOCK - select ZLIB_INFLATE help Saying Y here includes support for SquashFS 4.0 (a Compressed Read-Only File System). Squashfs is a highly compressed read-only @@ -36,6 +35,19 @@ config SQUASHFS_XATTR If unsure, say N. +config SQUASHFS_ZLIB + bool "Include support for ZLIB compressed file systems" + depends on SQUASHFS + select ZLIB_INFLATE + default y + help + ZLIB compression is the standard compression used by Squashfs + file systems. It offers a good trade-off between compression + achieved and the amount of CPU time and memory necessary to + compress and decompress. + + If unsure, say Y. + config SQUASHFS_LZO bool "Include support for LZO compressed file systems" depends on SQUASHFS diff --git a/fs/squashfs/Makefile b/fs/squashfs/Makefile index cecf2bea07a..110b0476f3b 100644 --- a/fs/squashfs/Makefile +++ b/fs/squashfs/Makefile @@ -4,7 +4,8 @@ obj-$(CONFIG_SQUASHFS) += squashfs.o squashfs-y += block.o cache.o dir.o export.o file.o fragment.o id.o inode.o -squashfs-y += namei.o super.o symlink.o zlib_wrapper.o decompressor.o +squashfs-y += namei.o super.o symlink.o decompressor.o squashfs-$(CONFIG_SQUASHFS_XATTR) += xattr.o xattr_id.o squashfs-$(CONFIG_SQUASHFS_LZO) += lzo_wrapper.o squashfs-$(CONFIG_SQUASHFS_XZ) += xz_wrapper.o +squashfs-$(CONFIG_SQUASHFS_ZLIB) += zlib_wrapper.o diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c index 9f1b0bb96f1..3f6271d86ab 100644 --- a/fs/squashfs/decompressor.c +++ b/fs/squashfs/decompressor.c @@ -52,6 +52,12 @@ static const struct squashfs_decompressor squashfs_xz_comp_ops = { }; #endif +#ifndef CONFIG_SQUASHFS_ZLIB +static const struct squashfs_decompressor squashfs_zlib_comp_ops = { + NULL, NULL, NULL, ZLIB_COMPRESSION, "zlib", 0 +}; +#endif + static const struct squashfs_decompressor squashfs_unknown_comp_ops = { NULL, NULL, NULL, 0, "unknown", 0 }; diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h index 8ba70cff09a..330073e2902 100644 --- a/fs/squashfs/decompressor.h +++ b/fs/squashfs/decompressor.h @@ -56,4 +56,8 @@ extern const struct squashfs_decompressor squashfs_xz_comp_ops; extern const struct squashfs_decompressor squashfs_lzo_comp_ops; #endif +#ifdef CONFIG_SQUASHFS_ZLIB +extern const struct squashfs_decompressor squashfs_zlib_comp_ops; +#endif + #endif diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index e3be6a71cfa..d1266516ed0 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -97,6 +97,3 @@ extern const struct inode_operations squashfs_symlink_inode_ops; /* xattr.c */ extern const struct xattr_handler *squashfs_xattr_handlers[]; - -/* zlib_wrapper.c */ -extern const struct squashfs_decompressor squashfs_zlib_comp_ops; diff --git a/fs/super.c b/fs/super.c index 7943f04cb3a..3f56a269a4f 100644 --- a/fs/super.c +++ b/fs/super.c @@ -351,13 +351,11 @@ bool grab_super_passive(struct super_block *sb) */ void lock_super(struct super_block * sb) { - get_fs_excl(); mutex_lock(&sb->s_lock); } void unlock_super(struct super_block * sb) { - put_fs_excl(); mutex_unlock(&sb->s_lock); } @@ -385,7 +383,6 @@ void generic_shutdown_super(struct super_block *sb) if (sb->s_root) { shrink_dcache_for_umount(sb); sync_filesystem(sb); - get_fs_excl(); sb->s_flags &= ~MS_ACTIVE; fsnotify_unmount_inodes(&sb->s_inodes); @@ -400,7 +397,6 @@ void generic_shutdown_super(struct super_block *sb) "Self-destruct in 5 seconds. Have a nice day...\n", sb->s_id); } - put_fs_excl(); } spin_lock(&sb_lock); /* should be initialized for __put_super_and_need_restart() */ diff --git a/fs/xfs/linux-2.6/xfs_acl.c b/fs/xfs/linux-2.6/xfs_acl.c index cac48fe22ad..44ce5165680 100644 --- a/fs/xfs/linux-2.6/xfs_acl.c +++ b/fs/xfs/linux-2.6/xfs_acl.c @@ -114,6 +114,8 @@ xfs_get_acl(struct inode *inode, int type) if (acl != ACL_NOT_CACHED) return acl; + trace_xfs_get_acl(ip); + switch (type) { case ACL_TYPE_ACCESS: ea_name = SGI_ACL_FILE; @@ -218,40 +220,6 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) return error; } -int -xfs_check_acl(struct inode *inode, int mask) -{ - struct xfs_inode *ip; - struct posix_acl *acl; - int error = -EAGAIN; - - ip = XFS_I(inode); - trace_xfs_check_acl(ip); - - /* - * If there is no attribute fork no ACL exists on this inode and - * we can skip the whole exercise. - */ - if (!XFS_IFORK_Q(ip)) - return -EAGAIN; - - if (mask & MAY_NOT_BLOCK) { - if (!negative_cached_acl(inode, ACL_TYPE_ACCESS)) - return -ECHILD; - return -EAGAIN; - } - - acl = xfs_get_acl(inode, ACL_TYPE_ACCESS); - if (IS_ERR(acl)) - return PTR_ERR(acl); - if (acl) { - error = posix_acl_permission(inode, acl, mask); - posix_acl_release(acl); - } - - return error; -} - static int xfs_set_mode(struct inode *inode, mode_t mode) { @@ -297,29 +265,23 @@ posix_acl_default_exists(struct inode *inode) * No need for i_mutex because the inode is not yet exposed to the VFS. */ int -xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl) +xfs_inherit_acl(struct inode *inode, struct posix_acl *acl) { - struct posix_acl *clone; - mode_t mode; + mode_t mode = inode->i_mode; int error = 0, inherit = 0; if (S_ISDIR(inode->i_mode)) { - error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, default_acl); + error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, acl); if (error) - return error; + goto out; } - clone = posix_acl_clone(default_acl, GFP_KERNEL); - if (!clone) - return -ENOMEM; - - mode = inode->i_mode; - error = posix_acl_create_masq(clone, &mode); + error = posix_acl_create(&acl, GFP_KERNEL, &mode); if (error < 0) - goto out_release_clone; + return error; /* - * If posix_acl_create_masq returns a positive value we need to + * If posix_acl_create returns a positive value we need to * inherit a permission that can't be represented using the Unix * mode bits and we actually need to set an ACL. */ @@ -328,20 +290,20 @@ xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl) error = xfs_set_mode(inode, mode); if (error) - goto out_release_clone; + goto out; if (inherit) - error = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone); + error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl); - out_release_clone: - posix_acl_release(clone); +out: + posix_acl_release(acl); return error; } int xfs_acl_chmod(struct inode *inode) { - struct posix_acl *acl, *clone; + struct posix_acl *acl; int error; if (S_ISLNK(inode->i_mode)) @@ -351,16 +313,12 @@ xfs_acl_chmod(struct inode *inode) if (IS_ERR(acl) || !acl) return PTR_ERR(acl); - clone = posix_acl_clone(acl, GFP_KERNEL); - posix_acl_release(acl); - if (!clone) - return -ENOMEM; - - error = posix_acl_chmod_masq(clone, inode->i_mode); - if (!error) - error = xfs_set_acl(inode, ACL_TYPE_ACCESS, clone); + error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); + if (error) + return error; - posix_acl_release(clone); + error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl); + posix_acl_release(acl); return error; } diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index cca00f49e09..825390e1c13 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -881,11 +881,14 @@ xfs_file_aio_write( /* Handle various SYNC-type writes */ if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { loff_t end = pos + ret - 1; + int error; xfs_rw_iunlock(ip, iolock); - ret = -xfs_file_fsync(file, pos, end, + error = xfs_file_fsync(file, pos, end, (file->f_flags & __O_SYNC) ? 0 : 1); xfs_rw_ilock(ip, iolock); + if (error) + ret = error; } out_unlock: diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 501e4f63054..6544c3236bc 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -202,9 +202,9 @@ xfs_vn_mknod( if (default_acl) { error = -xfs_inherit_acl(inode, default_acl); + default_acl = NULL; if (unlikely(error)) goto out_cleanup_inode; - posix_acl_release(default_acl); } @@ -1022,7 +1022,7 @@ xfs_vn_fiemap( } static const struct inode_operations xfs_inode_operations = { - .check_acl = xfs_check_acl, + .get_acl = xfs_get_acl, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, .setxattr = generic_setxattr, @@ -1048,7 +1048,7 @@ static const struct inode_operations xfs_dir_inode_operations = { .rmdir = xfs_vn_unlink, .mknod = xfs_vn_mknod, .rename = xfs_vn_rename, - .check_acl = xfs_check_acl, + .get_acl = xfs_get_acl, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, .setxattr = generic_setxattr, @@ -1073,7 +1073,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = { .rmdir = xfs_vn_unlink, .mknod = xfs_vn_mknod, .rename = xfs_vn_rename, - .check_acl = xfs_check_acl, + .get_acl = xfs_get_acl, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, .setxattr = generic_setxattr, @@ -1086,7 +1086,7 @@ static const struct inode_operations xfs_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = xfs_vn_follow_link, .put_link = xfs_vn_put_link, - .check_acl = xfs_check_acl, + .get_acl = xfs_get_acl, .getattr = xfs_vn_getattr, .setattr = xfs_vn_setattr, .setxattr = generic_setxattr, @@ -1194,6 +1194,10 @@ xfs_setup_inode( break; } + /* if there is no attribute fork no ACL can exist on this inode */ + if (!XFS_IFORK_Q(ip)) + cache_no_acl(inode); + xfs_iflags_clear(ip, XFS_INEW); barrier(); diff --git a/fs/xfs/linux-2.6/xfs_trace.h b/fs/xfs/linux-2.6/xfs_trace.h index fda0708ef2e..690fc7a7bd7 100644 --- a/fs/xfs/linux-2.6/xfs_trace.h +++ b/fs/xfs/linux-2.6/xfs_trace.h @@ -571,7 +571,7 @@ DEFINE_INODE_EVENT(xfs_alloc_file_space); DEFINE_INODE_EVENT(xfs_free_file_space); DEFINE_INODE_EVENT(xfs_readdir); #ifdef CONFIG_XFS_POSIX_ACL -DEFINE_INODE_EVENT(xfs_check_acl); +DEFINE_INODE_EVENT(xfs_get_acl); #endif DEFINE_INODE_EVENT(xfs_vm_bmap); DEFINE_INODE_EVENT(xfs_file_ioctl); diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 0135e2a669d..2c656ef4947 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -42,7 +42,6 @@ struct xfs_acl { #define SGI_ACL_DEFAULT_SIZE (sizeof(SGI_ACL_DEFAULT)-1) #ifdef CONFIG_XFS_POSIX_ACL -extern int xfs_check_acl(struct inode *inode, int mask); extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl); extern int xfs_acl_chmod(struct inode *inode); @@ -52,7 +51,6 @@ extern int posix_acl_default_exists(struct inode *inode); extern const struct xattr_handler xfs_xattr_acl_access_handler; extern const struct xattr_handler xfs_xattr_acl_default_handler; #else -# define xfs_check_acl NULL # define xfs_get_acl(inode, type) NULL # define xfs_inherit_acl(inode, default_acl) 0 # define xfs_acl_chmod(inode) 0 diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index 084b3247d63..0179a41d9e5 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -1564,7 +1564,7 @@ xfs_dir2_node_addname_int( if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) { xfs_alert(mp, - "%s: dir ino " "%llu needed freesp block %lld for\n" + "%s: dir ino %llu needed freesp block %lld for\n" " data block %lld, got %lld ifbno %llu lastfbno %d", __func__, (unsigned long long)dp->i_ino, (long long)xfs_dir2_db_to_fdb(mp, dbno), |