diff options
Diffstat (limited to 'fs/xfs/xfs_iops.c')
| -rw-r--r-- | fs/xfs/xfs_iops.c | 303 | 
1 files changed, 179 insertions, 124 deletions
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 2b8952d9199..205613a0606 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -17,32 +17,29 @@   */  #include "xfs.h"  #include "xfs_fs.h" +#include "xfs_shared.h"  #include "xfs_format.h" -#include "xfs_acl.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" +#include "xfs_da_format.h"  #include "xfs_inode.h"  #include "xfs_bmap.h"  #include "xfs_bmap_util.h" -#include "xfs_rtalloc.h" +#include "xfs_acl.h" +#include "xfs_quota.h"  #include "xfs_error.h" -#include "xfs_itable.h"  #include "xfs_attr.h" -#include "xfs_buf_item.h" -#include "xfs_inode_item.h" +#include "xfs_trans.h"  #include "xfs_trace.h"  #include "xfs_icache.h"  #include "xfs_symlink.h"  #include "xfs_da_btree.h" -#include "xfs_dir2_format.h"  #include "xfs_dir2_priv.h" +#include "xfs_dinode.h" +#include "xfs_trans_space.h"  #include <linux/capability.h>  #include <linux/xattr.h> @@ -52,6 +49,18 @@  #include <linux/fiemap.h>  #include <linux/slab.h> +/* + * Directories have different lock order w.r.t. mmap_sem compared to regular + * files. This is due to readdir potentially triggering page faults on a user + * buffer inside filldir(), and this happens with the ilock on the directory + * held. For regular files, the lock order is the other way around - the + * mmap_sem is taken during the page fault, and then we lock the ilock to do + * block mapping. Hence we need a different class for the directory ilock so + * that lockdep can tell them apart. 
+ */ +static struct lock_class_key xfs_nondir_ilock_class; +static struct lock_class_key xfs_dir_ilock_class; +  static int  xfs_initxattrs(  	struct inode		*inode, @@ -63,8 +72,8 @@ xfs_initxattrs(  	int			error = 0;  	for (xattr = xattr_array; xattr->name != NULL; xattr++) { -		error = xfs_attr_set(ip, xattr->name, xattr->value, -				     xattr->value_len, ATTR_SECURE); +		error = -xfs_attr_set(ip, xattr->name, xattr->value, +				      xattr->value_len, ATTR_SECURE);  		if (error < 0)  			break;  	} @@ -84,8 +93,8 @@ xfs_init_security(  	struct inode	*dir,  	const struct qstr *qstr)  { -	return security_inode_init_security(inode, dir, qstr, -					    &xfs_initxattrs, NULL); +	return -security_inode_init_security(inode, dir, qstr, +					     &xfs_initxattrs, NULL);  }  static void @@ -115,19 +124,19 @@ xfs_cleanup_inode(  	xfs_dentry_to_name(&teardown, dentry, 0);  	xfs_remove(XFS_I(dir), &teardown, XFS_I(inode)); -	iput(inode);  }  STATIC int -xfs_vn_mknod( +xfs_generic_create(  	struct inode	*dir,  	struct dentry	*dentry,  	umode_t		mode, -	dev_t		rdev) +	dev_t		rdev, +	bool		tmpfile)	/* unnamed file */  {  	struct inode	*inode;  	struct xfs_inode *ip = NULL; -	struct posix_acl *default_acl = NULL; +	struct posix_acl *default_acl, *acl;  	struct xfs_name	name;  	int		error; @@ -143,17 +152,16 @@ xfs_vn_mknod(  		rdev = 0;  	} -	if (IS_POSIXACL(dir)) { -		default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT); -		if (IS_ERR(default_acl)) -			return PTR_ERR(default_acl); +	error = posix_acl_create(dir, &mode, &default_acl, &acl); +	if (error) +		return error; -		if (!default_acl) -			mode &= ~current_umask(); +	if (!tmpfile) { +		xfs_dentry_to_name(&name, dentry, mode); +		error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); +	} else { +		error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip);  	} - -	xfs_dentry_to_name(&name, dentry, mode); -	error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);  	if (unlikely(error))  		goto out_free_acl; @@ -163,22 +171,46 @@ 
xfs_vn_mknod(  	if (unlikely(error))  		goto out_cleanup_inode; +#ifdef CONFIG_XFS_POSIX_ACL  	if (default_acl) { -		error = -xfs_inherit_acl(inode, default_acl); -		default_acl = NULL; -		if (unlikely(error)) +		error = -xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); +		if (error) +			goto out_cleanup_inode; +	} +	if (acl) { +		error = -xfs_set_acl(inode, acl, ACL_TYPE_ACCESS); +		if (error)  			goto out_cleanup_inode;  	} +#endif +	if (tmpfile) +		d_tmpfile(dentry, inode); +	else +		d_instantiate(dentry, inode); -	d_instantiate(dentry, inode); + out_free_acl: +	if (default_acl) +		posix_acl_release(default_acl); +	if (acl) +		posix_acl_release(acl);  	return -error;   out_cleanup_inode: -	xfs_cleanup_inode(dir, inode, dentry); - out_free_acl: -	posix_acl_release(default_acl); -	return -error; +	if (!tmpfile) +		xfs_cleanup_inode(dir, inode, dentry); +	iput(inode); +	goto out_free_acl; +} + +STATIC int +xfs_vn_mknod( +	struct inode	*dir, +	struct dentry	*dentry, +	umode_t		mode, +	dev_t		rdev) +{ +	return xfs_generic_create(dir, dentry, mode, rdev, false);  }  STATIC int @@ -341,6 +373,7 @@ xfs_vn_symlink(   out_cleanup_inode:  	xfs_cleanup_inode(dir, inode, dentry); +	iput(inode);   out:  	return -error;  } @@ -395,18 +428,6 @@ xfs_vn_follow_link(  	return NULL;  } -STATIC void -xfs_vn_put_link( -	struct dentry	*dentry, -	struct nameidata *nd, -	void		*p) -{ -	char		*s = nd_get_link(nd); - -	if (!IS_ERR(s)) -		kfree(s); -} -  STATIC int  xfs_vn_getattr(  	struct vfsmount		*mnt, @@ -463,14 +484,12 @@ xfs_vn_getattr(  static void  xfs_setattr_mode( -	struct xfs_trans	*tp,  	struct xfs_inode	*ip,  	struct iattr		*iattr)  { -	struct inode	*inode = VFS_I(ip); -	umode_t		mode = iattr->ia_mode; +	struct inode		*inode = VFS_I(ip); +	umode_t			mode = iattr->ia_mode; -	ASSERT(tp);  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));  	ip->i_d.di_mode &= S_IFMT; @@ -480,6 +499,32 @@ xfs_setattr_mode(  	inode->i_mode |= mode & ~S_IFMT;  } +static void +xfs_setattr_time( +	struct 
xfs_inode	*ip, +	struct iattr		*iattr) +{ +	struct inode		*inode = VFS_I(ip); + +	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + +	if (iattr->ia_valid & ATTR_ATIME) { +		inode->i_atime = iattr->ia_atime; +		ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; +		ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; +	} +	if (iattr->ia_valid & ATTR_CTIME) { +		inode->i_ctime = iattr->ia_ctime; +		ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; +		ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; +	} +	if (iattr->ia_valid & ATTR_MTIME) { +		inode->i_mtime = iattr->ia_mtime; +		ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; +		ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; +	} +} +  int  xfs_setattr_nonsize(  	struct xfs_inode	*ip, @@ -622,7 +667,8 @@ xfs_setattr_nonsize(  		}  		if (!gid_eq(igid, gid)) {  			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { -				ASSERT(!XFS_IS_PQUOTA_ON(mp)); +				ASSERT(xfs_sb_version_has_pquotino(&mp->m_sb) || +				       !XFS_IS_PQUOTA_ON(mp));  				ASSERT(mask & ATTR_GID);  				ASSERT(gdqp);  				olddquot2 = xfs_qm_vop_chown(tp, ip, @@ -633,30 +679,10 @@ xfs_setattr_nonsize(  		}  	} -	/* -	 * Change file access modes. -	 */  	if (mask & ATTR_MODE) -		xfs_setattr_mode(tp, ip, iattr); - -	/* -	 * Change file access or modified times. 
-	 */ -	if (mask & ATTR_ATIME) { -		inode->i_atime = iattr->ia_atime; -		ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; -		ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; -	} -	if (mask & ATTR_CTIME) { -		inode->i_ctime = iattr->ia_ctime; -		ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; -		ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; -	} -	if (mask & ATTR_MTIME) { -		inode->i_mtime = iattr->ia_mtime; -		ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; -		ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; -	} +		xfs_setattr_mode(ip, iattr); +	if (mask & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) +		xfs_setattr_time(ip, iattr);  	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); @@ -687,7 +713,7 @@ xfs_setattr_nonsize(  	 * 	     Posix ACL code seems to care about this issue either.  	 */  	if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { -		error = -xfs_acl_chmod(inode); +		error = -posix_acl_chmod(inode, inode->i_mode);  		if (error)  			return XFS_ERROR(error);  	} @@ -709,12 +735,10 @@ out_dqrele:  int  xfs_setattr_size(  	struct xfs_inode	*ip, -	struct iattr		*iattr, -	int			flags) +	struct iattr		*iattr)  {  	struct xfs_mount	*mp = ip->i_mount;  	struct inode		*inode = VFS_I(ip); -	int			mask = iattr->ia_valid;  	xfs_off_t		oldsize, newsize;  	struct xfs_trans	*tp;  	int			error; @@ -733,14 +757,10 @@ xfs_setattr_size(  	if (error)  		return XFS_ERROR(error); +	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));  	ASSERT(S_ISREG(ip->i_d.di_mode)); -	ASSERT((mask & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| -			ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); - -	if (!(flags & XFS_ATTR_NOLOCK)) { -		lock_flags |= XFS_IOLOCK_EXCL; -		xfs_ilock(ip, lock_flags); -	} +	ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| +		ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);  	oldsize = inode->i_size;  	newsize = iattr->ia_size; @@ -749,13 +769,12 @@ xfs_setattr_size(  	 * Short circuit the truncate case for zero length files.  	 
*/  	if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) { -		if (!(mask & (ATTR_CTIME|ATTR_MTIME))) -			goto out_unlock; +		if (!(iattr->ia_valid & (ATTR_CTIME|ATTR_MTIME))) +			return 0;  		/*  		 * Use the regular setattr path to update the timestamps.  		 */ -		xfs_iunlock(ip, lock_flags);  		iattr->ia_valid &= ~ATTR_SIZE;  		return xfs_setattr_nonsize(ip, iattr, 0);  	} @@ -765,7 +784,7 @@ xfs_setattr_size(  	 */  	error = xfs_qm_dqattach(ip, 0);  	if (error) -		goto out_unlock; +		return error;  	/*  	 * Now we can make the changes.  Before we join the inode to the @@ -783,7 +802,7 @@ xfs_setattr_size(  		 */  		error = xfs_zero_eof(ip, newsize, oldsize);  		if (error) -			goto out_unlock; +			return error;  	}  	/* @@ -802,7 +821,7 @@ xfs_setattr_size(  		error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,  						      ip->i_d.di_size, newsize);  		if (error) -			goto out_unlock; +			return error;  	}  	/* @@ -810,22 +829,34 @@ xfs_setattr_size(  	 */  	inode_dio_wait(inode); +	/* +	 * Do all the page cache truncate work outside the transaction context +	 * as the "lock" order is page lock->log space reservation.  i.e. +	 * locking pages inside the transaction can ABBA deadlock with +	 * writeback. We have to do the VFS inode size update before we truncate +	 * the pagecache, however, to avoid racing with page faults beyond the +	 * new EOF, as they are not serialised against truncate operations except by +	 * page locks and size updates. +	 * +	 * Hence we are in a situation where a truncate can fail with ENOMEM +	 * from xfs_trans_reserve(), but having already truncated the in-memory +	 * version of the file (i.e. made user visible changes). There's not +	 * much we can do about this, except to hope that the caller sees ENOMEM +	 * and retries the truncate operation. 
+	 */  	error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);  	if (error) -		goto out_unlock; +		return error; +	truncate_setsize(inode, newsize);  	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);  	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);  	if (error)  		goto out_trans_cancel; -	truncate_setsize(inode, newsize); -  	commit_flags = XFS_TRANS_RELEASE_LOG_RES;  	lock_flags |= XFS_ILOCK_EXCL; -  	xfs_ilock(ip, XFS_ILOCK_EXCL); -  	xfs_trans_ijoin(tp, ip, 0);  	/* @@ -838,10 +869,11 @@ xfs_setattr_size(  	 * these flags set.  For all other operations the VFS set these flags  	 * explicitly if it wants a timestamp update.  	 */ -	if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) { +	if (newsize != oldsize && +	    !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) {  		iattr->ia_ctime = iattr->ia_mtime =  			current_fs_time(inode->i_sb); -		mask |= ATTR_CTIME | ATTR_MTIME; +		iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;  	}  	/* @@ -877,22 +909,10 @@ xfs_setattr_size(  		xfs_inode_clear_eofblocks_tag(ip);  	} -	/* -	 * Change file access modes. 
-	 */ -	if (mask & ATTR_MODE) -		xfs_setattr_mode(tp, ip, iattr); - -	if (mask & ATTR_CTIME) { -		inode->i_ctime = iattr->ia_ctime; -		ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; -		ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; -	} -	if (mask & ATTR_MTIME) { -		inode->i_mtime = iattr->ia_mtime; -		ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; -		ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; -	} +	if (iattr->ia_valid & ATTR_MODE) +		xfs_setattr_mode(ip, iattr); +	if (iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) +		xfs_setattr_time(ip, iattr);  	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); @@ -916,12 +936,21 @@ out_trans_cancel:  STATIC int  xfs_vn_setattr( -	struct dentry	*dentry, -	struct iattr	*iattr) +	struct dentry		*dentry, +	struct iattr		*iattr)  { -	if (iattr->ia_valid & ATTR_SIZE) -		return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0); -	return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0); +	struct xfs_inode	*ip = XFS_I(dentry->d_inode); +	int			error; + +	if (iattr->ia_valid & ATTR_SIZE) { +		xfs_ilock(ip, XFS_IOLOCK_EXCL); +		error = xfs_setattr_size(ip, iattr); +		xfs_iunlock(ip, XFS_IOLOCK_EXCL); +	} else { +		error = xfs_setattr_nonsize(ip, iattr, 0); +	} + +	return -error;  }  STATIC int @@ -1051,8 +1080,18 @@ xfs_vn_fiemap(  	return 0;  } +STATIC int +xfs_vn_tmpfile( +	struct inode	*dir, +	struct dentry	*dentry, +	umode_t		mode) +{ +	return xfs_generic_create(dir, dentry, mode, 0, true); +} +  static const struct inode_operations xfs_inode_operations = {  	.get_acl		= xfs_get_acl, +	.set_acl		= xfs_set_acl,  	.getattr		= xfs_vn_getattr,  	.setattr		= xfs_vn_setattr,  	.setxattr		= generic_setxattr, @@ -1080,6 +1119,7 @@ static const struct inode_operations xfs_dir_inode_operations = {  	.mknod			= xfs_vn_mknod,  	.rename			= xfs_vn_rename,  	.get_acl		= xfs_get_acl, +	.set_acl		= xfs_set_acl,  	.getattr		= xfs_vn_getattr,  	.setattr		= xfs_vn_setattr,  	.setxattr		= generic_setxattr, @@ -1087,6 +1127,7 @@ 
static const struct inode_operations xfs_dir_inode_operations = {  	.removexattr		= generic_removexattr,  	.listxattr		= xfs_vn_listxattr,  	.update_time		= xfs_vn_update_time, +	.tmpfile		= xfs_vn_tmpfile,  };  static const struct inode_operations xfs_dir_ci_inode_operations = { @@ -1106,6 +1147,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {  	.mknod			= xfs_vn_mknod,  	.rename			= xfs_vn_rename,  	.get_acl		= xfs_get_acl, +	.set_acl		= xfs_set_acl,  	.getattr		= xfs_vn_getattr,  	.setattr		= xfs_vn_setattr,  	.setxattr		= generic_setxattr, @@ -1113,13 +1155,13 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {  	.removexattr		= generic_removexattr,  	.listxattr		= xfs_vn_listxattr,  	.update_time		= xfs_vn_update_time, +	.tmpfile		= xfs_vn_tmpfile,  };  static const struct inode_operations xfs_symlink_inode_operations = {  	.readlink		= generic_readlink,  	.follow_link		= xfs_vn_follow_link, -	.put_link		= xfs_vn_put_link, -	.get_acl		= xfs_get_acl, +	.put_link		= kfree_put_link,  	.getattr		= xfs_vn_getattr,  	.setattr		= xfs_vn_setattr,  	.setxattr		= generic_setxattr, @@ -1169,6 +1211,7 @@ xfs_setup_inode(  	struct xfs_inode	*ip)  {  	struct inode		*inode = &ip->i_vnode; +	gfp_t			gfp_mask;  	inode->i_ino = ip->i_ino;  	inode->i_state = I_NEW; @@ -1204,6 +1247,8 @@ xfs_setup_inode(  	inode->i_ctime.tv_nsec	= ip->i_d.di_ctime.t_nsec;  	xfs_diflags_to_iflags(inode, ip); +	ip->d_ops = ip->i_mount->m_nondir_inode_ops; +	lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);  	switch (inode->i_mode & S_IFMT) {  	case S_IFREG:  		inode->i_op = &xfs_inode_operations; @@ -1211,11 +1256,13 @@ xfs_setup_inode(  		inode->i_mapping->a_ops = &xfs_address_space_operations;  		break;  	case S_IFDIR: +		lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class);  		if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))  			inode->i_op = &xfs_dir_ci_inode_operations;  		else  			inode->i_op = 
&xfs_dir_inode_operations;  		inode->i_fop = &xfs_dir_file_operations; +		ip->d_ops = ip->i_mount->m_dir_inode_ops;  		break;  	case S_IFLNK:  		inode->i_op = &xfs_symlink_inode_operations; @@ -1229,6 +1276,14 @@ xfs_setup_inode(  	}  	/* +	 * Ensure all page cache allocations are done from GFP_NOFS context to +	 * prevent direct reclaim recursion back into the filesystem and blowing +	 * stacks or deadlocking. +	 */ +	gfp_mask = mapping_gfp_mask(inode->i_mapping); +	mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS))); + +	/*  	 * If there is no attribute fork no ACL can exist on this inode,  	 * and it can't have any file capabilities attached to it either.  	 */  | 
