Diffstat (limited to 'fs/xfs')
135 files changed, 9970 insertions, 8866 deletions
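One pattern recurs throughout this changeset and is easiest to see in the kmem.c and kmem.h hunks near the top of the diff: the zeroing allocators (kmem_zalloc, kmem_zone_zalloc) stop memset()ing the result themselves and instead pass a new KM_ZERO flag, which kmem_flags_convert() turns into __GFP_ZERO, so the zeroing and non-zeroing variants share a single allocation path. The user-space sketch below only models that idea; km_flags_t, demo_alloc and demo_zalloc are illustrative stand-ins, not the kernel API.

/*
 * Simplified model of the KM_ZERO refactor shown in the kmem.c/kmem.h
 * hunks below: zeroing becomes a flag honoured in one central place,
 * and the *_zalloc variants collapse into thin inline wrappers.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef unsigned int km_flags_t;

#define KM_SLEEP	0x0001u
#define KM_MAYFAIL	0x0008u
#define KM_ZERO		0x0010u	/* ask the allocator itself to zero the memory */

/* Stand-in for kmem_alloc(): one place honours KM_ZERO for every caller. */
static void *demo_alloc(size_t size, km_flags_t flags)
{
	void *p = malloc(size);

	if (p && (flags & KM_ZERO))
		memset(p, 0, size);	/* the kernel would pass __GFP_ZERO instead */
	return p;
}

/* The zalloc variant is now a trivial wrapper rather than a second copy. */
static inline void *demo_zalloc(size_t size, km_flags_t flags)
{
	return demo_alloc(size, flags | KM_ZERO);
}

int main(void)
{
	unsigned char *buf = demo_zalloc(16, KM_SLEEP);

	if (!buf)
		return 1;
	printf("first byte after zalloc: %u\n", buf[0]);	/* prints 0 */
	free(buf);
	return 0;
}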
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 0719e4db93f..c21f4350666 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -66,12 +66,14 @@ xfs-y				+= xfs_alloc.o \  				   xfs_bmap_btree.o \  				   xfs_btree.o \  				   xfs_da_btree.o \ +				   xfs_da_format.o \  				   xfs_dir2.o \  				   xfs_dir2_block.o \  				   xfs_dir2_data.o \  				   xfs_dir2_leaf.o \  				   xfs_dir2_node.o \  				   xfs_dir2_sf.o \ +				   xfs_dquot_buf.o \  				   xfs_ialloc.o \  				   xfs_ialloc_btree.o \  				   xfs_icreate_item.o \ @@ -103,7 +105,11 @@ xfs-$(CONFIG_XFS_QUOTA)		+= xfs_dquot.o \  				   xfs_qm_bhv.o \  				   xfs_qm.o \  				   xfs_quotaops.o -xfs-$(CONFIG_XFS_RT)		+= xfs_rtalloc.o + +# xfs_rtbitmap is shared with libxfs +xfs-$(CONFIG_XFS_RT)		+= xfs_rtalloc.o \ +				   xfs_rtbitmap.o +  xfs-$(CONFIG_XFS_POSIX_ACL)	+= xfs_acl.o  xfs-$(CONFIG_PROC_FS)		+= xfs_stats.o  xfs-$(CONFIG_SYSCTL)		+= xfs_sysctl.o diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index a02cfb9e3bc..844e288b957 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -63,25 +63,33 @@ kmem_alloc(size_t size, xfs_km_flags_t flags)  }  void * -kmem_zalloc(size_t size, xfs_km_flags_t flags) -{ -	void	*ptr; - -	ptr = kmem_alloc(size, flags); -	if (ptr) -		memset((char *)ptr, 0, (int)size); -	return ptr; -} - -void *  kmem_zalloc_large(size_t size, xfs_km_flags_t flags)  { +	unsigned noio_flag = 0;  	void	*ptr; +	gfp_t	lflags;  	ptr = kmem_zalloc(size, flags | KM_MAYFAIL);  	if (ptr)  		return ptr; -	return vzalloc(size); + +	/* +	 * __vmalloc() will allocate data pages and auxillary structures (e.g. +	 * pagetables) with GFP_KERNEL, yet we may be under GFP_NOFS context +	 * here. Hence we need to tell memory reclaim that we are in such a +	 * context via PF_MEMALLOC_NOIO to prevent memory reclaim re-entering +	 * the filesystem here and potentially deadlocking. 
+	 */ +	if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS)) +		noio_flag = memalloc_noio_save(); + +	lflags = kmem_flags_convert(flags); +	ptr = __vmalloc(size, lflags | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); + +	if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS)) +		memalloc_noio_restore(noio_flag); + +	return ptr;  }  void @@ -128,14 +136,3 @@ kmem_zone_alloc(kmem_zone_t *zone, xfs_km_flags_t flags)  		congestion_wait(BLK_RW_ASYNC, HZ/50);  	} while (1);  } - -void * -kmem_zone_zalloc(kmem_zone_t *zone, xfs_km_flags_t flags) -{ -	void	*ptr; - -	ptr = kmem_zone_alloc(zone, flags); -	if (ptr) -		memset((char *)ptr, 0, kmem_cache_size(zone)); -	return ptr; -} diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 3a7371cab50..64db0e53ede 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -32,6 +32,7 @@ typedef unsigned __bitwise xfs_km_flags_t;  #define KM_NOSLEEP	((__force xfs_km_flags_t)0x0002u)  #define KM_NOFS		((__force xfs_km_flags_t)0x0004u)  #define KM_MAYFAIL	((__force xfs_km_flags_t)0x0008u) +#define KM_ZERO		((__force xfs_km_flags_t)0x0010u)  /*   * We use a special process flag to avoid recursive callbacks into @@ -43,7 +44,7 @@ kmem_flags_convert(xfs_km_flags_t flags)  {  	gfp_t	lflags; -	BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL)); +	BUG_ON(flags & ~(KM_SLEEP|KM_NOSLEEP|KM_NOFS|KM_MAYFAIL|KM_ZERO));  	if (flags & KM_NOSLEEP) {  		lflags = GFP_ATOMIC | __GFP_NOWARN; @@ -52,11 +53,14 @@ kmem_flags_convert(xfs_km_flags_t flags)  		if ((current->flags & PF_FSTRANS) || (flags & KM_NOFS))  			lflags &= ~__GFP_FS;  	} + +	if (flags & KM_ZERO) +		lflags |= __GFP_ZERO; +  	return lflags;  }  extern void *kmem_alloc(size_t, xfs_km_flags_t); -extern void *kmem_zalloc(size_t, xfs_km_flags_t);  extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t);  extern void *kmem_realloc(const void *, size_t, size_t, xfs_km_flags_t);  extern void  kmem_free(const void *); @@ -64,6 +68,12 @@ extern void  kmem_free(const void *);  extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); +static inline void * +kmem_zalloc(size_t size, xfs_km_flags_t flags) +{ +	return kmem_alloc(size, flags | KM_ZERO); +} +  /*   * Zone interfaces   */ @@ -102,6 +112,11 @@ kmem_zone_destroy(kmem_zone_t *zone)  }  extern void *kmem_zone_alloc(kmem_zone_t *, xfs_km_flags_t); -extern void *kmem_zone_zalloc(kmem_zone_t *, xfs_km_flags_t); + +static inline void * +kmem_zone_zalloc(kmem_zone_t *zone, xfs_km_flags_t flags) +{ +	return kmem_zone_alloc(zone, flags | KM_ZERO); +}  #endif /* __XFS_SUPPORT_KMEM_H__ */ diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index 0e2f37efedd..6888ad886ff 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -16,15 +16,15 @@   * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA   */  #include "xfs.h" +#include "xfs_format.h"  #include "xfs_log_format.h"  #include "xfs_trans_resv.h" -#include "xfs_acl.h" -#include "xfs_attr.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h"  #include "xfs_ag.h"  #include "xfs_sb.h"  #include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_acl.h" +#include "xfs_attr.h"  #include "xfs_trace.h"  #include <linux/slab.h>  #include <linux/xattr.h> @@ -124,16 +124,12 @@ struct posix_acl *  xfs_get_acl(struct inode *inode, int type)  {  	struct xfs_inode *ip = XFS_I(inode); -	struct posix_acl *acl; +	struct posix_acl *acl = NULL;  	struct xfs_acl *xfs_acl;  	unsigned char *ea_name;  	int error;  	int len; -	acl = get_cached_acl(inode, type); -	if (acl != ACL_NOT_CACHED) -		return acl; -  	trace_xfs_get_acl(ip);  	
switch (type) { @@ -164,10 +160,8 @@ xfs_get_acl(struct inode *inode, int type)  		 * cache entry, for any other error assume it is transient and  		 * leave the cache entry as ACL_NOT_CACHED.  		 */ -		if (error == -ENOATTR) { -			acl = NULL; +		if (error == -ENOATTR)  			goto out_update_cache; -		}  		goto out;  	} @@ -183,15 +177,12 @@ out:  }  STATIC int -xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl) +__xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)  {  	struct xfs_inode *ip = XFS_I(inode);  	unsigned char *ea_name;  	int error; -	if (S_ISLNK(inode->i_mode)) -		return -EOPNOTSUPP; -  	switch (type) {  	case ACL_TYPE_ACCESS:  		ea_name = SGI_ACL_FILE; @@ -282,131 +273,23 @@ posix_acl_default_exists(struct inode *inode)  	return xfs_acl_exists(inode, SGI_ACL_DEFAULT);  } -/* - * No need for i_mutex because the inode is not yet exposed to the VFS. - */ -int -xfs_inherit_acl(struct inode *inode, struct posix_acl *acl) -{ -	umode_t mode = inode->i_mode; -	int error = 0, inherit = 0; - -	if (S_ISDIR(inode->i_mode)) { -		error = xfs_set_acl(inode, ACL_TYPE_DEFAULT, acl); -		if (error) -			goto out; -	} - -	error = posix_acl_create(&acl, GFP_KERNEL, &mode); -	if (error < 0) -		return error; - -	/* -	 * If posix_acl_create returns a positive value we need to -	 * inherit a permission that can't be represented using the Unix -	 * mode bits and we actually need to set an ACL. -	 */ -	if (error > 0) -		inherit = 1; - -	error = xfs_set_mode(inode, mode); -	if (error) -		goto out; - -	if (inherit) -		error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl); - -out: -	posix_acl_release(acl); -	return error; -} -  int -xfs_acl_chmod(struct inode *inode) +xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)  { -	struct posix_acl *acl; -	int error; - -	if (S_ISLNK(inode->i_mode)) -		return -EOPNOTSUPP; - -	acl = xfs_get_acl(inode, ACL_TYPE_ACCESS); -	if (IS_ERR(acl) || !acl) -		return PTR_ERR(acl); - -	error = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode); -	if (error) -		return error; - -	error = xfs_set_acl(inode, ACL_TYPE_ACCESS, acl); -	posix_acl_release(acl); -	return error; -} - -static int -xfs_xattr_acl_get(struct dentry *dentry, const char *name, -		void *value, size_t size, int type) -{ -	struct posix_acl *acl; -	int error; - -	acl = xfs_get_acl(dentry->d_inode, type); -	if (IS_ERR(acl)) -		return PTR_ERR(acl); -	if (acl == NULL) -		return -ENODATA; - -	error = posix_acl_to_xattr(&init_user_ns, acl, value, size); -	posix_acl_release(acl); - -	return error; -} - -static int -xfs_xattr_acl_set(struct dentry *dentry, const char *name, -		const void *value, size_t size, int flags, int type) -{ -	struct inode *inode = dentry->d_inode; -	struct posix_acl *acl = NULL;  	int error = 0; -	if (flags & XATTR_CREATE) -		return -EINVAL; -	if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) -		return value ? -EACCES : 0; -	if (!inode_owner_or_capable(inode)) -		return -EPERM; - -	if (!value) +	if (!acl)  		goto set_acl; -	acl = posix_acl_from_xattr(&init_user_ns, value, size); -	if (!acl) { -		/* -		 * acl_set_file(3) may request that we set default ACLs with -		 * zero length -- defend (gracefully) against that here. 
-		 */ -		goto out; -	} -	if (IS_ERR(acl)) { -		error = PTR_ERR(acl); -		goto out; -	} - -	error = posix_acl_valid(acl); -	if (error) -		goto out_release; - -	error = -EINVAL; +	error = -E2BIG;  	if (acl->a_count > XFS_ACL_MAX_ENTRIES(XFS_M(inode->i_sb))) -		goto out_release; +		return error;  	if (type == ACL_TYPE_ACCESS) {  		umode_t mode = inode->i_mode;  		error = posix_acl_equiv_mode(acl, &mode);  		if (error <= 0) { -			posix_acl_release(acl);  			acl = NULL;  			if (error < 0) @@ -415,27 +298,9 @@ xfs_xattr_acl_set(struct dentry *dentry, const char *name,  		error = xfs_set_mode(inode, mode);  		if (error) -			goto out_release; +			return error;  	}   set_acl: -	error = xfs_set_acl(inode, type, acl); - out_release: -	posix_acl_release(acl); - out: -	return error; +	return __xfs_set_acl(inode, type, acl);  } - -const struct xattr_handler xfs_xattr_acl_access_handler = { -	.prefix	= POSIX_ACL_XATTR_ACCESS, -	.flags	= ACL_TYPE_ACCESS, -	.get	= xfs_xattr_acl_get, -	.set	= xfs_xattr_acl_set, -}; - -const struct xattr_handler xfs_xattr_acl_default_handler = { -	.prefix	= POSIX_ACL_XATTR_DEFAULT, -	.flags	= ACL_TYPE_DEFAULT, -	.get	= xfs_xattr_acl_get, -	.set	= xfs_xattr_acl_set, -}; diff --git a/fs/xfs/xfs_acl.h b/fs/xfs/xfs_acl.h index 4016a567b83..5dc16374451 100644 --- a/fs/xfs/xfs_acl.h +++ b/fs/xfs/xfs_acl.h @@ -60,20 +60,15 @@ struct xfs_acl {  #ifdef CONFIG_XFS_POSIX_ACL  extern struct posix_acl *xfs_get_acl(struct inode *inode, int type); -extern int xfs_inherit_acl(struct inode *inode, struct posix_acl *default_acl); -extern int xfs_acl_chmod(struct inode *inode); +extern int xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);  extern int posix_acl_access_exists(struct inode *inode);  extern int posix_acl_default_exists(struct inode *inode); - -extern const struct xattr_handler xfs_xattr_acl_access_handler; -extern const struct xattr_handler xfs_xattr_acl_default_handler;  #else  static inline struct posix_acl *xfs_get_acl(struct inode *inode, int type)  {  	return NULL;  } -# define xfs_inherit_acl(inode, default_acl)		0 -# define xfs_acl_chmod(inode)				0 +# define xfs_set_acl					NULL  # define posix_acl_access_exists(inode)			0  # define posix_acl_default_exists(inode)		0  #endif /* CONFIG_XFS_POSIX_ACL */ diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 1cb740afd67..6e247a99f5d 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -89,6 +89,8 @@ typedef struct xfs_agf {  	/* structure must be padded to 64 bit alignment */  } xfs_agf_t; +#define XFS_AGF_CRC_OFF		offsetof(struct xfs_agf, agf_crc) +  #define	XFS_AGF_MAGICNUM	0x00000001  #define	XFS_AGF_VERSIONNUM	0x00000002  #define	XFS_AGF_SEQNO		0x00000004 @@ -128,8 +130,6 @@ typedef struct xfs_agf {  extern int xfs_read_agf(struct xfs_mount *mp, struct xfs_trans *tp,  			xfs_agnumber_t agno, int flags, struct xfs_buf **bpp); -extern const struct xfs_buf_ops xfs_agf_buf_ops; -  /*   * Size of the unlinked inode hash table in the agi.   */ @@ -160,28 +160,38 @@ typedef struct xfs_agi {  	 * still being referenced.  	 */  	__be32		agi_unlinked[XFS_AGI_UNLINKED_BUCKETS]; - +	/* +	 * This marks the end of logging region 1 and start of logging region 2. 
+	 */  	uuid_t		agi_uuid;	/* uuid of filesystem */  	__be32		agi_crc;	/* crc of agi sector */  	__be32		agi_pad32;  	__be64		agi_lsn;	/* last write sequence */ +	__be32		agi_free_root; /* root of the free inode btree */ +	__be32		agi_free_level;/* levels in free inode btree */ +  	/* structure must be padded to 64 bit alignment */  } xfs_agi_t; -#define	XFS_AGI_MAGICNUM	0x00000001 -#define	XFS_AGI_VERSIONNUM	0x00000002 -#define	XFS_AGI_SEQNO		0x00000004 -#define	XFS_AGI_LENGTH		0x00000008 -#define	XFS_AGI_COUNT		0x00000010 -#define	XFS_AGI_ROOT		0x00000020 -#define	XFS_AGI_LEVEL		0x00000040 -#define	XFS_AGI_FREECOUNT	0x00000080 -#define	XFS_AGI_NEWINO		0x00000100 -#define	XFS_AGI_DIRINO		0x00000200 -#define	XFS_AGI_UNLINKED	0x00000400 -#define	XFS_AGI_NUM_BITS	11 -#define	XFS_AGI_ALL_BITS	((1 << XFS_AGI_NUM_BITS) - 1) +#define XFS_AGI_CRC_OFF		offsetof(struct xfs_agi, agi_crc) + +#define	XFS_AGI_MAGICNUM	(1 << 0) +#define	XFS_AGI_VERSIONNUM	(1 << 1) +#define	XFS_AGI_SEQNO		(1 << 2) +#define	XFS_AGI_LENGTH		(1 << 3) +#define	XFS_AGI_COUNT		(1 << 4) +#define	XFS_AGI_ROOT		(1 << 5) +#define	XFS_AGI_LEVEL		(1 << 6) +#define	XFS_AGI_FREECOUNT	(1 << 7) +#define	XFS_AGI_NEWINO		(1 << 8) +#define	XFS_AGI_DIRINO		(1 << 9) +#define	XFS_AGI_UNLINKED	(1 << 10) +#define	XFS_AGI_NUM_BITS_R1	11	/* end of the 1st agi logging region */ +#define	XFS_AGI_ALL_BITS_R1	((1 << XFS_AGI_NUM_BITS_R1) - 1) +#define	XFS_AGI_FREE_ROOT	(1 << 11) +#define	XFS_AGI_FREE_LEVEL	(1 << 12) +#define	XFS_AGI_NUM_BITS_R2	13  /* disk block (xfs_daddr_t) in the AG */  #define XFS_AGI_DADDR(mp)	((xfs_daddr_t)(2 << (mp)->m_sectbb_log)) @@ -191,8 +201,6 @@ typedef struct xfs_agi {  extern int xfs_read_agi(struct xfs_mount *mp, struct xfs_trans *tp,  				xfs_agnumber_t agno, struct xfs_buf **bpp); -extern const struct xfs_buf_ops xfs_agi_buf_ops; -  /*   * The third a.g. block contains the a.g. freelist, an array   * of block pointers to blocks owned by the allocation btree code. 
@@ -226,6 +234,8 @@ typedef struct xfs_agfl {  	__be32		agfl_bno[];	/* actually XFS_AGFL_SIZE(mp) */  } xfs_agfl_t; +#define XFS_AGFL_CRC_OFF	offsetof(struct xfs_agfl, agfl_crc) +  /*   * tags for inode radix tree   */ diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 5a1393f5e02..d43813267a8 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -17,25 +17,25 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_shared.h" +#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h"  #include "xfs_btree.h" +#include "xfs_alloc_btree.h"  #include "xfs_alloc.h"  #include "xfs_extent_busy.h"  #include "xfs_error.h"  #include "xfs_cksum.h"  #include "xfs_trace.h" +#include "xfs_trans.h"  #include "xfs_buf_item.h" +#include "xfs_log.h"  struct workqueue_struct *xfs_alloc_wq; @@ -257,16 +257,14 @@ xfs_alloc_fix_len(  	k = rlen % args->prod;  	if (k == args->mod)  		return; -	if (k > args->mod) { -		if ((int)(rlen = rlen - k - args->mod) < (int)args->minlen) -			return; -	} else { -		if ((int)(rlen = rlen - args->prod - (args->mod - k)) < -		    (int)args->minlen) -			return; -	} -	ASSERT(rlen >= args->minlen); -	ASSERT(rlen <= args->maxlen); +	if (k > args->mod) +		rlen = rlen - (k - args->mod); +	else +		rlen = rlen - args->prod + (args->mod - k); +	if ((int)rlen < (int)args->minlen) +		return; +	ASSERT(rlen >= args->minlen && rlen <= args->maxlen); +	ASSERT(rlen % args->prod == args->mod);  	args->len = rlen;  } @@ -474,7 +472,6 @@ xfs_agfl_read_verify(  	struct xfs_buf	*bp)  {  	struct xfs_mount *mp = bp->b_target->bt_mount; -	int		agfl_ok = 1;  	/*  	 * There is no verification of non-crc AGFLs because mkfs does not @@ -485,15 +482,13 @@ xfs_agfl_read_verify(  	if (!xfs_sb_version_hascrc(&mp->m_sb))  		return; -	agfl_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -				   offsetof(struct xfs_agfl, agfl_crc)); - -	agfl_ok = agfl_ok && xfs_agfl_verify(bp); - -	if (!agfl_ok) { -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); +	if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) +		xfs_buf_ioerror(bp, EFSBADCRC); +	else if (!xfs_agfl_verify(bp))  		xfs_buf_ioerror(bp, EFSCORRUPTED); -	} + +	if (bp->b_error) +		xfs_verifier_error(bp);  }  static void @@ -508,16 +503,15 @@ xfs_agfl_write_verify(  		return;  	if (!xfs_agfl_verify(bp)) { -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);  		xfs_buf_ioerror(bp, EFSCORRUPTED); +		xfs_verifier_error(bp);  		return;  	}  	if (bip)  		XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn); -	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), -			 offsetof(struct xfs_agfl, agfl_crc)); +	xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF);  }  const struct xfs_buf_ops xfs_agfl_buf_ops = { @@ -545,7 +539,6 @@ xfs_alloc_read_agfl(  			XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops);  	if (error)  		return error; -	ASSERT(!xfs_buf_geterror(bp));  	xfs_buf_set_ref(bp, XFS_AGFL_REF);  	*bpp = bp;  	return 0; @@ -2238,19 +2231,17 @@ xfs_agf_read_verify(  	struct xfs_buf	*bp)  {  	struct xfs_mount *mp = bp->b_target->bt_mount; -	int		agf_ok = 1; - -	if (xfs_sb_version_hascrc(&mp->m_sb)) -		agf_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -					  offsetof(struct xfs_agf, 
agf_crc)); - -	agf_ok = agf_ok && xfs_agf_verify(mp, bp); -	if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF, -			XFS_RANDOM_ALLOC_READ_AGF))) { -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); +	if (xfs_sb_version_hascrc(&mp->m_sb) && +	    !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) +		xfs_buf_ioerror(bp, EFSBADCRC); +	else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp, +				XFS_ERRTAG_ALLOC_READ_AGF, +				XFS_RANDOM_ALLOC_READ_AGF))  		xfs_buf_ioerror(bp, EFSCORRUPTED); -	} + +	if (bp->b_error) +		xfs_verifier_error(bp);  }  static void @@ -2261,8 +2252,8 @@ xfs_agf_write_verify(  	struct xfs_buf_log_item	*bip = bp->b_fspriv;  	if (!xfs_agf_verify(mp, bp)) { -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);  		xfs_buf_ioerror(bp, EFSCORRUPTED); +		xfs_verifier_error(bp);  		return;  	} @@ -2272,8 +2263,7 @@ xfs_agf_write_verify(  	if (bip)  		XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); -	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), -			 offsetof(struct xfs_agf, agf_crc)); +	xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF);  }  const struct xfs_buf_ops xfs_agf_buf_ops = { @@ -2294,6 +2284,8 @@ xfs_read_agf(  {  	int		error; +	trace_xfs_read_agf(mp, agno); +  	ASSERT(agno != NULLAGNUMBER);  	error = xfs_trans_read_buf(  			mp, tp, mp->m_ddev_targp, @@ -2324,8 +2316,9 @@ xfs_alloc_read_agf(  	struct xfs_perag	*pag;		/* per allocation group data */  	int			error; -	ASSERT(agno != NULLAGNUMBER); +	trace_xfs_alloc_read_agf(mp, agno); +	ASSERT(agno != NULLAGNUMBER);  	error = xfs_read_agf(mp, tp, agno,  			(flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0,  			bpp); diff --git a/fs/xfs/xfs_alloc.h b/fs/xfs/xfs_alloc.h index 99d0a610155..feacb061bab 100644 --- a/fs/xfs/xfs_alloc.h +++ b/fs/xfs/xfs_alloc.h @@ -231,7 +231,4 @@ xfs_alloc_get_rec(  	xfs_extlen_t		*len,	/* output: length of extent */  	int			*stat);	/* output: success/failure */ -extern const struct xfs_buf_ops xfs_agf_buf_ops; -extern const struct xfs_buf_ops xfs_agfl_buf_ops; -  #endif	/* __XFS_ALLOC_H__ */ diff --git a/fs/xfs/xfs_alloc_btree.c b/fs/xfs/xfs_alloc_btree.c index cafc90251d1..8358f1ded94 100644 --- a/fs/xfs/xfs_alloc_btree.c +++ b/fs/xfs/xfs_alloc_btree.c @@ -17,23 +17,21 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" -#include "xfs_inode.h"  #include "xfs_btree.h" +#include "xfs_alloc_btree.h"  #include "xfs_alloc.h"  #include "xfs_extent_busy.h"  #include "xfs_error.h"  #include "xfs_trace.h"  #include "xfs_cksum.h" +#include "xfs_trans.h"  STATIC struct xfs_btree_cur * @@ -72,7 +70,6 @@ xfs_allocbt_alloc_block(  	struct xfs_btree_cur	*cur,  	union xfs_btree_ptr	*start,  	union xfs_btree_ptr	*new, -	int			length,  	int			*stat)  {  	int			error; @@ -357,12 +354,14 @@ static void  xfs_allocbt_read_verify(  	struct xfs_buf	*bp)  { -	if (!(xfs_btree_sblock_verify_crc(bp) && -	      xfs_allocbt_verify(bp))) { -		trace_xfs_btree_corrupt(bp, _RET_IP_); -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, -				     bp->b_target->bt_mount, bp->b_addr); +	if (!xfs_btree_sblock_verify_crc(bp)) +		xfs_buf_ioerror(bp, EFSBADCRC); +	else if (!xfs_allocbt_verify(bp))  		xfs_buf_ioerror(bp, 
EFSCORRUPTED); + +	if (bp->b_error) { +		trace_xfs_btree_corrupt(bp, _RET_IP_); +		xfs_verifier_error(bp);  	}  } @@ -372,9 +371,9 @@ xfs_allocbt_write_verify(  {  	if (!xfs_allocbt_verify(bp)) {  		trace_xfs_btree_corrupt(bp, _RET_IP_); -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, -				     bp->b_target->bt_mount, bp->b_addr);  		xfs_buf_ioerror(bp, EFSCORRUPTED); +		xfs_verifier_error(bp); +		return;  	}  	xfs_btree_sblock_calc_crc(bp); diff --git a/fs/xfs/xfs_alloc_btree.h b/fs/xfs/xfs_alloc_btree.h index e3a3f742419..45e189e7e81 100644 --- a/fs/xfs/xfs_alloc_btree.h +++ b/fs/xfs/xfs_alloc_btree.h @@ -27,39 +27,6 @@ struct xfs_btree_cur;  struct xfs_mount;  /* - * There are two on-disk btrees, one sorted by blockno and one sorted - * by blockcount and blockno.  All blocks look the same to make the code - * simpler; if we have time later, we'll make the optimizations. - */ -#define	XFS_ABTB_MAGIC		0x41425442	/* 'ABTB' for bno tree */ -#define	XFS_ABTB_CRC_MAGIC	0x41423342	/* 'AB3B' */ -#define	XFS_ABTC_MAGIC		0x41425443	/* 'ABTC' for cnt tree */ -#define	XFS_ABTC_CRC_MAGIC	0x41423343	/* 'AB3C' */ - -/* - * Data record/key structure - */ -typedef struct xfs_alloc_rec { -	__be32		ar_startblock;	/* starting block number */ -	__be32		ar_blockcount;	/* count of free blocks */ -} xfs_alloc_rec_t, xfs_alloc_key_t; - -typedef struct xfs_alloc_rec_incore { -	xfs_agblock_t	ar_startblock;	/* starting block number */ -	xfs_extlen_t	ar_blockcount;	/* count of free blocks */ -} xfs_alloc_rec_incore_t; - -/* btree pointer type */ -typedef __be32 xfs_alloc_ptr_t; - -/* - * Block numbers in the AG: - * SB is sector 0, AGF is sector 1, AGI is sector 2, AGFL is sector 3. - */ -#define	XFS_BNO_BLOCK(mp)	((xfs_agblock_t)(XFS_AGFL_BLOCK(mp) + 1)) -#define	XFS_CNT_BLOCK(mp)	((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1)) - -/*   * Btree block header size depends on a superblock flag.   
*/  #define XFS_ALLOC_BLOCK_LEN(mp) \ @@ -95,6 +62,4 @@ extern struct xfs_btree_cur *xfs_allocbt_init_cursor(struct xfs_mount *,  		xfs_agnumber_t, xfs_btnum_t);  extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int); -extern const struct xfs_buf_ops xfs_allocbt_buf_ops; -  #endif	/* __XFS_ALLOC_BTREE_H__ */ diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index e51e581454e..faaf716e208 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -16,14 +16,15 @@   * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA   */  #include "xfs.h" -#include "xfs_log.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h" -#include "xfs_trans.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h" +#include "xfs_trans.h"  #include "xfs_inode_item.h"  #include "xfs_alloc.h"  #include "xfs_error.h" @@ -31,6 +32,8 @@  #include "xfs_trace.h"  #include "xfs_bmap.h"  #include "xfs_bmap_util.h" +#include "xfs_bmap_btree.h" +#include "xfs_dinode.h"  #include <linux/aio.h>  #include <linux/gfp.h>  #include <linux/mpage.h> @@ -333,7 +336,7 @@ xfs_map_blocks(  	if (type == XFS_IO_DELALLOC &&  	    (!nimaps || isnullstartblock(imap->br_startblock))) { -		error = xfs_iomap_write_allocate(ip, offset, count, imap); +		error = xfs_iomap_write_allocate(ip, offset, imap);  		if (!error)  			trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);  		return -XFS_ERROR(error); @@ -404,7 +407,7 @@ xfs_alloc_ioend_bio(  	struct bio		*bio = bio_alloc(GFP_NOIO, nvecs);  	ASSERT(bio->bi_private == NULL); -	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); +	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);  	bio->bi_bdev = bh->b_bdev;  	return bio;  } @@ -629,38 +632,46 @@ xfs_map_at_offset(  }  /* - * Test if a given page is suitable for writing as part of an unwritten - * or delayed allocate extent. + * Test if a given page contains at least one buffer of a given @type. + * If @check_all_buffers is true, then we walk all the buffers in the page to + * try to find one of the type passed in. If it is not set, then the caller only + * needs to check the first buffer on the page for a match.   */ -STATIC int +STATIC bool  xfs_check_page_type(  	struct page		*page, -	unsigned int		type) +	unsigned int		type, +	bool			check_all_buffers)  { -	if (PageWriteback(page)) -		return 0; +	struct buffer_head	*bh; +	struct buffer_head	*head; -	if (page->mapping && page_has_buffers(page)) { -		struct buffer_head	*bh, *head; -		int			acceptable = 0; +	if (PageWriteback(page)) +		return false; +	if (!page->mapping) +		return false; +	if (!page_has_buffers(page)) +		return false; -		bh = head = page_buffers(page); -		do { -			if (buffer_unwritten(bh)) -				acceptable += (type == XFS_IO_UNWRITTEN); -			else if (buffer_delay(bh)) -				acceptable += (type == XFS_IO_DELALLOC); -			else if (buffer_dirty(bh) && buffer_mapped(bh)) -				acceptable += (type == XFS_IO_OVERWRITE); -			else -				break; -		} while ((bh = bh->b_this_page) != head); +	bh = head = page_buffers(page); +	do { +		if (buffer_unwritten(bh)) { +			if (type == XFS_IO_UNWRITTEN) +				return true; +		} else if (buffer_delay(bh)) { +			if (type == XFS_IO_DELALLOC) +				return true; +		} else if (buffer_dirty(bh) && buffer_mapped(bh)) { +			if (type == XFS_IO_OVERWRITE) +				return true; +		} -		if (acceptable) -			return 1; -	} +		/* If we are only checking the first buffer, we are done now. 
*/ +		if (!check_all_buffers) +			break; +	} while ((bh = bh->b_this_page) != head); -	return 0; +	return false;  }  /* @@ -694,7 +705,7 @@ xfs_convert_page(  		goto fail_unlock_page;  	if (page->mapping != inode->i_mapping)  		goto fail_unlock_page; -	if (!xfs_check_page_type(page, (*ioendp)->io_type)) +	if (!xfs_check_page_type(page, (*ioendp)->io_type, false))  		goto fail_unlock_page;  	/* @@ -739,6 +750,15 @@ xfs_convert_page(  	p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;  	page_dirty = p_offset / len; +	/* +	 * The moment we find a buffer that doesn't match our current type +	 * specification or can't be written, abort the loop and start +	 * writeback. As per the above xfs_imap_valid() check, only +	 * xfs_vm_writepage() can handle partial page writeback fully - we are +	 * limited here to the buffers that are contiguous with the current +	 * ioend, and hence a buffer we can't write breaks that contiguity and +	 * we have to defer the rest of the IO to xfs_vm_writepage(). +	 */  	bh = head = page_buffers(page);  	do {  		if (offset >= end_offset) @@ -747,7 +767,7 @@ xfs_convert_page(  			uptodate = 0;  		if (!(PageUptodate(page) || buffer_uptodate(bh))) {  			done = 1; -			continue; +			break;  		}  		if (buffer_unwritten(bh) || buffer_delay(bh) || @@ -759,10 +779,11 @@ xfs_convert_page(  			else  				type = XFS_IO_OVERWRITE; -			if (!xfs_imap_valid(inode, imap, offset)) { -				done = 1; -				continue; -			} +			/* +			 * imap should always be valid because of the above +			 * partial page end_offset check on the imap. +			 */ +			ASSERT(xfs_imap_valid(inode, imap, offset));  			lock_buffer(bh);  			if (type != XFS_IO_OVERWRITE) @@ -774,6 +795,7 @@ xfs_convert_page(  			count++;  		} else {  			done = 1; +			break;  		}  	} while (offset += len, (bh = bh->b_this_page) != head); @@ -865,7 +887,7 @@ xfs_aops_discard_page(  	struct buffer_head	*bh, *head;  	loff_t			offset = page_offset(page); -	if (!xfs_check_page_type(page, XFS_IO_DELALLOC)) +	if (!xfs_check_page_type(page, XFS_IO_DELALLOC, true))  		goto out_invalidate;  	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) @@ -953,14 +975,39 @@ xfs_vm_writepage(  	 * Given that we do not allow direct reclaim to call us, we should  	 * never be called while in a filesystem transaction.  	 */ -	if (WARN_ON(current->flags & PF_FSTRANS)) +	if (WARN_ON_ONCE(current->flags & PF_FSTRANS))  		goto redirty;  	/* Is this page beyond the end of the file? */  	offset = i_size_read(inode);  	end_index = offset >> PAGE_CACHE_SHIFT;  	last_index = (offset - 1) >> PAGE_CACHE_SHIFT; -	if (page->index >= end_index) { + +	/* +	 * The page index is less than the end_index, adjust the end_offset +	 * to the highest offset that this page should represent. +	 * ----------------------------------------------------- +	 * |			file mapping	       | <EOF> | +	 * ----------------------------------------------------- +	 * | Page ... | Page N-2 | Page N-1 |  Page N  |       | +	 * ^--------------------------------^----------|-------- +	 * |     desired writeback range    |      see else    | +	 * ---------------------------------^------------------| +	 */ +	if (page->index < end_index) +		end_offset = (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT; +	else { +		/* +		 * Check whether the page to write out is beyond or straddles +		 * i_size or not. +		 * ------------------------------------------------------- +		 * |		file mapping		        | <EOF>  | +		 * ------------------------------------------------------- +		 * | Page ... 
| Page N-2 | Page N-1 |  Page N   | Beyond | +		 * ^--------------------------------^-----------|--------- +		 * |				    |      Straddles     | +		 * ---------------------------------^-----------|--------| +		 */  		unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1);  		/* @@ -968,24 +1015,36 @@ xfs_vm_writepage(  		 * truncate operation that is in progress. We must redirty the  		 * page so that reclaim stops reclaiming it. Otherwise  		 * xfs_vm_releasepage() is called on it and gets confused. +		 * +		 * Note that the end_index is unsigned long, it would overflow +		 * if the given offset is greater than 16TB on 32-bit system +		 * and if we do check the page is fully outside i_size or not +		 * via "if (page->index >= end_index + 1)" as "end_index + 1" +		 * will be evaluated to 0.  Hence this page will be redirtied +		 * and be written out repeatedly which would result in an +		 * infinite loop, the user program that perform this operation +		 * will hang.  Instead, we can verify this situation by checking +		 * if the page to write is totally beyond the i_size or if it's +		 * offset is just equal to the EOF.  		 */ -		if (page->index >= end_index + 1 || offset_into_page == 0) +		if (page->index > end_index || +		    (page->index == end_index && offset_into_page == 0))  			goto redirty;  		/*  		 * The page straddles i_size.  It must be zeroed out on each  		 * and every writepage invocation because it may be mmapped.  		 * "A file is mapped in multiples of the page size.  For a file -		 * that is not a multiple of the  page size, the remaining +		 * that is not a multiple of the page size, the remaining  		 * memory is zeroed when mapped, and writes to that region are  		 * not written out to the file."  		 */  		zero_user_segment(page, offset_into_page, PAGE_CACHE_SIZE); + +		/* Adjust the end_offset to the end of file */ +		end_offset = offset;  	} -	end_offset = min_t(unsigned long long, -			(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, -			offset);  	len = 1 << inode->i_blkbits;  	bh = head = page_buffers(page); @@ -1166,9 +1225,9 @@ xfs_vm_releasepage(  	xfs_count_page_state(page, &delalloc, &unwritten); -	if (WARN_ON(delalloc)) +	if (WARN_ON_ONCE(delalloc))  		return 0; -	if (WARN_ON(unwritten)) +	if (WARN_ON_ONCE(unwritten))  		return 0;  	return try_to_free_buffers(page); @@ -1214,7 +1273,7 @@ __xfs_get_blocks(  		lockmode = XFS_ILOCK_EXCL;  		xfs_ilock(ip, lockmode);  	} else { -		lockmode = xfs_ilock_map_shared(ip); +		lockmode = xfs_ilock_data_map_shared(ip);  	}  	ASSERT(offset <= mp->m_super->s_maxbytes); @@ -1322,6 +1381,14 @@ __xfs_get_blocks(  	/*  	 * If this is O_DIRECT or the mpage code calling tell them how large  	 * the mapping is, so that we can avoid repeated get_blocks calls. +	 * +	 * If the mapping spans EOF, then we have to break the mapping up as the +	 * mapping for blocks beyond EOF must be marked new so that sub block +	 * regions can be correctly zeroed. We can't do this for mappings within +	 * EOF unless the mapping was just allocated or is unwritten, otherwise +	 * the callers would overwrite existing data with zeros. Hence we have +	 * to split the mapping into a range up to and including EOF, and a +	 * second mapping for beyond EOF.  	 
*/  	if (direct || size > (1 << inode->i_blkbits)) {  		xfs_off_t		mapping_size; @@ -1332,6 +1399,12 @@ __xfs_get_blocks(  		ASSERT(mapping_size > 0);  		if (mapping_size > size)  			mapping_size = size; +		if (offset < i_size_read(inode) && +		    offset + mapping_size >= i_size_read(inode)) { +			/* limit mapping to block that spans EOF */ +			mapping_size = roundup_64(i_size_read(inode) - offset, +						  1 << inode->i_blkbits); +		}  		if (mapping_size > LONG_MAX)  			mapping_size = LONG_MAX; @@ -1413,9 +1486,8 @@ STATIC ssize_t  xfs_vm_direct_IO(  	int			rw,  	struct kiocb		*iocb, -	const struct iovec	*iov, -	loff_t			offset, -	unsigned long		nr_segs) +	struct iov_iter		*iter, +	loff_t			offset)  {  	struct inode		*inode = iocb->ki_filp->f_mapping->host;  	struct block_device	*bdev = xfs_find_bdev_for_inode(inode); @@ -1423,7 +1495,7 @@ xfs_vm_direct_IO(  	ssize_t			ret;  	if (rw & WRITE) { -		size_t size = iov_length(iov, nr_segs); +		size_t size = iov_iter_count(iter);  		/*  		 * We cannot preallocate a size update transaction here as we @@ -1435,16 +1507,15 @@ xfs_vm_direct_IO(  		if (offset + size > XFS_I(inode)->i_d.di_size)  			ioend->io_isdirect = 1; -		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, -					    offset, nr_segs, -					    xfs_get_blocks_direct, -					    xfs_end_io_direct_write, NULL, 0); +		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter, +					    offset, xfs_get_blocks_direct, +					    xfs_end_io_direct_write, NULL, +					    DIO_ASYNC_EXTEND);  		if (ret != -EIOCBQUEUED && iocb->private)  			goto out_destroy_ioend;  	} else { -		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, -					    offset, nr_segs, -					    xfs_get_blocks_direct, +		ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iter, +					    offset, xfs_get_blocks_direct,  					    NULL, NULL, 0);  	} @@ -1543,6 +1614,16 @@ xfs_vm_write_failed(  		xfs_vm_kill_delalloc_range(inode, block_offset,  					   block_offset + bh->b_size); + +		/* +		 * This buffer does not contain data anymore. make sure anyone +		 * who finds it knows that for certain. +		 */ +		clear_buffer_delay(bh); +		clear_buffer_uptodate(bh); +		clear_buffer_mapped(bh); +		clear_buffer_new(bh); +		clear_buffer_dirty(bh);  	}  } @@ -1569,20 +1650,28 @@ xfs_vm_write_begin(  	ASSERT(len <= PAGE_CACHE_SIZE); -	page = grab_cache_page_write_begin(mapping, index, -					   flags | AOP_FLAG_NOFS); +	page = grab_cache_page_write_begin(mapping, index, flags);  	if (!page)  		return -ENOMEM;  	status = __block_write_begin(page, pos, len, xfs_get_blocks);  	if (unlikely(status)) {  		struct inode	*inode = mapping->host; +		size_t		isize = i_size_read(inode);  		xfs_vm_write_failed(inode, page, pos, len);  		unlock_page(page); -		if (pos + len > i_size_read(inode)) -			truncate_pagecache(inode, i_size_read(inode)); +		/* +		 * If the write is beyond EOF, we only want to kill blocks +		 * allocated in this write, not blocks that were previously +		 * written successfully. +		 */ +		if (pos + len > isize) { +			ssize_t start = max_t(ssize_t, pos, isize); + +			truncate_pagecache_range(inode, start, pos + len); +		}  		page_cache_release(page);  		page = NULL; @@ -1593,9 +1682,12 @@ xfs_vm_write_begin(  }  /* - * On failure, we only need to kill delalloc blocks beyond EOF because they - * will never be written. For blocks within EOF, generic_write_end() zeros them - * so they are safe to leave alone and be written with all the other valid data. 
+ * On failure, we only need to kill delalloc blocks beyond EOF in the range of + * this specific write because they will never be written. Previous writes + * beyond EOF where block allocation succeeded do not need to be trashed, so + * only new blocks from this write should be trashed. For blocks within + * EOF, generic_write_end() zeros them so they are safe to leave alone and be + * written with all the other valid data.   */  STATIC int  xfs_vm_write_end( @@ -1618,8 +1710,11 @@ xfs_vm_write_end(  		loff_t		to = pos + len;  		if (to > isize) { -			truncate_pagecache(inode, isize); +			/* only kill blocks in this write beyond EOF */ +			if (pos > isize) +				isize = pos;  			xfs_vm_kill_delalloc_range(inode, isize, to); +			truncate_pagecache_range(inode, isize, to);  		}  	}  	return ret; diff --git a/fs/xfs/xfs_attr.c b/fs/xfs/xfs_attr.c index ddcf2267ffa..bfe36fc2cdc 100644 --- a/fs/xfs/xfs_attr.c +++ b/fs/xfs/xfs_attr.c @@ -17,23 +17,24 @@   */  #include "xfs.h"  #include "xfs_fs.h" +#include "xfs_shared.h"  #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" +#include "xfs_da_format.h"  #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h"  #include "xfs_attr_sf.h" -#include "xfs_dinode.h"  #include "xfs_inode.h"  #include "xfs_alloc.h" +#include "xfs_trans.h"  #include "xfs_inode_item.h"  #include "xfs_bmap.h"  #include "xfs_bmap_util.h" +#include "xfs_bmap_btree.h"  #include "xfs_attr.h"  #include "xfs_attr_leaf.h"  #include "xfs_attr_remote.h" @@ -41,6 +42,7 @@  #include "xfs_quota.h"  #include "xfs_trans_space.h"  #include "xfs_trace.h" +#include "xfs_dinode.h"  /*   * xfs_attr.c @@ -75,17 +77,27 @@ STATIC int xfs_attr_refillstate(xfs_da_state_t *state);  STATIC int -xfs_attr_name_to_xname( -	struct xfs_name	*xname, -	const unsigned char *aname) +xfs_attr_args_init( +	struct xfs_da_args	*args, +	struct xfs_inode	*dp, +	const unsigned char	*name, +	int			flags)  { -	if (!aname) + +	if (!name)  		return EINVAL; -	xname->name = aname; -	xname->len = strlen((char *)aname); -	if (xname->len >= MAXNAMELEN) + +	memset(args, 0, sizeof(*args)); +	args->geo = dp->i_mount->m_attr_geo; +	args->whichfork = XFS_ATTR_FORK; +	args->dp = dp; +	args->flags = flags; +	args->name = name; +	args->namelen = strlen((const char *)name); +	if (args->namelen >= MAXNAMELEN)  		return EFAULT;		/* match IRIX behaviour */ +	args->hashval = xfs_da_hashname(args->name, args->namelen);  	return 0;  } @@ -104,78 +116,46 @@ xfs_inode_hasattr(   * Overall external interface routines.   *========================================================================*/ -STATIC int -xfs_attr_get_int( +int +xfs_attr_get(  	struct xfs_inode	*ip, -	struct xfs_name		*name, +	const unsigned char	*name,  	unsigned char		*value,  	int			*valuelenp,  	int			flags)  { -	xfs_da_args_t   args; -	int             error; +	struct xfs_da_args	args; +	uint			lock_mode; +	int			error; + +	XFS_STATS_INC(xs_attr_get); + +	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) +		return EIO;  	if (!xfs_inode_hasattr(ip))  		return ENOATTR; -	/* -	 * Fill in the arg structure for this request. 
-	 */ -	memset((char *)&args, 0, sizeof(args)); -	args.name = name->name; -	args.namelen = name->len; +	error = xfs_attr_args_init(&args, ip, name, flags); +	if (error) +		return error; +  	args.value = value;  	args.valuelen = *valuelenp; -	args.flags = flags; -	args.hashval = xfs_da_hashname(args.name, args.namelen); -	args.dp = ip; -	args.whichfork = XFS_ATTR_FORK; -	/* -	 * Decide on what work routines to call based on the inode size. -	 */ -	if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { +	lock_mode = xfs_ilock_attr_map_shared(ip); +	if (!xfs_inode_hasattr(ip)) +		error = ENOATTR; +	else if (ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)  		error = xfs_attr_shortform_getvalue(&args); -	} else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK)) { +	else if (xfs_bmap_one_block(ip, XFS_ATTR_FORK))  		error = xfs_attr_leaf_get(&args); -	} else { +	else  		error = xfs_attr_node_get(&args); -	} +	xfs_iunlock(ip, lock_mode); -	/* -	 * Return the number of bytes in the value to the caller. -	 */  	*valuelenp = args.valuelen; - -	if (error == EEXIST) -		error = 0; -	return(error); -} - -int -xfs_attr_get( -	xfs_inode_t	*ip, -	const unsigned char *name, -	unsigned char	*value, -	int		*valuelenp, -	int		flags) -{ -	int		error; -	struct xfs_name	xname; - -	XFS_STATS_INC(xs_attr_get); - -	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) -		return(EIO); - -	error = xfs_attr_name_to_xname(&xname, name); -	if (error) -		return error; - -	xfs_ilock(ip, XFS_ILOCK_SHARED); -	error = xfs_attr_get_int(ip, &xname, value, valuelenp, flags); -	xfs_iunlock(ip, XFS_ILOCK_SHARED); -	return(error); +	return error == EEXIST ? 0 : error;  }  /* @@ -183,12 +163,10 @@ xfs_attr_get(   */  STATIC int  xfs_attr_calc_size( -	struct xfs_inode 	*ip, -	int			namelen, -	int			valuelen, +	struct xfs_da_args	*args,  	int			*local)  { -	struct xfs_mount 	*mp = ip->i_mount; +	struct xfs_mount	*mp = args->dp->i_mount;  	int			size;  	int			nblks; @@ -196,12 +174,10 @@ xfs_attr_calc_size(  	 * Determine space new attribute will use, and if it would be  	 * "local" or "remote" (note: local != inline).  	 */ -	size = xfs_attr_leaf_newentsize(namelen, valuelen, -					mp->m_sb.sb_blocksize, local); - +	size = xfs_attr_leaf_newentsize(args, local);  	nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);  	if (*local) { -		if (size > (mp->m_sb.sb_blocksize >> 1)) { +		if (size > (args->geo->blksize / 2)) {  			/* Double split possible */  			nblks *= 2;  		} @@ -210,7 +186,7 @@ xfs_attr_calc_size(  		 * Out of line attribute, cannot double split, but  		 * make room for the attribute value itself.  		 
*/ -		uint	dblocks = XFS_B_TO_FSB(mp, valuelen); +		uint	dblocks = xfs_attr3_rmt_blocks(mp, args->valuelen);  		nblks += dblocks;  		nblks += XFS_NEXTENTADD_SPACE_RES(mp, dblocks, XFS_ATTR_FORK);  	} @@ -218,26 +194,38 @@ xfs_attr_calc_size(  	return nblks;  } -STATIC int -xfs_attr_set_int( -	struct xfs_inode *dp, -	struct xfs_name	*name, -	unsigned char	*value, -	int		valuelen, -	int		flags) +int +xfs_attr_set( +	struct xfs_inode	*dp, +	const unsigned char	*name, +	unsigned char		*value, +	int			valuelen, +	int			flags)  { -	xfs_da_args_t		args; -	xfs_fsblock_t		firstblock; -	xfs_bmap_free_t		flist; -	int			error, err2, committed;  	struct xfs_mount	*mp = dp->i_mount; +	struct xfs_da_args	args; +	struct xfs_bmap_free	flist;  	struct xfs_trans_res	tres; +	xfs_fsblock_t		firstblock;  	int			rsvd = (flags & ATTR_ROOT) != 0; -	int			local; +	int			error, err2, committed, local; + +	XFS_STATS_INC(xs_attr_set); + +	if (XFS_FORCED_SHUTDOWN(dp->i_mount)) +		return EIO; + +	error = xfs_attr_args_init(&args, dp, name, flags); +	if (error) +		return error; + +	args.value = value; +	args.valuelen = valuelen; +	args.firstblock = &firstblock; +	args.flist = &flist; +	args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; +	args.total = xfs_attr_calc_size(&args, &local); -	/* -	 * Attach the dquots to the inode. -	 */  	error = xfs_qm_dqattach(dp, 0);  	if (error)  		return error; @@ -248,32 +236,14 @@ xfs_attr_set_int(  	 */  	if (XFS_IFORK_Q(dp) == 0) {  		int sf_size = sizeof(xfs_attr_sf_hdr_t) + -			      XFS_ATTR_SF_ENTSIZE_BYNAME(name->len, valuelen); +			XFS_ATTR_SF_ENTSIZE_BYNAME(args.namelen, valuelen); -		if ((error = xfs_bmap_add_attrfork(dp, sf_size, rsvd))) -			return(error); +		error = xfs_bmap_add_attrfork(dp, sf_size, rsvd); +		if (error) +			return error;  	}  	/* -	 * Fill in the arg structure for this request. -	 */ -	memset((char *)&args, 0, sizeof(args)); -	args.name = name->name; -	args.namelen = name->len; -	args.value = value; -	args.valuelen = valuelen; -	args.flags = flags; -	args.hashval = xfs_da_hashname(args.name, args.namelen); -	args.dp = dp; -	args.firstblock = &firstblock; -	args.flist = &flist; -	args.whichfork = XFS_ATTR_FORK; -	args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; - -	/* Size is now blocks for attribute data */ -	args.total = xfs_attr_calc_size(dp, name->len, valuelen, &local); - -	/*  	 * Start our first transaction of the day.  	 *  	 * All future transactions during this code must be "chained" off @@ -300,7 +270,7 @@ xfs_attr_set_int(  	error = xfs_trans_reserve(args.trans, &tres, args.total, 0);  	if (error) {  		xfs_trans_cancel(args.trans, 0); -		return(error); +		return error;  	}  	xfs_ilock(dp, XFS_ILOCK_EXCL); @@ -310,7 +280,7 @@ xfs_attr_set_int(  	if (error) {  		xfs_iunlock(dp, XFS_ILOCK_EXCL);  		xfs_trans_cancel(args.trans, XFS_TRANS_RELEASE_LOG_RES); -		return (error); +		return error;  	}  	xfs_trans_ijoin(args.trans, dp, 0); @@ -319,9 +289,9 @@ xfs_attr_set_int(  	 * If the attribute list is non-existent or a shortform list,  	 * upgrade it to a single-leaf-block attribute list.  	 */ -	if ((dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) || -	    ((dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS) && -	     (dp->i_d.di_anextents == 0))) { +	if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL || +	    (dp->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS && +	     dp->i_d.di_anextents == 0)) {  		/*  		 * Build initial attribute list (if required). 
@@ -346,9 +316,8 @@ xfs_attr_set_int(  			 * the transaction goes to disk before returning  			 * to the user.  			 */ -			if (mp->m_flags & XFS_MOUNT_WSYNC) { +			if (mp->m_flags & XFS_MOUNT_WSYNC)  				xfs_trans_set_sync(args.trans); -			}  			if (!error && (flags & ATTR_KERNOTIME) == 0) {  				xfs_trans_ichgtime(args.trans, dp, @@ -358,7 +327,7 @@ xfs_attr_set_int(  						 XFS_TRANS_RELEASE_LOG_RES);  			xfs_iunlock(dp, XFS_ILOCK_EXCL); -			return(error == 0 ? err2 : error); +			return error ? error : err2;  		}  		/* @@ -396,22 +365,19 @@ xfs_attr_set_int(  	} -	if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) { +	if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))  		error = xfs_attr_leaf_addname(&args); -	} else { +	else  		error = xfs_attr_node_addname(&args); -	} -	if (error) { +	if (error)  		goto out; -	}  	/*  	 * If this is a synchronous mount, make sure that the  	 * transaction goes to disk before returning to the user.  	 */ -	if (mp->m_flags & XFS_MOUNT_WSYNC) { +	if (mp->m_flags & XFS_MOUNT_WSYNC)  		xfs_trans_set_sync(args.trans); -	}  	if ((flags & ATTR_KERNOTIME) == 0)  		xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); @@ -423,65 +389,47 @@ xfs_attr_set_int(  	error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);  	xfs_iunlock(dp, XFS_ILOCK_EXCL); -	return(error); +	return error;  out: -	if (args.trans) +	if (args.trans) {  		xfs_trans_cancel(args.trans,  			XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); +	}  	xfs_iunlock(dp, XFS_ILOCK_EXCL); -	return(error); +	return error;  } +/* + * Generic handler routine to remove a name from an attribute list. + * Transitions attribute list from Btree to shortform as necessary. + */  int -xfs_attr_set( -	xfs_inode_t	*dp, -	const unsigned char *name, -	unsigned char	*value, -	int		valuelen, -	int		flags) +xfs_attr_remove( +	struct xfs_inode	*dp, +	const unsigned char	*name, +	int			flags)  { -	int             error; -	struct xfs_name	xname; +	struct xfs_mount	*mp = dp->i_mount; +	struct xfs_da_args	args; +	struct xfs_bmap_free	flist; +	xfs_fsblock_t		firstblock; +	int			error; -	XFS_STATS_INC(xs_attr_set); +	XFS_STATS_INC(xs_attr_remove);  	if (XFS_FORCED_SHUTDOWN(dp->i_mount)) -		return (EIO); +		return EIO; + +	if (!xfs_inode_hasattr(dp)) +		return ENOATTR; -	error = xfs_attr_name_to_xname(&xname, name); +	error = xfs_attr_args_init(&args, dp, name, flags);  	if (error)  		return error; -	return xfs_attr_set_int(dp, &xname, value, valuelen, flags); -} - -/* - * Generic handler routine to remove a name from an attribute list. - * Transitions attribute list from Btree to shortform as necessary. - */ -STATIC int -xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags) -{ -	xfs_da_args_t	args; -	xfs_fsblock_t	firstblock; -	xfs_bmap_free_t	flist; -	int		error; -	xfs_mount_t	*mp = dp->i_mount; - -	/* -	 * Fill in the arg structure for this request. -	 */ -	memset((char *)&args, 0, sizeof(args)); -	args.name = name->name; -	args.namelen = name->len; -	args.flags = flags; -	args.hashval = xfs_da_hashname(args.name, args.namelen); -	args.dp = dp;  	args.firstblock = &firstblock;  	args.flist = &flist; -	args.total = 0; -	args.whichfork = XFS_ATTR_FORK;  	/*  	 * we have no control over the attribute names that userspace passes us @@ -490,9 +438,6 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)  	 */  	args.op_flags = XFS_DA_OP_OKNOENT; -	/* -	 * Attach the dquots to the inode. 
-	 */  	error = xfs_qm_dqattach(dp, 0);  	if (error)  		return error; @@ -521,7 +466,7 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)  				  XFS_ATTRRM_SPACE_RES(mp), 0);  	if (error) {  		xfs_trans_cancel(args.trans, 0); -		return(error); +		return error;  	}  	xfs_ilock(dp, XFS_ILOCK_EXCL); @@ -531,35 +476,26 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)  	 */  	xfs_trans_ijoin(args.trans, dp, 0); -	/* -	 * Decide on what work routines to call based on the inode size. -	 */  	if (!xfs_inode_hasattr(dp)) {  		error = XFS_ERROR(ENOATTR); -		goto out; -	} -	if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { +	} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) {  		ASSERT(dp->i_afp->if_flags & XFS_IFINLINE);  		error = xfs_attr_shortform_remove(&args); -		if (error) { -			goto out; -		}  	} else if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) {  		error = xfs_attr_leaf_removename(&args);  	} else {  		error = xfs_attr_node_removename(&args);  	} -	if (error) { + +	if (error)  		goto out; -	}  	/*  	 * If this is a synchronous mount, make sure that the  	 * transaction goes to disk before returning to the user.  	 */ -	if (mp->m_flags & XFS_MOUNT_WSYNC) { +	if (mp->m_flags & XFS_MOUNT_WSYNC)  		xfs_trans_set_sync(args.trans); -	}  	if ((flags & ATTR_KERNOTIME) == 0)  		xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG); @@ -571,45 +507,17 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)  	error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);  	xfs_iunlock(dp, XFS_ILOCK_EXCL); -	return(error); +	return error;  out: -	if (args.trans) +	if (args.trans) {  		xfs_trans_cancel(args.trans,  			XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); -	xfs_iunlock(dp, XFS_ILOCK_EXCL); -	return(error); -} - -int -xfs_attr_remove( -	xfs_inode_t	*dp, -	const unsigned char *name, -	int		flags) -{ -	int		error; -	struct xfs_name	xname; - -	XFS_STATS_INC(xs_attr_remove); - -	if (XFS_FORCED_SHUTDOWN(dp->i_mount)) -		return (EIO); - -	error = xfs_attr_name_to_xname(&xname, name); -	if (error) -		return error; - -	xfs_ilock(dp, XFS_ILOCK_SHARED); -	if (!xfs_inode_hasattr(dp)) { -		xfs_iunlock(dp, XFS_ILOCK_SHARED); -		return XFS_ERROR(ENOATTR);  	} -	xfs_iunlock(dp, XFS_ILOCK_SHARED); - -	return xfs_attr_remove_int(dp, &xname, flags); +	xfs_iunlock(dp, XFS_ILOCK_EXCL); +	return error;  } -  /*========================================================================   * External routines when attribute list is inside the inode   *========================================================================*/ @@ -695,11 +603,22 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)  		trace_xfs_attr_leaf_replace(args); +		/* save the attribute state for later removal*/  		args->op_flags |= XFS_DA_OP_RENAME;	/* an atomic rename */  		args->blkno2 = args->blkno;		/* set 2nd entry info*/  		args->index2 = args->index;  		args->rmtblkno2 = args->rmtblkno;  		args->rmtblkcnt2 = args->rmtblkcnt; +		args->rmtvaluelen2 = args->rmtvaluelen; + +		/* +		 * clear the remote attr state now that it is saved so that the +		 * values reflect the state of the attribute we are about to +		 * add, not the attribute we just found and will remove later. 
+		 */ +		args->rmtblkno = 0; +		args->rmtblkcnt = 0; +		args->rmtvaluelen = 0;  	}  	/* @@ -791,6 +710,7 @@ xfs_attr_leaf_addname(xfs_da_args_t *args)  		args->blkno = args->blkno2;  		args->rmtblkno = args->rmtblkno2;  		args->rmtblkcnt = args->rmtblkcnt2; +		args->rmtvaluelen = args->rmtvaluelen2;  		if (args->rmtblkno) {  			error = xfs_attr_rmtval_remove(args);  			if (error) @@ -943,7 +863,7 @@ xfs_attr_leaf_get(xfs_da_args_t *args)  }  /*======================================================================== - * External routines when attribute list size > XFS_LBSIZE(mp). + * External routines when attribute list size > geo->blksize   *========================================================================*/  /* @@ -976,8 +896,6 @@ restart:  	state = xfs_da_state_alloc();  	state->args = args;  	state->mp = mp; -	state->blocksize = state->mp->m_sb.sb_blocksize; -	state->node_ents = state->mp->m_attr_node_ents;  	/*  	 * Search to see if name already exists, and get back a pointer @@ -996,13 +914,22 @@ restart:  		trace_xfs_attr_node_replace(args); +		/* save the attribute state for later removal*/  		args->op_flags |= XFS_DA_OP_RENAME;	/* atomic rename op */  		args->blkno2 = args->blkno;		/* set 2nd entry info*/  		args->index2 = args->index;  		args->rmtblkno2 = args->rmtblkno;  		args->rmtblkcnt2 = args->rmtblkcnt; +		args->rmtvaluelen2 = args->rmtvaluelen; + +		/* +		 * clear the remote attr state now that it is saved so that the +		 * values reflect the state of the attribute we are about to +		 * add, not the attribute we just found and will remove later. +		 */  		args->rmtblkno = 0;  		args->rmtblkcnt = 0; +		args->rmtvaluelen = 0;  	}  	retval = xfs_attr3_leaf_add(blk->bp, state->args); @@ -1130,6 +1057,7 @@ restart:  		args->blkno = args->blkno2;  		args->rmtblkno = args->rmtblkno2;  		args->rmtblkcnt = args->rmtblkcnt2; +		args->rmtvaluelen = args->rmtvaluelen2;  		if (args->rmtblkno) {  			error = xfs_attr_rmtval_remove(args);  			if (error) @@ -1145,8 +1073,6 @@ restart:  		state = xfs_da_state_alloc();  		state->args = args;  		state->mp = mp; -		state->blocksize = state->mp->m_sb.sb_blocksize; -		state->node_ents = state->mp->m_attr_node_ents;  		state->inleaf = 0;  		error = xfs_da3_node_lookup_int(state, &retval);  		if (error) @@ -1237,8 +1163,6 @@ xfs_attr_node_removename(xfs_da_args_t *args)  	state = xfs_da_state_alloc();  	state->args = args;  	state->mp = dp->i_mount; -	state->blocksize = state->mp->m_sb.sb_blocksize; -	state->node_ents = state->mp->m_attr_node_ents;  	/*  	 * Search to see if name exists, and get back a pointer to it. @@ -1500,8 +1424,6 @@ xfs_attr_node_get(xfs_da_args_t *args)  	state = xfs_da_state_alloc();  	state->args = args;  	state->mp = args->dp->i_mount; -	state->blocksize = state->mp->m_sb.sb_blocksize; -	state->node_ents = state->mp->m_attr_node_ents;  	/*  	 * Search to see if name exists, and get back a pointer to it. 
diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index bb24b07cbed..09480c57f06 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -18,22 +18,20 @@   */  #include "xfs.h"  #include "xfs_fs.h" +#include "xfs_shared.h"  #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" +#include "xfs_da_format.h"  #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" +#include "xfs_inode.h"  #include "xfs_alloc.h" -#include "xfs_btree.h"  #include "xfs_attr_remote.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" +#include "xfs_trans.h"  #include "xfs_inode_item.h"  #include "xfs_bmap.h"  #include "xfs_attr.h" @@ -41,7 +39,8 @@  #include "xfs_error.h"  #include "xfs_quota.h"  #include "xfs_trace.h" -#include "xfs_trans_priv.h" +#include "xfs_dinode.h" +#include "xfs_dir2.h"  /*   * Look at all the extents for this logical region, @@ -232,13 +231,13 @@ xfs_attr3_node_inactive(  	}  	node = bp->b_addr; -	xfs_da3_node_hdr_from_disk(&ichdr, node); +	dp->d_ops->node_hdr_from_disk(&ichdr, node);  	parent_blkno = bp->b_bn;  	if (!ichdr.count) {  		xfs_trans_brelse(*trans, bp);  		return 0;  	} -	btree = xfs_da3_node_tree_p(node); +	btree = dp->d_ops->node_tree_p(node);  	child_fsb = be32_to_cpu(btree[0].before);  	xfs_trans_brelse(*trans, bp);	/* no locks for later trans */ diff --git a/fs/xfs/xfs_attr_leaf.c b/fs/xfs/xfs_attr_leaf.c index 86db20a9cc0..28712d29e43 100644 --- a/fs/xfs/xfs_attr_leaf.c +++ b/fs/xfs/xfs_attr_leaf.c @@ -18,32 +18,31 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" +#include "xfs_da_format.h"  #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_alloc.h" -#include "xfs_btree.h" -#include "xfs_attr_sf.h" -#include "xfs_attr_remote.h" -#include "xfs_dinode.h"  #include "xfs_inode.h" +#include "xfs_trans.h"  #include "xfs_inode_item.h" +#include "xfs_bmap_btree.h"  #include "xfs_bmap.h" +#include "xfs_attr_sf.h" +#include "xfs_attr_remote.h"  #include "xfs_attr.h"  #include "xfs_attr_leaf.h"  #include "xfs_error.h"  #include "xfs_trace.h"  #include "xfs_buf_item.h"  #include "xfs_cksum.h" +#include "xfs_dinode.h" +#include "xfs_dir2.h"  /* @@ -81,11 +80,12 @@ STATIC int xfs_attr3_leaf_figure_balance(xfs_da_state_t *state,  /*   * Utility routines.   
*/ -STATIC void xfs_attr3_leaf_moveents(struct xfs_attr_leafblock *src_leaf, +STATIC void xfs_attr3_leaf_moveents(struct xfs_da_args *args, +			struct xfs_attr_leafblock *src_leaf,  			struct xfs_attr3_icleaf_hdr *src_ichdr, int src_start,  			struct xfs_attr_leafblock *dst_leaf,  			struct xfs_attr3_icleaf_hdr *dst_ichdr, int dst_start, -			int move_count, struct xfs_mount *mp); +			int move_count);  STATIC int xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index);  void @@ -214,8 +214,8 @@ xfs_attr3_leaf_write_verify(  	struct xfs_attr3_leaf_hdr *hdr3 = bp->b_addr;  	if (!xfs_attr3_leaf_verify(bp)) { -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);  		xfs_buf_ioerror(bp, EFSCORRUPTED); +		xfs_verifier_error(bp);  		return;  	} @@ -225,7 +225,7 @@ xfs_attr3_leaf_write_verify(  	if (bip)  		hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); -	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_ATTR3_LEAF_CRC_OFF); +	xfs_buf_update_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF);  }  /* @@ -240,13 +240,14 @@ xfs_attr3_leaf_read_verify(  {  	struct xfs_mount	*mp = bp->b_target->bt_mount; -	if ((xfs_sb_version_hascrc(&mp->m_sb) && -	     !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -					  XFS_ATTR3_LEAF_CRC_OFF)) || -	    !xfs_attr3_leaf_verify(bp)) { -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); +	if (xfs_sb_version_hascrc(&mp->m_sb) && +	     !xfs_buf_verify_cksum(bp, XFS_ATTR3_LEAF_CRC_OFF)) +		xfs_buf_ioerror(bp, EFSBADCRC); +	else if (!xfs_attr3_leaf_verify(bp))  		xfs_buf_ioerror(bp, EFSCORRUPTED); -	} + +	if (bp->b_error) +		xfs_verifier_error(bp);  }  const struct xfs_buf_ops xfs_attr3_leaf_buf_ops = { @@ -711,6 +712,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)  	memset((char *)&nargs, 0, sizeof(nargs));  	nargs.dp = dp; +	nargs.geo = args->geo;  	nargs.firstblock = args->firstblock;  	nargs.flist = args->flist;  	nargs.total = args->total; @@ -805,18 +807,18 @@ xfs_attr3_leaf_to_shortform(  	trace_xfs_attr_leaf_to_sf(args); -	tmpbuffer = kmem_alloc(XFS_LBSIZE(dp->i_mount), KM_SLEEP); +	tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP);  	if (!tmpbuffer)  		return ENOMEM; -	memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(dp->i_mount)); +	memcpy(tmpbuffer, bp->b_addr, args->geo->blksize);  	leaf = (xfs_attr_leafblock_t *)tmpbuffer;  	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);  	entry = xfs_attr3_leaf_entryp(leaf);  	/* XXX (dgc): buffer is about to be marked stale - why zero it? */ -	memset(bp->b_addr, 0, XFS_LBSIZE(dp->i_mount)); +	memset(bp->b_addr, 0, args->geo->blksize);  	/*  	 * Clean out the prior contents of the attribute list. @@ -838,6 +840,7 @@ xfs_attr3_leaf_to_shortform(  	 * Copy the attributes  	 */  	memset((char *)&nargs, 0, sizeof(nargs)); +	nargs.geo = args->geo;  	nargs.dp = dp;  	nargs.firstblock = args->firstblock;  	nargs.flist = args->flist; @@ -904,12 +907,12 @@ xfs_attr3_leaf_to_node(  	/* copy leaf to new buffer, update identifiers */  	xfs_trans_buf_set_type(args->trans, bp2, XFS_BLFT_ATTR_LEAF_BUF);  	bp2->b_ops = bp1->b_ops; -	memcpy(bp2->b_addr, bp1->b_addr, XFS_LBSIZE(mp)); +	memcpy(bp2->b_addr, bp1->b_addr, args->geo->blksize);  	if (xfs_sb_version_hascrc(&mp->m_sb)) {  		struct xfs_da3_blkinfo *hdr3 = bp2->b_addr;  		hdr3->blkno = cpu_to_be64(bp2->b_bn);  	} -	xfs_trans_log_buf(args->trans, bp2, 0, XFS_LBSIZE(mp) - 1); +	xfs_trans_log_buf(args->trans, bp2, 0, args->geo->blksize - 1);  	/*  	 * Set up the new root node. 
@@ -918,8 +921,8 @@ xfs_attr3_leaf_to_node(  	if (error)  		goto out;  	node = bp1->b_addr; -	xfs_da3_node_hdr_from_disk(&icnodehdr, node); -	btree = xfs_da3_node_tree_p(node); +	dp->d_ops->node_hdr_from_disk(&icnodehdr, node); +	btree = dp->d_ops->node_tree_p(node);  	leaf = bp2->b_addr;  	xfs_attr3_leaf_hdr_from_disk(&icleafhdr, leaf); @@ -929,8 +932,8 @@ xfs_attr3_leaf_to_node(  	btree[0].hashval = entries[icleafhdr.count - 1].hashval;  	btree[0].before = cpu_to_be32(blkno);  	icnodehdr.count = 1; -	xfs_da3_node_hdr_to_disk(node, &icnodehdr); -	xfs_trans_log_buf(args->trans, bp1, 0, XFS_LBSIZE(mp) - 1); +	dp->d_ops->node_hdr_to_disk(node, &icnodehdr); +	xfs_trans_log_buf(args->trans, bp1, 0, args->geo->blksize - 1);  	error = 0;  out:  	return error; @@ -966,10 +969,10 @@ xfs_attr3_leaf_create(  	bp->b_ops = &xfs_attr3_leaf_buf_ops;  	xfs_trans_buf_set_type(args->trans, bp, XFS_BLFT_ATTR_LEAF_BUF);  	leaf = bp->b_addr; -	memset(leaf, 0, XFS_LBSIZE(mp)); +	memset(leaf, 0, args->geo->blksize);  	memset(&ichdr, 0, sizeof(ichdr)); -	ichdr.firstused = XFS_LBSIZE(mp); +	ichdr.firstused = args->geo->blksize;  	if (xfs_sb_version_hascrc(&mp->m_sb)) {  		struct xfs_da3_blkinfo *hdr3 = bp->b_addr; @@ -988,7 +991,7 @@ xfs_attr3_leaf_create(  	ichdr.freemap[0].size = ichdr.firstused - ichdr.freemap[0].base;  	xfs_attr3_leaf_hdr_to_disk(leaf, &ichdr); -	xfs_trans_log_buf(args->trans, bp, 0, XFS_LBSIZE(mp) - 1); +	xfs_trans_log_buf(args->trans, bp, 0, args->geo->blksize - 1);  	*bpp = bp;  	return 0; @@ -1074,8 +1077,7 @@ xfs_attr3_leaf_add(  	leaf = bp->b_addr;  	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);  	ASSERT(args->index >= 0 && args->index <= ichdr.count); -	entsize = xfs_attr_leaf_newentsize(args->namelen, args->valuelen, -			   args->trans->t_mountp->m_sb.sb_blocksize, NULL); +	entsize = xfs_attr_leaf_newentsize(args, NULL);  	/*  	 * Search through freemap for first-fit on new name length. @@ -1174,17 +1176,14 @@ xfs_attr3_leaf_add_work(  	 * Allocate space for the new string (at the end of the run).  	 
*/  	mp = args->trans->t_mountp; -	ASSERT(ichdr->freemap[mapindex].base < XFS_LBSIZE(mp)); +	ASSERT(ichdr->freemap[mapindex].base < args->geo->blksize);  	ASSERT((ichdr->freemap[mapindex].base & 0x3) == 0);  	ASSERT(ichdr->freemap[mapindex].size >= -		xfs_attr_leaf_newentsize(args->namelen, args->valuelen, -					 mp->m_sb.sb_blocksize, NULL)); -	ASSERT(ichdr->freemap[mapindex].size < XFS_LBSIZE(mp)); +		xfs_attr_leaf_newentsize(args, NULL)); +	ASSERT(ichdr->freemap[mapindex].size < args->geo->blksize);  	ASSERT((ichdr->freemap[mapindex].size & 0x3) == 0); -	ichdr->freemap[mapindex].size -= -			xfs_attr_leaf_newentsize(args->namelen, args->valuelen, -						 mp->m_sb.sb_blocksize, &tmp); +	ichdr->freemap[mapindex].size -= xfs_attr_leaf_newentsize(args, &tmp);  	entry->nameidx = cpu_to_be16(ichdr->freemap[mapindex].base +  				     ichdr->freemap[mapindex].size); @@ -1229,6 +1228,7 @@ xfs_attr3_leaf_add_work(  		name_rmt->valueblk = 0;  		args->rmtblkno = 1;  		args->rmtblkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen); +		args->rmtvaluelen = args->valuelen;  	}  	xfs_trans_log_buf(args->trans, bp,  	     XFS_DA_LOGRANGE(leaf, xfs_attr3_leaf_name(leaf, args->index), @@ -1268,14 +1268,13 @@ xfs_attr3_leaf_compact(  	struct xfs_attr_leafblock *leaf_dst;  	struct xfs_attr3_icleaf_hdr ichdr_src;  	struct xfs_trans	*trans = args->trans; -	struct xfs_mount	*mp = trans->t_mountp;  	char			*tmpbuffer;  	trace_xfs_attr_leaf_compact(args); -	tmpbuffer = kmem_alloc(XFS_LBSIZE(mp), KM_SLEEP); -	memcpy(tmpbuffer, bp->b_addr, XFS_LBSIZE(mp)); -	memset(bp->b_addr, 0, XFS_LBSIZE(mp)); +	tmpbuffer = kmem_alloc(args->geo->blksize, KM_SLEEP); +	memcpy(tmpbuffer, bp->b_addr, args->geo->blksize); +	memset(bp->b_addr, 0, args->geo->blksize);  	leaf_src = (xfs_attr_leafblock_t *)tmpbuffer;  	leaf_dst = bp->b_addr; @@ -1288,7 +1287,7 @@ xfs_attr3_leaf_compact(  	/* Initialise the incore headers */  	ichdr_src = *ichdr_dst;	/* struct copy */ -	ichdr_dst->firstused = XFS_LBSIZE(mp); +	ichdr_dst->firstused = args->geo->blksize;  	ichdr_dst->usedbytes = 0;  	ichdr_dst->count = 0;  	ichdr_dst->holes = 0; @@ -1303,13 +1302,13 @@ xfs_attr3_leaf_compact(  	 * Copy all entry's in the same (sorted) order,  	 * but allocate name/value pairs packed and in sequence.  	 */ -	xfs_attr3_leaf_moveents(leaf_src, &ichdr_src, 0, leaf_dst, ichdr_dst, 0, -				ichdr_src.count, mp); +	xfs_attr3_leaf_moveents(args, leaf_src, &ichdr_src, 0, +				leaf_dst, ichdr_dst, 0, ichdr_src.count);  	/*  	 * this logs the entire buffer, but the caller must write the header  	 * back to the buffer when it is finished modifying it.  	 */ -	xfs_trans_log_buf(trans, bp, 0, XFS_LBSIZE(mp) - 1); +	xfs_trans_log_buf(trans, bp, 0, args->geo->blksize - 1);  	kmem_free(tmpbuffer);  } @@ -1460,8 +1459,8 @@ xfs_attr3_leaf_rebalance(  		/*  		 * Move high entries from leaf1 to low end of leaf2.  		 */ -		xfs_attr3_leaf_moveents(leaf1, &ichdr1, ichdr1.count - count, -				leaf2, &ichdr2, 0, count, state->mp); +		xfs_attr3_leaf_moveents(args, leaf1, &ichdr1, +				ichdr1.count - count, leaf2, &ichdr2, 0, count);  	} else if (count > ichdr1.count) {  		/* @@ -1489,14 +1488,14 @@ xfs_attr3_leaf_rebalance(  		/*  		 * Move low entries from leaf2 to high end of leaf1.  		 
*/ -		xfs_attr3_leaf_moveents(leaf2, &ichdr2, 0, leaf1, &ichdr1, -					ichdr1.count, count, state->mp); +		xfs_attr3_leaf_moveents(args, leaf2, &ichdr2, 0, leaf1, &ichdr1, +					ichdr1.count, count);  	}  	xfs_attr3_leaf_hdr_to_disk(leaf1, &ichdr1);  	xfs_attr3_leaf_hdr_to_disk(leaf2, &ichdr2); -	xfs_trans_log_buf(args->trans, blk1->bp, 0, state->blocksize-1); -	xfs_trans_log_buf(args->trans, blk2->bp, 0, state->blocksize-1); +	xfs_trans_log_buf(args->trans, blk1->bp, 0, args->geo->blksize - 1); +	xfs_trans_log_buf(args->trans, blk2->bp, 0, args->geo->blksize - 1);  	/*  	 * Copy out last hashval in each block for B-tree code. @@ -1591,11 +1590,9 @@ xfs_attr3_leaf_figure_balance(  	max = ichdr1->count + ichdr2->count;  	half = (max + 1) * sizeof(*entry);  	half += ichdr1->usedbytes + ichdr2->usedbytes + -			xfs_attr_leaf_newentsize(state->args->namelen, -						 state->args->valuelen, -						 state->blocksize, NULL); +			xfs_attr_leaf_newentsize(state->args, NULL);  	half /= 2; -	lastdelta = state->blocksize; +	lastdelta = state->args->geo->blksize;  	entry = xfs_attr3_leaf_entryp(leaf1);  	for (count = index = 0; count < max; entry++, index++, count++) { @@ -1605,10 +1602,7 @@ xfs_attr3_leaf_figure_balance(  		 */  		if (count == blk1->index) {  			tmp = totallen + sizeof(*entry) + -				xfs_attr_leaf_newentsize( -						state->args->namelen, -						state->args->valuelen, -						state->blocksize, NULL); +				xfs_attr_leaf_newentsize(state->args, NULL);  			if (XFS_ATTR_ABS(half - tmp) > lastdelta)  				break;  			lastdelta = XFS_ATTR_ABS(half - tmp); @@ -1644,10 +1638,7 @@ xfs_attr3_leaf_figure_balance(  	totallen -= count * sizeof(*entry);  	if (foundit) {  		totallen -= sizeof(*entry) + -				xfs_attr_leaf_newentsize( -						state->args->namelen, -						state->args->valuelen, -						state->blocksize, NULL); +				xfs_attr_leaf_newentsize(state->args, NULL);  	}  	*countarg = count; @@ -1699,7 +1690,7 @@ xfs_attr3_leaf_toosmall(  	bytes = xfs_attr3_leaf_hdr_size(leaf) +  		ichdr.count * sizeof(xfs_attr_leaf_entry_t) +  		ichdr.usedbytes; -	if (bytes > (state->blocksize >> 1)) { +	if (bytes > (state->args->geo->blksize >> 1)) {  		*action = 0;	/* blk over 50%, don't try to join */  		return(0);  	} @@ -1753,7 +1744,8 @@ xfs_attr3_leaf_toosmall(  		xfs_attr3_leaf_hdr_from_disk(&ichdr2, bp->b_addr); -		bytes = state->blocksize - (state->blocksize >> 2) - +		bytes = state->args->geo->blksize - +			(state->args->geo->blksize >> 2) -  			ichdr.usedbytes - ichdr2.usedbytes -  			((ichdr.count + ichdr2.count) *  					sizeof(xfs_attr_leaf_entry_t)) - @@ -1804,7 +1796,6 @@ xfs_attr3_leaf_remove(  	struct xfs_attr_leafblock *leaf;  	struct xfs_attr3_icleaf_hdr ichdr;  	struct xfs_attr_leaf_entry *entry; -	struct xfs_mount	*mp = args->trans->t_mountp;  	int			before;  	int			after;  	int			smallest; @@ -1818,7 +1809,7 @@ xfs_attr3_leaf_remove(  	leaf = bp->b_addr;  	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); -	ASSERT(ichdr.count > 0 && ichdr.count < XFS_LBSIZE(mp) / 8); +	ASSERT(ichdr.count > 0 && ichdr.count < args->geo->blksize / 8);  	ASSERT(args->index >= 0 && args->index < ichdr.count);  	ASSERT(ichdr.firstused >= ichdr.count * sizeof(*entry) +  					xfs_attr3_leaf_hdr_size(leaf)); @@ -1826,7 +1817,7 @@ xfs_attr3_leaf_remove(  	entry = &xfs_attr3_leaf_entryp(leaf)[args->index];  	ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused); -	ASSERT(be16_to_cpu(entry->nameidx) < XFS_LBSIZE(mp)); +	ASSERT(be16_to_cpu(entry->nameidx) < args->geo->blksize);  	/*  	 * Scan through free region table: @@ 
-1841,8 +1832,8 @@ xfs_attr3_leaf_remove(  	smallest = XFS_ATTR_LEAF_MAPSIZE - 1;  	entsize = xfs_attr_leaf_entsize(leaf, args->index);  	for (i = 0; i < XFS_ATTR_LEAF_MAPSIZE; i++) { -		ASSERT(ichdr.freemap[i].base < XFS_LBSIZE(mp)); -		ASSERT(ichdr.freemap[i].size < XFS_LBSIZE(mp)); +		ASSERT(ichdr.freemap[i].base < args->geo->blksize); +		ASSERT(ichdr.freemap[i].size < args->geo->blksize);  		if (ichdr.freemap[i].base == tablesize) {  			ichdr.freemap[i].base -= sizeof(xfs_attr_leaf_entry_t);  			ichdr.freemap[i].size += sizeof(xfs_attr_leaf_entry_t); @@ -1919,11 +1910,11 @@ xfs_attr3_leaf_remove(  	 * removing the name.  	 */  	if (smallest) { -		tmp = XFS_LBSIZE(mp); +		tmp = args->geo->blksize;  		entry = xfs_attr3_leaf_entryp(leaf);  		for (i = ichdr.count - 1; i >= 0; entry++, i--) {  			ASSERT(be16_to_cpu(entry->nameidx) >= ichdr.firstused); -			ASSERT(be16_to_cpu(entry->nameidx) < XFS_LBSIZE(mp)); +			ASSERT(be16_to_cpu(entry->nameidx) < args->geo->blksize);  			if (be16_to_cpu(entry->nameidx) < tmp)  				tmp = be16_to_cpu(entry->nameidx); @@ -1946,7 +1937,7 @@ xfs_attr3_leaf_remove(  	tmp = ichdr.usedbytes + xfs_attr3_leaf_hdr_size(leaf) +  	      ichdr.count * sizeof(xfs_attr_leaf_entry_t); -	return tmp < mp->m_attr_magicpct; /* leaf is < 37% full */ +	return tmp < args->geo->magicpct; /* leaf is < 37% full */  }  /* @@ -1963,7 +1954,6 @@ xfs_attr3_leaf_unbalance(  	struct xfs_attr3_icleaf_hdr drophdr;  	struct xfs_attr3_icleaf_hdr savehdr;  	struct xfs_attr_leaf_entry *entry; -	struct xfs_mount	*mp = state->mp;  	trace_xfs_attr_leaf_unbalance(state->args); @@ -1990,13 +1980,15 @@ xfs_attr3_leaf_unbalance(  		 */  		if (xfs_attr3_leaf_order(save_blk->bp, &savehdr,  					 drop_blk->bp, &drophdr)) { -			xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0, +			xfs_attr3_leaf_moveents(state->args, +						drop_leaf, &drophdr, 0,  						save_leaf, &savehdr, 0, -						drophdr.count, mp); +						drophdr.count);  		} else { -			xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0, +			xfs_attr3_leaf_moveents(state->args, +						drop_leaf, &drophdr, 0,  						save_leaf, &savehdr, -						savehdr.count, drophdr.count, mp); +						savehdr.count, drophdr.count);  		}  	} else {  		/* @@ -2006,7 +1998,7 @@ xfs_attr3_leaf_unbalance(  		struct xfs_attr_leafblock *tmp_leaf;  		struct xfs_attr3_icleaf_hdr tmphdr; -		tmp_leaf = kmem_zalloc(state->blocksize, KM_SLEEP); +		tmp_leaf = kmem_zalloc(state->args->geo->blksize, KM_SLEEP);  		/*  		 * Copy the header into the temp leaf so that all the stuff @@ -2019,35 +2011,39 @@ xfs_attr3_leaf_unbalance(  		tmphdr.magic = savehdr.magic;  		tmphdr.forw = savehdr.forw;  		tmphdr.back = savehdr.back; -		tmphdr.firstused = state->blocksize; +		tmphdr.firstused = state->args->geo->blksize;  		/* write the header to the temp buffer to initialise it */  		xfs_attr3_leaf_hdr_to_disk(tmp_leaf, &tmphdr);  		if (xfs_attr3_leaf_order(save_blk->bp, &savehdr,  					 drop_blk->bp, &drophdr)) { -			xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0, +			xfs_attr3_leaf_moveents(state->args, +						drop_leaf, &drophdr, 0,  						tmp_leaf, &tmphdr, 0, -						drophdr.count, mp); -			xfs_attr3_leaf_moveents(save_leaf, &savehdr, 0, +						drophdr.count); +			xfs_attr3_leaf_moveents(state->args, +						save_leaf, &savehdr, 0,  						tmp_leaf, &tmphdr, tmphdr.count, -						savehdr.count, mp); +						savehdr.count);  		} else { -			xfs_attr3_leaf_moveents(save_leaf, &savehdr, 0, +			xfs_attr3_leaf_moveents(state->args, +						save_leaf, &savehdr, 0,  						tmp_leaf, &tmphdr, 0, -						
savehdr.count, mp); -			xfs_attr3_leaf_moveents(drop_leaf, &drophdr, 0, +						savehdr.count); +			xfs_attr3_leaf_moveents(state->args, +						drop_leaf, &drophdr, 0,  						tmp_leaf, &tmphdr, tmphdr.count, -						drophdr.count, mp); +						drophdr.count);  		} -		memcpy(save_leaf, tmp_leaf, state->blocksize); +		memcpy(save_leaf, tmp_leaf, state->args->geo->blksize);  		savehdr = tmphdr; /* struct copy */  		kmem_free(tmp_leaf);  	}  	xfs_attr3_leaf_hdr_to_disk(save_leaf, &savehdr);  	xfs_trans_log_buf(state->args->trans, save_blk->bp, 0, -					   state->blocksize - 1); +					   state->args->geo->blksize - 1);  	/*  	 * Copy out last hashval in each block for B-tree code. @@ -2093,7 +2089,7 @@ xfs_attr3_leaf_lookup_int(  	leaf = bp->b_addr;  	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf);  	entries = xfs_attr3_leaf_entryp(leaf); -	ASSERT(ichdr.count < XFS_LBSIZE(args->dp->i_mount) / 8); +	ASSERT(ichdr.count < args->geo->blksize / 8);  	/*  	 * Binary search.  (note: small blocks will skip this loop) @@ -2167,11 +2163,11 @@ xfs_attr3_leaf_lookup_int(  			if (!xfs_attr_namesp_match(args->flags, entry->flags))  				continue;  			args->index = probe; -			args->valuelen = be32_to_cpu(name_rmt->valuelen); +			args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen);  			args->rmtblkno = be32_to_cpu(name_rmt->valueblk);  			args->rmtblkcnt = xfs_attr3_rmt_blocks(  							args->dp->i_mount, -							args->valuelen); +							args->rmtvaluelen);  			return XFS_ERROR(EEXIST);  		}  	} @@ -2197,7 +2193,7 @@ xfs_attr3_leaf_getvalue(  	leaf = bp->b_addr;  	xfs_attr3_leaf_hdr_from_disk(&ichdr, leaf); -	ASSERT(ichdr.count < XFS_LBSIZE(args->dp->i_mount) / 8); +	ASSERT(ichdr.count < args->geo->blksize / 8);  	ASSERT(args->index < ichdr.count);  	entry = &xfs_attr3_leaf_entryp(leaf)[args->index]; @@ -2220,19 +2216,19 @@ xfs_attr3_leaf_getvalue(  		name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);  		ASSERT(name_rmt->namelen == args->namelen);  		ASSERT(memcmp(args->name, name_rmt->name, args->namelen) == 0); -		valuelen = be32_to_cpu(name_rmt->valuelen); +		args->rmtvaluelen = be32_to_cpu(name_rmt->valuelen);  		args->rmtblkno = be32_to_cpu(name_rmt->valueblk);  		args->rmtblkcnt = xfs_attr3_rmt_blocks(args->dp->i_mount, -						       valuelen); +						       args->rmtvaluelen);  		if (args->flags & ATTR_KERNOVAL) { -			args->valuelen = valuelen; +			args->valuelen = args->rmtvaluelen;  			return 0;  		} -		if (args->valuelen < valuelen) { -			args->valuelen = valuelen; +		if (args->valuelen < args->rmtvaluelen) { +			args->valuelen = args->rmtvaluelen;  			return XFS_ERROR(ERANGE);  		} -		args->valuelen = valuelen; +		args->valuelen = args->rmtvaluelen;  	}  	return 0;  } @@ -2248,14 +2244,14 @@ xfs_attr3_leaf_getvalue(  /*ARGSUSED*/  STATIC void  xfs_attr3_leaf_moveents( +	struct xfs_da_args		*args,  	struct xfs_attr_leafblock	*leaf_s,  	struct xfs_attr3_icleaf_hdr	*ichdr_s,  	int				start_s,  	struct xfs_attr_leafblock	*leaf_d,  	struct xfs_attr3_icleaf_hdr	*ichdr_d,  	int				start_d, -	int				count, -	struct xfs_mount		*mp) +	int				count)  {  	struct xfs_attr_leaf_entry	*entry_s;  	struct xfs_attr_leaf_entry	*entry_d; @@ -2275,10 +2271,10 @@ xfs_attr3_leaf_moveents(  	ASSERT(ichdr_s->magic == XFS_ATTR_LEAF_MAGIC ||  	       ichdr_s->magic == XFS_ATTR3_LEAF_MAGIC);  	ASSERT(ichdr_s->magic == ichdr_d->magic); -	ASSERT(ichdr_s->count > 0 && ichdr_s->count < XFS_LBSIZE(mp) / 8); +	ASSERT(ichdr_s->count > 0 && ichdr_s->count < args->geo->blksize / 8);  	ASSERT(ichdr_s->firstused >= (ichdr_s->count * 
sizeof(*entry_s))  					+ xfs_attr3_leaf_hdr_size(leaf_s)); -	ASSERT(ichdr_d->count < XFS_LBSIZE(mp) / 8); +	ASSERT(ichdr_d->count < args->geo->blksize / 8);  	ASSERT(ichdr_d->firstused >= (ichdr_d->count * sizeof(*entry_d))  					+ xfs_attr3_leaf_hdr_size(leaf_d)); @@ -2330,11 +2326,11 @@ xfs_attr3_leaf_moveents(  			entry_d->nameidx = cpu_to_be16(ichdr_d->firstused);  			entry_d->flags = entry_s->flags;  			ASSERT(be16_to_cpu(entry_d->nameidx) + tmp -							<= XFS_LBSIZE(mp)); +							<= args->geo->blksize);  			memmove(xfs_attr3_leaf_name(leaf_d, desti),  				xfs_attr3_leaf_name(leaf_s, start_s + i), tmp);  			ASSERT(be16_to_cpu(entry_s->nameidx) + tmp -							<= XFS_LBSIZE(mp)); +							<= args->geo->blksize);  			memset(xfs_attr3_leaf_name(leaf_s, start_s + i), 0, tmp);  			ichdr_s->usedbytes -= tmp;  			ichdr_d->usedbytes += tmp; @@ -2355,7 +2351,7 @@ xfs_attr3_leaf_moveents(  		tmp = count * sizeof(xfs_attr_leaf_entry_t);  		entry_s = &xfs_attr3_leaf_entryp(leaf_s)[start_s];  		ASSERT(((char *)entry_s + tmp) <= -		       ((char *)leaf_s + XFS_LBSIZE(mp))); +		       ((char *)leaf_s + args->geo->blksize));  		memset(entry_s, 0, tmp);  	} else {  		/* @@ -2370,7 +2366,7 @@ xfs_attr3_leaf_moveents(  		tmp = count * sizeof(xfs_attr_leaf_entry_t);  		entry_s = &xfs_attr3_leaf_entryp(leaf_s)[ichdr_s->count];  		ASSERT(((char *)entry_s + tmp) <= -		       ((char *)leaf_s + XFS_LBSIZE(mp))); +		       ((char *)leaf_s + args->geo->blksize));  		memset(entry_s, 0, tmp);  	} @@ -2438,22 +2434,21 @@ xfs_attr_leaf_entsize(xfs_attr_leafblock_t *leaf, int index)   * a "local" or a "remote" attribute.   */  int -xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, int *local) +xfs_attr_leaf_newentsize( +	struct xfs_da_args	*args, +	int			*local)  { -	int size; +	int			size; -	size = xfs_attr_leaf_entsize_local(namelen, valuelen); -	if (size < xfs_attr_leaf_entsize_local_max(blocksize)) { -		if (local) { +	size = xfs_attr_leaf_entsize_local(args->namelen, args->valuelen); +	if (size < xfs_attr_leaf_entsize_local_max(args->geo->blksize)) { +		if (local)  			*local = 1; -		} -	} else { -		size = xfs_attr_leaf_entsize_remote(namelen); -		if (local) { -			*local = 0; -		} +		return size;  	} -	return size; +	if (local) +		*local = 0; +	return xfs_attr_leaf_entsize_remote(args->namelen);  } @@ -2519,7 +2514,7 @@ xfs_attr3_leaf_clearflag(  		ASSERT((entry->flags & XFS_ATTR_LOCAL) == 0);  		name_rmt = xfs_attr3_leaf_name_remote(leaf, args->index);  		name_rmt->valueblk = cpu_to_be32(args->rmtblkno); -		name_rmt->valuelen = cpu_to_be32(args->valuelen); +		name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen);  		xfs_trans_log_buf(args->trans, bp,  			 XFS_DA_LOGRANGE(leaf, name_rmt, sizeof(*name_rmt)));  	} @@ -2677,7 +2672,7 @@ xfs_attr3_leaf_flipflags(  		ASSERT((entry1->flags & XFS_ATTR_LOCAL) == 0);  		name_rmt = xfs_attr3_leaf_name_remote(leaf1, args->index);  		name_rmt->valueblk = cpu_to_be32(args->rmtblkno); -		name_rmt->valuelen = cpu_to_be32(args->valuelen); +		name_rmt->valuelen = cpu_to_be32(args->rmtvaluelen);  		xfs_trans_log_buf(args->trans, bp1,  			 XFS_DA_LOGRANGE(leaf1, name_rmt, sizeof(*name_rmt)));  	} diff --git a/fs/xfs/xfs_attr_leaf.h b/fs/xfs/xfs_attr_leaf.h index c1022138c7e..e2929da7c3b 100644 --- a/fs/xfs/xfs_attr_leaf.h +++ b/fs/xfs/xfs_attr_leaf.h @@ -19,16 +19,6 @@  #ifndef __XFS_ATTR_LEAF_H__  #define	__XFS_ATTR_LEAF_H__ -/* - * Attribute storage layout, internal structure, access macros, etc. 
- * - * Attribute lists are structured around Btrees where all the data - * elements are in the leaf nodes.  Attribute names are hashed into an int, - * then that int is used as the index into the Btree.  Since the hashval - * of an attribute name may not be unique, we may have duplicate keys.  The - * internal links in the Btree are logical block offsets into the file. - */ -  struct attrlist;  struct attrlist_cursor_kern;  struct xfs_attr_list_context; @@ -38,226 +28,6 @@ struct xfs_da_state_blk;  struct xfs_inode;  struct xfs_trans; -/*======================================================================== - * Attribute structure when equal to XFS_LBSIZE(mp) bytes. - *========================================================================*/ - -/* - * This is the structure of the leaf nodes in the Btree. - * - * Struct leaf_entry's are packed from the top.  Name/values grow from the - * bottom but are not packed.  The freemap contains run-length-encoded entries - * for the free bytes after the leaf_entry's, but only the N largest such, - * smaller runs are dropped.  When the freemap doesn't show enough space - * for an allocation, we compact the name/value area and try again.  If we - * still don't have enough space, then we have to split the block.  The - * name/value structs (both local and remote versions) must be 32bit aligned. - * - * Since we have duplicate hash keys, for each key that matches, compare - * the actual name string.  The root and intermediate node search always - * takes the first-in-the-block key match found, so we should only have - * to work "forw"ard.  If none matches, continue with the "forw"ard leaf - * nodes until the hash key changes or the attribute name is found. - * - * We store the fact that an attribute is a ROOT/USER/SECURE attribute in - * the leaf_entry.  The namespaces are independent only because we also look - * at the namespace bit when we are looking for a matching attribute name. - * - * We also store an "incomplete" bit in the leaf_entry.  It shows that an - * attribute is in the middle of being created and should not be shown to - * the user if we crash during the time that the bit is set.  We clear the - * bit when we have finished setting up the attribute.  We do this because - * we cannot create some large attributes inside a single transaction, and we - * need some indication that we weren't finished if we crash in the middle. - */ -#define XFS_ATTR_LEAF_MAPSIZE	3	/* how many freespace slots */ - -typedef struct xfs_attr_leaf_map {	/* RLE map of free bytes */ -	__be16	base;			  /* base of free region */ -	__be16	size;			  /* length of free region */ -} xfs_attr_leaf_map_t; - -typedef struct xfs_attr_leaf_hdr {	/* constant-structure header block */ -	xfs_da_blkinfo_t info;		/* block type, links, etc. 
*/ -	__be16	count;			/* count of active leaf_entry's */ -	__be16	usedbytes;		/* num bytes of names/values stored */ -	__be16	firstused;		/* first used byte in name area */ -	__u8	holes;			/* != 0 if blk needs compaction */ -	__u8	pad1; -	xfs_attr_leaf_map_t freemap[XFS_ATTR_LEAF_MAPSIZE]; -					/* N largest free regions */ -} xfs_attr_leaf_hdr_t; - -typedef struct xfs_attr_leaf_entry {	/* sorted on key, not name */ -	__be32	hashval;		/* hash value of name */ -	__be16	nameidx;		/* index into buffer of name/value */ -	__u8	flags;			/* LOCAL/ROOT/SECURE/INCOMPLETE flag */ -	__u8	pad2;			/* unused pad byte */ -} xfs_attr_leaf_entry_t; - -typedef struct xfs_attr_leaf_name_local { -	__be16	valuelen;		/* number of bytes in value */ -	__u8	namelen;		/* length of name bytes */ -	__u8	nameval[1];		/* name/value bytes */ -} xfs_attr_leaf_name_local_t; - -typedef struct xfs_attr_leaf_name_remote { -	__be32	valueblk;		/* block number of value bytes */ -	__be32	valuelen;		/* number of bytes in value */ -	__u8	namelen;		/* length of name bytes */ -	__u8	name[1];		/* name bytes */ -} xfs_attr_leaf_name_remote_t; - -typedef struct xfs_attr_leafblock { -	xfs_attr_leaf_hdr_t	hdr;	/* constant-structure header block */ -	xfs_attr_leaf_entry_t	entries[1];	/* sorted on key, not name */ -	xfs_attr_leaf_name_local_t namelist;	/* grows from bottom of buf */ -	xfs_attr_leaf_name_remote_t valuelist;	/* grows from bottom of buf */ -} xfs_attr_leafblock_t; - -/* - * CRC enabled leaf structures. Called "version 3" structures to match the - * version number of the directory and dablk structures for this feature, and - * attr2 is already taken by the variable inode attribute fork size feature. - */ -struct xfs_attr3_leaf_hdr { -	struct xfs_da3_blkinfo	info; -	__be16			count; -	__be16			usedbytes; -	__be16			firstused; -	__u8			holes; -	__u8			pad1; -	struct xfs_attr_leaf_map freemap[XFS_ATTR_LEAF_MAPSIZE]; -	__be32			pad2;		/* 64 bit alignment */ -}; - -#define XFS_ATTR3_LEAF_CRC_OFF	(offsetof(struct xfs_attr3_leaf_hdr, info.crc)) - -struct xfs_attr3_leafblock { -	struct xfs_attr3_leaf_hdr	hdr; -	struct xfs_attr_leaf_entry	entries[1]; - -	/* -	 * The rest of the block contains the following structures after the -	 * leaf entries, growing from the bottom up. The variables are never -	 * referenced, the locations accessed purely from helper functions. -	 * -	 * struct xfs_attr_leaf_name_local -	 * struct xfs_attr_leaf_name_remote -	 */ -}; - -/* - * incore, neutral version of the attribute leaf header - */ -struct xfs_attr3_icleaf_hdr { -	__uint32_t	forw; -	__uint32_t	back; -	__uint16_t	magic; -	__uint16_t	count; -	__uint16_t	usedbytes; -	__uint16_t	firstused; -	__u8		holes; -	struct { -		__uint16_t	base; -		__uint16_t	size; -	} freemap[XFS_ATTR_LEAF_MAPSIZE]; -}; - -/* - * Flags used in the leaf_entry[i].flags field. - * NOTE: the INCOMPLETE bit must not collide with the flags bits specified - * on the system call, they are "or"ed together for various operations. 
- */ -#define	XFS_ATTR_LOCAL_BIT	0	/* attr is stored locally */ -#define	XFS_ATTR_ROOT_BIT	1	/* limit access to trusted attrs */ -#define	XFS_ATTR_SECURE_BIT	2	/* limit access to secure attrs */ -#define	XFS_ATTR_INCOMPLETE_BIT	7	/* attr in middle of create/delete */ -#define XFS_ATTR_LOCAL		(1 << XFS_ATTR_LOCAL_BIT) -#define XFS_ATTR_ROOT		(1 << XFS_ATTR_ROOT_BIT) -#define XFS_ATTR_SECURE		(1 << XFS_ATTR_SECURE_BIT) -#define XFS_ATTR_INCOMPLETE	(1 << XFS_ATTR_INCOMPLETE_BIT) - -/* - * Conversion macros for converting namespace bits from argument flags - * to ondisk flags. - */ -#define XFS_ATTR_NSP_ARGS_MASK		(ATTR_ROOT | ATTR_SECURE) -#define XFS_ATTR_NSP_ONDISK_MASK	(XFS_ATTR_ROOT | XFS_ATTR_SECURE) -#define XFS_ATTR_NSP_ONDISK(flags)	((flags) & XFS_ATTR_NSP_ONDISK_MASK) -#define XFS_ATTR_NSP_ARGS(flags)	((flags) & XFS_ATTR_NSP_ARGS_MASK) -#define XFS_ATTR_NSP_ARGS_TO_ONDISK(x)	(((x) & ATTR_ROOT ? XFS_ATTR_ROOT : 0) |\ -					 ((x) & ATTR_SECURE ? XFS_ATTR_SECURE : 0)) -#define XFS_ATTR_NSP_ONDISK_TO_ARGS(x)	(((x) & XFS_ATTR_ROOT ? ATTR_ROOT : 0) |\ -					 ((x) & XFS_ATTR_SECURE ? ATTR_SECURE : 0)) - -/* - * Alignment for namelist and valuelist entries (since they are mixed - * there can be only one alignment value) - */ -#define	XFS_ATTR_LEAF_NAME_ALIGN	((uint)sizeof(xfs_dablk_t)) - -static inline int -xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock *leafp) -{ -	if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) -		return sizeof(struct xfs_attr3_leaf_hdr); -	return sizeof(struct xfs_attr_leaf_hdr); -} - -static inline struct xfs_attr_leaf_entry * -xfs_attr3_leaf_entryp(xfs_attr_leafblock_t *leafp) -{ -	if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) -		return &((struct xfs_attr3_leafblock *)leafp)->entries[0]; -	return &leafp->entries[0]; -} - -/* - * Cast typed pointers for "local" and "remote" name/value structs. - */ -static inline char * -xfs_attr3_leaf_name(xfs_attr_leafblock_t *leafp, int idx) -{ -	struct xfs_attr_leaf_entry *entries = xfs_attr3_leaf_entryp(leafp); - -	return &((char *)leafp)[be16_to_cpu(entries[idx].nameidx)]; -} - -static inline xfs_attr_leaf_name_remote_t * -xfs_attr3_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx) -{ -	return (xfs_attr_leaf_name_remote_t *)xfs_attr3_leaf_name(leafp, idx); -} - -static inline xfs_attr_leaf_name_local_t * -xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx) -{ -	return (xfs_attr_leaf_name_local_t *)xfs_attr3_leaf_name(leafp, idx); -} - -/* - * Calculate total bytes used (including trailing pad for alignment) for - * a "local" name/value structure, a "remote" name/value structure, and - * a pointer which might be either. - */ -static inline int xfs_attr_leaf_entsize_remote(int nlen) -{ -	return ((uint)sizeof(xfs_attr_leaf_name_remote_t) - 1 + (nlen) + \ -		XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1); -} - -static inline int xfs_attr_leaf_entsize_local(int nlen, int vlen) -{ -	return ((uint)sizeof(xfs_attr_leaf_name_local_t) - 1 + (nlen) + (vlen) + -		XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1); -} - -static inline int xfs_attr_leaf_entsize_local_max(int bsize) -{ -	return (((bsize) >> 1) + ((bsize) >> 2)); -} -  /*   * Used to keep a list of "remote value" extents when unlinking an inode.   
*/ @@ -326,8 +96,7 @@ int	xfs_attr3_root_inactive(struct xfs_trans **trans, struct xfs_inode *dp);  xfs_dahash_t	xfs_attr_leaf_lasthash(struct xfs_buf *bp, int *count);  int	xfs_attr_leaf_order(struct xfs_buf *leaf1_bp,  				   struct xfs_buf *leaf2_bp); -int	xfs_attr_leaf_newentsize(int namelen, int valuelen, int blocksize, -					int *local); +int	xfs_attr_leaf_newentsize(struct xfs_da_args *args, int *local);  int	xfs_attr3_leaf_read(struct xfs_trans *tp, struct xfs_inode *dp,  			xfs_dablk_t bno, xfs_daddr_t mappedbno,  			struct xfs_buf **bpp); @@ -336,6 +105,4 @@ void	xfs_attr3_leaf_hdr_from_disk(struct xfs_attr3_icleaf_hdr *to,  void	xfs_attr3_leaf_hdr_to_disk(struct xfs_attr_leafblock *to,  				   struct xfs_attr3_icleaf_hdr *from); -extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops; -  #endif	/* __XFS_ATTR_LEAF_H__ */ diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c index cbc80d48517..90e2eeb2120 100644 --- a/fs/xfs/xfs_attr_list.c +++ b/fs/xfs/xfs_attr_list.c @@ -18,31 +18,29 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" +#include "xfs_da_format.h"  #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_alloc.h" -#include "xfs_btree.h" -#include "xfs_attr_sf.h" -#include "xfs_attr_remote.h" -#include "xfs_dinode.h"  #include "xfs_inode.h" +#include "xfs_trans.h"  #include "xfs_inode_item.h"  #include "xfs_bmap.h"  #include "xfs_attr.h" +#include "xfs_attr_sf.h" +#include "xfs_attr_remote.h"  #include "xfs_attr_leaf.h"  #include "xfs_error.h"  #include "xfs_trace.h"  #include "xfs_buf_item.h"  #include "xfs_cksum.h" +#include "xfs_dinode.h" +#include "xfs_dir2.h"  STATIC int  xfs_attr_shortform_compare(const void *a, const void *b) @@ -229,6 +227,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)  	struct xfs_da_node_entry *btree;  	int error, i;  	struct xfs_buf *bp; +	struct xfs_inode	*dp = context->dp;  	trace_xfs_attr_node_list(context); @@ -242,7 +241,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)  	 */  	bp = NULL;  	if (cursor->blkno > 0) { -		error = xfs_da3_node_read(NULL, context->dp, cursor->blkno, -1, +		error = xfs_da3_node_read(NULL, dp, cursor->blkno, -1,  					      &bp, XFS_ATTR_FORK);  		if ((error != 0) && (error != EFSCORRUPTED))  			return(error); @@ -292,7 +291,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)  		for (;;) {  			__uint16_t magic; -			error = xfs_da3_node_read(NULL, context->dp, +			error = xfs_da3_node_read(NULL, dp,  						      cursor->blkno, -1, &bp,  						      XFS_ATTR_FORK);  			if (error) @@ -312,8 +311,8 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)  				return XFS_ERROR(EFSCORRUPTED);  			} -			xfs_da3_node_hdr_from_disk(&nodehdr, node); -			btree = xfs_da3_node_tree_p(node); +			dp->d_ops->node_hdr_from_disk(&nodehdr, node); +			btree = dp->d_ops->node_tree_p(node);  			for (i = 0; i < nodehdr.count; btree++, i++) {  				if (cursor->hashval  						<= be32_to_cpu(btree->hashval)) { @@ -349,8 +348,7 @@ xfs_attr_node_list(xfs_attr_list_context_t *context)  			break;  		cursor->blkno = leafhdr.forw;  		xfs_trans_brelse(NULL, bp); -		error = xfs_attr3_leaf_read(NULL, context->dp, cursor->blkno, -1, -					   &bp); +		error = xfs_attr3_leaf_read(NULL, dp, cursor->blkno, -1, 
&bp);  		if (error)  			return error;  	} @@ -446,9 +444,11 @@ xfs_attr3_leaf_list_int(  				xfs_da_args_t args;  				memset((char *)&args, 0, sizeof(args)); +				args.geo = context->dp->i_mount->m_attr_geo;  				args.dp = context->dp;  				args.whichfork = XFS_ATTR_FORK;  				args.valuelen = valuelen; +				args.rmtvaluelen = valuelen;  				args.value = kmem_alloc(valuelen, KM_SLEEP | KM_NOFS);  				args.rmtblkno = be32_to_cpu(name_rmt->valueblk);  				args.rmtblkcnt = xfs_attr3_rmt_blocks( @@ -509,17 +509,17 @@ xfs_attr_list_int(  {  	int error;  	xfs_inode_t *dp = context->dp; +	uint		lock_mode;  	XFS_STATS_INC(xs_attr_list);  	if (XFS_FORCED_SHUTDOWN(dp->i_mount))  		return EIO; -	xfs_ilock(dp, XFS_ILOCK_SHARED); -  	/*  	 * Decide on what work routines to call based on the inode size.  	 */ +	lock_mode = xfs_ilock_attr_map_shared(dp);  	if (!xfs_inode_hasattr(dp)) {  		error = 0;  	} else if (dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) { @@ -529,9 +529,7 @@ xfs_attr_list_int(  	} else {  		error = xfs_attr_node_list(context);  	} - -	xfs_iunlock(dp, XFS_ILOCK_SHARED); - +	xfs_iunlock(dp, lock_mode);  	return error;  } diff --git a/fs/xfs/xfs_attr_remote.c b/fs/xfs/xfs_attr_remote.c index 712a502de61..b5adfecbb8e 100644 --- a/fs/xfs/xfs_attr_remote.c +++ b/fs/xfs/xfs_attr_remote.c @@ -18,20 +18,19 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_error.h" +#include "xfs_da_format.h"  #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h"  #include "xfs_alloc.h" +#include "xfs_trans.h"  #include "xfs_inode_item.h"  #include "xfs_bmap.h"  #include "xfs_bmap_util.h" @@ -42,6 +41,7 @@  #include "xfs_trace.h"  #include "xfs_cksum.h"  #include "xfs_buf_item.h" +#include "xfs_error.h"  #define ATTR_RMTVALUE_MAPSIZE	1	/* # of map entries at once */ @@ -68,7 +68,6 @@ xfs_attr3_rmt_blocks(   */  static bool  xfs_attr3_rmt_hdr_ok( -	struct xfs_mount	*mp,  	void			*ptr,  	xfs_ino_t		ino,  	uint32_t		offset, @@ -110,7 +109,7 @@ xfs_attr3_rmt_verify(  	if (be32_to_cpu(rmt->rm_bytes) > fsbsize - sizeof(*rmt))  		return false;  	if (be32_to_cpu(rmt->rm_offset) + -				be32_to_cpu(rmt->rm_bytes) >= XATTR_SIZE_MAX) +				be32_to_cpu(rmt->rm_bytes) > XATTR_SIZE_MAX)  		return false;  	if (rmt->rm_owner == 0)  		return false; @@ -125,8 +124,8 @@ xfs_attr3_rmt_read_verify(  	struct xfs_mount *mp = bp->b_target->bt_mount;  	char		*ptr;  	int		len; -	bool		corrupt = false;  	xfs_daddr_t	bno; +	int		blksize = mp->m_attr_geo->blksize;  	/* no verification of non-crc buffers */  	if (!xfs_sb_version_hascrc(&mp->m_sb)) @@ -135,27 +134,25 @@ xfs_attr3_rmt_read_verify(  	ptr = bp->b_addr;  	bno = bp->b_bn;  	len = BBTOB(bp->b_length); -	ASSERT(len >= XFS_LBSIZE(mp)); +	ASSERT(len >= blksize);  	while (len > 0) { -		if (!xfs_verify_cksum(ptr, XFS_LBSIZE(mp), -				      XFS_ATTR3_RMT_CRC_OFF)) { -			corrupt = true; +		if (!xfs_verify_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF)) { +			xfs_buf_ioerror(bp, EFSBADCRC);  			break;  		} -		if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { -			corrupt = true; +		if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) { +			xfs_buf_ioerror(bp, EFSCORRUPTED);  			break;  		} -		len -= XFS_LBSIZE(mp); -		ptr += XFS_LBSIZE(mp); 
-		bno += mp->m_bsize; +		len -= blksize; +		ptr += blksize; +		bno += BTOBB(blksize);  	} -	if (corrupt) { -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); -		xfs_buf_ioerror(bp, EFSCORRUPTED); -	} else +	if (bp->b_error) +		xfs_verifier_error(bp); +	else  		ASSERT(len == 0);  } @@ -168,6 +165,7 @@ xfs_attr3_rmt_write_verify(  	char		*ptr;  	int		len;  	xfs_daddr_t	bno; +	int		blksize = mp->m_attr_geo->blksize;  	/* no verification of non-crc buffers */  	if (!xfs_sb_version_hascrc(&mp->m_sb)) @@ -176,13 +174,12 @@ xfs_attr3_rmt_write_verify(  	ptr = bp->b_addr;  	bno = bp->b_bn;  	len = BBTOB(bp->b_length); -	ASSERT(len >= XFS_LBSIZE(mp)); +	ASSERT(len >= blksize);  	while (len > 0) { -		if (!xfs_attr3_rmt_verify(mp, ptr, XFS_LBSIZE(mp), bno)) { -			XFS_CORRUPTION_ERROR(__func__, -					    XFS_ERRLEVEL_LOW, mp, bp->b_addr); +		if (!xfs_attr3_rmt_verify(mp, ptr, blksize, bno)) {  			xfs_buf_ioerror(bp, EFSCORRUPTED); +			xfs_verifier_error(bp);  			return;  		}  		if (bip) { @@ -191,11 +188,11 @@ xfs_attr3_rmt_write_verify(  			rmt = (struct xfs_attr3_rmt_hdr *)ptr;  			rmt->rm_lsn = cpu_to_be64(bip->bli_item.li_lsn);  		} -		xfs_update_cksum(ptr, XFS_LBSIZE(mp), XFS_ATTR3_RMT_CRC_OFF); +		xfs_update_cksum(ptr, blksize, XFS_ATTR3_RMT_CRC_OFF); -		len -= XFS_LBSIZE(mp); -		ptr += XFS_LBSIZE(mp); -		bno += mp->m_bsize; +		len -= blksize; +		ptr += blksize; +		bno += BTOBB(blksize);  	}  	ASSERT(len == 0);  } @@ -244,17 +241,18 @@ xfs_attr_rmtval_copyout(  	char		*src = bp->b_addr;  	xfs_daddr_t	bno = bp->b_bn;  	int		len = BBTOB(bp->b_length); +	int		blksize = mp->m_attr_geo->blksize; -	ASSERT(len >= XFS_LBSIZE(mp)); +	ASSERT(len >= blksize);  	while (len > 0 && *valuelen > 0) {  		int hdr_size = 0; -		int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, XFS_LBSIZE(mp)); +		int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);  		byte_cnt = min(*valuelen, byte_cnt);  		if (xfs_sb_version_hascrc(&mp->m_sb)) { -			if (!xfs_attr3_rmt_hdr_ok(mp, src, ino, *offset, +			if (!xfs_attr3_rmt_hdr_ok(src, ino, *offset,  						  byte_cnt, bno)) {  				xfs_alert(mp,  "remote attribute header mismatch bno/off/len/owner (0x%llx/0x%x/Ox%x/0x%llx)", @@ -267,9 +265,9 @@ xfs_attr_rmtval_copyout(  		memcpy(*dst, src + hdr_size, byte_cnt);  		/* roll buffer forwards */ -		len -= XFS_LBSIZE(mp); -		src += XFS_LBSIZE(mp); -		bno += mp->m_bsize; +		len -= blksize; +		src += blksize; +		bno += BTOBB(blksize);  		/* roll attribute data forwards */  		*valuelen -= byte_cnt; @@ -291,12 +289,13 @@ xfs_attr_rmtval_copyin(  	char		*dst = bp->b_addr;  	xfs_daddr_t	bno = bp->b_bn;  	int		len = BBTOB(bp->b_length); +	int		blksize = mp->m_attr_geo->blksize; -	ASSERT(len >= XFS_LBSIZE(mp)); +	ASSERT(len >= blksize);  	while (len > 0 && *valuelen > 0) {  		int hdr_size; -		int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, XFS_LBSIZE(mp)); +		int byte_cnt = XFS_ATTR3_RMT_BUF_SPACE(mp, blksize);  		byte_cnt = min(*valuelen, byte_cnt);  		hdr_size = xfs_attr3_rmt_hdr_set(mp, dst, ino, *offset, @@ -308,17 +307,17 @@ xfs_attr_rmtval_copyin(  		 * If this is the last block, zero the remainder of it.  		 * Check that we are actually the last block, too.  		 
*/ -		if (byte_cnt + hdr_size < XFS_LBSIZE(mp)) { +		if (byte_cnt + hdr_size < blksize) {  			ASSERT(*valuelen - byte_cnt == 0); -			ASSERT(len == XFS_LBSIZE(mp)); +			ASSERT(len == blksize);  			memset(dst + hdr_size + byte_cnt, 0, -					XFS_LBSIZE(mp) - hdr_size - byte_cnt); +					blksize - hdr_size - byte_cnt);  		}  		/* roll buffer forwards */ -		len -= XFS_LBSIZE(mp); -		dst += XFS_LBSIZE(mp); -		bno += mp->m_bsize; +		len -= blksize; +		dst += blksize; +		bno += BTOBB(blksize);  		/* roll attribute data forwards */  		*valuelen -= byte_cnt; @@ -340,7 +339,7 @@ xfs_attr_rmtval_get(  	struct xfs_buf		*bp;  	xfs_dablk_t		lblkno = args->rmtblkno;  	__uint8_t		*dst = args->value; -	int			valuelen = args->valuelen; +	int			valuelen;  	int			nmap;  	int			error;  	int			blkcnt = args->rmtblkcnt; @@ -350,7 +349,9 @@ xfs_attr_rmtval_get(  	trace_xfs_attr_rmtval_get(args);  	ASSERT(!(args->flags & ATTR_KERNOVAL)); +	ASSERT(args->rmtvaluelen == args->valuelen); +	valuelen = args->rmtvaluelen;  	while (valuelen > 0) {  		nmap = ATTR_RMTVALUE_MAPSIZE;  		error = xfs_bmapi_read(args->dp, (xfs_fileoff_t)lblkno, @@ -418,7 +419,7 @@ xfs_attr_rmtval_set(  	 * attributes have headers, we can't just do a straight byte to FSB  	 * conversion and have to take the header space into account.  	 */ -	blkcnt = xfs_attr3_rmt_blocks(mp, args->valuelen); +	blkcnt = xfs_attr3_rmt_blocks(mp, args->rmtvaluelen);  	error = xfs_bmap_first_unused(args->trans, args->dp, blkcnt, &lfileoff,  						   XFS_ATTR_FORK);  	if (error) @@ -483,7 +484,7 @@ xfs_attr_rmtval_set(  	 */  	lblkno = args->rmtblkno;  	blkcnt = args->rmtblkcnt; -	valuelen = args->valuelen; +	valuelen = args->rmtvaluelen;  	while (valuelen > 0) {  		struct xfs_buf	*bp;  		xfs_daddr_t	dblkno; diff --git a/fs/xfs/xfs_attr_remote.h b/fs/xfs/xfs_attr_remote.h index 92a8fd7977c..5a9acfa156d 100644 --- a/fs/xfs/xfs_attr_remote.h +++ b/fs/xfs/xfs_attr_remote.h @@ -18,35 +18,6 @@  #ifndef __XFS_ATTR_REMOTE_H__  #define	__XFS_ATTR_REMOTE_H__ -#define XFS_ATTR3_RMT_MAGIC	0x5841524d	/* XARM */ - -/* - * There is one of these headers per filesystem block in a remote attribute. - * This is done to ensure there is a 1:1 mapping between the attribute value - * length and the number of blocks needed to store the attribute. This makes the - * verification of a buffer a little more complex, but greatly simplifies the - * allocation, reading and writing of these attributes as we don't have to guess - * the number of blocks needed to store the attribute data. - */ -struct xfs_attr3_rmt_hdr { -	__be32	rm_magic; -	__be32	rm_offset; -	__be32	rm_bytes; -	__be32	rm_crc; -	uuid_t	rm_uuid; -	__be64	rm_owner; -	__be64	rm_blkno; -	__be64	rm_lsn; -}; - -#define XFS_ATTR3_RMT_CRC_OFF	offsetof(struct xfs_attr3_rmt_hdr, rm_crc) - -#define XFS_ATTR3_RMT_BUF_SPACE(mp, bufsize)	\ -	((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \ -			sizeof(struct xfs_attr3_rmt_hdr) : 0)) - -extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops; -  int xfs_attr3_rmt_blocks(struct xfs_mount *mp, int attrlen);  int xfs_attr_rmtval_get(struct xfs_da_args *args); diff --git a/fs/xfs/xfs_bit.c b/fs/xfs/xfs_bit.c index 48228848f5a..0e8885a5964 100644 --- a/fs/xfs/xfs_bit.c +++ b/fs/xfs/xfs_bit.c @@ -16,10 +16,8 @@   * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA   */  #include "xfs.h" +#include "xfs_log_format.h"  #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" -#include "xfs_buf_item.h"  /*   * XFS bit manipulation routines, used in non-realtime code. 
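Note (not part of the patch): the xfs_bit.h hunk that follows only restructures xfs_lowbit64(), splitting the assignment-inside-condition into explicit tests without changing behaviour. A minimal user-space rendering of the resulting logic is shown below; lowbit64() is an invented name and POSIX ffs() stands in for the kernel helper.

/* Find the index of the lowest set bit in a 64-bit value, or -1 if none. */
#include <stdint.h>
#include <strings.h>	/* ffs() */
#include <stdio.h>

static int lowbit64(uint64_t v)
{
	uint32_t w = (uint32_t)v;
	int n = 0;

	if (w) {			/* lower 32 bits */
		n = ffs(w);
	} else {			/* upper 32 bits */
		w = (uint32_t)(v >> 32);
		if (w) {
			n = ffs(w);
			if (n)
				n += 32;
		}
	}
	return n - 1;
}

int main(void)
{
	printf("%d %d %d\n", lowbit64(0), lowbit64(1ULL << 5), lowbit64(1ULL << 40));
	return 0;
}

Compiled as ordinary C this prints "-1 5 40", i.e. the lowest set bit for 0, 1<<5 and 1<<40, matching the header's intent.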
diff --git a/fs/xfs/xfs_bit.h b/fs/xfs/xfs_bit.h index f1e3c907044..e1649c0d3e0 100644 --- a/fs/xfs/xfs_bit.h +++ b/fs/xfs/xfs_bit.h @@ -66,8 +66,11 @@ static inline int xfs_lowbit64(__uint64_t v)  		n = ffs(w);  	} else {	/* upper bits */  		w = (__uint32_t)(v >> 32); -		if (w && (n = ffs(w))) -		n += 32; +		if (w) { +			n = ffs(w); +			if (n) +				n += 32; +		}  	}  	return n - 1;  } diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c index f47e65c30be..75c3fe5f3d9 100644 --- a/fs/xfs/xfs_bmap.c +++ b/fs/xfs/xfs_bmap.c @@ -17,39 +17,37 @@   */  #include "xfs.h"  #include "xfs_fs.h" +#include "xfs_shared.h"  #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h"  #include "xfs_inum.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" +#include "xfs_da_format.h"  #include "xfs_da_btree.h" -#include "xfs_dir2_format.h"  #include "xfs_dir2.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h"  #include "xfs_btree.h" -#include "xfs_mount.h" -#include "xfs_itable.h" +#include "xfs_trans.h"  #include "xfs_inode_item.h"  #include "xfs_extfree_item.h"  #include "xfs_alloc.h"  #include "xfs_bmap.h"  #include "xfs_bmap_util.h" +#include "xfs_bmap_btree.h"  #include "xfs_rtalloc.h"  #include "xfs_error.h" -#include "xfs_attr_leaf.h"  #include "xfs_quota.h"  #include "xfs_trans_space.h"  #include "xfs_buf_item.h" -#include "xfs_filestream.h"  #include "xfs_trace.h"  #include "xfs_symlink.h" +#include "xfs_attr_leaf.h" +#include "xfs_dinode.h" +#include "xfs_filestream.h"  kmem_zone_t		*xfs_bmap_free_item_zone; @@ -96,7 +94,7 @@ xfs_bmap_compute_maxlevels(  		maxleafents = MAXAEXTNUM;  		sz = XFS_BMDR_SPACE_CALC(MINABTPTRS);  	} -	maxrootrecs = xfs_bmdr_maxrecs(mp, sz, 0); +	maxrootrecs = xfs_bmdr_maxrecs(sz, 0);  	minleafrecs = mp->m_bmap_dmnr[0];  	minnoderecs = mp->m_bmap_dmnr[1];  	maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs; @@ -235,7 +233,6 @@ xfs_default_attroffset(   */  STATIC void  xfs_bmap_forkoff_reset( -	xfs_mount_t	*mp,  	xfs_inode_t	*ip,  	int		whichfork)  { @@ -907,7 +904,7 @@ xfs_bmap_local_to_extents_empty(  	ASSERT(ifp->if_bytes == 0);  	ASSERT(XFS_IFORK_NEXTENTS(ip, whichfork) == 0); -	xfs_bmap_forkoff_reset(ip->i_mount, ip, whichfork); +	xfs_bmap_forkoff_reset(ip, whichfork);  	ifp->if_flags &= ~XFS_IFINLINE;  	ifp->if_flags |= XFS_IFEXTENTS;  	XFS_IFORK_FMT_SET(ip, whichfork, XFS_DINODE_FMT_EXTENTS); @@ -1101,10 +1098,11 @@ xfs_bmap_add_attrfork_local(  	if (S_ISDIR(ip->i_d.di_mode)) {  		memset(&dargs, 0, sizeof(dargs)); +		dargs.geo = ip->i_mount->m_dir_geo;  		dargs.dp = ip;  		dargs.firstblock = firstblock;  		dargs.flist = flist; -		dargs.total = ip->i_mount->m_dirblkfsbs; +		dargs.total = dargs.geo->fsbcount;  		dargs.whichfork = XFS_DATA_FORK;  		dargs.trans = tp;  		return xfs_dir2_sf_to_block(&dargs); @@ -1139,6 +1137,7 @@ xfs_bmap_add_attrfork(  	int			committed;	/* xaction was committed */  	int			logflags;	/* logging flags */  	int			error;		/* error return value */ +	int			cancel_flags = 0;  	ASSERT(XFS_IFORK_Q(ip) == 0); @@ -1149,19 +1148,20 @@ xfs_bmap_add_attrfork(  	if (rsvd)  		tp->t_flags |= XFS_TRANS_RESERVE;  	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_addafork, blks, 0); -	if (error) -		goto error0; +	if (error) { +		xfs_trans_cancel(tp, 0); +		return error; +	} +	cancel_flags = XFS_TRANS_RELEASE_LOG_RES;  	xfs_ilock(ip, XFS_ILOCK_EXCL);  	error = 
xfs_trans_reserve_quota_nblks(tp, ip, blks, 0, rsvd ?  			XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_FORCE_RES :  			XFS_QMOPT_RES_REGBLKS); -	if (error) { -		xfs_iunlock(ip, XFS_ILOCK_EXCL); -		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); -		return error; -	} +	if (error) +		goto trans_cancel; +	cancel_flags |= XFS_TRANS_ABORT;  	if (XFS_IFORK_Q(ip)) -		goto error1; +		goto trans_cancel;  	if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS) {  		/*  		 * For inodes coming from pre-6.2 filesystems. @@ -1171,7 +1171,7 @@ xfs_bmap_add_attrfork(  	}  	ASSERT(ip->i_d.di_anextents == 0); -	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); +	xfs_trans_ijoin(tp, ip, 0);  	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);  	switch (ip->i_d.di_format) { @@ -1193,7 +1193,7 @@ xfs_bmap_add_attrfork(  	default:  		ASSERT(0);  		error = XFS_ERROR(EINVAL); -		goto error1; +		goto trans_cancel;  	}  	ASSERT(ip->i_afp == NULL); @@ -1221,7 +1221,7 @@ xfs_bmap_add_attrfork(  	if (logflags)  		xfs_trans_log_inode(tp, ip, logflags);  	if (error) -		goto error2; +		goto bmap_cancel;  	if (!xfs_sb_version_hasattr(&mp->m_sb) ||  	   (!xfs_sb_version_hasattr2(&mp->m_sb) && version == 2)) {  		__int64_t sbfields = 0; @@ -1244,14 +1244,16 @@ xfs_bmap_add_attrfork(  	error = xfs_bmap_finish(&tp, &flist, &committed);  	if (error) -		goto error2; -	return xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); -error2: +		goto bmap_cancel; +	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); +	xfs_iunlock(ip, XFS_ILOCK_EXCL); +	return error; + +bmap_cancel:  	xfs_bmap_cancel(&flist); -error1: +trans_cancel: +	xfs_trans_cancel(tp, cancel_flags);  	xfs_iunlock(ip, XFS_ILOCK_EXCL); -error0: -	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT);  	return error;  } @@ -1482,7 +1484,7 @@ xfs_bmap_search_extents(  		xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,  				"Access to block zero in inode %llu "  				"start_block: %llx start_off: %llx " -				"blkcnt: %llx extent-state: %x lastx: %x\n", +				"blkcnt: %llx extent-state: %x lastx: %x",  			(unsigned long long)ip->i_ino,  			(unsigned long long)gotp->br_startblock,  			(unsigned long long)gotp->br_startoff, @@ -1633,7 +1635,7 @@ xfs_bmap_last_extent(   * blocks at the end of the file which do not start at the previous data block,   * we will try to align the new blocks at stripe unit boundaries.   * - * Returns 0 in bma->aeof if the file (fork) is empty as any new write will be + * Returns 1 in bma->aeof if the file (fork) is empty as any new write will be   * at, or past the EOF.   */  STATIC int @@ -1648,9 +1650,14 @@ xfs_bmap_isaeof(  	bma->aeof = 0;  	error = xfs_bmap_last_extent(NULL, bma->ip, whichfork, &rec,  				     &is_empty); -	if (error || is_empty) +	if (error)  		return error; +	if (is_empty) { +		bma->aeof = 1; +		return 0; +	} +  	/*  	 * Check if we are allocation or past the last extent, or at least into  	 * the last delayed allocated extent. 
@@ -1668,7 +1675,6 @@ xfs_bmap_isaeof(   */  int  xfs_bmap_last_offset( -	struct xfs_trans	*tp,  	struct xfs_inode	*ip,  	xfs_fileoff_t		*last_block,  	int			whichfork) @@ -3510,6 +3516,67 @@ xfs_bmap_adjacent(  #undef ISVALID  } +static int +xfs_bmap_longest_free_extent( +	struct xfs_trans	*tp, +	xfs_agnumber_t		ag, +	xfs_extlen_t		*blen, +	int			*notinit) +{ +	struct xfs_mount	*mp = tp->t_mountp; +	struct xfs_perag	*pag; +	xfs_extlen_t		longest; +	int			error = 0; + +	pag = xfs_perag_get(mp, ag); +	if (!pag->pagf_init) { +		error = xfs_alloc_pagf_init(mp, tp, ag, XFS_ALLOC_FLAG_TRYLOCK); +		if (error) +			goto out; + +		if (!pag->pagf_init) { +			*notinit = 1; +			goto out; +		} +	} + +	longest = xfs_alloc_longest_free_extent(mp, pag); +	if (*blen < longest) +		*blen = longest; + +out: +	xfs_perag_put(pag); +	return error; +} + +static void +xfs_bmap_select_minlen( +	struct xfs_bmalloca	*ap, +	struct xfs_alloc_arg	*args, +	xfs_extlen_t		*blen, +	int			notinit) +{ +	if (notinit || *blen < ap->minlen) { +		/* +		 * Since we did a BUF_TRYLOCK above, it is possible that +		 * there is space for this request. +		 */ +		args->minlen = ap->minlen; +	} else if (*blen < args->maxlen) { +		/* +		 * If the best seen length is less than the request length, +		 * use the best as the minimum. +		 */ +		args->minlen = *blen; +	} else { +		/* +		 * Otherwise we've seen an extent as big as maxlen, use that +		 * as the minimum. +		 */ +		args->minlen = args->maxlen; +	} +} +  STATIC int  xfs_bmap_btalloc_nullfb(  	struct xfs_bmalloca	*ap, @@ -3517,111 +3584,74 @@ xfs_bmap_btalloc_nullfb(  	xfs_extlen_t		*blen)  {  	struct xfs_mount	*mp = ap->ip->i_mount; -	struct xfs_perag	*pag;  	xfs_agnumber_t		ag, startag;  	int			notinit = 0;  	int			error; -	if (ap->userdata && xfs_inode_is_filestream(ap->ip)) -		args->type = XFS_ALLOCTYPE_NEAR_BNO; -	else -		args->type = XFS_ALLOCTYPE_START_BNO; +	args->type = XFS_ALLOCTYPE_START_BNO;  	args->total = ap->total; -	/* -	 * Search for an allocation group with a single extent large enough -	 * for the request.  If one isn't found, then adjust the minimum -	 * allocation size to the largest space found. -	 */  	startag = ag = XFS_FSB_TO_AGNO(mp, args->fsbno);  	if (startag == NULLAGNUMBER)  		startag = ag = 0; -	pag = xfs_perag_get(mp, ag);  	while (*blen < args->maxlen) { -		if (!pag->pagf_init) { -			error = xfs_alloc_pagf_init(mp, args->tp, ag, -						    XFS_ALLOC_FLAG_TRYLOCK); -			if (error) { -				xfs_perag_put(pag); -				return error; -			} -		} - -		/* -		 * See xfs_alloc_fix_freelist... -		 */ -		if (pag->pagf_init) { -			xfs_extlen_t	longest; -			longest = xfs_alloc_longest_free_extent(mp, pag); -			if (*blen < longest) -				*blen = longest; -		} else -			notinit = 1; - -		if (xfs_inode_is_filestream(ap->ip)) { -			if (*blen >= args->maxlen) -				break; - -			if (ap->userdata) { -				/* -				 * If startag is an invalid AG, we've -				 * come here once before and -				 * xfs_filestream_new_ag picked the -				 * best currently available. -				 * -				 * Don't continue looping, since we -				 * could loop forever. 
-				 */ -				if (startag == NULLAGNUMBER) -					break; - -				error = xfs_filestream_new_ag(ap, &ag); -				xfs_perag_put(pag); -				if (error) -					return error; +		error = xfs_bmap_longest_free_extent(args->tp, ag, blen, +						     ¬init); +		if (error) +			return error; -				/* loop again to set 'blen'*/ -				startag = NULLAGNUMBER; -				pag = xfs_perag_get(mp, ag); -				continue; -			} -		}  		if (++ag == mp->m_sb.sb_agcount)  			ag = 0;  		if (ag == startag)  			break; -		xfs_perag_put(pag); -		pag = xfs_perag_get(mp, ag);  	} -	xfs_perag_put(pag); -	/* -	 * Since the above loop did a BUF_TRYLOCK, it is -	 * possible that there is space for this request. -	 */ -	if (notinit || *blen < ap->minlen) -		args->minlen = ap->minlen; -	/* -	 * If the best seen length is less than the request -	 * length, use the best as the minimum. -	 */ -	else if (*blen < args->maxlen) -		args->minlen = *blen; -	/* -	 * Otherwise we've seen an extent as big as maxlen, -	 * use that as the minimum. -	 */ -	else -		args->minlen = args->maxlen; +	xfs_bmap_select_minlen(ap, args, blen, notinit); +	return 0; +} + +STATIC int +xfs_bmap_btalloc_filestreams( +	struct xfs_bmalloca	*ap, +	struct xfs_alloc_arg	*args, +	xfs_extlen_t		*blen) +{ +	struct xfs_mount	*mp = ap->ip->i_mount; +	xfs_agnumber_t		ag; +	int			notinit = 0; +	int			error; + +	args->type = XFS_ALLOCTYPE_NEAR_BNO; +	args->total = ap->total; + +	ag = XFS_FSB_TO_AGNO(mp, args->fsbno); +	if (ag == NULLAGNUMBER) +		ag = 0; + +	error = xfs_bmap_longest_free_extent(args->tp, ag, blen, ¬init); +	if (error) +		return error; + +	if (*blen < args->maxlen) { +		error = xfs_filestream_new_ag(ap, &ag); +		if (error) +			return error; + +		error = xfs_bmap_longest_free_extent(args->tp, ag, blen, +						     ¬init); +		if (error) +			return error; + +	} + +	xfs_bmap_select_minlen(ap, args, blen, notinit);  	/* -	 * set the failure fallback case to look in the selected -	 * AG as the stream may have moved. +	 * Set the failure fallback case to look in the selected AG as stream +	 * may have moved.  	 */ -	if (xfs_inode_is_filestream(ap->ip)) -		ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0); - +	ap->blkno = args->fsbno = XFS_AGB_TO_FSB(mp, ag, 0);  	return 0;  } @@ -3641,10 +3671,19 @@ xfs_bmap_btalloc(  	int		isaligned;  	int		tryagain;  	int		error; +	int		stripe_align;  	ASSERT(ap->length);  	mp = ap->ip->i_mount; + +	/* stripe alignment for allocation is determined by mount parameters */ +	stripe_align = 0; +	if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC)) +		stripe_align = mp->m_swidth; +	else if (mp->m_dalign) +		stripe_align = mp->m_dalign; +  	align = ap->userdata ? xfs_get_extsz_hint(ap->ip) : 0;  	if (unlikely(align)) {  		error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev, @@ -3653,6 +3692,8 @@ xfs_bmap_btalloc(  		ASSERT(!error);  		ASSERT(ap->length);  	} + +  	nullfb = *ap->firstblock == NULLFSBLOCK;  	fb_agno = nullfb ? NULLAGNUMBER : XFS_FSB_TO_AGNO(mp, *ap->firstblock);  	if (nullfb) { @@ -3690,7 +3731,15 @@ xfs_bmap_btalloc(  	args.firstblock = *ap->firstblock;  	blen = 0;  	if (nullfb) { -		error = xfs_bmap_btalloc_nullfb(ap, &args, &blen); +		/* +		 * Search for an allocation group with a single extent large +		 * enough for the request.  If one isn't found, then adjust +		 * the minimum allocation size to the largest space found. 
+		 */ +		if (ap->userdata && xfs_inode_is_filestream(ap->ip)) +			error = xfs_bmap_btalloc_filestreams(ap, &args, &blen); +		else +			error = xfs_bmap_btalloc_nullfb(ap, &args, &blen);  		if (error)  			return error;  	} else if (ap->flist->xbf_low) { @@ -3728,7 +3777,7 @@ xfs_bmap_btalloc(  	 */  	if (!ap->flist->xbf_low && ap->aeof) {  		if (!ap->offset) { -			args.alignment = mp->m_dalign; +			args.alignment = stripe_align;  			atype = args.type;  			isaligned = 1;  			/* @@ -3753,13 +3802,13 @@ xfs_bmap_btalloc(  			 * of minlen+alignment+slop doesn't go up  			 * between the calls.  			 */ -			if (blen > mp->m_dalign && blen <= args.maxlen) -				nextminlen = blen - mp->m_dalign; +			if (blen > stripe_align && blen <= args.maxlen) +				nextminlen = blen - stripe_align;  			else  				nextminlen = args.minlen; -			if (nextminlen + mp->m_dalign > args.minlen + 1) +			if (nextminlen + stripe_align > args.minlen + 1)  				args.minalignslop = -					nextminlen + mp->m_dalign - +					nextminlen + stripe_align -  					args.minlen - 1;  			else  				args.minalignslop = 0; @@ -3781,7 +3830,7 @@ xfs_bmap_btalloc(  		 */  		args.type = atype;  		args.fsbno = ap->blkno; -		args.alignment = mp->m_dalign; +		args.alignment = stripe_align;  		args.minlen = nextminlen;  		args.minalignslop = 0;  		isaligned = 1; @@ -3995,6 +4044,7 @@ xfs_bmapi_read(  	ASSERT(*nmap >= 1);  	ASSERT(!(flags & ~(XFS_BMAPI_ATTRFORK|XFS_BMAPI_ENTIRE|  			   XFS_BMAPI_IGSTATE))); +	ASSERT(xfs_isilocked(ip, XFS_ILOCK_SHARED|XFS_ILOCK_EXCL));  	if (unlikely(XFS_TEST_ERROR(  	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && @@ -4189,6 +4239,7 @@ xfs_bmapi_delay(  	ASSERT(*nmap >= 1);  	ASSERT(*nmap <= XFS_BMAP_MAX_NMAP);  	ASSERT(!(flags & ~XFS_BMAPI_ENTIRE)); +	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));  	if (unlikely(XFS_TEST_ERROR(  	    (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_EXTENTS && @@ -4247,8 +4298,8 @@ xfs_bmapi_delay(  } -int -__xfs_bmapi_allocate( +static int +xfs_bmapi_allocate(  	struct xfs_bmalloca	*bma)  {  	struct xfs_mount	*mp = bma->ip->i_mount; @@ -4482,6 +4533,7 @@ xfs_bmapi_write(  	ASSERT(tp != NULL);  	ASSERT(len > 0);  	ASSERT(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_LOCAL); +	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));  	if (unlikely(XFS_TEST_ERROR(  	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && @@ -4526,9 +4578,6 @@ xfs_bmapi_write(  	bma.flist = flist;  	bma.firstblock = firstblock; -	if (flags & XFS_BMAPI_STACK_SWITCH) -		bma.stack_switch = 1; -  	while (bno < end && n < *nmap) {  		inhole = eof || bma.got.br_startoff > bno;  		wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); @@ -5033,6 +5082,7 @@ xfs_bunmapi(  	if (XFS_FORCED_SHUTDOWN(mp))  		return XFS_ERROR(EIO); +	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));  	ASSERT(len > 0);  	ASSERT(nexts >= 0); @@ -5356,3 +5406,201 @@ error0:  	}  	return error;  } + +/* + * Shift extent records to the left to cover a hole. + * + * The maximum number of extents to be shifted in a single operation + * is @num_exts, and @current_ext keeps track of the current extent + * index we have shifted. @offset_shift_fsb is the length by which each + * extent is shifted. If there is no hole to shift the extents + * into, this will be considered invalid operation and we abort immediately. 
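+ * + * Worked example (editor's illustration, not part of the original commit): a record + * covering file offsets [35,40) shifted left by offset_shift_fsb = 10 becomes [25,30); + * if its left neighbour ends at offset 25, the underlying disk blocks are also + * contiguous, the extent states match and the combined length stays within MAXEXTLEN, + * the two records are merged, otherwise only the start offset of the shifted record + * is rewritten.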
+ */ +int +xfs_bmap_shift_extents( +	struct xfs_trans	*tp, +	struct xfs_inode	*ip, +	int			*done, +	xfs_fileoff_t		start_fsb, +	xfs_fileoff_t		offset_shift_fsb, +	xfs_extnum_t		*current_ext, +	xfs_fsblock_t		*firstblock, +	struct xfs_bmap_free	*flist, +	int			num_exts) +{ +	struct xfs_btree_cur		*cur; +	struct xfs_bmbt_rec_host	*gotp; +	struct xfs_bmbt_irec            got; +	struct xfs_bmbt_irec		left; +	struct xfs_mount		*mp = ip->i_mount; +	struct xfs_ifork		*ifp; +	xfs_extnum_t			nexts = 0; +	xfs_fileoff_t			startoff; +	int				error = 0; +	int				i; +	int				whichfork = XFS_DATA_FORK; +	int				logflags; +	xfs_filblks_t			blockcount = 0; +	int				total_extents; + +	if (unlikely(XFS_TEST_ERROR( +	    (XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_EXTENTS && +	     XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE), +	     mp, XFS_ERRTAG_BMAPIFORMAT, XFS_RANDOM_BMAPIFORMAT))) { +		XFS_ERROR_REPORT("xfs_bmap_shift_extents", +				 XFS_ERRLEVEL_LOW, mp); +		return XFS_ERROR(EFSCORRUPTED); +	} + +	if (XFS_FORCED_SHUTDOWN(mp)) +		return XFS_ERROR(EIO); + +	ASSERT(current_ext != NULL); + +	ifp = XFS_IFORK_PTR(ip, whichfork); +	if (!(ifp->if_flags & XFS_IFEXTENTS)) { +		/* Read in all the extents */ +		error = xfs_iread_extents(tp, ip, whichfork); +		if (error) +			return error; +	} + +	/* +	 * If *current_ext is 0, we would need to look up the extent +	 * from where we would start shifting and store it in gotp. +	 */ +	if (!*current_ext) { +		gotp = xfs_iext_bno_to_ext(ifp, start_fsb, current_ext); +		/* +		 * gotp can be null in 2 cases: 1) if there are no extents +		 * or 2) start_fsb lies in a hole beyond which there are +		 * no extents. Either way, we are done. +		 */ +		if (!gotp) { +			*done = 1; +			return 0; +		} +	} + +	/* We are going to change the core inode */ +	logflags = XFS_ILOG_CORE; +	if (ifp->if_flags & XFS_IFBROOT) { +		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork); +		cur->bc_private.b.firstblock = *firstblock; +		cur->bc_private.b.flist = flist; +		cur->bc_private.b.flags = 0; +	} else { +		cur = NULL; +		logflags |= XFS_ILOG_DEXT; +	} + +	/* +	 * There may be delalloc extents in the data fork before the range we +	 * are collapsing out, so we cannot +	 * use the count of real extents here. Instead we have to calculate it +	 * from the incore fork. +	 */ +	total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); +	while (nexts++ < num_exts && *current_ext < total_extents) { + +		gotp = xfs_iext_get_ext(ifp, *current_ext); +		xfs_bmbt_get_all(gotp, &got); +		startoff = got.br_startoff - offset_shift_fsb; + +		/* +		 * Before shifting an extent into the hole, make sure that the +		 * hole is large enough to accommodate the shift. +		 */ +		if (*current_ext) { +			xfs_bmbt_get_all(xfs_iext_get_ext(ifp, +						*current_ext - 1), &left); + +			if (startoff < left.br_startoff + left.br_blockcount) +				error = XFS_ERROR(EINVAL); +		} else if (offset_shift_fsb > got.br_startoff) { +			/* +			 * When the first extent is shifted, offset_shift_fsb +			 * should be less than the starting offset of +			 * the first extent. 
+			 */ +			error = XFS_ERROR(EINVAL); +		} + +		if (error) +			goto del_cursor; + +		if (cur) { +			error = xfs_bmbt_lookup_eq(cur, got.br_startoff, +						   got.br_startblock, +						   got.br_blockcount, +						   &i); +			if (error) +				goto del_cursor; +			XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); +		} + +		/* Check if we can merge 2 adjacent extents */ +		if (*current_ext && +		    left.br_startoff + left.br_blockcount == startoff && +		    left.br_startblock + left.br_blockcount == +				got.br_startblock && +		    left.br_state == got.br_state && +		    left.br_blockcount + got.br_blockcount <= MAXEXTLEN) { +			blockcount = left.br_blockcount + +				got.br_blockcount; +			xfs_iext_remove(ip, *current_ext, 1, 0); +			if (cur) { +				error = xfs_btree_delete(cur, &i); +				if (error) +					goto del_cursor; +				XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); +			} +			XFS_IFORK_NEXT_SET(ip, whichfork, +				XFS_IFORK_NEXTENTS(ip, whichfork) - 1); +			gotp = xfs_iext_get_ext(ifp, --*current_ext); +			xfs_bmbt_get_all(gotp, &got); + +			/* Make cursor point to the extent we will update */ +			if (cur) { +				error = xfs_bmbt_lookup_eq(cur, got.br_startoff, +							   got.br_startblock, +							   got.br_blockcount, +							   &i); +				if (error) +					goto del_cursor; +				XFS_WANT_CORRUPTED_GOTO(i == 1, del_cursor); +			} + +			xfs_bmbt_set_blockcount(gotp, blockcount); +			got.br_blockcount = blockcount; +		} else { +			/* We have to update the startoff */ +			xfs_bmbt_set_startoff(gotp, startoff); +			got.br_startoff = startoff; +		} + +		if (cur) { +			error = xfs_bmbt_update(cur, got.br_startoff, +						got.br_startblock, +						got.br_blockcount, +						got.br_state); +			if (error) +				goto del_cursor; +		} + +		(*current_ext)++; +		total_extents = ifp->if_bytes / sizeof(xfs_bmbt_rec_t); +	} + +	/* Check if we are done */ +	if (*current_ext == total_extents) +		*done = 1; + +del_cursor: +	if (cur) +		xfs_btree_del_cursor(cur, +			error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR); + +	xfs_trans_log_inode(tp, ip, logflags); +	return error; +} diff --git a/fs/xfs/xfs_bmap.h b/fs/xfs/xfs_bmap.h index 33b41f35122..b879ca56a64 100644 --- a/fs/xfs/xfs_bmap.h +++ b/fs/xfs/xfs_bmap.h @@ -77,7 +76,6 @@ typedef	struct xfs_bmap_free   * from written to unwritten, otherwise convert from unwritten to written.   */  #define XFS_BMAPI_CONVERT	0x040 -#define XFS_BMAPI_STACK_SWITCH	0x080  #define XFS_BMAPI_FLAGS \ @@ -86,8 +85,7 @@ typedef	struct xfs_bmap_free  	{ XFS_BMAPI_PREALLOC,	"PREALLOC" }, \  	{ XFS_BMAPI_IGSTATE,	"IGSTATE" }, \  	{ XFS_BMAPI_CONTIG,	"CONTIG" }, \ -	{ XFS_BMAPI_CONVERT,	"CONVERT" }, \ -	{ XFS_BMAPI_STACK_SWITCH, "STACK_SWITCH" } +	{ XFS_BMAPI_CONVERT,	"CONVERT" }  static inline int xfs_bmapi_aflag(int w) @@ -127,6 +125,16 @@ static inline void xfs_bmap_init(xfs_bmap_free_t *flp, xfs_fsblock_t *fbp)  	{ BMAP_RIGHT_FILLING,	"RF" }, \  	{ BMAP_ATTRFORK,	"ATTR" } + +/* + * This macro determines how many extents are shifted in one write + * transaction. A single shift may require two btree splits: one to move + * the extent record into the hole and a second to merge it with the + * record to its left, so we shift only one extent per write + * transaction. 
+ */ +#define XFS_BMAP_MAX_SHIFT_EXTENTS	1 +  #ifdef DEBUG  void	xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt,  		int whichfork, unsigned long caller_ip); @@ -146,8 +154,8 @@ int	xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip,  		xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork);  int	xfs_bmap_last_before(struct xfs_trans *tp, struct xfs_inode *ip,  		xfs_fileoff_t *last_block, int whichfork); -int	xfs_bmap_last_offset(struct xfs_trans *tp, struct xfs_inode *ip, -		xfs_fileoff_t *unused, int whichfork); +int	xfs_bmap_last_offset(struct xfs_inode *ip, xfs_fileoff_t *unused, +		int whichfork);  int	xfs_bmap_one_block(struct xfs_inode *ip, int whichfork);  int	xfs_bmap_read_extents(struct xfs_trans *tp, struct xfs_inode *ip,  		int whichfork); @@ -169,5 +177,10 @@ int	xfs_bunmapi(struct xfs_trans *tp, struct xfs_inode *ip,  int	xfs_check_nostate_extents(struct xfs_ifork *ifp, xfs_extnum_t idx,  		xfs_extnum_t num);  uint	xfs_default_attroffset(struct xfs_inode *ip); +int	xfs_bmap_shift_extents(struct xfs_trans *tp, struct xfs_inode *ip, +		int *done, xfs_fileoff_t start_fsb, +		xfs_fileoff_t offset_shift_fsb, xfs_extnum_t *current_ext, +		xfs_fsblock_t *firstblock, struct xfs_bmap_free	*flist, +		int num_exts);  #endif	/* __XFS_BMAP_H__ */ diff --git a/fs/xfs/xfs_bmap_btree.c b/fs/xfs/xfs_bmap_btree.c index bb8de8e399c..948836c4fd9 100644 --- a/fs/xfs/xfs_bmap_btree.c +++ b/fs/xfs/xfs_bmap_btree.c @@ -17,27 +17,26 @@   */  #include "xfs.h"  #include "xfs_fs.h" +#include "xfs_shared.h"  #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h" +#include "xfs_trans.h"  #include "xfs_inode_item.h"  #include "xfs_alloc.h"  #include "xfs_btree.h" -#include "xfs_itable.h" +#include "xfs_bmap_btree.h"  #include "xfs_bmap.h"  #include "xfs_error.h"  #include "xfs_quota.h"  #include "xfs_trace.h"  #include "xfs_cksum.h" +#include "xfs_dinode.h"  /*   * Determine the extent state. 
@@ -85,7 +84,7 @@ xfs_bmdr_to_bmbt(  	rblock->bb_level = dblock->bb_level;  	ASSERT(be16_to_cpu(rblock->bb_level) > 0);  	rblock->bb_numrecs = dblock->bb_numrecs; -	dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); +	dmxr = xfs_bmdr_maxrecs(dblocklen, 0);  	fkp = XFS_BMDR_KEY_ADDR(dblock, 1);  	tkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);  	fpp = XFS_BMDR_PTR_ADDR(dblock, 1, dmxr); @@ -444,7 +443,7 @@ xfs_bmbt_to_bmdr(  	ASSERT(rblock->bb_level != 0);  	dblock->bb_level = rblock->bb_level;  	dblock->bb_numrecs = rblock->bb_numrecs; -	dmxr = xfs_bmdr_maxrecs(mp, dblocklen, 0); +	dmxr = xfs_bmdr_maxrecs(dblocklen, 0);  	fkp = XFS_BMBT_KEY_ADDR(mp, rblock, 1);  	tkp = XFS_BMDR_KEY_ADDR(dblock, 1);  	fpp = XFS_BMAP_BROOT_PTR_ADDR(mp, rblock, 1, rblocklen); @@ -520,7 +519,6 @@ xfs_bmbt_alloc_block(  	struct xfs_btree_cur	*cur,  	union xfs_btree_ptr	*start,  	union xfs_btree_ptr	*new, -	int			length,  	int			*stat)  {  	xfs_alloc_arg_t		args;		/* block allocation args */ @@ -673,8 +671,7 @@ xfs_bmbt_get_dmaxrecs(  {  	if (level != cur->bc_nlevels - 1)  		return cur->bc_mp->m_bmap_dmxr[level != 0]; -	return xfs_bmdr_maxrecs(cur->bc_mp, cur->bc_private.b.forksize, -				level == 0); +	return xfs_bmdr_maxrecs(cur->bc_private.b.forksize, level == 0);  }  STATIC void @@ -781,12 +778,14 @@ static void  xfs_bmbt_read_verify(  	struct xfs_buf	*bp)  { -	if (!(xfs_btree_lblock_verify_crc(bp) && -	      xfs_bmbt_verify(bp))) { -		trace_xfs_btree_corrupt(bp, _RET_IP_); -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, -				     bp->b_target->bt_mount, bp->b_addr); +	if (!xfs_btree_lblock_verify_crc(bp)) +		xfs_buf_ioerror(bp, EFSBADCRC); +	else if (!xfs_bmbt_verify(bp))  		xfs_buf_ioerror(bp, EFSCORRUPTED); + +	if (bp->b_error) { +		trace_xfs_btree_corrupt(bp, _RET_IP_); +		xfs_verifier_error(bp);  	}  } @@ -795,11 +794,9 @@ xfs_bmbt_write_verify(  	struct xfs_buf	*bp)  {  	if (!xfs_bmbt_verify(bp)) { -		xfs_warn(bp->b_target->bt_mount, "bmbt daddr 0x%llx failed", bp->b_bn);  		trace_xfs_btree_corrupt(bp, _RET_IP_); -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, -				     bp->b_target->bt_mount, bp->b_addr);  		xfs_buf_ioerror(bp, EFSCORRUPTED); +		xfs_verifier_error(bp);  		return;  	}  	xfs_btree_lblock_calc_crc(bp); @@ -915,7 +912,6 @@ xfs_bmbt_maxrecs(   */  int  xfs_bmdr_maxrecs( -	struct xfs_mount	*mp,  	int			blocklen,  	int			leaf)  { diff --git a/fs/xfs/xfs_bmap_btree.h b/fs/xfs/xfs_bmap_btree.h index e367461a638..819a8a4dee9 100644 --- a/fs/xfs/xfs_bmap_btree.h +++ b/fs/xfs/xfs_bmap_btree.h @@ -18,9 +18,6 @@  #ifndef __XFS_BMAP_BTREE_H__  #define __XFS_BMAP_BTREE_H__ -#define XFS_BMAP_MAGIC		0x424d4150	/* 'BMAP' */ -#define XFS_BMAP_CRC_MAGIC	0x424d4133	/* 'BMA3' */ -  struct xfs_btree_cur;  struct xfs_btree_block;  struct xfs_mount; @@ -28,85 +25,6 @@ struct xfs_inode;  struct xfs_trans;  /* - * Bmap root header, on-disk form only. - */ -typedef struct xfs_bmdr_block { -	__be16		bb_level;	/* 0 is a leaf */ -	__be16		bb_numrecs;	/* current # of data records */ -} xfs_bmdr_block_t; - -/* - * Bmap btree record and extent descriptor. - *  l0:63 is an extent flag (value 1 indicates non-normal). - *  l0:9-62 are startoff. - *  l0:0-8 and l1:21-63 are startblock. - *  l1:0-20 are blockcount. 
- */ -#define BMBT_EXNTFLAG_BITLEN	1 -#define BMBT_STARTOFF_BITLEN	54 -#define BMBT_STARTBLOCK_BITLEN	52 -#define BMBT_BLOCKCOUNT_BITLEN	21 - -typedef struct xfs_bmbt_rec { -	__be64			l0, l1; -} xfs_bmbt_rec_t; - -typedef __uint64_t	xfs_bmbt_rec_base_t;	/* use this for casts */ -typedef xfs_bmbt_rec_t xfs_bmdr_rec_t; - -typedef struct xfs_bmbt_rec_host { -	__uint64_t		l0, l1; -} xfs_bmbt_rec_host_t; - -/* - * Values and macros for delayed-allocation startblock fields. - */ -#define STARTBLOCKVALBITS	17 -#define STARTBLOCKMASKBITS	(15 + XFS_BIG_BLKNOS * 20) -#define DSTARTBLOCKMASKBITS	(15 + 20) -#define STARTBLOCKMASK		\ -	(((((xfs_fsblock_t)1) << STARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS) -#define DSTARTBLOCKMASK		\ -	(((((xfs_dfsbno_t)1) << DSTARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS) - -static inline int isnullstartblock(xfs_fsblock_t x) -{ -	return ((x) & STARTBLOCKMASK) == STARTBLOCKMASK; -} - -static inline int isnulldstartblock(xfs_dfsbno_t x) -{ -	return ((x) & DSTARTBLOCKMASK) == DSTARTBLOCKMASK; -} - -static inline xfs_fsblock_t nullstartblock(int k) -{ -	ASSERT(k < (1 << STARTBLOCKVALBITS)); -	return STARTBLOCKMASK | (k); -} - -static inline xfs_filblks_t startblockval(xfs_fsblock_t x) -{ -	return (xfs_filblks_t)((x) & ~STARTBLOCKMASK); -} - -/* - * Possible extent formats. - */ -typedef enum { -	XFS_EXTFMT_NOSTATE = 0, -	XFS_EXTFMT_HASSTATE -} xfs_exntfmt_t; - -/* - * Possible extent states. - */ -typedef enum { -	XFS_EXT_NORM, XFS_EXT_UNWRITTEN, -	XFS_EXT_DMAPI_OFFLINE, XFS_EXT_INVALID -} xfs_exntst_t; - -/*   * Extent state and extent format macros.   */  #define XFS_EXTFMT_INODE(x)	\ @@ -115,27 +33,6 @@ typedef enum {  #define ISUNWRITTEN(x)	((x)->br_state == XFS_EXT_UNWRITTEN)  /* - * Incore version of above. - */ -typedef struct xfs_bmbt_irec -{ -	xfs_fileoff_t	br_startoff;	/* starting file offset */ -	xfs_fsblock_t	br_startblock;	/* starting block number */ -	xfs_filblks_t	br_blockcount;	/* number of blocks */ -	xfs_exntst_t	br_state;	/* extent state */ -} xfs_bmbt_irec_t; - -/* - * Key structure for non-leaf levels of the tree. - */ -typedef struct xfs_bmbt_key { -	__be64		br_startoff;	/* starting file offset */ -} xfs_bmbt_key_t, xfs_bmdr_key_t; - -/* btree pointer type */ -typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t; - -/*   * Btree block header size depends on a superblock flag.   
*/  #define XFS_BMBT_BLOCK_LEN(mp) \ @@ -233,7 +130,7 @@ extern void xfs_bmbt_to_bmdr(struct xfs_mount *, struct xfs_btree_block *, int,  			xfs_bmdr_block_t *, int);  extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level); -extern int xfs_bmdr_maxrecs(struct xfs_mount *, int blocklen, int leaf); +extern int xfs_bmdr_maxrecs(int blocklen, int leaf);  extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf);  extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip, @@ -243,6 +140,4 @@ extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip,  extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,  		struct xfs_trans *, struct xfs_inode *, int); -extern const struct xfs_buf_ops xfs_bmbt_buf_ops; -  #endif	/* __XFS_BMAP_BTREE_H__ */ diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 97f952caea7..64731ef3324 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -18,31 +18,31 @@   */  #include "xfs.h"  #include "xfs_fs.h" +#include "xfs_shared.h"  #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" +#include "xfs_da_format.h"  #include "xfs_inode.h"  #include "xfs_btree.h" +#include "xfs_trans.h"  #include "xfs_extfree_item.h"  #include "xfs_alloc.h"  #include "xfs_bmap.h"  #include "xfs_bmap_util.h" +#include "xfs_bmap_btree.h"  #include "xfs_rtalloc.h"  #include "xfs_error.h"  #include "xfs_quota.h"  #include "xfs_trans_space.h"  #include "xfs_trace.h"  #include "xfs_icache.h" +#include "xfs_log.h" +#include "xfs_dinode.h"  /* Kernel only BMAP related definitions and functions */ @@ -249,48 +249,6 @@ xfs_bmap_rtalloc(  }  /* - * Stack switching interfaces for allocation - */ -static void -xfs_bmapi_allocate_worker( -	struct work_struct	*work) -{ -	struct xfs_bmalloca	*args = container_of(work, -						struct xfs_bmalloca, work); -	unsigned long		pflags; - -	/* we are in a transaction context here */ -	current_set_flags_nested(&pflags, PF_FSTRANS); - -	args->result = __xfs_bmapi_allocate(args); -	complete(args->done); - -	current_restore_flags_nested(&pflags, PF_FSTRANS); -} - -/* - * Some allocation requests often come in with little stack to work on. Push - * them off to a worker thread so there is lots of stack to use. Otherwise just - * call directly to avoid the context switch overhead here. - */ -int -xfs_bmapi_allocate( -	struct xfs_bmalloca	*args) -{ -	DECLARE_COMPLETION_ONSTACK(done); - -	if (!args->stack_switch) -		return __xfs_bmapi_allocate(args); - - -	args->done = &done; -	INIT_WORK_ONSTACK(&args->work, xfs_bmapi_allocate_worker); -	queue_work(xfs_alloc_wq, &args->work); -	wait_for_completion(&done); -	return args->result; -} - -/*   * Check if the endoff is outside the last extent. If so the caller will grow   * the allocation to a stripe unit boundary.  All offsets are considered outside   * the end of file for an empty fork, so 1 is returned in *eof in that case. 
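[Editor's note — illustration, not part of the commit] The hunk above removes the stack-switching wrapper around __xfs_bmapi_allocate(); the same worker-offload idea is reintroduced for bmbt splits in xfs_btree.c further down in this patch. The following self-contained sketch shows the pattern in isolation; the structure and function names (deep_call_args, deep_call, deep_call_offload) and the trivial doubling operation are invented for illustration. The arguments are packaged on the caller's stack, the heavy work is queued to a workqueue so it runs on a fresh worker stack, and the caller waits on an on-stack completion:

#include <linux/kernel.h>
#include <linux/workqueue.h>
#include <linux/completion.h>

struct deep_call_args {
	int			input;		/* hypothetical operation argument */
	int			result;		/* filled in by the worker */
	struct completion	*done;
	struct work_struct	work;
};

static int deep_call(int input)
{
	/* stand-in for a stack-hungry operation such as __xfs_btree_split() */
	return input * 2;
}

static void deep_call_worker(struct work_struct *work)
{
	struct deep_call_args	*args = container_of(work,
					struct deep_call_args, work);

	args->result = deep_call(args->input);
	complete(args->done);
}

static int deep_call_offload(struct workqueue_struct *wq, int input)
{
	struct deep_call_args	args;
	DECLARE_COMPLETION_ONSTACK(done);

	args.input = input;
	args.done = &done;
	INIT_WORK_ONSTACK(&args.work, deep_call_worker);
	queue_work(wq, &args.work);	/* deep_call_worker() runs on the workqueue stack */
	wait_for_completion(&done);
	destroy_work_on_stack(&args.work);
	return args.result;
}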
@@ -617,22 +575,27 @@ xfs_getbmap(  		return XFS_ERROR(ENOMEM);  	xfs_ilock(ip, XFS_IOLOCK_SHARED); -	if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) { -		if (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size) { +	if (whichfork == XFS_DATA_FORK) { +		if (!(iflags & BMV_IF_DELALLOC) && +		    (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {  			error = -filemap_write_and_wait(VFS_I(ip)->i_mapping);  			if (error)  				goto out_unlock_iolock; + +			/* +			 * Even after flushing the inode, there can still be +			 * delalloc blocks on the inode beyond EOF due to +			 * speculative preallocation.  These are not removed +			 * until the release function is called or the inode +			 * is inactivated.  Hence we cannot assert here that +			 * ip->i_delayed_blks == 0. +			 */  		} -		/* -		 * even after flushing the inode, there can still be delalloc -		 * blocks on the inode beyond EOF due to speculative -		 * preallocation. These are not removed until the release -		 * function is called or the inode is inactivated. Hence we -		 * cannot assert here that ip->i_delayed_blks == 0. -		 */ -	} -	lock = xfs_ilock_map_shared(ip); +		lock = xfs_ilock_data_map_shared(ip); +	} else { +		lock = xfs_ilock_attr_map_shared(ip); +	}  	/*  	 * Don't let nex be bigger than the number of extents @@ -737,7 +700,7 @@ xfs_getbmap(   out_free_map:  	kmem_free(map);   out_unlock_ilock: -	xfs_iunlock_map_shared(ip, lock); +	xfs_iunlock(ip, lock);   out_unlock_iolock:  	xfs_iunlock(ip, XFS_IOLOCK_SHARED); @@ -965,32 +928,12 @@ xfs_free_eofblocks(  	return error;  } -/* - * xfs_alloc_file_space() - *      This routine allocates disk space for the given file. - * - *	If alloc_type == 0, this request is for an ALLOCSP type - *	request which will change the file size.  In this case, no - *	DMAPI event will be generated by the call.  A TRUNCATE event - *	will be generated later by xfs_setattr. - * - *	If alloc_type != 0, this request is for a RESVSP type - *	request, and a DMAPI DM_EVENT_WRITE will be generated if the - *	lower block boundary byte address is less than the file's - *	length. 
- * - * RETURNS: - *       0 on success - *      errno on error - * - */ -STATIC int +int  xfs_alloc_file_space( -	xfs_inode_t		*ip, +	struct xfs_inode	*ip,  	xfs_off_t		offset,  	xfs_off_t		len, -	int			alloc_type, -	int			attr_flags) +	int			alloc_type)  {  	xfs_mount_t		*mp = ip->i_mount;  	xfs_off_t		count; @@ -1188,9 +1131,15 @@ xfs_zero_remaining_bytes(  	xfs_buf_unlock(bp);  	for (offset = startoff; offset <= endoff; offset = lastoffset + 1) { +		uint lock_mode; +  		offset_fsb = XFS_B_TO_FSBT(mp, offset);  		nimap = 1; + +		lock_mode = xfs_ilock_data_map_shared(ip);  		error = xfs_bmapi_read(ip, offset_fsb, 1, &imap, &nimap, 0); +		xfs_iunlock(ip, lock_mode); +  		if (error || nimap < 1)  			break;  		ASSERT(imap.br_blockcount >= 1); @@ -1207,7 +1156,12 @@ xfs_zero_remaining_bytes(  		XFS_BUF_UNWRITE(bp);  		XFS_BUF_READ(bp);  		XFS_BUF_SET_ADDR(bp, xfs_fsb_to_db(ip, imap.br_startblock)); -		xfsbdstrat(mp, bp); + +		if (XFS_FORCED_SHUTDOWN(mp)) { +			error = XFS_ERROR(EIO); +			break; +		} +		xfs_buf_iorequest(bp);  		error = xfs_buf_iowait(bp);  		if (error) {  			xfs_buf_ioerror_alert(bp, @@ -1220,7 +1174,12 @@ xfs_zero_remaining_bytes(  		XFS_BUF_UNDONE(bp);  		XFS_BUF_UNREAD(bp);  		XFS_BUF_WRITE(bp); -		xfsbdstrat(mp, bp); + +		if (XFS_FORCED_SHUTDOWN(mp)) { +			error = XFS_ERROR(EIO); +			break; +		} +		xfs_buf_iorequest(bp);  		error = xfs_buf_iowait(bp);  		if (error) {  			xfs_buf_ioerror_alert(bp, @@ -1232,24 +1191,11 @@ xfs_zero_remaining_bytes(  	return error;  } -/* - * xfs_free_file_space() - *      This routine frees disk space for the given file. - * - *	This routine is only called by xfs_change_file_space - *	for an UNRESVSP type call. - * - * RETURNS: - *       0 on success - *      errno on error - * - */ -STATIC int +int  xfs_free_file_space( -	xfs_inode_t		*ip, +	struct xfs_inode	*ip,  	xfs_off_t		offset, -	xfs_off_t		len, -	int			attr_flags) +	xfs_off_t		len)  {  	int			committed;  	int			done; @@ -1267,7 +1213,6 @@ xfs_free_file_space(  	int			rt;  	xfs_fileoff_t		startoffset_fsb;  	xfs_trans_t		*tp; -	int			need_iolock = 1;  	mp = ip->i_mount; @@ -1284,20 +1229,15 @@ xfs_free_file_space(  	startoffset_fsb	= XFS_B_TO_FSB(mp, offset);  	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len); -	if (attr_flags & XFS_ATTR_NOLOCK) -		need_iolock = 0; -	if (need_iolock) { -		xfs_ilock(ip, XFS_IOLOCK_EXCL); -		/* wait for the completion of any pending DIOs */ -		inode_dio_wait(VFS_I(ip)); -	} +	/* wait for the completion of any pending DIOs */ +	inode_dio_wait(VFS_I(ip));  	rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);  	ioffset = offset & ~(rounding - 1);  	error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,  					      ioffset, -1);  	if (error) -		goto out_unlock_iolock; +		goto out;  	truncate_pagecache_range(VFS_I(ip), ioffset, -1);  	/* @@ -1311,7 +1251,7 @@ xfs_free_file_space(  		error = xfs_bmapi_read(ip, startoffset_fsb, 1,  					&imap, &nimap, 0);  		if (error) -			goto out_unlock_iolock; +			goto out;  		ASSERT(nimap == 0 || nimap == 1);  		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {  			xfs_daddr_t	block; @@ -1326,7 +1266,7 @@ xfs_free_file_space(  		error = xfs_bmapi_read(ip, endoffset_fsb - 1, 1,  					&imap, &nimap, 0);  		if (error) -			goto out_unlock_iolock; +			goto out;  		ASSERT(nimap == 0 || nimap == 1);  		if (nimap && imap.br_startblock != HOLESTARTBLOCK) {  			ASSERT(imap.br_startblock != DELAYSTARTBLOCK); @@ -1366,7 +1306,6 @@ xfs_free_file_space(  		 * the freeing of the space succeeds at ENOSPC.  		 
*/  		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT); -		tp->t_flags |= XFS_TRANS_RESERVE;  		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, resblks, 0);  		/* @@ -1412,27 +1351,23 @@ xfs_free_file_space(  		xfs_iunlock(ip, XFS_ILOCK_EXCL);  	} - out_unlock_iolock: -	if (need_iolock) -		xfs_iunlock(ip, XFS_IOLOCK_EXCL); + out:  	return error;   error0:  	xfs_bmap_cancel(&free_list);   error1:  	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); -	xfs_iunlock(ip, need_iolock ? (XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL) : -		    XFS_ILOCK_EXCL); -	return error; +	xfs_iunlock(ip, XFS_ILOCK_EXCL); +	goto out;  } -STATIC int +int  xfs_zero_file_space(  	struct xfs_inode	*ip,  	xfs_off_t		offset, -	xfs_off_t		len, -	int			attr_flags) +	xfs_off_t		len)  {  	struct xfs_mount	*mp = ip->i_mount;  	uint			granularity; @@ -1440,6 +1375,8 @@ xfs_zero_file_space(  	xfs_off_t		end_boundary;  	int			error; +	trace_xfs_zero_file_space(ip); +  	granularity = max_t(uint, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);  	/* @@ -1453,26 +1390,32 @@ xfs_zero_file_space(  	ASSERT(start_boundary >= offset);  	ASSERT(end_boundary <= offset + len); -	if (!(attr_flags & XFS_ATTR_NOLOCK)) -		xfs_ilock(ip, XFS_IOLOCK_EXCL); -  	if (start_boundary < end_boundary - 1) { -		/* punch out the page cache over the conversion range */ +		/* +		 * punch out delayed allocation blocks and the page cache over +		 * the conversion range +		 */ +		xfs_ilock(ip, XFS_ILOCK_EXCL); +		error = xfs_bmap_punch_delalloc_range(ip, +				XFS_B_TO_FSBT(mp, start_boundary), +				XFS_B_TO_FSB(mp, end_boundary - start_boundary)); +		xfs_iunlock(ip, XFS_ILOCK_EXCL);  		truncate_pagecache_range(VFS_I(ip), start_boundary,  					 end_boundary - 1); +  		/* convert the blocks */  		error = xfs_alloc_file_space(ip, start_boundary,  					end_boundary - start_boundary - 1, -					XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT, -					attr_flags); +					XFS_BMAPI_PREALLOC | XFS_BMAPI_CONVERT);  		if (error) -			goto out_unlock; +			goto out;  		/* We've handled the interior of the range, now for the edges */ -		if (start_boundary != offset) +		if (start_boundary != offset) {  			error = xfs_iozero(ip, offset, start_boundary - offset); -		if (error) -			goto out_unlock; +			if (error) +				goto out; +		}  		if (end_boundary != offset + len)  			error = xfs_iozero(ip, end_boundary, @@ -1486,194 +1429,103 @@ xfs_zero_file_space(  		error = xfs_iozero(ip, offset, len);  	} -out_unlock: -	if (!(attr_flags & XFS_ATTR_NOLOCK)) -		xfs_iunlock(ip, XFS_IOLOCK_EXCL); +out:  	return error;  }  /* - * xfs_change_file_space() - *      This routine allocates or frees disk space for the given file. - *      The user specified parameters are checked for alignment and size - *      limitations. - * + * xfs_collapse_file_space() + *	This routine frees disk space and shift extent for the given file. + *	The first thing we do is to free data blocks in the specified range + *	by calling xfs_free_file_space(). It would also sync dirty data + *	and invalidate page cache over the region on which collapse range + *	is working. And Shift extent records to the left to cover a hole.   
* RETURNS: - *       0 on success - *      errno on error + *	0 on success + *	errno on error   *   */  int -xfs_change_file_space( -	xfs_inode_t	*ip, -	int		cmd, -	xfs_flock64_t	*bf, -	xfs_off_t	offset, -	int		attr_flags) +xfs_collapse_file_space( +	struct xfs_inode	*ip, +	xfs_off_t		offset, +	xfs_off_t		len)  { -	xfs_mount_t	*mp = ip->i_mount; -	int		clrprealloc; -	int		error; -	xfs_fsize_t	fsize; -	int		setprealloc; -	xfs_off_t	startoffset; -	xfs_trans_t	*tp; -	struct iattr	iattr; - -	if (!S_ISREG(ip->i_d.di_mode)) -		return XFS_ERROR(EINVAL); - -	switch (bf->l_whence) { -	case 0: /*SEEK_SET*/ -		break; -	case 1: /*SEEK_CUR*/ -		bf->l_start += offset; -		break; -	case 2: /*SEEK_END*/ -		bf->l_start += XFS_ISIZE(ip); -		break; -	default: -		return XFS_ERROR(EINVAL); -	} - -	/* -	 * length of <= 0 for resv/unresv/zero is invalid.  length for -	 * alloc/free is ignored completely and we have no idea what userspace -	 * might have set it to, so set it to zero to allow range -	 * checks to pass. -	 */ -	switch (cmd) { -	case XFS_IOC_ZERO_RANGE: -	case XFS_IOC_RESVSP: -	case XFS_IOC_RESVSP64: -	case XFS_IOC_UNRESVSP: -	case XFS_IOC_UNRESVSP64: -		if (bf->l_len <= 0) -			return XFS_ERROR(EINVAL); -		break; -	default: -		bf->l_len = 0; -		break; -	} - -	if (bf->l_start < 0 || -	    bf->l_start > mp->m_super->s_maxbytes || -	    bf->l_start + bf->l_len < 0 || -	    bf->l_start + bf->l_len >= mp->m_super->s_maxbytes) -		return XFS_ERROR(EINVAL); - -	bf->l_whence = 0; +	int			done = 0; +	struct xfs_mount	*mp = ip->i_mount; +	struct xfs_trans	*tp; +	int			error; +	xfs_extnum_t		current_ext = 0; +	struct xfs_bmap_free	free_list; +	xfs_fsblock_t		first_block; +	int			committed; +	xfs_fileoff_t		start_fsb; +	xfs_fileoff_t		shift_fsb; -	startoffset = bf->l_start; -	fsize = XFS_ISIZE(ip); +	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL)); -	setprealloc = clrprealloc = 0; -	switch (cmd) { -	case XFS_IOC_ZERO_RANGE: -		error = xfs_zero_file_space(ip, startoffset, bf->l_len, -						attr_flags); -		if (error) -			return error; -		setprealloc = 1; -		break; +	trace_xfs_collapse_file_space(ip); -	case XFS_IOC_RESVSP: -	case XFS_IOC_RESVSP64: -		error = xfs_alloc_file_space(ip, startoffset, bf->l_len, -						XFS_BMAPI_PREALLOC, attr_flags); -		if (error) -			return error; -		setprealloc = 1; -		break; +	start_fsb = XFS_B_TO_FSB(mp, offset + len); +	shift_fsb = XFS_B_TO_FSB(mp, len); -	case XFS_IOC_UNRESVSP: -	case XFS_IOC_UNRESVSP64: -		if ((error = xfs_free_file_space(ip, startoffset, bf->l_len, -								attr_flags))) -			return error; -		break; +	error = xfs_free_file_space(ip, offset, len); +	if (error) +		return error; -	case XFS_IOC_ALLOCSP: -	case XFS_IOC_ALLOCSP64: -	case XFS_IOC_FREESP: -	case XFS_IOC_FREESP64: +	while (!error && !done) { +		tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);  		/* -		 * These operations actually do IO when extending the file, but -		 * the allocation is done seperately to the zeroing that is -		 * done. This set of operations need to be serialised against -		 * other IO operations, such as truncate and buffered IO. We -		 * need to take the IOLOCK here to serialise the allocation and -		 * zeroing IO to prevent other IOLOCK holders (e.g. getbmap, -		 * truncate, direct IO) from racing against the transient -		 * allocated but not written state we can have here. +		 * We would need to reserve permanent block for transaction. 
+		 * This will come into picture when after shifting extent into +		 * hole we found that adjacent extents can be merged which +		 * may lead to freeing of a block during record update.  		 */ -		xfs_ilock(ip, XFS_IOLOCK_EXCL); -		if (startoffset > fsize) { -			error = xfs_alloc_file_space(ip, fsize, -					startoffset - fsize, 0, -					attr_flags | XFS_ATTR_NOLOCK); -			if (error) { -				xfs_iunlock(ip, XFS_IOLOCK_EXCL); -				break; -			} +		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_write, +				XFS_DIOSTRAT_SPACE_RES(mp, 0), 0); +		if (error) { +			xfs_trans_cancel(tp, 0); +			break;  		} -		iattr.ia_valid = ATTR_SIZE; -		iattr.ia_size = startoffset; - -		error = xfs_setattr_size(ip, &iattr, -					 attr_flags | XFS_ATTR_NOLOCK); -		xfs_iunlock(ip, XFS_IOLOCK_EXCL); - +		xfs_ilock(ip, XFS_ILOCK_EXCL); +		error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, +				ip->i_gdquot, ip->i_pdquot, +				XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, +				XFS_QMOPT_RES_REGBLKS);  		if (error) -			return error; - -		clrprealloc = 1; -		break; - -	default: -		ASSERT(0); -		return XFS_ERROR(EINVAL); -	} - -	/* -	 * update the inode timestamp, mode, and prealloc flag bits -	 */ -	tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID); -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_writeid, 0, 0); -	if (error) { -		xfs_trans_cancel(tp, 0); -		return error; -	} +			goto out; -	xfs_ilock(ip, XFS_ILOCK_EXCL); -	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); +		xfs_trans_ijoin(tp, ip, 0); -	if ((attr_flags & XFS_ATTR_DMI) == 0) { -		ip->i_d.di_mode &= ~S_ISUID; +		xfs_bmap_init(&free_list, &first_block);  		/* -		 * Note that we don't have to worry about mandatory -		 * file locking being disabled here because we only -		 * clear the S_ISGID bit if the Group execute bit is -		 * on, but if it was on then mandatory locking wouldn't -		 * have been enabled. 
+		 * We are using the write transaction in which max 2 bmbt +		 * updates are allowed  		 */ -		if (ip->i_d.di_mode & S_IXGRP) -			ip->i_d.di_mode &= ~S_ISGID; +		error = xfs_bmap_shift_extents(tp, ip, &done, start_fsb, +					       shift_fsb, &current_ext, +					       &first_block, &free_list, +					       XFS_BMAP_MAX_SHIFT_EXTENTS); +		if (error) +			goto out; + +		error = xfs_bmap_finish(&tp, &free_list, &committed); +		if (error) +			goto out; -		xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); +		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); +		xfs_iunlock(ip, XFS_ILOCK_EXCL);  	} -	if (setprealloc) -		ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; -	else if (clrprealloc) -		ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; -	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); -	if (attr_flags & XFS_ATTR_SYNC) -		xfs_trans_set_sync(tp); -	return xfs_trans_commit(tp, 0); +	return error; + +out: +	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); +	xfs_iunlock(ip, XFS_ILOCK_EXCL); +	return error;  }  /* diff --git a/fs/xfs/xfs_bmap_util.h b/fs/xfs/xfs_bmap_util.h index 061260946f7..2fdb72d2c90 100644 --- a/fs/xfs/xfs_bmap_util.h +++ b/fs/xfs/xfs_bmap_util.h @@ -50,12 +50,11 @@ struct xfs_bmalloca {  	xfs_extlen_t		total;	/* total blocks needed for xaction */  	xfs_extlen_t		minlen;	/* minimum allocation size (blocks) */  	xfs_extlen_t		minleft; /* amount must be left after alloc */ -	char			eof;	/* set if allocating past last extent */ -	char			wasdel;	/* replacing a delayed allocation */ -	char			userdata;/* set if is user data */ -	char			aeof;	/* allocated space at eof */ -	char			conv;	/* overwriting unwritten extents */ -	char			stack_switch; +	bool			eof;	/* set if allocating past last extent */ +	bool			wasdel;	/* replacing a delayed allocation */ +	bool			userdata;/* set if is user data */ +	bool			aeof;	/* allocated space at eof */ +	bool			conv;	/* overwriting unwritten extents */  	int			flags;  	struct completion	*done;  	struct work_struct	work; @@ -65,8 +64,6 @@ struct xfs_bmalloca {  int	xfs_bmap_finish(struct xfs_trans **tp, struct xfs_bmap_free *flist,  			int *committed);  int	xfs_bmap_rtalloc(struct xfs_bmalloca *ap); -int	xfs_bmapi_allocate(struct xfs_bmalloca *args); -int	__xfs_bmapi_allocate(struct xfs_bmalloca *args);  int	xfs_bmap_eof(struct xfs_inode *ip, xfs_fileoff_t endoff,  		     int whichfork, int *eof);  int	xfs_bmap_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip, @@ -93,9 +90,14 @@ int	xfs_bmap_last_extent(struct xfs_trans *tp, struct xfs_inode *ip,  			     int *is_empty);  /* preallocation and hole punch interface */ -int	xfs_change_file_space(struct xfs_inode *ip, int cmd, -			      xfs_flock64_t *bf, xfs_off_t offset, -			      int attr_flags); +int	xfs_alloc_file_space(struct xfs_inode *ip, xfs_off_t offset, +			     xfs_off_t len, int alloc_type); +int	xfs_free_file_space(struct xfs_inode *ip, xfs_off_t offset, +			    xfs_off_t len); +int	xfs_zero_file_space(struct xfs_inode *ip, xfs_off_t offset, +			    xfs_off_t len); +int	xfs_collapse_file_space(struct xfs_inode *, xfs_off_t offset, +				xfs_off_t len);  /* EOF block manipulation functions */  bool	xfs_can_free_eofblocks(struct xfs_inode *ip, bool force); diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 5690e102243..cf893bc1e37 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -17,24 +17,23 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" 
+#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h" +#include "xfs_trans.h"  #include "xfs_inode_item.h"  #include "xfs_buf_item.h"  #include "xfs_btree.h"  #include "xfs_error.h"  #include "xfs_trace.h"  #include "xfs_cksum.h" +#include "xfs_alloc.h"  /*   * Cursor allocation zone. @@ -45,9 +44,10 @@ kmem_zone_t	*xfs_btree_cur_zone;   * Btree magic numbers.   */  static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { -	{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC }, +	{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, +	  XFS_FIBT_MAGIC },  	{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, -	  XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC } +	  XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }  };  #define xfs_btree_magic(cur) \  	xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum] @@ -236,8 +236,7 @@ xfs_btree_lblock_calc_crc(  		return;  	if (bip)  		block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); -	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), -			 XFS_BTREE_LBLOCK_CRC_OFF); +	xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);  }  bool @@ -245,8 +244,8 @@ xfs_btree_lblock_verify_crc(  	struct xfs_buf		*bp)  {  	if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) -		return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -					XFS_BTREE_LBLOCK_CRC_OFF); +		return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); +  	return true;  } @@ -269,8 +268,7 @@ xfs_btree_sblock_calc_crc(  		return;  	if (bip)  		block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); -	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), -			 XFS_BTREE_SBLOCK_CRC_OFF); +	xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);  }  bool @@ -278,8 +276,8 @@ xfs_btree_sblock_verify_crc(  	struct xfs_buf		*bp)  {  	if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) -		return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -					XFS_BTREE_SBLOCK_CRC_OFF); +		return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); +  	return true;  } @@ -556,14 +554,11 @@ xfs_btree_get_bufl(  	xfs_fsblock_t	fsbno,		/* file system block number */  	uint		lock)		/* lock flags for get_buf */  { -	xfs_buf_t	*bp;		/* buffer pointer (return value) */  	xfs_daddr_t		d;		/* real disk block address */  	ASSERT(fsbno != NULLFSBLOCK);  	d = XFS_FSB_TO_DADDR(mp, fsbno); -	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); -	ASSERT(!xfs_buf_geterror(bp)); -	return bp; +	return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);  }  /* @@ -578,15 +573,12 @@ xfs_btree_get_bufs(  	xfs_agblock_t	agbno,		/* allocation group block number */  	uint		lock)		/* lock flags for get_buf */  { -	xfs_buf_t	*bp;		/* buffer pointer (return value) */  	xfs_daddr_t		d;		/* real disk block address */  	ASSERT(agno != NULLAGNUMBER);  	ASSERT(agbno != NULLAGBLOCK);  	d = XFS_AGB_TO_DADDR(mp, agno, agbno); -	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); -	ASSERT(!xfs_buf_geterror(bp)); -	return bp; +	return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);  }  /* @@ -726,7 +718,6 @@ xfs_btree_read_bufl(  				   mp->m_bsize, lock, &bp, ops);  	if (error)  		return error; -	ASSERT(!xfs_buf_geterror(bp));  	if (bp)  		xfs_buf_set_ref(bp, refval);  	*bpp = bp; @@ -1119,6 +1110,7 @@ 
xfs_btree_set_refs(  		xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);  		break;  	case XFS_BTNUM_INO: +	case XFS_BTNUM_FINO:  		xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);  		break;  	case XFS_BTNUM_BMAP: @@ -1163,7 +1155,6 @@ STATIC int  xfs_btree_read_buf_block(  	struct xfs_btree_cur	*cur,  	union xfs_btree_ptr	*ptr, -	int			level,  	int			flags,  	struct xfs_btree_block	**block,  	struct xfs_buf		**bpp) @@ -1182,7 +1173,6 @@ xfs_btree_read_buf_block(  	if (error)  		return error; -	ASSERT(!xfs_buf_geterror(*bpp));  	xfs_btree_set_refs(cur, *bpp);  	*block = XFS_BUF_TO_BLOCK(*bpp);  	return 0; @@ -1521,8 +1511,8 @@ xfs_btree_increment(  		union xfs_btree_ptr	*ptrp;  		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); -		error = xfs_btree_read_buf_block(cur, ptrp, --lev, -							0, &block, &bp); +		--lev; +		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);  		if (error)  			goto error0; @@ -1620,8 +1610,8 @@ xfs_btree_decrement(  		union xfs_btree_ptr	*ptrp;  		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); -		error = xfs_btree_read_buf_block(cur, ptrp, --lev, -							0, &block, &bp); +		--lev; +		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);  		if (error)  			goto error0;  		xfs_btree_setbuf(cur, lev, bp); @@ -1671,7 +1661,7 @@ xfs_btree_lookup_get_block(  		return 0;  	} -	error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp); +	error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);  	if (error)  		return error; @@ -2022,7 +2012,7 @@ xfs_btree_lshift(  		goto out0;  	/* Set up the left neighbor as "left". */ -	error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp); +	error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);  	if (error)  		goto error0; @@ -2206,7 +2196,7 @@ xfs_btree_rshift(  		goto out0;  	/* Set up the right neighbor as "right". */ -	error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp); +	error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);  	if (error)  		goto error0; @@ -2334,7 +2324,7 @@ error1:   * record (to be inserted into parent).   */  STATIC int					/* error */ -xfs_btree_split( +__xfs_btree_split(  	struct xfs_btree_cur	*cur,  	int			level,  	union xfs_btree_ptr	*ptrp, @@ -2376,7 +2366,7 @@ xfs_btree_split(  	xfs_btree_buf_to_ptr(cur, lbp, &lptr);  	/* Allocate the new block. If we can't do it, we're toast. Give up. */ -	error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat); +	error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat);  	if (error)  		goto error0;  	if (*stat == 0) @@ -2474,7 +2464,7 @@ xfs_btree_split(  	 * point back to right instead of to left.  	 
*/  	if (!xfs_btree_ptr_is_null(cur, &rrptr)) { -		error = xfs_btree_read_buf_block(cur, &rrptr, level, +		error = xfs_btree_read_buf_block(cur, &rrptr,  							0, &rrblock, &rrbp);  		if (error)  			goto error0; @@ -2514,6 +2504,85 @@ error0:  	return error;  } +struct xfs_btree_split_args { +	struct xfs_btree_cur	*cur; +	int			level; +	union xfs_btree_ptr	*ptrp; +	union xfs_btree_key	*key; +	struct xfs_btree_cur	**curp; +	int			*stat;		/* success/failure */ +	int			result; +	bool			kswapd;	/* allocation in kswapd context */ +	struct completion	*done; +	struct work_struct	work; +}; + +/* + * Stack switching interfaces for allocation + */ +static void +xfs_btree_split_worker( +	struct work_struct	*work) +{ +	struct xfs_btree_split_args	*args = container_of(work, +						struct xfs_btree_split_args, work); +	unsigned long		pflags; +	unsigned long		new_pflags = PF_FSTRANS; + +	/* +	 * we are in a transaction context here, but may also be doing work +	 * in kswapd context, and hence we may need to inherit that state +	 * temporarily to ensure that we don't block waiting for memory reclaim +	 * in any way. +	 */ +	if (args->kswapd) +		new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; + +	current_set_flags_nested(&pflags, new_pflags); + +	args->result = __xfs_btree_split(args->cur, args->level, args->ptrp, +					 args->key, args->curp, args->stat); +	complete(args->done); + +	current_restore_flags_nested(&pflags, new_pflags); +} + +/* + * BMBT split requests often come in with little stack to work on. Push + * them off to a worker thread so there is lots of stack to use. For the other + * btree types, just call directly to avoid the context switch overhead here. + */ +STATIC int					/* error */ +xfs_btree_split( +	struct xfs_btree_cur	*cur, +	int			level, +	union xfs_btree_ptr	*ptrp, +	union xfs_btree_key	*key, +	struct xfs_btree_cur	**curp, +	int			*stat)		/* success/failure */ +{ +	struct xfs_btree_split_args	args; +	DECLARE_COMPLETION_ONSTACK(done); + +	if (cur->bc_btnum != XFS_BTNUM_BMAP) +		return __xfs_btree_split(cur, level, ptrp, key, curp, stat); + +	args.cur = cur; +	args.level = level; +	args.ptrp = ptrp; +	args.key = key; +	args.curp = curp; +	args.stat = stat; +	args.done = &done; +	args.kswapd = current_is_kswapd(); +	INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker); +	queue_work(xfs_alloc_wq, &args.work); +	wait_for_completion(&done); +	destroy_work_on_stack(&args.work); +	return args.result; +} + +  /*   * Copy the old inode root contents into a real block and make the   * broot point to it. @@ -2549,7 +2618,7 @@ xfs_btree_new_iroot(  	pp = xfs_btree_ptr_addr(cur, 1, block);  	/* Allocate the new block. If we can't do it, we're toast. Give up. */ -	error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat); +	error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);  	if (error)  		goto error0;  	if (*stat == 0) { @@ -2653,7 +2722,7 @@ xfs_btree_new_root(  	cur->bc_ops->init_ptr_from_cur(cur, &rptr);  	/* Allocate the new block. If we can't do it, we're toast. Give up. 
*/ -	error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat); +	error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat);  	if (error)  		goto error0;  	if (*stat == 0) @@ -2688,8 +2757,7 @@ xfs_btree_new_root(  		lbp = bp;  		xfs_btree_buf_to_ptr(cur, lbp, &lptr);  		left = block; -		error = xfs_btree_read_buf_block(cur, &rptr, -					cur->bc_nlevels - 1, 0, &right, &rbp); +		error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);  		if (error)  			goto error0;  		bp = rbp; @@ -2700,8 +2768,7 @@ xfs_btree_new_root(  		xfs_btree_buf_to_ptr(cur, rbp, &rptr);  		right = block;  		xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); -		error = xfs_btree_read_buf_block(cur, &lptr, -					cur->bc_nlevels - 1, 0, &left, &lbp); +		error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);  		if (error)  			goto error0;  		bp = lbp; @@ -3653,8 +3720,7 @@ xfs_btree_delrec(  		rptr = cptr;  		right = block;  		rbp = bp; -		error = xfs_btree_read_buf_block(cur, &lptr, level, -							0, &left, &lbp); +		error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);  		if (error)  			goto error0; @@ -3671,8 +3737,7 @@ xfs_btree_delrec(  		lptr = cptr;  		left = block;  		lbp = bp; -		error = xfs_btree_read_buf_block(cur, &rptr, level, -							0, &right, &rbp); +		error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);  		if (error)  			goto error0; @@ -3744,8 +3809,7 @@ xfs_btree_delrec(  	/* If there is a right sibling, point it to the remaining block. */  	xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);  	if (!xfs_btree_ptr_is_null(cur, &cptr)) { -		error = xfs_btree_read_buf_block(cur, &cptr, level, -							0, &rrblock, &rrbp); +		error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);  		if (error)  			goto error0;  		xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB); diff --git a/fs/xfs/xfs_btree.h b/fs/xfs/xfs_btree.h index 06729b67ad5..a04b69422f6 100644 --- a/fs/xfs/xfs_btree.h +++ b/fs/xfs/xfs_btree.h @@ -27,73 +27,6 @@ struct xfs_trans;  extern kmem_zone_t	*xfs_btree_cur_zone;  /* - * This nonsense is to make -wlint happy. - */ -#define	XFS_LOOKUP_EQ	((xfs_lookup_t)XFS_LOOKUP_EQi) -#define	XFS_LOOKUP_LE	((xfs_lookup_t)XFS_LOOKUP_LEi) -#define	XFS_LOOKUP_GE	((xfs_lookup_t)XFS_LOOKUP_GEi) - -#define	XFS_BTNUM_BNO	((xfs_btnum_t)XFS_BTNUM_BNOi) -#define	XFS_BTNUM_CNT	((xfs_btnum_t)XFS_BTNUM_CNTi) -#define	XFS_BTNUM_BMAP	((xfs_btnum_t)XFS_BTNUM_BMAPi) -#define	XFS_BTNUM_INO	((xfs_btnum_t)XFS_BTNUM_INOi) - -/* - * Generic btree header. - * - * This is a combination of the actual format used on disk for short and long - * format btrees.  The first three fields are shared by both format, but the - * pointers are different and should be used with care. - * - * To get the size of the actual short or long form headers please use the size - * macros below.  Never use sizeof(xfs_btree_block). - * - * The blkno, crc, lsn, owner and uuid fields are only available in filesystems - * with the crc feature bit, and all accesses to them must be conditional on - * that flag. 
- */ -struct xfs_btree_block { -	__be32		bb_magic;	/* magic number for block type */ -	__be16		bb_level;	/* 0 is a leaf */ -	__be16		bb_numrecs;	/* current # of data records */ -	union { -		struct { -			__be32		bb_leftsib; -			__be32		bb_rightsib; - -			__be64		bb_blkno; -			__be64		bb_lsn; -			uuid_t		bb_uuid; -			__be32		bb_owner; -			__le32		bb_crc; -		} s;			/* short form pointers */ -		struct	{ -			__be64		bb_leftsib; -			__be64		bb_rightsib; - -			__be64		bb_blkno; -			__be64		bb_lsn; -			uuid_t		bb_uuid; -			__be64		bb_owner; -			__le32		bb_crc; -			__be32		bb_pad; /* padding for alignment */ -		} l;			/* long form pointers */ -	} bb_u;				/* rest */ -}; - -#define XFS_BTREE_SBLOCK_LEN	16	/* size of a short form block */ -#define XFS_BTREE_LBLOCK_LEN	24	/* size of a long form block */ - -/* sizes of CRC enabled btree blocks */ -#define XFS_BTREE_SBLOCK_CRC_LEN	(XFS_BTREE_SBLOCK_LEN + 40) -#define XFS_BTREE_LBLOCK_CRC_LEN	(XFS_BTREE_LBLOCK_LEN + 48) - -#define XFS_BTREE_SBLOCK_CRC_OFF \ -	offsetof(struct xfs_btree_block, bb_u.s.bb_crc) -#define XFS_BTREE_LBLOCK_CRC_OFF \ -	offsetof(struct xfs_btree_block, bb_u.l.bb_crc) - -/*   * Generic key, ptr and record wrapper structures.   *   * These are disk format structures, and are converted where necessary @@ -119,6 +52,19 @@ union xfs_btree_rec {  };  /* + * This nonsense is to make -wlint happy. + */ +#define	XFS_LOOKUP_EQ	((xfs_lookup_t)XFS_LOOKUP_EQi) +#define	XFS_LOOKUP_LE	((xfs_lookup_t)XFS_LOOKUP_LEi) +#define	XFS_LOOKUP_GE	((xfs_lookup_t)XFS_LOOKUP_GEi) + +#define	XFS_BTNUM_BNO	((xfs_btnum_t)XFS_BTNUM_BNOi) +#define	XFS_BTNUM_CNT	((xfs_btnum_t)XFS_BTNUM_CNTi) +#define	XFS_BTNUM_BMAP	((xfs_btnum_t)XFS_BTNUM_BMAPi) +#define	XFS_BTNUM_INO	((xfs_btnum_t)XFS_BTNUM_INOi) +#define	XFS_BTNUM_FINO	((xfs_btnum_t)XFS_BTNUM_FINOi) + +/*   * For logging record fields.   
*/  #define	XFS_BB_MAGIC		(1 << 0) @@ -147,6 +93,7 @@ do {    \  	case XFS_BTNUM_CNT: __XFS_BTREE_STATS_INC(abtc, stat); break;	\  	case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(bmbt, stat); break;	\  	case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(ibt, stat); break;	\ +	case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(fibt, stat); break;	\  	case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break;	\  	}       \  } while (0) @@ -160,6 +107,7 @@ do {    \  	case XFS_BTNUM_CNT: __XFS_BTREE_STATS_ADD(abtc, stat, val); break; \  	case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_ADD(bmbt, stat, val); break; \  	case XFS_BTNUM_INO: __XFS_BTREE_STATS_ADD(ibt, stat, val); break; \ +	case XFS_BTNUM_FINO: __XFS_BTREE_STATS_ADD(fibt, stat, val); break; \  	case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break;	\  	}       \  } while (0) @@ -184,7 +132,7 @@ struct xfs_btree_ops {  	int	(*alloc_block)(struct xfs_btree_cur *cur,  			       union xfs_btree_ptr *start_bno,  			       union xfs_btree_ptr *new_bno, -			       int length, int *stat); +			       int *stat);  	int	(*free_block)(struct xfs_btree_cur *cur, struct xfs_buf *bp);  	/* update last record information */ diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 263470075ea..7a34a1ae655 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -34,12 +34,13 @@  #include <linux/backing-dev.h>  #include <linux/freezer.h> -#include "xfs_sb.h" +#include "xfs_log_format.h"  #include "xfs_trans_resv.h" -#include "xfs_log.h" +#include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h"  #include "xfs_trace.h" +#include "xfs_log.h"  static kmem_zone_t *xfs_buf_zone; @@ -215,8 +216,7 @@ _xfs_buf_alloc(  STATIC int  _xfs_buf_get_pages(  	xfs_buf_t		*bp, -	int			page_count, -	xfs_buf_flags_t		flags) +	int			page_count)  {  	/* Make sure that we have a page list */  	if (bp->b_pages == NULL) { @@ -329,7 +329,7 @@ use_alloc_page:  	end = (BBTOB(bp->b_maps[0].bm_bn + bp->b_length) + PAGE_SIZE - 1)  								>> PAGE_SHIFT;  	page_count = end - start; -	error = _xfs_buf_get_pages(bp, page_count, flags); +	error = _xfs_buf_get_pages(bp, page_count);  	if (unlikely(error))  		return error; @@ -395,7 +395,17 @@ _xfs_buf_map_pages(  		bp->b_addr = NULL;  	} else {  		int retried = 0; +		unsigned noio_flag; +		/* +		 * vm_map_ram() will allocate auxillary structures (e.g. +		 * pagetables) with GFP_KERNEL, yet we are likely to be under +		 * GFP_NOFS context here. Hence we need to tell memory reclaim +		 * that we are in such a context via PF_MEMALLOC_NOIO to prevent +		 * memory reclaim re-entering the filesystem here and +		 * potentially deadlocking. 
+		 */ +		noio_flag = memalloc_noio_save();  		do {  			bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count,  						-1, PAGE_KERNEL); @@ -403,6 +413,7 @@ _xfs_buf_map_pages(  				break;  			vm_unmap_aliases();  		} while (retried++ <= 1); +		memalloc_noio_restore(noio_flag);  		if (!bp->b_addr)  			return -ENOMEM; @@ -444,8 +455,8 @@ _xfs_buf_find(  	numbytes = BBTOB(numblks);  	/* Check for IOs smaller than the sector size / not sector aligned */ -	ASSERT(!(numbytes < (1 << btp->bt_sshift))); -	ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask)); +	ASSERT(!(numbytes < btp->bt_meta_sectorsize)); +	ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_meta_sectormask));  	/*  	 * Corrupted block numbers can get through to here, unfortunately, so we @@ -590,7 +601,7 @@ found:  		error = _xfs_buf_map_pages(bp, flags);  		if (unlikely(error)) {  			xfs_warn(target->bt_mount, -				"%s: failed to map pages\n", __func__); +				"%s: failed to map pagesn", __func__);  			xfs_buf_relse(bp);  			return NULL;  		} @@ -697,7 +708,11 @@ xfs_buf_read_uncached(  	bp->b_flags |= XBF_READ;  	bp->b_ops = ops; -	xfsbdstrat(target->bt_mount, bp); +	if (XFS_FORCED_SHUTDOWN(target->bt_mount)) { +		xfs_buf_relse(bp); +		return NULL; +	} +	xfs_buf_iorequest(bp);  	xfs_buf_iowait(bp);  	return bp;  } @@ -762,7 +777,7 @@ xfs_buf_associate_memory(  	bp->b_pages = NULL;  	bp->b_addr = mem; -	rval = _xfs_buf_get_pages(bp, page_count, 0); +	rval = _xfs_buf_get_pages(bp, page_count);  	if (rval)  		return rval; @@ -795,7 +810,7 @@ xfs_buf_get_uncached(  		goto fail;  	page_count = PAGE_ALIGN(numblks << BBSHIFT) >> PAGE_SHIFT; -	error = _xfs_buf_get_pages(bp, page_count, 0); +	error = _xfs_buf_get_pages(bp, page_count);  	if (error)  		goto fail_free_buf; @@ -809,7 +824,7 @@ xfs_buf_get_uncached(  	error = _xfs_buf_map_pages(bp, 0);  	if (unlikely(error)) {  		xfs_warn(target->bt_mount, -			"%s: failed to map pages\n", __func__); +			"%s: failed to map pages", __func__);  		goto fail_free_mem;  	} @@ -1088,7 +1103,7 @@ xfs_bioerror(   * This is meant for userdata errors; metadata bufs come with   * iodone functions attached, so that we can track down errors.   */ -STATIC int +int  xfs_bioerror_relse(  	struct xfs_buf	*bp)  { @@ -1151,7 +1166,7 @@ xfs_bwrite(  	ASSERT(xfs_buf_islocked(bp));  	bp->b_flags |= XBF_WRITE; -	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q); +	bp->b_flags &= ~(XBF_ASYNC | XBF_READ | _XBF_DELWRI_Q | XBF_WRITE_FAIL);  	xfs_bdstrat_cb(bp); @@ -1163,25 +1178,6 @@ xfs_bwrite(  	return error;  } -/* - * Wrapper around bdstrat so that we can stop data from going to disk in case - * we are shutting down the filesystem.  Typically user data goes thru this - * path; one of the exceptions is the superblock. 
- */ -void -xfsbdstrat( -	struct xfs_mount	*mp, -	struct xfs_buf		*bp) -{ -	if (XFS_FORCED_SHUTDOWN(mp)) { -		trace_xfs_bdstrat_shut(bp, _RET_IP_); -		xfs_bioerror_relse(bp); -		return; -	} - -	xfs_buf_iorequest(bp); -} -  STATIC void  _xfs_buf_ioend(  	xfs_buf_t		*bp, @@ -1254,7 +1250,7 @@ next_chunk:  	bio = bio_alloc(GFP_NOIO, nr_pages);  	bio->bi_bdev = bp->b_target->bt_bdev; -	bio->bi_sector = sector; +	bio->bi_iter.bi_sector = sector;  	bio->bi_end_io = xfs_buf_bio_end_io;  	bio->bi_private = bp; @@ -1276,7 +1272,7 @@ next_chunk:  		total_nr_pages--;  	} -	if (likely(bio->bi_size)) { +	if (likely(bio->bi_iter.bi_size)) {  		if (xfs_buf_is_vmapped(bp)) {  			flush_kernel_vmap_range(bp->b_addr,  						xfs_buf_vmap_len(bp)); @@ -1375,21 +1371,29 @@ xfs_buf_iorequest(  		xfs_buf_wait_unpin(bp);  	xfs_buf_hold(bp); -	/* Set the count to 1 initially, this will stop an I/O +	/* +	 * Set the count to 1 initially, this will stop an I/O  	 * completion callout which happens before we have started  	 * all the I/O from calling xfs_buf_ioend too early.  	 */  	atomic_set(&bp->b_io_remaining, 1);  	_xfs_buf_ioapply(bp); -	_xfs_buf_ioend(bp, 1); +	/* +	 * If _xfs_buf_ioapply failed, we'll get back here with +	 * only the reference we took above.  _xfs_buf_ioend will +	 * drop it to zero, so we'd better not queue it for later, +	 * or we'll free it before it's done. +	 */ +	_xfs_buf_ioend(bp, bp->b_error ? 0 : 1);  	xfs_buf_rele(bp);  }  /*   * Waits for I/O to complete on the buffer supplied.  It returns immediately if - * no I/O is pending or there is already a pending error on the buffer.  It - * returns the I/O error code, if any, or 0 if there was no error. + * no I/O is pending or there is already a pending error on the buffer, in which + * case nothing will ever complete.  It returns the I/O error code, if any, or + * 0 if there was no error.   */  int  xfs_buf_iowait( @@ -1515,6 +1519,12 @@ xfs_wait_buftarg(  			struct xfs_buf *bp;  			bp = list_first_entry(&dispose, struct xfs_buf, b_lru);  			list_del_init(&bp->b_lru); +			if (bp->b_flags & XBF_WRITE_FAIL) { +				xfs_alert(btp->bt_mount, +"Corruption Alert: Buffer at block 0x%llx had permanent write failures!\n" +"Please run xfs_repair to determine the extent of the problem.", +					(long long)bp->b_bn); +			}  			xfs_buf_rele(bp);  		}  		if (loop++ != 0) @@ -1601,16 +1611,14 @@ xfs_free_buftarg(  	kmem_free(btp);  } -STATIC int -xfs_setsize_buftarg_flags( +int +xfs_setsize_buftarg(  	xfs_buftarg_t		*btp, -	unsigned int		blocksize, -	unsigned int		sectorsize, -	int			verbose) +	unsigned int		sectorsize)  { -	btp->bt_bsize = blocksize; -	btp->bt_sshift = ffs(sectorsize) - 1; -	btp->bt_smask = sectorsize - 1; +	/* Set up metadata sector size info */ +	btp->bt_meta_sectorsize = sectorsize; +	btp->bt_meta_sectormask = sectorsize - 1;  	if (set_blocksize(btp->bt_bdev, sectorsize)) {  		char name[BDEVNAME_SIZE]; @@ -1618,43 +1626,35 @@ xfs_setsize_buftarg_flags(  		bdevname(btp->bt_bdev, name);  		xfs_warn(btp->bt_mount, -			"Cannot set_blocksize to %u on device %s\n", +			"Cannot set_blocksize to %u on device %s",  			sectorsize, name);  		return EINVAL;  	} +	/* Set up device logical sector size mask */ +	btp->bt_logical_sectorsize = bdev_logical_block_size(btp->bt_bdev); +	btp->bt_logical_sectormask = bdev_logical_block_size(btp->bt_bdev) - 1; +  	return 0;  }  /* - *	When allocating the initial buffer target we have not yet - *	read in the superblock, so don't know what sized sectors - *	are being used at this early stage.  Play safe. 
+ * When allocating the initial buffer target we have not yet + * read in the superblock, so don't know what sized sectors + * are being used at this early stage.  Play safe.   */  STATIC int  xfs_setsize_buftarg_early(  	xfs_buftarg_t		*btp,  	struct block_device	*bdev)  { -	return xfs_setsize_buftarg_flags(btp, -			PAGE_SIZE, bdev_logical_block_size(bdev), 0); -} - -int -xfs_setsize_buftarg( -	xfs_buftarg_t		*btp, -	unsigned int		blocksize, -	unsigned int		sectorsize) -{ -	return xfs_setsize_buftarg_flags(btp, blocksize, sectorsize, 1); +	return xfs_setsize_buftarg(btp, bdev_logical_block_size(bdev));  }  xfs_buftarg_t *  xfs_alloc_buftarg(  	struct xfs_mount	*mp, -	struct block_device	*bdev, -	int			external, -	const char		*fsname) +	struct block_device	*bdev)  {  	xfs_buftarg_t		*btp; @@ -1798,7 +1798,7 @@ __xfs_buf_delwri_submit(  	blk_start_plug(&plug);  	list_for_each_entry_safe(bp, n, io_list, b_list) { -		bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC); +		bp->b_flags &= ~(_XBF_DELWRI_Q | XBF_ASYNC | XBF_WRITE_FAIL);  		bp->b_flags |= XBF_WRITE;  		if (!wait) { diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index e6568336101..3a7a5523d3d 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -45,6 +45,7 @@ typedef enum {  #define XBF_ASYNC	 (1 << 4) /* initiator will not wait for completion */  #define XBF_DONE	 (1 << 5) /* all pages in the buffer uptodate */  #define XBF_STALE	 (1 << 6) /* buffer has been staled, do not find it */ +#define XBF_WRITE_FAIL	 (1 << 24)/* async writes have failed on this buffer */  /* I/O hints for the BIO layer */  #define XBF_SYNCIO	 (1 << 10)/* treat this buffer as synchronous I/O */ @@ -70,6 +71,7 @@ typedef unsigned int xfs_buf_flags_t;  	{ XBF_ASYNC,		"ASYNC" }, \  	{ XBF_DONE,		"DONE" }, \  	{ XBF_STALE,		"STALE" }, \ +	{ XBF_WRITE_FAIL,	"WRITE_FAIL" }, \  	{ XBF_SYNCIO,		"SYNCIO" }, \  	{ XBF_FUA,		"FUA" }, \  	{ XBF_FLUSH,		"FLUSH" }, \ @@ -80,19 +82,34 @@ typedef unsigned int xfs_buf_flags_t;  	{ _XBF_DELWRI_Q,	"DELWRI_Q" }, \  	{ _XBF_COMPOUND,	"COMPOUND" } +  /*   * Internal state flags.   */  #define XFS_BSTATE_DISPOSE	 (1 << 0)	/* buffer being discarded */ +/* + * The xfs_buftarg contains 2 notions of "sector size" - + * + * 1) The metadata sector size, which is the minimum unit and + *    alignment of IO which will be performed by metadata operations. + * 2) The device logical sector size + * + * The first is specified at mkfs time, and is stored on-disk in the + * superblock's sb_sectsize. + * + * The latter is derived from the underlying device, and controls direct IO + * alignment constraints. 
+ */  typedef struct xfs_buftarg {  	dev_t			bt_dev;  	struct block_device	*bt_bdev;  	struct backing_dev_info	*bt_bdi;  	struct xfs_mount	*bt_mount; -	unsigned int		bt_bsize; -	unsigned int		bt_sshift; -	size_t			bt_smask; +	unsigned int		bt_meta_sectorsize; +	size_t			bt_meta_sectormask; +	size_t			bt_logical_sectorsize; +	size_t			bt_logical_sectormask;  	/* LRU control structures */  	struct shrinker		bt_shrinker; @@ -269,9 +286,6 @@ extern void xfs_buf_unlock(xfs_buf_t *);  /* Buffer Read and Write Routines */  extern int xfs_bwrite(struct xfs_buf *bp); - -extern void xfsbdstrat(struct xfs_mount *, struct xfs_buf *); -  extern void xfs_buf_ioend(xfs_buf_t *,	int);  extern void xfs_buf_ioerror(xfs_buf_t *, int);  extern void xfs_buf_ioerror_alert(struct xfs_buf *, const char *func); @@ -282,10 +296,7 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,  #define xfs_buf_zero(bp, off, len) \  	    xfs_buf_iomove((bp), (off), (len), NULL, XBRW_ZERO) -static inline int xfs_buf_geterror(xfs_buf_t *bp) -{ -	return bp ? bp->b_error : ENOMEM; -} +extern int xfs_bioerror_relse(struct xfs_buf *);  /* Buffer Utility Routines */  extern xfs_caddr_t xfs_buf_offset(xfs_buf_t *, size_t); @@ -301,7 +312,8 @@ extern void xfs_buf_terminate(void);  #define XFS_BUF_ZEROFLAGS(bp) \  	((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC| \ -			    XBF_SYNCIO|XBF_FUA|XBF_FLUSH)) +			    XBF_SYNCIO|XBF_FUA|XBF_FLUSH| \ +			    XBF_WRITE_FAIL))  void xfs_buf_stale(struct xfs_buf *bp);  #define XFS_BUF_UNSTALE(bp)	((bp)->b_flags &= ~XBF_STALE) @@ -352,14 +364,28 @@ static inline void xfs_buf_relse(xfs_buf_t *bp)  	xfs_buf_rele(bp);  } +static inline int +xfs_buf_verify_cksum(struct xfs_buf *bp, unsigned long cksum_offset) +{ +	return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), +				cksum_offset); +} + +static inline void +xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset) +{ +	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), +			 cksum_offset); +} +  /*   *	Handling of buftargs.   
*/  extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *, -			struct block_device *, int, const char *); +			struct block_device *);  extern void xfs_free_buftarg(struct xfs_mount *, struct xfs_buftarg *);  extern void xfs_wait_buftarg(xfs_buftarg_t *); -extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int, unsigned int); +extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int);  #define xfs_getsize_buftarg(buftarg)	block_size((buftarg)->bt_bdev)  #define xfs_readonly_buftarg(buftarg)	bdev_read_only((buftarg)->bt_bdev) diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index f1d85cfc0a5..4654338b03f 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -17,17 +17,18 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" +#include "xfs_trans.h"  #include "xfs_buf_item.h"  #include "xfs_trans_priv.h"  #include "xfs_error.h"  #include "xfs_trace.h" +#include "xfs_log.h"  kmem_zone_t	*xfs_buf_item_zone; @@ -181,21 +182,47 @@ xfs_buf_item_size(  	trace_xfs_buf_item_size(bip);  } -static struct xfs_log_iovec * +static inline void +xfs_buf_item_copy_iovec( +	struct xfs_log_vec	*lv, +	struct xfs_log_iovec	**vecp, +	struct xfs_buf		*bp, +	uint			offset, +	int			first_bit, +	uint			nbits) +{ +	offset += first_bit * XFS_BLF_CHUNK; +	xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_BCHUNK, +			xfs_buf_offset(bp, offset), +			nbits * XFS_BLF_CHUNK); +} + +static inline bool +xfs_buf_item_straddle( +	struct xfs_buf		*bp, +	uint			offset, +	int			next_bit, +	int			last_bit) +{ +	return xfs_buf_offset(bp, offset + (next_bit << XFS_BLF_SHIFT)) != +		(xfs_buf_offset(bp, offset + (last_bit << XFS_BLF_SHIFT)) + +		 XFS_BLF_CHUNK); +} + +static void  xfs_buf_item_format_segment(  	struct xfs_buf_log_item	*bip, -	struct xfs_log_iovec	*vecp, +	struct xfs_log_vec	*lv, +	struct xfs_log_iovec	**vecp,  	uint			offset,  	struct xfs_buf_log_format *blfp)  {  	struct xfs_buf	*bp = bip->bli_buf;  	uint		base_size; -	uint		nvecs;  	int		first_bit;  	int		last_bit;  	int		next_bit;  	uint		nbits; -	uint		buffer_offset;  	/* copy the flags across from the base format item */  	blfp->blf_flags = bip->__bli_format.blf_flags; @@ -207,21 +234,17 @@ xfs_buf_item_format_segment(  	 */  	base_size = xfs_buf_log_format_size(blfp); -	nvecs = 0;  	first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);  	if (!(bip->bli_flags & XFS_BLI_STALE) && first_bit == -1) {  		/*  		 * If the map is not be dirty in the transaction, mark  		 * the size as zero and do not advance the vector pointer.  		 */ -		goto out; +		return;  	} -	vecp->i_addr = blfp; -	vecp->i_len = base_size; -	vecp->i_type = XLOG_REG_TYPE_BFORMAT; -	vecp++; -	nvecs = 1; +	blfp = xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_BFORMAT, blfp, base_size); +	blfp->blf_size = 1;  	if (bip->bli_flags & XFS_BLI_STALE) {  		/* @@ -231,14 +254,13 @@ xfs_buf_item_format_segment(  		 */  		trace_xfs_buf_item_format_stale(bip);  		ASSERT(blfp->blf_flags & XFS_BLF_CANCEL); -		goto out; +		return;  	}  	/*  	 * Fill in an iovec for each set of contiguous chunks.  	 
*/ -  	last_bit = first_bit;  	nbits = 1;  	for (;;) { @@ -251,42 +273,22 @@ xfs_buf_item_format_segment(  		next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size,  					(uint)last_bit + 1);  		/* -		 * If we run out of bits fill in the last iovec and get -		 * out of the loop. -		 * Else if we start a new set of bits then fill in the -		 * iovec for the series we were looking at and start -		 * counting the bits in the new one. -		 * Else we're still in the same set of bits so just -		 * keep counting and scanning. +		 * If we run out of bits fill in the last iovec and get out of +		 * the loop.  Else if we start a new set of bits then fill in +		 * the iovec for the series we were looking at and start +		 * counting the bits in the new one.  Else we're still in the +		 * same set of bits so just keep counting and scanning.  		 */  		if (next_bit == -1) { -			buffer_offset = offset + first_bit * XFS_BLF_CHUNK; -			vecp->i_addr = xfs_buf_offset(bp, buffer_offset); -			vecp->i_len = nbits * XFS_BLF_CHUNK; -			vecp->i_type = XLOG_REG_TYPE_BCHUNK; -			nvecs++; +			xfs_buf_item_copy_iovec(lv, vecp, bp, offset, +						first_bit, nbits); +			blfp->blf_size++;  			break; -		} else if (next_bit != last_bit + 1) { -			buffer_offset = offset + first_bit * XFS_BLF_CHUNK; -			vecp->i_addr = xfs_buf_offset(bp, buffer_offset); -			vecp->i_len = nbits * XFS_BLF_CHUNK; -			vecp->i_type = XLOG_REG_TYPE_BCHUNK; -			nvecs++; -			vecp++; -			first_bit = next_bit; -			last_bit = next_bit; -			nbits = 1; -		} else if (xfs_buf_offset(bp, offset + -					      (next_bit << XFS_BLF_SHIFT)) != -			   (xfs_buf_offset(bp, offset + -					       (last_bit << XFS_BLF_SHIFT)) + -			    XFS_BLF_CHUNK)) { -			buffer_offset = offset + first_bit * XFS_BLF_CHUNK; -			vecp->i_addr = xfs_buf_offset(bp, buffer_offset); -			vecp->i_len = nbits * XFS_BLF_CHUNK; -			vecp->i_type = XLOG_REG_TYPE_BCHUNK; -			nvecs++; -			vecp++; +		} else if (next_bit != last_bit + 1 || +		           xfs_buf_item_straddle(bp, offset, next_bit, last_bit)) { +			xfs_buf_item_copy_iovec(lv, vecp, bp, offset, +						first_bit, nbits); +			blfp->blf_size++;  			first_bit = next_bit;  			last_bit = next_bit;  			nbits = 1; @@ -295,9 +297,6 @@ xfs_buf_item_format_segment(  			nbits++;  		}  	} -out: -	blfp->blf_size = nvecs; -	return vecp;  }  /* @@ -309,10 +308,11 @@ out:  STATIC void  xfs_buf_item_format(  	struct xfs_log_item	*lip, -	struct xfs_log_iovec	*vecp) +	struct xfs_log_vec	*lv)  {  	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);  	struct xfs_buf		*bp = bip->bli_buf; +	struct xfs_log_iovec	*vecp = NULL;  	uint			offset = 0;  	int			i; @@ -353,8 +353,8 @@ xfs_buf_item_format(  	}  	for (i = 0; i < bip->bli_format_count; i++) { -		vecp = xfs_buf_item_format_segment(bip, vecp, offset, -						&bip->bli_formats[i]); +		xfs_buf_item_format_segment(bip, lv, &vecp, offset, +					    &bip->bli_formats[i]);  		offset += bp->b_maps[i].bm_len;  	} @@ -495,6 +495,14 @@ xfs_buf_item_unpin(  	}  } +/* + * Buffer IO error rate limiting. Limit it to no more than 10 messages per 30 + * seconds so as to not spam logs too much on repeated detection of the same + * buffer being bad.. + */ + +DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10); +  STATIC uint  xfs_buf_item_push(  	struct xfs_log_item	*lip, @@ -523,6 +531,14 @@ xfs_buf_item_push(  	trace_xfs_buf_item_push(bip); +	/* has a previous flush failed due to IO errors? 
*/ +	if ((bp->b_flags & XBF_WRITE_FAIL) && +	    ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) { +		xfs_warn(bp->b_target->bt_mount, +"Detected failing async write on buffer block 0x%llx. Retrying async write.\n", +			 (long long)bp->b_bn); +	} +  	if (!xfs_buf_delwri_queue(bp, buffer_list))  		rval = XFS_ITEM_FLUSHING;  	xfs_buf_unlock(bp); @@ -780,20 +796,6 @@ xfs_buf_item_init(  		bip->bli_formats[i].blf_map_size = map_size;  	} -#ifdef XFS_TRANS_DEBUG -	/* -	 * Allocate the arrays for tracking what needs to be logged -	 * and what our callers request to be logged.  bli_orig -	 * holds a copy of the original, clean buffer for comparison -	 * against, and bli_logged keeps a 1 bit flag per byte in -	 * the buffer to indicate which bytes the callers have asked -	 * to have logged. -	 */ -	bip->bli_orig = kmem_alloc(BBTOB(bp->b_length), KM_SLEEP); -	memcpy(bip->bli_orig, bp->b_addr, BBTOB(bp->b_length)); -	bip->bli_logged = kmem_zalloc(BBTOB(bp->b_length) / NBBY, KM_SLEEP); -#endif -  	/*  	 * Put the buf item into the list of items attached to the  	 * buffer at the front. @@ -808,9 +810,8 @@ xfs_buf_item_init(   * Mark bytes first through last inclusive as dirty in the buf   * item's bitmap.   */ -void +static void  xfs_buf_item_log_segment( -	struct xfs_buf_log_item	*bip,  	uint			first,  	uint			last,  	uint			*map) @@ -918,7 +919,7 @@ xfs_buf_item_log(  		if (end > last)  			end = last; -		xfs_buf_item_log_segment(bip, first, end, +		xfs_buf_item_log_segment(first, end,  					 &bip->bli_formats[i].blf_data_map[0]);  		start += bp->b_maps[i].bm_len; @@ -941,11 +942,6 @@ STATIC void  xfs_buf_item_free(  	xfs_buf_log_item_t	*bip)  { -#ifdef XFS_TRANS_DEBUG -	kmem_free(bip->bli_orig); -	kmem_free(bip->bli_logged); -#endif /* XFS_TRANS_DEBUG */ -  	xfs_buf_item_free_format(bip);  	kmem_zone_free(xfs_buf_item_zone, bip);  } @@ -1056,7 +1052,7 @@ xfs_buf_iodone_callbacks(  	static ulong		lasttime;  	static xfs_buftarg_t	*lasttarg; -	if (likely(!xfs_buf_geterror(bp))) +	if (likely(!bp->b_error))  		goto do_callbacks;  	/* @@ -1095,8 +1091,9 @@ xfs_buf_iodone_callbacks(  		xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */ -		if (!XFS_BUF_ISSTALE(bp)) { -			bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE; +		if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) { +			bp->b_flags |= XBF_WRITE | XBF_ASYNC | +				       XBF_DONE | XBF_WRITE_FAIL;  			xfs_buf_iorequest(bp);  		} else {  			xfs_buf_relse(bp); diff --git a/fs/xfs/xfs_buf_item.h b/fs/xfs/xfs_buf_item.h index db6371087fe..3f3455a4151 100644 --- a/fs/xfs/xfs_buf_item.h +++ b/fs/xfs/xfs_buf_item.h @@ -71,10 +71,6 @@ void	xfs_buf_attach_iodone(struct xfs_buf *,  void	xfs_buf_iodone_callbacks(struct xfs_buf *);  void	xfs_buf_iodone(struct xfs_buf *, struct xfs_log_item *); -void	xfs_trans_buf_set_type(struct xfs_trans *, struct xfs_buf *, -			       enum xfs_blft); -void	xfs_trans_buf_copy_type(struct xfs_buf *dst_bp, struct xfs_buf *src_bp); -  extern kmem_zone_t	*xfs_buf_item_zone;  #endif	/* __XFS_BUF_ITEM_H__ */ diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 20bf8e8002d..a514ab61665 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -18,20 +18,20 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" +#include "xfs_da_format.h"  #include 
"xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_dir2_format.h"  #include "xfs_dir2.h"  #include "xfs_dir2_priv.h" -#include "xfs_dinode.h"  #include "xfs_inode.h" +#include "xfs_trans.h"  #include "xfs_inode_item.h"  #include "xfs_alloc.h"  #include "xfs_bmap.h" @@ -129,56 +129,6 @@ xfs_da_state_free(xfs_da_state_t *state)  	kmem_zone_free(xfs_da_state_zone, state);  } -void -xfs_da3_node_hdr_from_disk( -	struct xfs_da3_icnode_hdr	*to, -	struct xfs_da_intnode		*from) -{ -	ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC) || -	       from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)); - -	if (from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) { -		struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)from; - -		to->forw = be32_to_cpu(hdr3->info.hdr.forw); -		to->back = be32_to_cpu(hdr3->info.hdr.back); -		to->magic = be16_to_cpu(hdr3->info.hdr.magic); -		to->count = be16_to_cpu(hdr3->__count); -		to->level = be16_to_cpu(hdr3->__level); -		return; -	} -	to->forw = be32_to_cpu(from->hdr.info.forw); -	to->back = be32_to_cpu(from->hdr.info.back); -	to->magic = be16_to_cpu(from->hdr.info.magic); -	to->count = be16_to_cpu(from->hdr.__count); -	to->level = be16_to_cpu(from->hdr.__level); -} - -void -xfs_da3_node_hdr_to_disk( -	struct xfs_da_intnode		*to, -	struct xfs_da3_icnode_hdr	*from) -{ -	ASSERT(from->magic == XFS_DA_NODE_MAGIC || -	       from->magic == XFS_DA3_NODE_MAGIC); - -	if (from->magic == XFS_DA3_NODE_MAGIC) { -		struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)to; - -		hdr3->info.hdr.forw = cpu_to_be32(from->forw); -		hdr3->info.hdr.back = cpu_to_be32(from->back); -		hdr3->info.hdr.magic = cpu_to_be16(from->magic); -		hdr3->__count = cpu_to_be16(from->count); -		hdr3->__level = cpu_to_be16(from->level); -		return; -	} -	to->hdr.info.forw = cpu_to_be32(from->forw); -	to->hdr.info.back = cpu_to_be32(from->back); -	to->hdr.info.magic = cpu_to_be16(from->magic); -	to->hdr.__count = cpu_to_be16(from->count); -	to->hdr.__level = cpu_to_be16(from->level); -} -  static bool  xfs_da3_node_verify(  	struct xfs_buf		*bp) @@ -186,8 +136,11 @@ xfs_da3_node_verify(  	struct xfs_mount	*mp = bp->b_target->bt_mount;  	struct xfs_da_intnode	*hdr = bp->b_addr;  	struct xfs_da3_icnode_hdr ichdr; +	const struct xfs_dir_ops *ops; -	xfs_da3_node_hdr_from_disk(&ichdr, hdr); +	ops = xfs_dir_get_ops(mp, NULL); + +	ops->node_hdr_from_disk(&ichdr, hdr);  	if (xfs_sb_version_hascrc(&mp->m_sb)) {  		struct xfs_da3_node_hdr *hdr3 = bp->b_addr; @@ -214,8 +167,8 @@ xfs_da3_node_verify(  	 * we don't know if the node is for and attribute or directory tree,  	 * so only fail if the count is outside both bounds  	 */ -	if (ichdr.count > mp->m_dir_node_ents && -	    ichdr.count > mp->m_attr_node_ents) +	if (ichdr.count > mp->m_dir_geo->node_ents && +	    ichdr.count > mp->m_attr_geo->node_ents)  		return false;  	/* XXX: hash order check? 
*/ @@ -232,8 +185,8 @@ xfs_da3_node_write_verify(  	struct xfs_da3_node_hdr *hdr3 = bp->b_addr;  	if (!xfs_da3_node_verify(bp)) { -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);  		xfs_buf_ioerror(bp, EFSCORRUPTED); +		xfs_verifier_error(bp);  		return;  	} @@ -243,7 +196,7 @@ xfs_da3_node_write_verify(  	if (bip)  		hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); -	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DA3_NODE_CRC_OFF); +	xfs_buf_update_cksum(bp, XFS_DA3_NODE_CRC_OFF);  }  /* @@ -256,18 +209,20 @@ static void  xfs_da3_node_read_verify(  	struct xfs_buf		*bp)  { -	struct xfs_mount	*mp = bp->b_target->bt_mount;  	struct xfs_da_blkinfo	*info = bp->b_addr;  	switch (be16_to_cpu(info->magic)) {  		case XFS_DA3_NODE_MAGIC: -			if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -					      XFS_DA3_NODE_CRC_OFF)) +			if (!xfs_buf_verify_cksum(bp, XFS_DA3_NODE_CRC_OFF)) { +				xfs_buf_ioerror(bp, EFSBADCRC);  				break; +			}  			/* fall through */  		case XFS_DA_NODE_MAGIC: -			if (!xfs_da3_node_verify(bp)) +			if (!xfs_da3_node_verify(bp)) { +				xfs_buf_ioerror(bp, EFSCORRUPTED);  				break; +			}  			return;  		case XFS_ATTR_LEAF_MAGIC:  		case XFS_ATTR3_LEAF_MAGIC: @@ -284,8 +239,7 @@ xfs_da3_node_read_verify(  	}  	/* corrupt block */ -	XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); -	xfs_buf_ioerror(bp, EFSCORRUPTED); +	xfs_verifier_error(bp);  }  const struct xfs_buf_ops xfs_da3_node_buf_ops = { @@ -354,11 +308,12 @@ xfs_da3_node_create(  	struct xfs_da3_icnode_hdr ichdr = {0};  	struct xfs_buf		*bp;  	int			error; +	struct xfs_inode	*dp = args->dp;  	trace_xfs_da_node_create(args);  	ASSERT(level <= XFS_DA_NODE_MAXDEPTH); -	error = xfs_da_get_buf(tp, args->dp, blkno, -1, &bp, whichfork); +	error = xfs_da_get_buf(tp, dp, blkno, -1, &bp, whichfork);  	if (error)  		return(error);  	bp->b_ops = &xfs_da3_node_buf_ops; @@ -377,9 +332,9 @@ xfs_da3_node_create(  	}  	ichdr.level = level; -	xfs_da3_node_hdr_to_disk(node, &ichdr); +	dp->d_ops->node_hdr_to_disk(node, &ichdr);  	xfs_trans_log_buf(tp, bp, -		XFS_DA_LOGRANGE(node, &node->hdr, xfs_da3_node_hdr_size(node))); +		XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));  	*bpp = bp;  	return(0); @@ -589,8 +544,8 @@ xfs_da3_root_split(  	    oldroot->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) {  		struct xfs_da3_icnode_hdr nodehdr; -		xfs_da3_node_hdr_from_disk(&nodehdr, oldroot); -		btree = xfs_da3_node_tree_p(oldroot); +		dp->d_ops->node_hdr_from_disk(&nodehdr, oldroot); +		btree = dp->d_ops->node_tree_p(oldroot);  		size = (int)((char *)&btree[nodehdr.count] - (char *)oldroot);  		level = nodehdr.level; @@ -604,8 +559,8 @@ xfs_da3_root_split(  		struct xfs_dir2_leaf_entry *ents;  		leaf = (xfs_dir2_leaf_t *)oldroot; -		xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); -		ents = xfs_dir3_leaf_ents_p(leaf); +		dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); +		ents = dp->d_ops->leaf_ents_p(leaf);  		ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||  		       leafhdr.magic == XFS_DIR3_LEAFN_MAGIC); @@ -643,28 +598,28 @@ xfs_da3_root_split(  	 * Set up the new root node.  	 */  	error = xfs_da3_node_create(args, -		(args->whichfork == XFS_DATA_FORK) ? mp->m_dirleafblk : 0, +		(args->whichfork == XFS_DATA_FORK) ? 
args->geo->leafblk : 0,  		level + 1, &bp, args->whichfork);  	if (error)  		return error;  	node = bp->b_addr; -	xfs_da3_node_hdr_from_disk(&nodehdr, node); -	btree = xfs_da3_node_tree_p(node); +	dp->d_ops->node_hdr_from_disk(&nodehdr, node); +	btree = dp->d_ops->node_tree_p(node);  	btree[0].hashval = cpu_to_be32(blk1->hashval);  	btree[0].before = cpu_to_be32(blk1->blkno);  	btree[1].hashval = cpu_to_be32(blk2->hashval);  	btree[1].before = cpu_to_be32(blk2->blkno);  	nodehdr.count = 2; -	xfs_da3_node_hdr_to_disk(node, &nodehdr); +	dp->d_ops->node_hdr_to_disk(node, &nodehdr);  #ifdef DEBUG  	if (oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||  	    oldroot->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) { -		ASSERT(blk1->blkno >= mp->m_dirleafblk && -		       blk1->blkno < mp->m_dirfreeblk); -		ASSERT(blk2->blkno >= mp->m_dirleafblk && -		       blk2->blkno < mp->m_dirfreeblk); +		ASSERT(blk1->blkno >= args->geo->leafblk && +		       blk1->blkno < args->geo->freeblk); +		ASSERT(blk2->blkno >= args->geo->leafblk && +		       blk2->blkno < args->geo->freeblk);  	}  #endif @@ -693,11 +648,12 @@ xfs_da3_node_split(  	int			newcount;  	int			error;  	int			useextra; +	struct xfs_inode	*dp = state->args->dp;  	trace_xfs_da_node_split(state->args);  	node = oldblk->bp->b_addr; -	xfs_da3_node_hdr_from_disk(&nodehdr, node); +	dp->d_ops->node_hdr_from_disk(&nodehdr, node);  	/*  	 * With V2 dirs the extra block is data or freespace. @@ -707,7 +663,7 @@ xfs_da3_node_split(  	/*  	 * Do we have to split the node?  	 */ -	if (nodehdr.count + newcount > state->node_ents) { +	if (nodehdr.count + newcount > state->args->geo->node_ents) {  		/*  		 * Allocate a new node, add to the doubly linked chain of  		 * nodes, then move some of our excess entries into it. @@ -744,7 +700,7 @@ xfs_da3_node_split(  	 * If we had double-split op below us, then add the extra block too.  	 */  	node = oldblk->bp->b_addr; -	xfs_da3_node_hdr_from_disk(&nodehdr, node); +	dp->d_ops->node_hdr_from_disk(&nodehdr, node);  	if (oldblk->index <= nodehdr.count) {  		oldblk->index++;  		xfs_da3_node_add(state, oldblk, addblk); @@ -793,15 +749,16 @@ xfs_da3_node_rebalance(  	int			count;  	int			tmp;  	int			swap = 0; +	struct xfs_inode	*dp = state->args->dp;  	trace_xfs_da_node_rebalance(state->args);  	node1 = blk1->bp->b_addr;  	node2 = blk2->bp->b_addr; -	xfs_da3_node_hdr_from_disk(&nodehdr1, node1); -	xfs_da3_node_hdr_from_disk(&nodehdr2, node2); -	btree1 = xfs_da3_node_tree_p(node1); -	btree2 = xfs_da3_node_tree_p(node2); +	dp->d_ops->node_hdr_from_disk(&nodehdr1, node1); +	dp->d_ops->node_hdr_from_disk(&nodehdr2, node2); +	btree1 = dp->d_ops->node_tree_p(node1); +	btree2 = dp->d_ops->node_tree_p(node2);  	/*  	 * Figure out how many entries need to move, and in which direction. @@ -814,10 +771,10 @@ xfs_da3_node_rebalance(  		tmpnode = node1;  		node1 = node2;  		node2 = tmpnode; -		xfs_da3_node_hdr_from_disk(&nodehdr1, node1); -		xfs_da3_node_hdr_from_disk(&nodehdr2, node2); -		btree1 = xfs_da3_node_tree_p(node1); -		btree2 = xfs_da3_node_tree_p(node2); +		dp->d_ops->node_hdr_from_disk(&nodehdr1, node1); +		dp->d_ops->node_hdr_from_disk(&nodehdr2, node2); +		btree1 = dp->d_ops->node_tree_p(node1); +		btree2 = dp->d_ops->node_tree_p(node2);  		swap = 1;  	} @@ -879,15 +836,14 @@ xfs_da3_node_rebalance(  	/*  	 * Log header of node 1 and all current bits of node 2.  	 
*/ -	xfs_da3_node_hdr_to_disk(node1, &nodehdr1); +	dp->d_ops->node_hdr_to_disk(node1, &nodehdr1);  	xfs_trans_log_buf(tp, blk1->bp, -		XFS_DA_LOGRANGE(node1, &node1->hdr, -				xfs_da3_node_hdr_size(node1))); +		XFS_DA_LOGRANGE(node1, &node1->hdr, dp->d_ops->node_hdr_size)); -	xfs_da3_node_hdr_to_disk(node2, &nodehdr2); +	dp->d_ops->node_hdr_to_disk(node2, &nodehdr2);  	xfs_trans_log_buf(tp, blk2->bp,  		XFS_DA_LOGRANGE(node2, &node2->hdr, -				xfs_da3_node_hdr_size(node2) + +				dp->d_ops->node_hdr_size +  				(sizeof(btree2[0]) * nodehdr2.count)));  	/* @@ -897,10 +853,10 @@ xfs_da3_node_rebalance(  	if (swap) {  		node1 = blk1->bp->b_addr;  		node2 = blk2->bp->b_addr; -		xfs_da3_node_hdr_from_disk(&nodehdr1, node1); -		xfs_da3_node_hdr_from_disk(&nodehdr2, node2); -		btree1 = xfs_da3_node_tree_p(node1); -		btree2 = xfs_da3_node_tree_p(node2); +		dp->d_ops->node_hdr_from_disk(&nodehdr1, node1); +		dp->d_ops->node_hdr_from_disk(&nodehdr2, node2); +		btree1 = dp->d_ops->node_tree_p(node1); +		btree2 = dp->d_ops->node_tree_p(node2);  	}  	blk1->hashval = be32_to_cpu(btree1[nodehdr1.count - 1].hashval);  	blk2->hashval = be32_to_cpu(btree2[nodehdr2.count - 1].hashval); @@ -927,18 +883,19 @@ xfs_da3_node_add(  	struct xfs_da3_icnode_hdr nodehdr;  	struct xfs_da_node_entry *btree;  	int			tmp; +	struct xfs_inode	*dp = state->args->dp;  	trace_xfs_da_node_add(state->args);  	node = oldblk->bp->b_addr; -	xfs_da3_node_hdr_from_disk(&nodehdr, node); -	btree = xfs_da3_node_tree_p(node); +	dp->d_ops->node_hdr_from_disk(&nodehdr, node); +	btree = dp->d_ops->node_tree_p(node);  	ASSERT(oldblk->index >= 0 && oldblk->index <= nodehdr.count);  	ASSERT(newblk->blkno != 0);  	if (state->args->whichfork == XFS_DATA_FORK) -		ASSERT(newblk->blkno >= state->mp->m_dirleafblk && -		       newblk->blkno < state->mp->m_dirfreeblk); +		ASSERT(newblk->blkno >= state->args->geo->leafblk && +		       newblk->blkno < state->args->geo->freeblk);  	/*  	 * We may need to make some room before we insert the new node. @@ -955,9 +912,9 @@ xfs_da3_node_add(  				tmp + sizeof(*btree)));  	nodehdr.count += 1; -	xfs_da3_node_hdr_to_disk(node, &nodehdr); +	dp->d_ops->node_hdr_to_disk(node, &nodehdr);  	xfs_trans_log_buf(state->args->trans, oldblk->bp, -		XFS_DA_LOGRANGE(node, &node->hdr, xfs_da3_node_hdr_size(node))); +		XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));  	/*  	 * Copy the last hash value from the oldblk to propagate upwards. @@ -1094,6 +1051,7 @@ xfs_da3_root_join(  	struct xfs_da3_icnode_hdr oldroothdr;  	struct xfs_da_node_entry *btree;  	int			error; +	struct xfs_inode	*dp = state->args->dp;  	trace_xfs_da_root_join(state->args); @@ -1101,7 +1059,7 @@ xfs_da3_root_join(  	args = state->args;  	oldroot = root_blk->bp->b_addr; -	xfs_da3_node_hdr_from_disk(&oldroothdr, oldroot); +	dp->d_ops->node_hdr_from_disk(&oldroothdr, oldroot);  	ASSERT(oldroothdr.forw == 0);  	ASSERT(oldroothdr.back == 0); @@ -1115,10 +1073,10 @@ xfs_da3_root_join(  	 * Read in the (only) child block, then copy those bytes into  	 * the root block's buffer and free the original child block.  	 */ -	btree = xfs_da3_node_tree_p(oldroot); +	btree = dp->d_ops->node_tree_p(oldroot);  	child = be32_to_cpu(btree[0].before);  	ASSERT(child != 0); -	error = xfs_da3_node_read(args->trans, args->dp, child, -1, &bp, +	error = xfs_da3_node_read(args->trans, dp, child, -1, &bp,  					     args->whichfork);  	if (error)  		return error; @@ -1131,14 +1089,15 @@ xfs_da3_root_join(  	 * that could occur. 
For dir3 blocks we also need to update the block  	 * number in the buffer header.  	 */ -	memcpy(root_blk->bp->b_addr, bp->b_addr, state->blocksize); +	memcpy(root_blk->bp->b_addr, bp->b_addr, args->geo->blksize);  	root_blk->bp->b_ops = bp->b_ops;  	xfs_trans_buf_copy_type(root_blk->bp, bp);  	if (oldroothdr.magic == XFS_DA3_NODE_MAGIC) {  		struct xfs_da3_blkinfo *da3 = root_blk->bp->b_addr;  		da3->blkno = cpu_to_be64(root_blk->bp->b_bn);  	} -	xfs_trans_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1); +	xfs_trans_log_buf(args->trans, root_blk->bp, 0, +			  args->geo->blksize - 1);  	error = xfs_da_shrink_inode(args, child, bp);  	return(error);  } @@ -1168,6 +1127,7 @@ xfs_da3_node_toosmall(  	int			error;  	int			retval;  	int			i; +	struct xfs_inode	*dp = state->args->dp;  	trace_xfs_da_node_toosmall(state->args); @@ -1179,8 +1139,8 @@ xfs_da3_node_toosmall(  	blk = &state->path.blk[ state->path.active-1 ];  	info = blk->bp->b_addr;  	node = (xfs_da_intnode_t *)info; -	xfs_da3_node_hdr_from_disk(&nodehdr, node); -	if (nodehdr.count > (state->node_ents >> 1)) { +	dp->d_ops->node_hdr_from_disk(&nodehdr, node); +	if (nodehdr.count > (state->args->geo->node_ents >> 1)) {  		*action = 0;	/* blk over 50%, don't try to join */  		return(0);	/* blk over 50%, don't try to join */  	} @@ -1217,8 +1177,8 @@ xfs_da3_node_toosmall(  	 * We prefer coalescing with the lower numbered sibling so as  	 * to shrink a directory over time.  	 */ -	count  = state->node_ents; -	count -= state->node_ents >> 2; +	count  = state->args->geo->node_ents; +	count -= state->args->geo->node_ents >> 2;  	count -= nodehdr.count;  	/* start with smaller blk num */ @@ -1231,13 +1191,13 @@ xfs_da3_node_toosmall(  			blkno = nodehdr.back;  		if (blkno == 0)  			continue; -		error = xfs_da3_node_read(state->args->trans, state->args->dp, +		error = xfs_da3_node_read(state->args->trans, dp,  					blkno, -1, &bp, state->args->whichfork);  		if (error)  			return(error);  		node = bp->b_addr; -		xfs_da3_node_hdr_from_disk(&thdr, node); +		dp->d_ops->node_hdr_from_disk(&thdr, node);  		xfs_trans_brelse(state->args->trans, bp);  		if (count - thdr.count >= 0) @@ -1275,6 +1235,7 @@ xfs_da3_node_toosmall(   */  STATIC uint  xfs_da3_node_lasthash( +	struct xfs_inode	*dp,  	struct xfs_buf		*bp,  	int			*count)  { @@ -1283,12 +1244,12 @@ xfs_da3_node_lasthash(  	struct xfs_da3_icnode_hdr nodehdr;  	node = bp->b_addr; -	xfs_da3_node_hdr_from_disk(&nodehdr, node); +	dp->d_ops->node_hdr_from_disk(&nodehdr, node);  	if (count)  		*count = nodehdr.count;  	if (!nodehdr.count)  		return 0; -	btree = xfs_da3_node_tree_p(node); +	btree = dp->d_ops->node_tree_p(node);  	return be32_to_cpu(btree[nodehdr.count - 1].hashval);  } @@ -1307,6 +1268,7 @@ xfs_da3_fixhashpath(  	xfs_dahash_t		lasthash=0;  	int			level;  	int			count; +	struct xfs_inode	*dp = state->args->dp;  	trace_xfs_da_fixhashpath(state->args); @@ -1319,12 +1281,12 @@ xfs_da3_fixhashpath(  			return;  		break;  	case XFS_DIR2_LEAFN_MAGIC: -		lasthash = xfs_dir2_leafn_lasthash(blk->bp, &count); +		lasthash = xfs_dir2_leafn_lasthash(dp, blk->bp, &count);  		if (count == 0)  			return;  		break;  	case XFS_DA_NODE_MAGIC: -		lasthash = xfs_da3_node_lasthash(blk->bp, &count); +		lasthash = xfs_da3_node_lasthash(dp, blk->bp, &count);  		if (count == 0)  			return;  		break; @@ -1333,9 +1295,9 @@ xfs_da3_fixhashpath(  		struct xfs_da3_icnode_hdr nodehdr;  		node = blk->bp->b_addr; -		xfs_da3_node_hdr_from_disk(&nodehdr, node); -		btree = xfs_da3_node_tree_p(node); -		if 
(be32_to_cpu(btree->hashval) == lasthash) +		dp->d_ops->node_hdr_from_disk(&nodehdr, node); +		btree = dp->d_ops->node_tree_p(node); +		if (be32_to_cpu(btree[blk->index].hashval) == lasthash)  			break;  		blk->hashval = lasthash;  		btree[blk->index].hashval = cpu_to_be32(lasthash); @@ -1360,11 +1322,12 @@ xfs_da3_node_remove(  	struct xfs_da_node_entry *btree;  	int			index;  	int			tmp; +	struct xfs_inode	*dp = state->args->dp;  	trace_xfs_da_node_remove(state->args);  	node = drop_blk->bp->b_addr; -	xfs_da3_node_hdr_from_disk(&nodehdr, node); +	dp->d_ops->node_hdr_from_disk(&nodehdr, node);  	ASSERT(drop_blk->index < nodehdr.count);  	ASSERT(drop_blk->index >= 0); @@ -1372,7 +1335,7 @@ xfs_da3_node_remove(  	 * Copy over the offending entry, or just zero it out.  	 */  	index = drop_blk->index; -	btree = xfs_da3_node_tree_p(node); +	btree = dp->d_ops->node_tree_p(node);  	if (index < nodehdr.count - 1) {  		tmp  = nodehdr.count - index - 1;  		tmp *= (uint)sizeof(xfs_da_node_entry_t); @@ -1385,9 +1348,9 @@ xfs_da3_node_remove(  	xfs_trans_log_buf(state->args->trans, drop_blk->bp,  	    XFS_DA_LOGRANGE(node, &btree[index], sizeof(btree[index])));  	nodehdr.count -= 1; -	xfs_da3_node_hdr_to_disk(node, &nodehdr); +	dp->d_ops->node_hdr_to_disk(node, &nodehdr);  	xfs_trans_log_buf(state->args->trans, drop_blk->bp, -	    XFS_DA_LOGRANGE(node, &node->hdr, xfs_da3_node_hdr_size(node))); +	    XFS_DA_LOGRANGE(node, &node->hdr, dp->d_ops->node_hdr_size));  	/*  	 * Copy the last hash value from the block to propagate upwards. @@ -1414,15 +1377,16 @@ xfs_da3_node_unbalance(  	struct xfs_trans	*tp;  	int			sindex;  	int			tmp; +	struct xfs_inode	*dp = state->args->dp;  	trace_xfs_da_node_unbalance(state->args);  	drop_node = drop_blk->bp->b_addr;  	save_node = save_blk->bp->b_addr; -	xfs_da3_node_hdr_from_disk(&drop_hdr, drop_node); -	xfs_da3_node_hdr_from_disk(&save_hdr, save_node); -	drop_btree = xfs_da3_node_tree_p(drop_node); -	save_btree = xfs_da3_node_tree_p(save_node); +	dp->d_ops->node_hdr_from_disk(&drop_hdr, drop_node); +	dp->d_ops->node_hdr_from_disk(&save_hdr, save_node); +	drop_btree = dp->d_ops->node_tree_p(drop_node); +	save_btree = dp->d_ops->node_tree_p(save_node);  	tp = state->args->trans;  	/* @@ -1456,10 +1420,10 @@ xfs_da3_node_unbalance(  	memcpy(&save_btree[sindex], &drop_btree[0], tmp);  	save_hdr.count += drop_hdr.count; -	xfs_da3_node_hdr_to_disk(save_node, &save_hdr); +	dp->d_ops->node_hdr_to_disk(save_node, &save_hdr);  	xfs_trans_log_buf(tp, save_blk->bp,  		XFS_DA_LOGRANGE(save_node, &save_node->hdr, -				xfs_da3_node_hdr_size(save_node))); +				dp->d_ops->node_hdr_size));  	/*  	 * Save the last hashval in the remaining block for upward propagation. @@ -1501,6 +1465,7 @@ xfs_da3_node_lookup_int(  	int			max;  	int			error;  	int			retval; +	struct xfs_inode	*dp = state->args->dp;  	args = state->args; @@ -1508,7 +1473,7 @@ xfs_da3_node_lookup_int(  	 * Descend thru the B-tree searching each level for the right  	 * node to use, until the right hashval is found.  	 */ -	blkno = (args->whichfork == XFS_DATA_FORK)? state->mp->m_dirleafblk : 0; +	blkno = (args->whichfork == XFS_DATA_FORK)? 
args->geo->leafblk : 0;  	for (blk = &state->path.blk[0], state->path.active = 1;  			 state->path.active <= XFS_DA_NODE_MAXDEPTH;  			 blk++, state->path.active++) { @@ -1536,7 +1501,8 @@ xfs_da3_node_lookup_int(  		if (blk->magic == XFS_DIR2_LEAFN_MAGIC ||  		    blk->magic == XFS_DIR3_LEAFN_MAGIC) {  			blk->magic = XFS_DIR2_LEAFN_MAGIC; -			blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, NULL); +			blk->hashval = xfs_dir2_leafn_lasthash(args->dp, +							       blk->bp, NULL);  			break;  		} @@ -1547,8 +1513,8 @@ xfs_da3_node_lookup_int(  		 * Search an intermediate node for a match.  		 */  		node = blk->bp->b_addr; -		xfs_da3_node_hdr_from_disk(&nodehdr, node); -		btree = xfs_da3_node_tree_p(node); +		dp->d_ops->node_hdr_from_disk(&nodehdr, node); +		btree = dp->d_ops->node_tree_p(node);  		max = nodehdr.count;  		blk->hashval = be32_to_cpu(btree[max - 1].hashval); @@ -1643,6 +1609,7 @@ xfs_da3_node_lookup_int(   */  STATIC int  xfs_da3_node_order( +	struct xfs_inode *dp,  	struct xfs_buf	*node1_bp,  	struct xfs_buf	*node2_bp)  { @@ -1655,10 +1622,10 @@ xfs_da3_node_order(  	node1 = node1_bp->b_addr;  	node2 = node2_bp->b_addr; -	xfs_da3_node_hdr_from_disk(&node1hdr, node1); -	xfs_da3_node_hdr_from_disk(&node2hdr, node2); -	btree1 = xfs_da3_node_tree_p(node1); -	btree2 = xfs_da3_node_tree_p(node2); +	dp->d_ops->node_hdr_from_disk(&node1hdr, node1); +	dp->d_ops->node_hdr_from_disk(&node2hdr, node2); +	btree1 = dp->d_ops->node_tree_p(node1); +	btree2 = dp->d_ops->node_tree_p(node2);  	if (node1hdr.count > 0 && node2hdr.count > 0 &&  	    ((be32_to_cpu(btree2[0].hashval) < be32_to_cpu(btree1[0].hashval)) || @@ -1685,6 +1652,7 @@ xfs_da3_blk_link(  	struct xfs_buf		*bp;  	int			before = 0;  	int			error; +	struct xfs_inode	*dp = state->args->dp;  	/*  	 * Set up environment. 
@@ -1702,10 +1670,10 @@ xfs_da3_blk_link(  		before = xfs_attr_leaf_order(old_blk->bp, new_blk->bp);  		break;  	case XFS_DIR2_LEAFN_MAGIC: -		before = xfs_dir2_leafn_order(old_blk->bp, new_blk->bp); +		before = xfs_dir2_leafn_order(dp, old_blk->bp, new_blk->bp);  		break;  	case XFS_DA_NODE_MAGIC: -		before = xfs_da3_node_order(old_blk->bp, new_blk->bp); +		before = xfs_da3_node_order(dp, old_blk->bp, new_blk->bp);  		break;  	} @@ -1720,7 +1688,7 @@ xfs_da3_blk_link(  		new_info->forw = cpu_to_be32(old_blk->blkno);  		new_info->back = old_info->back;  		if (old_info->back) { -			error = xfs_da3_node_read(args->trans, args->dp, +			error = xfs_da3_node_read(args->trans, dp,  						be32_to_cpu(old_info->back),  						-1, &bp, args->whichfork);  			if (error) @@ -1741,7 +1709,7 @@ xfs_da3_blk_link(  		new_info->forw = old_info->forw;  		new_info->back = cpu_to_be32(old_blk->blkno);  		if (old_info->forw) { -			error = xfs_da3_node_read(args->trans, args->dp, +			error = xfs_da3_node_read(args->trans, dp,  						be32_to_cpu(old_info->forw),  						-1, &bp, args->whichfork);  			if (error) @@ -1861,6 +1829,7 @@ xfs_da3_path_shift(  	xfs_dablk_t		blkno = 0;  	int			level;  	int			error; +	struct xfs_inode	*dp = state->args->dp;  	trace_xfs_da_path_shift(state->args); @@ -1876,8 +1845,8 @@ xfs_da3_path_shift(  	level = (path->active-1) - 1;	/* skip bottom layer in path */  	for (blk = &path->blk[level]; level >= 0; blk--, level--) {  		node = blk->bp->b_addr; -		xfs_da3_node_hdr_from_disk(&nodehdr, node); -		btree = xfs_da3_node_tree_p(node); +		dp->d_ops->node_hdr_from_disk(&nodehdr, node); +		btree = dp->d_ops->node_tree_p(node);  		if (forward && (blk->index < nodehdr.count - 1)) {  			blk->index++; @@ -1911,7 +1880,7 @@ xfs_da3_path_shift(  		 * Read the next child block.  		 
*/  		blk->blkno = blkno; -		error = xfs_da3_node_read(args->trans, args->dp, blkno, -1, +		error = xfs_da3_node_read(args->trans, dp, blkno, -1,  					&blk->bp, args->whichfork);  		if (error)  			return(error); @@ -1933,8 +1902,8 @@ xfs_da3_path_shift(  		case XFS_DA3_NODE_MAGIC:  			blk->magic = XFS_DA_NODE_MAGIC;  			node = (xfs_da_intnode_t *)info; -			xfs_da3_node_hdr_from_disk(&nodehdr, node); -			btree = xfs_da3_node_tree_p(node); +			dp->d_ops->node_hdr_from_disk(&nodehdr, node); +			btree = dp->d_ops->node_tree_p(node);  			blk->hashval = be32_to_cpu(btree[nodehdr.count - 1].hashval);  			if (forward)  				blk->index = 0; @@ -1947,16 +1916,15 @@ xfs_da3_path_shift(  			blk->magic = XFS_ATTR_LEAF_MAGIC;  			ASSERT(level == path->active-1);  			blk->index = 0; -			blk->hashval = xfs_attr_leaf_lasthash(blk->bp, -							      NULL); +			blk->hashval = xfs_attr_leaf_lasthash(blk->bp, NULL);  			break;  		case XFS_DIR2_LEAFN_MAGIC:  		case XFS_DIR3_LEAFN_MAGIC:  			blk->magic = XFS_DIR2_LEAFN_MAGIC;  			ASSERT(level == path->active-1);  			blk->index = 0; -			blk->hashval = xfs_dir2_leafn_lasthash(blk->bp, -							       NULL); +			blk->hashval = xfs_dir2_leafn_lasthash(args->dp, +							       blk->bp, NULL);  			break;  		default:  			ASSERT(0); @@ -2123,20 +2091,12 @@ xfs_da_grow_inode(  	xfs_dablk_t		*new_blkno)  {  	xfs_fileoff_t		bno; -	int			count;  	int			error;  	trace_xfs_da_grow_inode(args); -	if (args->whichfork == XFS_DATA_FORK) { -		bno = args->dp->i_mount->m_dirleafblk; -		count = args->dp->i_mount->m_dirblkfsbs; -	} else { -		bno = 0; -		count = 1; -	} - -	error = xfs_da_grow_inode_int(args, &bno, count); +	bno = args->geo->leafblk; +	error = xfs_da_grow_inode_int(args, &bno, args->geo->fsbcount);  	if (!error)  		*new_blkno = (xfs_dablk_t)bno;  	return error; @@ -2163,7 +2123,7 @@ xfs_da3_swap_lastblock(  	struct xfs_dir2_leaf	*dead_leaf2;  	struct xfs_da_node_entry *btree;  	struct xfs_da3_icnode_hdr par_hdr; -	struct xfs_inode	*ip; +	struct xfs_inode	*dp;  	struct xfs_trans	*tp;  	struct xfs_mount	*mp;  	struct xfs_buf		*dead_buf; @@ -2187,12 +2147,12 @@ xfs_da3_swap_lastblock(  	dead_buf = *dead_bufp;  	dead_blkno = *dead_blknop;  	tp = args->trans; -	ip = args->dp; +	dp = args->dp;  	w = args->whichfork;  	ASSERT(w == XFS_DATA_FORK); -	mp = ip->i_mount; -	lastoff = mp->m_dirfreeblk; -	error = xfs_bmap_last_before(tp, ip, &lastoff, w); +	mp = dp->i_mount; +	lastoff = args->geo->freeblk; +	error = xfs_bmap_last_before(tp, dp, &lastoff, w);  	if (error)  		return error;  	if (unlikely(lastoff == 0)) { @@ -2203,15 +2163,15 @@ xfs_da3_swap_lastblock(  	/*  	 * Read the last block in the btree space.  	 */ -	last_blkno = (xfs_dablk_t)lastoff - mp->m_dirblkfsbs; -	error = xfs_da3_node_read(tp, ip, last_blkno, -1, &last_buf, w); +	last_blkno = (xfs_dablk_t)lastoff - args->geo->fsbcount; +	error = xfs_da3_node_read(tp, dp, last_blkno, -1, &last_buf, w);  	if (error)  		return error;  	/*  	 * Copy the last block into the dead buffer and log it.  	 */ -	memcpy(dead_buf->b_addr, last_buf->b_addr, mp->m_dirblksize); -	xfs_trans_log_buf(tp, dead_buf, 0, mp->m_dirblksize - 1); +	memcpy(dead_buf->b_addr, last_buf->b_addr, args->geo->blksize); +	xfs_trans_log_buf(tp, dead_buf, 0, args->geo->blksize - 1);  	dead_info = dead_buf->b_addr;  	/*  	 * Get values from the moved block. 
@@ -2222,16 +2182,16 @@ xfs_da3_swap_lastblock(  		struct xfs_dir2_leaf_entry *ents;  		dead_leaf2 = (xfs_dir2_leaf_t *)dead_info; -		xfs_dir3_leaf_hdr_from_disk(&leafhdr, dead_leaf2); -		ents = xfs_dir3_leaf_ents_p(dead_leaf2); +		dp->d_ops->leaf_hdr_from_disk(&leafhdr, dead_leaf2); +		ents = dp->d_ops->leaf_ents_p(dead_leaf2);  		dead_level = 0;  		dead_hash = be32_to_cpu(ents[leafhdr.count - 1].hashval);  	} else {  		struct xfs_da3_icnode_hdr deadhdr;  		dead_node = (xfs_da_intnode_t *)dead_info; -		xfs_da3_node_hdr_from_disk(&deadhdr, dead_node); -		btree = xfs_da3_node_tree_p(dead_node); +		dp->d_ops->node_hdr_from_disk(&deadhdr, dead_node); +		btree = dp->d_ops->node_tree_p(dead_node);  		dead_level = deadhdr.level;  		dead_hash = be32_to_cpu(btree[deadhdr.count - 1].hashval);  	} @@ -2240,7 +2200,7 @@ xfs_da3_swap_lastblock(  	 * If the moved block has a left sibling, fix up the pointers.  	 */  	if ((sib_blkno = be32_to_cpu(dead_info->back))) { -		error = xfs_da3_node_read(tp, ip, sib_blkno, -1, &sib_buf, w); +		error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w);  		if (error)  			goto done;  		sib_info = sib_buf->b_addr; @@ -2262,7 +2222,7 @@ xfs_da3_swap_lastblock(  	 * If the moved block has a right sibling, fix up the pointers.  	 */  	if ((sib_blkno = be32_to_cpu(dead_info->forw))) { -		error = xfs_da3_node_read(tp, ip, sib_blkno, -1, &sib_buf, w); +		error = xfs_da3_node_read(tp, dp, sib_blkno, -1, &sib_buf, w);  		if (error)  			goto done;  		sib_info = sib_buf->b_addr; @@ -2280,17 +2240,17 @@ xfs_da3_swap_lastblock(  					sizeof(sib_info->back)));  		sib_buf = NULL;  	} -	par_blkno = mp->m_dirleafblk; +	par_blkno = args->geo->leafblk;  	level = -1;  	/*  	 * Walk down the tree looking for the parent of the moved block.  	 */  	for (;;) { -		error = xfs_da3_node_read(tp, ip, par_blkno, -1, &par_buf, w); +		error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w);  		if (error)  			goto done;  		par_node = par_buf->b_addr; -		xfs_da3_node_hdr_from_disk(&par_hdr, par_node); +		dp->d_ops->node_hdr_from_disk(&par_hdr, par_node);  		if (level >= 0 && level != par_hdr.level + 1) {  			XFS_ERROR_REPORT("xfs_da_swap_lastblock(4)",  					 XFS_ERRLEVEL_LOW, mp); @@ -2298,7 +2258,7 @@ xfs_da3_swap_lastblock(  			goto done;  		}  		level = par_hdr.level; -		btree = xfs_da3_node_tree_p(par_node); +		btree = dp->d_ops->node_tree_p(par_node);  		for (entno = 0;  		     entno < par_hdr.count &&  		     be32_to_cpu(btree[entno].hashval) < dead_hash; @@ -2337,18 +2297,18 @@ xfs_da3_swap_lastblock(  			error = XFS_ERROR(EFSCORRUPTED);  			goto done;  		} -		error = xfs_da3_node_read(tp, ip, par_blkno, -1, &par_buf, w); +		error = xfs_da3_node_read(tp, dp, par_blkno, -1, &par_buf, w);  		if (error)  			goto done;  		par_node = par_buf->b_addr; -		xfs_da3_node_hdr_from_disk(&par_hdr, par_node); +		dp->d_ops->node_hdr_from_disk(&par_hdr, par_node);  		if (par_hdr.level != level) {  			XFS_ERROR_REPORT("xfs_da_swap_lastblock(7)",  					 XFS_ERRLEVEL_LOW, mp);  			error = XFS_ERROR(EFSCORRUPTED);  			goto done;  		} -		btree = xfs_da3_node_tree_p(par_node); +		btree = dp->d_ops->node_tree_p(par_node);  		entno = 0;  	}  	/* @@ -2390,10 +2350,7 @@ xfs_da_shrink_inode(  	w = args->whichfork;  	tp = args->trans;  	mp = dp->i_mount; -	if (w == XFS_DATA_FORK) -		count = mp->m_dirblkfsbs; -	else -		count = 1; +	count = args->geo->fsbcount;  	for (;;) {  		/*  		 * Remove extents.  
If we get ENOSPC for a dir we have to move @@ -2495,7 +2452,6 @@ xfs_buf_map_from_irec(   */  static int  xfs_dabuf_map( -	struct xfs_trans	*trans,  	struct xfs_inode	*dp,  	xfs_dablk_t		bno,  	xfs_daddr_t		mappedbno, @@ -2513,7 +2469,10 @@ xfs_dabuf_map(  	ASSERT(map && *map);  	ASSERT(*nmaps == 1); -	nfsb = (whichfork == XFS_DATA_FORK) ? mp->m_dirblkfsbs : 1; +	if (whichfork == XFS_DATA_FORK) +		nfsb = mp->m_dir_geo->fsbcount; +	else +		nfsb = mp->m_attr_geo->fsbcount;  	/*  	 * Caller doesn't have a mapping.  -2 means don't complain @@ -2591,7 +2550,7 @@ xfs_da_get_buf(  	*bpp = NULL;  	mapp = ↦  	nmap = 1; -	error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork, +	error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,  				&mapp, &nmap);  	if (error) {  		/* mapping a hole is not an error, but we don't continue */ @@ -2639,7 +2598,7 @@ xfs_da_read_buf(  	*bpp = NULL;  	mapp = ↦  	nmap = 1; -	error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork, +	error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,  				&mapp, &nmap);  	if (error) {  		/* mapping a hole is not an error, but we don't continue */ @@ -2658,47 +2617,6 @@ xfs_da_read_buf(  		xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);  	else  		xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF); - -	/* -	 * This verification code will be moved to a CRC verification callback -	 * function so just leave it here unchanged until then. -	 */ -	{ -		xfs_dir2_data_hdr_t	*hdr = bp->b_addr; -		xfs_dir2_free_t		*free = bp->b_addr; -		xfs_da_blkinfo_t	*info = bp->b_addr; -		uint			magic, magic1; -		struct xfs_mount	*mp = dp->i_mount; - -		magic = be16_to_cpu(info->magic); -		magic1 = be32_to_cpu(hdr->magic); -		if (unlikely( -		    XFS_TEST_ERROR((magic != XFS_DA_NODE_MAGIC) && -				   (magic != XFS_DA3_NODE_MAGIC) && -				   (magic != XFS_ATTR_LEAF_MAGIC) && -				   (magic != XFS_ATTR3_LEAF_MAGIC) && -				   (magic != XFS_DIR2_LEAF1_MAGIC) && -				   (magic != XFS_DIR3_LEAF1_MAGIC) && -				   (magic != XFS_DIR2_LEAFN_MAGIC) && -				   (magic != XFS_DIR3_LEAFN_MAGIC) && -				   (magic1 != XFS_DIR2_BLOCK_MAGIC) && -				   (magic1 != XFS_DIR3_BLOCK_MAGIC) && -				   (magic1 != XFS_DIR2_DATA_MAGIC) && -				   (magic1 != XFS_DIR3_DATA_MAGIC) && -				   (free->hdr.magic != -					cpu_to_be32(XFS_DIR2_FREE_MAGIC)) && -				   (free->hdr.magic != -					cpu_to_be32(XFS_DIR3_FREE_MAGIC)), -				mp, XFS_ERRTAG_DA_READ_BUF, -				XFS_RANDOM_DA_READ_BUF))) { -			trace_xfs_da_btree_corrupt(bp, _RET_IP_); -			XFS_CORRUPTION_ERROR("xfs_da_do_buf(2)", -					     XFS_ERRLEVEL_LOW, mp, info); -			error = XFS_ERROR(EFSCORRUPTED); -			xfs_trans_brelse(trans, bp); -			goto out_free; -		} -	}  	*bpp = bp;  out_free:  	if (mapp != &map) @@ -2712,7 +2630,6 @@ out_free:   */  xfs_daddr_t  xfs_da_reada_buf( -	struct xfs_trans	*trans,  	struct xfs_inode	*dp,  	xfs_dablk_t		bno,  	xfs_daddr_t		mappedbno, @@ -2726,7 +2643,7 @@ xfs_da_reada_buf(  	mapp = ↦  	nmap = 1; -	error = xfs_dabuf_map(trans, dp, bno, mappedbno, whichfork, +	error = xfs_dabuf_map(dp, bno, mappedbno, whichfork,  				&mapp, &nmap);  	if (error) {  		/* mapping a hole is not an error, but we don't continue */ diff --git a/fs/xfs/xfs_da_btree.h b/fs/xfs/xfs_da_btree.h index b1f267995de..6e153e399a7 100644 --- a/fs/xfs/xfs_da_btree.h +++ b/fs/xfs/xfs_da_btree.h @@ -23,147 +23,25 @@ struct xfs_bmap_free;  struct xfs_inode;  struct xfs_trans;  struct zone; - -/*======================================================================== - * Directory Structure when greater than XFS_LBSIZE(mp) bytes. 
- *========================================================================*/ - -/* - * This structure is common to both leaf nodes and non-leaf nodes in the Btree. - * - * It is used to manage a doubly linked list of all blocks at the same - * level in the Btree, and to identify which type of block this is. - */ -#define XFS_DA_NODE_MAGIC	0xfebe	/* magic number: non-leaf blocks */ -#define XFS_ATTR_LEAF_MAGIC	0xfbee	/* magic number: attribute leaf blks */ -#define	XFS_DIR2_LEAF1_MAGIC	0xd2f1	/* magic number: v2 dirlf single blks */ -#define	XFS_DIR2_LEAFN_MAGIC	0xd2ff	/* magic number: v2 dirlf multi blks */ - -typedef struct xfs_da_blkinfo { -	__be32		forw;			/* previous block in list */ -	__be32		back;			/* following block in list */ -	__be16		magic;			/* validity check on block */ -	__be16		pad;			/* unused */ -} xfs_da_blkinfo_t; - -/* - * CRC enabled directory structure types - * - * The headers change size for the additional verification information, but - * otherwise the tree layouts and contents are unchanged. Hence the da btree - * code can use the struct xfs_da_blkinfo for manipulating the tree links and - * magic numbers without modification for both v2 and v3 nodes. - */ -#define XFS_DA3_NODE_MAGIC	0x3ebe	/* magic number: non-leaf blocks */ -#define XFS_ATTR3_LEAF_MAGIC	0x3bee	/* magic number: attribute leaf blks */ -#define	XFS_DIR3_LEAF1_MAGIC	0x3df1	/* magic number: v2 dirlf single blks */ -#define	XFS_DIR3_LEAFN_MAGIC	0x3dff	/* magic number: v2 dirlf multi blks */ - -struct xfs_da3_blkinfo { -	/* -	 * the node link manipulation code relies on the fact that the first -	 * element of this structure is the struct xfs_da_blkinfo so it can -	 * ignore the differences in the rest of the structures. -	 */ -	struct xfs_da_blkinfo	hdr; -	__be32			crc;	/* CRC of block */ -	__be64			blkno;	/* first block of the buffer */ -	__be64			lsn;	/* sequence number of last write */ -	uuid_t			uuid;	/* filesystem we belong to */ -	__be64			owner;	/* inode that owns the block */ +struct xfs_dir_ops; + +/* + * Directory/attribute geometry information. There will be one of these for each + * data fork type, and it will be passed around via the xfs_da_args. Global + * structures will be attached to the xfs_mount. + */ +struct xfs_da_geometry { +	int		blksize;	/* da block size in bytes */ +	int		fsbcount;	/* da block size in filesystem blocks */ +	uint8_t		fsblog;		/* log2 of _filesystem_ block size */ +	uint8_t		blklog;		/* log2 of da block size */ +	uint		node_ents;	/* # of entries in a danode */ +	int		magicpct;	/* 37% of block size in bytes */ +	xfs_dablk_t	datablk;	/* blockno of dir data v2 */ +	xfs_dablk_t	leafblk;	/* blockno of leaf data v2 */ +	xfs_dablk_t	freeblk;	/* blockno of free data v2 */  }; -/* - * This is the structure of the root and intermediate nodes in the Btree. - * The leaf nodes are defined above. - * - * Entries are not packed. - * - * Since we have duplicate keys, use a binary search but always follow - * all match in the block, not just the first match found. - */ -#define	XFS_DA_NODE_MAXDEPTH	5	/* max depth of Btree */ - -typedef struct xfs_da_node_hdr { -	struct xfs_da_blkinfo	info;	/* block type, links, etc. */ -	__be16			__count; /* count of active entries */ -	__be16			__level; /* level above leaves (leaf == 0) */ -} xfs_da_node_hdr_t; - -struct xfs_da3_node_hdr { -	struct xfs_da3_blkinfo	info;	/* block type, links, etc. 
*/ -	__be16			__count; /* count of active entries */ -	__be16			__level; /* level above leaves (leaf == 0) */ -	__be32			__pad32; -}; - -#define XFS_DA3_NODE_CRC_OFF	(offsetof(struct xfs_da3_node_hdr, info.crc)) - -typedef struct xfs_da_node_entry { -	__be32	hashval;	/* hash value for this descendant */ -	__be32	before;		/* Btree block before this key */ -} xfs_da_node_entry_t; - -typedef struct xfs_da_intnode { -	struct xfs_da_node_hdr	hdr; -	struct xfs_da_node_entry __btree[]; -} xfs_da_intnode_t; - -struct xfs_da3_intnode { -	struct xfs_da3_node_hdr	hdr; -	struct xfs_da_node_entry __btree[]; -}; - -/* - * In-core version of the node header to abstract the differences in the v2 and - * v3 disk format of the headers. Callers need to convert to/from disk format as - * appropriate. - */ -struct xfs_da3_icnode_hdr { -	__uint32_t	forw; -	__uint32_t	back; -	__uint16_t	magic; -	__uint16_t	count; -	__uint16_t	level; -}; - -extern void xfs_da3_node_hdr_from_disk(struct xfs_da3_icnode_hdr *to, -				       struct xfs_da_intnode *from); -extern void xfs_da3_node_hdr_to_disk(struct xfs_da_intnode *to, -				     struct xfs_da3_icnode_hdr *from); - -static inline int -__xfs_da3_node_hdr_size(bool v3) -{ -	if (v3) -		return sizeof(struct xfs_da3_node_hdr); -	return sizeof(struct xfs_da_node_hdr); -} -static inline int -xfs_da3_node_hdr_size(struct xfs_da_intnode *dap) -{ -	bool	v3 = dap->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC); - -	return __xfs_da3_node_hdr_size(v3); -} - -static inline struct xfs_da_node_entry * -xfs_da3_node_tree_p(struct xfs_da_intnode *dap) -{ -	if (dap->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)) { -		struct xfs_da3_intnode *dap3 = (struct xfs_da3_intnode *)dap; -		return dap3->__btree; -	} -	return dap->__btree; -} - -extern void xfs_da3_intnode_from_disk(struct xfs_da3_icnode_hdr *to, -				      struct xfs_da_intnode *from); -extern void xfs_da3_intnode_to_disk(struct xfs_da_intnode *to, -				    struct xfs_da3_icnode_hdr *from); - -#define	XFS_LBSIZE(mp)	(mp)->m_sb.sb_blocksize -  /*========================================================================   * Btree searching and modification structure definitions.   *========================================================================*/ @@ -181,6 +59,7 @@ enum xfs_dacmp {   * Structure to ease passing around component names.   
*/  typedef struct xfs_da_args { +	struct xfs_da_geometry *geo;	/* da block geometry */  	const __uint8_t	*name;		/* string (maybe not NULL terminated) */  	int		namelen;	/* length of string (maybe no NULL) */  	__uint8_t	filetype;	/* filetype of inode for directories */ @@ -199,10 +78,12 @@ typedef struct xfs_da_args {  	int		index;		/* index of attr of interest in blk */  	xfs_dablk_t	rmtblkno;	/* remote attr value starting blkno */  	int		rmtblkcnt;	/* remote attr value block count */ +	int		rmtvaluelen;	/* remote attr value length in bytes */  	xfs_dablk_t	blkno2;		/* blkno of 2nd attr leaf of interest */  	int		index2;		/* index of 2nd attr in blk */  	xfs_dablk_t	rmtblkno2;	/* remote attr value starting blkno */  	int		rmtblkcnt2;	/* remote attr value block count */ +	int		rmtvaluelen2;	/* remote attr value length in bytes */  	int		op_flags;	/* operation flags */  	enum xfs_dacmp	cmpresult;	/* name compare result for lookups */  } xfs_da_args_t; @@ -247,8 +128,6 @@ typedef struct xfs_da_state_path {  typedef struct xfs_da_state {  	xfs_da_args_t		*args;		/* filename arguments */  	struct xfs_mount	*mp;		/* filesystem mount point */ -	unsigned int		blocksize;	/* logical block size */ -	unsigned int		node_ents;	/* how many entries in danode */  	xfs_da_state_path_t	path;		/* search/split paths */  	xfs_da_state_path_t	altpath;	/* alternate path for join */  	unsigned char		inleaf;		/* insert into 1->lf, 0->splf */ @@ -309,8 +188,6 @@ int	xfs_da3_node_read(struct xfs_trans *tp, struct xfs_inode *dp,  			 xfs_dablk_t bno, xfs_daddr_t mappedbno,  			 struct xfs_buf **bpp, int which_fork); -extern const struct xfs_buf_ops xfs_da3_node_buf_ops; -  /*   * Utility routines.   */ @@ -324,9 +201,9 @@ int	xfs_da_read_buf(struct xfs_trans *trans, struct xfs_inode *dp,  			       xfs_dablk_t bno, xfs_daddr_t mappedbno,  			       struct xfs_buf **bpp, int whichfork,  			       const struct xfs_buf_ops *ops); -xfs_daddr_t	xfs_da_reada_buf(struct xfs_trans *trans, struct xfs_inode *dp, -				xfs_dablk_t bno, xfs_daddr_t mapped_bno, -				int whichfork, const struct xfs_buf_ops *ops); +xfs_daddr_t	xfs_da_reada_buf(struct xfs_inode *dp, xfs_dablk_t bno, +				xfs_daddr_t mapped_bno, int whichfork, +				const struct xfs_buf_ops *ops);  int	xfs_da_shrink_inode(xfs_da_args_t *args, xfs_dablk_t dead_blkno,  					  struct xfs_buf *dead_buf); diff --git a/fs/xfs/xfs_da_format.c b/fs/xfs/xfs_da_format.c new file mode 100644 index 00000000000..c9aee52a37e --- /dev/null +++ b/fs/xfs/xfs_da_format.c @@ -0,0 +1,911 @@ +/* + * Copyright (c) 2000,2002,2005 Silicon Graphics, Inc. + * Copyright (c) 2013 Red Hat, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_inode.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" + +/* + * Shortform directory ops + */ +static int +xfs_dir2_sf_entsize( +	struct xfs_dir2_sf_hdr	*hdr, +	int			len) +{ +	int count = sizeof(struct xfs_dir2_sf_entry);	/* namelen + offset */ + +	count += len;					/* name */ +	count += hdr->i8count ? sizeof(xfs_dir2_ino8_t) : +				sizeof(xfs_dir2_ino4_t); /* ino # */ +	return count; +} + +static int +xfs_dir3_sf_entsize( +	struct xfs_dir2_sf_hdr	*hdr, +	int			len) +{ +	return xfs_dir2_sf_entsize(hdr, len) + sizeof(__uint8_t); +} + +static struct xfs_dir2_sf_entry * +xfs_dir2_sf_nextentry( +	struct xfs_dir2_sf_hdr	*hdr, +	struct xfs_dir2_sf_entry *sfep) +{ +	return (struct xfs_dir2_sf_entry *) +		((char *)sfep + xfs_dir2_sf_entsize(hdr, sfep->namelen)); +} + +static struct xfs_dir2_sf_entry * +xfs_dir3_sf_nextentry( +	struct xfs_dir2_sf_hdr	*hdr, +	struct xfs_dir2_sf_entry *sfep) +{ +	return (struct xfs_dir2_sf_entry *) +		((char *)sfep + xfs_dir3_sf_entsize(hdr, sfep->namelen)); +} + + +/* + * For filetype enabled shortform directories, the file type field is stored at + * the end of the name.  Because it's only a single byte, endian conversion is + * not necessary. For non-filetype enable directories, the type is always + * unknown and we never store the value. + */ +static __uint8_t +xfs_dir2_sfe_get_ftype( +	struct xfs_dir2_sf_entry *sfep) +{ +	return XFS_DIR3_FT_UNKNOWN; +} + +static void +xfs_dir2_sfe_put_ftype( +	struct xfs_dir2_sf_entry *sfep, +	__uint8_t		ftype) +{ +	ASSERT(ftype < XFS_DIR3_FT_MAX); +} + +static __uint8_t +xfs_dir3_sfe_get_ftype( +	struct xfs_dir2_sf_entry *sfep) +{ +	__uint8_t	ftype; + +	ftype = sfep->name[sfep->namelen]; +	if (ftype >= XFS_DIR3_FT_MAX) +		return XFS_DIR3_FT_UNKNOWN; +	return ftype; +} + +static void +xfs_dir3_sfe_put_ftype( +	struct xfs_dir2_sf_entry *sfep, +	__uint8_t		ftype) +{ +	ASSERT(ftype < XFS_DIR3_FT_MAX); + +	sfep->name[sfep->namelen] = ftype; +} + +/* + * Inode numbers in short-form directories can come in two versions, + * either 4 bytes or 8 bytes wide.  These helpers deal with the + * two forms transparently by looking at the headers i8count field. + * + * For 64-bit inode number the most significant byte must be zero. 
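To put numbers on the shortform entry-size helpers above, here is a small user-space sketch of the same arithmetic. The 3-byte namelen/offset header, the 4- or 8-byte inode number selected by i8count, and the optional trailing file type byte are assumptions drawn from those helpers, not a restatement of the on-disk definitions.

#include <stdio.h>

/* sketch of the xfs_dir2/3_sf_entsize() arithmetic; field widths are assumed */
static int sf_entsize(int i8count, int namelen, int has_ftype)
{
	int count = 1 + 2;		/* namelen byte + saved 2-byte offset */

	count += namelen;		/* name bytes */
	count += i8count ? 8 : 4;	/* inode number width */
	if (has_ftype)
		count += 1;		/* file type byte stored after the name */
	return count;
}

int main(void)
{
	printf("\"foo\", 4-byte inodes, no ftype: %d bytes\n", sf_entsize(0, 3, 0));
	printf("\"foo\", 4-byte inodes, ftype:    %d bytes\n", sf_entsize(0, 3, 1));
	printf("\"foo\", 8-byte inodes, ftype:    %d bytes\n", sf_entsize(1, 3, 1));
	return 0;
}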
+ */ +static xfs_ino_t +xfs_dir2_sf_get_ino( +	struct xfs_dir2_sf_hdr	*hdr, +	xfs_dir2_inou_t		*from) +{ +	if (hdr->i8count) +		return get_unaligned_be64(&from->i8.i) & 0x00ffffffffffffffULL; +	else +		return get_unaligned_be32(&from->i4.i); +} + +static void +xfs_dir2_sf_put_ino( +	struct xfs_dir2_sf_hdr	*hdr, +	xfs_dir2_inou_t		*to, +	xfs_ino_t		ino) +{ +	ASSERT((ino & 0xff00000000000000ULL) == 0); + +	if (hdr->i8count) +		put_unaligned_be64(ino, &to->i8.i); +	else +		put_unaligned_be32(ino, &to->i4.i); +} + +static xfs_ino_t +xfs_dir2_sf_get_parent_ino( +	struct xfs_dir2_sf_hdr	*hdr) +{ +	return xfs_dir2_sf_get_ino(hdr, &hdr->parent); +} + +static void +xfs_dir2_sf_put_parent_ino( +	struct xfs_dir2_sf_hdr	*hdr, +	xfs_ino_t		ino) +{ +	xfs_dir2_sf_put_ino(hdr, &hdr->parent, ino); +} + +/* + * In short-form directory entries the inode numbers are stored at variable + * offset behind the entry name. If the entry stores a filetype value, then it + * sits between the name and the inode number. Hence the inode numbers may only + * be accessed through the helpers below. + */ +static xfs_ino_t +xfs_dir2_sfe_get_ino( +	struct xfs_dir2_sf_hdr	*hdr, +	struct xfs_dir2_sf_entry *sfep) +{ +	return xfs_dir2_sf_get_ino(hdr, +				(xfs_dir2_inou_t *)&sfep->name[sfep->namelen]); +} + +static void +xfs_dir2_sfe_put_ino( +	struct xfs_dir2_sf_hdr	*hdr, +	struct xfs_dir2_sf_entry *sfep, +	xfs_ino_t		ino) +{ +	xfs_dir2_sf_put_ino(hdr, +			    (xfs_dir2_inou_t *)&sfep->name[sfep->namelen], ino); +} + +static xfs_ino_t +xfs_dir3_sfe_get_ino( +	struct xfs_dir2_sf_hdr	*hdr, +	struct xfs_dir2_sf_entry *sfep) +{ +	return xfs_dir2_sf_get_ino(hdr, +			(xfs_dir2_inou_t *)&sfep->name[sfep->namelen + 1]); +} + +static void +xfs_dir3_sfe_put_ino( +	struct xfs_dir2_sf_hdr	*hdr, +	struct xfs_dir2_sf_entry *sfep, +	xfs_ino_t		ino) +{ +	xfs_dir2_sf_put_ino(hdr, +			(xfs_dir2_inou_t *)&sfep->name[sfep->namelen + 1], ino); +} + + +/* + * Directory data block operations + */ + +/* + * For special situations, the dirent size ends up fixed because we always know + * what the size of the entry is. That's true for the "." and "..", and + * therefore we know that they are a fixed size and hence their offsets are + * constant, as is the first entry. + * + * Hence, this calculation is written as a macro to be able to be calculated at + * compile time and so certain offsets can be calculated directly in the + * structure initaliser via the macro. There are two macros - one for dirents + * with ftype and without so there are no unresolvable conditionals in the + * calculations. We also use round_up() as XFS_DIR2_DATA_ALIGN is always a power + * of 2 and the compiler doesn't reject it (unlike roundup()). 
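As a rough illustration of why the "." and ".." entries end up with fixed, compile-time sizes, the XFS_DIR2/3_DATA_ENTSIZE() arithmetic can be mirrored in user space. The 8-byte inumber, 1-byte namelen, 2-byte tag word and 8-byte XFS_DIR2_DATA_ALIGN used below are assumptions about the data entry layout.

#include <stdio.h>

#define DATA_ALIGN	8				/* assumed XFS_DIR2_DATA_ALIGN */
#define ROUND_UP(x, a)	(((x) + (a) - 1) & ~((a) - 1))	/* (a) must be a power of 2 */

static int data_entsize(int namelen, int has_ftype)
{
	int size = 8 + 1 + namelen + 2;		/* inumber + namelen + name + tag */

	if (has_ftype)
		size += 1;			/* file type byte */
	return ROUND_UP(size, DATA_ALIGN);
}

int main(void)
{
	printf(".  entry: %d bytes (v2), %d bytes (ftype)\n",
	       data_entsize(1, 0), data_entsize(1, 1));
	printf(".. entry: %d bytes (v2), %d bytes (ftype)\n",
	       data_entsize(2, 0), data_entsize(2, 1));
	return 0;
}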
+ */ +#define XFS_DIR2_DATA_ENTSIZE(n)					\ +	round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) +	\ +		 sizeof(xfs_dir2_data_off_t)), XFS_DIR2_DATA_ALIGN) + +#define XFS_DIR3_DATA_ENTSIZE(n)					\ +	round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) +	\ +		 sizeof(xfs_dir2_data_off_t) + sizeof(__uint8_t)),	\ +		XFS_DIR2_DATA_ALIGN) + +static int +xfs_dir2_data_entsize( +	int			n) +{ +	return XFS_DIR2_DATA_ENTSIZE(n); +} + +static int +xfs_dir3_data_entsize( +	int			n) +{ +	return XFS_DIR3_DATA_ENTSIZE(n); +} + +static __uint8_t +xfs_dir2_data_get_ftype( +	struct xfs_dir2_data_entry *dep) +{ +	return XFS_DIR3_FT_UNKNOWN; +} + +static void +xfs_dir2_data_put_ftype( +	struct xfs_dir2_data_entry *dep, +	__uint8_t		ftype) +{ +	ASSERT(ftype < XFS_DIR3_FT_MAX); +} + +static __uint8_t +xfs_dir3_data_get_ftype( +	struct xfs_dir2_data_entry *dep) +{ +	__uint8_t	ftype = dep->name[dep->namelen]; + +	ASSERT(ftype < XFS_DIR3_FT_MAX); +	if (ftype >= XFS_DIR3_FT_MAX) +		return XFS_DIR3_FT_UNKNOWN; +	return ftype; +} + +static void +xfs_dir3_data_put_ftype( +	struct xfs_dir2_data_entry *dep, +	__uint8_t		type) +{ +	ASSERT(type < XFS_DIR3_FT_MAX); +	ASSERT(dep->namelen != 0); + +	dep->name[dep->namelen] = type; +} + +/* + * Pointer to an entry's tag word. + */ +static __be16 * +xfs_dir2_data_entry_tag_p( +	struct xfs_dir2_data_entry *dep) +{ +	return (__be16 *)((char *)dep + +		xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16)); +} + +static __be16 * +xfs_dir3_data_entry_tag_p( +	struct xfs_dir2_data_entry *dep) +{ +	return (__be16 *)((char *)dep + +		xfs_dir3_data_entsize(dep->namelen) - sizeof(__be16)); +} + +/* + * location of . and .. in data space (always block 0) + */ +static struct xfs_dir2_data_entry * +xfs_dir2_data_dot_entry_p( +	struct xfs_dir2_data_hdr *hdr) +{ +	return (struct xfs_dir2_data_entry *) +		((char *)hdr + sizeof(struct xfs_dir2_data_hdr)); +} + +static struct xfs_dir2_data_entry * +xfs_dir2_data_dotdot_entry_p( +	struct xfs_dir2_data_hdr *hdr) +{ +	return (struct xfs_dir2_data_entry *) +		((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + +				XFS_DIR2_DATA_ENTSIZE(1)); +} + +static struct xfs_dir2_data_entry * +xfs_dir2_data_first_entry_p( +	struct xfs_dir2_data_hdr *hdr) +{ +	return (struct xfs_dir2_data_entry *) +		((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + +				XFS_DIR2_DATA_ENTSIZE(1) + +				XFS_DIR2_DATA_ENTSIZE(2)); +} + +static struct xfs_dir2_data_entry * +xfs_dir2_ftype_data_dotdot_entry_p( +	struct xfs_dir2_data_hdr *hdr) +{ +	return (struct xfs_dir2_data_entry *) +		((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + +				XFS_DIR3_DATA_ENTSIZE(1)); +} + +static struct xfs_dir2_data_entry * +xfs_dir2_ftype_data_first_entry_p( +	struct xfs_dir2_data_hdr *hdr) +{ +	return (struct xfs_dir2_data_entry *) +		((char *)hdr + sizeof(struct xfs_dir2_data_hdr) + +				XFS_DIR3_DATA_ENTSIZE(1) + +				XFS_DIR3_DATA_ENTSIZE(2)); +} + +static struct xfs_dir2_data_entry * +xfs_dir3_data_dot_entry_p( +	struct xfs_dir2_data_hdr *hdr) +{ +	return (struct xfs_dir2_data_entry *) +		((char *)hdr + sizeof(struct xfs_dir3_data_hdr)); +} + +static struct xfs_dir2_data_entry * +xfs_dir3_data_dotdot_entry_p( +	struct xfs_dir2_data_hdr *hdr) +{ +	return (struct xfs_dir2_data_entry *) +		((char *)hdr + sizeof(struct xfs_dir3_data_hdr) + +				XFS_DIR3_DATA_ENTSIZE(1)); +} + +static struct xfs_dir2_data_entry * +xfs_dir3_data_first_entry_p( +	struct xfs_dir2_data_hdr *hdr) +{ +	return (struct xfs_dir2_data_entry *) +		((char *)hdr + sizeof(struct xfs_dir3_data_hdr) + +				
XFS_DIR3_DATA_ENTSIZE(1) + +				XFS_DIR3_DATA_ENTSIZE(2)); +} + +static struct xfs_dir2_data_free * +xfs_dir2_data_bestfree_p(struct xfs_dir2_data_hdr *hdr) +{ +	return hdr->bestfree; +} + +static struct xfs_dir2_data_free * +xfs_dir3_data_bestfree_p(struct xfs_dir2_data_hdr *hdr) +{ +	return ((struct xfs_dir3_data_hdr *)hdr)->best_free; +} + +static struct xfs_dir2_data_entry * +xfs_dir2_data_entry_p(struct xfs_dir2_data_hdr *hdr) +{ +	return (struct xfs_dir2_data_entry *) +		((char *)hdr + sizeof(struct xfs_dir2_data_hdr)); +} + +static struct xfs_dir2_data_unused * +xfs_dir2_data_unused_p(struct xfs_dir2_data_hdr *hdr) +{ +	return (struct xfs_dir2_data_unused *) +		((char *)hdr + sizeof(struct xfs_dir2_data_hdr)); +} + +static struct xfs_dir2_data_entry * +xfs_dir3_data_entry_p(struct xfs_dir2_data_hdr *hdr) +{ +	return (struct xfs_dir2_data_entry *) +		((char *)hdr + sizeof(struct xfs_dir3_data_hdr)); +} + +static struct xfs_dir2_data_unused * +xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr) +{ +	return (struct xfs_dir2_data_unused *) +		((char *)hdr + sizeof(struct xfs_dir3_data_hdr)); +} + + +/* + * Directory Leaf block operations + */ +static int +xfs_dir2_max_leaf_ents(struct xfs_da_geometry *geo) +{ +	return (geo->blksize - sizeof(struct xfs_dir2_leaf_hdr)) / +		(uint)sizeof(struct xfs_dir2_leaf_entry); +} + +static struct xfs_dir2_leaf_entry * +xfs_dir2_leaf_ents_p(struct xfs_dir2_leaf *lp) +{ +	return lp->__ents; +} + +static int +xfs_dir3_max_leaf_ents(struct xfs_da_geometry *geo) +{ +	return (geo->blksize - sizeof(struct xfs_dir3_leaf_hdr)) / +		(uint)sizeof(struct xfs_dir2_leaf_entry); +} + +static struct xfs_dir2_leaf_entry * +xfs_dir3_leaf_ents_p(struct xfs_dir2_leaf *lp) +{ +	return ((struct xfs_dir3_leaf *)lp)->__ents; +} + +static void +xfs_dir2_leaf_hdr_from_disk( +	struct xfs_dir3_icleaf_hdr	*to, +	struct xfs_dir2_leaf		*from) +{ +	to->forw = be32_to_cpu(from->hdr.info.forw); +	to->back = be32_to_cpu(from->hdr.info.back); +	to->magic = be16_to_cpu(from->hdr.info.magic); +	to->count = be16_to_cpu(from->hdr.count); +	to->stale = be16_to_cpu(from->hdr.stale); + +	ASSERT(to->magic == XFS_DIR2_LEAF1_MAGIC || +	       to->magic == XFS_DIR2_LEAFN_MAGIC); +} + +static void +xfs_dir2_leaf_hdr_to_disk( +	struct xfs_dir2_leaf		*to, +	struct xfs_dir3_icleaf_hdr	*from) +{ +	ASSERT(from->magic == XFS_DIR2_LEAF1_MAGIC || +	       from->magic == XFS_DIR2_LEAFN_MAGIC); + +	to->hdr.info.forw = cpu_to_be32(from->forw); +	to->hdr.info.back = cpu_to_be32(from->back); +	to->hdr.info.magic = cpu_to_be16(from->magic); +	to->hdr.count = cpu_to_be16(from->count); +	to->hdr.stale = cpu_to_be16(from->stale); +} + +static void +xfs_dir3_leaf_hdr_from_disk( +	struct xfs_dir3_icleaf_hdr	*to, +	struct xfs_dir2_leaf		*from) +{ +	struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)from; + +	to->forw = be32_to_cpu(hdr3->info.hdr.forw); +	to->back = be32_to_cpu(hdr3->info.hdr.back); +	to->magic = be16_to_cpu(hdr3->info.hdr.magic); +	to->count = be16_to_cpu(hdr3->count); +	to->stale = be16_to_cpu(hdr3->stale); + +	ASSERT(to->magic == XFS_DIR3_LEAF1_MAGIC || +	       to->magic == XFS_DIR3_LEAFN_MAGIC); +} + +static void +xfs_dir3_leaf_hdr_to_disk( +	struct xfs_dir2_leaf		*to, +	struct xfs_dir3_icleaf_hdr	*from) +{ +	struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)to; + +	ASSERT(from->magic == XFS_DIR3_LEAF1_MAGIC || +	       from->magic == XFS_DIR3_LEAFN_MAGIC); + +	hdr3->info.hdr.forw = cpu_to_be32(from->forw); +	hdr3->info.hdr.back = cpu_to_be32(from->back); +	
hdr3->info.hdr.magic = cpu_to_be16(from->magic); +	hdr3->count = cpu_to_be16(from->count); +	hdr3->stale = cpu_to_be16(from->stale); +} + + +/* + * Directory/Attribute Node block operations + */ +static struct xfs_da_node_entry * +xfs_da2_node_tree_p(struct xfs_da_intnode *dap) +{ +	return dap->__btree; +} + +static struct xfs_da_node_entry * +xfs_da3_node_tree_p(struct xfs_da_intnode *dap) +{ +	return ((struct xfs_da3_intnode *)dap)->__btree; +} + +static void +xfs_da2_node_hdr_from_disk( +	struct xfs_da3_icnode_hdr	*to, +	struct xfs_da_intnode		*from) +{ +	ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); +	to->forw = be32_to_cpu(from->hdr.info.forw); +	to->back = be32_to_cpu(from->hdr.info.back); +	to->magic = be16_to_cpu(from->hdr.info.magic); +	to->count = be16_to_cpu(from->hdr.__count); +	to->level = be16_to_cpu(from->hdr.__level); +} + +static void +xfs_da2_node_hdr_to_disk( +	struct xfs_da_intnode		*to, +	struct xfs_da3_icnode_hdr	*from) +{ +	ASSERT(from->magic == XFS_DA_NODE_MAGIC); +	to->hdr.info.forw = cpu_to_be32(from->forw); +	to->hdr.info.back = cpu_to_be32(from->back); +	to->hdr.info.magic = cpu_to_be16(from->magic); +	to->hdr.__count = cpu_to_be16(from->count); +	to->hdr.__level = cpu_to_be16(from->level); +} + +static void +xfs_da3_node_hdr_from_disk( +	struct xfs_da3_icnode_hdr	*to, +	struct xfs_da_intnode		*from) +{ +	struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)from; + +	ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC)); +	to->forw = be32_to_cpu(hdr3->info.hdr.forw); +	to->back = be32_to_cpu(hdr3->info.hdr.back); +	to->magic = be16_to_cpu(hdr3->info.hdr.magic); +	to->count = be16_to_cpu(hdr3->__count); +	to->level = be16_to_cpu(hdr3->__level); +} + +static void +xfs_da3_node_hdr_to_disk( +	struct xfs_da_intnode		*to, +	struct xfs_da3_icnode_hdr	*from) +{ +	struct xfs_da3_node_hdr *hdr3 = (struct xfs_da3_node_hdr *)to; + +	ASSERT(from->magic == XFS_DA3_NODE_MAGIC); +	hdr3->info.hdr.forw = cpu_to_be32(from->forw); +	hdr3->info.hdr.back = cpu_to_be32(from->back); +	hdr3->info.hdr.magic = cpu_to_be16(from->magic); +	hdr3->__count = cpu_to_be16(from->count); +	hdr3->__level = cpu_to_be16(from->level); +} + + +/* + * Directory free space block operations + */ +static int +xfs_dir2_free_max_bests(struct xfs_da_geometry *geo) +{ +	return (geo->blksize - sizeof(struct xfs_dir2_free_hdr)) / +		sizeof(xfs_dir2_data_off_t); +} + +static __be16 * +xfs_dir2_free_bests_p(struct xfs_dir2_free *free) +{ +	return (__be16 *)((char *)free + sizeof(struct xfs_dir2_free_hdr)); +} + +/* + * Convert data space db to the corresponding free db. + */ +static xfs_dir2_db_t +xfs_dir2_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db) +{ +	return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) + +			(db / xfs_dir2_free_max_bests(geo)); +} + +/* + * Convert data space db to the corresponding index in a free db. + */ +static int +xfs_dir2_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db) +{ +	return db % xfs_dir2_free_max_bests(geo); +} + +static int +xfs_dir3_free_max_bests(struct xfs_da_geometry *geo) +{ +	return (geo->blksize - sizeof(struct xfs_dir3_free_hdr)) / +		sizeof(xfs_dir2_data_off_t); +} + +static __be16 * +xfs_dir3_free_bests_p(struct xfs_dir2_free *free) +{ +	return (__be16 *)((char *)free + sizeof(struct xfs_dir3_free_hdr)); +} + +/* + * Convert data space db to the corresponding free db. 
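The data-block to free-block mapping described here is a plain division and remainder on the number of "best free" slots that fit in one free block. A hedged sketch with example geometry follows; the 4096-byte directory block, 16-byte v2 free-block header and 2-byte slot size are assumed values, and firstdb stands in for xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET).

#include <stdio.h>

int main(void)
{
	int blksize = 4096;				/* example directory block size */
	int bests_per_block = (blksize - 16) / 2;	/* assumed header and slot sizes */
	int db = 5000;					/* arbitrary data block number */

	printf("data block %d -> free block firstdb+%d, slot %d\n",
	       db, db / bests_per_block, db % bests_per_block);
	return 0;
}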
+ */ +static xfs_dir2_db_t +xfs_dir3_db_to_fdb(struct xfs_da_geometry *geo, xfs_dir2_db_t db) +{ +	return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) + +			(db / xfs_dir3_free_max_bests(geo)); +} + +/* + * Convert data space db to the corresponding index in a free db. + */ +static int +xfs_dir3_db_to_fdindex(struct xfs_da_geometry *geo, xfs_dir2_db_t db) +{ +	return db % xfs_dir3_free_max_bests(geo); +} + +static void +xfs_dir2_free_hdr_from_disk( +	struct xfs_dir3_icfree_hdr	*to, +	struct xfs_dir2_free		*from) +{ +	to->magic = be32_to_cpu(from->hdr.magic); +	to->firstdb = be32_to_cpu(from->hdr.firstdb); +	to->nvalid = be32_to_cpu(from->hdr.nvalid); +	to->nused = be32_to_cpu(from->hdr.nused); +	ASSERT(to->magic == XFS_DIR2_FREE_MAGIC); +} + +static void +xfs_dir2_free_hdr_to_disk( +	struct xfs_dir2_free		*to, +	struct xfs_dir3_icfree_hdr	*from) +{ +	ASSERT(from->magic == XFS_DIR2_FREE_MAGIC); + +	to->hdr.magic = cpu_to_be32(from->magic); +	to->hdr.firstdb = cpu_to_be32(from->firstdb); +	to->hdr.nvalid = cpu_to_be32(from->nvalid); +	to->hdr.nused = cpu_to_be32(from->nused); +} + +static void +xfs_dir3_free_hdr_from_disk( +	struct xfs_dir3_icfree_hdr	*to, +	struct xfs_dir2_free		*from) +{ +	struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)from; + +	to->magic = be32_to_cpu(hdr3->hdr.magic); +	to->firstdb = be32_to_cpu(hdr3->firstdb); +	to->nvalid = be32_to_cpu(hdr3->nvalid); +	to->nused = be32_to_cpu(hdr3->nused); + +	ASSERT(to->magic == XFS_DIR3_FREE_MAGIC); +} + +static void +xfs_dir3_free_hdr_to_disk( +	struct xfs_dir2_free		*to, +	struct xfs_dir3_icfree_hdr	*from) +{ +	struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)to; + +	ASSERT(from->magic == XFS_DIR3_FREE_MAGIC); + +	hdr3->hdr.magic = cpu_to_be32(from->magic); +	hdr3->firstdb = cpu_to_be32(from->firstdb); +	hdr3->nvalid = cpu_to_be32(from->nvalid); +	hdr3->nused = cpu_to_be32(from->nused); +} + +static const struct xfs_dir_ops xfs_dir2_ops = { +	.sf_entsize = xfs_dir2_sf_entsize, +	.sf_nextentry = xfs_dir2_sf_nextentry, +	.sf_get_ftype = xfs_dir2_sfe_get_ftype, +	.sf_put_ftype = xfs_dir2_sfe_put_ftype, +	.sf_get_ino = xfs_dir2_sfe_get_ino, +	.sf_put_ino = xfs_dir2_sfe_put_ino, +	.sf_get_parent_ino = xfs_dir2_sf_get_parent_ino, +	.sf_put_parent_ino = xfs_dir2_sf_put_parent_ino, + +	.data_entsize = xfs_dir2_data_entsize, +	.data_get_ftype = xfs_dir2_data_get_ftype, +	.data_put_ftype = xfs_dir2_data_put_ftype, +	.data_entry_tag_p = xfs_dir2_data_entry_tag_p, +	.data_bestfree_p = xfs_dir2_data_bestfree_p, + +	.data_dot_offset = sizeof(struct xfs_dir2_data_hdr), +	.data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) + +				XFS_DIR2_DATA_ENTSIZE(1), +	.data_first_offset =  sizeof(struct xfs_dir2_data_hdr) + +				XFS_DIR2_DATA_ENTSIZE(1) + +				XFS_DIR2_DATA_ENTSIZE(2), +	.data_entry_offset = sizeof(struct xfs_dir2_data_hdr), + +	.data_dot_entry_p = xfs_dir2_data_dot_entry_p, +	.data_dotdot_entry_p = xfs_dir2_data_dotdot_entry_p, +	.data_first_entry_p = xfs_dir2_data_first_entry_p, +	.data_entry_p = xfs_dir2_data_entry_p, +	.data_unused_p = xfs_dir2_data_unused_p, + +	.leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr), +	.leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk, +	.leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk, +	.leaf_max_ents = xfs_dir2_max_leaf_ents, +	.leaf_ents_p = xfs_dir2_leaf_ents_p, + +	.node_hdr_size = sizeof(struct xfs_da_node_hdr), +	.node_hdr_to_disk = xfs_da2_node_hdr_to_disk, +	.node_hdr_from_disk = xfs_da2_node_hdr_from_disk, +	.node_tree_p = xfs_da2_node_tree_p, + +	.free_hdr_size = 
sizeof(struct xfs_dir2_free_hdr), +	.free_hdr_to_disk = xfs_dir2_free_hdr_to_disk, +	.free_hdr_from_disk = xfs_dir2_free_hdr_from_disk, +	.free_max_bests = xfs_dir2_free_max_bests, +	.free_bests_p = xfs_dir2_free_bests_p, +	.db_to_fdb = xfs_dir2_db_to_fdb, +	.db_to_fdindex = xfs_dir2_db_to_fdindex, +}; + +static const struct xfs_dir_ops xfs_dir2_ftype_ops = { +	.sf_entsize = xfs_dir3_sf_entsize, +	.sf_nextentry = xfs_dir3_sf_nextentry, +	.sf_get_ftype = xfs_dir3_sfe_get_ftype, +	.sf_put_ftype = xfs_dir3_sfe_put_ftype, +	.sf_get_ino = xfs_dir3_sfe_get_ino, +	.sf_put_ino = xfs_dir3_sfe_put_ino, +	.sf_get_parent_ino = xfs_dir2_sf_get_parent_ino, +	.sf_put_parent_ino = xfs_dir2_sf_put_parent_ino, + +	.data_entsize = xfs_dir3_data_entsize, +	.data_get_ftype = xfs_dir3_data_get_ftype, +	.data_put_ftype = xfs_dir3_data_put_ftype, +	.data_entry_tag_p = xfs_dir3_data_entry_tag_p, +	.data_bestfree_p = xfs_dir2_data_bestfree_p, + +	.data_dot_offset = sizeof(struct xfs_dir2_data_hdr), +	.data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) + +				XFS_DIR3_DATA_ENTSIZE(1), +	.data_first_offset =  sizeof(struct xfs_dir2_data_hdr) + +				XFS_DIR3_DATA_ENTSIZE(1) + +				XFS_DIR3_DATA_ENTSIZE(2), +	.data_entry_offset = sizeof(struct xfs_dir2_data_hdr), + +	.data_dot_entry_p = xfs_dir2_data_dot_entry_p, +	.data_dotdot_entry_p = xfs_dir2_ftype_data_dotdot_entry_p, +	.data_first_entry_p = xfs_dir2_ftype_data_first_entry_p, +	.data_entry_p = xfs_dir2_data_entry_p, +	.data_unused_p = xfs_dir2_data_unused_p, + +	.leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr), +	.leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk, +	.leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk, +	.leaf_max_ents = xfs_dir2_max_leaf_ents, +	.leaf_ents_p = xfs_dir2_leaf_ents_p, + +	.node_hdr_size = sizeof(struct xfs_da_node_hdr), +	.node_hdr_to_disk = xfs_da2_node_hdr_to_disk, +	.node_hdr_from_disk = xfs_da2_node_hdr_from_disk, +	.node_tree_p = xfs_da2_node_tree_p, + +	.free_hdr_size = sizeof(struct xfs_dir2_free_hdr), +	.free_hdr_to_disk = xfs_dir2_free_hdr_to_disk, +	.free_hdr_from_disk = xfs_dir2_free_hdr_from_disk, +	.free_max_bests = xfs_dir2_free_max_bests, +	.free_bests_p = xfs_dir2_free_bests_p, +	.db_to_fdb = xfs_dir2_db_to_fdb, +	.db_to_fdindex = xfs_dir2_db_to_fdindex, +}; + +static const struct xfs_dir_ops xfs_dir3_ops = { +	.sf_entsize = xfs_dir3_sf_entsize, +	.sf_nextentry = xfs_dir3_sf_nextentry, +	.sf_get_ftype = xfs_dir3_sfe_get_ftype, +	.sf_put_ftype = xfs_dir3_sfe_put_ftype, +	.sf_get_ino = xfs_dir3_sfe_get_ino, +	.sf_put_ino = xfs_dir3_sfe_put_ino, +	.sf_get_parent_ino = xfs_dir2_sf_get_parent_ino, +	.sf_put_parent_ino = xfs_dir2_sf_put_parent_ino, + +	.data_entsize = xfs_dir3_data_entsize, +	.data_get_ftype = xfs_dir3_data_get_ftype, +	.data_put_ftype = xfs_dir3_data_put_ftype, +	.data_entry_tag_p = xfs_dir3_data_entry_tag_p, +	.data_bestfree_p = xfs_dir3_data_bestfree_p, + +	.data_dot_offset = sizeof(struct xfs_dir3_data_hdr), +	.data_dotdot_offset = sizeof(struct xfs_dir3_data_hdr) + +				XFS_DIR3_DATA_ENTSIZE(1), +	.data_first_offset =  sizeof(struct xfs_dir3_data_hdr) + +				XFS_DIR3_DATA_ENTSIZE(1) + +				XFS_DIR3_DATA_ENTSIZE(2), +	.data_entry_offset = sizeof(struct xfs_dir3_data_hdr), + +	.data_dot_entry_p = xfs_dir3_data_dot_entry_p, +	.data_dotdot_entry_p = xfs_dir3_data_dotdot_entry_p, +	.data_first_entry_p = xfs_dir3_data_first_entry_p, +	.data_entry_p = xfs_dir3_data_entry_p, +	.data_unused_p = xfs_dir3_data_unused_p, + +	.leaf_hdr_size = sizeof(struct xfs_dir3_leaf_hdr), +	.leaf_hdr_to_disk = 
xfs_dir3_leaf_hdr_to_disk, +	.leaf_hdr_from_disk = xfs_dir3_leaf_hdr_from_disk, +	.leaf_max_ents = xfs_dir3_max_leaf_ents, +	.leaf_ents_p = xfs_dir3_leaf_ents_p, + +	.node_hdr_size = sizeof(struct xfs_da3_node_hdr), +	.node_hdr_to_disk = xfs_da3_node_hdr_to_disk, +	.node_hdr_from_disk = xfs_da3_node_hdr_from_disk, +	.node_tree_p = xfs_da3_node_tree_p, + +	.free_hdr_size = sizeof(struct xfs_dir3_free_hdr), +	.free_hdr_to_disk = xfs_dir3_free_hdr_to_disk, +	.free_hdr_from_disk = xfs_dir3_free_hdr_from_disk, +	.free_max_bests = xfs_dir3_free_max_bests, +	.free_bests_p = xfs_dir3_free_bests_p, +	.db_to_fdb = xfs_dir3_db_to_fdb, +	.db_to_fdindex = xfs_dir3_db_to_fdindex, +}; + +static const struct xfs_dir_ops xfs_dir2_nondir_ops = { +	.node_hdr_size = sizeof(struct xfs_da_node_hdr), +	.node_hdr_to_disk = xfs_da2_node_hdr_to_disk, +	.node_hdr_from_disk = xfs_da2_node_hdr_from_disk, +	.node_tree_p = xfs_da2_node_tree_p, +}; + +static const struct xfs_dir_ops xfs_dir3_nondir_ops = { +	.node_hdr_size = sizeof(struct xfs_da3_node_hdr), +	.node_hdr_to_disk = xfs_da3_node_hdr_to_disk, +	.node_hdr_from_disk = xfs_da3_node_hdr_from_disk, +	.node_tree_p = xfs_da3_node_tree_p, +}; + +/* + * Return the ops structure according to the current config.  If we are passed + * an inode, then that overrides the default config we use which is based on + * feature bits. + */ +const struct xfs_dir_ops * +xfs_dir_get_ops( +	struct xfs_mount	*mp, +	struct xfs_inode	*dp) +{ +	if (dp) +		return dp->d_ops; +	if (mp->m_dir_inode_ops) +		return mp->m_dir_inode_ops; +	if (xfs_sb_version_hascrc(&mp->m_sb)) +		return &xfs_dir3_ops; +	if (xfs_sb_version_hasftype(&mp->m_sb)) +		return &xfs_dir2_ftype_ops; +	return &xfs_dir2_ops; +} + +const struct xfs_dir_ops * +xfs_nondir_get_ops( +	struct xfs_mount	*mp, +	struct xfs_inode	*dp) +{ +	if (dp) +		return dp->d_ops; +	if (mp->m_nondir_inode_ops) +		return mp->m_nondir_inode_ops; +	if (xfs_sb_version_hascrc(&mp->m_sb)) +		return &xfs_dir3_nondir_ops; +	return &xfs_dir2_nondir_ops; +} diff --git a/fs/xfs/xfs_dir2_format.h b/fs/xfs/xfs_da_format.h index a0961a61ac1..0a49b028637 100644 --- a/fs/xfs/xfs_dir2_format.h +++ b/fs/xfs/xfs_da_format.h @@ -16,8 +16,107 @@   * along with this program; if not, write the Free Software Foundation,   * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA   */ -#ifndef __XFS_DIR2_FORMAT_H__ -#define __XFS_DIR2_FORMAT_H__ +#ifndef __XFS_DA_FORMAT_H__ +#define __XFS_DA_FORMAT_H__ + +/* + * This structure is common to both leaf nodes and non-leaf nodes in the Btree. + * + * It is used to manage a doubly linked list of all blocks at the same + * level in the Btree, and to identify which type of block this is. + */ +#define XFS_DA_NODE_MAGIC	0xfebe	/* magic number: non-leaf blocks */ +#define XFS_ATTR_LEAF_MAGIC	0xfbee	/* magic number: attribute leaf blks */ +#define	XFS_DIR2_LEAF1_MAGIC	0xd2f1	/* magic number: v2 dirlf single blks */ +#define	XFS_DIR2_LEAFN_MAGIC	0xd2ff	/* magic number: v2 dirlf multi blks */ + +typedef struct xfs_da_blkinfo { +	__be32		forw;			/* previous block in list */ +	__be32		back;			/* following block in list */ +	__be16		magic;			/* validity check on block */ +	__be16		pad;			/* unused */ +} xfs_da_blkinfo_t; + +/* + * CRC enabled directory structure types + * + * The headers change size for the additional verification information, but + * otherwise the tree layouts and contents are unchanged. 
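The payoff of the constant ops tables above is that the format decision is made once, in xfs_dir_get_ops()/xfs_nondir_get_ops(), and every later caller dispatches through function pointers instead of re-testing feature bits on each access. A self-contained miniature of that pattern follows; it models the idea only and is not the real XFS API.

#include <stdio.h>

struct dir_ops {
	int (*data_entsize)(int namelen);
};

/* two "formats": with and without a trailing file type byte (sizes assumed) */
static int v2_entsize(int namelen) { return (8 + 1 + namelen + 2 + 7) & ~7; }
static int ft_entsize(int namelen) { return (8 + 1 + namelen + 2 + 1 + 7) & ~7; }

static const struct dir_ops v2_ops = { .data_entsize = v2_entsize };
static const struct dir_ops ft_ops = { .data_entsize = ft_entsize };

/* chosen once per mount or inode, like xfs_dir_get_ops() above */
static const struct dir_ops *get_ops(int has_ftype)
{
	return has_ftype ? &ft_ops : &v2_ops;
}

int main(void)
{
	const struct dir_ops *ops = get_ops(1);

	/* callers never look at feature bits again, they just dispatch */
	printf("entsize(\"foo\") = %d\n", ops->data_entsize(3));
	return 0;
}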
Hence the da btree + * code can use the struct xfs_da_blkinfo for manipulating the tree links and + * magic numbers without modification for both v2 and v3 nodes. + */ +#define XFS_DA3_NODE_MAGIC	0x3ebe	/* magic number: non-leaf blocks */ +#define XFS_ATTR3_LEAF_MAGIC	0x3bee	/* magic number: attribute leaf blks */ +#define	XFS_DIR3_LEAF1_MAGIC	0x3df1	/* magic number: v2 dirlf single blks */ +#define	XFS_DIR3_LEAFN_MAGIC	0x3dff	/* magic number: v2 dirlf multi blks */ + +struct xfs_da3_blkinfo { +	/* +	 * the node link manipulation code relies on the fact that the first +	 * element of this structure is the struct xfs_da_blkinfo so it can +	 * ignore the differences in the rest of the structures. +	 */ +	struct xfs_da_blkinfo	hdr; +	__be32			crc;	/* CRC of block */ +	__be64			blkno;	/* first block of the buffer */ +	__be64			lsn;	/* sequence number of last write */ +	uuid_t			uuid;	/* filesystem we belong to */ +	__be64			owner;	/* inode that owns the block */ +}; + +/* + * This is the structure of the root and intermediate nodes in the Btree. + * The leaf nodes are defined above. + * + * Entries are not packed. + * + * Since we have duplicate keys, use a binary search but always follow + * all match in the block, not just the first match found. + */ +#define	XFS_DA_NODE_MAXDEPTH	5	/* max depth of Btree */ + +typedef struct xfs_da_node_hdr { +	struct xfs_da_blkinfo	info;	/* block type, links, etc. */ +	__be16			__count; /* count of active entries */ +	__be16			__level; /* level above leaves (leaf == 0) */ +} xfs_da_node_hdr_t; + +struct xfs_da3_node_hdr { +	struct xfs_da3_blkinfo	info;	/* block type, links, etc. */ +	__be16			__count; /* count of active entries */ +	__be16			__level; /* level above leaves (leaf == 0) */ +	__be32			__pad32; +}; + +#define XFS_DA3_NODE_CRC_OFF	(offsetof(struct xfs_da3_node_hdr, info.crc)) + +typedef struct xfs_da_node_entry { +	__be32	hashval;	/* hash value for this descendant */ +	__be32	before;		/* Btree block before this key */ +} xfs_da_node_entry_t; + +typedef struct xfs_da_intnode { +	struct xfs_da_node_hdr	hdr; +	struct xfs_da_node_entry __btree[]; +} xfs_da_intnode_t; + +struct xfs_da3_intnode { +	struct xfs_da3_node_hdr	hdr; +	struct xfs_da_node_entry __btree[]; +}; + +/* + * In-core version of the node header to abstract the differences in the v2 and + * v3 disk format of the headers. Callers need to convert to/from disk format as + * appropriate. + */ +struct xfs_da3_icnode_hdr { +	__uint32_t	forw; +	__uint32_t	back; +	__uint16_t	magic; +	__uint16_t	count; +	__uint16_t	level; +};  /*   * Directory version 2. @@ -189,79 +288,6 @@ xfs_dir2_sf_firstentry(struct xfs_dir2_sf_hdr *hdr)  		((char *)hdr + xfs_dir2_sf_hdr_size(hdr->i8count));  } -static inline int -xfs_dir3_sf_entsize( -	struct xfs_mount	*mp, -	struct xfs_dir2_sf_hdr	*hdr, -	int			len) -{ -	int count = sizeof(struct xfs_dir2_sf_entry); 	/* namelen + offset */ - -	count += len;					/* name */ -	count += hdr->i8count ? sizeof(xfs_dir2_ino8_t) : -				sizeof(xfs_dir2_ino4_t); /* ino # */ -	if (xfs_sb_version_hasftype(&mp->m_sb)) -		count += sizeof(__uint8_t);		/* file type */ -	return count; -} - -static inline struct xfs_dir2_sf_entry * -xfs_dir3_sf_nextentry( -	struct xfs_mount	*mp, -	struct xfs_dir2_sf_hdr	*hdr, -	struct xfs_dir2_sf_entry *sfep) -{ -	return (struct xfs_dir2_sf_entry *) -		((char *)sfep + xfs_dir3_sf_entsize(mp, hdr, sfep->namelen)); -} - -/* - * in dir3 shortform directories, the file type field is stored at a variable - * offset after the inode number. 
Because it's only a single byte, endian - * conversion is not necessary. - */ -static inline __uint8_t * -xfs_dir3_sfe_ftypep( -	struct xfs_dir2_sf_hdr	*hdr, -	struct xfs_dir2_sf_entry *sfep) -{ -	return (__uint8_t *)&sfep->name[sfep->namelen]; -} - -static inline __uint8_t -xfs_dir3_sfe_get_ftype( -	struct xfs_mount	*mp, -	struct xfs_dir2_sf_hdr	*hdr, -	struct xfs_dir2_sf_entry *sfep) -{ -	__uint8_t	*ftp; - -	if (!xfs_sb_version_hasftype(&mp->m_sb)) -		return XFS_DIR3_FT_UNKNOWN; - -	ftp = xfs_dir3_sfe_ftypep(hdr, sfep); -	if (*ftp >= XFS_DIR3_FT_MAX) -		return XFS_DIR3_FT_UNKNOWN; -	return *ftp; -} - -static inline void -xfs_dir3_sfe_put_ftype( -	struct xfs_mount	*mp, -	struct xfs_dir2_sf_hdr	*hdr, -	struct xfs_dir2_sf_entry *sfep, -	__uint8_t		ftype) -{ -	__uint8_t	*ftp; - -	ASSERT(ftype < XFS_DIR3_FT_MAX); - -	if (!xfs_sb_version_hasftype(&mp->m_sb)) -		return; -	ftp = xfs_dir3_sfe_ftypep(hdr, sfep); -	*ftp = ftype; -} -  /*   * Data block structures.   * @@ -298,8 +324,6 @@ xfs_dir3_sfe_put_ftype(  #define	XFS_DIR2_SPACE_SIZE	(1ULL << (32 + XFS_DIR2_DATA_ALIGN_LOG))  #define	XFS_DIR2_DATA_SPACE	0  #define	XFS_DIR2_DATA_OFFSET	(XFS_DIR2_DATA_SPACE * XFS_DIR2_SPACE_SIZE) -#define	XFS_DIR2_DATA_FIRSTDB(mp)	\ -	xfs_dir2_byte_to_db(mp, XFS_DIR2_DATA_OFFSET)  /*   * Describe a free area in the data block. @@ -345,17 +369,6 @@ struct xfs_dir3_data_hdr {  #define XFS_DIR3_DATA_CRC_OFF  offsetof(struct xfs_dir3_data_hdr, hdr.crc) -static inline struct xfs_dir2_data_free * -xfs_dir3_data_bestfree_p(struct xfs_dir2_data_hdr *hdr) -{ -	if (hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || -	    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { -		struct xfs_dir3_data_hdr *hdr3 = (struct xfs_dir3_data_hdr *)hdr; -		return hdr3->best_free; -	} -	return hdr->bestfree; -} -  /*   * Active entry in a data block.   * @@ -389,72 +402,6 @@ typedef struct xfs_dir2_data_unused {  } xfs_dir2_data_unused_t;  /* - * Size of a data entry. - */ -static inline int -__xfs_dir3_data_entsize( -	bool	ftype, -	int	n) -{ -	int	size = offsetof(struct xfs_dir2_data_entry, name[0]); - -	size += n; -	size += sizeof(xfs_dir2_data_off_t); -	if (ftype) -		size += sizeof(__uint8_t); -	return roundup(size, XFS_DIR2_DATA_ALIGN); -} -static inline int -xfs_dir3_data_entsize( -	struct xfs_mount	*mp, -	int			n) -{ -	bool ftype = xfs_sb_version_hasftype(&mp->m_sb) ? true : false; -	return __xfs_dir3_data_entsize(ftype, n); -} - -static inline __uint8_t -xfs_dir3_dirent_get_ftype( -	struct xfs_mount	*mp, -	struct xfs_dir2_data_entry *dep) -{ -	if (xfs_sb_version_hasftype(&mp->m_sb)) { -		__uint8_t	type = dep->name[dep->namelen]; - -		ASSERT(type < XFS_DIR3_FT_MAX); -		if (type < XFS_DIR3_FT_MAX) -			return type; - -	} -	return XFS_DIR3_FT_UNKNOWN; -} - -static inline void -xfs_dir3_dirent_put_ftype( -	struct xfs_mount	*mp, -	struct xfs_dir2_data_entry *dep, -	__uint8_t		type) -{ -	ASSERT(type < XFS_DIR3_FT_MAX); -	ASSERT(dep->namelen != 0); - -	if (xfs_sb_version_hasftype(&mp->m_sb)) -		dep->name[dep->namelen] = type; -} - -/* - * Pointer to an entry's tag word. - */ -static inline __be16 * -xfs_dir3_data_entry_tag_p( -	struct xfs_mount	*mp, -	struct xfs_dir2_data_entry *dep) -{ -	return (__be16 *)((char *)dep + -		xfs_dir3_data_entsize(mp, dep->namelen) - sizeof(__be16)); -} - -/*   * Pointer to a freespace's tag word.   
*/  static inline __be16 * @@ -464,104 +411,6 @@ xfs_dir2_data_unused_tag_p(struct xfs_dir2_data_unused *dup)  			be16_to_cpu(dup->length) - sizeof(__be16));  } -static inline size_t -xfs_dir3_data_hdr_size(bool dir3) -{ -	if (dir3) -		return sizeof(struct xfs_dir3_data_hdr); -	return sizeof(struct xfs_dir2_data_hdr); -} - -static inline size_t -xfs_dir3_data_entry_offset(struct xfs_dir2_data_hdr *hdr) -{ -	bool dir3 = hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || -		    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC); -	return xfs_dir3_data_hdr_size(dir3); -} - -static inline struct xfs_dir2_data_entry * -xfs_dir3_data_entry_p(struct xfs_dir2_data_hdr *hdr) -{ -	return (struct xfs_dir2_data_entry *) -		((char *)hdr + xfs_dir3_data_entry_offset(hdr)); -} - -static inline struct xfs_dir2_data_unused * -xfs_dir3_data_unused_p(struct xfs_dir2_data_hdr *hdr) -{ -	return (struct xfs_dir2_data_unused *) -		((char *)hdr + xfs_dir3_data_entry_offset(hdr)); -} - -/* - * Offsets of . and .. in data space (always block 0) - * - * The macros are used for shortform directories as they have no headers to read - * the magic number out of. Shortform directories need to know the size of the - * data block header because the sfe embeds the block offset of the entry into - * it so that it doesn't change when format conversion occurs. Bad Things Happen - * if we don't follow this rule. - * - * XXX: there is scope for significant optimisation of the logic here. Right - * now we are checking for "dir3 format" over and over again. Ideally we should - * only do it once for each operation. - */ -#define	XFS_DIR3_DATA_DOT_OFFSET(mp)	\ -	xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&(mp)->m_sb)) -#define	XFS_DIR3_DATA_DOTDOT_OFFSET(mp)	\ -	(XFS_DIR3_DATA_DOT_OFFSET(mp) + xfs_dir3_data_entsize(mp, 1)) -#define	XFS_DIR3_DATA_FIRST_OFFSET(mp)		\ -	(XFS_DIR3_DATA_DOTDOT_OFFSET(mp) + xfs_dir3_data_entsize(mp, 2)) - -static inline xfs_dir2_data_aoff_t -xfs_dir3_data_dot_offset(struct xfs_dir2_data_hdr *hdr) -{ -	return xfs_dir3_data_entry_offset(hdr); -} - -static inline xfs_dir2_data_aoff_t -xfs_dir3_data_dotdot_offset(struct xfs_dir2_data_hdr *hdr) -{ -	bool dir3 = hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || -		    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC); -	return xfs_dir3_data_dot_offset(hdr) + -		__xfs_dir3_data_entsize(dir3, 1); -} - -static inline xfs_dir2_data_aoff_t -xfs_dir3_data_first_offset(struct xfs_dir2_data_hdr *hdr) -{ -	bool dir3 = hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || -		    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC); -	return xfs_dir3_data_dotdot_offset(hdr) + -		__xfs_dir3_data_entsize(dir3, 2); -} - -/* - * location of . and .. in data space (always block 0) - */ -static inline struct xfs_dir2_data_entry * -xfs_dir3_data_dot_entry_p(struct xfs_dir2_data_hdr *hdr) -{ -	return (struct xfs_dir2_data_entry *) -		((char *)hdr + xfs_dir3_data_dot_offset(hdr)); -} - -static inline struct xfs_dir2_data_entry * -xfs_dir3_data_dotdot_entry_p(struct xfs_dir2_data_hdr *hdr) -{ -	return (struct xfs_dir2_data_entry *) -		((char *)hdr + xfs_dir3_data_dotdot_offset(hdr)); -} - -static inline struct xfs_dir2_data_entry * -xfs_dir3_data_first_entry_p(struct xfs_dir2_data_hdr *hdr) -{ -	return (struct xfs_dir2_data_entry *) -		((char *)hdr + xfs_dir3_data_first_offset(hdr)); -} -  /*   * Leaf block structures.   
* @@ -599,8 +448,6 @@ xfs_dir3_data_first_entry_p(struct xfs_dir2_data_hdr *hdr)   */  #define	XFS_DIR2_LEAF_SPACE	1  #define	XFS_DIR2_LEAF_OFFSET	(XFS_DIR2_LEAF_SPACE * XFS_DIR2_SPACE_SIZE) -#define	XFS_DIR2_LEAF_FIRSTDB(mp)	\ -	xfs_dir2_byte_to_db(mp, XFS_DIR2_LEAF_OFFSET)  /*   * Leaf block header. @@ -656,50 +503,6 @@ struct xfs_dir3_leaf {  #define XFS_DIR3_LEAF_CRC_OFF  offsetof(struct xfs_dir3_leaf_hdr, info.crc) -extern void xfs_dir3_leaf_hdr_from_disk(struct xfs_dir3_icleaf_hdr *to, -					struct xfs_dir2_leaf *from); - -static inline int -xfs_dir3_leaf_hdr_size(struct xfs_dir2_leaf *lp) -{ -	if (lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) || -	    lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) -		return sizeof(struct xfs_dir3_leaf_hdr); -	return sizeof(struct xfs_dir2_leaf_hdr); -} - -static inline int -xfs_dir3_max_leaf_ents(struct xfs_mount *mp, struct xfs_dir2_leaf *lp) -{ -	return (mp->m_dirblksize - xfs_dir3_leaf_hdr_size(lp)) / -		(uint)sizeof(struct xfs_dir2_leaf_entry); -} - -/* - * Get address of the bestcount field in the single-leaf block. - */ -static inline struct xfs_dir2_leaf_entry * -xfs_dir3_leaf_ents_p(struct xfs_dir2_leaf *lp) -{ -	if (lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) || -	    lp->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)) { -		struct xfs_dir3_leaf *lp3 = (struct xfs_dir3_leaf *)lp; -		return lp3->__ents; -	} -	return lp->__ents; -} - -/* - * Get address of the bestcount field in the single-leaf block. - */ -static inline struct xfs_dir2_leaf_tail * -xfs_dir2_leaf_tail_p(struct xfs_mount *mp, struct xfs_dir2_leaf *lp) -{ -	return (struct xfs_dir2_leaf_tail *) -		((char *)lp + mp->m_dirblksize - -		  sizeof(struct xfs_dir2_leaf_tail)); -} -  /*   * Get address of the bests array in the single-leaf block.   */ @@ -710,123 +513,6 @@ xfs_dir2_leaf_bests_p(struct xfs_dir2_leaf_tail *ltp)  }  /* - * DB blocks here are logical directory block numbers, not filesystem blocks. - */ - -/* - * Convert dataptr to byte in file space - */ -static inline xfs_dir2_off_t -xfs_dir2_dataptr_to_byte(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) -{ -	return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG; -} - -/* - * Convert byte in file space to dataptr.  It had better be aligned. 
- */ -static inline xfs_dir2_dataptr_t -xfs_dir2_byte_to_dataptr(struct xfs_mount *mp, xfs_dir2_off_t by) -{ -	return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG); -} - -/* - * Convert byte in space to (DB) block - */ -static inline xfs_dir2_db_t -xfs_dir2_byte_to_db(struct xfs_mount *mp, xfs_dir2_off_t by) -{ -	return (xfs_dir2_db_t) -		(by >> (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)); -} - -/* - * Convert dataptr to a block number - */ -static inline xfs_dir2_db_t -xfs_dir2_dataptr_to_db(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) -{ -	return xfs_dir2_byte_to_db(mp, xfs_dir2_dataptr_to_byte(mp, dp)); -} - -/* - * Convert byte in space to offset in a block - */ -static inline xfs_dir2_data_aoff_t -xfs_dir2_byte_to_off(struct xfs_mount *mp, xfs_dir2_off_t by) -{ -	return (xfs_dir2_data_aoff_t)(by & -		((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) - 1)); -} - -/* - * Convert dataptr to a byte offset in a block - */ -static inline xfs_dir2_data_aoff_t -xfs_dir2_dataptr_to_off(struct xfs_mount *mp, xfs_dir2_dataptr_t dp) -{ -	return xfs_dir2_byte_to_off(mp, xfs_dir2_dataptr_to_byte(mp, dp)); -} - -/* - * Convert block and offset to byte in space - */ -static inline xfs_dir2_off_t -xfs_dir2_db_off_to_byte(struct xfs_mount *mp, xfs_dir2_db_t db, -			xfs_dir2_data_aoff_t o) -{ -	return ((xfs_dir2_off_t)db << -		(mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) + o; -} - -/* - * Convert block (DB) to block (dablk) - */ -static inline xfs_dablk_t -xfs_dir2_db_to_da(struct xfs_mount *mp, xfs_dir2_db_t db) -{ -	return (xfs_dablk_t)(db << mp->m_sb.sb_dirblklog); -} - -/* - * Convert byte in space to (DA) block - */ -static inline xfs_dablk_t -xfs_dir2_byte_to_da(struct xfs_mount *mp, xfs_dir2_off_t by) -{ -	return xfs_dir2_db_to_da(mp, xfs_dir2_byte_to_db(mp, by)); -} - -/* - * Convert block and offset to dataptr - */ -static inline xfs_dir2_dataptr_t -xfs_dir2_db_off_to_dataptr(struct xfs_mount *mp, xfs_dir2_db_t db, -			   xfs_dir2_data_aoff_t o) -{ -	return xfs_dir2_byte_to_dataptr(mp, xfs_dir2_db_off_to_byte(mp, db, o)); -} - -/* - * Convert block (dablk) to block (DB) - */ -static inline xfs_dir2_db_t -xfs_dir2_da_to_db(struct xfs_mount *mp, xfs_dablk_t da) -{ -	return (xfs_dir2_db_t)(da >> mp->m_sb.sb_dirblklog); -} - -/* - * Convert block (dablk) to byte offset in space - */ -static inline xfs_dir2_off_t -xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da) -{ -	return xfs_dir2_db_off_to_byte(mp, xfs_dir2_da_to_db(mp, da), 0); -} - -/*   * Free space block defintions for the node format.   
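Stepping back to the directory-address conversions removed above: their replacements elsewhere in this series do the same shifts against a struct xfs_da_geometry instead of reaching into the mount's superblock every time. A hedged user-space sketch of that arithmetic follows; the log values are examples and the 8-byte data alignment behind XFS_DIR2_LEAF_OFFSET is assumed.

#include <stdio.h>
#include <stdint.h>

struct geo { unsigned blklog, fsblog; };	/* stand-in for xfs_da_geometry */

static uint32_t byte_to_db(const struct geo *g, uint64_t by)
{
	return (uint32_t)(by >> g->blklog);		/* byte offset -> dir block */
}

static uint32_t db_to_da(const struct geo *g, uint32_t db)
{
	return db << (g->blklog - g->fsblog);		/* dir block -> da (fs) block */
}

int main(void)
{
	/* example: 4k filesystem blocks (log 12), 2-fsb directory blocks (log 13) */
	struct geo g = { .blklog = 13, .fsblog = 12 };
	/* XFS_DIR2_LEAF_OFFSET = 1 * (1ULL << (32 + 3)) with 8-byte alignment assumed */
	uint64_t leaf_off = 1ULL << 35;

	printf("leaf space starts at dir block %u, da block %u\n",
	       byte_to_db(&g, leaf_off), db_to_da(&g, byte_to_db(&g, leaf_off)));
	return 0;
}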
*/ @@ -835,8 +521,6 @@ xfs_dir2_da_to_byte(struct xfs_mount *mp, xfs_dablk_t da)   */  #define	XFS_DIR2_FREE_SPACE	2  #define	XFS_DIR2_FREE_OFFSET	(XFS_DIR2_FREE_SPACE * XFS_DIR2_SPACE_SIZE) -#define	XFS_DIR2_FREE_FIRSTDB(mp)	\ -	xfs_dir2_byte_to_db(mp, XFS_DIR2_FREE_OFFSET)  typedef	struct xfs_dir2_free_hdr {  	__be32			magic;		/* XFS_DIR2_FREE_MAGIC */ @@ -880,48 +564,6 @@ struct xfs_dir3_icfree_hdr {  }; -void xfs_dir3_free_hdr_from_disk(struct xfs_dir3_icfree_hdr *to, -				 struct xfs_dir2_free *from); - -static inline int -xfs_dir3_free_hdr_size(struct xfs_mount *mp) -{ -	if (xfs_sb_version_hascrc(&mp->m_sb)) -		return sizeof(struct xfs_dir3_free_hdr); -	return sizeof(struct xfs_dir2_free_hdr); -} - -static inline int -xfs_dir3_free_max_bests(struct xfs_mount *mp) -{ -	return (mp->m_dirblksize - xfs_dir3_free_hdr_size(mp)) / -		sizeof(xfs_dir2_data_off_t); -} - -static inline __be16 * -xfs_dir3_free_bests_p(struct xfs_mount *mp, struct xfs_dir2_free *free) -{ -	return (__be16 *)((char *)free + xfs_dir3_free_hdr_size(mp)); -} - -/* - * Convert data space db to the corresponding free db. - */ -static inline xfs_dir2_db_t -xfs_dir2_db_to_fdb(struct xfs_mount *mp, xfs_dir2_db_t db) -{ -	return XFS_DIR2_FREE_FIRSTDB(mp) + db / xfs_dir3_free_max_bests(mp); -} - -/* - * Convert data space db to the corresponding index in a free db. - */ -static inline int -xfs_dir2_db_to_fdindex(struct xfs_mount *mp, xfs_dir2_db_t db) -{ -	return db % xfs_dir3_free_max_bests(mp); -} -  /*   * Single block format.   * @@ -954,22 +596,266 @@ typedef struct xfs_dir2_block_tail {  } xfs_dir2_block_tail_t;  /* - * Pointer to the leaf header embedded in a data block (1-block format) + * Pointer to the leaf entries embedded in a data block (1-block format)   */ -static inline struct xfs_dir2_block_tail * -xfs_dir2_block_tail_p(struct xfs_mount *mp, struct xfs_dir2_data_hdr *hdr) +static inline struct xfs_dir2_leaf_entry * +xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp)  { -	return ((struct xfs_dir2_block_tail *) -		((char *)hdr + mp->m_dirblksize)) - 1; +	return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count);  } +  /* - * Pointer to the leaf entries embedded in a data block (1-block format) + * Attribute storage layout + * + * Attribute lists are structured around Btrees where all the data + * elements are in the leaf nodes.  Attribute names are hashed into an int, + * then that int is used as the index into the Btree.  Since the hashval + * of an attribute name may not be unique, we may have duplicate keys.  The + * internal links in the Btree are logical block offsets into the file. + * + * Struct leaf_entry's are packed from the top.  Name/values grow from the + * bottom but are not packed.  The freemap contains run-length-encoded entries + * for the free bytes after the leaf_entry's, but only the N largest such, + * smaller runs are dropped.  When the freemap doesn't show enough space + * for an allocation, we compact the name/value area and try again.  If we + * still don't have enough space, then we have to split the block.  The + * name/value structs (both local and remote versions) must be 32bit aligned. + * + * Since we have duplicate hash keys, for each key that matches, compare + * the actual name string.  The root and intermediate node search always + * takes the first-in-the-block key match found, so we should only have + * to work "forw"ard.  If none matches, continue with the "forw"ard leaf + * nodes until the hash key changes or the attribute name is found. 
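Because hash values may collide, the lookup walk described above cannot stop at the first hash match; it keeps comparing actual names for as long as the hash stays equal. A minimal self-contained sketch of that walk follows; the entry layout is a stand-in, not the on-disk xfs_attr_leaf_entry.

#include <stdio.h>
#include <stdint.h>
#include <string.h>

struct entry { uint32_t hashval; const char *name; };

/* entries are kept sorted by hashval; duplicate hash values are allowed */
static int leaf_lookup(const struct entry *e, int count,
		       uint32_t hash, const char *name)
{
	int i;

	for (i = 0; i < count && e[i].hashval < hash; i++)
		;				/* the real code binary-searches here */
	for (; i < count && e[i].hashval == hash; i++)
		if (strcmp(e[i].name, name) == 0)
			return i;		/* matched on name, not just hash */
	return -1;
}

int main(void)
{
	static const struct entry leaf[] = {
		{ 0x10, "alpha" }, { 0x2a, "collide1" },
		{ 0x2a, "collide2" }, { 0x90, "zeta" },
	};

	printf("found at index %d\n", leaf_lookup(leaf, 4, 0x2a, "collide2"));
	return 0;
}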
+ * + * We store the fact that an attribute is a ROOT/USER/SECURE attribute in + * the leaf_entry.  The namespaces are independent only because we also look + * at the namespace bit when we are looking for a matching attribute name. + * + * We also store an "incomplete" bit in the leaf_entry.  It shows that an + * attribute is in the middle of being created and should not be shown to + * the user if we crash during the time that the bit is set.  We clear the + * bit when we have finished setting up the attribute.  We do this because + * we cannot create some large attributes inside a single transaction, and we + * need some indication that we weren't finished if we crash in the middle. + */ +#define XFS_ATTR_LEAF_MAPSIZE	3	/* how many freespace slots */ + +typedef struct xfs_attr_leaf_map {	/* RLE map of free bytes */ +	__be16	base;			  /* base of free region */ +	__be16	size;			  /* length of free region */ +} xfs_attr_leaf_map_t; + +typedef struct xfs_attr_leaf_hdr {	/* constant-structure header block */ +	xfs_da_blkinfo_t info;		/* block type, links, etc. */ +	__be16	count;			/* count of active leaf_entry's */ +	__be16	usedbytes;		/* num bytes of names/values stored */ +	__be16	firstused;		/* first used byte in name area */ +	__u8	holes;			/* != 0 if blk needs compaction */ +	__u8	pad1; +	xfs_attr_leaf_map_t freemap[XFS_ATTR_LEAF_MAPSIZE]; +					/* N largest free regions */ +} xfs_attr_leaf_hdr_t; + +typedef struct xfs_attr_leaf_entry {	/* sorted on key, not name */ +	__be32	hashval;		/* hash value of name */ +	__be16	nameidx;		/* index into buffer of name/value */ +	__u8	flags;			/* LOCAL/ROOT/SECURE/INCOMPLETE flag */ +	__u8	pad2;			/* unused pad byte */ +} xfs_attr_leaf_entry_t; + +typedef struct xfs_attr_leaf_name_local { +	__be16	valuelen;		/* number of bytes in value */ +	__u8	namelen;		/* length of name bytes */ +	__u8	nameval[1];		/* name/value bytes */ +} xfs_attr_leaf_name_local_t; + +typedef struct xfs_attr_leaf_name_remote { +	__be32	valueblk;		/* block number of value bytes */ +	__be32	valuelen;		/* number of bytes in value */ +	__u8	namelen;		/* length of name bytes */ +	__u8	name[1];		/* name bytes */ +} xfs_attr_leaf_name_remote_t; + +typedef struct xfs_attr_leafblock { +	xfs_attr_leaf_hdr_t	hdr;	/* constant-structure header block */ +	xfs_attr_leaf_entry_t	entries[1];	/* sorted on key, not name */ +	xfs_attr_leaf_name_local_t namelist;	/* grows from bottom of buf */ +	xfs_attr_leaf_name_remote_t valuelist;	/* grows from bottom of buf */ +} xfs_attr_leafblock_t; + +/* + * CRC enabled leaf structures. Called "version 3" structures to match the + * version number of the directory and dablk structures for this feature, and + * attr2 is already taken by the variable inode attribute fork size feature. + */ +struct xfs_attr3_leaf_hdr { +	struct xfs_da3_blkinfo	info; +	__be16			count; +	__be16			usedbytes; +	__be16			firstused; +	__u8			holes; +	__u8			pad1; +	struct xfs_attr_leaf_map freemap[XFS_ATTR_LEAF_MAPSIZE]; +	__be32			pad2;		/* 64 bit alignment */ +}; + +#define XFS_ATTR3_LEAF_CRC_OFF	(offsetof(struct xfs_attr3_leaf_hdr, info.crc)) + +struct xfs_attr3_leafblock { +	struct xfs_attr3_leaf_hdr	hdr; +	struct xfs_attr_leaf_entry	entries[1]; + +	/* +	 * The rest of the block contains the following structures after the +	 * leaf entries, growing from the bottom up. The variables are never +	 * referenced, the locations accessed purely from helper functions. 
+	 * +	 * struct xfs_attr_leaf_name_local +	 * struct xfs_attr_leaf_name_remote +	 */ +}; + +/* + * incore, neutral version of the attribute leaf header + */ +struct xfs_attr3_icleaf_hdr { +	__uint32_t	forw; +	__uint32_t	back; +	__uint16_t	magic; +	__uint16_t	count; +	__uint16_t	usedbytes; +	__uint16_t	firstused; +	__u8		holes; +	struct { +		__uint16_t	base; +		__uint16_t	size; +	} freemap[XFS_ATTR_LEAF_MAPSIZE]; +}; + +/* + * Flags used in the leaf_entry[i].flags field. + * NOTE: the INCOMPLETE bit must not collide with the flags bits specified + * on the system call, they are "or"ed together for various operations.   */ -static inline struct xfs_dir2_leaf_entry * -xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp) +#define	XFS_ATTR_LOCAL_BIT	0	/* attr is stored locally */ +#define	XFS_ATTR_ROOT_BIT	1	/* limit access to trusted attrs */ +#define	XFS_ATTR_SECURE_BIT	2	/* limit access to secure attrs */ +#define	XFS_ATTR_INCOMPLETE_BIT	7	/* attr in middle of create/delete */ +#define XFS_ATTR_LOCAL		(1 << XFS_ATTR_LOCAL_BIT) +#define XFS_ATTR_ROOT		(1 << XFS_ATTR_ROOT_BIT) +#define XFS_ATTR_SECURE		(1 << XFS_ATTR_SECURE_BIT) +#define XFS_ATTR_INCOMPLETE	(1 << XFS_ATTR_INCOMPLETE_BIT) + +/* + * Conversion macros for converting namespace bits from argument flags + * to ondisk flags. + */ +#define XFS_ATTR_NSP_ARGS_MASK		(ATTR_ROOT | ATTR_SECURE) +#define XFS_ATTR_NSP_ONDISK_MASK	(XFS_ATTR_ROOT | XFS_ATTR_SECURE) +#define XFS_ATTR_NSP_ONDISK(flags)	((flags) & XFS_ATTR_NSP_ONDISK_MASK) +#define XFS_ATTR_NSP_ARGS(flags)	((flags) & XFS_ATTR_NSP_ARGS_MASK) +#define XFS_ATTR_NSP_ARGS_TO_ONDISK(x)	(((x) & ATTR_ROOT ? XFS_ATTR_ROOT : 0) |\ +					 ((x) & ATTR_SECURE ? XFS_ATTR_SECURE : 0)) +#define XFS_ATTR_NSP_ONDISK_TO_ARGS(x)	(((x) & XFS_ATTR_ROOT ? ATTR_ROOT : 0) |\ +					 ((x) & XFS_ATTR_SECURE ? ATTR_SECURE : 0)) + +/* + * Alignment for namelist and valuelist entries (since they are mixed + * there can be only one alignment value) + */ +#define	XFS_ATTR_LEAF_NAME_ALIGN	((uint)sizeof(xfs_dablk_t)) + +static inline int +xfs_attr3_leaf_hdr_size(struct xfs_attr_leafblock *leafp)  { -	return ((struct xfs_dir2_leaf_entry *)btp) - be32_to_cpu(btp->count); +	if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) +		return sizeof(struct xfs_attr3_leaf_hdr); +	return sizeof(struct xfs_attr_leaf_hdr); +} + +static inline struct xfs_attr_leaf_entry * +xfs_attr3_leaf_entryp(xfs_attr_leafblock_t *leafp) +{ +	if (leafp->hdr.info.magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC)) +		return &((struct xfs_attr3_leafblock *)leafp)->entries[0]; +	return &leafp->entries[0]; +} + +/* + * Cast typed pointers for "local" and "remote" name/value structs. + */ +static inline char * +xfs_attr3_leaf_name(xfs_attr_leafblock_t *leafp, int idx) +{ +	struct xfs_attr_leaf_entry *entries = xfs_attr3_leaf_entryp(leafp); + +	return &((char *)leafp)[be16_to_cpu(entries[idx].nameidx)]; +} + +static inline xfs_attr_leaf_name_remote_t * +xfs_attr3_leaf_name_remote(xfs_attr_leafblock_t *leafp, int idx) +{ +	return (xfs_attr_leaf_name_remote_t *)xfs_attr3_leaf_name(leafp, idx); +} + +static inline xfs_attr_leaf_name_local_t * +xfs_attr3_leaf_name_local(xfs_attr_leafblock_t *leafp, int idx) +{ +	return (xfs_attr_leaf_name_local_t *)xfs_attr3_leaf_name(leafp, idx);  } -#endif /* __XFS_DIR2_FORMAT_H__ */ +/* + * Calculate total bytes used (including trailing pad for alignment) for + * a "local" name/value structure, a "remote" name/value structure, and + * a pointer which might be either. 
+ */ +static inline int xfs_attr_leaf_entsize_remote(int nlen) +{ +	return ((uint)sizeof(xfs_attr_leaf_name_remote_t) - 1 + (nlen) + \ +		XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1); +} + +static inline int xfs_attr_leaf_entsize_local(int nlen, int vlen) +{ +	return ((uint)sizeof(xfs_attr_leaf_name_local_t) - 1 + (nlen) + (vlen) + +		XFS_ATTR_LEAF_NAME_ALIGN - 1) & ~(XFS_ATTR_LEAF_NAME_ALIGN - 1); +} + +static inline int xfs_attr_leaf_entsize_local_max(int bsize) +{ +	return (((bsize) >> 1) + ((bsize) >> 2)); +} + + + +/* + * Remote attribute block format definition + * + * There is one of these headers per filesystem block in a remote attribute. + * This is done to ensure there is a 1:1 mapping between the attribute value + * length and the number of blocks needed to store the attribute. This makes the + * verification of a buffer a little more complex, but greatly simplifies the + * allocation, reading and writing of these attributes as we don't have to guess + * the number of blocks needed to store the attribute data. + */ +#define XFS_ATTR3_RMT_MAGIC	0x5841524d	/* XARM */ + +struct xfs_attr3_rmt_hdr { +	__be32	rm_magic; +	__be32	rm_offset; +	__be32	rm_bytes; +	__be32	rm_crc; +	uuid_t	rm_uuid; +	__be64	rm_owner; +	__be64	rm_blkno; +	__be64	rm_lsn; +}; + +#define XFS_ATTR3_RMT_CRC_OFF	offsetof(struct xfs_attr3_rmt_hdr, rm_crc) + +#define XFS_ATTR3_RMT_BUF_SPACE(mp, bufsize)	\ +	((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \ +			sizeof(struct xfs_attr3_rmt_hdr) : 0)) + +#endif /* __XFS_DA_FORMAT_H__ */ diff --git a/fs/xfs/xfs_dinode.h b/fs/xfs/xfs_dinode.h index e5869b50dc4..623bbe8fd92 100644 --- a/fs/xfs/xfs_dinode.h +++ b/fs/xfs/xfs_dinode.h @@ -89,6 +89,8 @@ typedef struct xfs_dinode {  	/* structure must be padded to 64 bit alignment */  } xfs_dinode_t; +#define XFS_DINODE_CRC_OFF	offsetof(struct xfs_dinode, di_crc) +  #define DI_MAX_FLUSH 0xffff  /* diff --git a/fs/xfs/xfs_dir2.c b/fs/xfs/xfs_dir2.c index edf203ab50a..79670cda48a 100644 --- a/fs/xfs/xfs_dir2.c +++ b/fs/xfs/xfs_dir2.c @@ -17,25 +17,24 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_inum.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" +#include "xfs_da_format.h"  #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h" +#include "xfs_trans.h"  #include "xfs_inode_item.h"  #include "xfs_bmap.h" -#include "xfs_dir2_format.h"  #include "xfs_dir2.h"  #include "xfs_dir2_priv.h"  #include "xfs_error.h"  #include "xfs_trace.h" +#include "xfs_dinode.h"  struct xfs_name xfs_name_dotdot = { (unsigned char *)"..", 2, XFS_DIR3_FT_DIR }; @@ -86,33 +85,74 @@ static struct xfs_nameops xfs_ascii_ci_nameops = {  	.compname	= xfs_ascii_ci_compname,  }; -void -xfs_dir_mount( -	xfs_mount_t	*mp) +int +xfs_da_mount( +	struct xfs_mount	*mp)  { -	int	nodehdr_size; +	struct xfs_da_geometry	*dageo; +	int			nodehdr_size; -	ASSERT(xfs_sb_version_hasdirv2(&mp->m_sb)); +	ASSERT(mp->m_sb.sb_versionnum & XFS_SB_VERSION_DIRV2BIT);  	ASSERT((1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog)) <=  	       XFS_MAX_BLOCKSIZE); -	mp->m_dirblksize = 1 << (mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog); -	mp->m_dirblkfsbs = 1 << mp->m_sb.sb_dirblklog; -	mp->m_dirdatablk = xfs_dir2_db_to_da(mp, XFS_DIR2_DATA_FIRSTDB(mp)); -	mp->m_dirleafblk = xfs_dir2_db_to_da(mp, 
XFS_DIR2_LEAF_FIRSTDB(mp)); -	mp->m_dirfreeblk = xfs_dir2_db_to_da(mp, XFS_DIR2_FREE_FIRSTDB(mp)); - -	nodehdr_size = __xfs_da3_node_hdr_size(xfs_sb_version_hascrc(&mp->m_sb)); -	mp->m_attr_node_ents = (mp->m_sb.sb_blocksize - nodehdr_size) / + +	mp->m_dir_inode_ops = xfs_dir_get_ops(mp, NULL); +	mp->m_nondir_inode_ops = xfs_nondir_get_ops(mp, NULL); + +	nodehdr_size = mp->m_dir_inode_ops->node_hdr_size; +	mp->m_dir_geo = kmem_zalloc(sizeof(struct xfs_da_geometry), +				    KM_SLEEP | KM_MAYFAIL); +	mp->m_attr_geo = kmem_zalloc(sizeof(struct xfs_da_geometry), +				     KM_SLEEP | KM_MAYFAIL); +	if (!mp->m_dir_geo || !mp->m_attr_geo) { +		kmem_free(mp->m_dir_geo); +		kmem_free(mp->m_attr_geo); +		return ENOMEM; +	} + +	/* set up directory geometry */ +	dageo = mp->m_dir_geo; +	dageo->blklog = mp->m_sb.sb_blocklog + mp->m_sb.sb_dirblklog; +	dageo->fsblog = mp->m_sb.sb_blocklog; +	dageo->blksize = 1 << dageo->blklog; +	dageo->fsbcount = 1 << mp->m_sb.sb_dirblklog; + +	/* +	 * Now we've set up the block conversion variables, we can calculate the +	 * segment block constants using the geometry structure. +	 */ +	dageo->datablk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_DATA_OFFSET); +	dageo->leafblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_LEAF_OFFSET); +	dageo->freeblk = xfs_dir2_byte_to_da(dageo, XFS_DIR2_FREE_OFFSET); +	dageo->node_ents = (dageo->blksize - nodehdr_size) /  				(uint)sizeof(xfs_da_node_entry_t); -	mp->m_dir_node_ents = (mp->m_dirblksize - nodehdr_size) / +	dageo->magicpct = (dageo->blksize * 37) / 100; + +	/* set up attribute geometry - single fsb only */ +	dageo = mp->m_attr_geo; +	dageo->blklog = mp->m_sb.sb_blocklog; +	dageo->fsblog = mp->m_sb.sb_blocklog; +	dageo->blksize = 1 << dageo->blklog; +	dageo->fsbcount = 1; +	dageo->node_ents = (dageo->blksize - nodehdr_size) /  				(uint)sizeof(xfs_da_node_entry_t); +	dageo->magicpct = (dageo->blksize * 37) / 100; -	mp->m_dir_magicpct = (mp->m_dirblksize * 37) / 100;  	if (xfs_sb_version_hasasciici(&mp->m_sb))  		mp->m_dirnameops = &xfs_ascii_ci_nameops;  	else  		mp->m_dirnameops = &xfs_default_nameops; + +	return 0; +} + +void +xfs_da_unmount( +	struct xfs_mount	*mp) +{ +	kmem_free(mp->m_dir_geo); +	kmem_free(mp->m_attr_geo);  }  /* @@ -176,16 +216,24 @@ xfs_dir_init(  	xfs_inode_t	*dp,  	xfs_inode_t	*pdp)  { -	xfs_da_args_t	args; +	struct xfs_da_args *args;  	int		error; -	memset((char *)&args, 0, sizeof(args)); -	args.dp = dp; -	args.trans = tp;  	ASSERT(S_ISDIR(dp->i_d.di_mode)); -	if ((error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino))) +	error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino); +	if (error)  		return error; -	return xfs_dir2_sf_create(&args, pdp->i_ino); + +	args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); +	if (!args) +		return ENOMEM; + +	args->geo = dp->i_mount->m_dir_geo; +	args->dp = dp; +	args->trans = tp; +	error = xfs_dir2_sf_create(args, pdp->i_ino); +	kmem_free(args); +	return error;  }  /* @@ -201,41 +249,57 @@ xfs_dir_createname(  	xfs_bmap_free_t		*flist,		/* bmap's freeblock list */  	xfs_extlen_t		total)		/* bmap's total block count */  { -	xfs_da_args_t		args; +	struct xfs_da_args	*args;  	int			rval;  	int			v;		/* type-checking value */  	ASSERT(S_ISDIR(dp->i_d.di_mode)); -	if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) +	rval = xfs_dir_ino_validate(tp->t_mountp, inum); +	if (rval)  		return rval;  	XFS_STATS_INC(xs_dir_create); -	memset(&args, 0, sizeof(xfs_da_args_t)); -	args.name = name->name; -	args.namelen = name->len; -	args.filetype = name->type; -	args.hashval = 
dp->i_mount->m_dirnameops->hashname(name); -	args.inumber = inum; -	args.dp = dp; -	args.firstblock = first; -	args.flist = flist; -	args.total = total; -	args.whichfork = XFS_DATA_FORK; -	args.trans = tp; -	args.op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; - -	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) -		rval = xfs_dir2_sf_addname(&args); -	else if ((rval = xfs_dir2_isblock(tp, dp, &v))) -		return rval; -	else if (v) -		rval = xfs_dir2_block_addname(&args); -	else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) -		return rval; -	else if (v) -		rval = xfs_dir2_leaf_addname(&args); +	args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); +	if (!args) +		return ENOMEM; + +	args->geo = dp->i_mount->m_dir_geo; +	args->name = name->name; +	args->namelen = name->len; +	args->filetype = name->type; +	args->hashval = dp->i_mount->m_dirnameops->hashname(name); +	args->inumber = inum; +	args->dp = dp; +	args->firstblock = first; +	args->flist = flist; +	args->total = total; +	args->whichfork = XFS_DATA_FORK; +	args->trans = tp; +	args->op_flags = XFS_DA_OP_ADDNAME | XFS_DA_OP_OKNOENT; + +	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { +		rval = xfs_dir2_sf_addname(args); +		goto out_free; +	} + +	rval = xfs_dir2_isblock(args, &v); +	if (rval) +		goto out_free; +	if (v) { +		rval = xfs_dir2_block_addname(args); +		goto out_free; +	} + +	rval = xfs_dir2_isleaf(args, &v); +	if (rval) +		goto out_free; +	if (v) +		rval = xfs_dir2_leaf_addname(args);  	else -		rval = xfs_dir2_node_addname(&args); +		rval = xfs_dir2_node_addname(args); + +out_free: +	kmem_free(args);  	return rval;  } @@ -278,46 +342,67 @@ xfs_dir_lookup(  	xfs_ino_t	*inum,		/* out: inode number */  	struct xfs_name *ci_name)	/* out: actual name if CI match */  { -	xfs_da_args_t	args; +	struct xfs_da_args *args;  	int		rval;  	int		v;		/* type-checking value */  	ASSERT(S_ISDIR(dp->i_d.di_mode));  	XFS_STATS_INC(xs_dir_lookup); -	memset(&args, 0, sizeof(xfs_da_args_t)); -	args.name = name->name; -	args.namelen = name->len; -	args.filetype = name->type; -	args.hashval = dp->i_mount->m_dirnameops->hashname(name); -	args.dp = dp; -	args.whichfork = XFS_DATA_FORK; -	args.trans = tp; -	args.op_flags = XFS_DA_OP_OKNOENT; +	/* +	 * We need to use KM_NOFS here so that lockdep will not throw false +	 * positive deadlock warnings on a non-transactional lookup path. It is +	 * safe to recurse into inode reclaim in that case, but lockdep can't +	 * easily be taught about it. Hence using KM_NOFS avoids having to add +	 * a bunch of lockdep class annotations into the reclaim path for the +	 * ilock. 
+	 */ +	args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); +	args->geo = dp->i_mount->m_dir_geo; +	args->name = name->name; +	args->namelen = name->len; +	args->filetype = name->type; +	args->hashval = dp->i_mount->m_dirnameops->hashname(name); +	args->dp = dp; +	args->whichfork = XFS_DATA_FORK; +	args->trans = tp; +	args->op_flags = XFS_DA_OP_OKNOENT;  	if (ci_name) -		args.op_flags |= XFS_DA_OP_CILOOKUP; +		args->op_flags |= XFS_DA_OP_CILOOKUP; -	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) -		rval = xfs_dir2_sf_lookup(&args); -	else if ((rval = xfs_dir2_isblock(tp, dp, &v))) -		return rval; -	else if (v) -		rval = xfs_dir2_block_lookup(&args); -	else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) -		return rval; -	else if (v) -		rval = xfs_dir2_leaf_lookup(&args); +	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { +		rval = xfs_dir2_sf_lookup(args); +		goto out_check_rval; +	} + +	rval = xfs_dir2_isblock(args, &v); +	if (rval) +		goto out_free; +	if (v) { +		rval = xfs_dir2_block_lookup(args); +		goto out_check_rval; +	} + +	rval = xfs_dir2_isleaf(args, &v); +	if (rval) +		goto out_free; +	if (v) +		rval = xfs_dir2_leaf_lookup(args);  	else -		rval = xfs_dir2_node_lookup(&args); +		rval = xfs_dir2_node_lookup(args); + +out_check_rval:  	if (rval == EEXIST)  		rval = 0;  	if (!rval) { -		*inum = args.inumber; +		*inum = args->inumber;  		if (ci_name) { -			ci_name->name = args.value; -			ci_name->len = args.valuelen; +			ci_name->name = args->value; +			ci_name->len = args->valuelen;  		}  	} +out_free: +	kmem_free(args);  	return rval;  } @@ -334,38 +419,52 @@ xfs_dir_removename(  	xfs_bmap_free_t	*flist,		/* bmap's freeblock list */  	xfs_extlen_t	total)		/* bmap's total block count */  { -	xfs_da_args_t	args; +	struct xfs_da_args *args;  	int		rval;  	int		v;		/* type-checking value */  	ASSERT(S_ISDIR(dp->i_d.di_mode));  	XFS_STATS_INC(xs_dir_remove); -	memset(&args, 0, sizeof(xfs_da_args_t)); -	args.name = name->name; -	args.namelen = name->len; -	args.filetype = name->type; -	args.hashval = dp->i_mount->m_dirnameops->hashname(name); -	args.inumber = ino; -	args.dp = dp; -	args.firstblock = first; -	args.flist = flist; -	args.total = total; -	args.whichfork = XFS_DATA_FORK; -	args.trans = tp; - -	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) -		rval = xfs_dir2_sf_removename(&args); -	else if ((rval = xfs_dir2_isblock(tp, dp, &v))) -		return rval; -	else if (v) -		rval = xfs_dir2_block_removename(&args); -	else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) -		return rval; -	else if (v) -		rval = xfs_dir2_leaf_removename(&args); +	args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); +	if (!args) +		return ENOMEM; + +	args->geo = dp->i_mount->m_dir_geo; +	args->name = name->name; +	args->namelen = name->len; +	args->filetype = name->type; +	args->hashval = dp->i_mount->m_dirnameops->hashname(name); +	args->inumber = ino; +	args->dp = dp; +	args->firstblock = first; +	args->flist = flist; +	args->total = total; +	args->whichfork = XFS_DATA_FORK; +	args->trans = tp; + +	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { +		rval = xfs_dir2_sf_removename(args); +		goto out_free; +	} + +	rval = xfs_dir2_isblock(args, &v); +	if (rval) +		goto out_free; +	if (v) { +		rval = xfs_dir2_block_removename(args); +		goto out_free; +	} + +	rval = xfs_dir2_isleaf(args, &v); +	if (rval) +		goto out_free; +	if (v) +		rval = xfs_dir2_leaf_removename(args);  	else -		rval = xfs_dir2_node_removename(&args); +		rval = xfs_dir2_node_removename(args); +out_free: +	kmem_free(args);  	return rval;  } @@ -382,40 
+481,55 @@ xfs_dir_replace(  	xfs_bmap_free_t	*flist,		/* bmap's freeblock list */  	xfs_extlen_t	total)		/* bmap's total block count */  { -	xfs_da_args_t	args; +	struct xfs_da_args *args;  	int		rval;  	int		v;		/* type-checking value */  	ASSERT(S_ISDIR(dp->i_d.di_mode)); -	if ((rval = xfs_dir_ino_validate(tp->t_mountp, inum))) +	rval = xfs_dir_ino_validate(tp->t_mountp, inum); +	if (rval)  		return rval; -	memset(&args, 0, sizeof(xfs_da_args_t)); -	args.name = name->name; -	args.namelen = name->len; -	args.filetype = name->type; -	args.hashval = dp->i_mount->m_dirnameops->hashname(name); -	args.inumber = inum; -	args.dp = dp; -	args.firstblock = first; -	args.flist = flist; -	args.total = total; -	args.whichfork = XFS_DATA_FORK; -	args.trans = tp; - -	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) -		rval = xfs_dir2_sf_replace(&args); -	else if ((rval = xfs_dir2_isblock(tp, dp, &v))) -		return rval; -	else if (v) -		rval = xfs_dir2_block_replace(&args); -	else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) -		return rval; -	else if (v) -		rval = xfs_dir2_leaf_replace(&args); +	args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); +	if (!args) +		return ENOMEM; + +	args->geo = dp->i_mount->m_dir_geo; +	args->name = name->name; +	args->namelen = name->len; +	args->filetype = name->type; +	args->hashval = dp->i_mount->m_dirnameops->hashname(name); +	args->inumber = inum; +	args->dp = dp; +	args->firstblock = first; +	args->flist = flist; +	args->total = total; +	args->whichfork = XFS_DATA_FORK; +	args->trans = tp; + +	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { +		rval = xfs_dir2_sf_replace(args); +		goto out_free; +	} + +	rval = xfs_dir2_isblock(args, &v); +	if (rval) +		goto out_free; +	if (v) { +		rval = xfs_dir2_block_replace(args); +		goto out_free; +	} + +	rval = xfs_dir2_isleaf(args, &v); +	if (rval) +		goto out_free; +	if (v) +		rval = xfs_dir2_leaf_replace(args);  	else -		rval = xfs_dir2_node_replace(&args); +		rval = xfs_dir2_node_replace(args); +out_free: +	kmem_free(args);  	return rval;  } @@ -430,7 +544,7 @@ xfs_dir_canenter(  	struct xfs_name	*name,		/* name of entry to add */  	uint		resblks)  { -	xfs_da_args_t	args; +	struct xfs_da_args *args;  	int		rval;  	int		v;		/* type-checking value */ @@ -439,29 +553,43 @@ xfs_dir_canenter(  	ASSERT(S_ISDIR(dp->i_d.di_mode)); -	memset(&args, 0, sizeof(xfs_da_args_t)); -	args.name = name->name; -	args.namelen = name->len; -	args.filetype = name->type; -	args.hashval = dp->i_mount->m_dirnameops->hashname(name); -	args.dp = dp; -	args.whichfork = XFS_DATA_FORK; -	args.trans = tp; -	args.op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME | +	args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS); +	if (!args) +		return ENOMEM; + +	args->geo = dp->i_mount->m_dir_geo; +	args->name = name->name; +	args->namelen = name->len; +	args->filetype = name->type; +	args->hashval = dp->i_mount->m_dirnameops->hashname(name); +	args->dp = dp; +	args->whichfork = XFS_DATA_FORK; +	args->trans = tp; +	args->op_flags = XFS_DA_OP_JUSTCHECK | XFS_DA_OP_ADDNAME |  							XFS_DA_OP_OKNOENT; -	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) -		rval = xfs_dir2_sf_addname(&args); -	else if ((rval = xfs_dir2_isblock(tp, dp, &v))) -		return rval; -	else if (v) -		rval = xfs_dir2_block_addname(&args); -	else if ((rval = xfs_dir2_isleaf(tp, dp, &v))) -		return rval; -	else if (v) -		rval = xfs_dir2_leaf_addname(&args); +	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) { +		rval = xfs_dir2_sf_addname(args); +		goto out_free; +	} + +	rval = xfs_dir2_isblock(args, 
&v); +	if (rval) +		goto out_free; +	if (v) { +		rval = xfs_dir2_block_addname(args); +		goto out_free; +	} + +	rval = xfs_dir2_isleaf(args, &v); +	if (rval) +		goto out_free; +	if (v) +		rval = xfs_dir2_leaf_addname(args);  	else -		rval = xfs_dir2_node_addname(&args); +		rval = xfs_dir2_node_addname(args); +out_free: +	kmem_free(args);  	return rval;  } @@ -493,13 +621,13 @@ xfs_dir2_grow_inode(  	 * Set lowest possible block in the space requested.  	 */  	bno = XFS_B_TO_FSBT(mp, space * XFS_DIR2_SPACE_SIZE); -	count = mp->m_dirblkfsbs; +	count = args->geo->fsbcount;  	error = xfs_da_grow_inode_int(args, &bno, count);  	if (error)  		return error; -	*dbp = xfs_dir2_da_to_db(mp, (xfs_dablk_t)bno); +	*dbp = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)bno);  	/*  	 * Update file's size if this is the data space and it grew. @@ -521,19 +649,16 @@ xfs_dir2_grow_inode(   */  int  xfs_dir2_isblock( -	xfs_trans_t	*tp, -	xfs_inode_t	*dp, -	int		*vp)		/* out: 1 is block, 0 is not block */ +	struct xfs_da_args	*args, +	int			*vp)	/* out: 1 is block, 0 is not block */  { -	xfs_fileoff_t	last;		/* last file offset */ -	xfs_mount_t	*mp; -	int		rval; +	xfs_fileoff_t		last;	/* last file offset */ +	int			rval; -	mp = dp->i_mount; -	if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK))) +	if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK)))  		return rval; -	rval = XFS_FSB_TO_B(mp, last) == mp->m_dirblksize; -	ASSERT(rval == 0 || dp->i_d.di_size == mp->m_dirblksize); +	rval = XFS_FSB_TO_B(args->dp->i_mount, last) == args->geo->blksize; +	ASSERT(rval == 0 || args->dp->i_d.di_size == args->geo->blksize);  	*vp = rval;  	return 0;  } @@ -543,18 +668,15 @@ xfs_dir2_isblock(   */  int  xfs_dir2_isleaf( -	xfs_trans_t	*tp, -	xfs_inode_t	*dp, -	int		*vp)		/* out: 1 is leaf, 0 is not leaf */ +	struct xfs_da_args	*args, +	int			*vp)	/* out: 1 is leaf, 0 is not leaf */  { -	xfs_fileoff_t	last;		/* last file offset */ -	xfs_mount_t	*mp; -	int		rval; +	xfs_fileoff_t		last;	/* last file offset */ +	int			rval; -	mp = dp->i_mount; -	if ((rval = xfs_bmap_last_offset(tp, dp, &last, XFS_DATA_FORK))) +	if ((rval = xfs_bmap_last_offset(args->dp, &last, XFS_DATA_FORK)))  		return rval; -	*vp = last == mp->m_dirleafblk + (1 << mp->m_sb.sb_dirblklog); +	*vp = last == args->geo->leafblk + args->geo->fsbcount;  	return 0;  } @@ -582,11 +704,11 @@ xfs_dir2_shrink_inode(  	dp = args->dp;  	mp = dp->i_mount;  	tp = args->trans; -	da = xfs_dir2_db_to_da(mp, db); +	da = xfs_dir2_db_to_da(args->geo, db);  	/*  	 * Unmap the fsblock(s).  	 */ -	if ((error = xfs_bunmapi(tp, dp, da, mp->m_dirblkfsbs, +	if ((error = xfs_bunmapi(tp, dp, da, args->geo->fsbcount, 			XFS_BMAPI_METADATA, 0, args->firstblock, args->flist,  			&done))) {  		/* @@ -613,12 +735,12 @@ xfs_dir2_shrink_inode(  	/*  	 * If it's not a data block, we're done.  	 */ -	if (db >= XFS_DIR2_LEAF_FIRSTDB(mp)) +	if (db >= xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET))  		return 0;  	/*  	 * If the block isn't the last one in the directory, we're done.  	 
*/ -	if (dp->i_d.di_size > xfs_dir2_db_off_to_byte(mp, db + 1, 0)) +	if (dp->i_d.di_size > xfs_dir2_db_off_to_byte(args->geo, db + 1, 0))  		return 0;  	bno = da;  	if ((error = xfs_bmap_last_before(tp, dp, &bno, XFS_DATA_FORK))) { @@ -627,7 +749,7 @@ xfs_dir2_shrink_inode(  		 */  		return error;  	} -	if (db == mp->m_dirdatablk) +	if (db == args->geo->datablk)  		ASSERT(bno == 0);  	else  		ASSERT(bno > 0); diff --git a/fs/xfs/xfs_dir2.h b/fs/xfs/xfs_dir2.h index 9910401327d..c8e86b0b5e9 100644 --- a/fs/xfs/xfs_dir2.h +++ b/fs/xfs/xfs_dir2.h @@ -32,10 +32,91 @@ struct xfs_dir2_data_unused;  extern struct xfs_name	xfs_name_dotdot;  /* + * directory operations vector for encode/decode routines + */ +struct xfs_dir_ops { +	int	(*sf_entsize)(struct xfs_dir2_sf_hdr *hdr, int len); +	struct xfs_dir2_sf_entry * +		(*sf_nextentry)(struct xfs_dir2_sf_hdr *hdr, +				struct xfs_dir2_sf_entry *sfep); +	__uint8_t (*sf_get_ftype)(struct xfs_dir2_sf_entry *sfep); +	void	(*sf_put_ftype)(struct xfs_dir2_sf_entry *sfep, +				__uint8_t ftype); +	xfs_ino_t (*sf_get_ino)(struct xfs_dir2_sf_hdr *hdr, +				struct xfs_dir2_sf_entry *sfep); +	void	(*sf_put_ino)(struct xfs_dir2_sf_hdr *hdr, +			      struct xfs_dir2_sf_entry *sfep, +			      xfs_ino_t ino); +	xfs_ino_t (*sf_get_parent_ino)(struct xfs_dir2_sf_hdr *hdr); +	void	(*sf_put_parent_ino)(struct xfs_dir2_sf_hdr *hdr, +				     xfs_ino_t ino); + +	int	(*data_entsize)(int len); +	__uint8_t (*data_get_ftype)(struct xfs_dir2_data_entry *dep); +	void	(*data_put_ftype)(struct xfs_dir2_data_entry *dep, +				__uint8_t ftype); +	__be16 * (*data_entry_tag_p)(struct xfs_dir2_data_entry *dep); +	struct xfs_dir2_data_free * +		(*data_bestfree_p)(struct xfs_dir2_data_hdr *hdr); + +	xfs_dir2_data_aoff_t data_dot_offset; +	xfs_dir2_data_aoff_t data_dotdot_offset; +	xfs_dir2_data_aoff_t data_first_offset; +	size_t	data_entry_offset; + +	struct xfs_dir2_data_entry * +		(*data_dot_entry_p)(struct xfs_dir2_data_hdr *hdr); +	struct xfs_dir2_data_entry * +		(*data_dotdot_entry_p)(struct xfs_dir2_data_hdr *hdr); +	struct xfs_dir2_data_entry * +		(*data_first_entry_p)(struct xfs_dir2_data_hdr *hdr); +	struct xfs_dir2_data_entry * +		(*data_entry_p)(struct xfs_dir2_data_hdr *hdr); +	struct xfs_dir2_data_unused * +		(*data_unused_p)(struct xfs_dir2_data_hdr *hdr); + +	int	leaf_hdr_size; +	void	(*leaf_hdr_to_disk)(struct xfs_dir2_leaf *to, +				    struct xfs_dir3_icleaf_hdr *from); +	void	(*leaf_hdr_from_disk)(struct xfs_dir3_icleaf_hdr *to, +				      struct xfs_dir2_leaf *from); +	int	(*leaf_max_ents)(struct xfs_da_geometry *geo); +	struct xfs_dir2_leaf_entry * +		(*leaf_ents_p)(struct xfs_dir2_leaf *lp); + +	int	node_hdr_size; +	void	(*node_hdr_to_disk)(struct xfs_da_intnode *to, +				    struct xfs_da3_icnode_hdr *from); +	void	(*node_hdr_from_disk)(struct xfs_da3_icnode_hdr *to, +				      struct xfs_da_intnode *from); +	struct xfs_da_node_entry * +		(*node_tree_p)(struct xfs_da_intnode *dap); + +	int	free_hdr_size; +	void	(*free_hdr_to_disk)(struct xfs_dir2_free *to, +				    struct xfs_dir3_icfree_hdr *from); +	void	(*free_hdr_from_disk)(struct xfs_dir3_icfree_hdr *to, +				      struct xfs_dir2_free *from); +	int	(*free_max_bests)(struct xfs_da_geometry *geo); +	__be16 * (*free_bests_p)(struct xfs_dir2_free *free); +	xfs_dir2_db_t (*db_to_fdb)(struct xfs_da_geometry *geo, +				   xfs_dir2_db_t db); +	int	(*db_to_fdindex)(struct xfs_da_geometry *geo, +				 xfs_dir2_db_t db); +}; + +extern const struct xfs_dir_ops * +	xfs_dir_get_ops(struct xfs_mount *mp, struct xfs_inode 
*dp); +extern const struct xfs_dir_ops * +	xfs_nondir_get_ops(struct xfs_mount *mp, struct xfs_inode *dp); + +/*   * Generic directory interface routines   */  extern void xfs_dir_startup(void); -extern void xfs_dir_mount(struct xfs_mount *mp); +extern int xfs_da_mount(struct xfs_mount *mp); +extern void xfs_da_unmount(struct xfs_mount *mp); +  extern int xfs_dir_isempty(struct xfs_inode *dp);  extern int xfs_dir_init(struct xfs_trans *tp, struct xfs_inode *dp,  				struct xfs_inode *pdp); @@ -65,37 +146,30 @@ extern int xfs_dir2_sf_to_block(struct xfs_da_args *args);  /*   * Interface routines used by userspace utilities   */ -extern xfs_ino_t xfs_dir2_sf_get_parent_ino(struct xfs_dir2_sf_hdr *sfp); -extern void xfs_dir2_sf_put_parent_ino(struct xfs_dir2_sf_hdr *sfp, -		xfs_ino_t ino); -extern xfs_ino_t xfs_dir3_sfe_get_ino(struct xfs_mount *mp, -		struct xfs_dir2_sf_hdr *sfp, struct xfs_dir2_sf_entry *sfep); -extern void xfs_dir3_sfe_put_ino(struct xfs_mount *mp, -		struct xfs_dir2_sf_hdr *hdr, struct xfs_dir2_sf_entry *sfep, -		xfs_ino_t ino); - -extern int xfs_dir2_isblock(struct xfs_trans *tp, struct xfs_inode *dp, int *r); -extern int xfs_dir2_isleaf(struct xfs_trans *tp, struct xfs_inode *dp, int *r); +extern int xfs_dir2_isblock(struct xfs_da_args *args, int *r); +extern int xfs_dir2_isleaf(struct xfs_da_args *args, int *r);  extern int xfs_dir2_shrink_inode(struct xfs_da_args *args, xfs_dir2_db_t db,  				struct xfs_buf *bp); -extern void xfs_dir2_data_freescan(struct xfs_mount *mp, +extern void xfs_dir2_data_freescan(struct xfs_inode *dp,  		struct xfs_dir2_data_hdr *hdr, int *loghead); -extern void xfs_dir2_data_log_entry(struct xfs_trans *tp, struct xfs_buf *bp, -		struct xfs_dir2_data_entry *dep); -extern void xfs_dir2_data_log_header(struct xfs_trans *tp, +extern void xfs_dir2_data_log_entry(struct xfs_da_args *args, +		struct xfs_buf *bp, struct xfs_dir2_data_entry *dep); +extern void xfs_dir2_data_log_header(struct xfs_da_args *args,  		struct xfs_buf *bp); -extern void xfs_dir2_data_log_unused(struct xfs_trans *tp, struct xfs_buf *bp, -		struct xfs_dir2_data_unused *dup); -extern void xfs_dir2_data_make_free(struct xfs_trans *tp, struct xfs_buf *bp, +extern void xfs_dir2_data_log_unused(struct xfs_da_args *args, +		struct xfs_buf *bp, struct xfs_dir2_data_unused *dup); +extern void xfs_dir2_data_make_free(struct xfs_da_args *args, +		struct xfs_buf *bp, xfs_dir2_data_aoff_t offset, +		xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp); +extern void xfs_dir2_data_use_free(struct xfs_da_args *args, +		struct xfs_buf *bp, struct xfs_dir2_data_unused *dup,  		xfs_dir2_data_aoff_t offset, xfs_dir2_data_aoff_t len,  		int *needlogp, int *needscanp); -extern void xfs_dir2_data_use_free(struct xfs_trans *tp, struct xfs_buf *bp, -		struct xfs_dir2_data_unused *dup, xfs_dir2_data_aoff_t offset, -		xfs_dir2_data_aoff_t len, int *needlogp, int *needscanp);  extern struct xfs_dir2_data_free *xfs_dir2_data_freefind( -		struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_unused *dup); +		struct xfs_dir2_data_hdr *hdr, struct xfs_dir2_data_free *bf, +		struct xfs_dir2_data_unused *dup);  extern const struct xfs_buf_ops xfs_dir3_block_buf_ops;  extern const struct xfs_buf_ops xfs_dir3_leafn_buf_ops; diff --git a/fs/xfs/xfs_dir2_block.c b/fs/xfs/xfs_dir2_block.c index 0957aa98b6c..c7cd3154026 100644 --- a/fs/xfs/xfs_dir2_block.c +++ b/fs/xfs/xfs_dir2_block.c @@ -18,25 +18,25 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include 
"xfs_trans.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" +#include "xfs_da_format.h"  #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h" +#include "xfs_trans.h"  #include "xfs_inode_item.h"  #include "xfs_bmap.h"  #include "xfs_buf_item.h" -#include "xfs_dir2_format.h"  #include "xfs_dir2.h"  #include "xfs_dir2_priv.h"  #include "xfs_error.h"  #include "xfs_trace.h"  #include "xfs_cksum.h" +#include "xfs_dinode.h"  /*   * Local function prototypes. @@ -89,13 +89,14 @@ xfs_dir3_block_read_verify(  {  	struct xfs_mount	*mp = bp->b_target->bt_mount; -	if ((xfs_sb_version_hascrc(&mp->m_sb) && -	     !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -					  XFS_DIR3_DATA_CRC_OFF)) || -	    !xfs_dir3_block_verify(bp)) { -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); +	if (xfs_sb_version_hascrc(&mp->m_sb) && +	     !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) +		xfs_buf_ioerror(bp, EFSBADCRC); +	else if (!xfs_dir3_block_verify(bp))  		xfs_buf_ioerror(bp, EFSCORRUPTED); -	} + +	if (bp->b_error) +		xfs_verifier_error(bp);  }  static void @@ -107,8 +108,8 @@ xfs_dir3_block_write_verify(  	struct xfs_dir3_blk_hdr	*hdr3 = bp->b_addr;  	if (!xfs_dir3_block_verify(bp)) { -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);  		xfs_buf_ioerror(bp, EFSCORRUPTED); +		xfs_verifier_error(bp);  		return;  	} @@ -118,7 +119,7 @@ xfs_dir3_block_write_verify(  	if (bip)  		hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); -	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF); +	xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);  }  const struct xfs_buf_ops xfs_dir3_block_buf_ops = { @@ -135,7 +136,7 @@ xfs_dir3_block_read(  	struct xfs_mount	*mp = dp->i_mount;  	int			err; -	err = xfs_da_read_buf(tp, dp, mp->m_dirdatablk, -1, bpp, +	err = xfs_da_read_buf(tp, dp, mp->m_dir_geo->datablk, -1, bpp,  				XFS_DATA_FORK, &xfs_dir3_block_buf_ops);  	if (!err && tp)  		xfs_trans_buf_set_type(tp, *bpp, XFS_BLFT_DIR_BLOCK_BUF); @@ -168,6 +169,7 @@ xfs_dir3_block_init(  static void  xfs_dir2_block_need_space( +	struct xfs_inode		*dp,  	struct xfs_dir2_data_hdr	*hdr,  	struct xfs_dir2_block_tail	*btp,  	struct xfs_dir2_leaf_entry	*blp, @@ -183,7 +185,7 @@ xfs_dir2_block_need_space(  	struct xfs_dir2_data_unused	*enddup = NULL;  	*compact = 0; -	bf = xfs_dir3_data_bestfree_p(hdr); +	bf = dp->d_ops->data_bestfree_p(hdr);  	/*  	 * If there are stale entries we'll use one for the leaf. @@ -279,7 +281,7 @@ out:   */  static void  xfs_dir2_block_compact( -	struct xfs_trans		*tp, +	struct xfs_da_args		*args,  	struct xfs_buf			*bp,  	struct xfs_dir2_data_hdr	*hdr,  	struct xfs_dir2_block_tail	*btp, @@ -312,18 +314,17 @@ xfs_dir2_block_compact(  	*lfloglow = toidx + 1 - (be32_to_cpu(btp->stale) - 1);  	*lfloghigh -= be32_to_cpu(btp->stale) - 1;  	be32_add_cpu(&btp->count, -(be32_to_cpu(btp->stale) - 1)); -	xfs_dir2_data_make_free(tp, bp, +	xfs_dir2_data_make_free(args, bp,  		(xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr),  		(xfs_dir2_data_aoff_t)((be32_to_cpu(btp->stale) - 1) * sizeof(*blp)),  		needlog, &needscan); -	blp += be32_to_cpu(btp->stale) - 1;  	btp->stale = cpu_to_be32(1);  	/*  	 * If we now need to rebuild the bestfree map, do so.  	 * This needs to happen before the next call to use_free.  	 
*/  	if (needscan) -		xfs_dir2_data_freescan(tp->t_mountp, hdr, needlog); +		xfs_dir2_data_freescan(args->dp, hdr, needlog);  }  /* @@ -369,20 +370,20 @@ xfs_dir2_block_addname(  	if (error)  		return error; -	len = xfs_dir3_data_entsize(mp, args->namelen); +	len = dp->d_ops->data_entsize(args->namelen);  	/*  	 * Set up pointers to parts of the block.  	 */  	hdr = bp->b_addr; -	btp = xfs_dir2_block_tail_p(mp, hdr); +	btp = xfs_dir2_block_tail_p(args->geo, hdr);  	blp = xfs_dir2_block_leaf_p(btp);  	/*  	 * Find out if we can reuse stale entries or whether we need extra  	 * space for entry and new leaf.  	 */ -	xfs_dir2_block_need_space(hdr, btp, blp, &tagp, &dup, +	xfs_dir2_block_need_space(dp, hdr, btp, blp, &tagp, &dup,  				  &enddup, &compact, len);  	/* @@ -418,7 +419,7 @@ xfs_dir2_block_addname(  	 * If need to compact the leaf entries, do it now.  	 */  	if (compact) { -		xfs_dir2_block_compact(tp, bp, hdr, btp, blp, &needlog, +		xfs_dir2_block_compact(args, bp, hdr, btp, blp, &needlog,  				      &lfloghigh, &lfloglow);  		/* recalculate blp post-compaction */  		blp = xfs_dir2_block_leaf_p(btp); @@ -453,7 +454,7 @@ xfs_dir2_block_addname(  		/*  		 * Mark the space needed for the new leaf entry, now in use.  		 */ -		xfs_dir2_data_use_free(tp, bp, enddup, +		xfs_dir2_data_use_free(args, bp, enddup,  			(xfs_dir2_data_aoff_t)  			((char *)enddup - (char *)hdr + be16_to_cpu(enddup->length) -  			 sizeof(*blp)), @@ -468,7 +469,7 @@ xfs_dir2_block_addname(  		 * This needs to happen before the next call to use_free.  		 */  		if (needscan) { -			xfs_dir2_data_freescan(mp, hdr, &needlog); +			xfs_dir2_data_freescan(dp, hdr, &needlog);  			needscan = 0;  		}  		/* @@ -534,13 +535,13 @@ xfs_dir2_block_addname(  	 * Fill in the leaf entry.  	 */  	blp[mid].hashval = cpu_to_be32(args->hashval); -	blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, +	blp[mid].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(  				(char *)dep - (char *)hdr));  	xfs_dir2_block_log_leaf(tp, bp, lfloglow, lfloghigh);  	/*  	 * Mark space for the data entry used.  	 */ -	xfs_dir2_data_use_free(tp, bp, dup, +	xfs_dir2_data_use_free(args, bp, dup,  		(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),  		(xfs_dir2_data_aoff_t)len, &needlog, &needscan);  	/* @@ -549,18 +550,18 @@ xfs_dir2_block_addname(  	dep->inumber = cpu_to_be64(args->inumber);  	dep->namelen = args->namelen;  	memcpy(dep->name, args->name, args->namelen); -	xfs_dir3_dirent_put_ftype(mp, dep, args->filetype); -	tagp = xfs_dir3_data_entry_tag_p(mp, dep); +	dp->d_ops->data_put_ftype(dep, args->filetype); +	tagp = dp->d_ops->data_entry_tag_p(dep);  	*tagp = cpu_to_be16((char *)dep - (char *)hdr);  	/*  	 * Clean up the bestfree array and log the header, tail, and entry.  	 
*/  	if (needscan) -		xfs_dir2_data_freescan(mp, hdr, &needlog); +		xfs_dir2_data_freescan(dp, hdr, &needlog);  	if (needlog) -		xfs_dir2_data_log_header(tp, bp); +		xfs_dir2_data_log_header(args, bp);  	xfs_dir2_block_log_tail(tp, bp); -	xfs_dir2_data_log_entry(tp, bp, dep); +	xfs_dir2_data_log_entry(args, bp, dep);  	xfs_dir3_data_check(dp, bp);  	return 0;  } @@ -579,7 +580,7 @@ xfs_dir2_block_log_leaf(  	xfs_dir2_leaf_entry_t	*blp;  	xfs_dir2_block_tail_t	*btp; -	btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr); +	btp = xfs_dir2_block_tail_p(tp->t_mountp->m_dir_geo, hdr);  	blp = xfs_dir2_block_leaf_p(btp);  	xfs_trans_log_buf(tp, bp, (uint)((char *)&blp[first] - (char *)hdr),  		(uint)((char *)&blp[last + 1] - (char *)hdr - 1)); @@ -596,7 +597,7 @@ xfs_dir2_block_log_tail(  	xfs_dir2_data_hdr_t	*hdr = bp->b_addr;  	xfs_dir2_block_tail_t	*btp; -	btp = xfs_dir2_block_tail_p(tp->t_mountp, hdr); +	btp = xfs_dir2_block_tail_p(tp->t_mountp->m_dir_geo, hdr);  	xfs_trans_log_buf(tp, bp, (uint)((char *)btp - (char *)hdr),  		(uint)((char *)(btp + 1) - (char *)hdr - 1));  } @@ -631,18 +632,19 @@ xfs_dir2_block_lookup(  	mp = dp->i_mount;  	hdr = bp->b_addr;  	xfs_dir3_data_check(dp, bp); -	btp = xfs_dir2_block_tail_p(mp, hdr); +	btp = xfs_dir2_block_tail_p(args->geo, hdr);  	blp = xfs_dir2_block_leaf_p(btp);  	/*  	 * Get the offset from the leaf entry, to point to the data.  	 */  	dep = (xfs_dir2_data_entry_t *)((char *)hdr + -		xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); +			xfs_dir2_dataptr_to_off(args->geo, +						be32_to_cpu(blp[ent].address)));  	/*  	 * Fill in inode number, CI name if appropriate, release the block.  	 */  	args->inumber = be64_to_cpu(dep->inumber); -	args->filetype = xfs_dir3_dirent_get_ftype(mp, dep); +	args->filetype = dp->d_ops->data_get_ftype(dep);  	error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);  	xfs_trans_brelse(args->trans, bp);  	return XFS_ERROR(error); @@ -683,7 +685,7 @@ xfs_dir2_block_lookup_int(  	hdr = bp->b_addr;  	xfs_dir3_data_check(dp, bp); -	btp = xfs_dir2_block_tail_p(mp, hdr); +	btp = xfs_dir2_block_tail_p(args->geo, hdr);  	blp = xfs_dir2_block_leaf_p(btp);  	/*  	 * Loop doing a binary search for our hash value. @@ -721,7 +723,7 @@ xfs_dir2_block_lookup_int(  		 * Get pointer to the entry from the leaf.  		 */  		dep = (xfs_dir2_data_entry_t *) -			((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr)); +			((char *)hdr + xfs_dir2_dataptr_to_off(args->geo, addr));  		/*  		 * Compare name and if it's an exact match, return the index  		 * and buffer. If it's the first case-insensitive match, store @@ -788,20 +790,21 @@ xfs_dir2_block_removename(  	tp = args->trans;  	mp = dp->i_mount;  	hdr = bp->b_addr; -	btp = xfs_dir2_block_tail_p(mp, hdr); +	btp = xfs_dir2_block_tail_p(args->geo, hdr);  	blp = xfs_dir2_block_leaf_p(btp);  	/*  	 * Point to the data entry using the leaf entry.  	 */ -	dep = (xfs_dir2_data_entry_t *) -	      ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); +	dep = (xfs_dir2_data_entry_t *)((char *)hdr + +			xfs_dir2_dataptr_to_off(args->geo, +						be32_to_cpu(blp[ent].address)));  	/*  	 * Mark the data entry's space free.  	 */  	needlog = needscan = 0; -	xfs_dir2_data_make_free(tp, bp, +	xfs_dir2_data_make_free(args, bp,  		(xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), -		xfs_dir3_data_entsize(mp, dep->namelen), &needlog, &needscan); +		dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan);  	/*  	 * Fix up the block tail.  	 
*/ @@ -816,9 +819,9 @@ xfs_dir2_block_removename(  	 * Fix up bestfree, log the header if necessary.  	 */  	if (needscan) -		xfs_dir2_data_freescan(mp, hdr, &needlog); +		xfs_dir2_data_freescan(dp, hdr, &needlog);  	if (needlog) -		xfs_dir2_data_log_header(tp, bp); +		xfs_dir2_data_log_header(args, bp);  	xfs_dir3_data_check(dp, bp);  	/*  	 * See if the size as a shortform is good enough. @@ -863,20 +866,21 @@ xfs_dir2_block_replace(  	dp = args->dp;  	mp = dp->i_mount;  	hdr = bp->b_addr; -	btp = xfs_dir2_block_tail_p(mp, hdr); +	btp = xfs_dir2_block_tail_p(args->geo, hdr);  	blp = xfs_dir2_block_leaf_p(btp);  	/*  	 * Point to the data entry we need to change.  	 */ -	dep = (xfs_dir2_data_entry_t *) -	      ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(blp[ent].address))); +	dep = (xfs_dir2_data_entry_t *)((char *)hdr + +			xfs_dir2_dataptr_to_off(args->geo, +						be32_to_cpu(blp[ent].address)));  	ASSERT(be64_to_cpu(dep->inumber) != args->inumber);  	/*  	 * Change the inode number to the new value.  	 */  	dep->inumber = cpu_to_be64(args->inumber); -	xfs_dir3_dirent_put_ftype(mp, dep, args->filetype); -	xfs_dir2_data_log_entry(args->trans, bp, dep); +	dp->d_ops->data_put_ftype(dep, args->filetype); +	xfs_dir2_data_log_entry(args, bp, dep);  	xfs_dir3_data_check(dp, bp);  	return 0;  } @@ -934,9 +938,9 @@ xfs_dir2_leaf_to_block(  	tp = args->trans;  	mp = dp->i_mount;  	leaf = lbp->b_addr; -	xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); -	ents = xfs_dir3_leaf_ents_p(leaf); -	ltp = xfs_dir2_leaf_tail_p(mp, leaf); +	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); +	ents = dp->d_ops->leaf_ents_p(leaf); +	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);  	ASSERT(leafhdr.magic == XFS_DIR2_LEAF1_MAGIC ||  	       leafhdr.magic == XFS_DIR3_LEAF1_MAGIC); @@ -946,13 +950,13 @@ xfs_dir2_leaf_to_block(  	 * been left behind during no-space-reservation operations.  	 * These will show up in the leaf bests table.  	 */ -	while (dp->i_d.di_size > mp->m_dirblksize) { +	while (dp->i_d.di_size > args->geo->blksize) {  		int hdrsz; -		hdrsz = xfs_dir3_data_hdr_size(xfs_sb_version_hascrc(&mp->m_sb)); +		hdrsz = dp->d_ops->data_entry_offset;  		bestsp = xfs_dir2_leaf_bests_p(ltp);  		if (be16_to_cpu(bestsp[be32_to_cpu(ltp->bestcount) - 1]) == -					    mp->m_dirblksize - hdrsz) { +					    args->geo->blksize - hdrsz) {  			if ((error =  			    xfs_dir2_leaf_trim_data(args, lbp,  				    (xfs_dir2_db_t)(be32_to_cpu(ltp->bestcount) - 1)))) @@ -964,7 +968,7 @@ xfs_dir2_leaf_to_block(  	 * Read the data block if we don't already have it, give up if it fails.  	 */  	if (!dbp) { -		error = xfs_dir3_data_read(tp, dp, mp->m_dirdatablk, -1, &dbp); +		error = xfs_dir3_data_read(tp, dp, args->geo->datablk, -1, &dbp);  		if (error)  			return error;  	} @@ -980,7 +984,7 @@ xfs_dir2_leaf_to_block(  	/*  	 * Look at the last data entry.  	 */ -	tagp = (__be16 *)((char *)hdr + mp->m_dirblksize) - 1; +	tagp = (__be16 *)((char *)hdr + args->geo->blksize) - 1;  	dup = (xfs_dir2_data_unused_t *)((char *)hdr + be16_to_cpu(*tagp));  	/*  	 * If it's not free or is too short we can't do it. @@ -999,12 +1003,12 @@ xfs_dir2_leaf_to_block(  	/*  	 * Use up the space at the end of the block (blp/btp).  	 */ -	xfs_dir2_data_use_free(tp, dbp, dup, mp->m_dirblksize - size, size, +	xfs_dir2_data_use_free(args, dbp, dup, args->geo->blksize - size, size,  		&needlog, &needscan);  	/*  	 * Initialize the block tail.  	 
*/ -	btp = xfs_dir2_block_tail_p(mp, hdr); +	btp = xfs_dir2_block_tail_p(args->geo, hdr);  	btp->count = cpu_to_be32(leafhdr.count - leafhdr.stale);  	btp->stale = 0;  	xfs_dir2_block_log_tail(tp, dbp); @@ -1023,13 +1027,13 @@ xfs_dir2_leaf_to_block(  	 * Scan the bestfree if we need it and log the data block header.  	 */  	if (needscan) -		xfs_dir2_data_freescan(mp, hdr, &needlog); +		xfs_dir2_data_freescan(dp, hdr, &needlog);  	if (needlog) -		xfs_dir2_data_log_header(tp, dbp); +		xfs_dir2_data_log_header(args, dbp);  	/*  	 * Pitch the old leaf block.  	 */ -	error = xfs_da_shrink_inode(args, mp->m_dirleafblk, lbp); +	error = xfs_da_shrink_inode(args, args->geo->leafblk, lbp);  	if (error)  		return error; @@ -1136,15 +1140,15 @@ xfs_dir2_sf_to_block(  	 * The whole thing is initialized to free by the init routine.  	 * Say we're using the leaf and tail area.  	 */ -	dup = xfs_dir3_data_unused_p(hdr); +	dup = dp->d_ops->data_unused_p(hdr);  	needlog = needscan = 0; -	xfs_dir2_data_use_free(tp, bp, dup, mp->m_dirblksize - i, i, &needlog, -		&needscan); +	xfs_dir2_data_use_free(args, bp, dup, args->geo->blksize - i, +			       i, &needlog, &needscan);  	ASSERT(needscan == 0);  	/*  	 * Fill in the tail.  	 */ -	btp = xfs_dir2_block_tail_p(mp, hdr); +	btp = xfs_dir2_block_tail_p(args->geo, hdr);  	btp->count = cpu_to_be32(sfp->count + 2);	/* ., .. */  	btp->stale = 0;  	blp = xfs_dir2_block_leaf_p(btp); @@ -1152,38 +1156,38 @@ xfs_dir2_sf_to_block(  	/*  	 * Remove the freespace, we'll manage it.  	 */ -	xfs_dir2_data_use_free(tp, bp, dup, +	xfs_dir2_data_use_free(args, bp, dup,  		(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr),  		be16_to_cpu(dup->length), &needlog, &needscan);  	/*  	 * Create entry for .  	 */ -	dep = xfs_dir3_data_dot_entry_p(hdr); +	dep = dp->d_ops->data_dot_entry_p(hdr);  	dep->inumber = cpu_to_be64(dp->i_ino);  	dep->namelen = 1;  	dep->name[0] = '.'; -	xfs_dir3_dirent_put_ftype(mp, dep, XFS_DIR3_FT_DIR); -	tagp = xfs_dir3_data_entry_tag_p(mp, dep); +	dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR); +	tagp = dp->d_ops->data_entry_tag_p(dep);  	*tagp = cpu_to_be16((char *)dep - (char *)hdr); -	xfs_dir2_data_log_entry(tp, bp, dep); +	xfs_dir2_data_log_entry(args, bp, dep);  	blp[0].hashval = cpu_to_be32(xfs_dir_hash_dot); -	blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, +	blp[0].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(  				(char *)dep - (char *)hdr));  	/*  	 * Create entry for ..  	 */ -	dep = xfs_dir3_data_dotdot_entry_p(hdr); -	dep->inumber = cpu_to_be64(xfs_dir2_sf_get_parent_ino(sfp)); +	dep = dp->d_ops->data_dotdot_entry_p(hdr); +	dep->inumber = cpu_to_be64(dp->d_ops->sf_get_parent_ino(sfp));  	dep->namelen = 2;  	dep->name[0] = dep->name[1] = '.'; -	xfs_dir3_dirent_put_ftype(mp, dep, XFS_DIR3_FT_DIR); -	tagp = xfs_dir3_data_entry_tag_p(mp, dep); +	dp->d_ops->data_put_ftype(dep, XFS_DIR3_FT_DIR); +	tagp = dp->d_ops->data_entry_tag_p(dep);  	*tagp = cpu_to_be16((char *)dep - (char *)hdr); -	xfs_dir2_data_log_entry(tp, bp, dep); +	xfs_dir2_data_log_entry(args, bp, dep);  	blp[1].hashval = cpu_to_be32(xfs_dir_hash_dotdot); -	blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, +	blp[1].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(  				(char *)dep - (char *)hdr)); -	offset = xfs_dir3_data_first_offset(hdr); +	offset = dp->d_ops->data_first_offset;  	/*  	 * Loop over existing entries, stuff them in.  	 
*/ @@ -1213,8 +1217,10 @@ xfs_dir2_sf_to_block(  			dup->length = cpu_to_be16(newoffset - offset);  			*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16(  				((char *)dup - (char *)hdr)); -			xfs_dir2_data_log_unused(tp, bp, dup); -			xfs_dir2_data_freeinsert(hdr, dup, &dummy); +			xfs_dir2_data_log_unused(args, bp, dup); +			xfs_dir2_data_freeinsert(hdr, +						 dp->d_ops->data_bestfree_p(hdr), +						 dup, &dummy);  			offset += be16_to_cpu(dup->length);  			continue;  		} @@ -1222,25 +1228,24 @@ xfs_dir2_sf_to_block(  		 * Copy a real entry.  		 */  		dep = (xfs_dir2_data_entry_t *)((char *)hdr + newoffset); -		dep->inumber = cpu_to_be64(xfs_dir3_sfe_get_ino(mp, sfp, sfep)); +		dep->inumber = cpu_to_be64(dp->d_ops->sf_get_ino(sfp, sfep));  		dep->namelen = sfep->namelen; -		xfs_dir3_dirent_put_ftype(mp, dep, -					xfs_dir3_sfe_get_ftype(mp, sfp, sfep)); +		dp->d_ops->data_put_ftype(dep, dp->d_ops->sf_get_ftype(sfep));  		memcpy(dep->name, sfep->name, dep->namelen); -		tagp = xfs_dir3_data_entry_tag_p(mp, dep); +		tagp = dp->d_ops->data_entry_tag_p(dep);  		*tagp = cpu_to_be16((char *)dep - (char *)hdr); -		xfs_dir2_data_log_entry(tp, bp, dep); +		xfs_dir2_data_log_entry(args, bp, dep);  		name.name = sfep->name;  		name.len = sfep->namelen;  		blp[2 + i].hashval = cpu_to_be32(mp->m_dirnameops->  							hashname(&name)); -		blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(mp, +		blp[2 + i].address = cpu_to_be32(xfs_dir2_byte_to_dataptr(  						 (char *)dep - (char *)hdr));  		offset = (int)((char *)(tagp + 1) - (char *)hdr);  		if (++i == sfp->count)  			sfep = NULL;  		else -			sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep); +			sfep = dp->d_ops->sf_nextentry(sfp, sfep);  	}  	/* Done with the temporary buffer */  	kmem_free(sfp); diff --git a/fs/xfs/xfs_dir2_data.c b/fs/xfs/xfs_dir2_data.c index 47e1326c169..8c2f6422648 100644 --- a/fs/xfs/xfs_dir2_data.c +++ b/fs/xfs/xfs_dir2_data.c @@ -18,20 +18,19 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" +#include "xfs_da_format.h"  #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h" -#include "xfs_dir2_format.h"  #include "xfs_dir2.h"  #include "xfs_dir2_priv.h"  #include "xfs_error.h" +#include "xfs_trans.h"  #include "xfs_buf_item.h"  #include "xfs_cksum.h" @@ -63,32 +62,52 @@ __xfs_dir3_data_check(  	char			*p;		/* current data position */  	int			stale;		/* count of stale leaves */  	struct xfs_name		name; +	const struct xfs_dir_ops *ops; +	struct xfs_da_geometry	*geo;  	mp = bp->b_target->bt_mount; +	geo = mp->m_dir_geo; + +	/* +	 * We can be passed a null dp here from a verifier, so we need to go the +	 * hard way to get them. +	 */ +	ops = xfs_dir_get_ops(mp, dp); +  	hdr = bp->b_addr; -	bf = xfs_dir3_data_bestfree_p(hdr); -	p = (char *)xfs_dir3_data_entry_p(hdr); +	p = (char *)ops->data_entry_p(hdr);  	switch (hdr->magic) {  	case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC):  	case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): -		btp = xfs_dir2_block_tail_p(mp, hdr); +		btp = xfs_dir2_block_tail_p(geo, hdr);  		lep = xfs_dir2_block_leaf_p(btp);  		endp = (char *)lep; + +		/* +		 * The number of leaf entries is limited by the size of the +		 * block and the amount of space used by the data entries. 
+		 * We don't know how much space is used by the data entries yet, +		 * so just ensure that the count falls somewhere inside the +		 * block right now. +		 */ +		XFS_WANT_CORRUPTED_RETURN(be32_to_cpu(btp->count) < +			((char *)btp - p) / sizeof(struct xfs_dir2_leaf_entry));  		break;  	case cpu_to_be32(XFS_DIR3_DATA_MAGIC):  	case cpu_to_be32(XFS_DIR2_DATA_MAGIC): -		endp = (char *)hdr + mp->m_dirblksize; +		endp = (char *)hdr + geo->blksize;  		break;  	default:  		XFS_ERROR_REPORT("Bad Magic", XFS_ERRLEVEL_LOW, mp);  		return EFSCORRUPTED;  	} -	count = lastfree = freeseen = 0;  	/*  	 * Account for zero bestfree entries.  	 */ +	bf = ops->data_bestfree_p(hdr); +	count = lastfree = freeseen = 0;  	if (!bf[0].length) {  		XFS_WANT_CORRUPTED_RETURN(!bf[0].offset);  		freeseen |= 1 << 0; @@ -121,7 +140,7 @@ __xfs_dir3_data_check(  			XFS_WANT_CORRUPTED_RETURN(  				be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup)) ==  					       (char *)dup - (char *)hdr); -			dfp = xfs_dir2_data_freefind(hdr, dup); +			dfp = xfs_dir2_data_freefind(hdr, bf, dup);  			if (dfp) {  				i = (int)(dfp - bf);  				XFS_WANT_CORRUPTED_RETURN( @@ -147,17 +166,17 @@ __xfs_dir3_data_check(  		XFS_WANT_CORRUPTED_RETURN(  			!xfs_dir_ino_validate(mp, be64_to_cpu(dep->inumber)));  		XFS_WANT_CORRUPTED_RETURN( -			be16_to_cpu(*xfs_dir3_data_entry_tag_p(mp, dep)) == +			be16_to_cpu(*ops->data_entry_tag_p(dep)) ==  					       (char *)dep - (char *)hdr);  		XFS_WANT_CORRUPTED_RETURN( -			xfs_dir3_dirent_get_ftype(mp, dep) < XFS_DIR3_FT_MAX); +				ops->data_get_ftype(dep) < XFS_DIR3_FT_MAX);  		count++;  		lastfree = 0;  		if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||  		    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { -			addr = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, -				(xfs_dir2_data_aoff_t) -				((char *)dep - (char *)hdr)); +			addr = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, +						(xfs_dir2_data_aoff_t) +						((char *)dep - (char *)hdr));  			name.name = dep->name;  			name.len = dep->namelen;  			hash = mp->m_dirnameops->hashname(&name); @@ -168,7 +187,7 @@ __xfs_dir3_data_check(  			}  			XFS_WANT_CORRUPTED_RETURN(i < be32_to_cpu(btp->count));  		} -		p += xfs_dir3_data_entsize(mp, dep->namelen); +		p += ops->data_entsize(dep->namelen);  	}  	/*  	 * Need to have seen all the entries and all the bestfree slots. 
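The bounds check added to __xfs_dir3_data_check above caps btp->count at the number of leaf entries that could possibly fit between the first data entry (p) and the block tail (btp); the exact space consumed by the data entries is not known until the walk that follows completes. A rough stand-alone sketch of that bound, using plain byte offsets and a stand-in for xfs_dir2_leaf_entry (the names here are illustrative, not taken from the patch):

#include <stdint.h>
#include <stddef.h>

/* stand-in for xfs_dir2_leaf_entry: a fixed-size hashval/address pair */
struct leaf_entry {
	uint32_t hashval;
	uint32_t address;
};

/*
 * The block-format check requires the leaf entry count to be smaller than
 * the number of leaf_entry slots that fit between the first data entry
 * (first_off) and the block tail (tail_off), both byte offsets into the
 * directory block.
 */
static int leaf_count_in_bounds(size_t first_off, size_t tail_off,
				uint32_t count)
{
	return count < (tail_off - first_off) / sizeof(struct leaf_entry);
}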
@@ -224,7 +243,6 @@ static void  xfs_dir3_data_reada_verify(  	struct xfs_buf		*bp)  { -	struct xfs_mount	*mp = bp->b_target->bt_mount;  	struct xfs_dir2_data_hdr *hdr = bp->b_addr;  	switch (hdr->magic) { @@ -238,8 +256,8 @@ xfs_dir3_data_reada_verify(  		xfs_dir3_data_verify(bp);  		return;  	default: -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, hdr);  		xfs_buf_ioerror(bp, EFSCORRUPTED); +		xfs_verifier_error(bp);  		break;  	}  } @@ -250,13 +268,14 @@ xfs_dir3_data_read_verify(  {  	struct xfs_mount	*mp = bp->b_target->bt_mount; -	if ((xfs_sb_version_hascrc(&mp->m_sb) && -	     !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -					  XFS_DIR3_DATA_CRC_OFF)) || -	    !xfs_dir3_data_verify(bp)) { -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); +	if (xfs_sb_version_hascrc(&mp->m_sb) && +	     !xfs_buf_verify_cksum(bp, XFS_DIR3_DATA_CRC_OFF)) +		 xfs_buf_ioerror(bp, EFSBADCRC); +	else if (!xfs_dir3_data_verify(bp))  		xfs_buf_ioerror(bp, EFSCORRUPTED); -	} + +	if (bp->b_error) +		xfs_verifier_error(bp);  }  static void @@ -268,8 +287,8 @@ xfs_dir3_data_write_verify(  	struct xfs_dir3_blk_hdr	*hdr3 = bp->b_addr;  	if (!xfs_dir3_data_verify(bp)) { -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);  		xfs_buf_ioerror(bp, EFSCORRUPTED); +		xfs_verifier_error(bp);  		return;  	} @@ -279,7 +298,7 @@ xfs_dir3_data_write_verify(  	if (bip)  		hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); -	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_DATA_CRC_OFF); +	xfs_buf_update_cksum(bp, XFS_DIR3_DATA_CRC_OFF);  }  const struct xfs_buf_ops xfs_dir3_data_buf_ops = { @@ -312,12 +331,11 @@ xfs_dir3_data_read(  int  xfs_dir3_data_readahead( -	struct xfs_trans	*tp,  	struct xfs_inode	*dp,  	xfs_dablk_t		bno,  	xfs_daddr_t		mapped_bno)  { -	return xfs_da_reada_buf(tp, dp, bno, mapped_bno, +	return xfs_da_reada_buf(dp, bno, mapped_bno,  				XFS_DATA_FORK, &xfs_dir3_data_reada_buf_ops);  } @@ -327,19 +345,18 @@ xfs_dir3_data_readahead(   */  xfs_dir2_data_free_t *  xfs_dir2_data_freefind( -	xfs_dir2_data_hdr_t	*hdr,		/* data block */ -	xfs_dir2_data_unused_t	*dup)		/* data unused entry */ +	struct xfs_dir2_data_hdr *hdr,		/* data block header */ +	struct xfs_dir2_data_free *bf,		/* bestfree table pointer */ +	struct xfs_dir2_data_unused *dup)	/* unused space */  {  	xfs_dir2_data_free_t	*dfp;		/* bestfree entry */  	xfs_dir2_data_aoff_t	off;		/* offset value needed */ -	struct xfs_dir2_data_free *bf;  #ifdef DEBUG  	int			matched;	/* matched the value */  	int			seenzero;	/* saw a 0 bestfree entry */  #endif  	off = (xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr); -	bf = xfs_dir3_data_bestfree_p(hdr);  #ifdef DEBUG  	/* @@ -399,11 +416,11 @@ xfs_dir2_data_freefind(   */  xfs_dir2_data_free_t *				/* entry inserted */  xfs_dir2_data_freeinsert( -	xfs_dir2_data_hdr_t	*hdr,		/* data block pointer */ -	xfs_dir2_data_unused_t	*dup,		/* unused space */ +	struct xfs_dir2_data_hdr *hdr,		/* data block pointer */ +	struct xfs_dir2_data_free *dfp,		/* bestfree table pointer */ +	struct xfs_dir2_data_unused *dup,	/* unused space */  	int			*loghead)	/* log the data header (out) */  { -	xfs_dir2_data_free_t	*dfp;		/* bestfree table pointer */  	xfs_dir2_data_free_t	new;		/* new bestfree entry */  	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) || @@ -411,7 +428,6 @@ xfs_dir2_data_freeinsert(  	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||  	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); -	dfp = xfs_dir3_data_bestfree_p(hdr);  	
new.length = dup->length;  	new.offset = cpu_to_be16((char *)dup - (char *)hdr); @@ -444,11 +460,11 @@ xfs_dir2_data_freeinsert(   */  STATIC void  xfs_dir2_data_freeremove( -	xfs_dir2_data_hdr_t	*hdr,		/* data block header */ -	xfs_dir2_data_free_t	*dfp,		/* bestfree entry pointer */ +	struct xfs_dir2_data_hdr *hdr,		/* data block header */ +	struct xfs_dir2_data_free *bf,		/* bestfree table pointer */ +	struct xfs_dir2_data_free *dfp,		/* bestfree entry pointer */  	int			*loghead)	/* out: log data header */  { -	struct xfs_dir2_data_free *bf;  	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||  	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) || @@ -458,7 +474,6 @@ xfs_dir2_data_freeremove(  	/*  	 * It's the first entry, slide the next 2 up.  	 */ -	bf = xfs_dir3_data_bestfree_p(hdr);  	if (dfp == &bf[0]) {  		bf[0] = bf[1];  		bf[1] = bf[2]; @@ -486,9 +501,9 @@ xfs_dir2_data_freeremove(   */  void  xfs_dir2_data_freescan( -	xfs_mount_t		*mp,		/* filesystem mount point */ -	xfs_dir2_data_hdr_t	*hdr,		/* data block header */ -	int			*loghead)	/* out: log data header */ +	struct xfs_inode	*dp, +	struct xfs_dir2_data_hdr *hdr, +	int			*loghead)  {  	xfs_dir2_block_tail_t	*btp;		/* block tail */  	xfs_dir2_data_entry_t	*dep;		/* active data entry */ @@ -496,6 +511,7 @@ xfs_dir2_data_freescan(  	struct xfs_dir2_data_free *bf;  	char			*endp;		/* end of block's data */  	char			*p;		/* current entry pointer */ +	struct xfs_da_geometry	*geo = dp->i_mount->m_dir_geo;  	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||  	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) || @@ -505,19 +521,19 @@ xfs_dir2_data_freescan(  	/*  	 * Start by clearing the table.  	 */ -	bf = xfs_dir3_data_bestfree_p(hdr); +	bf = dp->d_ops->data_bestfree_p(hdr);  	memset(bf, 0, sizeof(*bf) * XFS_DIR2_DATA_FD_COUNT);  	*loghead = 1;  	/*  	 * Set up pointers.  	 */ -	p = (char *)xfs_dir3_data_entry_p(hdr); +	p = (char *)dp->d_ops->data_entry_p(hdr);  	if (hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||  	    hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)) { -		btp = xfs_dir2_block_tail_p(mp, hdr); +		btp = xfs_dir2_block_tail_p(geo, hdr);  		endp = (char *)xfs_dir2_block_leaf_p(btp);  	} else -		endp = (char *)hdr + mp->m_dirblksize; +		endp = (char *)hdr + geo->blksize;  	/*  	 * Loop over the block's entries.  	 */ @@ -529,7 +545,7 @@ xfs_dir2_data_freescan(  		if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) {  			ASSERT((char *)dup - (char *)hdr ==  			       be16_to_cpu(*xfs_dir2_data_unused_tag_p(dup))); -			xfs_dir2_data_freeinsert(hdr, dup, loghead); +			xfs_dir2_data_freeinsert(hdr, bf, dup, loghead);  			p += be16_to_cpu(dup->length);  		}  		/* @@ -538,8 +554,8 @@ xfs_dir2_data_freescan(  		else {  			dep = (xfs_dir2_data_entry_t *)p;  			ASSERT((char *)dep - (char *)hdr == -			       be16_to_cpu(*xfs_dir3_data_entry_tag_p(mp, dep))); -			p += xfs_dir3_data_entsize(mp, dep->namelen); +			       be16_to_cpu(*dp->d_ops->data_entry_tag_p(dep))); +			p += dp->d_ops->data_entsize(dep->namelen);  		}  	}  } @@ -571,8 +587,8 @@ xfs_dir3_data_init(  	/*  	 * Get the buffer set up for the block.  	 
*/ -	error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, blkno), -1, &bp, -		XFS_DATA_FORK); +	error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, blkno), +			       -1, &bp, XFS_DATA_FORK);  	if (error)  		return error;  	bp->b_ops = &xfs_dir3_data_buf_ops; @@ -594,8 +610,8 @@ xfs_dir3_data_init(  	} else  		hdr->magic = cpu_to_be32(XFS_DIR2_DATA_MAGIC); -	bf = xfs_dir3_data_bestfree_p(hdr); -	bf[0].offset = cpu_to_be16(xfs_dir3_data_entry_offset(hdr)); +	bf = dp->d_ops->data_bestfree_p(hdr); +	bf[0].offset = cpu_to_be16(dp->d_ops->data_entry_offset);  	for (i = 1; i < XFS_DIR2_DATA_FD_COUNT; i++) {  		bf[i].length = 0;  		bf[i].offset = 0; @@ -604,18 +620,18 @@ xfs_dir3_data_init(  	/*  	 * Set up an unused entry for the block's body.  	 */ -	dup = xfs_dir3_data_unused_p(hdr); +	dup = dp->d_ops->data_unused_p(hdr);  	dup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG); -	t = mp->m_dirblksize - (uint)xfs_dir3_data_entry_offset(hdr); +	t = args->geo->blksize - (uint)dp->d_ops->data_entry_offset;  	bf[0].length = cpu_to_be16(t);  	dup->length = cpu_to_be16(t);  	*xfs_dir2_data_unused_tag_p(dup) = cpu_to_be16((char *)dup - (char *)hdr);  	/*  	 * Log it and return it.  	 */ -	xfs_dir2_data_log_header(tp, bp); -	xfs_dir2_data_log_unused(tp, bp, dup); +	xfs_dir2_data_log_header(args, bp); +	xfs_dir2_data_log_unused(args, bp, dup);  	*bpp = bp;  	return 0;  } @@ -625,20 +641,19 @@ xfs_dir3_data_init(   */  void  xfs_dir2_data_log_entry( -	struct xfs_trans	*tp, +	struct xfs_da_args	*args,  	struct xfs_buf		*bp,  	xfs_dir2_data_entry_t	*dep)		/* data entry pointer */  {  	struct xfs_dir2_data_hdr *hdr = bp->b_addr; -	struct xfs_mount	*mp = tp->t_mountp;  	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||  	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||  	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||  	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); -	xfs_trans_log_buf(tp, bp, (uint)((char *)dep - (char *)hdr), -		(uint)((char *)(xfs_dir3_data_entry_tag_p(mp, dep) + 1) - +	xfs_trans_log_buf(args->trans, bp, (uint)((char *)dep - (char *)hdr), +		(uint)((char *)(args->dp->d_ops->data_entry_tag_p(dep) + 1) -  		       (char *)hdr - 1));  } @@ -647,17 +662,20 @@ xfs_dir2_data_log_entry(   */  void  xfs_dir2_data_log_header( -	struct xfs_trans	*tp, +	struct xfs_da_args	*args,  	struct xfs_buf		*bp)  { -	xfs_dir2_data_hdr_t	*hdr = bp->b_addr; +#ifdef DEBUG +	struct xfs_dir2_data_hdr *hdr = bp->b_addr;  	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||  	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC) ||  	       hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||  	       hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); +#endif -	xfs_trans_log_buf(tp, bp, 0, xfs_dir3_data_entry_offset(hdr) - 1); +	xfs_trans_log_buf(args->trans, bp, 0, +			  args->dp->d_ops->data_entry_offset - 1);  }  /* @@ -665,7 +683,7 @@ xfs_dir2_data_log_header(   */  void  xfs_dir2_data_log_unused( -	struct xfs_trans	*tp, +	struct xfs_da_args	*args,  	struct xfs_buf		*bp,  	xfs_dir2_data_unused_t	*dup)		/* data unused pointer */  { @@ -679,13 +697,13 @@ xfs_dir2_data_log_unused(  	/*  	 * Log the first part of the unused entry.  	 */ -	xfs_trans_log_buf(tp, bp, (uint)((char *)dup - (char *)hdr), +	xfs_trans_log_buf(args->trans, bp, (uint)((char *)dup - (char *)hdr),  		(uint)((char *)&dup->length + sizeof(dup->length) -  		       1 - (char *)hdr));  	/*  	 * Log the end (tag) of the unused entry.  	 
*/ -	xfs_trans_log_buf(tp, bp, +	xfs_trans_log_buf(args->trans, bp,  		(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr),  		(uint)((char *)xfs_dir2_data_unused_tag_p(dup) - (char *)hdr +  		       sizeof(xfs_dir2_data_off_t) - 1)); @@ -697,7 +715,7 @@ xfs_dir2_data_log_unused(   */  void  xfs_dir2_data_make_free( -	struct xfs_trans	*tp, +	struct xfs_da_args	*args,  	struct xfs_buf		*bp,  	xfs_dir2_data_aoff_t	offset,		/* starting byte offset */  	xfs_dir2_data_aoff_t	len,		/* length in bytes */ @@ -707,14 +725,12 @@ xfs_dir2_data_make_free(  	xfs_dir2_data_hdr_t	*hdr;		/* data block pointer */  	xfs_dir2_data_free_t	*dfp;		/* bestfree pointer */  	char			*endptr;	/* end of data area */ -	xfs_mount_t		*mp;		/* filesystem mount point */  	int			needscan;	/* need to regen bestfree */  	xfs_dir2_data_unused_t	*newdup;	/* new unused entry */  	xfs_dir2_data_unused_t	*postdup;	/* unused entry after us */  	xfs_dir2_data_unused_t	*prevdup;	/* unused entry before us */  	struct xfs_dir2_data_free *bf; -	mp = tp->t_mountp;  	hdr = bp->b_addr;  	/* @@ -722,20 +738,20 @@ xfs_dir2_data_make_free(  	 */  	if (hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||  	    hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)) -		endptr = (char *)hdr + mp->m_dirblksize; +		endptr = (char *)hdr + args->geo->blksize;  	else {  		xfs_dir2_block_tail_t	*btp;	/* block tail */  		ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_BLOCK_MAGIC) ||  			hdr->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC)); -		btp = xfs_dir2_block_tail_p(mp, hdr); +		btp = xfs_dir2_block_tail_p(args->geo, hdr);  		endptr = (char *)xfs_dir2_block_leaf_p(btp);  	}  	/*  	 * If this isn't the start of the block, then back up to  	 * the previous entry and see if it's free.  	 */ -	if (offset > xfs_dir3_data_entry_offset(hdr)) { +	if (offset > args->dp->d_ops->data_entry_offset) {  		__be16			*tagp;	/* tag just before us */  		tagp = (__be16 *)((char *)hdr + offset) - 1; @@ -761,15 +777,15 @@ xfs_dir2_data_make_free(  	 * Previous and following entries are both free,  	 * merge everything into a single free entry.  	 */ -	bf = xfs_dir3_data_bestfree_p(hdr); +	bf = args->dp->d_ops->data_bestfree_p(hdr);  	if (prevdup && postdup) {  		xfs_dir2_data_free_t	*dfp2;	/* another bestfree pointer */  		/*  		 * See if prevdup and/or postdup are in bestfree table.  		 */ -		dfp = xfs_dir2_data_freefind(hdr, prevdup); -		dfp2 = xfs_dir2_data_freefind(hdr, postdup); +		dfp = xfs_dir2_data_freefind(hdr, bf, prevdup); +		dfp2 = xfs_dir2_data_freefind(hdr, bf, postdup);  		/*  		 * We need a rescan unless there are exactly 2 free entries  		 * namely our two.  Then we know what's happening, otherwise @@ -783,7 +799,7 @@ xfs_dir2_data_make_free(  		be16_add_cpu(&prevdup->length, len + be16_to_cpu(postdup->length));  		*xfs_dir2_data_unused_tag_p(prevdup) =  			cpu_to_be16((char *)prevdup - (char *)hdr); -		xfs_dir2_data_log_unused(tp, bp, prevdup); +		xfs_dir2_data_log_unused(args, bp, prevdup);  		if (!needscan) {  			/*  			 * Has to be the case that entries 0 and 1 are @@ -797,12 +813,13 @@ xfs_dir2_data_make_free(  				ASSERT(dfp2 == dfp);  				dfp2 = &bf[1];  			} -			xfs_dir2_data_freeremove(hdr, dfp2, needlogp); -			xfs_dir2_data_freeremove(hdr, dfp, needlogp); +			xfs_dir2_data_freeremove(hdr, bf, dfp2, needlogp); +			xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);  			/*  			 * Now insert the new entry.  			 
*/ -			dfp = xfs_dir2_data_freeinsert(hdr, prevdup, needlogp); +			dfp = xfs_dir2_data_freeinsert(hdr, bf, prevdup, +						       needlogp);  			ASSERT(dfp == &bf[0]);  			ASSERT(dfp->length == prevdup->length);  			ASSERT(!dfp[1].length); @@ -813,19 +830,19 @@ xfs_dir2_data_make_free(  	 * The entry before us is free, merge with it.  	 */  	else if (prevdup) { -		dfp = xfs_dir2_data_freefind(hdr, prevdup); +		dfp = xfs_dir2_data_freefind(hdr, bf, prevdup);  		be16_add_cpu(&prevdup->length, len);  		*xfs_dir2_data_unused_tag_p(prevdup) =  			cpu_to_be16((char *)prevdup - (char *)hdr); -		xfs_dir2_data_log_unused(tp, bp, prevdup); +		xfs_dir2_data_log_unused(args, bp, prevdup);  		/*  		 * If the previous entry was in the table, the new entry  		 * is longer, so it will be in the table too.  Remove  		 * the old one and add the new one.  		 */  		if (dfp) { -			xfs_dir2_data_freeremove(hdr, dfp, needlogp); -			xfs_dir2_data_freeinsert(hdr, prevdup, needlogp); +			xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); +			xfs_dir2_data_freeinsert(hdr, bf, prevdup, needlogp);  		}  		/*  		 * Otherwise we need a scan if the new entry is big enough. @@ -839,21 +856,21 @@ xfs_dir2_data_make_free(  	 * The following entry is free, merge with it.  	 */  	else if (postdup) { -		dfp = xfs_dir2_data_freefind(hdr, postdup); +		dfp = xfs_dir2_data_freefind(hdr, bf, postdup);  		newdup = (xfs_dir2_data_unused_t *)((char *)hdr + offset);  		newdup->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);  		newdup->length = cpu_to_be16(len + be16_to_cpu(postdup->length));  		*xfs_dir2_data_unused_tag_p(newdup) =  			cpu_to_be16((char *)newdup - (char *)hdr); -		xfs_dir2_data_log_unused(tp, bp, newdup); +		xfs_dir2_data_log_unused(args, bp, newdup);  		/*  		 * If the following entry was in the table, the new entry  		 * is longer, so it will be in the table too.  Remove  		 * the old one and add the new one.  		 */  		if (dfp) { -			xfs_dir2_data_freeremove(hdr, dfp, needlogp); -			xfs_dir2_data_freeinsert(hdr, newdup, needlogp); +			xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); +			xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp);  		}  		/*  		 * Otherwise we need a scan if the new entry is big enough. @@ -872,8 +889,8 @@ xfs_dir2_data_make_free(  		newdup->length = cpu_to_be16(len);  		*xfs_dir2_data_unused_tag_p(newdup) =  			cpu_to_be16((char *)newdup - (char *)hdr); -		xfs_dir2_data_log_unused(tp, bp, newdup); -		xfs_dir2_data_freeinsert(hdr, newdup, needlogp); +		xfs_dir2_data_log_unused(args, bp, newdup); +		xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp);  	}  	*needscanp = needscan;  } @@ -883,7 +900,7 @@ xfs_dir2_data_make_free(   */  void  xfs_dir2_data_use_free( -	struct xfs_trans	*tp, +	struct xfs_da_args	*args,  	struct xfs_buf		*bp,  	xfs_dir2_data_unused_t	*dup,		/* unused entry */  	xfs_dir2_data_aoff_t	offset,		/* starting offset to use */ @@ -913,9 +930,9 @@ xfs_dir2_data_use_free(  	/*  	 * Look up the entry in the bestfree table.  	 */ -	dfp = xfs_dir2_data_freefind(hdr, dup);  	oldlen = be16_to_cpu(dup->length); -	bf = xfs_dir3_data_bestfree_p(hdr); +	bf = args->dp->d_ops->data_bestfree_p(hdr); +	dfp = xfs_dir2_data_freefind(hdr, bf, dup);  	ASSERT(dfp || oldlen <= be16_to_cpu(bf[2].length));  	/*  	 * Check for alignment with front and back of the entry. 
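The hunks above converge on one calling convention for the data-block free-space helpers: the caller resolves the bestfree array once through the inode's directory operations vector and then passes it, together with the data header, into xfs_dir2_data_freefind/freeremove/freeinsert. A minimal sketch of that convention follows; only the helper and accessor names are taken from the hunks above, while the wrapper function itself and its parameters are hypothetical.

	/*
	 * Sketch only: replace one unused entry in the bestfree table using the
	 * reworked helpers.  The bestfree array is looked up once via d_ops and
	 * threaded through explicitly instead of being re-derived per call.
	 */
	static void
	example_swap_bestfree_entry(
		struct xfs_da_args		*args,
		struct xfs_dir2_data_hdr	*hdr,
		xfs_dir2_data_unused_t		*olddup,
		xfs_dir2_data_unused_t		*newdup,
		int				*needlogp)
	{
		struct xfs_dir2_data_free	*bf;
		struct xfs_dir2_data_free	*dfp;

		bf = args->dp->d_ops->data_bestfree_p(hdr);
		dfp = xfs_dir2_data_freefind(hdr, bf, olddup);
		if (dfp)
			xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp);
		xfs_dir2_data_freeinsert(hdr, bf, newdup, needlogp);
	}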
@@ -932,7 +949,8 @@ xfs_dir2_data_use_free(  		if (dfp) {  			needscan = (bf[2].offset != 0);  			if (!needscan) -				xfs_dir2_data_freeremove(hdr, dfp, needlogp); +				xfs_dir2_data_freeremove(hdr, bf, dfp, +							 needlogp);  		}  	}  	/* @@ -945,13 +963,14 @@ xfs_dir2_data_use_free(  		newdup->length = cpu_to_be16(oldlen - len);  		*xfs_dir2_data_unused_tag_p(newdup) =  			cpu_to_be16((char *)newdup - (char *)hdr); -		xfs_dir2_data_log_unused(tp, bp, newdup); +		xfs_dir2_data_log_unused(args, bp, newdup);  		/*  		 * If it was in the table, remove it and add the new one.  		 */  		if (dfp) { -			xfs_dir2_data_freeremove(hdr, dfp, needlogp); -			dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp); +			xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); +			dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup, +						       needlogp);  			ASSERT(dfp != NULL);  			ASSERT(dfp->length == newdup->length);  			ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr); @@ -972,13 +991,14 @@ xfs_dir2_data_use_free(  		newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);  		*xfs_dir2_data_unused_tag_p(newdup) =  			cpu_to_be16((char *)newdup - (char *)hdr); -		xfs_dir2_data_log_unused(tp, bp, newdup); +		xfs_dir2_data_log_unused(args, bp, newdup);  		/*  		 * If it was in the table, remove it and add the new one.  		 */  		if (dfp) { -			xfs_dir2_data_freeremove(hdr, dfp, needlogp); -			dfp = xfs_dir2_data_freeinsert(hdr, newdup, needlogp); +			xfs_dir2_data_freeremove(hdr, bf, dfp, needlogp); +			dfp = xfs_dir2_data_freeinsert(hdr, bf, newdup, +						       needlogp);  			ASSERT(dfp != NULL);  			ASSERT(dfp->length == newdup->length);  			ASSERT(be16_to_cpu(dfp->offset) == (char *)newdup - (char *)hdr); @@ -999,13 +1019,13 @@ xfs_dir2_data_use_free(  		newdup->length = cpu_to_be16(((char *)hdr + offset) - (char *)newdup);  		*xfs_dir2_data_unused_tag_p(newdup) =  			cpu_to_be16((char *)newdup - (char *)hdr); -		xfs_dir2_data_log_unused(tp, bp, newdup); +		xfs_dir2_data_log_unused(args, bp, newdup);  		newdup2 = (xfs_dir2_data_unused_t *)((char *)hdr + offset + len);  		newdup2->freetag = cpu_to_be16(XFS_DIR2_DATA_FREE_TAG);  		newdup2->length = cpu_to_be16(oldlen - len - be16_to_cpu(newdup->length));  		*xfs_dir2_data_unused_tag_p(newdup2) =  			cpu_to_be16((char *)newdup2 - (char *)hdr); -		xfs_dir2_data_log_unused(tp, bp, newdup2); +		xfs_dir2_data_log_unused(args, bp, newdup2);  		/*  		 * If the old entry was in the table, we need to scan  		 * if the 3rd entry was valid, since these entries @@ -1017,9 +1037,11 @@ xfs_dir2_data_use_free(  		if (dfp) {  			needscan = (bf[2].length != 0);  			if (!needscan) { -				xfs_dir2_data_freeremove(hdr, dfp, needlogp); -				xfs_dir2_data_freeinsert(hdr, newdup, needlogp); -				xfs_dir2_data_freeinsert(hdr, newdup2, +				xfs_dir2_data_freeremove(hdr, bf, dfp, +							 needlogp); +				xfs_dir2_data_freeinsert(hdr, bf, newdup, +							 needlogp); +				xfs_dir2_data_freeinsert(hdr, bf, newdup2,  							 needlogp);  			}  		} diff --git a/fs/xfs/xfs_dir2_leaf.c b/fs/xfs/xfs_dir2_leaf.c index 1021c8356d0..fb0aad4440c 100644 --- a/fs/xfs/xfs_dir2_leaf.c +++ b/fs/xfs/xfs_dir2_leaf.c @@ -18,23 +18,21 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" +#include "xfs_da_format.h"  #include "xfs_da_btree.h" 
-#include "xfs_bmap_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h"  #include "xfs_bmap.h" -#include "xfs_dir2_format.h"  #include "xfs_dir2.h"  #include "xfs_dir2_priv.h"  #include "xfs_error.h"  #include "xfs_trace.h" +#include "xfs_trans.h"  #include "xfs_buf_item.h"  #include "xfs_cksum.h" @@ -43,30 +41,31 @@   */  static int xfs_dir2_leaf_lookup_int(xfs_da_args_t *args, struct xfs_buf **lbpp,  				    int *indexp, struct xfs_buf **dbpp); -static void xfs_dir3_leaf_log_bests(struct xfs_trans *tp, struct xfs_buf *bp, -				    int first, int last); -static void xfs_dir3_leaf_log_tail(struct xfs_trans *tp, struct xfs_buf *bp); +static void xfs_dir3_leaf_log_bests(struct xfs_da_args *args, +				    struct xfs_buf *bp, int first, int last); +static void xfs_dir3_leaf_log_tail(struct xfs_da_args *args, +				   struct xfs_buf *bp);  /*   * Check the internal consistency of a leaf1 block.   * Pop an assert if something is wrong.   */  #ifdef DEBUG -#define	xfs_dir3_leaf_check(mp, bp) \ +#define	xfs_dir3_leaf_check(dp, bp) \  do { \ -	if (!xfs_dir3_leaf1_check((mp), (bp))) \ +	if (!xfs_dir3_leaf1_check((dp), (bp))) \  		ASSERT(0); \  } while (0);  STATIC bool  xfs_dir3_leaf1_check( -	struct xfs_mount	*mp, +	struct xfs_inode	*dp,  	struct xfs_buf		*bp)  {  	struct xfs_dir2_leaf	*leaf = bp->b_addr;  	struct xfs_dir3_icleaf_hdr leafhdr; -	xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); +	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);  	if (leafhdr.magic == XFS_DIR3_LEAF1_MAGIC) {  		struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; @@ -75,71 +74,16 @@ xfs_dir3_leaf1_check(  	} else if (leafhdr.magic != XFS_DIR2_LEAF1_MAGIC)  		return false; -	return xfs_dir3_leaf_check_int(mp, &leafhdr, leaf); +	return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf);  }  #else -#define	xfs_dir3_leaf_check(mp, bp) +#define	xfs_dir3_leaf_check(dp, bp)  #endif -void -xfs_dir3_leaf_hdr_from_disk( -	struct xfs_dir3_icleaf_hdr	*to, -	struct xfs_dir2_leaf		*from) -{ -	if (from->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) || -	    from->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC)) { -		to->forw = be32_to_cpu(from->hdr.info.forw); -		to->back = be32_to_cpu(from->hdr.info.back); -		to->magic = be16_to_cpu(from->hdr.info.magic); -		to->count = be16_to_cpu(from->hdr.count); -		to->stale = be16_to_cpu(from->hdr.stale); -	} else { -		struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr *)from; - -		to->forw = be32_to_cpu(hdr3->info.hdr.forw); -		to->back = be32_to_cpu(hdr3->info.hdr.back); -		to->magic = be16_to_cpu(hdr3->info.hdr.magic); -		to->count = be16_to_cpu(hdr3->count); -		to->stale = be16_to_cpu(hdr3->stale); -	} - -	ASSERT(to->magic == XFS_DIR2_LEAF1_MAGIC || -	       to->magic == XFS_DIR3_LEAF1_MAGIC || -	       to->magic == XFS_DIR2_LEAFN_MAGIC || -	       to->magic == XFS_DIR3_LEAFN_MAGIC); -} - -void -xfs_dir3_leaf_hdr_to_disk( -	struct xfs_dir2_leaf		*to, -	struct xfs_dir3_icleaf_hdr	*from) -{ -	ASSERT(from->magic == XFS_DIR2_LEAF1_MAGIC || -	       from->magic == XFS_DIR3_LEAF1_MAGIC || -	       from->magic == XFS_DIR2_LEAFN_MAGIC || -	       from->magic == XFS_DIR3_LEAFN_MAGIC); - -	if (from->magic == XFS_DIR2_LEAF1_MAGIC || -	    from->magic == XFS_DIR2_LEAFN_MAGIC) { -		to->hdr.info.forw = cpu_to_be32(from->forw); -		to->hdr.info.back = cpu_to_be32(from->back); -		to->hdr.info.magic = cpu_to_be16(from->magic); -		to->hdr.count = cpu_to_be16(from->count); -		to->hdr.stale = cpu_to_be16(from->stale); -	} else { -		struct xfs_dir3_leaf_hdr *hdr3 = (struct xfs_dir3_leaf_hdr 
*)to; - -		hdr3->info.hdr.forw = cpu_to_be32(from->forw); -		hdr3->info.hdr.back = cpu_to_be32(from->back); -		hdr3->info.hdr.magic = cpu_to_be16(from->magic); -		hdr3->count = cpu_to_be16(from->count); -		hdr3->stale = cpu_to_be16(from->stale); -	} -} -  bool  xfs_dir3_leaf_check_int(  	struct xfs_mount	*mp, +	struct xfs_inode	*dp,  	struct xfs_dir3_icleaf_hdr *hdr,  	struct xfs_dir2_leaf	*leaf)  { @@ -147,16 +91,30 @@ xfs_dir3_leaf_check_int(  	xfs_dir2_leaf_tail_t	*ltp;  	int			stale;  	int			i; +	const struct xfs_dir_ops *ops; +	struct xfs_dir3_icleaf_hdr leafhdr; +	struct xfs_da_geometry	*geo = mp->m_dir_geo; + +	/* +	 * we can be passed a null dp here from a verifier, so we need to go the +	 * hard way to get them. +	 */ +	ops = xfs_dir_get_ops(mp, dp); + +	if (!hdr) { +		ops->leaf_hdr_from_disk(&leafhdr, leaf); +		hdr = &leafhdr; +	} -	ents = xfs_dir3_leaf_ents_p(leaf); -	ltp = xfs_dir2_leaf_tail_p(mp, leaf); +	ents = ops->leaf_ents_p(leaf); +	ltp = xfs_dir2_leaf_tail_p(geo, leaf);  	/*  	 * XXX (dgc): This value is not restrictive enough.  	 * Should factor in the size of the bests table as well.  	 * We can deduce a value for that from di_size.  	 */ -	if (hdr->count > xfs_dir3_max_leaf_ents(mp, leaf)) +	if (hdr->count > ops->leaf_max_ents(geo))  		return false;  	/* Leaves and bests don't overlap in leaf format. */ @@ -192,7 +150,6 @@ xfs_dir3_leaf_verify(  {  	struct xfs_mount	*mp = bp->b_target->bt_mount;  	struct xfs_dir2_leaf	*leaf = bp->b_addr; -	struct xfs_dir3_icleaf_hdr leafhdr;  	ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC); @@ -214,8 +171,7 @@ xfs_dir3_leaf_verify(  			return false;  	} -	xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); -	return xfs_dir3_leaf_check_int(mp, &leafhdr, leaf); +	return xfs_dir3_leaf_check_int(mp, NULL, NULL, leaf);  }  static void @@ -225,13 +181,14 @@ __read_verify(  {  	struct xfs_mount	*mp = bp->b_target->bt_mount; -	if ((xfs_sb_version_hascrc(&mp->m_sb) && -	     !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -					  XFS_DIR3_LEAF_CRC_OFF)) || -	    !xfs_dir3_leaf_verify(bp, magic)) { -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); +	if (xfs_sb_version_hascrc(&mp->m_sb) && +	     !xfs_buf_verify_cksum(bp, XFS_DIR3_LEAF_CRC_OFF)) +		xfs_buf_ioerror(bp, EFSBADCRC); +	else if (!xfs_dir3_leaf_verify(bp, magic))  		xfs_buf_ioerror(bp, EFSCORRUPTED); -	} + +	if (bp->b_error) +		xfs_verifier_error(bp);  }  static void @@ -244,8 +201,8 @@ __write_verify(  	struct xfs_dir3_leaf_hdr *hdr3 = bp->b_addr;  	if (!xfs_dir3_leaf_verify(bp, magic)) { -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);  		xfs_buf_ioerror(bp, EFSCORRUPTED); +		xfs_verifier_error(bp);  		return;  	} @@ -255,7 +212,7 @@ __write_verify(  	if (bip)  		hdr3->info.lsn = cpu_to_be64(bip->bli_item.li_lsn); -	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_LEAF_CRC_OFF); +	xfs_buf_update_cksum(bp, XFS_DIR3_LEAF_CRC_OFF);  }  static void @@ -368,7 +325,7 @@ xfs_dir3_leaf_init(  	if (type == XFS_DIR2_LEAF1_MAGIC) {  		struct xfs_dir2_leaf_tail *ltp; -		ltp = xfs_dir2_leaf_tail_p(mp, leaf); +		ltp = xfs_dir2_leaf_tail_p(mp->m_dir_geo, leaf);  		ltp->bestcount = 0;  		bp->b_ops = &xfs_dir3_leaf1_buf_ops;  		xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DIR_LEAF1_BUF); @@ -392,18 +349,18 @@ xfs_dir3_leaf_get_buf(  	int			error;  	ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC); -	ASSERT(bno >= XFS_DIR2_LEAF_FIRSTDB(mp) && -	       bno < XFS_DIR2_FREE_FIRSTDB(mp)); +	ASSERT(bno >= 
xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET) && +	       bno < xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET)); -	error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, bno), -1, &bp, -			       XFS_DATA_FORK); +	error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, bno), +			       -1, &bp, XFS_DATA_FORK);  	if (error)  		return error;  	xfs_dir3_leaf_init(mp, tp, bp, dp->i_ino, magic); -	xfs_dir3_leaf_log_header(tp, bp); +	xfs_dir3_leaf_log_header(args, bp);  	if (magic == XFS_DIR2_LEAF1_MAGIC) -		xfs_dir3_leaf_log_tail(tp, bp); +		xfs_dir3_leaf_log_tail(args, bp);  	*bpp = bp;  	return 0;  } @@ -448,8 +405,8 @@ xfs_dir2_block_to_leaf(  	if ((error = xfs_da_grow_inode(args, &blkno))) {  		return error;  	} -	ldb = xfs_dir2_da_to_db(mp, blkno); -	ASSERT(ldb == XFS_DIR2_LEAF_FIRSTDB(mp)); +	ldb = xfs_dir2_da_to_db(args->geo, blkno); +	ASSERT(ldb == xfs_dir2_byte_to_db(args->geo, XFS_DIR2_LEAF_OFFSET));  	/*  	 * Initialize the leaf block, get a buffer for it.  	 */ @@ -460,35 +417,35 @@ xfs_dir2_block_to_leaf(  	leaf = lbp->b_addr;  	hdr = dbp->b_addr;  	xfs_dir3_data_check(dp, dbp); -	btp = xfs_dir2_block_tail_p(mp, hdr); +	btp = xfs_dir2_block_tail_p(args->geo, hdr);  	blp = xfs_dir2_block_leaf_p(btp); -	bf = xfs_dir3_data_bestfree_p(hdr); -	ents = xfs_dir3_leaf_ents_p(leaf); +	bf = dp->d_ops->data_bestfree_p(hdr); +	ents = dp->d_ops->leaf_ents_p(leaf);  	/*  	 * Set the counts in the leaf header.  	 */ -	xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); +	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);  	leafhdr.count = be32_to_cpu(btp->count);  	leafhdr.stale = be32_to_cpu(btp->stale); -	xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr); -	xfs_dir3_leaf_log_header(tp, lbp); +	dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); +	xfs_dir3_leaf_log_header(args, lbp);  	/*  	 * Could compact these but I think we always do the conversion  	 * after squeezing out stale entries.  	 */  	memcpy(ents, blp, be32_to_cpu(btp->count) * sizeof(xfs_dir2_leaf_entry_t)); -	xfs_dir3_leaf_log_ents(tp, lbp, 0, leafhdr.count - 1); +	xfs_dir3_leaf_log_ents(args, lbp, 0, leafhdr.count - 1);  	needscan = 0;  	needlog = 1;  	/*  	 * Make the space formerly occupied by the leaf entries and block  	 * tail be free.  	 */ -	xfs_dir2_data_make_free(tp, dbp, +	xfs_dir2_data_make_free(args, dbp,  		(xfs_dir2_data_aoff_t)((char *)blp - (char *)hdr), -		(xfs_dir2_data_aoff_t)((char *)hdr + mp->m_dirblksize - +		(xfs_dir2_data_aoff_t)((char *)hdr + args->geo->blksize -  				       (char *)blp),  		&needlog, &needscan);  	/* @@ -502,11 +459,11 @@ xfs_dir2_block_to_leaf(  		hdr->magic = cpu_to_be32(XFS_DIR3_DATA_MAGIC);  	if (needscan) -		xfs_dir2_data_freescan(mp, hdr, &needlog); +		xfs_dir2_data_freescan(dp, hdr, &needlog);  	/*  	 * Set up leaf tail and bests table.  	 */ -	ltp = xfs_dir2_leaf_tail_p(mp, leaf); +	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);  	ltp->bestcount = cpu_to_be32(1);  	bestsp = xfs_dir2_leaf_bests_p(ltp);  	bestsp[0] =  bf[0].length; @@ -514,10 +471,10 @@ xfs_dir2_block_to_leaf(  	 * Log the data header and leaf bests table.  	 
*/  	if (needlog) -		xfs_dir2_data_log_header(tp, dbp); -	xfs_dir3_leaf_check(mp, lbp); +		xfs_dir2_data_log_header(args, dbp); +	xfs_dir3_leaf_check(dp, lbp);  	xfs_dir3_data_check(dp, dbp); -	xfs_dir3_leaf_log_bests(tp, lbp, 0, 0); +	xfs_dir3_leaf_log_bests(args, lbp, 0, 0);  	return 0;  } @@ -686,7 +643,7 @@ xfs_dir2_leaf_addname(  	tp = args->trans;  	mp = dp->i_mount; -	error = xfs_dir3_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp); +	error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp);  	if (error)  		return error; @@ -698,11 +655,11 @@ xfs_dir2_leaf_addname(  	 */  	index = xfs_dir2_leaf_search_hash(args, lbp);  	leaf = lbp->b_addr; -	ltp = xfs_dir2_leaf_tail_p(mp, leaf); -	ents = xfs_dir3_leaf_ents_p(leaf); -	xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); +	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); +	ents = dp->d_ops->leaf_ents_p(leaf); +	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);  	bestsp = xfs_dir2_leaf_bests_p(ltp); -	length = xfs_dir3_data_entsize(mp, args->namelen); +	length = dp->d_ops->data_entsize(args->namelen);  	/*  	 * See if there are any entries with the same hash value @@ -715,7 +672,7 @@ xfs_dir2_leaf_addname(  	     index++, lep++) {  		if (be32_to_cpu(lep->address) == XFS_DIR2_NULL_DATAPTR)  			continue; -		i = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); +		i = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address));  		ASSERT(i < be32_to_cpu(ltp->bestcount));  		ASSERT(bestsp[i] != cpu_to_be16(NULLDATAOFF));  		if (be16_to_cpu(bestsp[i]) >= length) { @@ -855,16 +812,17 @@ xfs_dir2_leaf_addname(  			memmove(&bestsp[0], &bestsp[1],  				be32_to_cpu(ltp->bestcount) * sizeof(bestsp[0]));  			be32_add_cpu(&ltp->bestcount, 1); -			xfs_dir3_leaf_log_tail(tp, lbp); -			xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); +			xfs_dir3_leaf_log_tail(args, lbp); +			xfs_dir3_leaf_log_bests(args, lbp, 0, +						be32_to_cpu(ltp->bestcount) - 1);  		}  		/*  		 * If we're filling in a previously empty block just log it.  		 */  		else -			xfs_dir3_leaf_log_bests(tp, lbp, use_block, use_block); +			xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block);  		hdr = dbp->b_addr; -		bf = xfs_dir3_data_bestfree_p(hdr); +		bf = dp->d_ops->data_bestfree_p(hdr);  		bestsp[use_block] = bf[0].length;  		grown = 1;  	} else { @@ -873,14 +831,14 @@ xfs_dir2_leaf_addname(  		 * Just read that one in.  		 */  		error = xfs_dir3_data_read(tp, dp, -					   xfs_dir2_db_to_da(mp, use_block), -					   -1, &dbp); +				   xfs_dir2_db_to_da(args->geo, use_block), +				   -1, &dbp);  		if (error) {  			xfs_trans_brelse(tp, lbp);  			return error;  		}  		hdr = dbp->b_addr; -		bf = xfs_dir3_data_bestfree_p(hdr); +		bf = dp->d_ops->data_bestfree_p(hdr);  		grown = 0;  	}  	/* @@ -893,7 +851,7 @@ xfs_dir2_leaf_addname(  	/*  	 * Mark the initial part of our freespace in use for the new entry.  	 */ -	xfs_dir2_data_use_free(tp, dbp, dup, +	xfs_dir2_data_use_free(args, dbp, dup,  		(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,  		&needlog, &needscan);  	/* @@ -903,20 +861,20 @@ xfs_dir2_leaf_addname(  	dep->inumber = cpu_to_be64(args->inumber);  	dep->namelen = args->namelen;  	memcpy(dep->name, args->name, dep->namelen); -	xfs_dir3_dirent_put_ftype(mp, dep, args->filetype); -	tagp = xfs_dir3_data_entry_tag_p(mp, dep); +	dp->d_ops->data_put_ftype(dep, args->filetype); +	tagp = dp->d_ops->data_entry_tag_p(dep);  	*tagp = cpu_to_be16((char *)dep - (char *)hdr);  	/*  	 * Need to scan fix up the bestfree table.  	 
*/  	if (needscan) -		xfs_dir2_data_freescan(mp, hdr, &needlog); +		xfs_dir2_data_freescan(dp, hdr, &needlog);  	/*  	 * Need to log the data block's header.  	 */  	if (needlog) -		xfs_dir2_data_log_header(tp, dbp); -	xfs_dir2_data_log_entry(tp, dbp, dep); +		xfs_dir2_data_log_header(args, dbp); +	xfs_dir2_data_log_entry(args, dbp, dep);  	/*  	 * If the bests table needs to be changed, do it.  	 * Log the change unless we've already done that. @@ -924,7 +882,7 @@ xfs_dir2_leaf_addname(  	if (be16_to_cpu(bestsp[use_block]) != be16_to_cpu(bf[0].length)) {  		bestsp[use_block] = bf[0].length;  		if (!grown) -			xfs_dir3_leaf_log_bests(tp, lbp, use_block, use_block); +			xfs_dir3_leaf_log_bests(args, lbp, use_block, use_block);  	}  	lep = xfs_dir3_leaf_find_entry(&leafhdr, ents, index, compact, lowstale, @@ -934,15 +892,16 @@ xfs_dir2_leaf_addname(  	 * Fill in the new leaf entry.  	 */  	lep->hashval = cpu_to_be32(args->hashval); -	lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, use_block, +	lep->address = cpu_to_be32( +				xfs_dir2_db_off_to_dataptr(args->geo, use_block,  				be16_to_cpu(*tagp)));  	/*  	 * Log the leaf fields and give up the buffers.  	 */ -	xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr); -	xfs_dir3_leaf_log_header(tp, lbp); -	xfs_dir3_leaf_log_ents(tp, lbp, lfloglow, lfloghigh); -	xfs_dir3_leaf_check(mp, lbp); +	dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); +	xfs_dir3_leaf_log_header(args, lbp); +	xfs_dir3_leaf_log_ents(args, lbp, lfloglow, lfloghigh); +	xfs_dir3_leaf_check(dp, lbp);  	xfs_dir3_data_check(dp, dbp);  	return 0;  } @@ -962,6 +921,7 @@ xfs_dir3_leaf_compact(  	int		loglow;		/* first leaf entry to log */  	int		to;		/* target leaf index */  	struct xfs_dir2_leaf_entry *ents; +	struct xfs_inode *dp = args->dp;  	leaf = bp->b_addr;  	if (!leafhdr->stale) @@ -970,7 +930,7 @@ xfs_dir3_leaf_compact(  	/*  	 * Compress out the stale entries in place.  	 
*/ -	ents = xfs_dir3_leaf_ents_p(leaf); +	ents = dp->d_ops->leaf_ents_p(leaf);  	for (from = to = 0, loglow = -1; from < leafhdr->count; from++) {  		if (ents[from].address == cpu_to_be32(XFS_DIR2_NULL_DATAPTR))  			continue; @@ -991,10 +951,10 @@ xfs_dir3_leaf_compact(  	leafhdr->count -= leafhdr->stale;  	leafhdr->stale = 0; -	xfs_dir3_leaf_hdr_to_disk(leaf, leafhdr); -	xfs_dir3_leaf_log_header(args->trans, bp); +	dp->d_ops->leaf_hdr_to_disk(leaf, leafhdr); +	xfs_dir3_leaf_log_header(args, bp);  	if (loglow != -1) -		xfs_dir3_leaf_log_ents(args->trans, bp, loglow, to - 1); +		xfs_dir3_leaf_log_ents(args, bp, loglow, to - 1);  }  /* @@ -1096,7 +1056,7 @@ xfs_dir3_leaf_compact_x1(   */  static void  xfs_dir3_leaf_log_bests( -	xfs_trans_t		*tp,		/* transaction pointer */ +	struct xfs_da_args	*args,  	struct xfs_buf		*bp,		/* leaf buffer */  	int			first,		/* first entry to log */  	int			last)		/* last entry to log */ @@ -1109,10 +1069,11 @@ xfs_dir3_leaf_log_bests(  	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||  	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC)); -	ltp = xfs_dir2_leaf_tail_p(tp->t_mountp, leaf); +	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);  	firstb = xfs_dir2_leaf_bests_p(ltp) + first;  	lastb = xfs_dir2_leaf_bests_p(ltp) + last; -	xfs_trans_log_buf(tp, bp, (uint)((char *)firstb - (char *)leaf), +	xfs_trans_log_buf(args->trans, bp, +		(uint)((char *)firstb - (char *)leaf),  		(uint)((char *)lastb - (char *)leaf + sizeof(*lastb) - 1));  } @@ -1121,10 +1082,10 @@ xfs_dir3_leaf_log_bests(   */  void  xfs_dir3_leaf_log_ents( -	xfs_trans_t		*tp,		/* transaction pointer */ -	struct xfs_buf		*bp,		/* leaf buffer */ -	int			first,		/* first entry to log */ -	int			last)		/* last entry to log */ +	struct xfs_da_args	*args, +	struct xfs_buf		*bp, +	int			first, +	int			last)  {  	xfs_dir2_leaf_entry_t	*firstlep;	/* pointer to first entry */  	xfs_dir2_leaf_entry_t	*lastlep;	/* pointer to last entry */ @@ -1136,10 +1097,11 @@ xfs_dir3_leaf_log_ents(  	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||  	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)); -	ents = xfs_dir3_leaf_ents_p(leaf); +	ents = args->dp->d_ops->leaf_ents_p(leaf);  	firstlep = &ents[first];  	lastlep = &ents[last]; -	xfs_trans_log_buf(tp, bp, (uint)((char *)firstlep - (char *)leaf), +	xfs_trans_log_buf(args->trans, bp, +		(uint)((char *)firstlep - (char *)leaf),  		(uint)((char *)lastlep - (char *)leaf + sizeof(*lastlep) - 1));  } @@ -1148,7 +1110,7 @@ xfs_dir3_leaf_log_ents(   */  void  xfs_dir3_leaf_log_header( -	struct xfs_trans	*tp, +	struct xfs_da_args	*args,  	struct xfs_buf		*bp)  {  	struct xfs_dir2_leaf	*leaf = bp->b_addr; @@ -1158,8 +1120,9 @@ xfs_dir3_leaf_log_header(  	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||  	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)); -	xfs_trans_log_buf(tp, bp, (uint)((char *)&leaf->hdr - (char *)leaf), -			  xfs_dir3_leaf_hdr_size(leaf) - 1); +	xfs_trans_log_buf(args->trans, bp, +			  (uint)((char *)&leaf->hdr - (char *)leaf), +			  args->dp->d_ops->leaf_hdr_size - 1);  }  /* @@ -1167,21 +1130,20 @@ xfs_dir3_leaf_log_header(   */  STATIC void  xfs_dir3_leaf_log_tail( -	struct xfs_trans	*tp, +	struct xfs_da_args	*args,  	struct xfs_buf		*bp)  {  	struct xfs_dir2_leaf	*leaf = bp->b_addr;  	xfs_dir2_leaf_tail_t	*ltp;		/* leaf tail structure */ -	struct xfs_mount	*mp = tp->t_mountp;  	ASSERT(leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAF1_MAGIC) ||  	       
leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAF1_MAGIC) ||  	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) ||  	       leaf->hdr.info.magic == cpu_to_be16(XFS_DIR3_LEAFN_MAGIC)); -	ltp = xfs_dir2_leaf_tail_p(mp, leaf); -	xfs_trans_log_buf(tp, bp, (uint)((char *)ltp - (char *)leaf), -		(uint)(mp->m_dirblksize - 1)); +	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf); +	xfs_trans_log_buf(args->trans, bp, (uint)((char *)ltp - (char *)leaf), +		(uint)(args->geo->blksize - 1));  }  /* @@ -1214,9 +1176,9 @@ xfs_dir2_leaf_lookup(  	}  	tp = args->trans;  	dp = args->dp; -	xfs_dir3_leaf_check(dp->i_mount, lbp); +	xfs_dir3_leaf_check(dp, lbp);  	leaf = lbp->b_addr; -	ents = xfs_dir3_leaf_ents_p(leaf); +	ents = dp->d_ops->leaf_ents_p(leaf);  	/*  	 * Get to the leaf entry and contained data entry address.  	 */ @@ -1227,12 +1189,12 @@ xfs_dir2_leaf_lookup(  	 */  	dep = (xfs_dir2_data_entry_t *)  	      ((char *)dbp->b_addr + -	       xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address))); +	       xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)));  	/*  	 * Return the found inode number & CI name if appropriate  	 */  	args->inumber = be64_to_cpu(dep->inumber); -	args->filetype = xfs_dir3_dirent_get_ftype(dp->i_mount, dep); +	args->filetype = dp->d_ops->data_get_ftype(dep);  	error = xfs_dir_cilookup_result(args, dep->name, dep->namelen);  	xfs_trans_brelse(tp, dbp);  	xfs_trans_brelse(tp, lbp); @@ -1273,15 +1235,15 @@ xfs_dir2_leaf_lookup_int(  	tp = args->trans;  	mp = dp->i_mount; -	error = xfs_dir3_leaf_read(tp, dp, mp->m_dirleafblk, -1, &lbp); +	error = xfs_dir3_leaf_read(tp, dp, args->geo->leafblk, -1, &lbp);  	if (error)  		return error;  	*lbpp = lbp;  	leaf = lbp->b_addr; -	xfs_dir3_leaf_check(mp, lbp); -	ents = xfs_dir3_leaf_ents_p(leaf); -	xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); +	xfs_dir3_leaf_check(dp, lbp); +	ents = dp->d_ops->leaf_ents_p(leaf); +	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);  	/*  	 * Look for the first leaf entry with our hash value. @@ -1302,7 +1264,8 @@ xfs_dir2_leaf_lookup_int(  		/*  		 * Get the new data block number.  		 */ -		newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); +		newdb = xfs_dir2_dataptr_to_db(args->geo, +					       be32_to_cpu(lep->address));  		/*  		 * If it's not the same as the old data block number,  		 * need to pitch the old one and read the new one. @@ -1311,8 +1274,8 @@ xfs_dir2_leaf_lookup_int(  			if (dbp)  				xfs_trans_brelse(tp, dbp);  			error = xfs_dir3_data_read(tp, dp, -						   xfs_dir2_db_to_da(mp, newdb), -						   -1, &dbp); +					   xfs_dir2_db_to_da(args->geo, newdb), +					   -1, &dbp);  			if (error) {  				xfs_trans_brelse(tp, lbp);  				return error; @@ -1323,7 +1286,8 @@ xfs_dir2_leaf_lookup_int(  		 * Point to the data entry.  		 */  		dep = (xfs_dir2_data_entry_t *)((char *)dbp->b_addr + -			xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); +			xfs_dir2_dataptr_to_off(args->geo, +						be32_to_cpu(lep->address)));  		/*  		 * Compare name and if it's an exact match, return the index  		 * and buffer. 
If it's the first case-insensitive match, store @@ -1352,8 +1316,8 @@ xfs_dir2_leaf_lookup_int(  		if (cidb != curdb) {  			xfs_trans_brelse(tp, dbp);  			error = xfs_dir3_data_read(tp, dp, -						   xfs_dir2_db_to_da(mp, cidb), -						   -1, &dbp); +					   xfs_dir2_db_to_da(args->geo, cidb), +					   -1, &dbp);  			if (error) {  				xfs_trans_brelse(tp, lbp);  				return error; @@ -1415,60 +1379,60 @@ xfs_dir2_leaf_removename(  	leaf = lbp->b_addr;  	hdr = dbp->b_addr;  	xfs_dir3_data_check(dp, dbp); -	bf = xfs_dir3_data_bestfree_p(hdr); -	xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); -	ents = xfs_dir3_leaf_ents_p(leaf); +	bf = dp->d_ops->data_bestfree_p(hdr); +	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); +	ents = dp->d_ops->leaf_ents_p(leaf);  	/*  	 * Point to the leaf entry, use that to point to the data entry.  	 */  	lep = &ents[index]; -	db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); -	dep = (xfs_dir2_data_entry_t *) -	      ((char *)hdr + xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); +	db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address)); +	dep = (xfs_dir2_data_entry_t *)((char *)hdr + +		xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)));  	needscan = needlog = 0;  	oldbest = be16_to_cpu(bf[0].length); -	ltp = xfs_dir2_leaf_tail_p(mp, leaf); +	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);  	bestsp = xfs_dir2_leaf_bests_p(ltp);  	ASSERT(be16_to_cpu(bestsp[db]) == oldbest);  	/*  	 * Mark the former data entry unused.  	 */ -	xfs_dir2_data_make_free(tp, dbp, +	xfs_dir2_data_make_free(args, dbp,  		(xfs_dir2_data_aoff_t)((char *)dep - (char *)hdr), -		xfs_dir3_data_entsize(mp, dep->namelen), &needlog, &needscan); +		dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan);  	/*  	 * We just mark the leaf entry stale by putting a null in it.  	 */  	leafhdr.stale++; -	xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr); -	xfs_dir3_leaf_log_header(tp, lbp); +	dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); +	xfs_dir3_leaf_log_header(args, lbp);  	lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); -	xfs_dir3_leaf_log_ents(tp, lbp, index, index); +	xfs_dir3_leaf_log_ents(args, lbp, index, index);  	/*  	 * Scan the freespace in the data block again if necessary,  	 * log the data block header if necessary.  	 */  	if (needscan) -		xfs_dir2_data_freescan(mp, hdr, &needlog); +		xfs_dir2_data_freescan(dp, hdr, &needlog);  	if (needlog) -		xfs_dir2_data_log_header(tp, dbp); +		xfs_dir2_data_log_header(args, dbp);  	/*  	 * If the longest freespace in the data block has changed,  	 * put the new value in the bests table and log that.  	 */  	if (be16_to_cpu(bf[0].length) != oldbest) {  		bestsp[db] = bf[0].length; -		xfs_dir3_leaf_log_bests(tp, lbp, db, db); +		xfs_dir3_leaf_log_bests(args, lbp, db, db);  	}  	xfs_dir3_data_check(dp, dbp);  	/*  	 * If the data block is now empty then get rid of the data block.  	 
*/  	if (be16_to_cpu(bf[0].length) == -			mp->m_dirblksize - xfs_dir3_data_entry_offset(hdr)) { -		ASSERT(db != mp->m_dirdatablk); +			args->geo->blksize - dp->d_ops->data_entry_offset) { +		ASSERT(db != args->geo->datablk);  		if ((error = xfs_dir2_shrink_inode(args, db, dbp))) {  			/*  			 * Nope, can't get rid of it because it caused @@ -1478,7 +1442,7 @@ xfs_dir2_leaf_removename(  			 */  			if (error == ENOSPC && args->total == 0)  				error = 0; -			xfs_dir3_leaf_check(mp, lbp); +			xfs_dir3_leaf_check(dp, lbp);  			return error;  		}  		dbp = NULL; @@ -1501,18 +1465,19 @@ xfs_dir2_leaf_removename(  			memmove(&bestsp[db - i], bestsp,  				(be32_to_cpu(ltp->bestcount) - (db - i)) * sizeof(*bestsp));  			be32_add_cpu(&ltp->bestcount, -(db - i)); -			xfs_dir3_leaf_log_tail(tp, lbp); -			xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); +			xfs_dir3_leaf_log_tail(args, lbp); +			xfs_dir3_leaf_log_bests(args, lbp, 0, +						be32_to_cpu(ltp->bestcount) - 1);  		} else  			bestsp[db] = cpu_to_be16(NULLDATAOFF);  	}  	/*  	 * If the data block was not the first one, drop it.  	 */ -	else if (db != mp->m_dirdatablk) +	else if (db != args->geo->datablk)  		dbp = NULL; -	xfs_dir3_leaf_check(mp, lbp); +	xfs_dir3_leaf_check(dp, lbp);  	/*  	 * See if we can convert to block form.  	 */ @@ -1547,7 +1512,7 @@ xfs_dir2_leaf_replace(  	}  	dp = args->dp;  	leaf = lbp->b_addr; -	ents = xfs_dir3_leaf_ents_p(leaf); +	ents = dp->d_ops->leaf_ents_p(leaf);  	/*  	 * Point to the leaf entry, get data address from it.  	 */ @@ -1557,16 +1522,16 @@ xfs_dir2_leaf_replace(  	 */  	dep = (xfs_dir2_data_entry_t *)  	      ((char *)dbp->b_addr + -	       xfs_dir2_dataptr_to_off(dp->i_mount, be32_to_cpu(lep->address))); +	       xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address)));  	ASSERT(args->inumber != be64_to_cpu(dep->inumber));  	/*  	 * Put the new inode number in, log it.  	 */  	dep->inumber = cpu_to_be64(args->inumber); -	xfs_dir3_dirent_put_ftype(dp->i_mount, dep, args->filetype); +	dp->d_ops->data_put_ftype(dep, args->filetype);  	tp = args->trans; -	xfs_dir2_data_log_entry(tp, dbp, dep); -	xfs_dir3_leaf_check(dp->i_mount, lbp); +	xfs_dir2_data_log_entry(args, dbp, dep); +	xfs_dir3_leaf_check(dp, lbp);  	xfs_trans_brelse(tp, lbp);  	return 0;  } @@ -1592,8 +1557,8 @@ xfs_dir2_leaf_search_hash(  	struct xfs_dir3_icleaf_hdr leafhdr;  	leaf = lbp->b_addr; -	ents = xfs_dir3_leaf_ents_p(leaf); -	xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); +	ents = args->dp->d_ops->leaf_ents_p(leaf); +	args->dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);  	/*  	 * Note, the table cannot be empty, so we have to go through the loop. @@ -1651,22 +1616,23 @@ xfs_dir2_leaf_trim_data(  	/*  	 * Read the offending data block.  We need its buffer.  	 
*/ -	error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(mp, db), -1, &dbp); +	error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(args->geo, db), +				   -1, &dbp);  	if (error)  		return error;  	leaf = lbp->b_addr; -	ltp = xfs_dir2_leaf_tail_p(mp, leaf); +	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);  #ifdef DEBUG  {  	struct xfs_dir2_data_hdr *hdr = dbp->b_addr; -	struct xfs_dir2_data_free *bf = xfs_dir3_data_bestfree_p(hdr); +	struct xfs_dir2_data_free *bf = dp->d_ops->data_bestfree_p(hdr);  	ASSERT(hdr->magic == cpu_to_be32(XFS_DIR2_DATA_MAGIC) ||  	       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));  	ASSERT(be16_to_cpu(bf[0].length) == -	       mp->m_dirblksize - xfs_dir3_data_entry_offset(hdr)); +	       args->geo->blksize - dp->d_ops->data_entry_offset);  	ASSERT(db == be32_to_cpu(ltp->bestcount) - 1);  }  #endif @@ -1685,8 +1651,8 @@ xfs_dir2_leaf_trim_data(  	bestsp = xfs_dir2_leaf_bests_p(ltp);  	be32_add_cpu(&ltp->bestcount, -1);  	memmove(&bestsp[1], &bestsp[0], be32_to_cpu(ltp->bestcount) * sizeof(*bestsp)); -	xfs_dir3_leaf_log_tail(tp, lbp); -	xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); +	xfs_dir3_leaf_log_tail(args, lbp); +	xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1);  	return 0;  } @@ -1750,22 +1716,22 @@ xfs_dir2_node_to_leaf(  	/*  	 * Get the last offset in the file.  	 */ -	if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK))) { +	if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK))) {  		return error;  	} -	fo -= mp->m_dirblkfsbs; +	fo -= args->geo->fsbcount;  	/*  	 * If there are freespace blocks other than the first one,  	 * take this opportunity to remove trailing empty freespace blocks  	 * that may have been left behind during no-space-reservation  	 * operations.  	 */ -	while (fo > mp->m_dirfreeblk) { +	while (fo > args->geo->freeblk) {  		if ((error = xfs_dir2_node_trim_free(args, fo, &rval))) {  			return error;  		}  		if (rval) -			fo -= mp->m_dirblkfsbs; +			fo -= args->geo->fsbcount;  		else  			return 0;  	} @@ -1778,11 +1744,11 @@ xfs_dir2_node_to_leaf(  	/*  	 * If it's not the single leaf block, give up.  	 */ -	if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + mp->m_dirblksize) +	if (XFS_FSB_TO_B(mp, fo) > XFS_DIR2_LEAF_OFFSET + args->geo->blksize)  		return 0;  	lbp = state->path.blk[0].bp;  	leaf = lbp->b_addr; -	xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); +	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);  	ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||  	       leafhdr.magic == XFS_DIR3_LEAFN_MAGIC); @@ -1790,11 +1756,11 @@ xfs_dir2_node_to_leaf(  	/*  	 * Read the freespace block.  	 */ -	error = xfs_dir2_free_read(tp, dp,  mp->m_dirfreeblk, &fbp); +	error = xfs_dir2_free_read(tp, dp,  args->geo->freeblk, &fbp);  	if (error)  		return error;  	free = fbp->b_addr; -	xfs_dir3_free_hdr_from_disk(&freehdr, free); +	dp->d_ops->free_hdr_from_disk(&freehdr, free);  	ASSERT(!freehdr.firstdb); @@ -1802,7 +1768,7 @@ xfs_dir2_node_to_leaf(  	 * Now see if the leafn and free data will fit in a leaf1.  	 * If not, release the buffer and give up.  	 */ -	if (xfs_dir3_leaf_size(&leafhdr, freehdr.nvalid) > mp->m_dirblksize) { +	if (xfs_dir3_leaf_size(&leafhdr, freehdr.nvalid) > args->geo->blksize) {  		xfs_trans_brelse(tp, fbp);  		return 0;  	} @@ -1822,25 +1788,27 @@ xfs_dir2_node_to_leaf(  	/*  	 * Set up the leaf tail from the freespace block.  	 
*/ -	ltp = xfs_dir2_leaf_tail_p(mp, leaf); +	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);  	ltp->bestcount = cpu_to_be32(freehdr.nvalid);  	/*  	 * Set up the leaf bests table.  	 */ -	memcpy(xfs_dir2_leaf_bests_p(ltp), xfs_dir3_free_bests_p(mp, free), +	memcpy(xfs_dir2_leaf_bests_p(ltp), dp->d_ops->free_bests_p(free),  		freehdr.nvalid * sizeof(xfs_dir2_data_off_t)); -	xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr); -	xfs_dir3_leaf_log_header(tp, lbp); -	xfs_dir3_leaf_log_bests(tp, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); -	xfs_dir3_leaf_log_tail(tp, lbp); -	xfs_dir3_leaf_check(mp, lbp); +	dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); +	xfs_dir3_leaf_log_header(args, lbp); +	xfs_dir3_leaf_log_bests(args, lbp, 0, be32_to_cpu(ltp->bestcount) - 1); +	xfs_dir3_leaf_log_tail(args, lbp); +	xfs_dir3_leaf_check(dp, lbp);  	/*  	 * Get rid of the freespace block.  	 */ -	error = xfs_dir2_shrink_inode(args, XFS_DIR2_FREE_FIRSTDB(mp), fbp); +	error = xfs_dir2_shrink_inode(args, +			xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET), +			fbp);  	if (error) {  		/*  		 * This can't fail here because it can only happen when diff --git a/fs/xfs/xfs_dir2_node.c b/fs/xfs/xfs_dir2_node.c index 4c3dba7ffb7..da43d304fca 100644 --- a/fs/xfs/xfs_dir2_node.c +++ b/fs/xfs/xfs_dir2_node.c @@ -18,22 +18,21 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" +#include "xfs_da_format.h"  #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h"  #include "xfs_bmap.h" -#include "xfs_dir2_format.h"  #include "xfs_dir2.h"  #include "xfs_dir2_priv.h"  #include "xfs_error.h"  #include "xfs_trace.h" +#include "xfs_trans.h"  #include "xfs_buf_item.h"  #include "xfs_cksum.h" @@ -55,21 +54,21 @@ static int xfs_dir2_node_addname_int(xfs_da_args_t *args,   * Check internal consistency of a leafn block.   
*/  #ifdef DEBUG -#define	xfs_dir3_leaf_check(mp, bp) \ +#define	xfs_dir3_leaf_check(dp, bp) \  do { \ -	if (!xfs_dir3_leafn_check((mp), (bp))) \ +	if (!xfs_dir3_leafn_check((dp), (bp))) \  		ASSERT(0); \  } while (0);  static bool  xfs_dir3_leafn_check( -	struct xfs_mount	*mp, +	struct xfs_inode	*dp,  	struct xfs_buf		*bp)  {  	struct xfs_dir2_leaf	*leaf = bp->b_addr;  	struct xfs_dir3_icleaf_hdr leafhdr; -	xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); +	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);  	if (leafhdr.magic == XFS_DIR3_LEAFN_MAGIC) {  		struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr; @@ -78,10 +77,10 @@ xfs_dir3_leafn_check(  	} else if (leafhdr.magic != XFS_DIR2_LEAFN_MAGIC)  		return false; -	return xfs_dir3_leaf_check_int(mp, &leafhdr, leaf); +	return xfs_dir3_leaf_check_int(dp->i_mount, dp, &leafhdr, leaf);  }  #else -#define	xfs_dir3_leaf_check(mp, bp) +#define	xfs_dir3_leaf_check(dp, bp)  #endif  static bool @@ -116,13 +115,14 @@ xfs_dir3_free_read_verify(  {  	struct xfs_mount	*mp = bp->b_target->bt_mount; -	if ((xfs_sb_version_hascrc(&mp->m_sb) && -	     !xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -					  XFS_DIR3_FREE_CRC_OFF)) || -	    !xfs_dir3_free_verify(bp)) { -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); +	if (xfs_sb_version_hascrc(&mp->m_sb) && +	    !xfs_buf_verify_cksum(bp, XFS_DIR3_FREE_CRC_OFF)) +		xfs_buf_ioerror(bp, EFSBADCRC); +	else if (!xfs_dir3_free_verify(bp))  		xfs_buf_ioerror(bp, EFSCORRUPTED); -	} + +	if (bp->b_error) +		xfs_verifier_error(bp);  }  static void @@ -134,8 +134,8 @@ xfs_dir3_free_write_verify(  	struct xfs_dir3_blk_hdr	*hdr3 = bp->b_addr;  	if (!xfs_dir3_free_verify(bp)) { -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);  		xfs_buf_ioerror(bp, EFSCORRUPTED); +		xfs_verifier_error(bp);  		return;  	} @@ -145,7 +145,7 @@ xfs_dir3_free_write_verify(  	if (bip)  		hdr3->lsn = cpu_to_be64(bip->bli_item.li_lsn); -	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), XFS_DIR3_FREE_CRC_OFF); +	xfs_buf_update_cksum(bp, XFS_DIR3_FREE_CRC_OFF);  }  const struct xfs_buf_ops xfs_dir3_free_buf_ops = { @@ -193,66 +193,20 @@ xfs_dir2_free_try_read(  	return __xfs_dir3_free_read(tp, dp, fbno, -2, bpp);  } - -void -xfs_dir3_free_hdr_from_disk( -	struct xfs_dir3_icfree_hdr	*to, -	struct xfs_dir2_free		*from) -{ -	if (from->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC)) { -		to->magic = be32_to_cpu(from->hdr.magic); -		to->firstdb = be32_to_cpu(from->hdr.firstdb); -		to->nvalid = be32_to_cpu(from->hdr.nvalid); -		to->nused = be32_to_cpu(from->hdr.nused); -	} else { -		struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)from; - -		to->magic = be32_to_cpu(hdr3->hdr.magic); -		to->firstdb = be32_to_cpu(hdr3->firstdb); -		to->nvalid = be32_to_cpu(hdr3->nvalid); -		to->nused = be32_to_cpu(hdr3->nused); -	} - -	ASSERT(to->magic == XFS_DIR2_FREE_MAGIC || -	       to->magic == XFS_DIR3_FREE_MAGIC); -} - -static void -xfs_dir3_free_hdr_to_disk( -	struct xfs_dir2_free		*to, -	struct xfs_dir3_icfree_hdr	*from) -{ -	ASSERT(from->magic == XFS_DIR2_FREE_MAGIC || -	       from->magic == XFS_DIR3_FREE_MAGIC); - -	if (from->magic == XFS_DIR2_FREE_MAGIC) { -		to->hdr.magic = cpu_to_be32(from->magic); -		to->hdr.firstdb = cpu_to_be32(from->firstdb); -		to->hdr.nvalid = cpu_to_be32(from->nvalid); -		to->hdr.nused = cpu_to_be32(from->nused); -	} else { -		struct xfs_dir3_free_hdr *hdr3 = (struct xfs_dir3_free_hdr *)to; - -		hdr3->hdr.magic = cpu_to_be32(from->magic); -		hdr3->firstdb = cpu_to_be32(from->firstdb); -		
hdr3->nvalid = cpu_to_be32(from->nvalid); -		hdr3->nused = cpu_to_be32(from->nused); -	} -} -  static int  xfs_dir3_free_get_buf( -	struct xfs_trans	*tp, -	struct xfs_inode	*dp, +	xfs_da_args_t		*args,  	xfs_dir2_db_t		fbno,  	struct xfs_buf		**bpp)  { +	struct xfs_trans	*tp = args->trans; +	struct xfs_inode	*dp = args->dp;  	struct xfs_mount	*mp = dp->i_mount;  	struct xfs_buf		*bp;  	int			error;  	struct xfs_dir3_icfree_hdr hdr; -	error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(mp, fbno), +	error = xfs_da_get_buf(tp, dp, xfs_dir2_db_to_da(args->geo, fbno),  				   -1, &bp, XFS_DATA_FORK);  	if (error)  		return error; @@ -277,7 +231,7 @@ xfs_dir3_free_get_buf(  		uuid_copy(&hdr3->hdr.uuid, &mp->m_sb.sb_uuid);  	} else  		hdr.magic = XFS_DIR2_FREE_MAGIC; -	xfs_dir3_free_hdr_to_disk(bp->b_addr, &hdr); +	dp->d_ops->free_hdr_to_disk(bp->b_addr, &hdr);  	*bpp = bp;  	return 0;  } @@ -287,7 +241,7 @@ xfs_dir3_free_get_buf(   */  STATIC void  xfs_dir2_free_log_bests( -	struct xfs_trans	*tp, +	struct xfs_da_args	*args,  	struct xfs_buf		*bp,  	int			first,		/* first entry to log */  	int			last)		/* last entry to log */ @@ -296,10 +250,10 @@ xfs_dir2_free_log_bests(  	__be16			*bests;  	free = bp->b_addr; -	bests = xfs_dir3_free_bests_p(tp->t_mountp, free); +	bests = args->dp->d_ops->free_bests_p(free);  	ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||  	       free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC)); -	xfs_trans_log_buf(tp, bp, +	xfs_trans_log_buf(args->trans, bp,  		(uint)((char *)&bests[first] - (char *)free),  		(uint)((char *)&bests[last] - (char *)free +  		       sizeof(bests[0]) - 1)); @@ -310,7 +264,7 @@ xfs_dir2_free_log_bests(   */  static void  xfs_dir2_free_log_header( -	struct xfs_trans	*tp, +	struct xfs_da_args	*args,  	struct xfs_buf		*bp)  {  #ifdef DEBUG @@ -320,7 +274,8 @@ xfs_dir2_free_log_header(  	ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||  	       free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));  #endif -	xfs_trans_log_buf(tp, bp, 0, xfs_dir3_free_hdr_size(tp->t_mountp) - 1); +	xfs_trans_log_buf(args->trans, bp, 0, +			  args->dp->d_ops->free_hdr_size - 1);  }  /* @@ -360,27 +315,27 @@ xfs_dir2_leaf_to_node(  	if ((error = xfs_dir2_grow_inode(args, XFS_DIR2_FREE_SPACE, &fdb))) {  		return error;  	} -	ASSERT(fdb == XFS_DIR2_FREE_FIRSTDB(mp)); +	ASSERT(fdb == xfs_dir2_byte_to_db(args->geo, XFS_DIR2_FREE_OFFSET));  	/*  	 * Get the buffer for the new freespace block.  	 */ -	error = xfs_dir3_free_get_buf(tp, dp, fdb, &fbp); +	error = xfs_dir3_free_get_buf(args, fdb, &fbp);  	if (error)  		return error;  	free = fbp->b_addr; -	xfs_dir3_free_hdr_from_disk(&freehdr, free); +	dp->d_ops->free_hdr_from_disk(&freehdr, free);  	leaf = lbp->b_addr; -	ltp = xfs_dir2_leaf_tail_p(mp, leaf); +	ltp = xfs_dir2_leaf_tail_p(args->geo, leaf);  	ASSERT(be32_to_cpu(ltp->bestcount) <= -				(uint)dp->i_d.di_size / mp->m_dirblksize); +				(uint)dp->i_d.di_size / args->geo->blksize);  	/*  	 * Copy freespace entries from the leaf block to the new block.  	 * Count active entries.  	 
*/  	from = xfs_dir2_leaf_bests_p(ltp); -	to = xfs_dir3_free_bests_p(mp, free); +	to = dp->d_ops->free_bests_p(free);  	for (i = n = 0; i < be32_to_cpu(ltp->bestcount); i++, from++, to++) {  		if ((off = be16_to_cpu(*from)) != NULLDATAOFF)  			n++; @@ -393,9 +348,9 @@ xfs_dir2_leaf_to_node(  	freehdr.nused = n;  	freehdr.nvalid = be32_to_cpu(ltp->bestcount); -	xfs_dir3_free_hdr_to_disk(fbp->b_addr, &freehdr); -	xfs_dir2_free_log_bests(tp, fbp, 0, freehdr.nvalid - 1); -	xfs_dir2_free_log_header(tp, fbp); +	dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr); +	xfs_dir2_free_log_bests(args, fbp, 0, freehdr.nvalid - 1); +	xfs_dir2_free_log_header(args, fbp);  	/*  	 * Converting the leaf to a leafnode is just a matter of changing the @@ -409,8 +364,8 @@ xfs_dir2_leaf_to_node(  		leaf->hdr.info.magic = cpu_to_be16(XFS_DIR3_LEAFN_MAGIC);  	lbp->b_ops = &xfs_dir3_leafn_buf_ops;  	xfs_trans_buf_set_type(tp, lbp, XFS_BLFT_DIR_LEAFN_BUF); -	xfs_dir3_leaf_log_header(tp, lbp); -	xfs_dir3_leaf_check(mp, lbp); +	xfs_dir3_leaf_log_header(args, lbp); +	xfs_dir3_leaf_check(dp, lbp);  	return 0;  } @@ -443,8 +398,8 @@ xfs_dir2_leafn_add(  	mp = dp->i_mount;  	tp = args->trans;  	leaf = bp->b_addr; -	xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); -	ents = xfs_dir3_leaf_ents_p(leaf); +	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); +	ents = dp->d_ops->leaf_ents_p(leaf);  	/*  	 * Quick check just to make sure we are not going to index @@ -460,7 +415,7 @@ xfs_dir2_leafn_add(  	 * a compact.  	 */ -	if (leafhdr.count == xfs_dir3_max_leaf_ents(mp, leaf)) { +	if (leafhdr.count == dp->d_ops->leaf_max_ents(args->geo)) {  		if (!leafhdr.stale)  			return XFS_ERROR(ENOSPC);  		compact = leafhdr.stale > 1; @@ -495,33 +450,34 @@ xfs_dir2_leafn_add(  				       highstale, &lfloglow, &lfloghigh);  	lep->hashval = cpu_to_be32(args->hashval); -	lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(mp, +	lep->address = cpu_to_be32(xfs_dir2_db_off_to_dataptr(args->geo,  				args->blkno, args->index)); -	xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr); -	xfs_dir3_leaf_log_header(tp, bp); -	xfs_dir3_leaf_log_ents(tp, bp, lfloglow, lfloghigh); -	xfs_dir3_leaf_check(mp, bp); +	dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); +	xfs_dir3_leaf_log_header(args, bp); +	xfs_dir3_leaf_log_ents(args, bp, lfloglow, lfloghigh); +	xfs_dir3_leaf_check(dp, bp);  	return 0;  }  #ifdef DEBUG  static void  xfs_dir2_free_hdr_check( -	struct xfs_mount *mp, +	struct xfs_inode *dp,  	struct xfs_buf	*bp,  	xfs_dir2_db_t	db)  {  	struct xfs_dir3_icfree_hdr hdr; -	xfs_dir3_free_hdr_from_disk(&hdr, bp->b_addr); +	dp->d_ops->free_hdr_from_disk(&hdr, bp->b_addr); -	ASSERT((hdr.firstdb % xfs_dir3_free_max_bests(mp)) == 0); +	ASSERT((hdr.firstdb % +		dp->d_ops->free_max_bests(dp->i_mount->m_dir_geo)) == 0);  	ASSERT(hdr.firstdb <= db);  	ASSERT(db < hdr.firstdb + hdr.nvalid);  }  #else -#define xfs_dir2_free_hdr_check(mp, dp, db) +#define xfs_dir2_free_hdr_check(dp, bp, db)  #endif	/* DEBUG */  /* @@ -530,6 +486,7 @@ xfs_dir2_free_hdr_check(   */  xfs_dahash_t					/* hash value */  xfs_dir2_leafn_lasthash( +	struct xfs_inode *dp,  	struct xfs_buf	*bp,			/* leaf buffer */  	int		*count)			/* count of entries in leaf */  { @@ -537,7 +494,7 @@ xfs_dir2_leafn_lasthash(  	struct xfs_dir2_leaf_entry *ents;  	struct xfs_dir3_icleaf_hdr leafhdr; -	xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); +	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf);  	ASSERT(leafhdr.magic == XFS_DIR2_LEAFN_MAGIC ||  	       leafhdr.magic == XFS_DIR3_LEAFN_MAGIC); @@ -547,7 +504,7 @@ 
xfs_dir2_leafn_lasthash(  	if (!leafhdr.count)  		return 0; -	ents = xfs_dir3_leaf_ents_p(leaf); +	ents = dp->d_ops->leaf_ents_p(leaf);  	return be32_to_cpu(ents[leafhdr.count - 1].hashval);  } @@ -584,10 +541,10 @@ xfs_dir2_leafn_lookup_for_addname(  	tp = args->trans;  	mp = dp->i_mount;  	leaf = bp->b_addr; -	xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); -	ents = xfs_dir3_leaf_ents_p(leaf); +	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); +	ents = dp->d_ops->leaf_ents_p(leaf); -	xfs_dir3_leaf_check(mp, bp); +	xfs_dir3_leaf_check(dp, bp);  	ASSERT(leafhdr.count > 0);  	/* @@ -605,7 +562,7 @@ xfs_dir2_leafn_lookup_for_addname(  		ASSERT(free->hdr.magic == cpu_to_be32(XFS_DIR2_FREE_MAGIC) ||  		       free->hdr.magic == cpu_to_be32(XFS_DIR3_FREE_MAGIC));  	} -	length = xfs_dir3_data_entsize(mp, args->namelen); +	length = dp->d_ops->data_entsize(args->namelen);  	/*  	 * Loop over leaf entries with the right hash value.  	 */ @@ -620,7 +577,8 @@ xfs_dir2_leafn_lookup_for_addname(  		/*  		 * Pull the data block number from the entry.  		 */ -		newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); +		newdb = xfs_dir2_dataptr_to_db(args->geo, +					       be32_to_cpu(lep->address));  		/*  		 * For addname, we're looking for a place to put the new entry.  		 * We want to use a data block with an entry of equal @@ -637,7 +595,7 @@ xfs_dir2_leafn_lookup_for_addname(  			 * Convert the data block to the free block  			 * holding its freespace information.  			 */ -			newfdb = xfs_dir2_db_to_fdb(mp, newdb); +			newfdb = dp->d_ops->db_to_fdb(args->geo, newdb);  			/*  			 * If it's not the one we have in hand, read it in.  			 */ @@ -649,22 +607,23 @@ xfs_dir2_leafn_lookup_for_addname(  					xfs_trans_brelse(tp, curbp);  				error = xfs_dir2_free_read(tp, dp, -						xfs_dir2_db_to_da(mp, newfdb), +						xfs_dir2_db_to_da(args->geo, +								  newfdb),  						&curbp);  				if (error)  					return error;  				free = curbp->b_addr; -				xfs_dir2_free_hdr_check(mp, curbp, curdb); +				xfs_dir2_free_hdr_check(dp, curbp, curdb);  			}  			/*  			 * Get the index for our entry.  			 */ -			fi = xfs_dir2_db_to_fdindex(mp, curdb); +			fi = dp->d_ops->db_to_fdindex(args->geo, curdb);  			/*  			 * If it has room, return it.  			 */ -			bests = xfs_dir3_free_bests_p(mp, free); +			bests = dp->d_ops->free_bests_p(free);  			if (unlikely(bests[fi] == cpu_to_be16(NULLDATAOFF))) {  				XFS_ERROR_REPORT("xfs_dir2_leafn_lookup_int",  							XFS_ERRLEVEL_LOW, mp); @@ -734,10 +693,10 @@ xfs_dir2_leafn_lookup_for_entry(  	tp = args->trans;  	mp = dp->i_mount;  	leaf = bp->b_addr; -	xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); -	ents = xfs_dir3_leaf_ents_p(leaf); +	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); +	ents = dp->d_ops->leaf_ents_p(leaf); -	xfs_dir3_leaf_check(mp, bp); +	xfs_dir3_leaf_check(dp, bp);  	ASSERT(leafhdr.count > 0);  	/* @@ -765,7 +724,8 @@ xfs_dir2_leafn_lookup_for_entry(  		/*  		 * Pull the data block number from the entry.  		 */ -		newdb = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); +		newdb = xfs_dir2_dataptr_to_db(args->geo, +					       be32_to_cpu(lep->address));  		/*  		 * Not adding a new entry, so we really want to find  		 * the name given to us. 
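In the leafn lookup paths above, everything format- or geometry-dependent is now fetched from either args->geo or dp->d_ops rather than from the mount structure. The following sketch isolates the resulting entry-decoding idiom; the function and field names used are visible in the hunks above, while the wrapper itself is hypothetical.

	/*
	 * Sketch only: turn a leaf entry's packed address into a pointer to the
	 * data entry inside an already-read data block, and pull the file type
	 * through the inode's directory ops.
	 */
	static xfs_dir2_data_entry_t *
	example_decode_leaf_entry(
		struct xfs_da_args		*args,
		struct xfs_dir2_leaf		*leaf,
		struct xfs_buf			*dbp,
		int				index)
	{
		struct xfs_inode		*dp = args->dp;
		struct xfs_dir2_leaf_entry	*ents = dp->d_ops->leaf_ents_p(leaf);
		xfs_dir2_dataptr_t		addr = be32_to_cpu(ents[index].address);
		xfs_dir2_data_entry_t		*dep;

		/* the in-block byte offset is derived from args->geo */
		dep = (xfs_dir2_data_entry_t *)((char *)dbp->b_addr +
				xfs_dir2_dataptr_to_off(args->geo, addr));
		args->filetype = dp->d_ops->data_get_ftype(dep);
		return dep;
	}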
@@ -790,7 +750,8 @@ xfs_dir2_leafn_lookup_for_entry(  				curbp = state->extrablk.bp;  			} else {  				error = xfs_dir3_data_read(tp, dp, -						xfs_dir2_db_to_da(mp, newdb), +						xfs_dir2_db_to_da(args->geo, +								  newdb),  						-1, &curbp);  				if (error)  					return error; @@ -802,7 +763,8 @@ xfs_dir2_leafn_lookup_for_entry(  		 * Point to the data entry.  		 */  		dep = (xfs_dir2_data_entry_t *)((char *)curbp->b_addr + -			xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address))); +			xfs_dir2_dataptr_to_off(args->geo, +						be32_to_cpu(lep->address)));  		/*  		 * Compare the entry and if it's an exact match, return  		 * EEXIST immediately. If it's the first case-insensitive @@ -816,7 +778,7 @@ xfs_dir2_leafn_lookup_for_entry(  				xfs_trans_brelse(tp, state->extrablk.bp);  			args->cmpresult = cmp;  			args->inumber = be64_to_cpu(dep->inumber); -			args->filetype = xfs_dir3_dirent_get_ftype(mp, dep); +			args->filetype = dp->d_ops->data_get_ftype(dep);  			*indexp = index;  			state->extravalid = 1;  			state->extrablk.bp = curbp; @@ -888,7 +850,6 @@ xfs_dir3_leafn_moveents(  	int				start_d,/* destination leaf index */  	int				count)	/* count of leaves to copy */  { -	struct xfs_trans		*tp = args->trans;  	int				stale;	/* count stale leaves copied */  	trace_xfs_dir2_leafn_moveents(args, start_s, start_d, count); @@ -907,7 +868,7 @@ xfs_dir3_leafn_moveents(  	if (start_d < dhdr->count) {  		memmove(&dents[start_d + count], &dents[start_d],  			(dhdr->count - start_d) * sizeof(xfs_dir2_leaf_entry_t)); -		xfs_dir3_leaf_log_ents(tp, bp_d, start_d + count, +		xfs_dir3_leaf_log_ents(args, bp_d, start_d + count,  				       count + dhdr->count - 1);  	}  	/* @@ -929,7 +890,7 @@ xfs_dir3_leafn_moveents(  	 */  	memcpy(&dents[start_d], &sents[start_s],  		count * sizeof(xfs_dir2_leaf_entry_t)); -	xfs_dir3_leaf_log_ents(tp, bp_d, start_d, start_d + count - 1); +	xfs_dir3_leaf_log_ents(args, bp_d, start_d, start_d + count - 1);  	/*  	 * If there are source entries after the ones we copied, @@ -938,7 +899,7 @@ xfs_dir3_leafn_moveents(  	if (start_s + count < shdr->count) {  		memmove(&sents[start_s], &sents[start_s + count],  			count * sizeof(xfs_dir2_leaf_entry_t)); -		xfs_dir3_leaf_log_ents(tp, bp_s, start_s, start_s + count - 1); +		xfs_dir3_leaf_log_ents(args, bp_s, start_s, start_s + count - 1);  	}  	/* @@ -956,6 +917,7 @@ xfs_dir3_leafn_moveents(   */  int						/* sort order */  xfs_dir2_leafn_order( +	struct xfs_inode	*dp,  	struct xfs_buf		*leaf1_bp,		/* leaf1 buffer */  	struct xfs_buf		*leaf2_bp)		/* leaf2 buffer */  { @@ -966,10 +928,10 @@ xfs_dir2_leafn_order(  	struct xfs_dir3_icleaf_hdr hdr1;  	struct xfs_dir3_icleaf_hdr hdr2; -	xfs_dir3_leaf_hdr_from_disk(&hdr1, leaf1); -	xfs_dir3_leaf_hdr_from_disk(&hdr2, leaf2); -	ents1 = xfs_dir3_leaf_ents_p(leaf1); -	ents2 = xfs_dir3_leaf_ents_p(leaf2); +	dp->d_ops->leaf_hdr_from_disk(&hdr1, leaf1); +	dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf2); +	ents1 = dp->d_ops->leaf_ents_p(leaf1); +	ents2 = dp->d_ops->leaf_ents_p(leaf2);  	if (hdr1.count > 0 && hdr2.count > 0 &&  	    (be32_to_cpu(ents2[0].hashval) < be32_to_cpu(ents1[0].hashval) || @@ -1007,12 +969,13 @@ xfs_dir2_leafn_rebalance(  	struct xfs_dir2_leaf_entry *ents2;  	struct xfs_dir3_icleaf_hdr hdr1;  	struct xfs_dir3_icleaf_hdr hdr2; +	struct xfs_inode	*dp = state->args->dp;  	args = state->args;  	/*  	 * If the block order is wrong, swap the arguments.  	 
*/ -	if ((swap = xfs_dir2_leafn_order(blk1->bp, blk2->bp))) { +	if ((swap = xfs_dir2_leafn_order(dp, blk1->bp, blk2->bp))) {  		xfs_da_state_blk_t	*tmp;	/* temp for block swap */  		tmp = blk1; @@ -1021,10 +984,10 @@ xfs_dir2_leafn_rebalance(  	}  	leaf1 = blk1->bp->b_addr;  	leaf2 = blk2->bp->b_addr; -	xfs_dir3_leaf_hdr_from_disk(&hdr1, leaf1); -	xfs_dir3_leaf_hdr_from_disk(&hdr2, leaf2); -	ents1 = xfs_dir3_leaf_ents_p(leaf1); -	ents2 = xfs_dir3_leaf_ents_p(leaf2); +	dp->d_ops->leaf_hdr_from_disk(&hdr1, leaf1); +	dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf2); +	ents1 = dp->d_ops->leaf_ents_p(leaf1); +	ents2 = dp->d_ops->leaf_ents_p(leaf2);  	oldsum = hdr1.count + hdr2.count;  #if defined(DEBUG) || defined(XFS_WARN) @@ -1070,13 +1033,13 @@ xfs_dir2_leafn_rebalance(  	ASSERT(hdr1.stale + hdr2.stale == oldstale);  	/* log the changes made when moving the entries */ -	xfs_dir3_leaf_hdr_to_disk(leaf1, &hdr1); -	xfs_dir3_leaf_hdr_to_disk(leaf2, &hdr2); -	xfs_dir3_leaf_log_header(args->trans, blk1->bp); -	xfs_dir3_leaf_log_header(args->trans, blk2->bp); +	dp->d_ops->leaf_hdr_to_disk(leaf1, &hdr1); +	dp->d_ops->leaf_hdr_to_disk(leaf2, &hdr2); +	xfs_dir3_leaf_log_header(args, blk1->bp); +	xfs_dir3_leaf_log_header(args, blk2->bp); -	xfs_dir3_leaf_check(args->dp->i_mount, blk1->bp); -	xfs_dir3_leaf_check(args->dp->i_mount, blk2->bp); +	xfs_dir3_leaf_check(dp, blk1->bp); +	xfs_dir3_leaf_check(dp, blk2->bp);  	/*  	 * Mark whether we're inserting into the old or new leaf. @@ -1097,11 +1060,11 @@ xfs_dir2_leafn_rebalance(  	 * Finally sanity check just to make sure we are not returning a  	 * negative index  	 */ -	if(blk2->index < 0) { +	if (blk2->index < 0) {  		state->inleaf = 1;  		blk2->index = 0; -		xfs_alert(args->dp->i_mount, -	"%s: picked the wrong leaf? reverting original leaf: blk1->index %d\n", +		xfs_alert(dp->i_mount, +	"%s: picked the wrong leaf? reverting original leaf: blk1->index %d",  			__func__, blk1->index);  	}  } @@ -1116,21 +1079,20 @@ xfs_dir3_data_block_free(  	struct xfs_buf		*fbp,  	int			longest)  { -	struct xfs_trans	*tp = args->trans;  	int			logfree = 0;  	__be16			*bests;  	struct xfs_dir3_icfree_hdr freehdr; +	struct xfs_inode	*dp = args->dp; -	xfs_dir3_free_hdr_from_disk(&freehdr, free); - -	bests = xfs_dir3_free_bests_p(tp->t_mountp, free); +	dp->d_ops->free_hdr_from_disk(&freehdr, free); +	bests = dp->d_ops->free_bests_p(free);  	if (hdr) {  		/*  		 * Data block is not empty, just set the free entry to the new  		 * value.  		 */  		bests[findex] = cpu_to_be16(longest); -		xfs_dir2_free_log_bests(tp, fbp, findex, findex); +		xfs_dir2_free_log_bests(args, fbp, findex, findex);  		return 0;  	} @@ -1157,8 +1119,8 @@ xfs_dir3_data_block_free(  		logfree = 1;  	} -	xfs_dir3_free_hdr_to_disk(free, &freehdr); -	xfs_dir2_free_log_header(tp, fbp); +	dp->d_ops->free_hdr_to_disk(free, &freehdr); +	xfs_dir2_free_log_header(args, fbp);  	/*  	 * If there are no useful entries left in the block, get rid of the @@ -1182,7 +1144,7 @@ xfs_dir3_data_block_free(  	/* Log the free entry that changed, unless we got rid of it.  */  	if (logfree) -		xfs_dir2_free_log_bests(tp, fbp, findex, findex); +		xfs_dir2_free_log_bests(args, fbp, findex, findex);  	return 0;  } @@ -1222,8 +1184,8 @@ xfs_dir2_leafn_remove(  	tp = args->trans;  	mp = dp->i_mount;  	leaf = bp->b_addr; -	xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); -	ents = xfs_dir3_leaf_ents_p(leaf); +	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); +	ents = dp->d_ops->leaf_ents_p(leaf);  	/*  	 * Point to the entry we're removing. 
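[Editorial aside] A pattern repeated throughout these xfs_dir2_node.c hunks is the move from global helpers (xfs_dir3_leaf_hdr_from_disk(), xfs_dir3_leaf_ents_p()) to indirect calls through dp->d_ops, so each inode carries a table of format-dependent accessors. The sketch below shows the general vtable idea with invented structures and two pretend on-disk leaf layouts; it only illustrates the technique and is not the kernel's actual operations structure.

#include <stdint.h>
#include <stdio.h>

/* in-core header, independent of the on-disk format */
struct demo_leaf_hdr {
	uint16_t	count;
	uint16_t	stale;
};

/* pretend v2 on-disk leaf: counters only */
struct demo_leaf_v2 {
	uint16_t	count;
	uint16_t	stale;
};

/* pretend v3 on-disk leaf: a larger header in front of the same counters */
struct demo_leaf_v3 {
	uint32_t	crc;
	uint64_t	blkno;
	uint16_t	count;
	uint16_t	stale;
};

/* per-format accessor table, hung off the in-core inode */
struct demo_dir_ops {
	void (*leaf_hdr_from_disk)(struct demo_leaf_hdr *to, const void *from);
};

static void demo_v2_hdr_from_disk(struct demo_leaf_hdr *to, const void *from)
{
	const struct demo_leaf_v2 *leaf = from;

	to->count = leaf->count;
	to->stale = leaf->stale;
}

static void demo_v3_hdr_from_disk(struct demo_leaf_hdr *to, const void *from)
{
	const struct demo_leaf_v3 *leaf = from;

	to->count = leaf->count;
	to->stale = leaf->stale;
}

static const struct demo_dir_ops demo_v2_ops = { demo_v2_hdr_from_disk };
static const struct demo_dir_ops demo_v3_ops = { demo_v3_hdr_from_disk };

struct demo_inode {
	const struct demo_dir_ops *d_ops;	/* picked once when the inode is set up */
};

int main(void)
{
	struct demo_leaf_v3 leaf = { .crc = 0, .blkno = 7, .count = 2, .stale = 1 };
	int v3_format = 1;			/* pretend this came from the superblock */
	struct demo_inode dp = { .d_ops = v3_format ? &demo_v3_ops : &demo_v2_ops };
	struct demo_leaf_hdr hdr;

	/* call sites no longer care which layout is on disk */
	dp.d_ops->leaf_hdr_from_disk(&hdr, &leaf);
	printf("count %u stale %u\n", hdr.count, hdr.stale);
	return 0;
}

The call-site conversions in this diff follow that shape: the format decision is made once, and the leaf/free/data accessors are reached through the inode rather than re-checked at every call.
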
@@ -1233,9 +1195,9 @@ xfs_dir2_leafn_remove(  	/*  	 * Extract the data block and offset from the entry.  	 */ -	db = xfs_dir2_dataptr_to_db(mp, be32_to_cpu(lep->address)); +	db = xfs_dir2_dataptr_to_db(args->geo, be32_to_cpu(lep->address));  	ASSERT(dblk->blkno == db); -	off = xfs_dir2_dataptr_to_off(mp, be32_to_cpu(lep->address)); +	off = xfs_dir2_dataptr_to_off(args->geo, be32_to_cpu(lep->address));  	ASSERT(dblk->index == off);  	/* @@ -1243,11 +1205,11 @@ xfs_dir2_leafn_remove(  	 * Log the leaf block changes.  	 */  	leafhdr.stale++; -	xfs_dir3_leaf_hdr_to_disk(leaf, &leafhdr); -	xfs_dir3_leaf_log_header(tp, bp); +	dp->d_ops->leaf_hdr_to_disk(leaf, &leafhdr); +	xfs_dir3_leaf_log_header(args, bp);  	lep->address = cpu_to_be32(XFS_DIR2_NULL_DATAPTR); -	xfs_dir3_leaf_log_ents(tp, bp, index, index); +	xfs_dir3_leaf_log_ents(args, bp, index, index);  	/*  	 * Make the data entry free.  Keep track of the longest freespace @@ -1256,19 +1218,19 @@ xfs_dir2_leafn_remove(  	dbp = dblk->bp;  	hdr = dbp->b_addr;  	dep = (xfs_dir2_data_entry_t *)((char *)hdr + off); -	bf = xfs_dir3_data_bestfree_p(hdr); +	bf = dp->d_ops->data_bestfree_p(hdr);  	longest = be16_to_cpu(bf[0].length);  	needlog = needscan = 0; -	xfs_dir2_data_make_free(tp, dbp, off, -		xfs_dir3_data_entsize(mp, dep->namelen), &needlog, &needscan); +	xfs_dir2_data_make_free(args, dbp, off, +		dp->d_ops->data_entsize(dep->namelen), &needlog, &needscan);  	/*  	 * Rescan the data block freespaces for bestfree.  	 * Log the data block header if needed.  	 */  	if (needscan) -		xfs_dir2_data_freescan(mp, hdr, &needlog); +		xfs_dir2_data_freescan(dp, hdr, &needlog);  	if (needlog) -		xfs_dir2_data_log_header(tp, dbp); +		xfs_dir2_data_log_header(args, dbp);  	xfs_dir3_data_check(dp, dbp);  	/*  	 * If the longest data block freespace changes, need to update @@ -1285,8 +1247,9 @@ xfs_dir2_leafn_remove(  		 * Convert the data block number to a free block,  		 * read in the free block.  		 */ -		fdb = xfs_dir2_db_to_fdb(mp, db); -		error = xfs_dir2_free_read(tp, dp, xfs_dir2_db_to_da(mp, fdb), +		fdb = dp->d_ops->db_to_fdb(args->geo, db); +		error = xfs_dir2_free_read(tp, dp, +					   xfs_dir2_db_to_da(args->geo, fdb),  					   &fbp);  		if (error)  			return error; @@ -1294,22 +1257,23 @@ xfs_dir2_leafn_remove(  #ifdef DEBUG  	{  		struct xfs_dir3_icfree_hdr freehdr; -		xfs_dir3_free_hdr_from_disk(&freehdr, free); -		ASSERT(freehdr.firstdb == xfs_dir3_free_max_bests(mp) * -					  (fdb - XFS_DIR2_FREE_FIRSTDB(mp))); +		dp->d_ops->free_hdr_from_disk(&freehdr, free); +		ASSERT(freehdr.firstdb == dp->d_ops->free_max_bests(args->geo) * +			(fdb - xfs_dir2_byte_to_db(args->geo, +						   XFS_DIR2_FREE_OFFSET)));  	}  #endif  		/*  		 * Calculate which entry we need to fix.  		 */ -		findex = xfs_dir2_db_to_fdindex(mp, db); +		findex = dp->d_ops->db_to_fdindex(args->geo, db);  		longest = be16_to_cpu(bf[0].length);  		/*  		 * If the data block is now empty we can get rid of it  		 * (usually).  		 */ -		if (longest == mp->m_dirblksize - -			       xfs_dir3_data_entry_offset(hdr)) { +		if (longest == args->geo->blksize - +			       dp->d_ops->data_entry_offset) {  			/*  			 * Try to punch out the data block.  			 */ @@ -1336,14 +1300,14 @@ xfs_dir2_leafn_remove(  			return error;  	} -	xfs_dir3_leaf_check(mp, bp); +	xfs_dir3_leaf_check(dp, bp);  	/*  	 * Return indication of whether this leaf block is empty enough  	 * to justify trying to join it with a neighbor.  	 
*/ -	*rval = (xfs_dir3_leaf_hdr_size(leaf) + +	*rval = (dp->d_ops->leaf_hdr_size +  		 (uint)sizeof(ents[0]) * (leafhdr.count - leafhdr.stale)) < -		mp->m_dir_magicpct; +		args->geo->magicpct;  	return 0;  } @@ -1360,13 +1324,14 @@ xfs_dir2_leafn_split(  	xfs_dablk_t		blkno;		/* new leaf block number */  	int			error;		/* error return value */  	xfs_mount_t		*mp;		/* filesystem mount point */ +	struct xfs_inode	*dp;  	/*  	 * Allocate space for a new leaf node.  	 */  	args = state->args; -	mp = args->dp->i_mount; -	ASSERT(args != NULL); +	dp = args->dp; +	mp = dp->i_mount;  	ASSERT(oldblk->magic == XFS_DIR2_LEAFN_MAGIC);  	error = xfs_da_grow_inode(args, &blkno);  	if (error) { @@ -1375,7 +1340,7 @@ xfs_dir2_leafn_split(  	/*  	 * Initialize the new leaf block.  	 */ -	error = xfs_dir3_leaf_get_buf(args, xfs_dir2_da_to_db(mp, blkno), +	error = xfs_dir3_leaf_get_buf(args, xfs_dir2_da_to_db(args->geo, blkno),  				      &newblk->bp, XFS_DIR2_LEAFN_MAGIC);  	if (error)  		return error; @@ -1401,10 +1366,10 @@ xfs_dir2_leafn_split(  	/*  	 * Update last hashval in each block since we added the name.  	 */ -	oldblk->hashval = xfs_dir2_leafn_lasthash(oldblk->bp, NULL); -	newblk->hashval = xfs_dir2_leafn_lasthash(newblk->bp, NULL); -	xfs_dir3_leaf_check(mp, oldblk->bp); -	xfs_dir3_leaf_check(mp, newblk->bp); +	oldblk->hashval = xfs_dir2_leafn_lasthash(dp, oldblk->bp, NULL); +	newblk->hashval = xfs_dir2_leafn_lasthash(dp, newblk->bp, NULL); +	xfs_dir3_leaf_check(dp, oldblk->bp); +	xfs_dir3_leaf_check(dp, newblk->bp);  	return error;  } @@ -1434,6 +1399,7 @@ xfs_dir2_leafn_toosmall(  	int			rval;		/* result from path_shift */  	struct xfs_dir3_icleaf_hdr leafhdr;  	struct xfs_dir2_leaf_entry *ents; +	struct xfs_inode	*dp = state->args->dp;  	/*  	 * Check for the degenerate case of the block being over 50% full. @@ -1442,13 +1408,13 @@ xfs_dir2_leafn_toosmall(  	 */  	blk = &state->path.blk[state->path.active - 1];  	leaf = blk->bp->b_addr; -	xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf); -	ents = xfs_dir3_leaf_ents_p(leaf); -	xfs_dir3_leaf_check(state->args->dp->i_mount, blk->bp); +	dp->d_ops->leaf_hdr_from_disk(&leafhdr, leaf); +	ents = dp->d_ops->leaf_ents_p(leaf); +	xfs_dir3_leaf_check(dp, blk->bp);  	count = leafhdr.count - leafhdr.stale; -	bytes = xfs_dir3_leaf_hdr_size(leaf) + count * sizeof(ents[0]); -	if (bytes > (state->blocksize >> 1)) { +	bytes = dp->d_ops->leaf_hdr_size + count * sizeof(ents[0]); +	if (bytes > (state->args->geo->blksize >> 1)) {  		/*  		 * Blk over 50%, don't try to join.  		 */ @@ -1492,7 +1458,7 @@ xfs_dir2_leafn_toosmall(  		/*  		 * Read the sibling leaf block.  		 */ -		error = xfs_dir3_leafn_read(state->args->trans, state->args->dp, +		error = xfs_dir3_leafn_read(state->args->trans, dp,  					    blkno, -1, &bp);  		if (error)  			return error; @@ -1501,11 +1467,12 @@ xfs_dir2_leafn_toosmall(  		 * Count bytes in the two blocks combined.  		 
*/  		count = leafhdr.count - leafhdr.stale; -		bytes = state->blocksize - (state->blocksize >> 2); +		bytes = state->args->geo->blksize - +			(state->args->geo->blksize >> 2);  		leaf = bp->b_addr; -		xfs_dir3_leaf_hdr_from_disk(&hdr2, leaf); -		ents = xfs_dir3_leaf_ents_p(leaf); +		dp->d_ops->leaf_hdr_from_disk(&hdr2, leaf); +		ents = dp->d_ops->leaf_ents_p(leaf);  		count += hdr2.count - hdr2.stale;  		bytes -= count * sizeof(ents[0]); @@ -1559,6 +1526,7 @@ xfs_dir2_leafn_unbalance(  	struct xfs_dir3_icleaf_hdr drophdr;  	struct xfs_dir2_leaf_entry *sents;  	struct xfs_dir2_leaf_entry *dents; +	struct xfs_inode	*dp = state->args->dp;  	args = state->args;  	ASSERT(drop_blk->magic == XFS_DIR2_LEAFN_MAGIC); @@ -1566,10 +1534,10 @@ xfs_dir2_leafn_unbalance(  	drop_leaf = drop_blk->bp->b_addr;  	save_leaf = save_blk->bp->b_addr; -	xfs_dir3_leaf_hdr_from_disk(&savehdr, save_leaf); -	xfs_dir3_leaf_hdr_from_disk(&drophdr, drop_leaf); -	sents = xfs_dir3_leaf_ents_p(save_leaf); -	dents = xfs_dir3_leaf_ents_p(drop_leaf); +	dp->d_ops->leaf_hdr_from_disk(&savehdr, save_leaf); +	dp->d_ops->leaf_hdr_from_disk(&drophdr, drop_leaf); +	sents = dp->d_ops->leaf_ents_p(save_leaf); +	dents = dp->d_ops->leaf_ents_p(drop_leaf);  	/*  	 * If there are any stale leaf entries, take this opportunity @@ -1584,7 +1552,7 @@ xfs_dir2_leafn_unbalance(  	 * Move the entries from drop to the appropriate end of save.  	 */  	drop_blk->hashval = be32_to_cpu(dents[drophdr.count - 1].hashval); -	if (xfs_dir2_leafn_order(save_blk->bp, drop_blk->bp)) +	if (xfs_dir2_leafn_order(dp, save_blk->bp, drop_blk->bp))  		xfs_dir3_leafn_moveents(args, drop_blk->bp, &drophdr, dents, 0,  					save_blk->bp, &savehdr, sents, 0,  					drophdr.count); @@ -1595,13 +1563,13 @@ xfs_dir2_leafn_unbalance(  	save_blk->hashval = be32_to_cpu(sents[savehdr.count - 1].hashval);  	/* log the changes made when moving the entries */ -	xfs_dir3_leaf_hdr_to_disk(save_leaf, &savehdr); -	xfs_dir3_leaf_hdr_to_disk(drop_leaf, &drophdr); -	xfs_dir3_leaf_log_header(args->trans, save_blk->bp); -	xfs_dir3_leaf_log_header(args->trans, drop_blk->bp); +	dp->d_ops->leaf_hdr_to_disk(save_leaf, &savehdr); +	dp->d_ops->leaf_hdr_to_disk(drop_leaf, &drophdr); +	xfs_dir3_leaf_log_header(args, save_blk->bp); +	xfs_dir3_leaf_log_header(args, drop_blk->bp); -	xfs_dir3_leaf_check(args->dp->i_mount, save_blk->bp); -	xfs_dir3_leaf_check(args->dp->i_mount, drop_blk->bp); +	xfs_dir3_leaf_check(dp, save_blk->bp); +	xfs_dir3_leaf_check(dp, drop_blk->bp);  }  /* @@ -1624,8 +1592,6 @@ xfs_dir2_node_addname(  	state = xfs_da_state_alloc();  	state->args = args;  	state->mp = args->dp->i_mount; -	state->blocksize = state->mp->m_dirblksize; -	state->node_ents = state->mp->m_dir_node_ents;  	/*  	 * Look up the name.  We're not supposed to find it, but  	 * this gives us the insertion point. @@ -1712,7 +1678,7 @@ xfs_dir2_node_addname_int(  	dp = args->dp;  	mp = dp->i_mount;  	tp = args->trans; -	length = xfs_dir3_data_entsize(mp, args->namelen); +	length = dp->d_ops->data_entsize(args->namelen);  	/*  	 * If we came in with a freespace block that means that lookup  	 * found an entry with our hash value.  
This is the freespace @@ -1726,8 +1692,8 @@ xfs_dir2_node_addname_int(  		ifbno = fblk->blkno;  		free = fbp->b_addr;  		findex = fblk->index; -		bests = xfs_dir3_free_bests_p(mp, free); -		xfs_dir3_free_hdr_from_disk(&freehdr, free); +		bests = dp->d_ops->free_bests_p(free); +		dp->d_ops->free_hdr_from_disk(&freehdr, free);  		/*  		 * This means the free entry showed that the data block had @@ -1764,9 +1730,9 @@ xfs_dir2_node_addname_int(  	if (dbno == -1) {  		xfs_fileoff_t	fo;		/* freespace block number */ -		if ((error = xfs_bmap_last_offset(tp, dp, &fo, XFS_DATA_FORK))) +		if ((error = xfs_bmap_last_offset(dp, &fo, XFS_DATA_FORK)))  			return error; -		lastfbno = xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo); +		lastfbno = xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo);  		fbno = ifbno;  	}  	/* @@ -1784,7 +1750,8 @@ xfs_dir2_node_addname_int(  			 * us a freespace block to start with.  			 */  			if (++fbno == 0) -				fbno = XFS_DIR2_FREE_FIRSTDB(mp); +				fbno = xfs_dir2_byte_to_db(args->geo, +							XFS_DIR2_FREE_OFFSET);  			/*  			 * If it's ifbno we already looked at it.  			 */ @@ -1802,8 +1769,8 @@ xfs_dir2_node_addname_int(  			 * to avoid it.  			 */  			error = xfs_dir2_free_try_read(tp, dp, -						xfs_dir2_db_to_da(mp, fbno), -						&fbp); +					xfs_dir2_db_to_da(args->geo, fbno), +					&fbp);  			if (error)  				return error;  			if (!fbp) @@ -1819,8 +1786,8 @@ xfs_dir2_node_addname_int(  		 * and the freehdr are actually initialised if they are placed  		 * there, so we have to do it here to avoid warnings. Blech.  		 */ -		bests = xfs_dir3_free_bests_p(mp, free); -		xfs_dir3_free_hdr_from_disk(&freehdr, free); +		bests = dp->d_ops->free_bests_p(free); +		dp->d_ops->free_hdr_from_disk(&freehdr, free);  		if (be16_to_cpu(bests[findex]) != NULLDATAOFF &&  		    be16_to_cpu(bests[findex]) >= length)  			dbno = freehdr.firstdb + findex; @@ -1871,10 +1838,10 @@ xfs_dir2_node_addname_int(  		 * Get the freespace block corresponding to the data block  		 * that was just allocated.  		 */ -		fbno = xfs_dir2_db_to_fdb(mp, dbno); +		fbno = dp->d_ops->db_to_fdb(args->geo, dbno);  		error = xfs_dir2_free_try_read(tp, dp, -					       xfs_dir2_db_to_da(mp, fbno), -					       &fbp); +				       xfs_dir2_db_to_da(args->geo, fbno), +				       &fbp);  		if (error)  			return error; @@ -1888,12 +1855,13 @@ xfs_dir2_node_addname_int(  			if (error)  				return error; -			if (unlikely(xfs_dir2_db_to_fdb(mp, dbno) != fbno)) { +			if (dp->d_ops->db_to_fdb(args->geo, dbno) != fbno) {  				xfs_alert(mp,  			"%s: dir ino %llu needed freesp block %lld for\n"  			"  data block %lld, got %lld ifbno %llu lastfbno %d",  					__func__, (unsigned long long)dp->i_ino, -					(long long)xfs_dir2_db_to_fdb(mp, dbno), +					(long long)dp->d_ops->db_to_fdb( +								args->geo, dbno),  					(long long)dbno, (long long)fbno,  					(unsigned long long)ifbno, lastfbno);  				if (fblk) { @@ -1914,34 +1882,36 @@ xfs_dir2_node_addname_int(  			/*  			 * Get a buffer for the new block.  			 */ -			error = xfs_dir3_free_get_buf(tp, dp, fbno, &fbp); +			error = xfs_dir3_free_get_buf(args, fbno, &fbp);  			if (error)  				return error;  			free = fbp->b_addr; -			bests = xfs_dir3_free_bests_p(mp, free); -			xfs_dir3_free_hdr_from_disk(&freehdr, free); +			bests = dp->d_ops->free_bests_p(free); +			dp->d_ops->free_hdr_from_disk(&freehdr, free);  			/*  			 * Remember the first slot as our empty slot.  			 
*/ -			freehdr.firstdb = (fbno - XFS_DIR2_FREE_FIRSTDB(mp)) * -					xfs_dir3_free_max_bests(mp); +			freehdr.firstdb = +				(fbno - xfs_dir2_byte_to_db(args->geo, +							XFS_DIR2_FREE_OFFSET)) * +					dp->d_ops->free_max_bests(args->geo);  		} else {  			free = fbp->b_addr; -			bests = xfs_dir3_free_bests_p(mp, free); -			xfs_dir3_free_hdr_from_disk(&freehdr, free); +			bests = dp->d_ops->free_bests_p(free); +			dp->d_ops->free_hdr_from_disk(&freehdr, free);  		}  		/*  		 * Set the freespace block index from the data block number.  		 */ -		findex = xfs_dir2_db_to_fdindex(mp, dbno); +		findex = dp->d_ops->db_to_fdindex(args->geo, dbno);  		/*  		 * If it's after the end of the current entries in the  		 * freespace block, extend that table.  		 */  		if (findex >= freehdr.nvalid) { -			ASSERT(findex < xfs_dir3_free_max_bests(mp)); +			ASSERT(findex < dp->d_ops->free_max_bests(args->geo));  			freehdr.nvalid = findex + 1;  			/*  			 * Tag new entry so nused will go up. @@ -1954,8 +1924,8 @@ xfs_dir2_node_addname_int(  		 */  		if (bests[findex] == cpu_to_be16(NULLDATAOFF)) {  			freehdr.nused++; -			xfs_dir3_free_hdr_to_disk(fbp->b_addr, &freehdr); -			xfs_dir2_free_log_header(tp, fbp); +			dp->d_ops->free_hdr_to_disk(fbp->b_addr, &freehdr); +			xfs_dir2_free_log_header(args, fbp);  		}  		/*  		 * Update the real value in the table. @@ -1963,7 +1933,7 @@ xfs_dir2_node_addname_int(  		 * change again.  		 */  		hdr = dbp->b_addr; -		bf = xfs_dir3_data_bestfree_p(hdr); +		bf = dp->d_ops->data_bestfree_p(hdr);  		bests[findex] = bf[0].length;  		logfree = 1;  	} @@ -1980,12 +1950,13 @@ xfs_dir2_node_addname_int(  		/*  		 * Read the data block in.  		 */ -		error = xfs_dir3_data_read(tp, dp, xfs_dir2_db_to_da(mp, dbno), +		error = xfs_dir3_data_read(tp, dp, +					   xfs_dir2_db_to_da(args->geo, dbno),  					   -1, &dbp);  		if (error)  			return error;  		hdr = dbp->b_addr; -		bf = xfs_dir3_data_bestfree_p(hdr); +		bf = dp->d_ops->data_bestfree_p(hdr);  		logfree = 0;  	}  	ASSERT(be16_to_cpu(bf[0].length) >= length); @@ -1998,7 +1969,7 @@ xfs_dir2_node_addname_int(  	/*  	 * Mark the first part of the unused space, inuse for us.  	 */ -	xfs_dir2_data_use_free(tp, dbp, dup, +	xfs_dir2_data_use_free(args, dbp, dup,  		(xfs_dir2_data_aoff_t)((char *)dup - (char *)hdr), length,  		&needlog, &needscan);  	/* @@ -2008,24 +1979,24 @@ xfs_dir2_node_addname_int(  	dep->inumber = cpu_to_be64(args->inumber);  	dep->namelen = args->namelen;  	memcpy(dep->name, args->name, dep->namelen); -	xfs_dir3_dirent_put_ftype(mp, dep, args->filetype); -	tagp = xfs_dir3_data_entry_tag_p(mp, dep); +	dp->d_ops->data_put_ftype(dep, args->filetype); +	tagp = dp->d_ops->data_entry_tag_p(dep);  	*tagp = cpu_to_be16((char *)dep - (char *)hdr); -	xfs_dir2_data_log_entry(tp, dbp, dep); +	xfs_dir2_data_log_entry(args, dbp, dep);  	/*  	 * Rescan the block for bestfree if needed.  	 */  	if (needscan) -		xfs_dir2_data_freescan(mp, hdr, &needlog); +		xfs_dir2_data_freescan(dp, hdr, &needlog);  	/*  	 * Log the data block header if needed.  	 */  	if (needlog) -		xfs_dir2_data_log_header(tp, dbp); +		xfs_dir2_data_log_header(args, dbp);  	/*  	 * If the freespace entry is now wrong, update it.  	 */ -	bests = xfs_dir3_free_bests_p(mp, free); /* gcc is so stupid */ +	bests = dp->d_ops->free_bests_p(free); /* gcc is so stupid */  	if (be16_to_cpu(bests[findex]) != be16_to_cpu(bf[0].length)) {  		bests[findex] = bf[0].length;  		logfree = 1; @@ -2034,7 +2005,7 @@ xfs_dir2_node_addname_int(  	 * Log the freespace entry if needed. 
 	 */  	if (logfree) -		xfs_dir2_free_log_bests(tp, fbp, findex, findex); +		xfs_dir2_free_log_bests(args, fbp, findex, findex);  	/*  	 * Return the data block and offset in args, then drop the data block.  	 */ @@ -2065,8 +2036,6 @@ xfs_dir2_node_lookup(  	state = xfs_da_state_alloc();  	state->args = args;  	state->mp = args->dp->i_mount; -	state->blocksize = state->mp->m_dirblksize; -	state->node_ents = state->mp->m_dir_node_ents;  	/*  	 * Fill in the path to the entry in the cursor.  	 */ @@ -2105,12 +2074,12 @@ xfs_dir2_node_lookup(   */  int						/* error */  xfs_dir2_node_removename( -	xfs_da_args_t		*args)		/* operation arguments */ +	struct xfs_da_args	*args)		/* operation arguments */  { -	xfs_da_state_blk_t	*blk;		/* leaf block */ +	struct xfs_da_state_blk	*blk;		/* leaf block */  	int			error;		/* error return value */  	int			rval;		/* operation return value */ -	xfs_da_state_t		*state;		/* btree cursor */ +	struct xfs_da_state	*state;		/* btree cursor */  	trace_xfs_dir2_node_removename(args); @@ -2120,21 +2089,18 @@ xfs_dir2_node_removename(  	state = xfs_da_state_alloc();  	state->args = args;  	state->mp = args->dp->i_mount; -	state->blocksize = state->mp->m_dirblksize; -	state->node_ents = state->mp->m_dir_node_ents; -	/* -	 * Look up the entry we're deleting, set up the cursor. -	 */ + +	/* Look up the entry we're deleting, set up the cursor. */  	error = xfs_da3_node_lookup_int(state, &rval);  	if (error) -		rval = error; -	/* -	 * Didn't find it, upper layer screwed up. -	 */ +		goto out_free; + +	/* Didn't find it, upper layer screwed up. */  	if (rval != EEXIST) { -		xfs_da_state_free(state); -		return rval; +		error = rval; +		goto out_free;  	} +  	blk = &state->path.blk[state->path.active - 1];  	ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);  	ASSERT(state->extravalid); @@ -2145,7 +2111,7 @@ xfs_dir2_node_removename(  	error = xfs_dir2_leafn_remove(args, blk->bp, blk->index,  		&state->extrablk, &rval);  	if (error) -		return error; +		goto out_free;  	/*  	 * Fix the hash values up the btree.  	 */ @@ -2160,6 +2126,7 @@ xfs_dir2_node_removename(  	 */  	if (!error)  		error = xfs_dir2_node_to_leaf(state); +out_free:  	xfs_da_state_free(state);  	return error;  } @@ -2190,8 +2157,6 @@ xfs_dir2_node_replace(  	state = xfs_da_state_alloc();  	state->args = args;  	state->mp = args->dp->i_mount; -	state->blocksize = state->mp->m_dirblksize; -	state->node_ents = state->mp->m_dir_node_ents;  	inum = args->inumber;  	/*  	 * Lookup the entry to change in the btree. @@ -2212,7 +2177,7 @@ xfs_dir2_node_replace(  		blk = &state->path.blk[state->path.active - 1];  		ASSERT(blk->magic == XFS_DIR2_LEAFN_MAGIC);  		leaf = blk->bp->b_addr; -		ents = xfs_dir3_leaf_ents_p(leaf); +		ents = args->dp->d_ops->leaf_ents_p(leaf);  		lep = &ents[blk->index];  		ASSERT(state->extravalid);  		/* @@ -2223,14 +2188,15 @@ xfs_dir2_node_replace(  		       hdr->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC));  		dep = (xfs_dir2_data_entry_t *)  		      ((char *)hdr + -		       xfs_dir2_dataptr_to_off(state->mp, be32_to_cpu(lep->address))); +		       xfs_dir2_dataptr_to_off(args->geo, +					       be32_to_cpu(lep->address)));  		ASSERT(inum != be64_to_cpu(dep->inumber));  		/*  		 * Fill in the new inode number and log the entry.  		 
*/  		dep->inumber = cpu_to_be64(inum); -		xfs_dir3_dirent_put_ftype(state->mp, dep, args->filetype); -		xfs_dir2_data_log_entry(args->trans, state->extrablk.bp, dep); +		args->dp->d_ops->data_put_ftype(dep, args->filetype); +		xfs_dir2_data_log_entry(args, state->extrablk.bp, dep);  		rval = 0;  	}  	/* @@ -2285,7 +2251,7 @@ xfs_dir2_node_trim_free(  	if (!bp)  		return 0;  	free = bp->b_addr; -	xfs_dir3_free_hdr_from_disk(&freehdr, free); +	dp->d_ops->free_hdr_from_disk(&freehdr, free);  	/*  	 * If there are used entries, there's nothing to do. @@ -2298,9 +2264,9 @@ xfs_dir2_node_trim_free(  	/*  	 * Blow the block away.  	 */ -	if ((error = -	    xfs_dir2_shrink_inode(args, xfs_dir2_da_to_db(mp, (xfs_dablk_t)fo), -		    bp))) { +	error = xfs_dir2_shrink_inode(args, +			xfs_dir2_da_to_db(args->geo, (xfs_dablk_t)fo), bp); +	if (error) {  		/*  		 * Can't fail with ENOSPC since that only happens with no  		 * space reservation, when breaking up an extent into two diff --git a/fs/xfs/xfs_dir2_priv.h b/fs/xfs/xfs_dir2_priv.h index 1bad84c4082..27ce0794d19 100644 --- a/fs/xfs/xfs_dir2_priv.h +++ b/fs/xfs/xfs_dir2_priv.h @@ -20,6 +20,140 @@  struct dir_context; +/* + * Directory offset/block conversion functions. + * + * DB blocks here are logical directory block numbers, not filesystem blocks. + */ + +/* + * Convert dataptr to byte in file space + */ +static inline xfs_dir2_off_t +xfs_dir2_dataptr_to_byte(xfs_dir2_dataptr_t dp) +{ +	return (xfs_dir2_off_t)dp << XFS_DIR2_DATA_ALIGN_LOG; +} + +/* + * Convert byte in file space to dataptr.  It had better be aligned. + */ +static inline xfs_dir2_dataptr_t +xfs_dir2_byte_to_dataptr(xfs_dir2_off_t by) +{ +	return (xfs_dir2_dataptr_t)(by >> XFS_DIR2_DATA_ALIGN_LOG); +} + +/* + * Convert byte in space to (DB) block + */ +static inline xfs_dir2_db_t +xfs_dir2_byte_to_db(struct xfs_da_geometry *geo, xfs_dir2_off_t by) +{ +	return (xfs_dir2_db_t)(by >> geo->blklog); +} + +/* + * Convert dataptr to a block number + */ +static inline xfs_dir2_db_t +xfs_dir2_dataptr_to_db(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp) +{ +	return xfs_dir2_byte_to_db(geo, xfs_dir2_dataptr_to_byte(dp)); +} + +/* + * Convert byte in space to offset in a block + */ +static inline xfs_dir2_data_aoff_t +xfs_dir2_byte_to_off(struct xfs_da_geometry *geo, xfs_dir2_off_t by) +{ +	return (xfs_dir2_data_aoff_t)(by & (geo->blksize - 1)); +} + +/* + * Convert dataptr to a byte offset in a block + */ +static inline xfs_dir2_data_aoff_t +xfs_dir2_dataptr_to_off(struct xfs_da_geometry *geo, xfs_dir2_dataptr_t dp) +{ +	return xfs_dir2_byte_to_off(geo, xfs_dir2_dataptr_to_byte(dp)); +} + +/* + * Convert block and offset to byte in space + */ +static inline xfs_dir2_off_t +xfs_dir2_db_off_to_byte(struct xfs_da_geometry *geo, xfs_dir2_db_t db, +			xfs_dir2_data_aoff_t o) +{ +	return ((xfs_dir2_off_t)db << geo->blklog) + o; +} + +/* + * Convert block (DB) to block (dablk) + */ +static inline xfs_dablk_t +xfs_dir2_db_to_da(struct xfs_da_geometry *geo, xfs_dir2_db_t db) +{ +	return (xfs_dablk_t)(db << (geo->blklog - geo->fsblog)); +} + +/* + * Convert byte in space to (DA) block + */ +static inline xfs_dablk_t +xfs_dir2_byte_to_da(struct xfs_da_geometry *geo, xfs_dir2_off_t by) +{ +	return xfs_dir2_db_to_da(geo, xfs_dir2_byte_to_db(geo, by)); +} + +/* + * Convert block and offset to dataptr + */ +static inline xfs_dir2_dataptr_t +xfs_dir2_db_off_to_dataptr(struct xfs_da_geometry *geo, xfs_dir2_db_t db, +			   xfs_dir2_data_aoff_t o) +{ +	return 
xfs_dir2_byte_to_dataptr(xfs_dir2_db_off_to_byte(geo, db, o)); +} + +/* + * Convert block (dablk) to block (DB) + */ +static inline xfs_dir2_db_t +xfs_dir2_da_to_db(struct xfs_da_geometry *geo, xfs_dablk_t da) +{ +	return (xfs_dir2_db_t)(da >> (geo->blklog - geo->fsblog)); +} + +/* + * Convert block (dablk) to byte offset in space + */ +static inline xfs_dir2_off_t +xfs_dir2_da_to_byte(struct xfs_da_geometry *geo, xfs_dablk_t da) +{ +	return xfs_dir2_db_off_to_byte(geo, xfs_dir2_da_to_db(geo, da), 0); +} + +/* + * Directory tail pointer accessor functions. Based on block geometry. + */ +static inline struct xfs_dir2_block_tail * +xfs_dir2_block_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_data_hdr *hdr) +{ +	return ((struct xfs_dir2_block_tail *) +		((char *)hdr + geo->blksize)) - 1; +} + +static inline struct xfs_dir2_leaf_tail * +xfs_dir2_leaf_tail_p(struct xfs_da_geometry *geo, struct xfs_dir2_leaf *lp) +{ +	return (struct xfs_dir2_leaf_tail *) +		((char *)lp + geo->blksize - +		  sizeof(struct xfs_dir2_leaf_tail)); +} +  /* xfs_dir2.c */  extern int xfs_dir_ino_validate(struct xfs_mount *mp, xfs_ino_t ino);  extern int xfs_dir2_grow_inode(struct xfs_da_args *args, int space, @@ -54,12 +188,13 @@ extern int xfs_dir2_leaf_to_block(struct xfs_da_args *args,  extern int __xfs_dir3_data_check(struct xfs_inode *dp, struct xfs_buf *bp);  extern int xfs_dir3_data_read(struct xfs_trans *tp, struct xfs_inode *dp,  		xfs_dablk_t bno, xfs_daddr_t mapped_bno, struct xfs_buf **bpp); -extern int xfs_dir3_data_readahead(struct xfs_trans *tp, struct xfs_inode *dp, -		xfs_dablk_t bno, xfs_daddr_t mapped_bno); +extern int xfs_dir3_data_readahead(struct xfs_inode *dp, xfs_dablk_t bno, +		xfs_daddr_t mapped_bno);  extern struct xfs_dir2_data_free *  xfs_dir2_data_freeinsert(struct xfs_dir2_data_hdr *hdr, -		struct xfs_dir2_data_unused *dup, int *loghead); +		struct xfs_dir2_data_free *bf, struct xfs_dir2_data_unused *dup, +		int *loghead);  extern int xfs_dir3_data_init(struct xfs_da_args *args, xfs_dir2_db_t blkno,  		struct xfs_buf **bpp); @@ -76,9 +211,9 @@ extern void xfs_dir3_leaf_compact_x1(struct xfs_dir3_icleaf_hdr *leafhdr,  		int *lowstalep, int *highstalep, int *lowlogp, int *highlogp);  extern int xfs_dir3_leaf_get_buf(struct xfs_da_args *args, xfs_dir2_db_t bno,  		struct xfs_buf **bpp, __uint16_t magic); -extern void xfs_dir3_leaf_log_ents(struct xfs_trans *tp, struct xfs_buf *bp, -		int first, int last); -extern void xfs_dir3_leaf_log_header(struct xfs_trans *tp, +extern void xfs_dir3_leaf_log_ents(struct xfs_da_args *args, +		struct xfs_buf *bp, int first, int last); +extern void xfs_dir3_leaf_log_header(struct xfs_da_args *args,  		struct xfs_buf *bp);  extern int xfs_dir2_leaf_lookup(struct xfs_da_args *args);  extern int xfs_dir2_leaf_removename(struct xfs_da_args *args); @@ -93,21 +228,18 @@ xfs_dir3_leaf_find_entry(struct xfs_dir3_icleaf_hdr *leafhdr,  		int lowstale, int highstale, int *lfloglow, int *lfloghigh);  extern int xfs_dir2_node_to_leaf(struct xfs_da_state *state); -extern void xfs_dir3_leaf_hdr_from_disk(struct xfs_dir3_icleaf_hdr *to, -		struct xfs_dir2_leaf *from); -extern void xfs_dir3_leaf_hdr_to_disk(struct xfs_dir2_leaf *to, -		struct xfs_dir3_icleaf_hdr *from); -extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, +extern bool xfs_dir3_leaf_check_int(struct xfs_mount *mp, struct xfs_inode *dp,  		struct xfs_dir3_icleaf_hdr *hdr, struct xfs_dir2_leaf *leaf);  /* xfs_dir2_node.c */  extern int xfs_dir2_leaf_to_node(struct xfs_da_args *args,  		struct 
xfs_buf *lbp); -extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_buf *bp, int *count); +extern xfs_dahash_t xfs_dir2_leafn_lasthash(struct xfs_inode *dp, +		struct xfs_buf *bp, int *count);  extern int xfs_dir2_leafn_lookup_int(struct xfs_buf *bp,  		struct xfs_da_args *args, int *indexp,  		struct xfs_da_state *state); -extern int xfs_dir2_leafn_order(struct xfs_buf *leaf1_bp, +extern int xfs_dir2_leafn_order(struct xfs_inode *dp, struct xfs_buf *leaf1_bp,  		struct xfs_buf *leaf2_bp);  extern int xfs_dir2_leafn_split(struct xfs_da_state *state,  	struct xfs_da_state_blk *oldblk, struct xfs_da_state_blk *newblk); diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c index 8993ec17452..48e99afb9cb 100644 --- a/fs/xfs/xfs_dir2_readdir.c +++ b/fs/xfs/xfs_dir2_readdir.c @@ -18,23 +18,23 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" +#include "xfs_da_format.h"  #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h" -#include "xfs_dir2_format.h"  #include "xfs_dir2.h"  #include "xfs_dir2_priv.h"  #include "xfs_error.h"  #include "xfs_trace.h"  #include "xfs_bmap.h" +#include "xfs_trans.h" +#include "xfs_dinode.h"  /*   * Directory file type support functions @@ -76,26 +76,25 @@ const unsigned char xfs_mode_to_ftype[S_IFMT >> S_SHIFT] = {  STATIC int  xfs_dir2_sf_getdents( -	xfs_inode_t		*dp,		/* incore directory inode */ +	struct xfs_da_args	*args,  	struct dir_context	*ctx)  {  	int			i;		/* shortform entry number */ -	xfs_mount_t		*mp;		/* filesystem mount point */ +	struct xfs_inode	*dp = args->dp;	/* incore directory inode */  	xfs_dir2_dataptr_t	off;		/* current entry's offset */  	xfs_dir2_sf_entry_t	*sfep;		/* shortform directory entry */  	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */  	xfs_dir2_dataptr_t	dot_offset;  	xfs_dir2_dataptr_t	dotdot_offset;  	xfs_ino_t		ino; - -	mp = dp->i_mount; +	struct xfs_da_geometry	*geo = args->geo;  	ASSERT(dp->i_df.if_flags & XFS_IFINLINE);  	/*  	 * Give up if the directory is way too short.  	 */  	if (dp->i_d.di_size < offsetof(xfs_dir2_sf_hdr_t, parent)) { -		ASSERT(XFS_FORCED_SHUTDOWN(mp)); +		ASSERT(XFS_FORCED_SHUTDOWN(dp->i_mount));  		return XFS_ERROR(EIO);  	} @@ -109,19 +108,19 @@ xfs_dir2_sf_getdents(  	/*  	 * If the block number in the offset is out of range, we're done.  	 */ -	if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk) +	if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk)  		return 0;  	/*  	 * Precalculate offsets for . and .. as we will always need them.  	 *  	 * XXX(hch): the second argument is sometimes 0 and sometimes -	 * mp->m_dirdatablk. +	 * geo->datablk  	 */ -	dot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, -					     XFS_DIR3_DATA_DOT_OFFSET(mp)); -	dotdot_offset = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, -						XFS_DIR3_DATA_DOTDOT_OFFSET(mp)); +	dot_offset = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, +						dp->d_ops->data_dot_offset); +	dotdot_offset = xfs_dir2_db_off_to_dataptr(geo, geo->datablk, +						dp->d_ops->data_dotdot_offset);  	/*  	 * Put . entry unless we're starting past it. @@ -136,7 +135,7 @@ xfs_dir2_sf_getdents(  	 * Put .. entry unless we're starting past it.  	 
*/  	if (ctx->pos <= dotdot_offset) { -		ino = xfs_dir2_sf_get_parent_ino(sfp); +		ino = dp->d_ops->sf_get_parent_ino(sfp);  		ctx->pos = dotdot_offset & 0x7fffffff;  		if (!dir_emit(ctx, "..", 2, ino, DT_DIR))  			return 0; @@ -149,25 +148,25 @@ xfs_dir2_sf_getdents(  	for (i = 0; i < sfp->count; i++) {  		__uint8_t filetype; -		off = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, +		off = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,  				xfs_dir2_sf_get_offset(sfep));  		if (ctx->pos > off) { -			sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep); +			sfep = dp->d_ops->sf_nextentry(sfp, sfep);  			continue;  		} -		ino = xfs_dir3_sfe_get_ino(mp, sfp, sfep); -		filetype = xfs_dir3_sfe_get_ftype(mp, sfp, sfep); +		ino = dp->d_ops->sf_get_ino(sfp, sfep); +		filetype = dp->d_ops->sf_get_ftype(sfep);  		ctx->pos = off & 0x7fffffff;  		if (!dir_emit(ctx, (char *)sfep->name, sfep->namelen, ino, -			    xfs_dir3_get_dtype(mp, filetype))) +			    xfs_dir3_get_dtype(dp->i_mount, filetype)))  			return 0; -		sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep); +		sfep = dp->d_ops->sf_nextentry(sfp, sfep);  	} -	ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) & -			0x7fffffff; +	ctx->pos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk + 1, 0) & +								0x7fffffff;  	return 0;  } @@ -176,9 +175,10 @@ xfs_dir2_sf_getdents(   */  STATIC int  xfs_dir2_block_getdents( -	xfs_inode_t		*dp,		/* incore inode */ +	struct xfs_da_args	*args,  	struct dir_context	*ctx)  { +	struct xfs_inode	*dp = args->dp;	/* incore directory inode */  	xfs_dir2_data_hdr_t	*hdr;		/* block header */  	struct xfs_buf		*bp;		/* buffer for block */  	xfs_dir2_block_tail_t	*btp;		/* block tail */ @@ -186,16 +186,15 @@ xfs_dir2_block_getdents(  	xfs_dir2_data_unused_t	*dup;		/* block unused entry */  	char			*endptr;	/* end of the data entries */  	int			error;		/* error return value */ -	xfs_mount_t		*mp;		/* filesystem mount point */  	char			*ptr;		/* current data entry */  	int			wantoff;	/* starting block offset */  	xfs_off_t		cook; +	struct xfs_da_geometry	*geo = args->geo; -	mp = dp->i_mount;  	/*  	 * If the block number in the offset is out of range, we're done.  	 */ -	if (xfs_dir2_dataptr_to_db(mp, ctx->pos) > mp->m_dirdatablk) +	if (xfs_dir2_dataptr_to_db(geo, ctx->pos) > geo->datablk)  		return 0;  	error = xfs_dir3_block_read(NULL, dp, &bp); @@ -206,14 +205,14 @@ xfs_dir2_block_getdents(  	 * Extract the byte offset we start at from the seek pointer.  	 * We'll skip entries before this.  	 */ -	wantoff = xfs_dir2_dataptr_to_off(mp, ctx->pos); +	wantoff = xfs_dir2_dataptr_to_off(geo, ctx->pos);  	hdr = bp->b_addr;  	xfs_dir3_data_check(dp, bp);  	/*  	 * Set up values for the loop.  	 */ -	btp = xfs_dir2_block_tail_p(mp, hdr); -	ptr = (char *)xfs_dir3_data_entry_p(hdr); +	btp = xfs_dir2_block_tail_p(geo, hdr); +	ptr = (char *)dp->d_ops->data_entry_p(hdr);  	endptr = (char *)xfs_dir2_block_leaf_p(btp);  	/* @@ -237,24 +236,24 @@ xfs_dir2_block_getdents(  		/*  		 * Bump pointer for the next iteration.  		 */ -		ptr += xfs_dir3_data_entsize(mp, dep->namelen); +		ptr += dp->d_ops->data_entsize(dep->namelen);  		/*  		 * The entry is before the desired starting point, skip it.  		 
*/  		if ((char *)dep - (char *)hdr < wantoff)  			continue; -		cook = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk, +		cook = xfs_dir2_db_off_to_dataptr(geo, geo->datablk,  					    (char *)dep - (char *)hdr);  		ctx->pos = cook & 0x7fffffff; -		filetype = xfs_dir3_dirent_get_ftype(mp, dep); +		filetype = dp->d_ops->data_get_ftype(dep);  		/*  		 * If it didn't fit, set the final offset to here & return.  		 */  		if (!dir_emit(ctx, (char *)dep->name, dep->namelen,  			    be64_to_cpu(dep->inumber), -			    xfs_dir3_get_dtype(mp, filetype))) { +			    xfs_dir3_get_dtype(dp->i_mount, filetype))) {  			xfs_trans_brelse(NULL, bp);  			return 0;  		} @@ -264,8 +263,8 @@ xfs_dir2_block_getdents(  	 * Reached the end of the block.  	 * Set the offset to a non-existent block 1 and return.  	 */ -	ctx->pos = xfs_dir2_db_off_to_dataptr(mp, mp->m_dirdatablk + 1, 0) & -			0x7fffffff; +	ctx->pos = xfs_dir2_db_off_to_dataptr(geo, geo->datablk + 1, 0) & +								0x7fffffff;  	xfs_trans_brelse(NULL, bp);  	return 0;  } @@ -286,13 +285,13 @@ struct xfs_dir2_leaf_map_info {  STATIC int  xfs_dir2_leaf_readbuf( -	struct xfs_inode	*dp, +	struct xfs_da_args	*args,  	size_t			bufsize,  	struct xfs_dir2_leaf_map_info *mip,  	xfs_dir2_off_t		*curoff,  	struct xfs_buf		**bpp)  { -	struct xfs_mount	*mp = dp->i_mount; +	struct xfs_inode	*dp = args->dp;  	struct xfs_buf		*bp = *bpp;  	struct xfs_bmbt_irec	*map = mip->map;  	struct blk_plug		plug; @@ -300,6 +299,7 @@ xfs_dir2_leaf_readbuf(  	int			length;  	int			i;  	int			j; +	struct xfs_da_geometry	*geo = args->geo;  	/*  	 * If we have a buffer, we need to release it and @@ -309,12 +309,12 @@ xfs_dir2_leaf_readbuf(  	if (bp) {  		xfs_trans_brelse(NULL, bp);  		bp = NULL; -		mip->map_blocks -= mp->m_dirblkfsbs; +		mip->map_blocks -= geo->fsbcount;  		/*  		 * Loop to get rid of the extents for the  		 * directory block.  		 */ -		for (i = mp->m_dirblkfsbs; i > 0; ) { +		for (i = geo->fsbcount; i > 0; ) {  			j = min_t(int, map->br_blockcount, i);  			map->br_blockcount -= j;  			map->br_startblock += j; @@ -333,8 +333,7 @@ xfs_dir2_leaf_readbuf(  	/*  	 * Recalculate the readahead blocks wanted.  	 */ -	mip->ra_want = howmany(bufsize + mp->m_dirblksize, -			       mp->m_sb.sb_blocksize) - 1; +	mip->ra_want = howmany(bufsize + geo->blksize, (1 << geo->fsblog)) - 1;  	ASSERT(mip->ra_want >= 0);  	/* @@ -342,14 +341,14 @@ xfs_dir2_leaf_readbuf(  	 * run out of data blocks, get some more mappings.  	 */  	if (1 + mip->ra_want > mip->map_blocks && -	    mip->map_off < xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET)) { +	    mip->map_off < xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET)) {  		/*  		 * Get more bmaps, fill in after the ones  		 * we already have in the table.  		 */  		mip->nmap = mip->map_size - mip->map_valid;  		error = xfs_bmapi_read(dp, mip->map_off, -				xfs_dir2_byte_to_da(mp, XFS_DIR2_LEAF_OFFSET) - +				xfs_dir2_byte_to_da(geo, XFS_DIR2_LEAF_OFFSET) -  								mip->map_off,  				&map[mip->map_valid], &mip->nmap, 0); @@ -370,7 +369,7 @@ xfs_dir2_leaf_readbuf(  			i = mip->map_valid + mip->nmap - 1;  			mip->map_off = map[i].br_startoff + map[i].br_blockcount;  		} else -			mip->map_off = xfs_dir2_byte_to_da(mp, +			mip->map_off = xfs_dir2_byte_to_da(geo,  							XFS_DIR2_LEAF_OFFSET);  		/* @@ -396,18 +395,18 @@ xfs_dir2_leaf_readbuf(  	 * No valid mappings, so no more data blocks.  	 
*/  	if (!mip->map_valid) { -		*curoff = xfs_dir2_da_to_byte(mp, mip->map_off); +		*curoff = xfs_dir2_da_to_byte(geo, mip->map_off);  		goto out;  	}  	/*  	 * Read the directory block starting at the first mapping.  	 */ -	mip->curdb = xfs_dir2_da_to_db(mp, map->br_startoff); +	mip->curdb = xfs_dir2_da_to_db(geo, map->br_startoff);  	error = xfs_dir3_data_read(NULL, dp, map->br_startoff, -			map->br_blockcount >= mp->m_dirblkfsbs ? -			    XFS_FSB_TO_DADDR(mp, map->br_startblock) : -1, &bp); - +			map->br_blockcount >= geo->fsbcount ? +			    XFS_FSB_TO_DADDR(dp->i_mount, map->br_startblock) : +			    -1, &bp);  	/*  	 * Should just skip over the data block instead of giving up.  	 */ @@ -419,7 +418,7 @@ xfs_dir2_leaf_readbuf(  	 * was previously ra.  	 */  	if (mip->ra_current) -		mip->ra_current -= mp->m_dirblkfsbs; +		mip->ra_current -= geo->fsbcount;  	/*  	 * Do we need more readahead? @@ -427,16 +426,16 @@ xfs_dir2_leaf_readbuf(  	blk_start_plug(&plug);  	for (mip->ra_index = mip->ra_offset = i = 0;  	     mip->ra_want > mip->ra_current && i < mip->map_blocks; -	     i += mp->m_dirblkfsbs) { +	     i += geo->fsbcount) {  		ASSERT(mip->ra_index < mip->map_valid);  		/*  		 * Read-ahead a contiguous directory block.  		 */  		if (i > mip->ra_current && -		    map[mip->ra_index].br_blockcount >= mp->m_dirblkfsbs) { -			xfs_dir3_data_readahead(NULL, dp, +		    map[mip->ra_index].br_blockcount >= geo->fsbcount) { +			xfs_dir3_data_readahead(dp,  				map[mip->ra_index].br_startoff + mip->ra_offset, -				XFS_FSB_TO_DADDR(mp, +				XFS_FSB_TO_DADDR(dp->i_mount,  					map[mip->ra_index].br_startblock +  							mip->ra_offset));  			mip->ra_current = i; @@ -447,7 +446,7 @@ xfs_dir2_leaf_readbuf(  		 * use our mapping, but this is a very rare case.  		 */  		else if (i > mip->ra_current) { -			xfs_dir3_data_readahead(NULL, dp, +			xfs_dir3_data_readahead(dp,  					map[mip->ra_index].br_startoff +  							mip->ra_offset, -1);  			mip->ra_current = i; @@ -456,15 +455,14 @@ xfs_dir2_leaf_readbuf(  		/*  		 * Advance offset through the mapping table.  		 */ -		for (j = 0; j < mp->m_dirblkfsbs; j++) { +		for (j = 0; j < geo->fsbcount; j += length ) {  			/*  			 * The rest of this extent but not more than a dir  			 * block.  			 
*/ -			length = min_t(int, mp->m_dirblkfsbs, +			length = min_t(int, geo->fsbcount,  					map[mip->ra_index].br_blockcount -  							mip->ra_offset); -			j += length;  			mip->ra_offset += length;  			/* @@ -489,22 +487,23 @@ out:   */  STATIC int  xfs_dir2_leaf_getdents( -	xfs_inode_t		*dp,		/* incore directory inode */ +	struct xfs_da_args	*args,  	struct dir_context	*ctx,  	size_t			bufsize)  { +	struct xfs_inode	*dp = args->dp;  	struct xfs_buf		*bp = NULL;	/* data block buffer */  	xfs_dir2_data_hdr_t	*hdr;		/* data block header */  	xfs_dir2_data_entry_t	*dep;		/* data entry */  	xfs_dir2_data_unused_t	*dup;		/* unused entry */  	int			error = 0;	/* error return value */  	int			length;		/* temporary length value */ -	xfs_mount_t		*mp;		/* filesystem mount point */  	int			byteoff;	/* offset in current block */  	xfs_dir2_off_t		curoff;		/* current overall offset */  	xfs_dir2_off_t		newoff;		/* new curoff after new blk */  	char			*ptr = NULL;	/* pointer to current data */  	struct xfs_dir2_leaf_map_info *map_info; +	struct xfs_da_geometry	*geo = args->geo;  	/*  	 * If the offset is at or past the largest allowed value, @@ -513,15 +512,12 @@ xfs_dir2_leaf_getdents(  	if (ctx->pos >= XFS_DIR2_MAX_DATAPTR)  		return 0; -	mp = dp->i_mount; -  	/*  	 * Set up to bmap a number of blocks based on the caller's  	 * buffer size, the directory block size, and the filesystem  	 * block size.  	 */ -	length = howmany(bufsize + mp->m_dirblksize, -				     mp->m_sb.sb_blocksize); +	length = howmany(bufsize + geo->blksize, (1 << geo->fsblog));  	map_info = kmem_zalloc(offsetof(struct xfs_dir2_leaf_map_info, map) +  				(length * sizeof(struct xfs_bmbt_irec)),  			       KM_SLEEP | KM_NOFS); @@ -531,14 +527,14 @@ xfs_dir2_leaf_getdents(  	 * Inside the loop we keep the main offset value as a byte offset  	 * in the directory file.  	 */ -	curoff = xfs_dir2_dataptr_to_byte(mp, ctx->pos); +	curoff = xfs_dir2_dataptr_to_byte(ctx->pos);  	/*  	 * Force this conversion through db so we truncate the offset  	 * down to get the start of the data block.  	 */ -	map_info->map_off = xfs_dir2_db_to_da(mp, -					      xfs_dir2_byte_to_db(mp, curoff)); +	map_info->map_off = xfs_dir2_db_to_da(geo, +					      xfs_dir2_byte_to_db(geo, curoff));  	/*  	 * Loop over directory entries until we reach the end offset. @@ -551,9 +547,9 @@ xfs_dir2_leaf_getdents(  		 * If we have no buffer, or we're off the end of the  		 * current buffer, need to get another one.  		 */ -		if (!bp || ptr >= (char *)bp->b_addr + mp->m_dirblksize) { +		if (!bp || ptr >= (char *)bp->b_addr + geo->blksize) { -			error = xfs_dir2_leaf_readbuf(dp, bufsize, map_info, +			error = xfs_dir2_leaf_readbuf(args, bufsize, map_info,  						      &curoff, &bp);  			if (error || !map_info->map_valid)  				break; @@ -561,7 +557,8 @@ xfs_dir2_leaf_getdents(  			/*  			 * Having done a read, we need to set a new offset.  			 */ -			newoff = xfs_dir2_db_off_to_byte(mp, map_info->curdb, 0); +			newoff = xfs_dir2_db_off_to_byte(geo, +							 map_info->curdb, 0);  			/*  			 * Start of the current block.  			 */ @@ -571,20 +568,20 @@ xfs_dir2_leaf_getdents(  			 * Make sure we're in the right block.  			 */  			else if (curoff > newoff) -				ASSERT(xfs_dir2_byte_to_db(mp, curoff) == +				ASSERT(xfs_dir2_byte_to_db(geo, curoff) ==  				       map_info->curdb);  			hdr = bp->b_addr;  			xfs_dir3_data_check(dp, bp);  			/*  			 * Find our position in the block.  			 
*/ -			ptr = (char *)xfs_dir3_data_entry_p(hdr); -			byteoff = xfs_dir2_byte_to_off(mp, curoff); +			ptr = (char *)dp->d_ops->data_entry_p(hdr); +			byteoff = xfs_dir2_byte_to_off(geo, curoff);  			/*  			 * Skip past the header.  			 */  			if (byteoff == 0) -				curoff += xfs_dir3_data_entry_offset(hdr); +				curoff += dp->d_ops->data_entry_offset;  			/*  			 * Skip past entries until we reach our offset.  			 */ @@ -601,17 +598,17 @@ xfs_dir2_leaf_getdents(  					}  					dep = (xfs_dir2_data_entry_t *)ptr;  					length = -					   xfs_dir3_data_entsize(mp, dep->namelen); +					   dp->d_ops->data_entsize(dep->namelen);  					ptr += length;  				}  				/*  				 * Now set our real offset.  				 */  				curoff = -					xfs_dir2_db_off_to_byte(mp, -					    xfs_dir2_byte_to_db(mp, curoff), +					xfs_dir2_db_off_to_byte(geo, +					    xfs_dir2_byte_to_db(geo, curoff),  					    (char *)ptr - (char *)hdr); -				if (ptr >= (char *)hdr + mp->m_dirblksize) { +				if (ptr >= (char *)hdr + geo->blksize) {  					continue;  				}  			} @@ -632,13 +629,13 @@ xfs_dir2_leaf_getdents(  		}  		dep = (xfs_dir2_data_entry_t *)ptr; -		length = xfs_dir3_data_entsize(mp, dep->namelen); -		filetype = xfs_dir3_dirent_get_ftype(mp, dep); +		length = dp->d_ops->data_entsize(dep->namelen); +		filetype = dp->d_ops->data_get_ftype(dep); -		ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff; +		ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;  		if (!dir_emit(ctx, (char *)dep->name, dep->namelen,  			    be64_to_cpu(dep->inumber), -			    xfs_dir3_get_dtype(mp, filetype))) +			    xfs_dir3_get_dtype(dp->i_mount, filetype)))  			break;  		/* @@ -653,10 +650,10 @@ xfs_dir2_leaf_getdents(  	/*  	 * All done.  Set output offset value to current offset.  	 */ -	if (curoff > xfs_dir2_dataptr_to_byte(mp, XFS_DIR2_MAX_DATAPTR)) +	if (curoff > xfs_dir2_dataptr_to_byte(XFS_DIR2_MAX_DATAPTR))  		ctx->pos = XFS_DIR2_MAX_DATAPTR & 0x7fffffff;  	else -		ctx->pos = xfs_dir2_byte_to_dataptr(mp, curoff) & 0x7fffffff; +		ctx->pos = xfs_dir2_byte_to_dataptr(curoff) & 0x7fffffff;  	kmem_free(map_info);  	if (bp)  		xfs_trans_brelse(NULL, bp); @@ -668,12 +665,14 @@ xfs_dir2_leaf_getdents(   */  int  xfs_readdir( -	xfs_inode_t	*dp, -	struct dir_context *ctx, -	size_t		bufsize) +	struct xfs_inode	*dp, +	struct dir_context	*ctx, +	size_t			bufsize)  { -	int		rval;		/* return value */ -	int		v;		/* type-checking value */ +	struct xfs_da_args	args = { NULL }; +	int			rval; +	int			v; +	uint			lock_mode;  	trace_xfs_readdir(dp); @@ -683,13 +682,19 @@ xfs_readdir(  	ASSERT(S_ISDIR(dp->i_d.di_mode));  	XFS_STATS_INC(xs_dir_getdents); +	args.dp = dp; +	args.geo = dp->i_mount->m_dir_geo; + +	lock_mode = xfs_ilock_data_map_shared(dp);  	if (dp->i_d.di_format == XFS_DINODE_FMT_LOCAL) -		rval = xfs_dir2_sf_getdents(dp, ctx); -	else if ((rval = xfs_dir2_isblock(NULL, dp, &v))) +		rval = xfs_dir2_sf_getdents(&args, ctx); +	else if ((rval = xfs_dir2_isblock(&args, &v)))  		;  	else if (v) -		rval = xfs_dir2_block_getdents(dp, ctx); +		rval = xfs_dir2_block_getdents(&args, ctx);  	else -		rval = xfs_dir2_leaf_getdents(dp, ctx, bufsize); +		rval = xfs_dir2_leaf_getdents(&args, ctx, bufsize); +	xfs_iunlock(dp, lock_mode); +  	return rval;  } diff --git a/fs/xfs/xfs_dir2_sf.c b/fs/xfs/xfs_dir2_sf.c index bb6e2848f47..53c3be619db 100644 --- a/fs/xfs/xfs_dir2_sf.c +++ b/fs/xfs/xfs_dir2_sf.c @@ -17,22 +17,22 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include 
"xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" +#include "xfs_da_format.h"  #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h" +#include "xfs_trans.h"  #include "xfs_inode_item.h"  #include "xfs_error.h" -#include "xfs_dir2_format.h"  #include "xfs_dir2.h"  #include "xfs_dir2_priv.h"  #include "xfs_trace.h" +#include "xfs_dinode.h"  /*   * Prototypes for internal functions. @@ -57,89 +57,6 @@ static void xfs_dir2_sf_toino8(xfs_da_args_t *args);  #endif /* XFS_BIG_INUMS */  /* - * Inode numbers in short-form directories can come in two versions, - * either 4 bytes or 8 bytes wide.  These helpers deal with the - * two forms transparently by looking at the headers i8count field. - * - * For 64-bit inode number the most significant byte must be zero. - */ -static xfs_ino_t -xfs_dir2_sf_get_ino( -	struct xfs_dir2_sf_hdr	*hdr, -	xfs_dir2_inou_t		*from) -{ -	if (hdr->i8count) -		return get_unaligned_be64(&from->i8.i) & 0x00ffffffffffffffULL; -	else -		return get_unaligned_be32(&from->i4.i); -} - -static void -xfs_dir2_sf_put_ino( -	struct xfs_dir2_sf_hdr	*hdr, -	xfs_dir2_inou_t		*to, -	xfs_ino_t		ino) -{ -	ASSERT((ino & 0xff00000000000000ULL) == 0); - -	if (hdr->i8count) -		put_unaligned_be64(ino, &to->i8.i); -	else -		put_unaligned_be32(ino, &to->i4.i); -} - -xfs_ino_t -xfs_dir2_sf_get_parent_ino( -	struct xfs_dir2_sf_hdr	*hdr) -{ -	return xfs_dir2_sf_get_ino(hdr, &hdr->parent); -} - -void -xfs_dir2_sf_put_parent_ino( -	struct xfs_dir2_sf_hdr	*hdr, -	xfs_ino_t		ino) -{ -	xfs_dir2_sf_put_ino(hdr, &hdr->parent, ino); -} - -/* - * In short-form directory entries the inode numbers are stored at variable - * offset behind the entry name. If the entry stores a filetype value, then it - * sits between the name and the inode number. Hence the inode numbers may only - * be accessed through the helpers below. - */ -static xfs_dir2_inou_t * -xfs_dir3_sfe_inop( -	struct xfs_mount	*mp, -	struct xfs_dir2_sf_entry *sfep) -{ -	__uint8_t	*ptr = &sfep->name[sfep->namelen]; -	if (xfs_sb_version_hasftype(&mp->m_sb)) -		ptr++; -	return (xfs_dir2_inou_t *)ptr; -} - -xfs_ino_t -xfs_dir3_sfe_get_ino( -	struct xfs_mount	*mp, -	struct xfs_dir2_sf_hdr	*hdr, -	struct xfs_dir2_sf_entry *sfep) -{ -	return xfs_dir2_sf_get_ino(hdr, xfs_dir3_sfe_inop(mp, sfep)); -} - -void -xfs_dir3_sfe_put_ino( -	struct xfs_mount	*mp, -	struct xfs_dir2_sf_hdr	*hdr, -	struct xfs_dir2_sf_entry *sfep, -	xfs_ino_t		ino) -{ -	xfs_dir2_sf_put_ino(hdr, xfs_dir3_sfe_inop(mp, sfep), ino); -} - -/*   * Given a block directory (dp/block), calculate its size as a shortform (sf)   * directory and a header for the sf directory, if it will fit it the   * space currently present in the inode.  If it won't fit, the output @@ -165,8 +82,10 @@ xfs_dir2_block_sfsize(  	xfs_ino_t		parent = 0;	/* parent inode number */  	int			size=0;		/* total computed size */  	int			has_ftype; +	struct xfs_da_geometry	*geo;  	mp = dp->i_mount; +	geo = mp->m_dir_geo;  	/*  	 * if there is a filetype field, add the extra byte to the namelen @@ -175,7 +94,7 @@ xfs_dir2_block_sfsize(  	has_ftype = xfs_sb_version_hasftype(&mp->m_sb) ? 1 : 0;  	count = i8count = namelen = 0; -	btp = xfs_dir2_block_tail_p(mp, hdr); +	btp = xfs_dir2_block_tail_p(geo, hdr);  	blp = xfs_dir2_block_leaf_p(btp);  	/* @@ -187,8 +106,8 @@ xfs_dir2_block_sfsize(  		/*  		 * Calculate the pointer to the entry at hand.  		 
*/ -		dep = (xfs_dir2_data_entry_t *) -		      ((char *)hdr + xfs_dir2_dataptr_to_off(mp, addr)); +		dep = (xfs_dir2_data_entry_t *)((char *)hdr + +				xfs_dir2_dataptr_to_off(geo, addr));  		/*  		 * Detect . and .., so we can special-case them.  		 * . is not included in sf directories. @@ -226,7 +145,7 @@ xfs_dir2_block_sfsize(  	 */  	sfhp->count = count;  	sfhp->i8count = i8count; -	xfs_dir2_sf_put_parent_ino(sfhp, parent); +	dp->d_ops->sf_put_parent_ino(sfhp, parent);  	return size;  } @@ -253,6 +172,7 @@ xfs_dir2_block_to_sf(  	char			*ptr;		/* current data pointer */  	xfs_dir2_sf_entry_t	*sfep;		/* shortform entry */  	xfs_dir2_sf_hdr_t	*sfp;		/* shortform directory header */ +	xfs_dir2_sf_hdr_t	*dst;		/* temporary data buffer */  	trace_xfs_dir2_block_to_sf(args); @@ -260,40 +180,25 @@ xfs_dir2_block_to_sf(  	mp = dp->i_mount;  	/* -	 * Make a copy of the block data, so we can shrink the inode -	 * and add local data. +	 * allocate a temporary destination buffer the size of the inode +	 * to format the data into. Once we have formatted the data, we +	 * can free the block and copy the formatted data into the inode literal +	 * area.  	 */ -	hdr = kmem_alloc(mp->m_dirblksize, KM_SLEEP); -	memcpy(hdr, bp->b_addr, mp->m_dirblksize); -	logflags = XFS_ILOG_CORE; -	if ((error = xfs_dir2_shrink_inode(args, mp->m_dirdatablk, bp))) { -		ASSERT(error != ENOSPC); -		goto out; -	} +	dst = kmem_alloc(mp->m_sb.sb_inodesize, KM_SLEEP); +	hdr = bp->b_addr;  	/* -	 * The buffer is now unconditionally gone, whether -	 * xfs_dir2_shrink_inode worked or not. -	 * -	 * Convert the inode to local format. -	 */ -	dp->i_df.if_flags &= ~XFS_IFEXTENTS; -	dp->i_df.if_flags |= XFS_IFINLINE; -	dp->i_d.di_format = XFS_DINODE_FMT_LOCAL; -	ASSERT(dp->i_df.if_bytes == 0); -	xfs_idata_realloc(dp, size, XFS_DATA_FORK); -	logflags |= XFS_ILOG_DDATA; -	/*  	 * Copy the header into the newly allocate local space.  	 */ -	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; +	sfp = (xfs_dir2_sf_hdr_t *)dst;  	memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count)); -	dp->i_d.di_size = size; +  	/*  	 * Set up to loop over the block's entries.  	 */ -	btp = xfs_dir2_block_tail_p(mp, hdr); -	ptr = (char *)xfs_dir3_data_entry_p(hdr); +	btp = xfs_dir2_block_tail_p(args->geo, hdr); +	ptr = (char *)dp->d_ops->data_entry_p(hdr);  	endptr = (char *)xfs_dir2_block_leaf_p(btp);  	sfep = xfs_dir2_sf_firstentry(sfp);  	/* @@ -321,7 +226,7 @@ xfs_dir2_block_to_sf(  		else if (dep->namelen == 2 &&  			 dep->name[0] == '.' && dep->name[1] == '.')  			ASSERT(be64_to_cpu(dep->inumber) == -			       xfs_dir2_sf_get_parent_ino(sfp)); +			       dp->d_ops->sf_get_parent_ino(sfp));  		/*  		 * Normal entry, copy it into shortform.  		 
*/ @@ -331,20 +236,44 @@ xfs_dir2_block_to_sf(  				(xfs_dir2_data_aoff_t)  				((char *)dep - (char *)hdr));  			memcpy(sfep->name, dep->name, dep->namelen); -			xfs_dir3_sfe_put_ino(mp, sfp, sfep, -					     be64_to_cpu(dep->inumber)); -			xfs_dir3_sfe_put_ftype(mp, sfp, sfep, -					xfs_dir3_dirent_get_ftype(mp, dep)); +			dp->d_ops->sf_put_ino(sfp, sfep, +					      be64_to_cpu(dep->inumber)); +			dp->d_ops->sf_put_ftype(sfep, +					dp->d_ops->data_get_ftype(dep)); -			sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep); +			sfep = dp->d_ops->sf_nextentry(sfp, sfep);  		} -		ptr += xfs_dir3_data_entsize(mp, dep->namelen); +		ptr += dp->d_ops->data_entsize(dep->namelen);  	}  	ASSERT((char *)sfep - (char *)sfp == size); + +	/* now we are done with the block, we can shrink the inode */ +	logflags = XFS_ILOG_CORE; +	error = xfs_dir2_shrink_inode(args, args->geo->datablk, bp); +	if (error) { +		ASSERT(error != ENOSPC); +		goto out; +	} + +	/* +	 * The buffer is now unconditionally gone, whether +	 * xfs_dir2_shrink_inode worked or not. +	 * +	 * Convert the inode to local format and copy the data in. +	 */ +	dp->i_df.if_flags &= ~XFS_IFEXTENTS; +	dp->i_df.if_flags |= XFS_IFINLINE; +	dp->i_d.di_format = XFS_DINODE_FMT_LOCAL; +	ASSERT(dp->i_df.if_bytes == 0); +	xfs_idata_realloc(dp, size, XFS_DATA_FORK); + +	logflags |= XFS_ILOG_DDATA; +	memcpy(dp->i_df.if_u1.if_data, dst, size); +	dp->i_d.di_size = size;  	xfs_dir2_sf_check(args);  out:  	xfs_trans_log_inode(args->trans, dp, logflags); -	kmem_free(hdr); +	kmem_free(dst);  	return error;  } @@ -358,14 +287,12 @@ int						/* error */  xfs_dir2_sf_addname(  	xfs_da_args_t		*args)		/* operation arguments */  { -	int			add_entsize;	/* size of the new entry */  	xfs_inode_t		*dp;		/* incore directory inode */  	int			error;		/* error return value */  	int			incr_isize;	/* total change in size */  	int			new_isize;	/* di_size after adding name */  	int			objchange;	/* changing to 8-byte inodes */  	xfs_dir2_data_aoff_t	offset = 0;	/* offset for new entry */ -	int			old_isize;	/* di_size before adding name */  	int			pick;		/* which algorithm to use */  	xfs_dir2_sf_hdr_t	*sfp;		/* shortform structure */  	xfs_dir2_sf_entry_t	*sfep = NULL;	/* shortform entry */ @@ -389,8 +316,7 @@ xfs_dir2_sf_addname(  	/*  	 * Compute entry (and change in) size.  	 */ -	add_entsize = xfs_dir3_sf_entsize(dp->i_mount, sfp, args->namelen); -	incr_isize = add_entsize; +	incr_isize = dp->d_ops->sf_entsize(sfp, args->namelen);  	objchange = 0;  #if XFS_BIG_INUMS  	/* @@ -398,11 +324,8 @@ xfs_dir2_sf_addname(  	 */  	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && sfp->i8count == 0) {  		/* -		 * Yes, adjust the entry size and the total size. +		 * Yes, adjust the inode size.  old count + (parent + new)  		 */ -		add_entsize += -			(uint)sizeof(xfs_dir2_ino8_t) - -			(uint)sizeof(xfs_dir2_ino4_t);  		incr_isize +=  			(sfp->count + 2) *  			((uint)sizeof(xfs_dir2_ino8_t) - @@ -410,8 +333,7 @@ xfs_dir2_sf_addname(  		objchange = 1;  	}  #endif -	old_isize = (int)dp->i_d.di_size; -	new_isize = old_isize + incr_isize; +	new_isize = (int)dp->i_d.di_size + incr_isize;  	/*  	 * Won't fit as shortform any more (due to size),  	 * or the pick routine says it won't (due to offset values). @@ -483,8 +405,7 @@ xfs_dir2_sf_addname_easy(  	/*  	 * Grow the in-inode space.  	 
*/ -	xfs_idata_realloc(dp, -			  xfs_dir3_sf_entsize(dp->i_mount, sfp, args->namelen), +	xfs_idata_realloc(dp, dp->d_ops->sf_entsize(sfp, args->namelen),  			  XFS_DATA_FORK);  	/*  	 * Need to set up again due to realloc of the inode data. @@ -497,8 +418,8 @@ xfs_dir2_sf_addname_easy(  	sfep->namelen = args->namelen;  	xfs_dir2_sf_put_offset(sfep, offset);  	memcpy(sfep->name, args->name, sfep->namelen); -	xfs_dir3_sfe_put_ino(dp->i_mount, sfp, sfep, args->inumber); -	xfs_dir3_sfe_put_ftype(dp->i_mount, sfp, sfep, args->filetype); +	dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); +	dp->d_ops->sf_put_ftype(sfep, args->filetype);  	/*  	 * Update the header and inode. @@ -557,13 +478,13 @@ xfs_dir2_sf_addname_hard(  	 * to insert the new entry.  	 * If it's going to end up at the end then oldsfep will point there.  	 */ -	for (offset = XFS_DIR3_DATA_FIRST_OFFSET(mp), +	for (offset = dp->d_ops->data_first_offset,  	      oldsfep = xfs_dir2_sf_firstentry(oldsfp), -	      add_datasize = xfs_dir3_data_entsize(mp, args->namelen), +	      add_datasize = dp->d_ops->data_entsize(args->namelen),  	      eof = (char *)oldsfep == &buf[old_isize];  	     !eof; -	     offset = new_offset + xfs_dir3_data_entsize(mp, oldsfep->namelen), -	      oldsfep = xfs_dir3_sf_nextentry(mp, oldsfp, oldsfep), +	     offset = new_offset + dp->d_ops->data_entsize(oldsfep->namelen), +	      oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep),  	      eof = (char *)oldsfep == &buf[old_isize]) {  		new_offset = xfs_dir2_sf_get_offset(oldsfep);  		if (offset + add_datasize <= new_offset) @@ -592,8 +513,8 @@ xfs_dir2_sf_addname_hard(  	sfep->namelen = args->namelen;  	xfs_dir2_sf_put_offset(sfep, offset);  	memcpy(sfep->name, args->name, sfep->namelen); -	xfs_dir3_sfe_put_ino(mp, sfp, sfep, args->inumber); -	xfs_dir3_sfe_put_ftype(mp, sfp, sfep, args->filetype); +	dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); +	dp->d_ops->sf_put_ftype(sfep, args->filetype);  	sfp->count++;  #if XFS_BIG_INUMS  	if (args->inumber > XFS_DIR2_MAX_SHORT_INUM && !objchange) @@ -603,7 +524,7 @@ xfs_dir2_sf_addname_hard(  	 * If there's more left to copy, do that.  	 */  	if (!eof) { -		sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep); +		sfep = dp->d_ops->sf_nextentry(sfp, sfep);  		memcpy(sfep, oldsfep, old_isize - nbytes);  	}  	kmem_free(buf); @@ -639,8 +560,8 @@ xfs_dir2_sf_addname_pick(  	mp = dp->i_mount;  	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; -	size = xfs_dir3_data_entsize(mp, args->namelen); -	offset = XFS_DIR3_DATA_FIRST_OFFSET(mp); +	size = dp->d_ops->data_entsize(args->namelen); +	offset = dp->d_ops->data_first_offset;  	sfep = xfs_dir2_sf_firstentry(sfp);  	holefit = 0;  	/* @@ -652,8 +573,8 @@ xfs_dir2_sf_addname_pick(  		if (!holefit)  			holefit = offset + size <= xfs_dir2_sf_get_offset(sfep);  		offset = xfs_dir2_sf_get_offset(sfep) + -			 xfs_dir3_data_entsize(mp, sfep->namelen); -		sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep); +			 dp->d_ops->data_entsize(sfep->namelen); +		sfep = dp->d_ops->sf_nextentry(sfp, sfep);  	}  	/*  	 * Calculate data bytes used excluding the new entry, if this @@ -667,7 +588,7 @@ xfs_dir2_sf_addname_pick(  	 * we'll go back, convert to block, then try the insert and convert  	 * to leaf.  	 */ -	if (used + (holefit ? 0 : size) > mp->m_dirblksize) +	if (used + (holefit ? 0 : size) > args->geo->blksize)  		return 0;  	/*  	 * If changing the inode number size, do it the hard way. 
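The shortform directory hunks in this area drop the xfs_dir3_sfe_*/xfs_dir3_sf_* helpers, which took the mount and chose the entry layout internally, in favour of calls through dp->d_ops, an operations vector attached to the inode, and they read the block size from args->geo rather than mp->m_dirblksize. The sketch below shows the general shape of such an ops vector; the struct and field names are illustrative stand-ins, not the kernel's real per-format tables.

#include <stdint.h>

struct sf_hdr;                  /* stand-in for xfs_dir2_sf_hdr_t   */
struct sf_entry;                /* stand-in for xfs_dir2_sf_entry_t */

/* One table per on-disk directory format, chosen once when the inode is set up. */
struct dir_ops {
        int       (*sf_entsize)(struct sf_hdr *hdr, int namelen);
        uint64_t  (*sf_get_ino)(struct sf_hdr *hdr, struct sf_entry *sfep);
        void      (*sf_put_ino)(struct sf_hdr *hdr, struct sf_entry *sfep,
                                uint64_t ino);
        uint8_t   (*sf_get_ftype)(struct sf_entry *sfep);
        void      (*sf_put_ftype)(struct sf_entry *sfep, uint8_t ftype);
        struct sf_entry *(*sf_nextentry)(struct sf_hdr *hdr,
                                         struct sf_entry *sfep);
};

/*
 * A caller then writes, for example,
 *
 *         ino = dp->d_ops->sf_get_ino(sfp, sfep);
 *
 * where the old code passed the mount into xfs_dir3_sfe_get_ino() and the
 * helper re-checked the directory format on every call.
 */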
@@ -682,7 +603,7 @@ xfs_dir2_sf_addname_pick(  	/*  	 * If it won't fit at the end then do it the hard way (use the hole).  	 */ -	if (used + size > mp->m_dirblksize) +	if (used + size > args->geo->blksize)  		return 2;  	/*  	 * Do it the easy way. @@ -713,28 +634,27 @@ xfs_dir2_sf_check(  	mp = dp->i_mount;  	sfp = (xfs_dir2_sf_hdr_t *)dp->i_df.if_u1.if_data; -	offset = XFS_DIR3_DATA_FIRST_OFFSET(mp); -	ino = xfs_dir2_sf_get_parent_ino(sfp); +	offset = dp->d_ops->data_first_offset; +	ino = dp->d_ops->sf_get_parent_ino(sfp);  	i8count = ino > XFS_DIR2_MAX_SHORT_INUM;  	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp);  	     i < sfp->count; -	     i++, sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep)) { +	     i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {  		ASSERT(xfs_dir2_sf_get_offset(sfep) >= offset); -		ino = xfs_dir3_sfe_get_ino(mp, sfp, sfep); +		ino = dp->d_ops->sf_get_ino(sfp, sfep);  		i8count += ino > XFS_DIR2_MAX_SHORT_INUM;  		offset =  			xfs_dir2_sf_get_offset(sfep) + -			xfs_dir3_data_entsize(mp, sfep->namelen); -		ASSERT(xfs_dir3_sfe_get_ftype(mp, sfp, sfep) < -							XFS_DIR3_FT_MAX); +			dp->d_ops->data_entsize(sfep->namelen); +		ASSERT(dp->d_ops->sf_get_ftype(sfep) < XFS_DIR3_FT_MAX);  	}  	ASSERT(i8count == sfp->i8count);  	ASSERT(XFS_BIG_INUMS || i8count == 0);  	ASSERT((char *)sfep - (char *)sfp == dp->i_d.di_size);  	ASSERT(offset +  	       (sfp->count + 2) * (uint)sizeof(xfs_dir2_leaf_entry_t) + -	       (uint)sizeof(xfs_dir2_block_tail_t) <= mp->m_dirblksize); +	       (uint)sizeof(xfs_dir2_block_tail_t) <= args->geo->blksize);  }  #endif	/* DEBUG */ @@ -783,7 +703,7 @@ xfs_dir2_sf_create(  	/*  	 * Now can put in the inode number, since i8count is set.  	 */ -	xfs_dir2_sf_put_parent_ino(sfp, pino); +	dp->d_ops->sf_put_parent_ino(sfp, pino);  	sfp->count = 0;  	dp->i_d.di_size = size;  	xfs_dir2_sf_check(args); @@ -838,7 +758,7 @@ xfs_dir2_sf_lookup(  	 */  	if (args->namelen == 2 &&  	    args->name[0] == '.' && args->name[1] == '.') { -		args->inumber = xfs_dir2_sf_get_parent_ino(sfp); +		args->inumber = dp->d_ops->sf_get_parent_ino(sfp);  		args->cmpresult = XFS_CMP_EXACT;  		args->filetype = XFS_DIR3_FT_DIR;  		return XFS_ERROR(EEXIST); @@ -848,7 +768,7 @@ xfs_dir2_sf_lookup(  	 */  	ci_sfep = NULL;  	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; -	     i++, sfep = xfs_dir3_sf_nextentry(dp->i_mount, sfp, sfep)) { +	     i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {  		/*  		 * Compare name and if it's an exact match, return the inode  		 * number. If it's the first case-insensitive match, store the @@ -858,10 +778,8 @@ xfs_dir2_sf_lookup(  								sfep->namelen);  		if (cmp != XFS_CMP_DIFFERENT && cmp != args->cmpresult) {  			args->cmpresult = cmp; -			args->inumber = xfs_dir3_sfe_get_ino(dp->i_mount, -							     sfp, sfep); -			args->filetype = xfs_dir3_sfe_get_ftype(dp->i_mount, -								sfp, sfep); +			args->inumber = dp->d_ops->sf_get_ino(sfp, sfep); +			args->filetype = dp->d_ops->sf_get_ftype(sfep);  			if (cmp == XFS_CMP_EXACT)  				return XFS_ERROR(EEXIST);  			ci_sfep = sfep; @@ -917,10 +835,10 @@ xfs_dir2_sf_removename(  	 * Find the one we're deleting.  	 
*/  	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; -	     i++, sfep = xfs_dir3_sf_nextentry(dp->i_mount, sfp, sfep)) { +	     i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {  		if (xfs_da_compname(args, sfep->name, sfep->namelen) ==  								XFS_CMP_EXACT) { -			ASSERT(xfs_dir3_sfe_get_ino(dp->i_mount, sfp, sfep) == +			ASSERT(dp->d_ops->sf_get_ino(sfp, sfep) ==  			       args->inumber);  			break;  		} @@ -934,7 +852,7 @@ xfs_dir2_sf_removename(  	 * Calculate sizes.  	 */  	byteoff = (int)((char *)sfep - (char *)sfp); -	entsize = xfs_dir3_sf_entsize(dp->i_mount, sfp, args->namelen); +	entsize = dp->d_ops->sf_entsize(sfp, args->namelen);  	newsize = oldsize - entsize;  	/*  	 * Copy the part if any after the removed entry, sliding it down. @@ -1041,28 +959,25 @@ xfs_dir2_sf_replace(  	if (args->namelen == 2 &&  	    args->name[0] == '.' && args->name[1] == '.') {  #if XFS_BIG_INUMS || defined(DEBUG) -		ino = xfs_dir2_sf_get_parent_ino(sfp); +		ino = dp->d_ops->sf_get_parent_ino(sfp);  		ASSERT(args->inumber != ino);  #endif -		xfs_dir2_sf_put_parent_ino(sfp, args->inumber); +		dp->d_ops->sf_put_parent_ino(sfp, args->inumber);  	}  	/*  	 * Normal entry, look for the name.  	 */  	else {  		for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp); i < sfp->count; -		     i++, sfep = xfs_dir3_sf_nextentry(dp->i_mount, sfp, sfep)) { +		     i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep)) {  			if (xfs_da_compname(args, sfep->name, sfep->namelen) ==  								XFS_CMP_EXACT) {  #if XFS_BIG_INUMS || defined(DEBUG) -				ino = xfs_dir3_sfe_get_ino(dp->i_mount, -							   sfp, sfep); +				ino = dp->d_ops->sf_get_ino(sfp, sfep);  				ASSERT(args->inumber != ino);  #endif -				xfs_dir3_sfe_put_ino(dp->i_mount, sfp, sfep, -						     args->inumber); -				xfs_dir3_sfe_put_ftype(dp->i_mount, sfp, sfep, -						       args->filetype); +				dp->d_ops->sf_put_ino(sfp, sfep, args->inumber); +				dp->d_ops->sf_put_ftype(sfep, args->filetype);  				break;  			}  		} @@ -1165,22 +1080,21 @@ xfs_dir2_sf_toino4(  	 */  	sfp->count = oldsfp->count;  	sfp->i8count = 0; -	xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp)); +	dp->d_ops->sf_put_parent_ino(sfp, dp->d_ops->sf_get_parent_ino(oldsfp));  	/*  	 * Copy the entries field by field.  	 */  	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),  		    oldsfep = xfs_dir2_sf_firstentry(oldsfp);  	     i < sfp->count; -	     i++, sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep), -		  oldsfep = xfs_dir3_sf_nextentry(mp, oldsfp, oldsfep)) { +	     i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep), +		  oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep)) {  		sfep->namelen = oldsfep->namelen;  		sfep->offset = oldsfep->offset;  		memcpy(sfep->name, oldsfep->name, sfep->namelen); -		xfs_dir3_sfe_put_ino(mp, sfp, sfep, -			xfs_dir3_sfe_get_ino(mp, oldsfp, oldsfep)); -		xfs_dir3_sfe_put_ftype(mp, sfp, sfep, -			xfs_dir3_sfe_get_ftype(mp, oldsfp, oldsfep)); +		dp->d_ops->sf_put_ino(sfp, sfep, +				      dp->d_ops->sf_get_ino(oldsfp, oldsfep)); +		dp->d_ops->sf_put_ftype(sfep, dp->d_ops->sf_get_ftype(oldsfep));  	}  	/*  	 * Clean up the inode. @@ -1191,9 +1105,9 @@ xfs_dir2_sf_toino4(  }  /* - * Convert from 4-byte inode numbers to 8-byte inode numbers. - * The new 8-byte inode number is not there yet, we leave with the - * count 1 but no corresponding entry. + * Convert existing entries from 4-byte inode numbers to 8-byte inode numbers. 
+ * The new entry w/ an 8-byte inode number is not there yet; we leave with + * i8count set to 1, but no corresponding 8-byte entry.   */  static void  xfs_dir2_sf_toino8( @@ -1226,7 +1140,7 @@ xfs_dir2_sf_toino8(  	ASSERT(oldsfp->i8count == 0);  	memcpy(buf, oldsfp, oldsize);  	/* -	 * Compute the new inode size. +	 * Compute the new inode size (nb: entry count + 1 for parent)  	 */  	newsize =  		oldsize + @@ -1244,22 +1158,21 @@ xfs_dir2_sf_toino8(  	 */  	sfp->count = oldsfp->count;  	sfp->i8count = 1; -	xfs_dir2_sf_put_parent_ino(sfp, xfs_dir2_sf_get_parent_ino(oldsfp)); +	dp->d_ops->sf_put_parent_ino(sfp, dp->d_ops->sf_get_parent_ino(oldsfp));  	/*  	 * Copy the entries field by field.  	 */  	for (i = 0, sfep = xfs_dir2_sf_firstentry(sfp),  		    oldsfep = xfs_dir2_sf_firstentry(oldsfp);  	     i < sfp->count; -	     i++, sfep = xfs_dir3_sf_nextentry(mp, sfp, sfep), -		  oldsfep = xfs_dir3_sf_nextentry(mp, oldsfp, oldsfep)) { +	     i++, sfep = dp->d_ops->sf_nextentry(sfp, sfep), +		  oldsfep = dp->d_ops->sf_nextentry(oldsfp, oldsfep)) {  		sfep->namelen = oldsfep->namelen;  		sfep->offset = oldsfep->offset;  		memcpy(sfep->name, oldsfep->name, sfep->namelen); -		xfs_dir3_sfe_put_ino(mp, sfp, sfep, -			xfs_dir3_sfe_get_ino(mp, oldsfp, oldsfep)); -		xfs_dir3_sfe_put_ftype(mp, sfp, sfep, -			xfs_dir3_sfe_get_ftype(mp, oldsfp, oldsfep)); +		dp->d_ops->sf_put_ino(sfp, sfep, +				      dp->d_ops->sf_get_ino(oldsfp, oldsfep)); +		dp->d_ops->sf_put_ftype(sfep, dp->d_ops->sf_get_ftype(oldsfep));  	}  	/*  	 * Clean up the inode. diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 45560ee1a4b..4f11ef01113 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -17,22 +17,21 @@   */  #include "xfs.h"  #include "xfs_format.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h"  #include "xfs_quota.h" -#include "xfs_alloc_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_btree.h"  #include "xfs_inode.h" +#include "xfs_btree.h" +#include "xfs_alloc_btree.h"  #include "xfs_alloc.h"  #include "xfs_error.h"  #include "xfs_extent_busy.h"  #include "xfs_discard.h"  #include "xfs_trace.h" +#include "xfs_log.h"  STATIC int  xfs_trim_extents( @@ -158,7 +157,7 @@ xfs_ioc_trim(  	struct xfs_mount		*mp,  	struct fstrim_range __user	*urange)  { -	struct request_queue	*q = mp->m_ddev_targp->bt_bdev->bd_disk->queue; +	struct request_queue	*q = bdev_get_queue(mp->m_ddev_targp->bt_bdev);  	unsigned int		granularity = q->limits.discard_granularity;  	struct fstrim_range	range;  	xfs_daddr_t		start, end, minlen; @@ -181,7 +180,8 @@ xfs_ioc_trim(  	 * matter as trimming blocks is an advisory interface.  	 
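xfs_ioc_trim() above is the kernel side of the FITRIM ioctl; the added length check rejects byte ranges smaller than one filesystem block instead of silently trimming nothing. For orientation, a minimal userspace caller might look like the sketch below; the mount-point path is a placeholder and error handling is reduced to the basics.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/fs.h>           /* FITRIM, struct fstrim_range */

int main(int argc, char **argv)
{
        struct fstrim_range range;
        int fd = open(argc > 1 ? argv[1] : "/mnt", O_RDONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }

        memset(&range, 0, sizeof(range));
        range.start = 0;                        /* all values are in bytes */
        range.len = (unsigned long long)-1;     /* whole filesystem */
        range.minlen = 0;                       /* raised to the discard granularity */

        if (ioctl(fd, FITRIM, &range) < 0)
                perror("FITRIM");
        else
                printf("trimmed %llu bytes\n",
                       (unsigned long long)range.len);
        return 0;
}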
*/  	if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) || -	    range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp))) +	    range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)) || +	    range.len < mp->m_sb.sb_blocksize)  		return -XFS_ERROR(EINVAL);  	start = BTOBB(range.start); diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 71520e6e5d6..3ee0cd43edc 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -18,28 +18,28 @@  #include "xfs.h"  #include "xfs_fs.h"  #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_shared.h" +#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h"  #include "xfs_inode.h"  #include "xfs_bmap.h"  #include "xfs_bmap_util.h" -#include "xfs_rtalloc.h" +#include "xfs_alloc.h" +#include "xfs_quota.h"  #include "xfs_error.h" -#include "xfs_itable.h" -#include "xfs_attr.h" +#include "xfs_trans.h"  #include "xfs_buf_item.h"  #include "xfs_trans_space.h"  #include "xfs_trans_priv.h"  #include "xfs_qm.h"  #include "xfs_cksum.h"  #include "xfs_trace.h" +#include "xfs_log.h" +#include "xfs_bmap_btree.h"  /*   * Lock order: @@ -64,7 +64,8 @@ int xfs_dqerror_mod = 33;  struct kmem_zone		*xfs_qm_dqtrxzone;  static struct kmem_zone		*xfs_qm_dqzone; -static struct lock_class_key xfs_dquot_other_class; +static struct lock_class_key xfs_dquot_group_class; +static struct lock_class_key xfs_dquot_project_class;  /*   * This is called to free all the memory associated with a dquot @@ -291,118 +292,6 @@ xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp)  	dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5;  } -STATIC bool -xfs_dquot_buf_verify_crc( -	struct xfs_mount	*mp, -	struct xfs_buf		*bp) -{ -	struct xfs_dqblk	*d = (struct xfs_dqblk *)bp->b_addr; -	int			ndquots; -	int			i; - -	if (!xfs_sb_version_hascrc(&mp->m_sb)) -		return true; - -	/* -	 * if we are in log recovery, the quota subsystem has not been -	 * initialised so we have no quotainfo structure. In that case, we need -	 * to manually calculate the number of dquots in the buffer. -	 */ -	if (mp->m_quotainfo) -		ndquots = mp->m_quotainfo->qi_dqperchunk; -	else -		ndquots = xfs_qm_calc_dquots_per_chunk(mp, -					XFS_BB_TO_FSB(mp, bp->b_length)); - -	for (i = 0; i < ndquots; i++, d++) { -		if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk), -				 XFS_DQUOT_CRC_OFF)) -			return false; -		if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid)) -			return false; -	} -	return true; -} - -STATIC bool -xfs_dquot_buf_verify( -	struct xfs_mount	*mp, -	struct xfs_buf		*bp) -{ -	struct xfs_dqblk	*d = (struct xfs_dqblk *)bp->b_addr; -	xfs_dqid_t		id = 0; -	int			ndquots; -	int			i; - -	/* -	 * if we are in log recovery, the quota subsystem has not been -	 * initialised so we have no quotainfo structure. In that case, we need -	 * to manually calculate the number of dquots in the buffer. -	 */ -	if (mp->m_quotainfo) -		ndquots = mp->m_quotainfo->qi_dqperchunk; -	else -		ndquots = xfs_qm_calc_dquots_per_chunk(mp, bp->b_length); - -	/* -	 * On the first read of the buffer, verify that each dquot is valid. -	 * We don't know what the id of the dquot is supposed to be, just that -	 * they should be increasing monotonically within the buffer. If the -	 * first id is corrupt, then it will fail on the second dquot in the -	 * buffer so corruptions could point to the wrong dquot in this case. 
-	 */ -	for (i = 0; i < ndquots; i++) { -		struct xfs_disk_dquot	*ddq; -		int			error; - -		ddq = &d[i].dd_diskdq; - -		if (i == 0) -			id = be32_to_cpu(ddq->d_id); - -		error = xfs_qm_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN, -				       "xfs_dquot_buf_verify"); -		if (error) -			return false; -	} -	return true; -} - -static void -xfs_dquot_buf_read_verify( -	struct xfs_buf	*bp) -{ -	struct xfs_mount	*mp = bp->b_target->bt_mount; - -	if (!xfs_dquot_buf_verify_crc(mp, bp) || !xfs_dquot_buf_verify(mp, bp)) { -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); -		xfs_buf_ioerror(bp, EFSCORRUPTED); -	} -} - -/* - * we don't calculate the CRC here as that is done when the dquot is flushed to - * the buffer after the update is done. This ensures that the dquot in the - * buffer always has an up-to-date CRC value. - */ -void -xfs_dquot_buf_write_verify( -	struct xfs_buf	*bp) -{ -	struct xfs_mount	*mp = bp->b_target->bt_mount; - -	if (!xfs_dquot_buf_verify(mp, bp)) { -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); -		xfs_buf_ioerror(bp, EFSCORRUPTED); -		return; -	} -} - -const struct xfs_buf_ops xfs_dquot_buf_ops = { -	.verify_read = xfs_dquot_buf_read_verify, -	.verify_write = xfs_dquot_buf_write_verify, -}; -  /*   * Allocate a block and fill it with dquots.   * This is called when the bmapi finds a hole. @@ -464,10 +353,10 @@ xfs_qm_dqalloc(  			       dqp->q_blkno,  			       mp->m_quotainfo->qi_dqchunklen,  			       0); - -	error = xfs_buf_geterror(bp); -	if (error) +	if (!bp) { +		error = ENOMEM;  		goto error1; +	}  	bp->b_ops = &xfs_dquot_buf_ops;  	/* @@ -513,6 +402,7 @@ xfs_qm_dqalloc(  	return (error);  } +  STATIC int  xfs_qm_dqrepair(  	struct xfs_mount	*mp, @@ -546,7 +436,7 @@ xfs_qm_dqrepair(  	/* Do the actual repair of dquots in this buffer */  	for (i = 0; i < mp->m_quotainfo->qi_dqperchunk; i++) {  		ddq = &d[i].dd_diskdq; -		error = xfs_qm_dqcheck(mp, ddq, firstid + i, +		error = xfs_dqcheck(mp, ddq, firstid + i,  				       dqp->dq_flags & XFS_DQ_ALLTYPES,  				       XFS_QMOPT_DQREPAIR, "xfs_qm_dqrepair");  		if (error) { @@ -579,16 +469,17 @@ xfs_qm_dqtobp(  	struct xfs_mount	*mp = dqp->q_mount;  	xfs_dqid_t		id = be32_to_cpu(dqp->q_core.d_id);  	struct xfs_trans	*tp = (tpp ? *tpp : NULL); +	uint			lock_mode;  	dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; -	xfs_ilock(quotip, XFS_ILOCK_SHARED); +	lock_mode = xfs_ilock_data_map_shared(quotip);  	if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) {  		/*  		 * Return if this type of quotas is turned off while we  		 * didn't have the quota inode lock.  		 */ -		xfs_iunlock(quotip, XFS_ILOCK_SHARED); +		xfs_iunlock(quotip, lock_mode);  		return ESRCH;  	} @@ -598,7 +489,7 @@ xfs_qm_dqtobp(  	error = xfs_bmapi_read(quotip, dqp->q_fileoffset,  			       XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0); -	xfs_iunlock(quotip, XFS_ILOCK_SHARED); +	xfs_iunlock(quotip, lock_mode);  	if (error)  		return error; @@ -703,8 +594,20 @@ xfs_qm_dqread(  	 * Make sure group quotas have a different lock class than user  	 * quotas.  	 
*/ -	if (!(type & XFS_DQ_USER)) -		lockdep_set_class(&dqp->q_qlock, &xfs_dquot_other_class); +	switch (type) { +	case XFS_DQ_USER: +		/* uses the default lock class */ +		break; +	case XFS_DQ_GROUP: +		lockdep_set_class(&dqp->q_qlock, &xfs_dquot_group_class); +		break; +	case XFS_DQ_PROJ: +		lockdep_set_class(&dqp->q_qlock, &xfs_dquot_project_class); +		break; +	default: +		ASSERT(0); +		break; +	}  	XFS_STATS_INC(xs_qm_dquot); @@ -712,7 +615,7 @@ xfs_qm_dqread(  	if (flags & XFS_QMOPT_DQALLOC) {  		tp = xfs_trans_alloc(mp, XFS_TRANS_QM_DQALLOC); -		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_attrsetm, +		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_dqalloc,  					  XFS_QM_DQALLOC_SPACE_RES(mp), 0);  		if (error)  			goto error1; @@ -929,47 +832,6 @@ restart:  	return (0);  } - -STATIC void -xfs_qm_dqput_final( -	struct xfs_dquot	*dqp) -{ -	struct xfs_quotainfo	*qi = dqp->q_mount->m_quotainfo; -	struct xfs_dquot	*gdqp; -	struct xfs_dquot	*pdqp; - -	trace_xfs_dqput_free(dqp); - -	if (list_lru_add(&qi->qi_lru, &dqp->q_lru)) -		XFS_STATS_INC(xs_qm_dquot_unused); - -	/* -	 * If we just added a udquot to the freelist, then we want to release -	 * the gdquot/pdquot reference that it (probably) has. Otherwise it'll -	 * keep the gdquot/pdquot from getting reclaimed. -	 */ -	gdqp = dqp->q_gdquot; -	if (gdqp) { -		xfs_dqlock(gdqp); -		dqp->q_gdquot = NULL; -	} - -	pdqp = dqp->q_pdquot; -	if (pdqp) { -		xfs_dqlock(pdqp); -		dqp->q_pdquot = NULL; -	} -	xfs_dqunlock(dqp); - -	/* -	 * If we had a group/project quota hint, release it now. -	 */ -	if (gdqp) -		xfs_qm_dqput(gdqp); -	if (pdqp) -		xfs_qm_dqput(pdqp); -} -  /*   * Release a reference to the dquot (decrement ref-count) and unlock it.   * @@ -985,10 +847,14 @@ xfs_qm_dqput(  	trace_xfs_dqput(dqp); -	if (--dqp->q_nrefs > 0) -		xfs_dqunlock(dqp); -	else -		xfs_qm_dqput_final(dqp); +	if (--dqp->q_nrefs == 0) { +		struct xfs_quotainfo	*qi = dqp->q_mount->m_quotainfo; +		trace_xfs_dqput_free(dqp); + +		if (list_lru_add(&qi->qi_lru, &dqp->q_lru)) +			XFS_STATS_INC(xs_qm_dquot_unused); +	} +	xfs_dqunlock(dqp);  }  /* @@ -1120,7 +986,7 @@ xfs_qm_dqflush(  	/*  	 * A simple sanity check in case we got a corrupted dquot..  	 */ -	error = xfs_qm_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0, +	error = xfs_dqcheck(mp, &dqp->q_core, be32_to_cpu(ddqp->d_id), 0,  			   XFS_QMOPT_DOWARN, "dqflush (incore copy)");  	if (error) {  		xfs_buf_relse(bp); diff --git a/fs/xfs/xfs_dquot.h b/fs/xfs/xfs_dquot.h index 55abbca2883..68a68f70483 100644 --- a/fs/xfs/xfs_dquot.h +++ b/fs/xfs/xfs_dquot.h @@ -52,8 +52,6 @@ typedef struct xfs_dquot {  	int		 q_bufoffset;	/* off of dq in buffer (# dquots) */  	xfs_fileoff_t	 q_fileoffset;	/* offset in quotas file */ -	struct xfs_dquot*q_gdquot;	/* group dquot, hint only */ -	struct xfs_dquot*q_pdquot;	/* project dquot, hint only */  	xfs_disk_dquot_t q_core;	/* actual usage & quotas */  	xfs_dq_logitem_t q_logitem;	/* dquot log item */  	xfs_qcnt_t	 q_res_bcount;	/* total regular nblks used+reserved */ @@ -172,6 +170,4 @@ static inline struct xfs_dquot *xfs_qm_dqhold(struct xfs_dquot *dqp)  	return dqp;  } -extern const struct xfs_buf_ops xfs_dquot_buf_ops; -  #endif /* __XFS_DQUOT_H__ */ diff --git a/fs/xfs/xfs_dquot_buf.c b/fs/xfs/xfs_dquot_buf.c new file mode 100644 index 00000000000..c2ac0c611ad --- /dev/null +++ b/fs/xfs/xfs_dquot_buf.c @@ -0,0 +1,290 @@ +/* + * Copyright (c) 2000-2006 Silicon Graphics, Inc. + * Copyright (c) 2013 Red Hat, Inc. + * All Rights Reserved. 
+ * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_quota.h" +#include "xfs_trans.h" +#include "xfs_qm.h" +#include "xfs_error.h" +#include "xfs_cksum.h" +#include "xfs_trace.h" + +int +xfs_calc_dquots_per_chunk( +	unsigned int		nbblks)	/* basic block units */ +{ +	unsigned int	ndquots; + +	ASSERT(nbblks > 0); +	ndquots = BBTOB(nbblks); +	do_div(ndquots, sizeof(xfs_dqblk_t)); + +	return ndquots; +} + +/* + * Do some primitive error checking on ondisk dquot data structures. + */ +int +xfs_dqcheck( +	struct xfs_mount *mp, +	xfs_disk_dquot_t *ddq, +	xfs_dqid_t	 id, +	uint		 type,	  /* used only when IO_dorepair is true */ +	uint		 flags, +	char		 *str) +{ +	xfs_dqblk_t	 *d = (xfs_dqblk_t *)ddq; +	int		errs = 0; + +	/* +	 * We can encounter an uninitialized dquot buffer for 2 reasons: +	 * 1. If we crash while deleting the quotainode(s), and those blks got +	 *    used for user data. This is because we take the path of regular +	 *    file deletion; however, the size field of quotainodes is never +	 *    updated, so all the tricks that we play in itruncate_finish +	 *    don't quite matter. +	 * +	 * 2. We don't play the quota buffers when there's a quotaoff logitem. +	 *    But the allocation will be replayed so we'll end up with an +	 *    uninitialized quota block. +	 * +	 * This is all fine; things are still consistent, and we haven't lost +	 * any quota information. Just don't complain about bad dquot blks. 
+	 */ +	if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) { +		if (flags & XFS_QMOPT_DOWARN) +			xfs_alert(mp, +			"%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", +			str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); +		errs++; +	} +	if (ddq->d_version != XFS_DQUOT_VERSION) { +		if (flags & XFS_QMOPT_DOWARN) +			xfs_alert(mp, +			"%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", +			str, id, ddq->d_version, XFS_DQUOT_VERSION); +		errs++; +	} + +	if (ddq->d_flags != XFS_DQ_USER && +	    ddq->d_flags != XFS_DQ_PROJ && +	    ddq->d_flags != XFS_DQ_GROUP) { +		if (flags & XFS_QMOPT_DOWARN) +			xfs_alert(mp, +			"%s : XFS dquot ID 0x%x, unknown flags 0x%x", +			str, id, ddq->d_flags); +		errs++; +	} + +	if (id != -1 && id != be32_to_cpu(ddq->d_id)) { +		if (flags & XFS_QMOPT_DOWARN) +			xfs_alert(mp, +			"%s : ondisk-dquot 0x%p, ID mismatch: " +			"0x%x expected, found id 0x%x", +			str, ddq, id, be32_to_cpu(ddq->d_id)); +		errs++; +	} + +	if (!errs && ddq->d_id) { +		if (ddq->d_blk_softlimit && +		    be64_to_cpu(ddq->d_bcount) > +				be64_to_cpu(ddq->d_blk_softlimit)) { +			if (!ddq->d_btimer) { +				if (flags & XFS_QMOPT_DOWARN) +					xfs_alert(mp, +			"%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED", +					str, (int)be32_to_cpu(ddq->d_id), ddq); +				errs++; +			} +		} +		if (ddq->d_ino_softlimit && +		    be64_to_cpu(ddq->d_icount) > +				be64_to_cpu(ddq->d_ino_softlimit)) { +			if (!ddq->d_itimer) { +				if (flags & XFS_QMOPT_DOWARN) +					xfs_alert(mp, +			"%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED", +					str, (int)be32_to_cpu(ddq->d_id), ddq); +				errs++; +			} +		} +		if (ddq->d_rtb_softlimit && +		    be64_to_cpu(ddq->d_rtbcount) > +				be64_to_cpu(ddq->d_rtb_softlimit)) { +			if (!ddq->d_rtbtimer) { +				if (flags & XFS_QMOPT_DOWARN) +					xfs_alert(mp, +			"%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED", +					str, (int)be32_to_cpu(ddq->d_id), ddq); +				errs++; +			} +		} +	} + +	if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) +		return errs; + +	if (flags & XFS_QMOPT_DOWARN) +		xfs_notice(mp, "Re-initializing dquot ID 0x%x", id); + +	/* +	 * Typically, a repair is only requested by quotacheck. +	 */ +	ASSERT(id != -1); +	ASSERT(flags & XFS_QMOPT_DQREPAIR); +	memset(d, 0, sizeof(xfs_dqblk_t)); + +	d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); +	d->dd_diskdq.d_version = XFS_DQUOT_VERSION; +	d->dd_diskdq.d_flags = type; +	d->dd_diskdq.d_id = cpu_to_be32(id); + +	if (xfs_sb_version_hascrc(&mp->m_sb)) { +		uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid); +		xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk), +				 XFS_DQUOT_CRC_OFF); +	} + +	return errs; +} + +STATIC bool +xfs_dquot_buf_verify_crc( +	struct xfs_mount	*mp, +	struct xfs_buf		*bp) +{ +	struct xfs_dqblk	*d = (struct xfs_dqblk *)bp->b_addr; +	int			ndquots; +	int			i; + +	if (!xfs_sb_version_hascrc(&mp->m_sb)) +		return true; + +	/* +	 * if we are in log recovery, the quota subsystem has not been +	 * initialised so we have no quotainfo structure. In that case, we need +	 * to manually calculate the number of dquots in the buffer. 
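As a quick worked example of xfs_calc_dquots_per_chunk() above: BBTOB() converts 512-byte basic blocks to bytes, and the byte count is divided by the on-disk dquot record size. The function below is a userspace restatement for illustration only; the 136-byte figure is sizeof(xfs_dqblk_t) on disk, hard-coded here rather than taken from the headers.

#include <stdio.h>

static unsigned int dquots_per_chunk(unsigned int nbblks)
{
        unsigned int bytes = nbblks << 9;       /* BBTOB(): basic blocks -> bytes */

        return bytes / 136;                     /* sizeof(xfs_dqblk_t) */
}

int main(void)
{
        /* a chunk covering one 4 KiB fs block is 8 basic blocks -> 30 dquots */
        printf("%u\n", dquots_per_chunk(8));
        return 0;
}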
+	 */ +	if (mp->m_quotainfo) +		ndquots = mp->m_quotainfo->qi_dqperchunk; +	else +		ndquots = xfs_calc_dquots_per_chunk( +					XFS_BB_TO_FSB(mp, bp->b_length)); + +	for (i = 0; i < ndquots; i++, d++) { +		if (!xfs_verify_cksum((char *)d, sizeof(struct xfs_dqblk), +				 XFS_DQUOT_CRC_OFF)) +			return false; +		if (!uuid_equal(&d->dd_uuid, &mp->m_sb.sb_uuid)) +			return false; +	} +	return true; +} + +STATIC bool +xfs_dquot_buf_verify( +	struct xfs_mount	*mp, +	struct xfs_buf		*bp) +{ +	struct xfs_dqblk	*d = (struct xfs_dqblk *)bp->b_addr; +	xfs_dqid_t		id = 0; +	int			ndquots; +	int			i; + +	/* +	 * if we are in log recovery, the quota subsystem has not been +	 * initialised so we have no quotainfo structure. In that case, we need +	 * to manually calculate the number of dquots in the buffer. +	 */ +	if (mp->m_quotainfo) +		ndquots = mp->m_quotainfo->qi_dqperchunk; +	else +		ndquots = xfs_calc_dquots_per_chunk(bp->b_length); + +	/* +	 * On the first read of the buffer, verify that each dquot is valid. +	 * We don't know what the id of the dquot is supposed to be, just that +	 * they should be increasing monotonically within the buffer. If the +	 * first id is corrupt, then it will fail on the second dquot in the +	 * buffer so corruptions could point to the wrong dquot in this case. +	 */ +	for (i = 0; i < ndquots; i++) { +		struct xfs_disk_dquot	*ddq; +		int			error; + +		ddq = &d[i].dd_diskdq; + +		if (i == 0) +			id = be32_to_cpu(ddq->d_id); + +		error = xfs_dqcheck(mp, ddq, id + i, 0, XFS_QMOPT_DOWARN, +				       "xfs_dquot_buf_verify"); +		if (error) +			return false; +	} +	return true; +} + +static void +xfs_dquot_buf_read_verify( +	struct xfs_buf	*bp) +{ +	struct xfs_mount	*mp = bp->b_target->bt_mount; + +	if (!xfs_dquot_buf_verify_crc(mp, bp)) +		xfs_buf_ioerror(bp, EFSBADCRC); +	else if (!xfs_dquot_buf_verify(mp, bp)) +		xfs_buf_ioerror(bp, EFSCORRUPTED); + +	if (bp->b_error) +		xfs_verifier_error(bp); +} + +/* + * we don't calculate the CRC here as that is done when the dquot is flushed to + * the buffer after the update is done. This ensures that the dquot in the + * buffer always has an up-to-date CRC value. 
+ */ +static void +xfs_dquot_buf_write_verify( +	struct xfs_buf	*bp) +{ +	struct xfs_mount	*mp = bp->b_target->bt_mount; + +	if (!xfs_dquot_buf_verify(mp, bp)) { +		xfs_buf_ioerror(bp, EFSCORRUPTED); +		xfs_verifier_error(bp); +		return; +	} +} + +const struct xfs_buf_ops xfs_dquot_buf_ops = { +	.verify_read = xfs_dquot_buf_read_verify, +	.verify_write = xfs_dquot_buf_write_verify, +}; + diff --git a/fs/xfs/xfs_dquot_item.c b/fs/xfs/xfs_dquot_item.c index e838d84b4e8..f33fbaaa4d8 100644 --- a/fs/xfs/xfs_dquot_item.c +++ b/fs/xfs/xfs_dquot_item.c @@ -18,23 +18,19 @@  #include "xfs.h"  #include "xfs_fs.h"  #include "xfs_format.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h"  #include "xfs_inode.h" -#include "xfs_bmap.h" -#include "xfs_rtalloc.h" +#include "xfs_quota.h"  #include "xfs_error.h" -#include "xfs_itable.h" -#include "xfs_attr.h" +#include "xfs_trans.h"  #include "xfs_buf_item.h"  #include "xfs_trans_priv.h"  #include "xfs_qm.h" +#include "xfs_log.h"  static inline struct xfs_dq_logitem *DQUOT_ITEM(struct xfs_log_item *lip)  { @@ -61,20 +57,24 @@ xfs_qm_dquot_logitem_size(  STATIC void  xfs_qm_dquot_logitem_format(  	struct xfs_log_item	*lip, -	struct xfs_log_iovec	*logvec) +	struct xfs_log_vec	*lv)  {  	struct xfs_dq_logitem	*qlip = DQUOT_ITEM(lip); - -	logvec->i_addr = &qlip->qli_format; -	logvec->i_len  = sizeof(xfs_dq_logformat_t); -	logvec->i_type = XLOG_REG_TYPE_QFORMAT; -	logvec++; -	logvec->i_addr = &qlip->qli_dquot->q_core; -	logvec->i_len  = sizeof(xfs_disk_dquot_t); -	logvec->i_type = XLOG_REG_TYPE_DQUOT; - -	qlip->qli_format.qlf_size = 2; - +	struct xfs_log_iovec	*vecp = NULL; +	struct xfs_dq_logformat	*qlf; + +	qlf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_QFORMAT); +	qlf->qlf_type = XFS_LI_DQUOT; +	qlf->qlf_size = 2; +	qlf->qlf_id = be32_to_cpu(qlip->qli_dquot->q_core.d_id); +	qlf->qlf_blkno = qlip->qli_dquot->q_blkno; +	qlf->qlf_len = 1; +	qlf->qlf_boffset = qlip->qli_dquot->q_bufoffset; +	xlog_finish_iovec(lv, vecp, sizeof(struct xfs_dq_logformat)); + +	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_DQUOT, +			&qlip->qli_dquot->q_core, +			sizeof(struct xfs_disk_dquot));  }  /* @@ -261,18 +261,6 @@ xfs_qm_dquot_logitem_init(  	xfs_log_item_init(dqp->q_mount, &lp->qli_item, XFS_LI_DQUOT,  					&xfs_dquot_item_ops);  	lp->qli_dquot = dqp; -	lp->qli_format.qlf_type = XFS_LI_DQUOT; -	lp->qli_format.qlf_id = be32_to_cpu(dqp->q_core.d_id); -	lp->qli_format.qlf_blkno = dqp->q_blkno; -	lp->qli_format.qlf_len = 1; -	/* -	 * This is just the offset of this dquot within its buffer -	 * (which is currently 1 FSB and probably won't change). -	 * Hence 32 bits for this offset should be just fine. -	 * Alternatively, we can store (bufoffset / sizeof(xfs_dqblk_t)) -	 * here, and recompute it at recovery time. -	 */ -	lp->qli_format.qlf_boffset = (__uint32_t)dqp->q_bufoffset;  }  /*------------------  QUOTAOFF LOG ITEMS  -------------------*/ @@ -298,26 +286,20 @@ xfs_qm_qoff_logitem_size(  	*nbytes += sizeof(struct xfs_qoff_logitem);  } -/* - * This is called to fill in the vector of log iovecs for the - * given quotaoff log item. We use only 1 iovec, and we point that - * at the quotaoff_log_format structure embedded in the quotaoff item. - * It is at this point that we assert that all of the extent - * slots in the quotaoff item have been filled. 
- */  STATIC void  xfs_qm_qoff_logitem_format(  	struct xfs_log_item	*lip, -	struct xfs_log_iovec	*log_vector) +	struct xfs_log_vec	*lv)  {  	struct xfs_qoff_logitem	*qflip = QOFF_ITEM(lip); - -	ASSERT(qflip->qql_format.qf_type == XFS_LI_QUOTAOFF); - -	log_vector->i_addr = &qflip->qql_format; -	log_vector->i_len = sizeof(xfs_qoff_logitem_t); -	log_vector->i_type = XLOG_REG_TYPE_QUOTAOFF; -	qflip->qql_format.qf_size = 1; +	struct xfs_log_iovec	*vecp = NULL; +	struct xfs_qoff_logformat *qlf; + +	qlf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_QUOTAOFF); +	qlf->qf_type = XFS_LI_QUOTAOFF; +	qlf->qf_size = 1; +	qlf->qf_flags = qflip->qql_flags; +	xlog_finish_iovec(lv, vecp, sizeof(struct xfs_qoff_logitem));  }  /* @@ -457,8 +439,7 @@ xfs_qm_qoff_logitem_init(  	xfs_log_item_init(mp, &qf->qql_item, XFS_LI_QUOTAOFF, start ?  			&xfs_qm_qoffend_logitem_ops : &xfs_qm_qoff_logitem_ops);  	qf->qql_item.li_mountp = mp; -	qf->qql_format.qf_type = XFS_LI_QUOTAOFF; -	qf->qql_format.qf_flags = flags;  	qf->qql_start_lip = start; +	qf->qql_flags = flags;  	return qf;  } diff --git a/fs/xfs/xfs_dquot_item.h b/fs/xfs/xfs_dquot_item.h index 5acae2ada70..502e9464634 100644 --- a/fs/xfs/xfs_dquot_item.h +++ b/fs/xfs/xfs_dquot_item.h @@ -27,13 +27,12 @@ typedef struct xfs_dq_logitem {  	xfs_log_item_t		 qli_item;	   /* common portion */  	struct xfs_dquot	*qli_dquot;	   /* dquot ptr */  	xfs_lsn_t		 qli_flush_lsn;	   /* lsn at last flush */ -	xfs_dq_logformat_t	 qli_format;	   /* logged structure */  } xfs_dq_logitem_t;  typedef struct xfs_qoff_logitem {  	xfs_log_item_t		 qql_item;	/* common portion */  	struct xfs_qoff_logitem *qql_start_lip; /* qoff-start logitem, if any */ -	xfs_qoff_logformat_t	 qql_format;	/* logged structure */ +	unsigned int		qql_flags;  } xfs_qoff_logitem_t; diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c index 1123d93ff79..edac5b057d2 100644 --- a/fs/xfs/xfs_error.c +++ b/fs/xfs/xfs_error.c @@ -16,16 +16,13 @@   * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA   */  #include "xfs.h" +#include "xfs_format.h"  #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" -#include "xfs_inode.h"  #include "xfs_error.h"  #ifdef DEBUG @@ -159,7 +156,7 @@ xfs_error_report(  {  	if (level <= xfs_error_level) {  		xfs_alert_tag(mp, XFS_PTAG_ERROR_REPORT, -		"Internal error %s at line %d of file %s.  Caller 0x%p\n", +		"Internal error %s at line %d of file %s.  Caller %pF",  			    tag, linenum, filename, ra);  		xfs_stack_trace(); @@ -181,3 +178,28 @@ xfs_corruption_error(  	xfs_error_report(tag, level, mp, filename, linenum, ra);  	xfs_alert(mp, "Corruption detected. Unmount and run xfs_repair");  } + +/* + * Warnings specifically for verifier errors.  Differentiate CRC vs. invalid + * values, and omit the stack trace unless the error level is tuned high. + */ +void +xfs_verifier_error( +	struct xfs_buf		*bp) +{ +	struct xfs_mount *mp = bp->b_target->bt_mount; + +	xfs_alert(mp, "Metadata %s detected at %pF, block 0x%llx", +		  bp->b_error == EFSBADCRC ? 
"CRC error" : "corruption", +		  __return_address, bp->b_bn); + +	xfs_alert(mp, "Unmount and run xfs_repair"); + +	if (xfs_error_level >= XFS_ERRLEVEL_LOW) { +		xfs_alert(mp, "First 64 bytes of corrupted metadata buffer:"); +		xfs_hex_dump(xfs_buf_offset(bp, 0), 64); +	} + +	if (xfs_error_level >= XFS_ERRLEVEL_HIGH) +		xfs_stack_trace(); +} diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 079a367f44e..c1c57d4a4b5 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -34,6 +34,7 @@ extern void xfs_error_report(const char *tag, int level, struct xfs_mount *mp,  extern void xfs_corruption_error(const char *tag, int level,  			struct xfs_mount *mp, void *p, const char *filename,  			int linenum, inst_t *ra); +extern void xfs_verifier_error(struct xfs_buf *bp);  #define	XFS_ERROR_REPORT(e, lvl, mp)	\  	xfs_error_report(e, lvl, mp, __FILE__, __LINE__, __return_address) diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c index 066df425c14..753e467aa1a 100644 --- a/fs/xfs/xfs_export.c +++ b/fs/xfs/xfs_export.c @@ -16,21 +16,21 @@   * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA   */  #include "xfs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_da_btree.h" -#include "xfs_dir2_format.h" +#include "xfs_da_format.h"  #include "xfs_dir2.h"  #include "xfs_export.h" -#include "xfs_bmap_btree.h"  #include "xfs_inode.h" +#include "xfs_trans.h"  #include "xfs_inode_item.h"  #include "xfs_trace.h"  #include "xfs_icache.h" +#include "xfs_log.h"  /*   * Note that we only accept fileids which are long enough rather than allow @@ -237,7 +237,7 @@ xfs_fs_nfs_commit_metadata(  	if (!lsn)  		return 0; -	return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); +	return -_xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);  }  const struct export_operations xfs_export_operations = { diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c index e43708e2f08..fd22f69049d 100644 --- a/fs/xfs/xfs_extent_busy.c +++ b/fs/xfs/xfs_extent_busy.c @@ -19,17 +19,18 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_shared.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h"  #include "xfs_alloc.h" -#include "xfs_inode.h"  #include "xfs_extent_busy.h"  #include "xfs_trace.h" +#include "xfs_trans.h" +#include "xfs_log.h"  void  xfs_extent_busy_insert( diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h index 985412d65ba..bfff284d2dc 100644 --- a/fs/xfs/xfs_extent_busy.h +++ b/fs/xfs/xfs_extent_busy.h @@ -20,6 +20,10 @@  #ifndef __XFS_EXTENT_BUSY_H__  #define	__XFS_EXTENT_BUSY_H__ +struct xfs_mount; +struct xfs_trans; +struct xfs_alloc_arg; +  /*   * Busy block/extent entry.  Indexed by a rbtree in perag to mark blocks that   * have been freed but whose transactions aren't committed to disk yet. 
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index dc53e8febbb..fb7a4c1ce1c 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -17,15 +17,16 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" -#include "xfs_buf_item.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" +#include "xfs_trans.h"  #include "xfs_trans_priv.h" +#include "xfs_buf_item.h"  #include "xfs_extfree_item.h" +#include "xfs_log.h"  kmem_zone_t	*xfs_efi_zone; @@ -101,9 +102,10 @@ xfs_efi_item_size(  STATIC void  xfs_efi_item_format(  	struct xfs_log_item	*lip, -	struct xfs_log_iovec	*log_vector) +	struct xfs_log_vec	*lv)  {  	struct xfs_efi_log_item	*efip = EFI_ITEM(lip); +	struct xfs_log_iovec	*vecp = NULL;  	ASSERT(atomic_read(&efip->efi_next_extent) ==  				efip->efi_format.efi_nextents); @@ -111,10 +113,9 @@ xfs_efi_item_format(  	efip->efi_format.efi_type = XFS_LI_EFI;  	efip->efi_format.efi_size = 1; -	log_vector->i_addr = &efip->efi_format; -	log_vector->i_len = xfs_efi_item_sizeof(efip); -	log_vector->i_type = XLOG_REG_TYPE_EFI_FORMAT; -	ASSERT(log_vector->i_len >= sizeof(xfs_efi_log_format_t)); +	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFI_FORMAT, +			&efip->efi_format, +			xfs_efi_item_sizeof(efip));  } @@ -368,19 +369,19 @@ xfs_efd_item_size(  STATIC void  xfs_efd_item_format(  	struct xfs_log_item	*lip, -	struct xfs_log_iovec	*log_vector) +	struct xfs_log_vec	*lv)  {  	struct xfs_efd_log_item	*efdp = EFD_ITEM(lip); +	struct xfs_log_iovec	*vecp = NULL;  	ASSERT(efdp->efd_next_extent == efdp->efd_format.efd_nextents);  	efdp->efd_format.efd_type = XFS_LI_EFD;  	efdp->efd_format.efd_size = 1; -	log_vector->i_addr = &efdp->efd_format; -	log_vector->i_len = xfs_efd_item_sizeof(efdp); -	log_vector->i_type = XLOG_REG_TYPE_EFD_FORMAT; -	ASSERT(log_vector->i_len >= sizeof(xfs_efd_log_format_t)); +	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_EFD_FORMAT, +			&efdp->efd_format, +			xfs_efd_item_sizeof(efdp));  }  /* diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 4c749ab543d..1f66779d7a4 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -17,25 +17,27 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_log.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h" -#include "xfs_trans.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc.h" -#include "xfs_dinode.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h"  #include "xfs_inode.h" +#include "xfs_trans.h"  #include "xfs_inode_item.h"  #include "xfs_bmap.h"  #include "xfs_bmap_util.h"  #include "xfs_error.h" -#include "xfs_da_btree.h" -#include "xfs_dir2_format.h"  #include "xfs_dir2.h"  #include "xfs_dir2_priv.h"  #include "xfs_ioctl.h"  #include "xfs_trace.h" +#include "xfs_log.h" +#include "xfs_dinode.h"  #include <linux/aio.h>  #include <linux/dcache.h> @@ -153,7 +155,7 @@ xfs_dir_fsync(  	if (!lsn)  		return 0; -	return _xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL); +	return -_xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, NULL);  }  STATIC int @@ -227,39 +229,33 @@ xfs_file_fsync(  }  STATIC ssize_t -xfs_file_aio_read( +xfs_file_read_iter(  	struct kiocb		*iocb, -	const struct iovec	*iovp, -	unsigned long		nr_segs, -	loff_t			pos) +	struct iov_iter		*to)  {  	struct file		*file = iocb->ki_filp;  	struct inode		*inode = file->f_mapping->host; 
 	struct xfs_inode	*ip = XFS_I(inode);  	struct xfs_mount	*mp = ip->i_mount; -	size_t			size = 0; +	size_t			size = iov_iter_count(to);  	ssize_t			ret = 0;  	int			ioflags = 0;  	xfs_fsize_t		n; +	loff_t			pos = iocb->ki_pos;  	XFS_STATS_INC(xs_read_calls); -	BUG_ON(iocb->ki_pos != pos); -  	if (unlikely(file->f_flags & O_DIRECT))  		ioflags |= IO_ISDIRECT;  	if (file->f_mode & FMODE_NOCMTIME)  		ioflags |= IO_INVIS; -	ret = generic_segment_checks(iovp, &nr_segs, &size, VERIFY_WRITE); -	if (ret < 0) -		return ret; -  	if (unlikely(ioflags & IO_ISDIRECT)) {  		xfs_buftarg_t	*target =  			XFS_IS_REALTIME_INODE(ip) ?  				mp->m_rtdev_targp : mp->m_ddev_targp; -		if ((pos & target->bt_smask) || (size & target->bt_smask)) { +		/* DIO must be aligned to device logical sector size */ +		if ((pos | size) & target->bt_logical_sectormask) {  			if (pos == i_size_read(inode))  				return 0;  			return -XFS_ERROR(EINVAL); @@ -292,7 +288,7 @@ xfs_file_aio_read(  		xfs_rw_ilock(ip, XFS_IOLOCK_EXCL);  		if (inode->i_mapping->nrpages) { -			ret = -filemap_write_and_wait_range( +			ret = filemap_write_and_wait_range(  							VFS_I(ip)->i_mapping,  							pos, -1);  			if (ret) { @@ -306,7 +302,7 @@ xfs_file_aio_read(  	trace_xfs_file_read(ip, size, pos, ioflags); -	ret = generic_file_aio_read(iocb, iovp, nr_segs, pos); +	ret = generic_file_read_iter(iocb, to);  	if (ret > 0)  		XFS_STATS_ADD(xs_read_bytes, ret); @@ -347,47 +343,6 @@ xfs_file_splice_read(  }  /* - * xfs_file_splice_write() does not use xfs_rw_ilock() because - * generic_file_splice_write() takes the i_mutex itself. This, in theory, - * couuld cause lock inversions between the aio_write path and the splice path - * if someone is doing concurrent splice(2) based writes and write(2) based - * writes to the same inode. The only real way to fix this is to re-implement - * the generic code here with correct locking orders. - */ -STATIC ssize_t -xfs_file_splice_write( -	struct pipe_inode_info	*pipe, -	struct file		*outfilp, -	loff_t			*ppos, -	size_t			count, -	unsigned int		flags) -{ -	struct inode		*inode = outfilp->f_mapping->host; -	struct xfs_inode	*ip = XFS_I(inode); -	int			ioflags = 0; -	ssize_t			ret; - -	XFS_STATS_INC(xs_write_calls); - -	if (outfilp->f_mode & FMODE_NOCMTIME) -		ioflags |= IO_INVIS; - -	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) -		return -EIO; - -	xfs_ilock(ip, XFS_IOLOCK_EXCL); - -	trace_xfs_file_splice_write(ip, count, *ppos, ioflags); - -	ret = generic_file_splice_write(pipe, outfilp, ppos, count, flags); -	if (ret > 0) -		XFS_STATS_ADD(xs_write_bytes, ret); - -	xfs_iunlock(ip, XFS_IOLOCK_EXCL); -	return ret; -} - -/*   * This routine is called to handle zeroing any space in the last block of the   * file that is beyond the EOF.  We do this since the size is being increased   * without writing anything to that block and we don't want to read the @@ -622,10 +577,7 @@ restart:  STATIC ssize_t  xfs_file_dio_aio_write(  	struct kiocb		*iocb, -	const struct iovec	*iovp, -	unsigned long		nr_segs, -	loff_t			pos, -	size_t			ocount) +	struct iov_iter		*from)  {  	struct file		*file = iocb->ki_filp;  	struct address_space	*mapping = file->f_mapping; @@ -633,15 +585,18 @@ xfs_file_dio_aio_write(  	struct xfs_inode	*ip = XFS_I(inode);  	struct xfs_mount	*mp = ip->i_mount;  	ssize_t			ret = 0; -	size_t			count = ocount;  	int			unaligned_io = 0;  	int			iolock; +	size_t			count = iov_iter_count(from); +	loff_t			pos = iocb->ki_pos;  	struct xfs_buftarg	*target = XFS_IS_REALTIME_INODE(ip) ?  					
mp->m_rtdev_targp : mp->m_ddev_targp; -	if ((pos & target->bt_smask) || (count & target->bt_smask)) +	/* DIO must be aligned to device logical sector size */ +	if ((pos | count) & target->bt_logical_sectormask)  		return -XFS_ERROR(EINVAL); +	/* "unaligned" here means not aligned to a filesystem block */  	if ((pos & mp->m_blockmask) || ((pos + count) & mp->m_blockmask))  		unaligned_io = 1; @@ -672,9 +627,10 @@ xfs_file_dio_aio_write(  	ret = xfs_file_aio_write_checks(file, &pos, &count, &iolock);  	if (ret)  		goto out; +	iov_iter_truncate(from, count);  	if (mapping->nrpages) { -		ret = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping, +		ret = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,  						    pos, -1);  		if (ret)  			goto out; @@ -693,8 +649,7 @@ xfs_file_dio_aio_write(  	}  	trace_xfs_file_direct_write(ip, count, iocb->ki_pos, 0); -	ret = generic_file_direct_write(iocb, iovp, -			&nr_segs, pos, &iocb->ki_pos, count, ocount); +	ret = generic_file_direct_write(iocb, from, pos);  out:  	xfs_rw_iunlock(ip, iolock); @@ -707,10 +662,7 @@ out:  STATIC ssize_t  xfs_file_buffered_aio_write(  	struct kiocb		*iocb, -	const struct iovec	*iovp, -	unsigned long		nr_segs, -	loff_t			pos, -	size_t			ocount) +	struct iov_iter		*from)  {  	struct file		*file = iocb->ki_filp;  	struct address_space	*mapping = file->f_mapping; @@ -719,7 +671,8 @@ xfs_file_buffered_aio_write(  	ssize_t			ret;  	int			enospc = 0;  	int			iolock = XFS_IOLOCK_EXCL; -	size_t			count = ocount; +	loff_t			pos = iocb->ki_pos; +	size_t			count = iov_iter_count(from);  	xfs_rw_ilock(ip, iolock); @@ -727,14 +680,15 @@ xfs_file_buffered_aio_write(  	if (ret)  		goto out; +	iov_iter_truncate(from, count);  	/* We can write back this queue in page reclaim */  	current->backing_dev_info = mapping->backing_dev_info;  write_retry:  	trace_xfs_file_buffered_write(ip, count, iocb->ki_pos, 0); -	ret = generic_file_buffered_write(iocb, iovp, nr_segs, -			pos, &iocb->ki_pos, count, 0); - +	ret = generic_perform_write(file, from, pos); +	if (likely(ret >= 0)) +		iocb->ki_pos = pos + ret;  	/*  	 * If we just got an ENOSPC, try to write back all dirty inodes to  	 * convert delalloc space to free up some of the excess reserved @@ -753,40 +707,29 @@ out:  }  STATIC ssize_t -xfs_file_aio_write( +xfs_file_write_iter(  	struct kiocb		*iocb, -	const struct iovec	*iovp, -	unsigned long		nr_segs, -	loff_t			pos) +	struct iov_iter		*from)  {  	struct file		*file = iocb->ki_filp;  	struct address_space	*mapping = file->f_mapping;  	struct inode		*inode = mapping->host;  	struct xfs_inode	*ip = XFS_I(inode);  	ssize_t			ret; -	size_t			ocount = 0; +	size_t			ocount = iov_iter_count(from);  	XFS_STATS_INC(xs_write_calls); -	BUG_ON(iocb->ki_pos != pos); - -	ret = generic_segment_checks(iovp, &nr_segs, &ocount, VERIFY_READ); -	if (ret) -		return ret; -  	if (ocount == 0)  		return 0; -	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { -		ret = -EIO; -		goto out; -	} +	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) +		return -EIO;  	if (unlikely(file->f_flags & O_DIRECT)) -		ret = xfs_file_dio_aio_write(iocb, iovp, nr_segs, pos, ocount); +		ret = xfs_file_dio_aio_write(iocb, from);  	else -		ret = xfs_file_buffered_aio_write(iocb, iovp, nr_segs, pos, -						  ocount); +		ret = xfs_file_buffered_aio_write(iocb, from);  	if (ret > 0) {  		ssize_t err; @@ -794,55 +737,99 @@ xfs_file_aio_write(  		XFS_STATS_ADD(xs_write_bytes, ret);  		/* Handle various SYNC-type writes */ -		err = generic_write_sync(file, pos, ret); +		err = generic_write_sync(file, iocb->ki_pos - 
ret, ret);  		if (err < 0)  			ret = err;  	} - -out:  	return ret;  }  STATIC long  xfs_file_fallocate( -	struct file	*file, -	int		mode, -	loff_t		offset, -	loff_t		len) +	struct file		*file, +	int			mode, +	loff_t			offset, +	loff_t			len)  { -	struct inode	*inode = file_inode(file); -	long		error; -	loff_t		new_size = 0; -	xfs_flock64_t	bf; -	xfs_inode_t	*ip = XFS_I(inode); -	int		cmd = XFS_IOC_RESVSP; -	int		attr_flags = XFS_ATTR_NOLOCK; +	struct inode		*inode = file_inode(file); +	struct xfs_inode	*ip = XFS_I(inode); +	struct xfs_trans	*tp; +	long			error; +	loff_t			new_size = 0; -	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) +	if (!S_ISREG(inode->i_mode)) +		return -EINVAL; +	if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | +		     FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))  		return -EOPNOTSUPP; -	bf.l_whence = 0; -	bf.l_start = offset; -	bf.l_len = len; -  	xfs_ilock(ip, XFS_IOLOCK_EXCL); +	if (mode & FALLOC_FL_PUNCH_HOLE) { +		error = xfs_free_file_space(ip, offset, len); +		if (error) +			goto out_unlock; +	} else if (mode & FALLOC_FL_COLLAPSE_RANGE) { +		unsigned blksize_mask = (1 << inode->i_blkbits) - 1; + +		if (offset & blksize_mask || len & blksize_mask) { +			error = EINVAL; +			goto out_unlock; +		} + +		/* +		 * There is no need to overlap collapse range with EOF, +		 * in which case it is effectively a truncate operation +		 */ +		if (offset + len >= i_size_read(inode)) { +			error = EINVAL; +			goto out_unlock; +		} + +		new_size = i_size_read(inode) - len; -	if (mode & FALLOC_FL_PUNCH_HOLE) -		cmd = XFS_IOC_UNRESVSP; +		error = xfs_collapse_file_space(ip, offset, len); +		if (error) +			goto out_unlock; +	} else { +		if (!(mode & FALLOC_FL_KEEP_SIZE) && +		    offset + len > i_size_read(inode)) { +			new_size = offset + len; +			error = -inode_newsize_ok(inode, new_size); +			if (error) +				goto out_unlock; +		} -	/* check the new inode size is valid before allocating */ -	if (!(mode & FALLOC_FL_KEEP_SIZE) && -	    offset + len > i_size_read(inode)) { -		new_size = offset + len; -		error = inode_newsize_ok(inode, new_size); +		if (mode & FALLOC_FL_ZERO_RANGE) +			error = xfs_zero_file_space(ip, offset, len); +		else +			error = xfs_alloc_file_space(ip, offset, len, +						     XFS_BMAPI_PREALLOC);  		if (error)  			goto out_unlock;  	} -	if (file->f_flags & O_DSYNC) -		attr_flags |= XFS_ATTR_SYNC; +	tp = xfs_trans_alloc(ip->i_mount, XFS_TRANS_WRITEID); +	error = xfs_trans_reserve(tp, &M_RES(ip->i_mount)->tr_writeid, 0, 0); +	if (error) { +		xfs_trans_cancel(tp, 0); +		goto out_unlock; +	} + +	xfs_ilock(ip, XFS_ILOCK_EXCL); +	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); +	ip->i_d.di_mode &= ~S_ISUID; +	if (ip->i_d.di_mode & S_IXGRP) +		ip->i_d.di_mode &= ~S_ISGID; + +	if (!(mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE))) +		ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; + +	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); +	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); -	error = -xfs_change_file_space(ip, cmd, &bf, 0, attr_flags); +	if (file->f_flags & O_DSYNC) +		xfs_trans_set_sync(tp); +	error = xfs_trans_commit(tp, 0);  	if (error)  		goto out_unlock; @@ -852,12 +839,12 @@ xfs_file_fallocate(  		iattr.ia_valid = ATTR_SIZE;  		iattr.ia_size = new_size; -		error = -xfs_setattr_size(ip, &iattr, XFS_ATTR_NOLOCK); +		error = xfs_setattr_size(ip, &iattr);  	}  out_unlock:  	xfs_iunlock(ip, XFS_IOLOCK_EXCL); -	return error; +	return -error;  } @@ -890,9 +877,9 @@ xfs_dir_open(  	 * If there are any blocks, read-ahead block 0 
as we're almost  	 * certain to have the next operation be a read there.  	 */ -	mode = xfs_ilock_map_shared(ip); +	mode = xfs_ilock_data_map_shared(ip);  	if (ip->i_d.di_nextents > 0) -		xfs_dir3_data_readahead(NULL, ip, 0, -1); +		xfs_dir3_data_readahead(ip, 0, -1);  	xfs_iunlock(ip, mode);  	return 0;  } @@ -1193,7 +1180,7 @@ xfs_seek_data(  	uint			lock;  	int			error; -	lock = xfs_ilock_map_shared(ip); +	lock = xfs_ilock_data_map_shared(ip);  	isize = i_size_read(inode);  	if (start >= isize) { @@ -1272,7 +1259,7 @@ out:  	offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);  out_unlock: -	xfs_iunlock_map_shared(ip, lock); +	xfs_iunlock(ip, lock);  	if (error)  		return -error; @@ -1297,7 +1284,7 @@ xfs_seek_hole(  	if (XFS_FORCED_SHUTDOWN(mp))  		return -XFS_ERROR(EIO); -	lock = xfs_ilock_map_shared(ip); +	lock = xfs_ilock_data_map_shared(ip);  	isize = i_size_read(inode);  	if (start >= isize) { @@ -1380,7 +1367,7 @@ out:  	offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);  out_unlock: -	xfs_iunlock_map_shared(ip, lock); +	xfs_iunlock(ip, lock);  	if (error)  		return -error; @@ -1409,12 +1396,12 @@ xfs_file_llseek(  const struct file_operations xfs_file_operations = {  	.llseek		= xfs_file_llseek, -	.read		= do_sync_read, -	.write		= do_sync_write, -	.aio_read	= xfs_file_aio_read, -	.aio_write	= xfs_file_aio_write, +	.read		= new_sync_read, +	.write		= new_sync_write, +	.read_iter	= xfs_file_read_iter, +	.write_iter	= xfs_file_write_iter,  	.splice_read	= xfs_file_splice_read, -	.splice_write	= xfs_file_splice_write, +	.splice_write	= iter_file_splice_write,  	.unlocked_ioctl	= xfs_file_ioctl,  #ifdef CONFIG_COMPAT  	.compat_ioctl	= xfs_file_compat_ioctl, @@ -1440,6 +1427,7 @@ const struct file_operations xfs_dir_file_operations = {  static const struct vm_operations_struct xfs_file_vm_ops = {  	.fault		= filemap_fault, +	.map_pages	= filemap_map_pages,  	.page_mkwrite	= xfs_vm_page_mkwrite,  	.remap_pages	= generic_file_remap_pages,  }; diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c index ce78e654d37..8ec81bed799 100644 --- a/fs/xfs/xfs_filestream.c +++ b/fs/xfs/xfs_filestream.c @@ -1,5 +1,6 @@  /*   * Copyright (c) 2006-2007 Silicon Graphics, Inc. + * Copyright (c) 2014 Christoph Hellwig.   * All Rights Reserved.   
*   * This program is free software; you can redistribute it and/or @@ -16,116 +17,36 @@   * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA   */  #include "xfs.h" -#include "xfs_log.h" -#include "xfs_bmap_btree.h" -#include "xfs_inum.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_ag.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_mount.h" +#include "xfs_inum.h" +#include "xfs_inode.h"  #include "xfs_bmap.h"  #include "xfs_bmap_util.h"  #include "xfs_alloc.h"  #include "xfs_mru_cache.h" +#include "xfs_dinode.h"  #include "xfs_filestream.h"  #include "xfs_trace.h" -#ifdef XFS_FILESTREAMS_TRACE - -ktrace_t *xfs_filestreams_trace_buf; - -STATIC void -xfs_filestreams_trace( -	xfs_mount_t	*mp,	/* mount point */ -	int		type,	/* type of trace */ -	const char	*func,	/* source function */ -	int		line,	/* source line number */ -	__psunsigned_t	arg0, -	__psunsigned_t	arg1, -	__psunsigned_t	arg2, -	__psunsigned_t	arg3, -	__psunsigned_t	arg4, -	__psunsigned_t	arg5) -{ -	ktrace_enter(xfs_filestreams_trace_buf, -		(void *)(__psint_t)(type | (line << 16)), -		(void *)func, -		(void *)(__psunsigned_t)current_pid(), -		(void *)mp, -		(void *)(__psunsigned_t)arg0, -		(void *)(__psunsigned_t)arg1, -		(void *)(__psunsigned_t)arg2, -		(void *)(__psunsigned_t)arg3, -		(void *)(__psunsigned_t)arg4, -		(void *)(__psunsigned_t)arg5, -		NULL, NULL, NULL, NULL, NULL, NULL); -} - -#define TRACE0(mp,t)			TRACE6(mp,t,0,0,0,0,0,0) -#define TRACE1(mp,t,a0)			TRACE6(mp,t,a0,0,0,0,0,0) -#define TRACE2(mp,t,a0,a1)		TRACE6(mp,t,a0,a1,0,0,0,0) -#define TRACE3(mp,t,a0,a1,a2)		TRACE6(mp,t,a0,a1,a2,0,0,0) -#define TRACE4(mp,t,a0,a1,a2,a3)	TRACE6(mp,t,a0,a1,a2,a3,0,0) -#define TRACE5(mp,t,a0,a1,a2,a3,a4)	TRACE6(mp,t,a0,a1,a2,a3,a4,0) -#define TRACE6(mp,t,a0,a1,a2,a3,a4,a5) \ -	xfs_filestreams_trace(mp, t, __func__, __LINE__, \ -				(__psunsigned_t)a0, (__psunsigned_t)a1, \ -				(__psunsigned_t)a2, (__psunsigned_t)a3, \ -				(__psunsigned_t)a4, (__psunsigned_t)a5) - -#define TRACE_AG_SCAN(mp, ag, ag2) \ -		TRACE2(mp, XFS_FSTRM_KTRACE_AGSCAN, ag, ag2); -#define TRACE_AG_PICK1(mp, max_ag, maxfree) \ -		TRACE2(mp, XFS_FSTRM_KTRACE_AGPICK1, max_ag, maxfree); -#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) \ -		TRACE6(mp, XFS_FSTRM_KTRACE_AGPICK2, ag, ag2, \ -			 cnt, free, scan, flag) -#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) \ -		TRACE5(mp, XFS_FSTRM_KTRACE_UPDATE, ip, ag, cnt, ag2, cnt2) -#define TRACE_FREE(mp, ip, pip, ag, cnt) \ -		TRACE4(mp, XFS_FSTRM_KTRACE_FREE, ip, pip, ag, cnt) -#define TRACE_LOOKUP(mp, ip, pip, ag, cnt) \ -		TRACE4(mp, XFS_FSTRM_KTRACE_ITEM_LOOKUP, ip, pip, ag, cnt) -#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) \ -		TRACE4(mp, XFS_FSTRM_KTRACE_ASSOCIATE, ip, pip, ag, cnt) -#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) \ -		TRACE6(mp, XFS_FSTRM_KTRACE_MOVEAG, ip, pip, oag, ocnt, nag, ncnt) -#define TRACE_ORPHAN(mp, ip, ag) \ -		TRACE2(mp, XFS_FSTRM_KTRACE_ORPHAN, ip, ag); - - -#else -#define TRACE_AG_SCAN(mp, ag, ag2) -#define TRACE_AG_PICK1(mp, max_ag, maxfree) -#define TRACE_AG_PICK2(mp, ag, ag2, cnt, free, scan, flag) -#define TRACE_UPDATE(mp, ip, ag, cnt, ag2, cnt2) -#define TRACE_FREE(mp, ip, pip, ag, cnt) -#define TRACE_LOOKUP(mp, ip, pip, ag, cnt) -#define TRACE_ASSOCIATE(mp, ip, pip, ag, cnt) -#define TRACE_MOVEAG(mp, ip, pip, oag, ocnt, nag, ncnt) -#define TRACE_ORPHAN(mp, ip, ag) -#endif - -static kmem_zone_t *item_zone; +struct 
xfs_fstrm_item { +	struct xfs_mru_cache_elem	mru; +	struct xfs_inode		*ip; +	xfs_agnumber_t			ag; /* AG in use for this directory */ +}; -/* - * Structure for associating a file or a directory with an allocation group. - * The parent directory pointer is only needed for files, but since there will - * generally be vastly more files than directories in the cache, using the same - * data structure simplifies the code with very little memory overhead. - */ -typedef struct fstrm_item -{ -	xfs_agnumber_t	ag;	/* AG currently in use for the file/directory. */ -	xfs_inode_t	*ip;	/* inode self-pointer. */ -	xfs_inode_t	*pip;	/* Parent directory inode pointer. */ -} fstrm_item_t; +enum xfs_fstrm_alloc { +	XFS_PICK_USERDATA = 1, +	XFS_PICK_LOWSPACE = 2, +};  /*   * Allocation group filestream associations are tracked with per-ag atomic - * counters.  These counters allow _xfs_filestream_pick_ag() to tell whether a + * counters.  These counters allow xfs_filestream_pick_ag() to tell whether a   * particular AG already has active filestreams associated with it. The mount   * point's m_peraglock is used to protect these counters from per-ag array   * re-allocation during a growfs operation.  When xfs_growfs_data_private() is @@ -160,7 +81,7 @@ typedef struct fstrm_item   * the cache that reference per-ag array elements that have since been   * reallocated.   */ -static int +int  xfs_filestream_peek_ag(  	xfs_mount_t	*mp,  	xfs_agnumber_t	agno) @@ -200,23 +121,40 @@ xfs_filestream_put_ag(  	xfs_perag_put(pag);  } +static void +xfs_fstrm_free_func( +	struct xfs_mru_cache_elem *mru) +{ +	struct xfs_fstrm_item	*item = +		container_of(mru, struct xfs_fstrm_item, mru); + +	xfs_filestream_put_ag(item->ip->i_mount, item->ag); + +	trace_xfs_filestream_free(item->ip, item->ag); + +	kmem_free(item); +} +  /*   * Scan the AGs starting at startag looking for an AG that isn't in use and has   * at least minlen blocks free.   */  static int -_xfs_filestream_pick_ag( -	xfs_mount_t	*mp, -	xfs_agnumber_t	startag, -	xfs_agnumber_t	*agp, -	int		flags, -	xfs_extlen_t	minlen) +xfs_filestream_pick_ag( +	struct xfs_inode	*ip, +	xfs_agnumber_t		startag, +	xfs_agnumber_t		*agp, +	int			flags, +	xfs_extlen_t		minlen)  { -	int		streams, max_streams; -	int		err, trylock, nscan; -	xfs_extlen_t	longest, free, minfree, maxfree = 0; -	xfs_agnumber_t	ag, max_ag = NULLAGNUMBER; -	struct xfs_perag *pag; +	struct xfs_mount	*mp = ip->i_mount; +	struct xfs_fstrm_item	*item; +	struct xfs_perag	*pag; +	xfs_extlen_t		longest, free = 0, minfree, maxfree = 0; +	xfs_agnumber_t		ag, max_ag = NULLAGNUMBER; +	int			err, trylock, nscan; + +	ASSERT(S_ISDIR(ip->i_d.di_mode));  	/* 2% of an AG's blocks must be free for it to be chosen. */  	minfree = mp->m_sb.sb_agblocks / 50; @@ -228,8 +166,9 @@ _xfs_filestream_pick_ag(  	trylock = XFS_ALLOC_FLAG_TRYLOCK;  	for (nscan = 0; 1; nscan++) { +		trace_xfs_filestream_scan(ip, ag); +  		pag = xfs_perag_get(mp, ag); -		TRACE_AG_SCAN(mp, ag, atomic_read(&pag->pagf_fstrms));  		if (!pag->pagf_init) {  			err = xfs_alloc_pagf_init(mp, NULL, ag, trylock); @@ -246,7 +185,6 @@ _xfs_filestream_pick_ag(  		/* Keep track of the AG with the most free blocks. */  		if (pag->pagf_freeblks > maxfree) {  			maxfree = pag->pagf_freeblks; -			max_streams = atomic_read(&pag->pagf_fstrms);  			max_ag = ag;  		} @@ -269,7 +207,6 @@ _xfs_filestream_pick_ag(  			/* Break out, retaining the reference on the AG. 
*/  			free = pag->pagf_freeblks; -			streams = atomic_read(&pag->pagf_fstrms);  			xfs_perag_put(pag);  			*agp = ag;  			break; @@ -305,317 +242,98 @@ next_ag:  		 */  		if (max_ag != NULLAGNUMBER) {  			xfs_filestream_get_ag(mp, max_ag); -			TRACE_AG_PICK1(mp, max_ag, maxfree); -			streams = max_streams;  			free = maxfree;  			*agp = max_ag;  			break;  		}  		/* take AG 0 if none matched */ -		TRACE_AG_PICK1(mp, max_ag, maxfree); +		trace_xfs_filestream_pick(ip, *agp, free, nscan);  		*agp = 0;  		return 0;  	} -	TRACE_AG_PICK2(mp, startag, *agp, streams, free, nscan, flags); - -	return 0; -} - -/* - * Set the allocation group number for a file or a directory, updating inode - * references and per-AG references as appropriate. - */ -static int -_xfs_filestream_update_ag( -	xfs_inode_t	*ip, -	xfs_inode_t	*pip, -	xfs_agnumber_t	ag) -{ -	int		err = 0; -	xfs_mount_t	*mp; -	xfs_mru_cache_t	*cache; -	fstrm_item_t	*item; -	xfs_agnumber_t	old_ag; -	xfs_inode_t	*old_pip; +	trace_xfs_filestream_pick(ip, *agp, free, nscan); -	/* -	 * Either ip is a regular file and pip is a directory, or ip is a -	 * directory and pip is NULL. -	 */ -	ASSERT(ip && ((S_ISREG(ip->i_d.di_mode) && pip && -	               S_ISDIR(pip->i_d.di_mode)) || -	              (S_ISDIR(ip->i_d.di_mode) && !pip))); - -	mp = ip->i_mount; -	cache = mp->m_filestream; - -	item = xfs_mru_cache_lookup(cache, ip->i_ino); -	if (item) { -		ASSERT(item->ip == ip); -		old_ag = item->ag; -		item->ag = ag; -		old_pip = item->pip; -		item->pip = pip; -		xfs_mru_cache_done(cache); - -		/* -		 * If the AG has changed, drop the old ref and take a new one, -		 * effectively transferring the reference from old to new AG. -		 */ -		if (ag != old_ag) { -			xfs_filestream_put_ag(mp, old_ag); -			xfs_filestream_get_ag(mp, ag); -		} - -		/* -		 * If ip is a file and its pip has changed, drop the old ref and -		 * take a new one. -		 */ -		if (pip && pip != old_pip) { -			IRELE(old_pip); -			IHOLD(pip); -		} - -		TRACE_UPDATE(mp, ip, old_ag, xfs_filestream_peek_ag(mp, old_ag), -				ag, xfs_filestream_peek_ag(mp, ag)); +	if (*agp == NULLAGNUMBER)  		return 0; -	} -	item = kmem_zone_zalloc(item_zone, KM_MAYFAIL); +	err = ENOMEM; +	item = kmem_alloc(sizeof(*item), KM_MAYFAIL);  	if (!item) -		return ENOMEM; +		goto out_put_ag; -	item->ag = ag; +	item->ag = *agp;  	item->ip = ip; -	item->pip = pip; -	err = xfs_mru_cache_insert(cache, ip->i_ino, item); +	err = xfs_mru_cache_insert(mp->m_filestream, ip->i_ino, &item->mru);  	if (err) { -		kmem_zone_free(item_zone, item); -		return err; +		if (err == EEXIST) +			err = 0; +		goto out_free_item;  	} -	/* Take a reference on the AG. */ -	xfs_filestream_get_ag(mp, ag); - -	/* -	 * Take a reference on the inode itself regardless of whether it's a -	 * regular file or a directory. -	 */ -	IHOLD(ip); - -	/* -	 * In the case of a regular file, take a reference on the parent inode -	 * as well to ensure it remains in-core. -	 */ -	if (pip) -		IHOLD(pip); - -	TRACE_UPDATE(mp, ip, ag, xfs_filestream_peek_ag(mp, ag), -			ag, xfs_filestream_peek_ag(mp, ag)); - -	return 0; -} - -/* xfs_fstrm_free_func(): callback for freeing cached stream items. */ -STATIC void -xfs_fstrm_free_func( -	unsigned long	ino, -	void		*data) -{ -	fstrm_item_t	*item  = (fstrm_item_t *)data; -	xfs_inode_t	*ip = item->ip; - -	ASSERT(ip->i_ino == ino); - -	xfs_iflags_clear(ip, XFS_IFILESTREAM); - -	/* Drop the reference taken on the AG when the item was added. 
*/ -	xfs_filestream_put_ag(ip->i_mount, item->ag); - -	TRACE_FREE(ip->i_mount, ip, item->pip, item->ag, -		xfs_filestream_peek_ag(ip->i_mount, item->ag)); - -	/* -	 * _xfs_filestream_update_ag() always takes a reference on the inode -	 * itself, whether it's a file or a directory.  Release it here. -	 * This can result in the inode being freed and so we must -	 * not hold any inode locks when freeing filesstreams objects -	 * otherwise we can deadlock here. -	 */ -	IRELE(ip); - -	/* -	 * In the case of a regular file, _xfs_filestream_update_ag() also -	 * takes a ref on the parent inode to keep it in-core.  Release that -	 * too. -	 */ -	if (item->pip) -		IRELE(item->pip); - -	/* Finally, free the memory allocated for the item. */ -	kmem_zone_free(item_zone, item); -} - -/* - * xfs_filestream_init() is called at xfs initialisation time to set up the - * memory zone that will be used for filestream data structure allocation. - */ -int -xfs_filestream_init(void) -{ -	item_zone = kmem_zone_init(sizeof(fstrm_item_t), "fstrm_item"); -	if (!item_zone) -		return -ENOMEM; -  	return 0; -} - -/* - * xfs_filestream_uninit() is called at xfs termination time to destroy the - * memory zone that was used for filestream data structure allocation. - */ -void -xfs_filestream_uninit(void) -{ -	kmem_zone_destroy(item_zone); -} - -/* - * xfs_filestream_mount() is called when a file system is mounted with the - * filestream option.  It is responsible for allocating the data structures - * needed to track the new file system's file streams. - */ -int -xfs_filestream_mount( -	xfs_mount_t	*mp) -{ -	int		err; -	unsigned int	lifetime, grp_count; - -	/* -	 * The filestream timer tunable is currently fixed within the range of -	 * one second to four minutes, with five seconds being the default.  The -	 * group count is somewhat arbitrary, but it'd be nice to adhere to the -	 * timer tunable to within about 10 percent.  This requires at least 10 -	 * groups. -	 */ -	lifetime  = xfs_fstrm_centisecs * 10; -	grp_count = 10; - -	err = xfs_mru_cache_create(&mp->m_filestream, lifetime, grp_count, -	                     xfs_fstrm_free_func); +out_free_item: +	kmem_free(item); +out_put_ag: +	xfs_filestream_put_ag(mp, *agp);  	return err;  } -/* - * xfs_filestream_unmount() is called when a file system that was mounted with - * the filestream option is unmounted.  It drains the data structures created - * to track the file system's file streams and frees all the memory that was - * allocated. - */ -void -xfs_filestream_unmount( -	xfs_mount_t	*mp) +static struct xfs_inode * +xfs_filestream_get_parent( +	struct xfs_inode	*ip)  { -	xfs_mru_cache_destroy(mp->m_filestream); -} +	struct inode		*inode = VFS_I(ip), *dir = NULL; +	struct dentry		*dentry, *parent; -/* - * Return the AG of the filestream the file or directory belongs to, or - * NULLAGNUMBER otherwise. 
- */ -xfs_agnumber_t -xfs_filestream_lookup_ag( -	xfs_inode_t	*ip) -{ -	xfs_mru_cache_t	*cache; -	fstrm_item_t	*item; -	xfs_agnumber_t	ag; -	int		ref; - -	if (!S_ISREG(ip->i_d.di_mode) && !S_ISDIR(ip->i_d.di_mode)) { -		ASSERT(0); -		return NULLAGNUMBER; -	} +	dentry = d_find_alias(inode); +	if (!dentry) +		goto out; -	cache = ip->i_mount->m_filestream; -	item = xfs_mru_cache_lookup(cache, ip->i_ino); -	if (!item) { -		TRACE_LOOKUP(ip->i_mount, ip, NULL, NULLAGNUMBER, 0); -		return NULLAGNUMBER; -	} +	parent = dget_parent(dentry); +	if (!parent) +		goto out_dput; -	ASSERT(ip == item->ip); -	ag = item->ag; -	ref = xfs_filestream_peek_ag(ip->i_mount, ag); -	xfs_mru_cache_done(cache); +	dir = igrab(parent->d_inode); +	dput(parent); -	TRACE_LOOKUP(ip->i_mount, ip, item->pip, ag, ref); -	return ag; +out_dput: +	dput(dentry); +out: +	return dir ? XFS_I(dir) : NULL;  }  /* - * xfs_filestream_associate() should only be called to associate a regular file - * with its parent directory.  Calling it with a child directory isn't - * appropriate because filestreams don't apply to entire directory hierarchies. - * Creating a file in a child directory of an existing filestream directory - * starts a new filestream with its own allocation group association. + * Find the right allocation group for a file, either by finding an + * existing file stream or creating a new one.   * - * Returns < 0 on error, 0 if successful association occurred, > 0 if - * we failed to get an association because of locking issues. + * Returns NULLAGNUMBER in case of an error.   */ -int -xfs_filestream_associate( -	xfs_inode_t	*pip, -	xfs_inode_t	*ip) +xfs_agnumber_t +xfs_filestream_lookup_ag( +	struct xfs_inode	*ip)  { -	xfs_mount_t	*mp; -	xfs_mru_cache_t	*cache; -	fstrm_item_t	*item; -	xfs_agnumber_t	ag, rotorstep, startag; -	int		err = 0; +	struct xfs_mount	*mp = ip->i_mount; +	struct xfs_inode	*pip = NULL; +	xfs_agnumber_t		startag, ag = NULLAGNUMBER; +	struct xfs_mru_cache_elem *mru; -	ASSERT(S_ISDIR(pip->i_d.di_mode));  	ASSERT(S_ISREG(ip->i_d.di_mode)); -	if (!S_ISDIR(pip->i_d.di_mode) || !S_ISREG(ip->i_d.di_mode)) -		return -EINVAL; - -	mp = pip->i_mount; -	cache = mp->m_filestream; - -	/* -	 * We have a problem, Houston. -	 * -	 * Taking the iolock here violates inode locking order - we already -	 * hold the ilock. Hence if we block getting this lock we may never -	 * wake. Unfortunately, that means if we can't get the lock, we're -	 * screwed in terms of getting a stream association - we can't spin -	 * waiting for the lock because someone else is waiting on the lock we -	 * hold and we cannot drop that as we are in a transaction here. -	 * -	 * Lucky for us, this inversion is not a problem because it's a -	 * directory inode that we are trying to lock here. -	 * -	 * So, if we can't get the iolock without sleeping then just give up -	 */ -	if (!xfs_ilock_nowait(pip, XFS_IOLOCK_EXCL)) -		return 1; -	/* If the parent directory is already in the cache, use its AG. 
*/ -	item = xfs_mru_cache_lookup(cache, pip->i_ino); -	if (item) { -		ASSERT(item->ip == pip); -		ag = item->ag; -		xfs_mru_cache_done(cache); +	pip = xfs_filestream_get_parent(ip); +	if (!pip) +		goto out; -		TRACE_LOOKUP(mp, pip, pip, ag, xfs_filestream_peek_ag(mp, ag)); -		err = _xfs_filestream_update_ag(ip, pip, ag); +	mru = xfs_mru_cache_lookup(mp->m_filestream, pip->i_ino); +	if (mru) { +		ag = container_of(mru, struct xfs_fstrm_item, mru)->ag; +		xfs_mru_cache_done(mp->m_filestream); -		goto exit; +		trace_xfs_filestream_lookup(ip, ag); +		goto out;  	}  	/* @@ -623,202 +341,94 @@ xfs_filestream_associate(  	 * use the directory inode's AG.  	 */  	if (mp->m_flags & XFS_MOUNT_32BITINODES) { -		rotorstep = xfs_rotorstep; +		xfs_agnumber_t	 rotorstep = xfs_rotorstep;  		startag = (mp->m_agfrotor / rotorstep) % mp->m_sb.sb_agcount;  		mp->m_agfrotor = (mp->m_agfrotor + 1) %  		                 (mp->m_sb.sb_agcount * rotorstep);  	} else  		startag = XFS_INO_TO_AGNO(mp, pip->i_ino); -	/* Pick a new AG for the parent inode starting at startag. */ -	err = _xfs_filestream_pick_ag(mp, startag, &ag, 0, 0); -	if (err || ag == NULLAGNUMBER) -		goto exit_did_pick; - -	/* Associate the parent inode with the AG. */ -	err = _xfs_filestream_update_ag(pip, NULL, ag); -	if (err) -		goto exit_did_pick; - -	/* Associate the file inode with the AG. */ -	err = _xfs_filestream_update_ag(ip, pip, ag); -	if (err) -		goto exit_did_pick; - -	TRACE_ASSOCIATE(mp, ip, pip, ag, xfs_filestream_peek_ag(mp, ag)); - -exit_did_pick: -	/* -	 * If _xfs_filestream_pick_ag() returned a valid AG, remove the -	 * reference it took on it, since the file and directory will have taken -	 * their own now if they were successfully cached. -	 */ -	if (ag != NULLAGNUMBER) -		xfs_filestream_put_ag(mp, ag); - -exit: -	xfs_iunlock(pip, XFS_IOLOCK_EXCL); -	return -err; +	if (xfs_filestream_pick_ag(pip, startag, &ag, 0, 0)) +		ag = NULLAGNUMBER; +out: +	IRELE(pip); +	return ag;  }  /* - * Pick a new allocation group for the current file and its file stream.  This - * function is called by xfs_bmap_filestreams() with the mount point's per-ag - * lock held. + * Pick a new allocation group for the current file and its file stream. + * + * This is called when the allocator can't find a suitable extent in the + * current AG, and we have to move the stream into a new AG with more space.   */  int  xfs_filestream_new_ag(  	struct xfs_bmalloca	*ap,  	xfs_agnumber_t		*agp)  { -	int		flags, err; -	xfs_inode_t	*ip, *pip = NULL; -	xfs_mount_t	*mp; -	xfs_mru_cache_t	*cache; -	xfs_extlen_t	minlen; -	fstrm_item_t	*dir, *file; -	xfs_agnumber_t	ag = NULLAGNUMBER; - -	ip = ap->ip; -	mp = ip->i_mount; -	cache = mp->m_filestream; -	minlen = ap->length; -	*agp = NULLAGNUMBER; +	struct xfs_inode	*ip = ap->ip, *pip; +	struct xfs_mount	*mp = ip->i_mount; +	xfs_extlen_t		minlen = ap->length; +	xfs_agnumber_t		startag = 0; +	int			flags, err = 0; +	struct xfs_mru_cache_elem *mru; -	/* -	 * Look for the file in the cache, removing it if it's found.  Doing -	 * this allows it to be held across the dir lookup that follows. -	 */ -	file = xfs_mru_cache_remove(cache, ip->i_ino); -	if (file) { -		ASSERT(ip == file->ip); - -		/* Save the file's parent inode and old AG number for later. */ -		pip = file->pip; -		ag = file->ag; - -		/* Look for the file's directory in the cache. 
*/ -		dir = xfs_mru_cache_lookup(cache, pip->i_ino); -		if (dir) { -			ASSERT(pip == dir->ip); - -			/* -			 * If the directory has already moved on to a new AG, -			 * use that AG as the new AG for the file. Don't -			 * forget to twiddle the AG refcounts to match the -			 * movement. -			 */ -			if (dir->ag != file->ag) { -				xfs_filestream_put_ag(mp, file->ag); -				xfs_filestream_get_ag(mp, dir->ag); -				*agp = file->ag = dir->ag; -			} - -			xfs_mru_cache_done(cache); -		} +	*agp = NULLAGNUMBER; -		/* -		 * Put the file back in the cache.  If this fails, the free -		 * function needs to be called to tidy up in the same way as if -		 * the item had simply expired from the cache. -		 */ -		err = xfs_mru_cache_insert(cache, ip->i_ino, file); -		if (err) { -			xfs_fstrm_free_func(ip->i_ino, file); -			return err; -		} +	pip = xfs_filestream_get_parent(ip); +	if (!pip) +		goto exit; -		/* -		 * If the file's AG was moved to the directory's new AG, there's -		 * nothing more to be done. -		 */ -		if (*agp != NULLAGNUMBER) { -			TRACE_MOVEAG(mp, ip, pip, -					ag, xfs_filestream_peek_ag(mp, ag), -					*agp, xfs_filestream_peek_ag(mp, *agp)); -			return 0; -		} +	mru = xfs_mru_cache_remove(mp->m_filestream, pip->i_ino); +	if (mru) { +		struct xfs_fstrm_item *item = +			container_of(mru, struct xfs_fstrm_item, mru); +		startag = (item->ag + 1) % mp->m_sb.sb_agcount;  	} -	/* -	 * If the file's parent directory is known, take its iolock in exclusive -	 * mode to prevent two sibling files from racing each other to migrate -	 * themselves and their parent to different AGs. -	 * -	 * Note that we lock the parent directory iolock inside the child -	 * iolock here.  That's fine as we never hold both parent and child -	 * iolock in any other place.  This is different from the ilock, -	 * which requires locking of the child after the parent for namespace -	 * operations. -	 */ -	if (pip) -		xfs_ilock(pip, XFS_IOLOCK_EXCL | XFS_IOLOCK_PARENT); - -	/* -	 * A new AG needs to be found for the file.  If the file's parent -	 * directory is also known, it will be moved to the new AG as well to -	 * ensure that files created inside it in future use the new AG. -	 */ -	ag = (ag == NULLAGNUMBER) ? 0 : (ag + 1) % mp->m_sb.sb_agcount;  	flags = (ap->userdata ? XFS_PICK_USERDATA : 0) |  	        (ap->flist->xbf_low ? XFS_PICK_LOWSPACE : 0); -	err = _xfs_filestream_pick_ag(mp, ag, agp, flags, minlen); -	if (err || *agp == NULLAGNUMBER) -		goto exit; +	err = xfs_filestream_pick_ag(pip, startag, agp, flags, minlen);  	/* -	 * If the file wasn't found in the file cache, then its parent directory -	 * inode isn't known.  For this to have happened, the file must either -	 * be pre-existing, or it was created long enough ago that its cache -	 * entry has expired.  This isn't the sort of usage that the filestreams -	 * allocator is trying to optimise, so there's no point trying to track -	 * its new AG somehow in the filestream data structures. +	 * Only free the item here so we skip over the old AG earlier.  	 */ -	if (!pip) { -		TRACE_ORPHAN(mp, ip, *agp); -		goto exit; -	} - -	/* Associate the parent inode with the AG. */ -	err = _xfs_filestream_update_ag(pip, NULL, *agp); -	if (err) -		goto exit; - -	/* Associate the file inode with the AG. 
*/ -	err = _xfs_filestream_update_ag(ip, pip, *agp); -	if (err) -		goto exit; - -	TRACE_MOVEAG(mp, ip, pip, NULLAGNUMBER, 0, -			*agp, xfs_filestream_peek_ag(mp, *agp)); +	if (mru) +		xfs_fstrm_free_func(mru); +	IRELE(pip);  exit: -	/* -	 * If _xfs_filestream_pick_ag() returned a valid AG, remove the -	 * reference it took on it, since the file and directory will have taken -	 * their own now if they were successfully cached. -	 */ -	if (*agp != NULLAGNUMBER) -		xfs_filestream_put_ag(mp, *agp); -	else +	if (*agp == NULLAGNUMBER)  		*agp = 0; - -	if (pip) -		xfs_iunlock(pip, XFS_IOLOCK_EXCL); -  	return err;  } -/* - * Remove an association between an inode and a filestream object. - * Typically this is done on last close of an unlinked file. - */  void  xfs_filestream_deassociate( -	xfs_inode_t	*ip) +	struct xfs_inode	*ip)  { -	xfs_mru_cache_t	*cache = ip->i_mount->m_filestream; +	xfs_mru_cache_delete(ip->i_mount->m_filestream, ip->i_ino); +} -	xfs_mru_cache_delete(cache, ip->i_ino); +int +xfs_filestream_mount( +	xfs_mount_t	*mp) +{ +	/* +	 * The filestream timer tunable is currently fixed within the range of +	 * one second to four minutes, with five seconds being the default.  The +	 * group count is somewhat arbitrary, but it'd be nice to adhere to the +	 * timer tunable to within about 10 percent.  This requires at least 10 +	 * groups. +	 */ +	return xfs_mru_cache_create(&mp->m_filestream, xfs_fstrm_centisecs * 10, +				    10, xfs_fstrm_free_func); +} + +void +xfs_filestream_unmount( +	xfs_mount_t	*mp) +{ +	xfs_mru_cache_destroy(mp->m_filestream);  } diff --git a/fs/xfs/xfs_filestream.h b/fs/xfs/xfs_filestream.h index 6d61dbee856..2ef43406e53 100644 --- a/fs/xfs/xfs_filestream.h +++ b/fs/xfs/xfs_filestream.h @@ -20,50 +20,20 @@  struct xfs_mount;  struct xfs_inode; -struct xfs_perag;  struct xfs_bmalloca; -#ifdef XFS_FILESTREAMS_TRACE -#define XFS_FSTRM_KTRACE_INFO		1 -#define XFS_FSTRM_KTRACE_AGSCAN		2 -#define XFS_FSTRM_KTRACE_AGPICK1	3 -#define XFS_FSTRM_KTRACE_AGPICK2	4 -#define XFS_FSTRM_KTRACE_UPDATE		5 -#define XFS_FSTRM_KTRACE_FREE		6 -#define	XFS_FSTRM_KTRACE_ITEM_LOOKUP	7 -#define	XFS_FSTRM_KTRACE_ASSOCIATE	8 -#define	XFS_FSTRM_KTRACE_MOVEAG		9 -#define	XFS_FSTRM_KTRACE_ORPHAN		10 - -#define XFS_FSTRM_KTRACE_SIZE	16384 -extern ktrace_t *xfs_filestreams_trace_buf; - -#endif - -/* allocation selection flags */ -typedef enum xfs_fstrm_alloc { -	XFS_PICK_USERDATA = 1, -	XFS_PICK_LOWSPACE = 2, -} xfs_fstrm_alloc_t; - -/* prototypes for filestream.c */ -int xfs_filestream_init(void); -void xfs_filestream_uninit(void);  int xfs_filestream_mount(struct xfs_mount *mp);  void xfs_filestream_unmount(struct xfs_mount *mp); -xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip); -int xfs_filestream_associate(struct xfs_inode *dip, struct xfs_inode *ip);  void xfs_filestream_deassociate(struct xfs_inode *ip); +xfs_agnumber_t xfs_filestream_lookup_ag(struct xfs_inode *ip);  int xfs_filestream_new_ag(struct xfs_bmalloca *ap, xfs_agnumber_t *agp); +int xfs_filestream_peek_ag(struct xfs_mount *mp, xfs_agnumber_t agno); - -/* filestreams for the inode? 
*/  static inline int  xfs_inode_is_filestream(  	struct xfs_inode	*ip)  {  	return (ip->i_mount->m_flags & XFS_MOUNT_FILESTREAMS) || -		xfs_iflags_test(ip, XFS_IFILESTREAM) ||  		(ip->i_d.di_flags & XFS_DIFLAG_FILESTREAM);  } diff --git a/fs/xfs/xfs_format.h b/fs/xfs/xfs_format.h index 35c08ff54ca..34d85aca305 100644 --- a/fs/xfs/xfs_format.h +++ b/fs/xfs/xfs_format.h @@ -145,6 +145,8 @@ struct xfs_dsymlink_hdr {  	__be64	sl_lsn;  }; +#define XFS_SYMLINK_CRC_OFF	offsetof(struct xfs_dsymlink_hdr, sl_crc) +  /*   * The maximum pathlen is 1024 bytes. Since the minimum file system   * blocksize is 512 bytes, we can get a max of 3 extents back from @@ -156,14 +158,271 @@ struct xfs_dsymlink_hdr {  	((bufsize) - (xfs_sb_version_hascrc(&(mp)->m_sb) ? \  			sizeof(struct xfs_dsymlink_hdr) : 0)) -int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen); -int xfs_symlink_hdr_set(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset, -			uint32_t size, struct xfs_buf *bp); -bool xfs_symlink_hdr_ok(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset, -			uint32_t size, struct xfs_buf *bp); -void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, -				 struct xfs_inode *ip, struct xfs_ifork *ifp); -extern const struct xfs_buf_ops xfs_symlink_buf_ops; +/* + * Allocation Btree format definitions + * + * There are two on-disk btrees, one sorted by blockno and one sorted + * by blockcount and blockno.  All blocks look the same to make the code + * simpler; if we have time later, we'll make the optimizations. + */ +#define	XFS_ABTB_MAGIC		0x41425442	/* 'ABTB' for bno tree */ +#define	XFS_ABTB_CRC_MAGIC	0x41423342	/* 'AB3B' */ +#define	XFS_ABTC_MAGIC		0x41425443	/* 'ABTC' for cnt tree */ +#define	XFS_ABTC_CRC_MAGIC	0x41423343	/* 'AB3C' */ + +/* + * Data record/key structure + */ +typedef struct xfs_alloc_rec { +	__be32		ar_startblock;	/* starting block number */ +	__be32		ar_blockcount;	/* count of free blocks */ +} xfs_alloc_rec_t, xfs_alloc_key_t; + +typedef struct xfs_alloc_rec_incore { +	xfs_agblock_t	ar_startblock;	/* starting block number */ +	xfs_extlen_t	ar_blockcount;	/* count of free blocks */ +} xfs_alloc_rec_incore_t; + +/* btree pointer type */ +typedef __be32 xfs_alloc_ptr_t; + +/* + * Block numbers in the AG: + * SB is sector 0, AGF is sector 1, AGI is sector 2, AGFL is sector 3. + */ +#define	XFS_BNO_BLOCK(mp)	((xfs_agblock_t)(XFS_AGFL_BLOCK(mp) + 1)) +#define	XFS_CNT_BLOCK(mp)	((xfs_agblock_t)(XFS_BNO_BLOCK(mp) + 1)) + + +/* + * Inode Allocation Btree format definitions + * + * There is a btree for the inode map per allocation group. + */ +#define	XFS_IBT_MAGIC		0x49414254	/* 'IABT' */ +#define	XFS_IBT_CRC_MAGIC	0x49414233	/* 'IAB3' */ +#define	XFS_FIBT_MAGIC		0x46494254	/* 'FIBT' */ +#define	XFS_FIBT_CRC_MAGIC	0x46494233	/* 'FIB3' */ + +typedef	__uint64_t	xfs_inofree_t; +#define	XFS_INODES_PER_CHUNK		(NBBY * sizeof(xfs_inofree_t)) +#define	XFS_INODES_PER_CHUNK_LOG	(XFS_NBBYLOG + 3) +#define	XFS_INOBT_ALL_FREE		((xfs_inofree_t)-1) +#define	XFS_INOBT_MASK(i)		((xfs_inofree_t)1 << (i)) + +static inline xfs_inofree_t xfs_inobt_maskn(int i, int n) +{ +	return ((n >= XFS_INODES_PER_CHUNK ? 
0 : XFS_INOBT_MASK(n)) - 1) << i; +} + +/* + * Data record structure + */ +typedef struct xfs_inobt_rec { +	__be32		ir_startino;	/* starting inode number */ +	__be32		ir_freecount;	/* count of free inodes (set bits) */ +	__be64		ir_free;	/* free inode mask */ +} xfs_inobt_rec_t; + +typedef struct xfs_inobt_rec_incore { +	xfs_agino_t	ir_startino;	/* starting inode number */ +	__int32_t	ir_freecount;	/* count of free inodes (set bits) */ +	xfs_inofree_t	ir_free;	/* free inode mask */ +} xfs_inobt_rec_incore_t; + + +/* + * Key structure + */ +typedef struct xfs_inobt_key { +	__be32		ir_startino;	/* starting inode number */ +} xfs_inobt_key_t; + +/* btree pointer type */ +typedef __be32 xfs_inobt_ptr_t; + +/* + * block numbers in the AG. + */ +#define	XFS_IBT_BLOCK(mp)		((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1)) +#define	XFS_FIBT_BLOCK(mp)		((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) + +/* + * The first data block of an AG depends on whether the filesystem was formatted + * with the finobt feature. If so, account for the finobt reserved root btree + * block. + */ +#define XFS_PREALLOC_BLOCKS(mp) \ +	(xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \ +	 XFS_FIBT_BLOCK(mp) + 1 : \ +	 XFS_IBT_BLOCK(mp) + 1) + + + +/* + * BMAP Btree format definitions + * + * This includes both the root block definition that sits inside an inode fork + * and the record/pointer formats for the leaf/node in the blocks. + */ +#define XFS_BMAP_MAGIC		0x424d4150	/* 'BMAP' */ +#define XFS_BMAP_CRC_MAGIC	0x424d4133	/* 'BMA3' */ + +/* + * Bmap root header, on-disk form only. + */ +typedef struct xfs_bmdr_block { +	__be16		bb_level;	/* 0 is a leaf */ +	__be16		bb_numrecs;	/* current # of data records */ +} xfs_bmdr_block_t; + +/* + * Bmap btree record and extent descriptor. + *  l0:63 is an extent flag (value 1 indicates non-normal). + *  l0:9-62 are startoff. + *  l0:0-8 and l1:21-63 are startblock. + *  l1:0-20 are blockcount. + */ +#define BMBT_EXNTFLAG_BITLEN	1 +#define BMBT_STARTOFF_BITLEN	54 +#define BMBT_STARTBLOCK_BITLEN	52 +#define BMBT_BLOCKCOUNT_BITLEN	21 + +typedef struct xfs_bmbt_rec { +	__be64			l0, l1; +} xfs_bmbt_rec_t; + +typedef __uint64_t	xfs_bmbt_rec_base_t;	/* use this for casts */ +typedef xfs_bmbt_rec_t xfs_bmdr_rec_t; + +typedef struct xfs_bmbt_rec_host { +	__uint64_t		l0, l1; +} xfs_bmbt_rec_host_t; + +/* + * Values and macros for delayed-allocation startblock fields. + */ +#define STARTBLOCKVALBITS	17 +#define STARTBLOCKMASKBITS	(15 + XFS_BIG_BLKNOS * 20) +#define DSTARTBLOCKMASKBITS	(15 + 20) +#define STARTBLOCKMASK		\ +	(((((xfs_fsblock_t)1) << STARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS) +#define DSTARTBLOCKMASK		\ +	(((((xfs_dfsbno_t)1) << DSTARTBLOCKMASKBITS) - 1) << STARTBLOCKVALBITS) + +static inline int isnullstartblock(xfs_fsblock_t x) +{ +	return ((x) & STARTBLOCKMASK) == STARTBLOCKMASK; +} + +static inline int isnulldstartblock(xfs_dfsbno_t x) +{ +	return ((x) & DSTARTBLOCKMASK) == DSTARTBLOCKMASK; +} + +static inline xfs_fsblock_t nullstartblock(int k) +{ +	ASSERT(k < (1 << STARTBLOCKVALBITS)); +	return STARTBLOCKMASK | (k); +} + +static inline xfs_filblks_t startblockval(xfs_fsblock_t x) +{ +	return (xfs_filblks_t)((x) & ~STARTBLOCKMASK); +} + +/* + * Possible extent formats. + */ +typedef enum { +	XFS_EXTFMT_NOSTATE = 0, +	XFS_EXTFMT_HASSTATE +} xfs_exntfmt_t; + +/* + * Possible extent states. + */ +typedef enum { +	XFS_EXT_NORM, XFS_EXT_UNWRITTEN, +	XFS_EXT_DMAPI_OFFLINE, XFS_EXT_INVALID +} xfs_exntst_t; + +/* + * Incore version of above. 
+ */ +typedef struct xfs_bmbt_irec +{ +	xfs_fileoff_t	br_startoff;	/* starting file offset */ +	xfs_fsblock_t	br_startblock;	/* starting block number */ +	xfs_filblks_t	br_blockcount;	/* number of blocks */ +	xfs_exntst_t	br_state;	/* extent state */ +} xfs_bmbt_irec_t; + +/* + * Key structure for non-leaf levels of the tree. + */ +typedef struct xfs_bmbt_key { +	__be64		br_startoff;	/* starting file offset */ +} xfs_bmbt_key_t, xfs_bmdr_key_t; + +/* btree pointer type */ +typedef __be64 xfs_bmbt_ptr_t, xfs_bmdr_ptr_t; + + +/* + * Generic Btree block format definitions + * + * This is a combination of the actual format used on disk for short and long + * format btrees.  The first three fields are shared by both format, but the + * pointers are different and should be used with care. + * + * To get the size of the actual short or long form headers please use the size + * macros below.  Never use sizeof(xfs_btree_block). + * + * The blkno, crc, lsn, owner and uuid fields are only available in filesystems + * with the crc feature bit, and all accesses to them must be conditional on + * that flag. + */ +struct xfs_btree_block { +	__be32		bb_magic;	/* magic number for block type */ +	__be16		bb_level;	/* 0 is a leaf */ +	__be16		bb_numrecs;	/* current # of data records */ +	union { +		struct { +			__be32		bb_leftsib; +			__be32		bb_rightsib; + +			__be64		bb_blkno; +			__be64		bb_lsn; +			uuid_t		bb_uuid; +			__be32		bb_owner; +			__le32		bb_crc; +		} s;			/* short form pointers */ +		struct	{ +			__be64		bb_leftsib; +			__be64		bb_rightsib; + +			__be64		bb_blkno; +			__be64		bb_lsn; +			uuid_t		bb_uuid; +			__be64		bb_owner; +			__le32		bb_crc; +			__be32		bb_pad; /* padding for alignment */ +		} l;			/* long form pointers */ +	} bb_u;				/* rest */ +}; + +#define XFS_BTREE_SBLOCK_LEN	16	/* size of a short form block */ +#define XFS_BTREE_LBLOCK_LEN	24	/* size of a long form block */ + +/* sizes of CRC enabled btree blocks */ +#define XFS_BTREE_SBLOCK_CRC_LEN	(XFS_BTREE_SBLOCK_LEN + 40) +#define XFS_BTREE_LBLOCK_CRC_LEN	(XFS_BTREE_LBLOCK_LEN + 48) + +#define XFS_BTREE_SBLOCK_CRC_OFF \ +	offsetof(struct xfs_btree_block, bb_u.s.bb_crc) +#define XFS_BTREE_LBLOCK_CRC_OFF \ +	offsetof(struct xfs_btree_block, bb_u.l.bb_crc)  #endif /* __XFS_FORMAT_H__ */ diff --git a/fs/xfs/xfs_fs.h b/fs/xfs/xfs_fs.h index 18272c766a5..d34703dbcb4 100644 --- a/fs/xfs/xfs_fs.h +++ b/fs/xfs/xfs_fs.h @@ -233,11 +233,12 @@ typedef struct xfs_fsop_resblks {  #define XFS_FSOP_GEOM_FLAGS_LOGV2	0x0100	/* log format version 2	*/  #define XFS_FSOP_GEOM_FLAGS_SECTOR	0x0200	/* sector sizes >1BB	*/  #define XFS_FSOP_GEOM_FLAGS_ATTR2	0x0400	/* inline attributes rework */ -#define XFS_FSOP_GEOM_FLAGS_PROJID32	0x0800  /* 32-bit project IDs	*/ +#define XFS_FSOP_GEOM_FLAGS_PROJID32	0x0800	/* 32-bit project IDs	*/  #define XFS_FSOP_GEOM_FLAGS_DIRV2CI	0x1000	/* ASCII only CI names	*/  #define XFS_FSOP_GEOM_FLAGS_LAZYSB	0x4000	/* lazy superblock counters */  #define XFS_FSOP_GEOM_FLAGS_V5SB	0x8000	/* version 5 superblock */ - +#define XFS_FSOP_GEOM_FLAGS_FTYPE	0x10000	/* inode directory types */ +#define XFS_FSOP_GEOM_FLAGS_FINOBT	0x20000	/* free inode btree */  /*   * Minimum and maximum sizes need for growth checks. 
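As an illustrative sketch (not part of the patch itself): the xfs_format.h hunk above documents the packed on-disk bmbt extent record, where bit 63 of l0 is the extent-state flag, bits 9-62 of l0 hold startoff, bits 0-8 of l0 plus bits 21-63 of l1 hold startblock, and bits 0-20 of l1 hold blockcount. A minimal standalone decoder for that layout could look roughly like the following; the struct and helper names here are hypothetical, and it assumes the two __be64 words have already been converted to CPU byte order.

	#include <stdint.h>

	/* bmbt record with both 64-bit words already in CPU byte order */
	struct bmbt_rec_cpu {
		uint64_t	l0;
		uint64_t	l1;
	};

	static inline int bmbt_extent_flag(const struct bmbt_rec_cpu *r)
	{
		return r->l0 >> 63;			/* l0 bit 63 */
	}

	static inline uint64_t bmbt_startoff(const struct bmbt_rec_cpu *r)
	{
		return (r->l0 & ~(1ULL << 63)) >> 9;	/* l0 bits 9-62, 54 bits */
	}

	static inline uint64_t bmbt_startblock(const struct bmbt_rec_cpu *r)
	{
		/* l0 bits 0-8 are the high 9 bits, l1 bits 21-63 the low 43 */
		return ((r->l0 & 0x1ffULL) << 43) | (r->l1 >> 21);
	}

	static inline uint64_t bmbt_blockcount(const struct bmbt_rec_cpu *r)
	{
		return r->l1 & ((1ULL << 21) - 1);	/* l1 bits 0-20 */
	}

The bit budget this sketch decodes (1 + 54 + 52 + 21 = 128) is exactly what the BMBT_*_BITLEN constants in the hunk express.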
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index e64ee5288b8..d2295561570 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -17,28 +17,31 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h"  #include "xfs_inode.h" +#include "xfs_trans.h"  #include "xfs_inode_item.h" -#include "xfs_btree.h"  #include "xfs_error.h" +#include "xfs_btree.h" +#include "xfs_alloc_btree.h"  #include "xfs_alloc.h"  #include "xfs_ialloc.h"  #include "xfs_fsops.h"  #include "xfs_itable.h"  #include "xfs_trans_space.h"  #include "xfs_rtalloc.h" -#include "xfs_filestream.h"  #include "xfs_trace.h" +#include "xfs_log.h" +#include "xfs_dinode.h" +#include "xfs_filestream.h"  /*   * File system operations @@ -73,23 +76,18 @@ xfs_fs_geometry(  	}  	if (new_version >= 3) {  		geo->version = XFS_FSOP_GEOM_VERSION; -		geo->flags = +		geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK | +			     XFS_FSOP_GEOM_FLAGS_DIRV2 |  			(xfs_sb_version_hasattr(&mp->m_sb) ?  				XFS_FSOP_GEOM_FLAGS_ATTR : 0) | -			(xfs_sb_version_hasnlink(&mp->m_sb) ? -				XFS_FSOP_GEOM_FLAGS_NLINK : 0) |  			(xfs_sb_version_hasquota(&mp->m_sb) ?  				XFS_FSOP_GEOM_FLAGS_QUOTA : 0) |  			(xfs_sb_version_hasalign(&mp->m_sb) ?  				XFS_FSOP_GEOM_FLAGS_IALIGN : 0) |  			(xfs_sb_version_hasdalign(&mp->m_sb) ?  				XFS_FSOP_GEOM_FLAGS_DALIGN : 0) | -			(xfs_sb_version_hasshared(&mp->m_sb) ? -				XFS_FSOP_GEOM_FLAGS_SHARED : 0) |  			(xfs_sb_version_hasextflgbit(&mp->m_sb) ?  				XFS_FSOP_GEOM_FLAGS_EXTFLG : 0) | -			(xfs_sb_version_hasdirv2(&mp->m_sb) ? -				XFS_FSOP_GEOM_FLAGS_DIRV2 : 0) |  			(xfs_sb_version_hassector(&mp->m_sb) ?  				XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |  			(xfs_sb_version_hasasciici(&mp->m_sb) ? @@ -101,11 +99,15 @@ xfs_fs_geometry(  			(xfs_sb_version_hasprojid32bit(&mp->m_sb) ?  				XFS_FSOP_GEOM_FLAGS_PROJID32 : 0) |  			(xfs_sb_version_hascrc(&mp->m_sb) ? -				XFS_FSOP_GEOM_FLAGS_V5SB : 0); +				XFS_FSOP_GEOM_FLAGS_V5SB : 0) | +			(xfs_sb_version_hasftype(&mp->m_sb) ? +				XFS_FSOP_GEOM_FLAGS_FTYPE : 0) | +			(xfs_sb_version_hasfinobt(&mp->m_sb) ? +				XFS_FSOP_GEOM_FLAGS_FINOBT : 0);  		geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?  				
mp->m_sb.sb_logsectsize : BBSIZE;  		geo->rtsectsize = mp->m_sb.sb_blocksize; -		geo->dirblocksize = mp->m_dirblksize; +		geo->dirblocksize = mp->m_dir_geo->blksize;  	}  	if (new_version >= 4) {  		geo->flags |= @@ -153,7 +155,7 @@ xfs_growfs_data_private(  	xfs_buf_t		*bp;  	int			bucket;  	int			dpct; -	int			error; +	int			error, saved_error = 0;  	xfs_agnumber_t		nagcount;  	xfs_agnumber_t		nagimax = 0;  	xfs_rfsblock_t		nb, nb_mod; @@ -217,6 +219,8 @@ xfs_growfs_data_private(  	 */  	nfree = 0;  	for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) { +		__be32	*agfl_bno; +  		/*  		 * AG freespace header block  		 */ @@ -276,8 +280,10 @@ xfs_growfs_data_private(  			agfl->agfl_seqno = cpu_to_be32(agno);  			uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_uuid);  		} + +		agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp);  		for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++) -			agfl->agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK); +			agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);  		error = xfs_bwrite(bp);  		xfs_buf_relse(bp); @@ -309,6 +315,10 @@ xfs_growfs_data_private(  		agi->agi_dirino = cpu_to_be32(NULLAGINO);  		if (xfs_sb_version_hascrc(&mp->m_sb))  			uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_uuid); +		if (xfs_sb_version_hasfinobt(&mp->m_sb)) { +			agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp)); +			agi->agi_free_level = cpu_to_be32(1); +		}  		for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)  			agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); @@ -400,6 +410,34 @@ xfs_growfs_data_private(  		xfs_buf_relse(bp);  		if (error)  			goto error0; + +		/* +		 * FINO btree root block +		 */ +		if (xfs_sb_version_hasfinobt(&mp->m_sb)) { +			bp = xfs_growfs_get_hdr_buf(mp, +				XFS_AGB_TO_DADDR(mp, agno, XFS_FIBT_BLOCK(mp)), +				BTOBB(mp->m_sb.sb_blocksize), 0, +				&xfs_inobt_buf_ops); +			if (!bp) { +				error = ENOMEM; +				goto error0; +			} + +			if (xfs_sb_version_hascrc(&mp->m_sb)) +				xfs_btree_init_block(mp, bp, XFS_FIBT_CRC_MAGIC, +						     0, 0, agno, +						     XFS_BTREE_CRC_BLOCKS); +			else +				xfs_btree_init_block(mp, bp, XFS_FIBT_MAGIC, 0, +						     0, agno, 0); + +			error = xfs_bwrite(bp); +			xfs_buf_relse(bp); +			if (error) +				goto error0; +		} +  	}  	xfs_trans_agblocks_delta(tp, nfree);  	/* @@ -496,29 +534,33 @@ xfs_growfs_data_private(  				error = ENOMEM;  		} +		/* +		 * If we get an error reading or writing alternate superblocks, +		 * continue.  xfs_repair chooses the "best" superblock based +		 * on most matches; if we break early, we'll leave more +		 * superblocks un-updated than updated, and xfs_repair may +		 * pick them over the properly-updated primary. +		 */  		if (error) {  			xfs_warn(mp,  		"error %d reading secondary superblock for ag %d",  				error, agno); -			break; +			saved_error = error; +			continue;  		}  		xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb, XFS_SB_ALL_BITS); -		/* -		 * If we get an error writing out the alternate superblocks, -		 * just issue a warning and continue.  The real work is -		 * already done and committed. -		 */  		error = xfs_bwrite(bp);  		xfs_buf_relse(bp);  		if (error) {  			xfs_warn(mp,  		"write error %d updating secondary superblock for ag %d",  				error, agno); -			break; /* no point in continuing */ +			saved_error = error; +			continue;  		}  	} -	return error; +	return saved_error ? 
saved_error : error;   error0:  	xfs_trans_cancel(tp, XFS_TRANS_ABORT); diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c index ccf2fb14396..5960e5593fe 100644 --- a/fs/xfs/xfs_ialloc.c +++ b/fs/xfs/xfs_ialloc.c @@ -17,29 +17,30 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h"  #include "xfs_inum.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h"  #include "xfs_btree.h"  #include "xfs_ialloc.h" +#include "xfs_ialloc_btree.h"  #include "xfs_alloc.h"  #include "xfs_rtalloc.h"  #include "xfs_error.h"  #include "xfs_bmap.h"  #include "xfs_cksum.h" +#include "xfs_trans.h"  #include "xfs_buf_item.h"  #include "xfs_icreate_item.h"  #include "xfs_icache.h" +#include "xfs_dinode.h" +#include "xfs_trace.h"  /* @@ -51,7 +52,7 @@ xfs_ialloc_cluster_alignment(  {  	if (xfs_sb_version_hasalign(&args->mp->m_sb) &&  	    args->mp->m_sb.sb_inoalignmt >= -	     XFS_B_TO_FSBT(args->mp, XFS_INODE_CLUSTER_SIZE(args->mp))) +	     XFS_B_TO_FSBT(args->mp, args->mp->m_inode_cluster_size))  		return args->mp->m_sb.sb_inoalignmt;  	return 1;  } @@ -111,6 +112,66 @@ xfs_inobt_get_rec(  }  /* + * Insert a single inobt record. Cursor must already point to desired location. + */ +STATIC int +xfs_inobt_insert_rec( +	struct xfs_btree_cur	*cur, +	__int32_t		freecount, +	xfs_inofree_t		free, +	int			*stat) +{ +	cur->bc_rec.i.ir_freecount = freecount; +	cur->bc_rec.i.ir_free = free; +	return xfs_btree_insert(cur, stat); +} + +/* + * Insert records describing a newly allocated inode chunk into the inobt. + */ +STATIC int +xfs_inobt_insert( +	struct xfs_mount	*mp, +	struct xfs_trans	*tp, +	struct xfs_buf		*agbp, +	xfs_agino_t		newino, +	xfs_agino_t		newlen, +	xfs_btnum_t		btnum) +{ +	struct xfs_btree_cur	*cur; +	struct xfs_agi		*agi = XFS_BUF_TO_AGI(agbp); +	xfs_agnumber_t		agno = be32_to_cpu(agi->agi_seqno); +	xfs_agino_t		thisino; +	int			i; +	int			error; + +	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, btnum); + +	for (thisino = newino; +	     thisino < newino + newlen; +	     thisino += XFS_INODES_PER_CHUNK) { +		error = xfs_inobt_lookup(cur, thisino, XFS_LOOKUP_EQ, &i); +		if (error) { +			xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); +			return error; +		} +		ASSERT(i == 0); + +		error = xfs_inobt_insert_rec(cur, XFS_INODES_PER_CHUNK, +					     XFS_INOBT_ALL_FREE, &i); +		if (error) { +			xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); +			return error; +		} +		ASSERT(i == 1); +	} + +	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); + +	return 0; +} + +/*   * Verify that the number of free inodes in the AGI is correct.   */  #ifdef DEBUG @@ -169,27 +230,20 @@ xfs_ialloc_inode_init(  {  	struct xfs_buf		*fbuf;  	struct xfs_dinode	*free; -	int			blks_per_cluster, nbufs, ninodes; +	int			nbufs, blks_per_cluster, inodes_per_cluster;  	int			version;  	int			i, j;  	xfs_daddr_t		d;  	xfs_ino_t		ino = 0;  	/* -	 * Loop over the new block(s), filling in the inodes. -	 * For small block sizes, manipulate the inodes in buffers -	 * which are multiples of the blocks size. +	 * Loop over the new block(s), filling in the inodes.  For small block +	 * sizes, manipulate the inodes in buffers  which are multiples of the +	 * blocks size.  	 
*/ -	if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { -		blks_per_cluster = 1; -		nbufs = length; -		ninodes = mp->m_sb.sb_inopblock; -	} else { -		blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / -				   mp->m_sb.sb_blocksize; -		nbufs = length / blks_per_cluster; -		ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; -	} +	blks_per_cluster = xfs_icluster_size_fsb(mp); +	inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog; +	nbufs = length / blks_per_cluster;  	/*  	 * Figure out what version number to use in the inodes we create.  If @@ -224,12 +278,10 @@ xfs_ialloc_inode_init(  		 * they track in the AIL as if they were physically logged.  		 */  		if (tp) -			xfs_icreate_log(tp, agno, agbno, XFS_IALLOC_INODES(mp), +			xfs_icreate_log(tp, agno, agbno, mp->m_ialloc_inos,  					mp->m_sb.sb_inodesize, length, gen); -	} else if (xfs_sb_version_hasnlink(&mp->m_sb)) +	} else  		version = 2; -	else -		version = 1;  	for (j = 0; j < nbufs; j++) {  		/* @@ -245,7 +297,7 @@ xfs_ialloc_inode_init(  		/* Initialize the inode buffers and log them appropriately. */  		fbuf->b_ops = &xfs_inode_buf_ops;  		xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length)); -		for (i = 0; i < ninodes; i++) { +		for (i = 0; i < inodes_per_cluster; i++) {  			int	ioffset = i << mp->m_sb.sb_inodelog;  			uint	isize = xfs_dinode_size(version); @@ -309,13 +361,10 @@ xfs_ialloc_ag_alloc(  {  	xfs_agi_t	*agi;		/* allocation group header */  	xfs_alloc_arg_t	args;		/* allocation argument structure */ -	xfs_btree_cur_t	*cur;		/* inode btree cursor */  	xfs_agnumber_t	agno;  	int		error; -	int		i;  	xfs_agino_t	newino;		/* new first inode's number */  	xfs_agino_t	newlen;		/* new number of inodes */ -	xfs_agino_t	thisino;	/* current inode number, for loop */  	int		isaligned = 0;	/* inode allocation at stripe unit */  					/* boundary */  	struct xfs_perag *pag; @@ -328,11 +377,11 @@ xfs_ialloc_ag_alloc(  	 * Locking will ensure that we don't have two callers in here  	 * at one time.  	 */ -	newlen = XFS_IALLOC_INODES(args.mp); +	newlen = args.mp->m_ialloc_inos;  	if (args.mp->m_maxicount &&  	    args.mp->m_sb.sb_icount + newlen > args.mp->m_maxicount)  		return XFS_ERROR(ENOSPC); -	args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp); +	args.minlen = args.maxlen = args.mp->m_ialloc_blks;  	/*  	 * First try to allocate inodes contiguous with the last-allocated  	 * chunk of inodes.  If the filesystem is striped, this will fill @@ -342,7 +391,7 @@ xfs_ialloc_ag_alloc(  	newino = be32_to_cpu(agi->agi_newino);  	agno = be32_to_cpu(agi->agi_seqno);  	args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) + -			XFS_IALLOC_BLOCKS(args.mp); +		     args.mp->m_ialloc_blks;  	if (likely(newino != NULLAGINO &&  		  (args.agbno < be32_to_cpu(agi->agi_length)))) {  		args.fsbno = XFS_AGB_TO_FSB(args.mp, agno, args.agbno); @@ -369,6 +418,18 @@ xfs_ialloc_ag_alloc(  		args.minleft = args.mp->m_in_maxlevels - 1;  		if ((error = xfs_alloc_vextent(&args)))  			return error; + +		/* +		 * This request might have dirtied the transaction if the AG can +		 * satisfy the request, but the exact block was not available. +		 * If the allocation did fail, subsequent requests will relax +		 * the exact agbno requirement and increase the alignment +		 * instead. It is critical that the total size of the request +		 * (len + alignment + slop) does not increase from this point +		 * on, so reset minalignslop to ensure it is not included in +		 * subsequent requests. 
+		 */ +		args.minalignslop = 0;  	} else  		args.fsbno = NULLFSBLOCK; @@ -453,29 +514,19 @@ xfs_ialloc_ag_alloc(  	agi->agi_newino = cpu_to_be32(newino);  	/* -	 * Insert records describing the new inode chunk into the btree. +	 * Insert records describing the new inode chunk into the btrees.  	 */ -	cur = xfs_inobt_init_cursor(args.mp, tp, agbp, agno); -	for (thisino = newino; -	     thisino < newino + newlen; -	     thisino += XFS_INODES_PER_CHUNK) { -		cur->bc_rec.i.ir_startino = thisino; -		cur->bc_rec.i.ir_freecount = XFS_INODES_PER_CHUNK; -		cur->bc_rec.i.ir_free = XFS_INOBT_ALL_FREE; -		error = xfs_btree_lookup(cur, XFS_LOOKUP_EQ, &i); -		if (error) { -			xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); -			return error; -		} -		ASSERT(i == 0); -		error = xfs_btree_insert(cur, &i); -		if (error) { -			xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); +	error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen, +				 XFS_BTNUM_INO); +	if (error) +		return error; + +	if (xfs_sb_version_hasfinobt(&args.mp->m_sb)) { +		error = xfs_inobt_insert(args.mp, tp, agbp, newino, newlen, +					 XFS_BTNUM_FINO); +		if (error)  			return error; -		} -		ASSERT(i == 1);  	} -	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);  	/*  	 * Log allocation group header fields  	 */ @@ -584,7 +635,7 @@ xfs_ialloc_ag_select(  		 * Is there enough free space for the file plus a block of  		 * inodes? (if we need to allocate some)?  		 */ -		ineed = XFS_IALLOC_BLOCKS(mp); +		ineed = mp->m_ialloc_blks;  		longest = pag->pagf_longest;  		if (!longest)  			longest = pag->pagf_flcount > 0; @@ -669,13 +720,10 @@ xfs_ialloc_get_rec(  }  /* - * Allocate an inode. - * - * The caller selected an AG for us, and made sure that free inodes are - * available. + * Allocate an inode using the inobt-only algorithm.   */  STATIC int -xfs_dialloc_ag( +xfs_dialloc_ag_inobt(  	struct xfs_trans	*tp,  	struct xfs_buf		*agbp,  	xfs_ino_t		parent, @@ -701,7 +749,7 @@ xfs_dialloc_ag(  	ASSERT(pag->pagi_freecount > 0);   restart_pagno: -	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); +	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);  	/*  	 * If pagino is 0 (this is the root inode allocation) use newino.  	 * This must work because we've just allocated some. @@ -934,6 +982,294 @@ error0:  }  /* + * Use the free inode btree to allocate an inode based on distance from the + * parent. Note that the provided cursor may be deleted and replaced. + */ +STATIC int +xfs_dialloc_ag_finobt_near( +	xfs_agino_t			pagino, +	struct xfs_btree_cur		**ocur, +	struct xfs_inobt_rec_incore	*rec) +{ +	struct xfs_btree_cur		*lcur = *ocur;	/* left search cursor */ +	struct xfs_btree_cur		*rcur;	/* right search cursor */ +	struct xfs_inobt_rec_incore	rrec; +	int				error; +	int				i, j; + +	error = xfs_inobt_lookup(lcur, pagino, XFS_LOOKUP_LE, &i); +	if (error) +		return error; + +	if (i == 1) { +		error = xfs_inobt_get_rec(lcur, rec, &i); +		if (error) +			return error; +		XFS_WANT_CORRUPTED_RETURN(i == 1); + +		/* +		 * See if we've landed in the parent inode record. The finobt +		 * only tracks chunks with at least one free inode, so record +		 * existence is enough. 
+		 */ +		if (pagino >= rec->ir_startino && +		    pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK)) +			return 0; +	} + +	error = xfs_btree_dup_cursor(lcur, &rcur); +	if (error) +		return error; + +	error = xfs_inobt_lookup(rcur, pagino, XFS_LOOKUP_GE, &j); +	if (error) +		goto error_rcur; +	if (j == 1) { +		error = xfs_inobt_get_rec(rcur, &rrec, &j); +		if (error) +			goto error_rcur; +		XFS_WANT_CORRUPTED_GOTO(j == 1, error_rcur); +	} + +	XFS_WANT_CORRUPTED_GOTO(i == 1 || j == 1, error_rcur); +	if (i == 1 && j == 1) { +		/* +		 * Both the left and right records are valid. Choose the closer +		 * inode chunk to the target. +		 */ +		if ((pagino - rec->ir_startino + XFS_INODES_PER_CHUNK - 1) > +		    (rrec.ir_startino - pagino)) { +			*rec = rrec; +			xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR); +			*ocur = rcur; +		} else { +			xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR); +		} +	} else if (j == 1) { +		/* only the right record is valid */ +		*rec = rrec; +		xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR); +		*ocur = rcur; +	} else if (i == 1) { +		/* only the left record is valid */ +		xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR); +	} + +	return 0; + +error_rcur: +	xfs_btree_del_cursor(rcur, XFS_BTREE_ERROR); +	return error; +} + +/* + * Use the free inode btree to find a free inode based on a newino hint. If + * the hint is NULL, find the first free inode in the AG. + */ +STATIC int +xfs_dialloc_ag_finobt_newino( +	struct xfs_agi			*agi, +	struct xfs_btree_cur		*cur, +	struct xfs_inobt_rec_incore	*rec) +{ +	int error; +	int i; + +	if (agi->agi_newino != cpu_to_be32(NULLAGINO)) { +		error = xfs_inobt_lookup(cur, agi->agi_newino, XFS_LOOKUP_EQ, +					 &i); +		if (error) +			return error; +		if (i == 1) { +			error = xfs_inobt_get_rec(cur, rec, &i); +			if (error) +				return error; +			XFS_WANT_CORRUPTED_RETURN(i == 1); + +			return 0; +		} +	} + +	/* +	 * Find the first inode available in the AG. +	 */ +	error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i); +	if (error) +		return error; +	XFS_WANT_CORRUPTED_RETURN(i == 1); + +	error = xfs_inobt_get_rec(cur, rec, &i); +	if (error) +		return error; +	XFS_WANT_CORRUPTED_RETURN(i == 1); + +	return 0; +} + +/* + * Update the inobt based on a modification made to the finobt. Also ensure that + * the records from both trees are equivalent post-modification. + */ +STATIC int +xfs_dialloc_ag_update_inobt( +	struct xfs_btree_cur		*cur,	/* inobt cursor */ +	struct xfs_inobt_rec_incore	*frec,	/* finobt record */ +	int				offset) /* inode offset */ +{ +	struct xfs_inobt_rec_incore	rec; +	int				error; +	int				i; + +	error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i); +	if (error) +		return error; +	XFS_WANT_CORRUPTED_RETURN(i == 1); + +	error = xfs_inobt_get_rec(cur, &rec, &i); +	if (error) +		return error; +	XFS_WANT_CORRUPTED_RETURN(i == 1); +	ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) % +				   XFS_INODES_PER_CHUNK) == 0); + +	rec.ir_free &= ~XFS_INOBT_MASK(offset); +	rec.ir_freecount--; + +	XFS_WANT_CORRUPTED_RETURN((rec.ir_free == frec->ir_free) && +				  (rec.ir_freecount == frec->ir_freecount)); + +	error = xfs_inobt_update(cur, &rec); +	if (error) +		return error; + +	return 0; +} + +/* + * Allocate an inode using the free inode btree, if available. Otherwise, fall + * back to the inobt search algorithm. + * + * The caller selected an AG for us, and made sure that free inodes are + * available. 
+ */ +STATIC int +xfs_dialloc_ag( +	struct xfs_trans	*tp, +	struct xfs_buf		*agbp, +	xfs_ino_t		parent, +	xfs_ino_t		*inop) +{ +	struct xfs_mount		*mp = tp->t_mountp; +	struct xfs_agi			*agi = XFS_BUF_TO_AGI(agbp); +	xfs_agnumber_t			agno = be32_to_cpu(agi->agi_seqno); +	xfs_agnumber_t			pagno = XFS_INO_TO_AGNO(mp, parent); +	xfs_agino_t			pagino = XFS_INO_TO_AGINO(mp, parent); +	struct xfs_perag		*pag; +	struct xfs_btree_cur		*cur;	/* finobt cursor */ +	struct xfs_btree_cur		*icur;	/* inobt cursor */ +	struct xfs_inobt_rec_incore	rec; +	xfs_ino_t			ino; +	int				error; +	int				offset; +	int				i; + +	if (!xfs_sb_version_hasfinobt(&mp->m_sb)) +		return xfs_dialloc_ag_inobt(tp, agbp, parent, inop); + +	pag = xfs_perag_get(mp, agno); + +	/* +	 * If pagino is 0 (this is the root inode allocation) use newino. +	 * This must work because we've just allocated some. +	 */ +	if (!pagino) +		pagino = be32_to_cpu(agi->agi_newino); + +	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO); + +	error = xfs_check_agi_freecount(cur, agi); +	if (error) +		goto error_cur; + +	/* +	 * The search algorithm depends on whether we're in the same AG as the +	 * parent. If so, find the closest available inode to the parent. If +	 * not, consider the agi hint or find the first free inode in the AG. +	 */ +	if (agno == pagno) +		error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec); +	else +		error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec); +	if (error) +		goto error_cur; + +	offset = xfs_lowbit64(rec.ir_free); +	ASSERT(offset >= 0); +	ASSERT(offset < XFS_INODES_PER_CHUNK); +	ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) % +				   XFS_INODES_PER_CHUNK) == 0); +	ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino + offset); + +	/* +	 * Modify or remove the finobt record. +	 */ +	rec.ir_free &= ~XFS_INOBT_MASK(offset); +	rec.ir_freecount--; +	if (rec.ir_freecount) +		error = xfs_inobt_update(cur, &rec); +	else +		error = xfs_btree_delete(cur, &i); +	if (error) +		goto error_cur; + +	/* +	 * The finobt has now been updated appropriately. We haven't updated the +	 * agi and superblock yet, so we can create an inobt cursor and validate +	 * the original freecount. If all is well, make the equivalent update to +	 * the inobt using the finobt record and offset information. +	 */ +	icur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO); + +	error = xfs_check_agi_freecount(icur, agi); +	if (error) +		goto error_icur; + +	error = xfs_dialloc_ag_update_inobt(icur, &rec, offset); +	if (error) +		goto error_icur; + +	/* +	 * Both trees have now been updated. We must update the perag and +	 * superblock before we can check the freecount for each btree. +	 */ +	be32_add_cpu(&agi->agi_freecount, -1); +	xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT); +	pag->pagi_freecount--; + +	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1); + +	error = xfs_check_agi_freecount(icur, agi); +	if (error) +		goto error_icur; +	error = xfs_check_agi_freecount(cur, agi); +	if (error) +		goto error_icur; + +	xfs_btree_del_cursor(icur, XFS_BTREE_NOERROR); +	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); +	xfs_perag_put(pag); +	*inop = ino; +	return 0; + +error_icur: +	xfs_btree_del_cursor(icur, XFS_BTREE_ERROR); +error_cur: +	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); +	xfs_perag_put(pag); +	return error; +} + +/*   * Allocate an inode on disk.   *   * Mode is used to tell whether the new inode will need space, and whether it @@ -998,7 +1334,7 @@ xfs_dialloc(  	 * inode.  	 
*/  	if (mp->m_maxicount && -	    mp->m_sb.sb_icount + XFS_IALLOC_INODES(mp) > mp->m_maxicount) { +	    mp->m_sb.sb_icount + mp->m_ialloc_inos > mp->m_maxicount) {  		noroom = 1;  		okalloc = 0;  	} @@ -1092,78 +1428,34 @@ out_error:  	return XFS_ERROR(error);  } -/* - * Free disk inode.  Carefully avoids touching the incore inode, all - * manipulations incore are the caller's responsibility. - * The on-disk inode is not changed by this operation, only the - * btree (free inode mask) is changed. - */ -int -xfs_difree( -	xfs_trans_t	*tp,		/* transaction pointer */ -	xfs_ino_t	inode,		/* inode to be freed */ -	xfs_bmap_free_t	*flist,		/* extents to free */ -	int		*delete,	/* set if inode cluster was deleted */ -	xfs_ino_t	*first_ino)	/* first inode in deleted cluster */ +STATIC int +xfs_difree_inobt( +	struct xfs_mount		*mp, +	struct xfs_trans		*tp, +	struct xfs_buf			*agbp, +	xfs_agino_t			agino, +	struct xfs_bmap_free		*flist, +	int				*deleted, +	xfs_ino_t			*first_ino, +	struct xfs_inobt_rec_incore	*orec)  { -	/* REFERENCED */ -	xfs_agblock_t	agbno;	/* block number containing inode */ -	xfs_buf_t	*agbp;	/* buffer containing allocation group header */ -	xfs_agino_t	agino;	/* inode number relative to allocation group */ -	xfs_agnumber_t	agno;	/* allocation group number */ -	xfs_agi_t	*agi;	/* allocation group header */ -	xfs_btree_cur_t	*cur;	/* inode btree cursor */ -	int		error;	/* error return value */ -	int		i;	/* result code */ -	int		ilen;	/* inodes in an inode cluster */ -	xfs_mount_t	*mp;	/* mount structure for filesystem */ -	int		off;	/* offset of inode in inode chunk */ -	xfs_inobt_rec_incore_t rec;	/* btree record */ -	struct xfs_perag *pag; +	struct xfs_agi			*agi = XFS_BUF_TO_AGI(agbp); +	xfs_agnumber_t			agno = be32_to_cpu(agi->agi_seqno); +	struct xfs_perag		*pag; +	struct xfs_btree_cur		*cur; +	struct xfs_inobt_rec_incore	rec; +	int				ilen; +	int				error; +	int				i; +	int				off; -	mp = tp->t_mountp; - -	/* -	 * Break up inode number into its components. -	 */ -	agno = XFS_INO_TO_AGNO(mp, inode); -	if (agno >= mp->m_sb.sb_agcount)  { -		xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).", -			__func__, agno, mp->m_sb.sb_agcount); -		ASSERT(0); -		return XFS_ERROR(EINVAL); -	} -	agino = XFS_INO_TO_AGINO(mp, inode); -	if (inode != XFS_AGINO_TO_INO(mp, agno, agino))  { -		xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).", -			__func__, (unsigned long long)inode, -			(unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino)); -		ASSERT(0); -		return XFS_ERROR(EINVAL); -	} -	agbno = XFS_AGINO_TO_AGBNO(mp, agino); -	if (agbno >= mp->m_sb.sb_agblocks)  { -		xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).", -			__func__, agbno, mp->m_sb.sb_agblocks); -		ASSERT(0); -		return XFS_ERROR(EINVAL); -	} -	/* -	 * Get the allocation group header. -	 */ -	error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); -	if (error) { -		xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.", -			__func__, error); -		return error; -	} -	agi = XFS_BUF_TO_AGI(agbp);  	ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC)); -	ASSERT(agbno < be32_to_cpu(agi->agi_length)); +	ASSERT(XFS_AGINO_TO_AGBNO(mp, agino) < be32_to_cpu(agi->agi_length)); +  	/*  	 * Initialize the cursor.  	 
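	 * This is the inobt cursor only; when a free inode btree is present,
	 * the matching finobt update is done separately in xfs_difree_finobt()
	 * using the record handed back through *orec.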
*/ -	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); +	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);  	error = xfs_check_agi_freecount(cur, agi);  	if (error) @@ -1201,9 +1493,9 @@ xfs_difree(  	 * When an inode cluster is free, it becomes eligible for removal  	 */  	if (!(mp->m_flags & XFS_MOUNT_IKEEP) && -	    (rec.ir_freecount == XFS_IALLOC_INODES(mp))) { +	    (rec.ir_freecount == mp->m_ialloc_inos)) { -		*delete = 1; +		*deleted = 1;  		*first_ino = XFS_AGINO_TO_INO(mp, agno, rec.ir_startino);  		/* @@ -1211,7 +1503,7 @@ xfs_difree(  		 * AGI and Superblock inode counts, and mark the disk space  		 * to be freed when the transaction is committed.  		 */ -		ilen = XFS_IALLOC_INODES(mp); +		ilen = mp->m_ialloc_inos;  		be32_add_cpu(&agi->agi_count, -ilen);  		be32_add_cpu(&agi->agi_freecount, -(ilen - 1));  		xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT); @@ -1227,11 +1519,11 @@ xfs_difree(  			goto error0;  		} -		xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, -				agno, XFS_INO_TO_AGBNO(mp,rec.ir_startino)), -				XFS_IALLOC_BLOCKS(mp), flist, mp); +		xfs_bmap_add_free(XFS_AGB_TO_FSB(mp, agno, +				  XFS_AGINO_TO_AGBNO(mp, rec.ir_startino)), +				  mp->m_ialloc_blks, flist, mp);  	} else { -		*delete = 0; +		*deleted = 0;  		error = xfs_inobt_update(cur, &rec);  		if (error) { @@ -1255,6 +1547,7 @@ xfs_difree(  	if (error)  		goto error0; +	*orec = rec;  	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);  	return 0; @@ -1263,6 +1556,182 @@ error0:  	return error;  } +/* + * Free an inode in the free inode btree. + */ +STATIC int +xfs_difree_finobt( +	struct xfs_mount		*mp, +	struct xfs_trans		*tp, +	struct xfs_buf			*agbp, +	xfs_agino_t			agino, +	struct xfs_inobt_rec_incore	*ibtrec) /* inobt record */ +{ +	struct xfs_agi			*agi = XFS_BUF_TO_AGI(agbp); +	xfs_agnumber_t			agno = be32_to_cpu(agi->agi_seqno); +	struct xfs_btree_cur		*cur; +	struct xfs_inobt_rec_incore	rec; +	int				offset = agino - ibtrec->ir_startino; +	int				error; +	int				i; + +	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_FINO); + +	error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i); +	if (error) +		goto error; +	if (i == 0) { +		/* +		 * If the record does not exist in the finobt, we must have just +		 * freed an inode in a previously fully allocated chunk. If not, +		 * something is out of sync. +		 */ +		XFS_WANT_CORRUPTED_GOTO(ibtrec->ir_freecount == 1, error); + +		error = xfs_inobt_insert_rec(cur, ibtrec->ir_freecount, +					     ibtrec->ir_free, &i); +		if (error) +			goto error; +		ASSERT(i == 1); + +		goto out; +	} + +	/* +	 * Read and update the existing record. We could just copy the ibtrec +	 * across here, but that would defeat the purpose of having redundant +	 * metadata. By making the modifications independently, we can catch +	 * corruptions that we wouldn't see if we just copied from one record +	 * to another. +	 */ +	error = xfs_inobt_get_rec(cur, &rec, &i); +	if (error) +		goto error; +	XFS_WANT_CORRUPTED_GOTO(i == 1, error); + +	rec.ir_free |= XFS_INOBT_MASK(offset); +	rec.ir_freecount++; + +	XFS_WANT_CORRUPTED_GOTO((rec.ir_free == ibtrec->ir_free) && +				(rec.ir_freecount == ibtrec->ir_freecount), +				error); + +	/* +	 * The content of inobt records should always match between the inobt +	 * and finobt. The lifecycle of records in the finobt is different from +	 * the inobt in that the finobt only tracks records with at least one +	 * free inode. 
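+	 * A chunk with no free inodes therefore has no finobt record at all, and
+	 * the record is re-inserted (the i == 0 case above) the first time an
+	 * inode in the chunk is freed again.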
Hence, if all of the inodes are free and we aren't +	 * keeping inode chunks permanently on disk, remove the record. +	 * Otherwise, update the record with the new information. +	 */ +	if (rec.ir_freecount == mp->m_ialloc_inos && +	    !(mp->m_flags & XFS_MOUNT_IKEEP)) { +		error = xfs_btree_delete(cur, &i); +		if (error) +			goto error; +		ASSERT(i == 1); +	} else { +		error = xfs_inobt_update(cur, &rec); +		if (error) +			goto error; +	} + +out: +	error = xfs_check_agi_freecount(cur, agi); +	if (error) +		goto error; + +	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR); +	return 0; + +error: +	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); +	return error; +} + +/* + * Free disk inode.  Carefully avoids touching the incore inode, all + * manipulations incore are the caller's responsibility. + * The on-disk inode is not changed by this operation, only the + * btree (free inode mask) is changed. + */ +int +xfs_difree( +	struct xfs_trans	*tp,		/* transaction pointer */ +	xfs_ino_t		inode,		/* inode to be freed */ +	struct xfs_bmap_free	*flist,		/* extents to free */ +	int			*deleted,/* set if inode cluster was deleted */ +	xfs_ino_t		*first_ino)/* first inode in deleted cluster */ +{ +	/* REFERENCED */ +	xfs_agblock_t		agbno;	/* block number containing inode */ +	struct xfs_buf		*agbp;	/* buffer for allocation group header */ +	xfs_agino_t		agino;	/* allocation group inode number */ +	xfs_agnumber_t		agno;	/* allocation group number */ +	int			error;	/* error return value */ +	struct xfs_mount	*mp;	/* mount structure for filesystem */ +	struct xfs_inobt_rec_incore rec;/* btree record */ + +	mp = tp->t_mountp; + +	/* +	 * Break up inode number into its components. +	 */ +	agno = XFS_INO_TO_AGNO(mp, inode); +	if (agno >= mp->m_sb.sb_agcount)  { +		xfs_warn(mp, "%s: agno >= mp->m_sb.sb_agcount (%d >= %d).", +			__func__, agno, mp->m_sb.sb_agcount); +		ASSERT(0); +		return XFS_ERROR(EINVAL); +	} +	agino = XFS_INO_TO_AGINO(mp, inode); +	if (inode != XFS_AGINO_TO_INO(mp, agno, agino))  { +		xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).", +			__func__, (unsigned long long)inode, +			(unsigned long long)XFS_AGINO_TO_INO(mp, agno, agino)); +		ASSERT(0); +		return XFS_ERROR(EINVAL); +	} +	agbno = XFS_AGINO_TO_AGBNO(mp, agino); +	if (agbno >= mp->m_sb.sb_agblocks)  { +		xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).", +			__func__, agbno, mp->m_sb.sb_agblocks); +		ASSERT(0); +		return XFS_ERROR(EINVAL); +	} +	/* +	 * Get the allocation group header. +	 */ +	error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); +	if (error) { +		xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.", +			__func__, error); +		return error; +	} + +	/* +	 * Fix up the inode allocation btree. +	 */ +	error = xfs_difree_inobt(mp, tp, agbp, agino, flist, deleted, first_ino, +				 &rec); +	if (error) +		goto error0; + +	/* +	 * Fix up the free inode btree. +	 */ +	if (xfs_sb_version_hasfinobt(&mp->m_sb)) { +		error = xfs_difree_finobt(mp, tp, agbp, agino, &rec); +		if (error) +			goto error0; +	} + +	return 0; + +error0: +	return error; +} +  STATIC int  xfs_imap_lookup(  	struct xfs_mount	*mp, @@ -1294,7 +1763,7 @@ xfs_imap_lookup(  	 * we have a record, we need to ensure it contains the inode number  	 * we are looking up.  	 
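	 * An XFS_LOOKUP_LE search can land on an earlier chunk that does not
	 * actually cover agino, which is why the explicit range check on
	 * ir_startino follows below.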
*/ -	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno); +	cur = xfs_inobt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_INO);  	error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);  	if (!error) {  		if (i) @@ -1310,7 +1779,7 @@ xfs_imap_lookup(  	/* check that the returned record contains the required inode */  	if (rec.ir_startino > agino || -	    rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino) +	    rec.ir_startino + mp->m_ialloc_inos <= agino)  		return EINVAL;  	/* for untrusted inodes check it is allocated first */ @@ -1383,7 +1852,7 @@ xfs_imap(  		return XFS_ERROR(EINVAL);  	} -	blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_blocklog; +	blks_per_cluster = xfs_icluster_size_fsb(mp);  	/*  	 * For bulkstat and handle lookups, we have an untrusted inode number @@ -1404,7 +1873,7 @@ xfs_imap(  	 * If the inode cluster size is the same as the blocksize or  	 * smaller we get to the buffer by simple arithmetics.  	 */ -	if (XFS_INODE_CLUSTER_SIZE(mp) <= mp->m_sb.sb_blocksize) { +	if (blks_per_cluster == 1) {  		offset = XFS_INO_TO_OFFSET(mp, ino);  		ASSERT(offset < mp->m_sb.sb_inopblock); @@ -1482,7 +1951,16 @@ xfs_ialloc_compute_maxlevels(  }  /* - * Log specified fields for the ag hdr (inode section) + * Log specified fields for the ag hdr (inode section). The growth of the agi + * structure over time requires that we interpret the buffer as two logical + * regions delineated by the end of the unlinked list. This is due to the size + * of the hash table and its location in the middle of the agi. + * + * For example, a request to log a field before agi_unlinked and a field after + * agi_unlinked could cause us to log the entire hash table and use an excessive + * amount of log space. To avoid this behavior, log the region up through + * agi_unlinked in one call and the region after agi_unlinked through the end of + * the structure in another.   */  void  xfs_ialloc_log_agi( @@ -1505,6 +1983,8 @@ xfs_ialloc_log_agi(  		offsetof(xfs_agi_t, agi_newino),  		offsetof(xfs_agi_t, agi_dirino),  		offsetof(xfs_agi_t, agi_unlinked), +		offsetof(xfs_agi_t, agi_free_root), +		offsetof(xfs_agi_t, agi_free_level),  		sizeof(xfs_agi_t)  	};  #ifdef DEBUG @@ -1513,15 +1993,30 @@ xfs_ialloc_log_agi(  	agi = XFS_BUF_TO_AGI(bp);  	ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));  #endif + +	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF); +  	/* -	 * Compute byte offsets for the first and last fields. +	 * Compute byte offsets for the first and last fields in the first +	 * region and log the agi buffer. This only logs up through +	 * agi_unlinked.  	 */ -	xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS, &first, &last); +	if (fields & XFS_AGI_ALL_BITS_R1) { +		xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R1, +				  &first, &last); +		xfs_trans_log_buf(tp, bp, first, last); +	} +  	/* -	 * Log the allocation group inode header buffer. +	 * Mask off the bits in the first region and calculate the first and +	 * last field offsets for any bits in the second region.  	 
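+	 * For example, logging XFS_AGI_FREECOUNT together with XFS_AGI_FREE_ROOT
+	 * results in two xfs_trans_log_buf() calls: one covering agi_freecount in
+	 * the first region and one covering agi_free_root in the second, rather
+	 * than a single range spanning the unlinked hash table.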
*/ -	xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGI_BUF); -	xfs_trans_log_buf(tp, bp, first, last); +	fields &= ~XFS_AGI_ALL_BITS_R1; +	if (fields) { +		xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R2, +				  &first, &last); +		xfs_trans_log_buf(tp, bp, first, last); +	}  }  #ifdef DEBUG @@ -1574,18 +2069,17 @@ xfs_agi_read_verify(  	struct xfs_buf	*bp)  {  	struct xfs_mount *mp = bp->b_target->bt_mount; -	int		agi_ok = 1; - -	if (xfs_sb_version_hascrc(&mp->m_sb)) -		agi_ok = xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -					  offsetof(struct xfs_agi, agi_crc)); -	agi_ok = agi_ok && xfs_agi_verify(bp); -	if (unlikely(XFS_TEST_ERROR(!agi_ok, mp, XFS_ERRTAG_IALLOC_READ_AGI, -			XFS_RANDOM_IALLOC_READ_AGI))) { -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); +	if (xfs_sb_version_hascrc(&mp->m_sb) && +	    !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF)) +		xfs_buf_ioerror(bp, EFSBADCRC); +	else if (XFS_TEST_ERROR(!xfs_agi_verify(bp), mp, +				XFS_ERRTAG_IALLOC_READ_AGI, +				XFS_RANDOM_IALLOC_READ_AGI))  		xfs_buf_ioerror(bp, EFSCORRUPTED); -	} + +	if (bp->b_error) +		xfs_verifier_error(bp);  }  static void @@ -1596,8 +2090,8 @@ xfs_agi_write_verify(  	struct xfs_buf_log_item	*bip = bp->b_fspriv;  	if (!xfs_agi_verify(bp)) { -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);  		xfs_buf_ioerror(bp, EFSCORRUPTED); +		xfs_verifier_error(bp);  		return;  	} @@ -1606,8 +2100,7 @@ xfs_agi_write_verify(  	if (bip)  		XFS_BUF_TO_AGI(bp)->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn); -	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), -			 offsetof(struct xfs_agi, agi_crc)); +	xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF);  }  const struct xfs_buf_ops xfs_agi_buf_ops = { @@ -1627,15 +2120,15 @@ xfs_read_agi(  {  	int			error; -	ASSERT(agno != NULLAGNUMBER); +	trace_xfs_read_agi(mp, agno); +	ASSERT(agno != NULLAGNUMBER);  	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,  			XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),  			XFS_FSS_TO_BB(mp, 1), 0, bpp, &xfs_agi_buf_ops);  	if (error)  		return error; -	ASSERT(!xfs_buf_geterror(*bpp));  	xfs_buf_set_ref(*bpp, XFS_AGI_REF);  	return 0;  } @@ -1651,6 +2144,8 @@ xfs_ialloc_read_agi(  	struct xfs_perag	*pag;	/* per allocation group data */  	int			error; +	trace_xfs_ialloc_read_agi(mp, agno); +  	error = xfs_read_agi(mp, tp, agno, bpp);  	if (error)  		return error; diff --git a/fs/xfs/xfs_ialloc.h b/fs/xfs/xfs_ialloc.h index 68c07320f09..95ad1c002d6 100644 --- a/fs/xfs/xfs_ialloc.h +++ b/fs/xfs/xfs_ialloc.h @@ -23,18 +23,20 @@ struct xfs_dinode;  struct xfs_imap;  struct xfs_mount;  struct xfs_trans; +struct xfs_btree_cur; -/* - * Allocation parameters for inode allocation. - */ -#define	XFS_IALLOC_INODES(mp)	(mp)->m_ialloc_inos -#define	XFS_IALLOC_BLOCKS(mp)	(mp)->m_ialloc_blks - -/* - * Move inodes in clusters of this size. - */ +/* Move inodes in clusters of this size */  #define	XFS_INODE_BIG_CLUSTER_SIZE	8192 -#define	XFS_INODE_CLUSTER_SIZE(mp)	(mp)->m_inode_cluster_size + +/* Calculate and return the number of filesystem blocks per inode cluster */ +static inline int +xfs_icluster_size_fsb( +	struct xfs_mount	*mp) +{ +	if (mp->m_sb.sb_blocksize >= mp->m_inode_cluster_size) +		return 1; +	return mp->m_inode_cluster_size >> mp->m_sb.sb_blocklog; +}  /*   * Make an inode pointer out of the buffer/offset. 
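A minimal, illustrative sketch of what the new xfs_icluster_size_fsb() helper
above computes for a couple of common geometries; the function and parameter
names below are made up for the example and are not part of the patch:

static inline int icluster_size_fsb_example(unsigned int sb_blocksize,
					    unsigned int sb_blocklog,
					    unsigned int inode_cluster_size)
{
	/* the whole inode cluster fits inside a single filesystem block */
	if (sb_blocksize >= inode_cluster_size)
		return 1;
	/* otherwise, cluster size divided by block size, done as a shift */
	return inode_cluster_size >> sb_blocklog;
}

/*
 * 4096-byte blocks (sb_blocklog = 12), 8192-byte inode cluster -> 2 blocks
 * 4096-byte blocks (sb_blocklog = 12), 4096-byte inode cluster -> 1 block
 */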
@@ -42,7 +44,7 @@ struct xfs_trans;  static inline struct xfs_dinode *  xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o)  { -	return (xfs_dinode_t *) +	return (struct xfs_dinode *)  		(xfs_buf_offset(b, o << (mp)->m_sb.sb_inodelog));  } @@ -88,7 +90,7 @@ xfs_difree(  	struct xfs_trans *tp,		/* transaction pointer */  	xfs_ino_t	inode,		/* inode to be freed */  	struct xfs_bmap_free *flist,	/* extents to free */ -	int		*delete,	/* set if inode cluster was deleted */ +	int		*deleted,	/* set if inode cluster was deleted */  	xfs_ino_t	*first_ino);	/* first inode in deleted cluster */  /* @@ -158,6 +160,4 @@ int xfs_ialloc_inode_init(struct xfs_mount *mp, struct xfs_trans *tp,  			  xfs_agnumber_t agno, xfs_agblock_t agbno,  			  xfs_agblock_t length, unsigned int gen); -extern const struct xfs_buf_ops xfs_agi_buf_ops; -  #endif	/* __XFS_IALLOC_H__ */ diff --git a/fs/xfs/xfs_ialloc_btree.c b/fs/xfs/xfs_ialloc_btree.c index 5448eb6b8c1..726f83a681a 100644 --- a/fs/xfs/xfs_ialloc_btree.c +++ b/fs/xfs/xfs_ialloc_btree.c @@ -17,24 +17,23 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h"  #include "xfs_btree.h"  #include "xfs_ialloc.h" +#include "xfs_ialloc_btree.h"  #include "xfs_alloc.h"  #include "xfs_error.h"  #include "xfs_trace.h"  #include "xfs_cksum.h" +#include "xfs_trans.h"  STATIC int @@ -50,7 +49,8 @@ xfs_inobt_dup_cursor(  	struct xfs_btree_cur	*cur)  {  	return xfs_inobt_init_cursor(cur->bc_mp, cur->bc_tp, -			cur->bc_private.a.agbp, cur->bc_private.a.agno); +			cur->bc_private.a.agbp, cur->bc_private.a.agno, +			cur->bc_btnum);  }  STATIC void @@ -67,12 +67,26 @@ xfs_inobt_set_root(  	xfs_ialloc_log_agi(cur->bc_tp, agbp, XFS_AGI_ROOT | XFS_AGI_LEVEL);  } +STATIC void +xfs_finobt_set_root( +	struct xfs_btree_cur	*cur, +	union xfs_btree_ptr	*nptr, +	int			inc)	/* level change */ +{ +	struct xfs_buf		*agbp = cur->bc_private.a.agbp; +	struct xfs_agi		*agi = XFS_BUF_TO_AGI(agbp); + +	agi->agi_free_root = nptr->s; +	be32_add_cpu(&agi->agi_free_level, inc); +	xfs_ialloc_log_agi(cur->bc_tp, agbp, +			   XFS_AGI_FREE_ROOT | XFS_AGI_FREE_LEVEL); +} +  STATIC int  xfs_inobt_alloc_block(  	struct xfs_btree_cur	*cur,  	union xfs_btree_ptr	*start,  	union xfs_btree_ptr	*new, -	int			length,  	int			*stat)  {  	xfs_alloc_arg_t		args;		/* block allocation args */ @@ -174,6 +188,17 @@ xfs_inobt_init_ptr_from_cur(  	ptr->s = agi->agi_root;  } +STATIC void +xfs_finobt_init_ptr_from_cur( +	struct xfs_btree_cur	*cur, +	union xfs_btree_ptr	*ptr) +{ +	struct xfs_agi		*agi = XFS_BUF_TO_AGI(cur->bc_private.a.agbp); + +	ASSERT(cur->bc_private.a.agno == be32_to_cpu(agi->agi_seqno)); +	ptr->s = agi->agi_free_root; +} +  STATIC __int64_t  xfs_inobt_key_diff(  	struct xfs_btree_cur	*cur, @@ -204,6 +229,7 @@ xfs_inobt_verify(  	 */  	switch (block->bb_magic) {  	case cpu_to_be32(XFS_IBT_CRC_MAGIC): +	case cpu_to_be32(XFS_FIBT_CRC_MAGIC):  		if (!xfs_sb_version_hascrc(&mp->m_sb))  			return false;  		if (!uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid)) @@ -215,6 +241,7 @@ xfs_inobt_verify(  			return false;  		/* fall through */  	case cpu_to_be32(XFS_IBT_MAGIC): +	case cpu_to_be32(XFS_FIBT_MAGIC):  		
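+		/*
+		 * The free inode btree reuses the inobt block, record and key
+		 * format; only the magic number differs, so the same checks
+		 * apply to both.
+		 */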
break;  	default:  		return 0; @@ -244,12 +271,14 @@ static void  xfs_inobt_read_verify(  	struct xfs_buf	*bp)  { -	if (!(xfs_btree_sblock_verify_crc(bp) && -	      xfs_inobt_verify(bp))) { -		trace_xfs_btree_corrupt(bp, _RET_IP_); -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, -				     bp->b_target->bt_mount, bp->b_addr); +	if (!xfs_btree_sblock_verify_crc(bp)) +		xfs_buf_ioerror(bp, EFSBADCRC); +	else if (!xfs_inobt_verify(bp))  		xfs_buf_ioerror(bp, EFSCORRUPTED); + +	if (bp->b_error) { +		trace_xfs_btree_corrupt(bp, _RET_IP_); +		xfs_verifier_error(bp);  	}  } @@ -259,9 +288,9 @@ xfs_inobt_write_verify(  {  	if (!xfs_inobt_verify(bp)) {  		trace_xfs_btree_corrupt(bp, _RET_IP_); -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, -				     bp->b_target->bt_mount, bp->b_addr);  		xfs_buf_ioerror(bp, EFSCORRUPTED); +		xfs_verifier_error(bp); +		return;  	}  	xfs_btree_sblock_calc_crc(bp); @@ -316,6 +345,28 @@ static const struct xfs_btree_ops xfs_inobt_ops = {  #endif  }; +static const struct xfs_btree_ops xfs_finobt_ops = { +	.rec_len		= sizeof(xfs_inobt_rec_t), +	.key_len		= sizeof(xfs_inobt_key_t), + +	.dup_cursor		= xfs_inobt_dup_cursor, +	.set_root		= xfs_finobt_set_root, +	.alloc_block		= xfs_inobt_alloc_block, +	.free_block		= xfs_inobt_free_block, +	.get_minrecs		= xfs_inobt_get_minrecs, +	.get_maxrecs		= xfs_inobt_get_maxrecs, +	.init_key_from_rec	= xfs_inobt_init_key_from_rec, +	.init_rec_from_key	= xfs_inobt_init_rec_from_key, +	.init_rec_from_cur	= xfs_inobt_init_rec_from_cur, +	.init_ptr_from_cur	= xfs_finobt_init_ptr_from_cur, +	.key_diff		= xfs_inobt_key_diff, +	.buf_ops		= &xfs_inobt_buf_ops, +#if defined(DEBUG) || defined(XFS_WARN) +	.keys_inorder		= xfs_inobt_keys_inorder, +	.recs_inorder		= xfs_inobt_recs_inorder, +#endif +}; +  /*   * Allocate a new inode btree cursor.   */ @@ -324,7 +375,8 @@ xfs_inobt_init_cursor(  	struct xfs_mount	*mp,		/* file system mount point */  	struct xfs_trans	*tp,		/* transaction pointer */  	struct xfs_buf		*agbp,		/* buffer for agi structure */ -	xfs_agnumber_t		agno)		/* allocation group number */ +	xfs_agnumber_t		agno,		/* allocation group number */ +	xfs_btnum_t		btnum)		/* ialloc or free ino btree */  {  	struct xfs_agi		*agi = XFS_BUF_TO_AGI(agbp);  	struct xfs_btree_cur	*cur; @@ -333,11 +385,17 @@ xfs_inobt_init_cursor(  	cur->bc_tp = tp;  	cur->bc_mp = mp; -	cur->bc_nlevels = be32_to_cpu(agi->agi_level); -	cur->bc_btnum = XFS_BTNUM_INO; +	cur->bc_btnum = btnum; +	if (btnum == XFS_BTNUM_INO) { +		cur->bc_nlevels = be32_to_cpu(agi->agi_level); +		cur->bc_ops = &xfs_inobt_ops; +	} else { +		cur->bc_nlevels = be32_to_cpu(agi->agi_free_level); +		cur->bc_ops = &xfs_finobt_ops; +	} +  	cur->bc_blocklog = mp->m_sb.sb_blocklog; -	cur->bc_ops = &xfs_inobt_ops;  	if (xfs_sb_version_hascrc(&mp->m_sb))  		cur->bc_flags |= XFS_BTREE_CRC_BLOCKS; diff --git a/fs/xfs/xfs_ialloc_btree.h b/fs/xfs/xfs_ialloc_btree.h index 3ac36b7642e..d7ebea72c2d 100644 --- a/fs/xfs/xfs_ialloc_btree.h +++ b/fs/xfs/xfs_ialloc_btree.h @@ -27,55 +27,6 @@ struct xfs_btree_cur;  struct xfs_mount;  /* - * There is a btree for the inode map per allocation group. 
- */ -#define	XFS_IBT_MAGIC		0x49414254	/* 'IABT' */ -#define	XFS_IBT_CRC_MAGIC	0x49414233	/* 'IAB3' */ - -typedef	__uint64_t	xfs_inofree_t; -#define	XFS_INODES_PER_CHUNK		(NBBY * sizeof(xfs_inofree_t)) -#define	XFS_INODES_PER_CHUNK_LOG	(XFS_NBBYLOG + 3) -#define	XFS_INOBT_ALL_FREE		((xfs_inofree_t)-1) -#define	XFS_INOBT_MASK(i)		((xfs_inofree_t)1 << (i)) - -static inline xfs_inofree_t xfs_inobt_maskn(int i, int n) -{ -	return ((n >= XFS_INODES_PER_CHUNK ? 0 : XFS_INOBT_MASK(n)) - 1) << i; -} - -/* - * Data record structure - */ -typedef struct xfs_inobt_rec { -	__be32		ir_startino;	/* starting inode number */ -	__be32		ir_freecount;	/* count of free inodes (set bits) */ -	__be64		ir_free;	/* free inode mask */ -} xfs_inobt_rec_t; - -typedef struct xfs_inobt_rec_incore { -	xfs_agino_t	ir_startino;	/* starting inode number */ -	__int32_t	ir_freecount;	/* count of free inodes (set bits) */ -	xfs_inofree_t	ir_free;	/* free inode mask */ -} xfs_inobt_rec_incore_t; - - -/* - * Key structure - */ -typedef struct xfs_inobt_key { -	__be32		ir_startino;	/* starting inode number */ -} xfs_inobt_key_t; - -/* btree pointer type */ -typedef __be32 xfs_inobt_ptr_t; - -/* - * block numbers in the AG. - */ -#define	XFS_IBT_BLOCK(mp)		((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1)) -#define	XFS_PREALLOC_BLOCKS(mp)		((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) - -/*   * Btree block header size depends on a superblock flag.   */  #define XFS_INOBT_BLOCK_LEN(mp) \ @@ -107,9 +58,8 @@ typedef __be32 xfs_inobt_ptr_t;  		 ((index) - 1) * sizeof(xfs_inobt_ptr_t)))  extern struct xfs_btree_cur *xfs_inobt_init_cursor(struct xfs_mount *, -		struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t); +		struct xfs_trans *, struct xfs_buf *, xfs_agnumber_t, +		xfs_btnum_t);  extern int xfs_inobt_maxrecs(struct xfs_mount *, int, int); -extern const struct xfs_buf_ops xfs_inobt_buf_ops; -  #endif	/* __XFS_IALLOC_BTREE_H__ */ diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 474807a401c..c48df5f25b9 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -18,24 +18,19 @@  #include "xfs.h"  #include "xfs_fs.h"  #include "xfs_format.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_log_priv.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h"  #include "xfs_inode.h" -#include "xfs_dinode.h"  #include "xfs_error.h" -#include "xfs_filestream.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h"  #include "xfs_inode_item.h"  #include "xfs_quota.h"  #include "xfs_trace.h" -#include "xfs_fsops.h"  #include "xfs_icache.h"  #include "xfs_bmap_util.h" @@ -500,11 +495,6 @@ xfs_inode_ag_walk_grab(  	if (!igrab(inode))  		return ENOENT; -	if (is_bad_inode(inode)) { -		IRELE(ip); -		return ENOENT; -	} -  	/* inode is valid */  	return 0; @@ -517,8 +507,7 @@ STATIC int  xfs_inode_ag_walk(  	struct xfs_mount	*mp,  	struct xfs_perag	*pag, -	int			(*execute)(struct xfs_inode *ip, -					   struct xfs_perag *pag, int flags, +	int			(*execute)(struct xfs_inode *ip, int flags,  					   void *args),  	int			flags,  	void			*args, @@ -592,7 +581,7 @@ restart:  		for (i = 0; i < nr_found; i++) {  			if (!batch[i])  				continue; -			error = execute(batch[i], pag, flags, args); +			error = execute(batch[i], flags, args);  			IRELE(batch[i]);  			if (error == EAGAIN) {  				skipped++; @@ -646,8 +635,7 @@ xfs_eofblocks_worker(  int  xfs_inode_ag_iterator(  	
struct xfs_mount	*mp, -	int			(*execute)(struct xfs_inode *ip, -					   struct xfs_perag *pag, int flags, +	int			(*execute)(struct xfs_inode *ip, int flags,  					   void *args),  	int			flags,  	void			*args) @@ -674,8 +662,7 @@ xfs_inode_ag_iterator(  int  xfs_inode_ag_iterator_tag(  	struct xfs_mount	*mp, -	int			(*execute)(struct xfs_inode *ip, -					   struct xfs_perag *pag, int flags, +	int			(*execute)(struct xfs_inode *ip, int flags,  					   void *args),  	int			flags,  	void			*args, @@ -918,8 +905,6 @@ restart:  		xfs_iflock(ip);  	} -	if (is_bad_inode(VFS_I(ip))) -		goto reclaim;  	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {  		xfs_iunpin_wait(ip);  		xfs_iflush_abort(ip, false); @@ -1221,7 +1206,6 @@ xfs_inode_match_id(  STATIC int  xfs_inode_free_eofblocks(  	struct xfs_inode	*ip, -	struct xfs_perag	*pag,  	int			flags,  	void			*args)  { diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index 9ed68bb750f..9cf017b899b 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -60,12 +60,10 @@ int xfs_icache_free_eofblocks(struct xfs_mount *, struct xfs_eofblocks *);  void xfs_eofblocks_worker(struct work_struct *);  int xfs_inode_ag_iterator(struct xfs_mount *mp, -	int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, -		int flags, void *args), +	int (*execute)(struct xfs_inode *ip, int flags, void *args),  	int flags, void *args);  int xfs_inode_ag_iterator_tag(struct xfs_mount *mp, -	int (*execute)(struct xfs_inode *ip, struct xfs_perag *pag, -		int flags, void *args), +	int (*execute)(struct xfs_inode *ip, int flags, void *args),  	int flags, void *args, int tag);  static inline int diff --git a/fs/xfs/xfs_icreate_item.c b/fs/xfs/xfs_icreate_item.c index 5a5a593994d..7e454923325 100644 --- a/fs/xfs/xfs_icreate_item.c +++ b/fs/xfs/xfs_icreate_item.c @@ -17,16 +17,18 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_shared.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" +#include "xfs_trans.h"  #include "xfs_trans_priv.h"  #include "xfs_error.h"  #include "xfs_icreate_item.h" +#include "xfs_log.h"  kmem_zone_t	*xfs_icreate_zone;		/* inode create item zone */ @@ -57,13 +59,14 @@ xfs_icreate_item_size(  STATIC void  xfs_icreate_item_format(  	struct xfs_log_item	*lip, -	struct xfs_log_iovec	*log_vector) +	struct xfs_log_vec	*lv)  {  	struct xfs_icreate_item	*icp = ICR_ITEM(lip); +	struct xfs_log_iovec	*vecp = NULL; -	log_vector->i_addr = (xfs_caddr_t)&icp->ic_format; -	log_vector->i_len  = sizeof(struct xfs_icreate_log); -	log_vector->i_type = XLOG_REG_TYPE_ICREATE; +	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ICREATE, +			&icp->ic_format, +			sizeof(struct xfs_icreate_log));  } diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index e3d75385aa7..a6115fe1ac9 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -19,29 +19,24 @@  #include "xfs.h"  #include "xfs_fs.h" +#include "xfs_shared.h"  #include "xfs_format.h" -#include "xfs_log.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_trans_space.h" -#include "xfs_trans_priv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_da_format.h"  #include "xfs_da_btree.h" -#include "xfs_dir2_format.h"  #include "xfs_dir2.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" 
 #include "xfs_attr_sf.h"  #include "xfs_attr.h" -#include "xfs_dinode.h" -#include "xfs_inode.h" +#include "xfs_trans_space.h" +#include "xfs_trans.h"  #include "xfs_buf_item.h"  #include "xfs_inode_item.h" -#include "xfs_btree.h" -#include "xfs_alloc.h"  #include "xfs_ialloc.h"  #include "xfs_bmap.h"  #include "xfs_bmap_util.h" @@ -52,6 +47,9 @@  #include "xfs_trace.h"  #include "xfs_icache.h"  #include "xfs_symlink.h" +#include "xfs_trans_priv.h" +#include "xfs_log.h" +#include "xfs_bmap_btree.h"  kmem_zone_t *xfs_inode_zone; @@ -63,6 +61,8 @@ kmem_zone_t *xfs_inode_zone;  STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *); +STATIC int xfs_iunlink_remove(xfs_trans_t *, xfs_inode_t *); +  /*   * helper function to extract extent size hint from inode   */ @@ -78,48 +78,44 @@ xfs_get_extsz_hint(  }  /* - * This is a wrapper routine around the xfs_ilock() routine used to centralize - * some grungy code.  It is used in places that wish to lock the inode solely - * for reading the extents.  The reason these places can't just call - * xfs_ilock(SHARED) is that the inode lock also guards to bringing in of the - * extents from disk for a file in b-tree format.  If the inode is in b-tree - * format, then we need to lock the inode exclusively until the extents are read - * in.  Locking it exclusively all the time would limit our parallelism - * unnecessarily, though.  What we do instead is check to see if the extents - * have been read in yet, and only lock the inode exclusively if they have not. + * These two are wrapper routines around the xfs_ilock() routine used to + * centralize some grungy code.  They are used in places that wish to lock the + * inode solely for reading the extents.  The reason these places can't just + * call xfs_ilock(ip, XFS_ILOCK_SHARED) is that the inode lock also guards to + * bringing in of the extents from disk for a file in b-tree format.  If the + * inode is in b-tree format, then we need to lock the inode exclusively until + * the extents are read in.  Locking it exclusively all the time would limit + * our parallelism unnecessarily, though.  What we do instead is check to see + * if the extents have been read in yet, and only lock the inode exclusively + * if they have not.   * - * The function returns a value which should be given to the corresponding - * xfs_iunlock_map_shared().  This value is the mode in which the lock was - * actually taken. + * The functions return a value which should be given to the corresponding + * xfs_iunlock() call.   */  uint -xfs_ilock_map_shared( -	xfs_inode_t	*ip) +xfs_ilock_data_map_shared( +	struct xfs_inode	*ip)  { -	uint	lock_mode; +	uint			lock_mode = XFS_ILOCK_SHARED; -	if ((ip->i_d.di_format == XFS_DINODE_FMT_BTREE) && -	    ((ip->i_df.if_flags & XFS_IFEXTENTS) == 0)) { +	if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE && +	    (ip->i_df.if_flags & XFS_IFEXTENTS) == 0)  		lock_mode = XFS_ILOCK_EXCL; -	} else { -		lock_mode = XFS_ILOCK_SHARED; -	} -  	xfs_ilock(ip, lock_mode); -  	return lock_mode;  } -/* - * This is simply the unlock routine to go with xfs_ilock_map_shared(). - * All it does is call xfs_iunlock() with the given lock_mode. 
- */ -void -xfs_iunlock_map_shared( -	xfs_inode_t	*ip, -	unsigned int	lock_mode) +uint +xfs_ilock_attr_map_shared( +	struct xfs_inode	*ip)  { -	xfs_iunlock(ip, lock_mode); +	uint			lock_mode = XFS_ILOCK_SHARED; + +	if (ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE && +	    (ip->i_afp->if_flags & XFS_IFEXTENTS) == 0) +		lock_mode = XFS_ILOCK_EXCL; +	xfs_ilock(ip, lock_mode); +	return lock_mode;  }  /* @@ -589,9 +585,9 @@ xfs_lookup(  	if (XFS_FORCED_SHUTDOWN(dp->i_mount))  		return XFS_ERROR(EIO); -	lock_mode = xfs_ilock_map_shared(dp); +	lock_mode = xfs_ilock_data_map_shared(dp);  	error = xfs_dir_lookup(NULL, dp, name, &inum, ci_name); -	xfs_iunlock_map_shared(dp, lock_mode); +	xfs_iunlock(dp, lock_mode);  	if (error)  		goto out; @@ -659,7 +655,6 @@ xfs_ialloc(  	uint		flags;  	int		error;  	timespec_t	tv; -	int		filestreams = 0;  	/*  	 * Call the space management code to pick @@ -686,6 +681,14 @@ xfs_ialloc(  		return error;  	ASSERT(ip != NULL); +	/* +	 * We always convert v1 inodes to v2 now - we only support filesystems +	 * with >= v2 inode capability, so there is no reason for ever leaving +	 * an inode in v1 format. +	 */ +	if (ip->i_d.di_version == 1) +		ip->i_d.di_version = 2; +  	ip->i_d.di_mode = mode;  	ip->i_d.di_onlink = 0;  	ip->i_d.di_nlink = nlink; @@ -695,27 +698,6 @@ xfs_ialloc(  	xfs_set_projid(ip, prid);  	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); -	/* -	 * If the superblock version is up to where we support new format -	 * inodes and this is currently an old format inode, then change -	 * the inode version number now.  This way we only do the conversion -	 * here rather than here and in the flush/logging code. -	 */ -	if (xfs_sb_version_hasnlink(&mp->m_sb) && -	    ip->i_d.di_version == 1) { -		ip->i_d.di_version = 2; -		/* -		 * We've already zeroed the old link count, the projid field, -		 * and the pad field. -		 */ -	} - -	/* -	 * Project ids won't be stored on disk if we are using a version 1 inode. -	 */ -	if ((prid != 0) && (ip->i_d.di_version == 1)) -		xfs_bump_ino_vers2(tp, ip); -  	if (pip && XFS_INHERIT_GID(pip)) {  		ip->i_d.di_gid = pip->i_d.di_gid;  		if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) { @@ -776,13 +758,6 @@ xfs_ialloc(  		flags |= XFS_ILOG_DEV;  		break;  	case S_IFREG: -		/* -		 * we can't set up filestreams until after the VFS inode -		 * is set up properly. -		 */ -		if (pip && xfs_inode_is_filestream(pip)) -			filestreams = 1; -		/* fall through */  	case S_IFDIR:  		if (pip && (pip->i_d.di_flags & XFS_DIFLAG_ANY)) {  			uint	di_flags = 0; @@ -848,15 +823,6 @@ xfs_ialloc(  	/* now that we have an i_mode we can setup inode ops and unlock */  	xfs_setup_inode(ip); -	/* now we have set up the vfs inode we can associate the filestream */ -	if (filestreams) { -		error = xfs_filestream_associate(pip, ip); -		if (error < 0) -			return -error; -		if (!error) -			xfs_iflags_set(ip, XFS_IFILESTREAM); -	} -  	*ipp = ip;  	return 0;  } @@ -1077,40 +1043,6 @@ xfs_droplink(  }  /* - * This gets called when the inode's version needs to be changed from 1 to 2. - * Currently this happens when the nlink field overflows the old 16-bit value - * or when chproj is called to change the project for the first time. - * As a side effect the superblock version will also get rev'd - * to contain the NLINK bit. 
- */ -void -xfs_bump_ino_vers2( -	xfs_trans_t	*tp, -	xfs_inode_t	*ip) -{ -	xfs_mount_t	*mp; - -	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); -	ASSERT(ip->i_d.di_version == 1); - -	ip->i_d.di_version = 2; -	ip->i_d.di_onlink = 0; -	memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); -	mp = tp->t_mountp; -	if (!xfs_sb_version_hasnlink(&mp->m_sb)) { -		spin_lock(&mp->m_sb_lock); -		if (!xfs_sb_version_hasnlink(&mp->m_sb)) { -			xfs_sb_version_addnlink(&mp->m_sb); -			spin_unlock(&mp->m_sb_lock); -			xfs_mod_sb(tp, XFS_SB_VERSIONNUM); -		} else { -			spin_unlock(&mp->m_sb_lock); -		} -	} -	/* Caller must log the inode */ -} - -/*   * Increment the link count on an inode & log the change.   */  int @@ -1120,22 +1052,10 @@ xfs_bumplink(  {  	xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG); -	ASSERT(ip->i_d.di_nlink > 0); +	ASSERT(ip->i_d.di_version > 1); +	ASSERT(ip->i_d.di_nlink > 0 || (VFS_I(ip)->i_state & I_LINKABLE));  	ip->i_d.di_nlink++;  	inc_nlink(VFS_I(ip)); -	if ((ip->i_d.di_version == 1) && -	    (ip->i_d.di_nlink > XFS_MAXLINK_1)) { -		/* -		 * The inode has increased its number of links beyond -		 * what can fit in an old format inode.  It now needs -		 * to be converted to a version 2 inode with a 32 bit -		 * link count.  If this is the first inode in the file -		 * system to do this, then we need to bump the superblock -		 * version number as well. -		 */ -		xfs_bump_ino_vers2(tp, ip); -	} -  	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);  	return 0;  } @@ -1170,10 +1090,7 @@ xfs_create(  	if (XFS_FORCED_SHUTDOWN(mp))  		return XFS_ERROR(EIO); -	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) -		prid = xfs_get_projid(dp); -	else -		prid = XFS_PROJID_DEFAULT; +	prid = xfs_get_initial_prid(dp);  	/*  	 * Make sure that we have allocated dquot(s) on disk. @@ -1338,6 +1255,114 @@ xfs_create(  }  int +xfs_create_tmpfile( +	struct xfs_inode	*dp, +	struct dentry		*dentry, +	umode_t			mode, +	struct xfs_inode	**ipp) +{ +	struct xfs_mount	*mp = dp->i_mount; +	struct xfs_inode	*ip = NULL; +	struct xfs_trans	*tp = NULL; +	int			error; +	uint			cancel_flags = XFS_TRANS_RELEASE_LOG_RES; +	prid_t                  prid; +	struct xfs_dquot	*udqp = NULL; +	struct xfs_dquot	*gdqp = NULL; +	struct xfs_dquot	*pdqp = NULL; +	struct xfs_trans_res	*tres; +	uint			resblks; + +	if (XFS_FORCED_SHUTDOWN(mp)) +		return XFS_ERROR(EIO); + +	prid = xfs_get_initial_prid(dp); + +	/* +	 * Make sure that we have allocated dquot(s) on disk. +	 */ +	error = xfs_qm_vop_dqalloc(dp, xfs_kuid_to_uid(current_fsuid()), +				xfs_kgid_to_gid(current_fsgid()), prid, +				XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, +				&udqp, &gdqp, &pdqp); +	if (error) +		return error; + +	resblks = XFS_IALLOC_SPACE_RES(mp); +	tp = xfs_trans_alloc(mp, XFS_TRANS_CREATE_TMPFILE); + +	tres = &M_RES(mp)->tr_create_tmpfile; +	error = xfs_trans_reserve(tp, tres, resblks, 0); +	if (error == ENOSPC) { +		/* No space at all so try a "no-allocation" reservation */ +		resblks = 0; +		error = xfs_trans_reserve(tp, tres, 0, 0); +	} +	if (error) { +		cancel_flags = 0; +		goto out_trans_cancel; +	} + +	error = xfs_trans_reserve_quota(tp, mp, udqp, gdqp, +						pdqp, resblks, 1, 0); +	if (error) +		goto out_trans_cancel; + +	error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, +				prid, resblks > 0, &ip, NULL); +	if (error) { +		if (error == ENOSPC) +			goto out_trans_cancel; +		goto out_trans_abort; +	} + +	if (mp->m_flags & XFS_MOUNT_WSYNC) +		xfs_trans_set_sync(tp); + +	/* +	 * Attach the dquot(s) to the inodes and modify them incore. 
+	 * These ids of the inode couldn't have changed since the new +	 * inode has been locked ever since it was created. +	 */ +	xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp); + +	ip->i_d.di_nlink--; +	error = xfs_iunlink(tp, ip); +	if (error) +		goto out_trans_abort; + +	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); +	if (error) +		goto out_release_inode; + +	xfs_qm_dqrele(udqp); +	xfs_qm_dqrele(gdqp); +	xfs_qm_dqrele(pdqp); + +	*ipp = ip; +	return 0; + + out_trans_abort: +	cancel_flags |= XFS_TRANS_ABORT; + out_trans_cancel: +	xfs_trans_cancel(tp, cancel_flags); + out_release_inode: +	/* +	 * Wait until after the current transaction is aborted to +	 * release the inode.  This prevents recursive transactions +	 * and deadlocks from xfs_inactive. +	 */ +	if (ip) +		IRELE(ip); + +	xfs_qm_dqrele(udqp); +	xfs_qm_dqrele(gdqp); +	xfs_qm_dqrele(pdqp); + +	return error; +} + +int  xfs_link(  	xfs_inode_t		*tdp,  	xfs_inode_t		*sip, @@ -1402,6 +1427,12 @@ xfs_link(  	xfs_bmap_init(&free_list, &first_block); +	if (sip->i_d.di_nlink == 0) { +		error = xfs_iunlink_remove(tp, sip); +		if (error) +			goto abort_return; +	} +  	error = xfs_dir_createname(tp, tdp, target_name, sip->i_ino,  					&first_block, &free_list, resblks);  	if (error) @@ -1592,16 +1623,6 @@ xfs_release(  		int truncated;  		/* -		 * If we are using filestreams, and we have an unlinked -		 * file that we are processing the last close on, then nothing -		 * will be able to reopen and write to this file. Purge this -		 * inode from the filestreams cache so that it doesn't delay -		 * teardown of the inode. -		 */ -		if ((ip->i_d.di_nlink == 0) && xfs_inode_is_filestream(ip)) -			xfs_filestream_deassociate(ip); - -		/*  		 * If we previously truncated this file and removed old data  		 * in the process, we want to initiate "early" writeout on  		 * the last close.  This is an attempt to combat the notorious @@ -1663,6 +1684,150 @@ xfs_release(  }  /* + * xfs_inactive_truncate + * + * Called to perform a truncate when an inode becomes unlinked. + */ +STATIC int +xfs_inactive_truncate( +	struct xfs_inode *ip) +{ +	struct xfs_mount	*mp = ip->i_mount; +	struct xfs_trans	*tp; +	int			error; + +	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); +	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); +	if (error) { +		ASSERT(XFS_FORCED_SHUTDOWN(mp)); +		xfs_trans_cancel(tp, 0); +		return error; +	} + +	xfs_ilock(ip, XFS_ILOCK_EXCL); +	xfs_trans_ijoin(tp, ip, 0); + +	/* +	 * Log the inode size first to prevent stale data exposure in the event +	 * of a system crash before the truncate completes. See the related +	 * comment in xfs_setattr_size() for details. +	 */ +	ip->i_d.di_size = 0; +	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); + +	error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0); +	if (error) +		goto error_trans_cancel; + +	ASSERT(ip->i_d.di_nextents == 0); + +	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); +	if (error) +		goto error_unlock; + +	xfs_iunlock(ip, XFS_ILOCK_EXCL); +	return 0; + +error_trans_cancel: +	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); +error_unlock: +	xfs_iunlock(ip, XFS_ILOCK_EXCL); +	return error; +} + +/* + * xfs_inactive_ifree() + * + * Perform the inode free when an inode is unlinked. 
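+ *
+ * Any truncation of remaining data fork extents has already been done by
+ * xfs_inactive_truncate() (or xfs_inactive_symlink() for symlinks); here we
+ * only return the inode itself to the free pool via xfs_ifree() and credit
+ * the quota inode count.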
+ */ +STATIC int +xfs_inactive_ifree( +	struct xfs_inode *ip) +{ +	xfs_bmap_free_t		free_list; +	xfs_fsblock_t		first_block; +	int			committed; +	struct xfs_mount	*mp = ip->i_mount; +	struct xfs_trans	*tp; +	int			error; + +	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); + +	/* +	 * The ifree transaction might need to allocate blocks for record +	 * insertion to the finobt. We don't want to fail here at ENOSPC, so +	 * allow ifree to dip into the reserved block pool if necessary. +	 * +	 * Freeing large sets of inodes generally means freeing inode chunks, +	 * directory and file data blocks, so this should be relatively safe. +	 * Only under severe circumstances should it be possible to free enough +	 * inodes to exhaust the reserve block pool via finobt expansion while +	 * at the same time not creating free space in the filesystem. +	 * +	 * Send a warning if the reservation does happen to fail, as the inode +	 * now remains allocated and sits on the unlinked list until the fs is +	 * repaired. +	 */ +	tp->t_flags |= XFS_TRANS_RESERVE; +	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, +				  XFS_IFREE_SPACE_RES(mp), 0); +	if (error) { +		if (error == ENOSPC) { +			xfs_warn_ratelimited(mp, +			"Failed to remove inode(s) from unlinked list. " +			"Please free space, unmount and run xfs_repair."); +		} else { +			ASSERT(XFS_FORCED_SHUTDOWN(mp)); +		} +		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES); +		return error; +	} + +	xfs_ilock(ip, XFS_ILOCK_EXCL); +	xfs_trans_ijoin(tp, ip, 0); + +	xfs_bmap_init(&free_list, &first_block); +	error = xfs_ifree(tp, ip, &free_list); +	if (error) { +		/* +		 * If we fail to free the inode, shut down.  The cancel +		 * might do that, we need to make sure.  Otherwise the +		 * inode might be lost for a long time or forever. +		 */ +		if (!XFS_FORCED_SHUTDOWN(mp)) { +			xfs_notice(mp, "%s: xfs_ifree returned error %d", +				__func__, error); +			xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); +		} +		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); +		xfs_iunlock(ip, XFS_ILOCK_EXCL); +		return error; +	} + +	/* +	 * Credit the quota account(s). The inode is gone. +	 */ +	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1); + +	/* +	 * Just ignore errors at this point.  There is nothing we can +	 * do except to try to keep going. Make sure it's not a silent +	 * error. +	 */ +	error = xfs_bmap_finish(&tp,  &free_list, &committed); +	if (error) +		xfs_notice(mp, "%s: xfs_bmap_finish returned error %d", +			__func__, error); +	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); +	if (error) +		xfs_notice(mp, "%s: xfs_trans_commit returned error %d", +			__func__, error); + +	xfs_iunlock(ip, XFS_ILOCK_EXCL); +	return 0; +} + +/*   * xfs_inactive   *   * This is called when the vnode reference count for the vnode @@ -1670,16 +1835,11 @@ xfs_release(   * now be truncated.  Also, we clear all of the read-ahead state   * kept for the inode here since the file is now closed.   */ -int +void  xfs_inactive(  	xfs_inode_t	*ip)  { -	xfs_bmap_free_t		free_list; -	xfs_fsblock_t		first_block; -	int			committed; -	struct xfs_trans	*tp;  	struct xfs_mount	*mp; -	struct xfs_trans_res	*resp;  	int			error;  	int			truncate = 0; @@ -1687,19 +1847,17 @@ xfs_inactive(  	 * If the inode is already free, then there can be nothing  	 * to clean up here.  	 
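	 * (xfs_ifree() zeroes di_mode when an inode is freed, which is what
	 * the check below relies on.)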
*/ -	if (ip->i_d.di_mode == 0 || is_bad_inode(VFS_I(ip))) { +	if (ip->i_d.di_mode == 0) {  		ASSERT(ip->i_df.if_real_bytes == 0);  		ASSERT(ip->i_df.if_broot_bytes == 0); -		return VN_INACTIVE_CACHE; +		return;  	}  	mp = ip->i_mount; -	error = 0; -  	/* If this is a read-only mount, don't do this (would generate I/O) */  	if (mp->m_flags & XFS_MOUNT_RDONLY) -		goto out; +		return;  	if (ip->i_d.di_nlink != 0) {  		/* @@ -1707,12 +1865,10 @@ xfs_inactive(  		 * cache. Post-eof blocks must be freed, lest we end up with  		 * broken free space accounting.  		 */ -		if (xfs_can_free_eofblocks(ip, true)) { -			error = xfs_free_eofblocks(mp, ip, false); -			if (error) -				return VN_INACTIVE_CACHE; -		} -		goto out; +		if (xfs_can_free_eofblocks(ip, true)) +			xfs_free_eofblocks(mp, ip, false); + +		return;  	}  	if (S_ISREG(ip->i_d.di_mode) && @@ -1722,36 +1878,14 @@ xfs_inactive(  	error = xfs_qm_dqattach(ip, 0);  	if (error) -		return VN_INACTIVE_CACHE; - -	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); -	resp = (truncate || S_ISLNK(ip->i_d.di_mode)) ? -		&M_RES(mp)->tr_itruncate : &M_RES(mp)->tr_ifree; - -	error = xfs_trans_reserve(tp, resp, 0, 0); -	if (error) { -		ASSERT(XFS_FORCED_SHUTDOWN(mp)); -		xfs_trans_cancel(tp, 0); -		return VN_INACTIVE_CACHE; -	} - -	xfs_ilock(ip, XFS_ILOCK_EXCL); -	xfs_trans_ijoin(tp, ip, 0); - -	if (S_ISLNK(ip->i_d.di_mode)) { -		error = xfs_inactive_symlink(ip, &tp); -		if (error) -			goto out_cancel; -	} else if (truncate) { -		ip->i_d.di_size = 0; -		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); +		return; -		error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK, 0); -		if (error) -			goto out_cancel; - -		ASSERT(ip->i_d.di_nextents == 0); -	} +	if (S_ISLNK(ip->i_d.di_mode)) +		error = xfs_inactive_symlink(ip); +	else if (truncate) +		error = xfs_inactive_truncate(ip); +	if (error) +		return;  	/*  	 * If there are attributes associated with the file then blow them away @@ -1762,25 +1896,9 @@ xfs_inactive(  	if (ip->i_d.di_anextents > 0) {  		ASSERT(ip->i_d.di_forkoff != 0); -		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); -		if (error) -			goto out_unlock; - -		xfs_iunlock(ip, XFS_ILOCK_EXCL); -  		error = xfs_attr_inactive(ip);  		if (error) -			goto out; - -		tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); -		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ifree, 0, 0); -		if (error) { -			xfs_trans_cancel(tp, 0); -			goto out; -		} - -		xfs_ilock(ip, XFS_ILOCK_EXCL); -		xfs_trans_ijoin(tp, ip, 0); +			return;  	}  	if (ip->i_afp) @@ -1791,52 +1909,14 @@ xfs_inactive(  	/*  	 * Free the inode.  	 */ -	xfs_bmap_init(&free_list, &first_block); -	error = xfs_ifree(tp, ip, &free_list); -	if (error) { -		/* -		 * If we fail to free the inode, shut down.  The cancel -		 * might do that, we need to make sure.  Otherwise the -		 * inode might be lost for a long time or forever. -		 */ -		if (!XFS_FORCED_SHUTDOWN(mp)) { -			xfs_notice(mp, "%s: xfs_ifree returned error %d", -				__func__, error); -			xfs_force_shutdown(mp, SHUTDOWN_META_IO_ERROR); -		} -		xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES|XFS_TRANS_ABORT); -	} else { -		/* -		 * Credit the quota account(s). The inode is gone. -		 */ -		xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1); - -		/* -		 * Just ignore errors at this point.  There is nothing we can -		 * do except to try to keep going. Make sure it's not a silent -		 * error. 
-		 */ -		error = xfs_bmap_finish(&tp,  &free_list, &committed); -		if (error) -			xfs_notice(mp, "%s: xfs_bmap_finish returned error %d", -				__func__, error); -		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); -		if (error) -			xfs_notice(mp, "%s: xfs_trans_commit returned error %d", -				__func__, error); -	} +	error = xfs_inactive_ifree(ip); +	if (error) +		return;  	/*  	 * Release the dquots held by inode, if any.  	 */  	xfs_qm_dqdetach(ip); -out_unlock: -	xfs_iunlock(ip, XFS_ILOCK_EXCL); -out: -	return VN_INACTIVE_CACHE; -out_cancel: -	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); -	goto out_unlock;  }  /* @@ -2107,8 +2187,8 @@ xfs_ifree_cluster(  {  	xfs_mount_t		*mp = free_ip->i_mount;  	int			blks_per_cluster; +	int			inodes_per_cluster;  	int			nbufs; -	int			ninodes;  	int			i, j;  	xfs_daddr_t		blkno;  	xfs_buf_t		*bp; @@ -2118,18 +2198,11 @@ xfs_ifree_cluster(  	struct xfs_perag	*pag;  	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, inum)); -	if (mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp)) { -		blks_per_cluster = 1; -		ninodes = mp->m_sb.sb_inopblock; -		nbufs = XFS_IALLOC_BLOCKS(mp); -	} else { -		blks_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) / -					mp->m_sb.sb_blocksize; -		ninodes = blks_per_cluster * mp->m_sb.sb_inopblock; -		nbufs = XFS_IALLOC_BLOCKS(mp) / blks_per_cluster; -	} +	blks_per_cluster = xfs_icluster_size_fsb(mp); +	inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog; +	nbufs = mp->m_ialloc_blks / blks_per_cluster; -	for (j = 0; j < nbufs; j++, inum += ninodes) { +	for (j = 0; j < nbufs; j++, inum += inodes_per_cluster) {  		blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),  					 XFS_INO_TO_AGBNO(mp, inum)); @@ -2191,7 +2264,7 @@ xfs_ifree_cluster(  		 * transaction stale above, which means there is no point in  		 * even trying to lock them.  		 */ -		for (i = 0; i < ninodes; i++) { +		for (i = 0; i < inodes_per_cluster; i++) {  retry:  			rcu_read_lock();  			ip = radix_tree_lookup(&pag->pag_ici_root, @@ -2370,6 +2443,33 @@ xfs_iunpin_wait(  		__xfs_iunpin_wait(ip);  } +/* + * Removing an inode from the namespace involves removing the directory entry + * and dropping the link count on the inode. Removing the directory entry can + * result in locking an AGF (directory blocks were freed) and removing a link + * count can result in placing the inode on an unlinked list which results in + * locking an AGI. + * + * The big problem here is that we have an ordering constraint on AGF and AGI + * locking - inode allocation locks the AGI, then can allocate a new extent for + * new inodes, locking the AGF after the AGI. Similarly, freeing the inode + * removes the inode from the unlinked list, requiring that we lock the AGI + * first, and then freeing the inode can result in an inode chunk being freed + * and hence freeing disk space requiring that we lock an AGF. + * + * Hence the ordering that is imposed by other parts of the code is AGI before + * AGF. This means we cannot remove the directory entry before we drop the inode + * reference count and put it on the unlinked list as this results in a lock + * order of AGF then AGI, and this can deadlock against inode allocation and + * freeing. Therefore we must drop the link counts before we remove the + * directory entry. + * + * This is still safe from a transactional point of view - it is not until we + * get to xfs_bmap_finish() that we have the possibility of multiple + * transactions in this operation. 
Hence as long as we remove the directory + * entry and drop the link count in the first transaction of the remove + * operation, there are no transactional constraints on the ordering here. + */  int  xfs_remove(  	xfs_inode_t             *dp, @@ -2439,6 +2539,7 @@ xfs_remove(  	/*  	 * If we're removing a directory perform some additional validation.  	 */ +	cancel_flags |= XFS_TRANS_ABORT;  	if (is_dir) {  		ASSERT(ip->i_d.di_nlink >= 2);  		if (ip->i_d.di_nlink != 2) { @@ -2449,31 +2550,16 @@ xfs_remove(  			error = XFS_ERROR(ENOTEMPTY);  			goto out_trans_cancel;  		} -	} - -	xfs_bmap_init(&free_list, &first_block); -	error = xfs_dir_removename(tp, dp, name, ip->i_ino, -					&first_block, &free_list, resblks); -	if (error) { -		ASSERT(error != ENOENT); -		goto out_bmap_cancel; -	} -	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); -	if (is_dir) { -		/* -		 * Drop the link from ip's "..". -		 */ +		/* Drop the link from ip's "..".  */  		error = xfs_droplink(tp, dp);  		if (error) -			goto out_bmap_cancel; +			goto out_trans_cancel; -		/* -		 * Drop the "." link from ip to self. -		 */ +		/* Drop the "." link from ip to self.  */  		error = xfs_droplink(tp, ip);  		if (error) -			goto out_bmap_cancel; +			goto out_trans_cancel;  	} else {  		/*  		 * When removing a non-directory we need to log the parent @@ -2482,20 +2568,24 @@ xfs_remove(  		 */  		xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);  	} +	xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); -	/* -	 * Drop the link from dp to ip. -	 */ +	/* Drop the link from dp to ip. */  	error = xfs_droplink(tp, ip);  	if (error) -		goto out_bmap_cancel; +		goto out_trans_cancel; -	/* -	 * Determine if this is the last link while -	 * we are in the transaction. -	 */ +	/* Determine if this is the last link while the inode is locked */  	link_zero = (ip->i_d.di_nlink == 0); +	xfs_bmap_init(&free_list, &first_block); +	error = xfs_dir_removename(tp, dp, name, ip->i_ino, +					&first_block, &free_list, resblks); +	if (error) { +		ASSERT(error != ENOENT); +		goto out_bmap_cancel; +	} +  	/*  	 * If this is a synchronous mount, make sure that the  	 * remove transaction goes to disk before returning to @@ -2512,20 +2602,13 @@ xfs_remove(  	if (error)  		goto std_return; -	/* -	 * If we are using filestreams, kill the stream association. -	 * If the file is still open it may get a new one but that -	 * will get killed on last close in xfs_close() so we don't -	 * have to worry about that. 
-	 */ -	if (!is_dir && link_zero && xfs_inode_is_filestream(ip)) +	if (is_dir && xfs_inode_is_filestream(ip))  		xfs_filestream_deassociate(ip);  	return 0;   out_bmap_cancel:  	xfs_bmap_cancel(&free_list); -	cancel_flags |= XFS_TRANS_ABORT;   out_trans_cancel:  	xfs_trans_cancel(tp, cancel_flags);   std_return: @@ -2856,13 +2939,13 @@ xfs_iflush_cluster(  	pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); -	inodes_per_cluster = XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog; +	inodes_per_cluster = mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog;  	ilist_size = inodes_per_cluster * sizeof(xfs_inode_t *);  	ilist = kmem_alloc(ilist_size, KM_MAYFAIL|KM_NOFS);  	if (!ilist)  		goto out_put; -	mask = ~(((XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog)) - 1); +	mask = ~(((mp->m_inode_cluster_size >> mp->m_sb.sb_inodelog)) - 1);  	first_index = XFS_INO_TO_AGINO(mp, ip->i_ino) & mask;  	rcu_read_lock();  	/* really need a gang lookup range call here */ @@ -3107,6 +3190,7 @@ xfs_iflush_int(  	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||  	       ip->i_d.di_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK));  	ASSERT(iip != NULL && iip->ili_fields != 0); +	ASSERT(ip->i_d.di_version > 1);  	/* set *dip = inode's place in the buffer */  	dip = (xfs_dinode_t *)xfs_buf_offset(bp, ip->i_imap.im_boffset); @@ -3167,7 +3251,7 @@ xfs_iflush_int(  	}  	/* -	 * Inode item log recovery for v1/v2 inodes are dependent on the +	 * Inode item log recovery for v2 inodes are dependent on the  	 * di_flushiter count for correct sequencing. We bump the flush  	 * iteration count so we can detect flushes which postdate a log record  	 * during recovery. This is redundant as we now log every change and @@ -3190,40 +3274,9 @@ xfs_iflush_int(  	if (ip->i_d.di_flushiter == DI_MAX_FLUSH)  		ip->i_d.di_flushiter = 0; -	/* -	 * If this is really an old format inode and the superblock version -	 * has not been updated to support only new format inodes, then -	 * convert back to the old inode format.  If the superblock version -	 * has been updated, then make the conversion permanent. -	 */ -	ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); -	if (ip->i_d.di_version == 1) { -		if (!xfs_sb_version_hasnlink(&mp->m_sb)) { -			/* -			 * Convert it back. -			 */ -			ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); -			dip->di_onlink = cpu_to_be16(ip->i_d.di_nlink); -		} else { -			/* -			 * The superblock version has already been bumped, -			 * so just make the conversion to the new inode -			 * format permanent. 
-			 */ -			ip->i_d.di_version = 2; -			dip->di_version = 2; -			ip->i_d.di_onlink = 0; -			dip->di_onlink = 0; -			memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); -			memset(&(dip->di_pad[0]), 0, -			      sizeof(dip->di_pad)); -			ASSERT(xfs_get_projid(ip) == 0); -		} -	} - -	xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK, bp); +	xfs_iflush_fork(ip, dip, iip, XFS_DATA_FORK);  	if (XFS_IFORK_Q(ip)) -		xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK, bp); +		xfs_iflush_fork(ip, dip, iip, XFS_ATTR_FORK);  	xfs_inobp_check(mp, bp);  	/* diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 4a91358c147..f72bffa6726 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -20,11 +20,11 @@  #include "xfs_inode_buf.h"  #include "xfs_inode_fork.h" +#include "xfs_dinode.h"  /*   * Kernel only inode definitions   */ -  struct xfs_dinode;  struct xfs_inode;  struct xfs_buf; @@ -50,6 +50,9 @@ typedef struct xfs_inode {  	xfs_ifork_t		*i_afp;		/* attribute fork pointer */  	xfs_ifork_t		i_df;		/* data fork */ +	/* operations vectors */ +	const struct xfs_dir_ops *d_ops;		/* directory ops vector */ +  	/* Transaction and locking information. */  	struct xfs_inode_log_item *i_itemp;	/* logging information */  	mrlock_t		i_lock;		/* inode lock */ @@ -190,6 +193,15 @@ xfs_set_projid(struct xfs_inode *ip,  	ip->i_d.di_projid_lo = (__uint16_t) (projid & 0xffff);  } +static inline prid_t +xfs_get_initial_prid(struct xfs_inode *dp) +{ +	if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) +		return xfs_get_projid(dp); + +	return XFS_PROJID_DEFAULT; +} +  /*   * In-core inode flags.   */ @@ -197,7 +209,6 @@ xfs_set_projid(struct xfs_inode *ip,  #define XFS_ISTALE		(1 << 1) /* inode has been staled */  #define XFS_IRECLAIMABLE	(1 << 2) /* inode can be reclaimed */  #define XFS_INEW		(1 << 3) /* inode has just been allocated */ -#define XFS_IFILESTREAM		(1 << 4) /* inode is in a filestream dir. */  #define XFS_ITRUNCATED		(1 << 5) /* truncated down so flush-on-close */  #define XFS_IDIRTY_RELEASE	(1 << 6) /* dirty release already seen */  #define __XFS_IFLOCK_BIT	7	 /* inode is being flushed right now */ @@ -213,8 +224,7 @@ xfs_set_projid(struct xfs_inode *ip,   */  #define XFS_IRECLAIM_RESET_FLAGS	\  	(XFS_IRECLAIMABLE | XFS_IRECLAIM | \ -	 XFS_IDIRTY_RELEASE | XFS_ITRUNCATED | \ -	 XFS_IFILESTREAM); +	 XFS_IDIRTY_RELEASE | XFS_ITRUNCATED)  /*   * Synchronize processes attempting to flush the in-core inode back to disk. 
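The inodes_per_cluster and mask arithmetic in the xfs_iflush_cluster hunk above is plain power-of-two math. A minimal userspace sketch with assumed geometry (8192-byte inode clusters and 512-byte inodes, so sb_inodelog = 9) shows how the first inode of a cluster is derived:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t inode_cluster_size = 8192;	/* assumed m_inode_cluster_size */
	uint32_t sb_inodelog = 9;		/* assumed log2(inode size): 512-byte inodes */

	uint32_t inodes_per_cluster = inode_cluster_size >> sb_inodelog;	/* 16 */
	uint32_t mask = ~(inodes_per_cluster - 1);	/* rounds an agino down to its cluster */

	uint32_t agino = 1234567;			/* any inode number within the AG */
	uint32_t first_index = agino & mask;		/* first inode of that cluster */

	assert(inodes_per_cluster == 16);
	assert(first_index % inodes_per_cluster == 0);
	assert(agino - first_index < inodes_per_cluster);
	printf("cluster of agino %u starts at %u\n", agino, first_index);
	return 0;
}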
@@ -316,11 +326,13 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)  int		xfs_release(struct xfs_inode *ip); -int		xfs_inactive(struct xfs_inode *ip); +void		xfs_inactive(struct xfs_inode *ip);  int		xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,  			   struct xfs_inode **ipp, struct xfs_name *ci_name);  int		xfs_create(struct xfs_inode *dp, struct xfs_name *name,  			   umode_t mode, xfs_dev_t rdev, struct xfs_inode **ipp); +int		xfs_create_tmpfile(struct xfs_inode *dp, struct dentry *dentry, +			   umode_t mode, struct xfs_inode **ipp);  int		xfs_remove(struct xfs_inode *dp, struct xfs_name *name,  			   struct xfs_inode *ip);  int		xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, @@ -335,8 +347,8 @@ int		xfs_ilock_nowait(xfs_inode_t *, uint);  void		xfs_iunlock(xfs_inode_t *, uint);  void		xfs_ilock_demote(xfs_inode_t *, uint);  int		xfs_isilocked(xfs_inode_t *, uint); -uint		xfs_ilock_map_shared(xfs_inode_t *); -void		xfs_iunlock_map_shared(xfs_inode_t *, uint); +uint		xfs_ilock_data_map_shared(struct xfs_inode *); +uint		xfs_ilock_attr_map_shared(struct xfs_inode *);  int		xfs_ialloc(struct xfs_trans *, xfs_inode_t *, umode_t,  			   xfs_nlink_t, xfs_dev_t, prid_t, int,  			   struct xfs_buf **, xfs_inode_t **); @@ -365,7 +377,6 @@ int		xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t,  			       struct xfs_inode **, int *);  int		xfs_droplink(struct xfs_trans *, struct xfs_inode *);  int		xfs_bumplink(struct xfs_trans *, struct xfs_inode *); -void		xfs_bump_ino_vers2(struct xfs_trans *, struct xfs_inode *);  /* from xfs_file.c */  int		xfs_zero_eof(struct xfs_inode *, xfs_off_t, xfs_fsize_t); diff --git a/fs/xfs/xfs_inode_buf.c b/fs/xfs/xfs_inode_buf.c index 63382d37f56..cb35ae41d4a 100644 --- a/fs/xfs/xfs_inode_buf.c +++ b/fs/xfs/xfs_inode_buf.c @@ -17,20 +17,20 @@   */  #include "xfs.h"  #include "xfs_fs.h" +#include "xfs_shared.h"  #include "xfs_format.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h"  #include "xfs_error.h"  #include "xfs_cksum.h"  #include "xfs_icache.h" +#include "xfs_trans.h"  #include "xfs_ialloc.h" +#include "xfs_dinode.h"  /*   * Check that none of the inode's in the buffer have a next @@ -102,8 +102,7 @@ xfs_inode_buf_verify(  			}  			xfs_buf_ioerror(bp, EFSCORRUPTED); -			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH, -					     mp, dip); +			xfs_verifier_error(bp);  #ifdef DEBUG  			xfs_alert(mp,  				"bad inode magic/vsn daddr %lld #%d (magic=%x)", @@ -306,7 +305,7 @@ xfs_dinode_verify(  	if (!xfs_sb_version_hascrc(&mp->m_sb))  		return false;  	if (!xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, -			      offsetof(struct xfs_dinode, di_crc))) +			      XFS_DINODE_CRC_OFF))  		return false;  	if (be64_to_cpu(dip->di_ino) != ip->i_ino)  		return false; @@ -327,7 +326,7 @@ xfs_dinode_calc_crc(  	ASSERT(xfs_sb_version_hascrc(&mp->m_sb));  	crc = xfs_start_cksum((char *)dip, mp->m_sb.sb_inodesize, -			      offsetof(struct xfs_dinode, di_crc)); +			      XFS_DINODE_CRC_OFF);  	dip->di_crc = xfs_end_cksum(crc);  } @@ -438,17 +437,16 @@ xfs_iread(  	}  	/* -	 * The inode format changed when we moved the link count and -	 * made it 32 bits long.  If this is an old format inode, -	 * convert it in memory to look like a new one.  
If it gets -	 * flushed to disk we will convert back before flushing or -	 * logging it.  We zero out the new projid field and the old link -	 * count field.  We'll handle clearing the pad field (the remains -	 * of the old uuid field) when we actually convert the inode to -	 * the new format. We don't change the version number so that we -	 * can distinguish this from a real new format inode. +	 * Automatically convert version 1 inode formats in memory to version 2 +	 * inode format. If the inode is modified, it will get logged and +	 * rewritten as a version 2 inode. We can do this because we set the +	 * superblock feature bit for v2 inodes unconditionally during mount +	 * and it means the rest of the code can assume the inode version is 2 +	 * or higher.  	 */  	if (ip->i_d.di_version == 1) { +		ip->i_d.di_version = 2; +		memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));  		ip->i_d.di_nlink = ip->i_d.di_onlink;  		ip->i_d.di_onlink = 0;  		xfs_set_projid(ip, 0); diff --git a/fs/xfs/xfs_inode_buf.h b/fs/xfs/xfs_inode_buf.h index abba0ae8cf2..9308c47f2a5 100644 --- a/fs/xfs/xfs_inode_buf.h +++ b/fs/xfs/xfs_inode_buf.h @@ -47,7 +47,4 @@ void	xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);  #define	xfs_inobp_check(mp, bp)  #endif /* DEBUG */ -extern const struct xfs_buf_ops xfs_inode_buf_ops; -extern const struct xfs_buf_ops xfs_inode_buf_ra_ops; -  #endif	/* __XFS_INODE_BUF_H__ */ diff --git a/fs/xfs/xfs_inode_fork.c b/fs/xfs/xfs_inode_fork.c index 02f1083955b..b031e8d0d92 100644 --- a/fs/xfs/xfs_inode_fork.c +++ b/fs/xfs/xfs_inode_fork.c @@ -20,31 +20,21 @@  #include "xfs.h"  #include "xfs_fs.h"  #include "xfs_format.h" -#include "xfs_log.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h"  #include "xfs_inode.h" -#include "xfs_buf_item.h" +#include "xfs_trans.h"  #include "xfs_inode_item.h" -#include "xfs_btree.h" -#include "xfs_alloc.h" -#include "xfs_ialloc.h" +#include "xfs_bmap_btree.h"  #include "xfs_bmap.h"  #include "xfs_error.h" -#include "xfs_quota.h" -#include "xfs_filestream.h" -#include "xfs_cksum.h"  #include "xfs_trace.h" -#include "xfs_icache.h" +#include "xfs_attr_sf.h" +#include "xfs_dinode.h"  kmem_zone_t *xfs_ifork_zone; @@ -441,6 +431,8 @@ xfs_iread_extents(  	xfs_ifork_t	*ifp;  	xfs_extnum_t	nextents; +	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); +  	if (unlikely(XFS_IFORK_FORMAT(ip, whichfork) != XFS_DINODE_FMT_BTREE)) {  		XFS_ERROR_REPORT("xfs_iread_extents", XFS_ERRLEVEL_LOW,  				 ip->i_mount); @@ -731,15 +723,16 @@ xfs_idestroy_fork(  }  /* - * xfs_iextents_copy() + * Convert in-core extents to on-disk form   * - * This is called to copy the REAL extents (as opposed to the delayed - * allocation extents) from the inode into the given buffer.  It - * returns the number of bytes copied into the buffer. + * For either the data or attr fork in extent format, we need to endian convert + * the in-core extent as we place them into the on-disk inode.   * - * If there are no delayed allocation extents, then we can just - * memcpy() the extents into the buffer.  Otherwise, we need to - * examine each extent in turn and skip those which are delayed. 
+ * In the case of the data fork, the in-core and on-disk fork sizes can be + * different due to delayed allocation extents. We only copy on-disk extents + * here, so callers must always use the physical fork size to determine the + * size of the buffer passed to this routine.  We will return the size actually + * used.   */  int  xfs_iextents_copy( @@ -805,8 +798,7 @@ xfs_iflush_fork(  	xfs_inode_t		*ip,  	xfs_dinode_t		*dip,  	xfs_inode_log_item_t	*iip, -	int			whichfork, -	xfs_buf_t		*bp) +	int			whichfork)  {  	char			*cp;  	xfs_ifork_t		*ifp; @@ -1031,15 +1023,14 @@ xfs_iext_add(  		 * the next index needed in the indirection array.  		 */  		else { -			int	count = ext_diff; +			uint	count = ext_diff;  			while (count) {  				erp = xfs_iext_irec_new(ifp, erp_idx); -				erp->er_extcount = count; -				count -= MIN(count, (int)XFS_LINEAR_EXTS); -				if (count) { +				erp->er_extcount = min(count, XFS_LINEAR_EXTS); +				count -= erp->er_extcount; +				if (count)  					erp_idx++; -				}  			}  		}  	} @@ -1359,7 +1350,7 @@ xfs_iext_remove_indirect(  void  xfs_iext_realloc_direct(  	xfs_ifork_t	*ifp,		/* inode fork pointer */ -	int		new_size)	/* new size of extents */ +	int		new_size)	/* new size of extents after adding */  {  	int		rnew_size;	/* real new size of extents */ @@ -1397,13 +1388,8 @@ xfs_iext_realloc_direct(  				rnew_size - ifp->if_real_bytes);  		}  	} -	/* -	 * Switch from the inline extent buffer to a direct -	 * extent list. Be sure to include the inline extent -	 * bytes in new_size. -	 */ +	/* Switch from the inline extent buffer to a direct extent list */  	else { -		new_size += ifp->if_bytes;  		if (!is_power_of_2(new_size)) {  			rnew_size = roundup_pow_of_two(new_size);  		} diff --git a/fs/xfs/xfs_inode_fork.h b/fs/xfs/xfs_inode_fork.h index 28661a0d905..7d3b1ed6dcb 100644 --- a/fs/xfs/xfs_inode_fork.h +++ b/fs/xfs/xfs_inode_fork.h @@ -19,6 +19,7 @@  #define	__XFS_INODE_FORK_H__  struct xfs_inode_log_item; +struct xfs_dinode;  /*   * The following xfs_ext_irec_t struct introduces a second (top) level @@ -126,8 +127,7 @@ typedef struct xfs_ifork {  int		xfs_iformat_fork(struct xfs_inode *, struct xfs_dinode *);  void		xfs_iflush_fork(struct xfs_inode *, struct xfs_dinode *, -				struct xfs_inode_log_item *, int, -				struct xfs_buf *); +				struct xfs_inode_log_item *, int);  void		xfs_idestroy_fork(struct xfs_inode *, int);  void		xfs_idata_realloc(struct xfs_inode *, int, int);  void		xfs_iroot_realloc(struct xfs_inode *, int, int); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index 37808110984..a640137b357 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -17,19 +17,20 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_trans_priv.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h" +#include "xfs_trans.h"  #include "xfs_inode_item.h"  #include "xfs_error.h"  #include "xfs_trace.h" +#include "xfs_trans_priv.h" +#include "xfs_dinode.h" +#include "xfs_log.h"  kmem_zone_t	*xfs_ili_zone;		/* inode log item zone */ @@ -39,27 +40,14 @@ static inline struct xfs_inode_log_item *INODE_ITEM(struct xfs_log_item *lip)  	return container_of(lip, struct xfs_inode_log_item, ili_item);  } - -/* - * This returns the number of iovecs needed to log the given inode item. 
- * - * We need one iovec for the inode log format structure, one for the - * inode core, and possibly one for the inode data/extents/b-tree root - * and one for the inode attribute data/extents/b-tree root. - */  STATIC void -xfs_inode_item_size( -	struct xfs_log_item	*lip, +xfs_inode_item_data_fork_size( +	struct xfs_inode_log_item *iip,  	int			*nvecs,  	int			*nbytes)  { -	struct xfs_inode_log_item *iip = INODE_ITEM(lip);  	struct xfs_inode	*ip = iip->ili_inode; -	*nvecs += 2; -	*nbytes += sizeof(struct xfs_inode_log_format) + -		   xfs_icdinode_size(ip->i_d.di_version); -  	switch (ip->i_d.di_format) {  	case XFS_DINODE_FMT_EXTENTS:  		if ((iip->ili_fields & XFS_ILOG_DEXT) && @@ -70,7 +58,6 @@ xfs_inode_item_size(  			*nvecs += 1;  		}  		break; -  	case XFS_DINODE_FMT_BTREE:  		if ((iip->ili_fields & XFS_ILOG_DBROOT) &&  		    ip->i_df.if_broot_bytes > 0) { @@ -78,7 +65,6 @@ xfs_inode_item_size(  			*nvecs += 1;  		}  		break; -  	case XFS_DINODE_FMT_LOCAL:  		if ((iip->ili_fields & XFS_ILOG_DDATA) &&  		    ip->i_df.if_bytes > 0) { @@ -90,19 +76,20 @@ xfs_inode_item_size(  	case XFS_DINODE_FMT_DEV:  	case XFS_DINODE_FMT_UUID:  		break; -  	default:  		ASSERT(0);  		break;  	} +} -	if (!XFS_IFORK_Q(ip)) -		return; - +STATIC void +xfs_inode_item_attr_fork_size( +	struct xfs_inode_log_item *iip, +	int			*nvecs, +	int			*nbytes) +{ +	struct xfs_inode	*ip = iip->ili_inode; -	/* -	 * Log any necessary attribute data. -	 */  	switch (ip->i_d.di_aformat) {  	case XFS_DINODE_FMT_EXTENTS:  		if ((iip->ili_fields & XFS_ILOG_AEXT) && @@ -113,7 +100,6 @@ xfs_inode_item_size(  			*nvecs += 1;  		}  		break; -  	case XFS_DINODE_FMT_BTREE:  		if ((iip->ili_fields & XFS_ILOG_ABROOT) &&  		    ip->i_afp->if_broot_bytes > 0) { @@ -121,7 +107,6 @@ xfs_inode_item_size(  			*nvecs += 1;  		}  		break; -  	case XFS_DINODE_FMT_LOCAL:  		if ((iip->ili_fields & XFS_ILOG_ADATA) &&  		    ip->i_afp->if_bytes > 0) { @@ -129,7 +114,6 @@ xfs_inode_item_size(  			*nvecs += 1;  		}  		break; -  	default:  		ASSERT(0);  		break; @@ -137,98 +121,39 @@ xfs_inode_item_size(  }  /* - * xfs_inode_item_format_extents - convert in-core extents to on-disk form - * - * For either the data or attr fork in extent format, we need to endian convert - * the in-core extent as we place them into the on-disk inode. In this case, we - * need to do this conversion before we write the extents into the log. Because - * we don't have the disk inode to write into here, we allocate a buffer and - * format the extents into it via xfs_iextents_copy(). We free the buffer in - * the unlock routine after the copy for the log has been made. + * This returns the number of iovecs needed to log the given inode item.   * - * In the case of the data fork, the in-core and on-disk fork sizes can be - * different due to delayed allocation extents. We only log on-disk extents - * here, so always use the physical fork size to determine the size of the - * buffer we need to allocate. + * We need one iovec for the inode log format structure, one for the + * inode core, and possibly one for the inode data/extents/b-tree root + * and one for the inode attribute data/extents/b-tree root.   
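As a stand-alone sketch of that accounting (the booleans are assumptions standing in for the ili_fields and fork checks made by the code below; this is not the kernel routine itself):

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* Count log iovecs: format struct + inode core always, plus optional fork payloads. */
static int toy_inode_item_nvecs(bool log_data_fork, bool log_attr_fork)
{
	int nvecs = 2;			/* inode log format structure + inode core */

	if (log_data_fork)
		nvecs++;		/* data extents, btree root, or local data */
	if (log_attr_fork)
		nvecs++;		/* attr extents, btree root, or local data */
	return nvecs;
}

int main(void)
{
	assert(toy_inode_item_nvecs(false, false) == 2);
	assert(toy_inode_item_nvecs(true,  false) == 3);
	assert(toy_inode_item_nvecs(true,  true)  == 4);	/* worst case */
	printf("max iovecs per inode item: %d\n", toy_inode_item_nvecs(true, true));
	return 0;
}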
*/  STATIC void -xfs_inode_item_format_extents( -	struct xfs_inode	*ip, -	struct xfs_log_iovec	*vecp, -	int			whichfork, -	int			type) +xfs_inode_item_size( +	struct xfs_log_item	*lip, +	int			*nvecs, +	int			*nbytes)  { -	xfs_bmbt_rec_t		*ext_buffer; +	struct xfs_inode_log_item *iip = INODE_ITEM(lip); +	struct xfs_inode	*ip = iip->ili_inode; -	ext_buffer = kmem_alloc(XFS_IFORK_SIZE(ip, whichfork), KM_SLEEP); -	if (whichfork == XFS_DATA_FORK) -		ip->i_itemp->ili_extents_buf = ext_buffer; -	else -		ip->i_itemp->ili_aextents_buf = ext_buffer; +	*nvecs += 2; +	*nbytes += sizeof(struct xfs_inode_log_format) + +		   xfs_icdinode_size(ip->i_d.di_version); -	vecp->i_addr = ext_buffer; -	vecp->i_len = xfs_iextents_copy(ip, ext_buffer, whichfork); -	vecp->i_type = type; +	xfs_inode_item_data_fork_size(iip, nvecs, nbytes); +	if (XFS_IFORK_Q(ip)) +		xfs_inode_item_attr_fork_size(iip, nvecs, nbytes);  } -/* - * This is called to fill in the vector of log iovecs for the - * given inode log item.  It fills the first item with an inode - * log format structure, the second with the on-disk inode structure, - * and a possible third and/or fourth with the inode data/extents/b-tree - * root and inode attributes data/extents/b-tree root. - */  STATIC void -xfs_inode_item_format( -	struct xfs_log_item	*lip, -	struct xfs_log_iovec	*vecp) +xfs_inode_item_format_data_fork( +	struct xfs_inode_log_item *iip, +	struct xfs_inode_log_format *ilf, +	struct xfs_log_vec	*lv, +	struct xfs_log_iovec	**vecp)  { -	struct xfs_inode_log_item *iip = INODE_ITEM(lip);  	struct xfs_inode	*ip = iip->ili_inode; -	uint			nvecs;  	size_t			data_bytes; -	xfs_mount_t		*mp; - -	vecp->i_addr = &iip->ili_format; -	vecp->i_len  = sizeof(xfs_inode_log_format_t); -	vecp->i_type = XLOG_REG_TYPE_IFORMAT; -	vecp++; -	nvecs	     = 1; - -	vecp->i_addr = &ip->i_d; -	vecp->i_len  = xfs_icdinode_size(ip->i_d.di_version); -	vecp->i_type = XLOG_REG_TYPE_ICORE; -	vecp++; -	nvecs++; - -	/* -	 * If this is really an old format inode, then we need to -	 * log it as such.  This means that we have to copy the link -	 * count from the new field to the old.  We don't have to worry -	 * about the new fields, because nothing trusts them as long as -	 * the old inode version number is there.  If the superblock already -	 * has a new version number, then we don't bother converting back. -	 */ -	mp = ip->i_mount; -	ASSERT(ip->i_d.di_version == 1 || xfs_sb_version_hasnlink(&mp->m_sb)); -	if (ip->i_d.di_version == 1) { -		if (!xfs_sb_version_hasnlink(&mp->m_sb)) { -			/* -			 * Convert it back. -			 */ -			ASSERT(ip->i_d.di_nlink <= XFS_MAXLINK_1); -			ip->i_d.di_onlink = ip->i_d.di_nlink; -		} else { -			/* -			 * The superblock version has already been bumped, -			 * so just make the conversion to the new inode -			 * format permanent. 
-			 */ -			ip->i_d.di_version = 2; -			ip->i_d.di_onlink = 0; -			memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad)); -		} -	}  	switch (ip->i_d.di_format) {  	case XFS_DINODE_FMT_EXTENTS: @@ -239,36 +164,23 @@ xfs_inode_item_format(  		if ((iip->ili_fields & XFS_ILOG_DEXT) &&  		    ip->i_d.di_nextents > 0 &&  		    ip->i_df.if_bytes > 0) { +			struct xfs_bmbt_rec *p; +  			ASSERT(ip->i_df.if_u1.if_extents != NULL);  			ASSERT(ip->i_df.if_bytes / sizeof(xfs_bmbt_rec_t) > 0); -			ASSERT(iip->ili_extents_buf == NULL); - -#ifdef XFS_NATIVE_HOST -                       if (ip->i_d.di_nextents == ip->i_df.if_bytes / -                                               (uint)sizeof(xfs_bmbt_rec_t)) { -				/* -				 * There are no delayed allocation -				 * extents, so just point to the -				 * real extents array. -				 */ -				vecp->i_addr = ip->i_df.if_u1.if_extents; -				vecp->i_len = ip->i_df.if_bytes; -				vecp->i_type = XLOG_REG_TYPE_IEXT; -			} else -#endif -			{ -				xfs_inode_item_format_extents(ip, vecp, -					XFS_DATA_FORK, XLOG_REG_TYPE_IEXT); -			} -			ASSERT(vecp->i_len <= ip->i_df.if_bytes); -			iip->ili_format.ilf_dsize = vecp->i_len; -			vecp++; -			nvecs++; + +			p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IEXT); +			data_bytes = xfs_iextents_copy(ip, p, XFS_DATA_FORK); +			xlog_finish_iovec(lv, *vecp, data_bytes); + +			ASSERT(data_bytes <= ip->i_df.if_bytes); + +			ilf->ilf_dsize = data_bytes; +			ilf->ilf_size++;  		} else {  			iip->ili_fields &= ~XFS_ILOG_DEXT;  		}  		break; -  	case XFS_DINODE_FMT_BTREE:  		iip->ili_fields &=  			~(XFS_ILOG_DDATA | XFS_ILOG_DEXT | @@ -277,80 +189,70 @@ xfs_inode_item_format(  		if ((iip->ili_fields & XFS_ILOG_DBROOT) &&  		    ip->i_df.if_broot_bytes > 0) {  			ASSERT(ip->i_df.if_broot != NULL); -			vecp->i_addr = ip->i_df.if_broot; -			vecp->i_len = ip->i_df.if_broot_bytes; -			vecp->i_type = XLOG_REG_TYPE_IBROOT; -			vecp++; -			nvecs++; -			iip->ili_format.ilf_dsize = ip->i_df.if_broot_bytes; +			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IBROOT, +					ip->i_df.if_broot, +					ip->i_df.if_broot_bytes); +			ilf->ilf_dsize = ip->i_df.if_broot_bytes; +			ilf->ilf_size++;  		} else {  			ASSERT(!(iip->ili_fields &  				 XFS_ILOG_DBROOT));  			iip->ili_fields &= ~XFS_ILOG_DBROOT;  		}  		break; -  	case XFS_DINODE_FMT_LOCAL:  		iip->ili_fields &=  			~(XFS_ILOG_DEXT | XFS_ILOG_DBROOT |  			  XFS_ILOG_DEV | XFS_ILOG_UUID);  		if ((iip->ili_fields & XFS_ILOG_DDATA) &&  		    ip->i_df.if_bytes > 0) { -			ASSERT(ip->i_df.if_u1.if_data != NULL); -			ASSERT(ip->i_d.di_size > 0); - -			vecp->i_addr = ip->i_df.if_u1.if_data;  			/*  			 * Round i_bytes up to a word boundary.  			 * The underlying memory is guaranteed to  			 * to be there by xfs_idata_realloc().  			 
*/  			data_bytes = roundup(ip->i_df.if_bytes, 4); -			ASSERT((ip->i_df.if_real_bytes == 0) || -			       (ip->i_df.if_real_bytes == data_bytes)); -			vecp->i_len = (int)data_bytes; -			vecp->i_type = XLOG_REG_TYPE_ILOCAL; -			vecp++; -			nvecs++; -			iip->ili_format.ilf_dsize = (unsigned)data_bytes; +			ASSERT(ip->i_df.if_real_bytes == 0 || +			       ip->i_df.if_real_bytes == data_bytes); +			ASSERT(ip->i_df.if_u1.if_data != NULL); +			ASSERT(ip->i_d.di_size > 0); +			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_ILOCAL, +					ip->i_df.if_u1.if_data, data_bytes); +			ilf->ilf_dsize = (unsigned)data_bytes; +			ilf->ilf_size++;  		} else {  			iip->ili_fields &= ~XFS_ILOG_DDATA;  		}  		break; -  	case XFS_DINODE_FMT_DEV:  		iip->ili_fields &=  			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |  			  XFS_ILOG_DEXT | XFS_ILOG_UUID); -		if (iip->ili_fields & XFS_ILOG_DEV) { -			iip->ili_format.ilf_u.ilfu_rdev = -				ip->i_df.if_u2.if_rdev; -		} +		if (iip->ili_fields & XFS_ILOG_DEV) +			ilf->ilf_u.ilfu_rdev = ip->i_df.if_u2.if_rdev;  		break; -  	case XFS_DINODE_FMT_UUID:  		iip->ili_fields &=  			~(XFS_ILOG_DDATA | XFS_ILOG_DBROOT |  			  XFS_ILOG_DEXT | XFS_ILOG_DEV); -		if (iip->ili_fields & XFS_ILOG_UUID) { -			iip->ili_format.ilf_u.ilfu_uuid = -				ip->i_df.if_u2.if_uuid; -		} +		if (iip->ili_fields & XFS_ILOG_UUID) +			ilf->ilf_u.ilfu_uuid = ip->i_df.if_u2.if_uuid;  		break; -  	default:  		ASSERT(0);  		break;  	} +} -	/* -	 * If there are no attributes associated with the file, then we're done. -	 */ -	if (!XFS_IFORK_Q(ip)) { -		iip->ili_fields &= -			~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); -		goto out; -	} +STATIC void +xfs_inode_item_format_attr_fork( +	struct xfs_inode_log_item *iip, +	struct xfs_inode_log_format *ilf, +	struct xfs_log_vec	*lv, +	struct xfs_log_iovec	**vecp) +{ +	struct xfs_inode	*ip = iip->ili_inode; +	size_t			data_bytes;  	switch (ip->i_d.di_aformat) {  	case XFS_DINODE_FMT_EXTENTS: @@ -360,30 +262,22 @@ xfs_inode_item_format(  		if ((iip->ili_fields & XFS_ILOG_AEXT) &&  		    ip->i_d.di_anextents > 0 &&  		    ip->i_afp->if_bytes > 0) { +			struct xfs_bmbt_rec *p; +  			ASSERT(ip->i_afp->if_bytes / sizeof(xfs_bmbt_rec_t) ==  				ip->i_d.di_anextents);  			ASSERT(ip->i_afp->if_u1.if_extents != NULL); -#ifdef XFS_NATIVE_HOST -			/* -			 * There are not delayed allocation extents -			 * for attributes, so just point at the array. 
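The word-boundary rounding applied to local-format fork data above is the usual round-up-to-a-multiple idiom; a minimal sketch, where toy_roundup is an assumed stand-in for the kernel's roundup() macro:

#include <assert.h>
#include <stddef.h>

/* Round x up to the next multiple of y (y must be non-zero). */
static size_t toy_roundup(size_t x, size_t y)
{
	return ((x + y - 1) / y) * y;
}

int main(void)
{
	assert(toy_roundup(13, 4) == 16);	/* 13 bytes of local data log 16 bytes */
	assert(toy_roundup(16, 4) == 16);	/* already word aligned: unchanged */
	return 0;
}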
-			 */ -			vecp->i_addr = ip->i_afp->if_u1.if_extents; -			vecp->i_len = ip->i_afp->if_bytes; -			vecp->i_type = XLOG_REG_TYPE_IATTR_EXT; -#else -			ASSERT(iip->ili_aextents_buf == NULL); -			xfs_inode_item_format_extents(ip, vecp, -					XFS_ATTR_FORK, XLOG_REG_TYPE_IATTR_EXT); -#endif -			iip->ili_format.ilf_asize = vecp->i_len; -			vecp++; -			nvecs++; + +			p = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_EXT); +			data_bytes = xfs_iextents_copy(ip, p, XFS_ATTR_FORK); +			xlog_finish_iovec(lv, *vecp, data_bytes); + +			ilf->ilf_asize = data_bytes; +			ilf->ilf_size++;  		} else {  			iip->ili_fields &= ~XFS_ILOG_AEXT;  		}  		break; -  	case XFS_DINODE_FMT_BTREE:  		iip->ili_fields &=  			~(XFS_ILOG_ADATA | XFS_ILOG_AEXT); @@ -392,61 +286,89 @@ xfs_inode_item_format(  		    ip->i_afp->if_broot_bytes > 0) {  			ASSERT(ip->i_afp->if_broot != NULL); -			vecp->i_addr = ip->i_afp->if_broot; -			vecp->i_len = ip->i_afp->if_broot_bytes; -			vecp->i_type = XLOG_REG_TYPE_IATTR_BROOT; -			vecp++; -			nvecs++; -			iip->ili_format.ilf_asize = ip->i_afp->if_broot_bytes; +			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_BROOT, +					ip->i_afp->if_broot, +					ip->i_afp->if_broot_bytes); +			ilf->ilf_asize = ip->i_afp->if_broot_bytes; +			ilf->ilf_size++;  		} else {  			iip->ili_fields &= ~XFS_ILOG_ABROOT;  		}  		break; -  	case XFS_DINODE_FMT_LOCAL:  		iip->ili_fields &=  			~(XFS_ILOG_AEXT | XFS_ILOG_ABROOT);  		if ((iip->ili_fields & XFS_ILOG_ADATA) &&  		    ip->i_afp->if_bytes > 0) { -			ASSERT(ip->i_afp->if_u1.if_data != NULL); - -			vecp->i_addr = ip->i_afp->if_u1.if_data;  			/*  			 * Round i_bytes up to a word boundary.  			 * The underlying memory is guaranteed to  			 * to be there by xfs_idata_realloc().  			 */  			data_bytes = roundup(ip->i_afp->if_bytes, 4); -			ASSERT((ip->i_afp->if_real_bytes == 0) || -			       (ip->i_afp->if_real_bytes == data_bytes)); -			vecp->i_len = (int)data_bytes; -			vecp->i_type = XLOG_REG_TYPE_IATTR_LOCAL; -			vecp++; -			nvecs++; -			iip->ili_format.ilf_asize = (unsigned)data_bytes; +			ASSERT(ip->i_afp->if_real_bytes == 0 || +			       ip->i_afp->if_real_bytes == data_bytes); +			ASSERT(ip->i_afp->if_u1.if_data != NULL); +			xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_IATTR_LOCAL, +					ip->i_afp->if_u1.if_data, +					data_bytes); +			ilf->ilf_asize = (unsigned)data_bytes; +			ilf->ilf_size++;  		} else {  			iip->ili_fields &= ~XFS_ILOG_ADATA;  		}  		break; -  	default:  		ASSERT(0);  		break;  	} - -out: -	/* -	 * Now update the log format that goes out to disk from the in-core -	 * values.  We always write the inode core to make the arithmetic -	 * games in recovery easier, which isn't a big deal as just about any -	 * transaction would dirty it anyway. -	 */ -	iip->ili_format.ilf_fields = XFS_ILOG_CORE | -		(iip->ili_fields & ~XFS_ILOG_TIMESTAMP); -	iip->ili_format.ilf_size = nvecs;  } +/* + * This is called to fill in the vector of log iovecs for the given inode + * log item.  It fills the first item with an inode log format structure, + * the second with the on-disk inode structure, and a possible third and/or + * fourth with the inode data/extents/b-tree root and inode attributes + * data/extents/b-tree root. 
+ */ +STATIC void +xfs_inode_item_format( +	struct xfs_log_item	*lip, +	struct xfs_log_vec	*lv) +{ +	struct xfs_inode_log_item *iip = INODE_ITEM(lip); +	struct xfs_inode	*ip = iip->ili_inode; +	struct xfs_inode_log_format *ilf; +	struct xfs_log_iovec	*vecp = NULL; + +	ASSERT(ip->i_d.di_version > 1); + +	ilf = xlog_prepare_iovec(lv, &vecp, XLOG_REG_TYPE_IFORMAT); +	ilf->ilf_type = XFS_LI_INODE; +	ilf->ilf_ino = ip->i_ino; +	ilf->ilf_blkno = ip->i_imap.im_blkno; +	ilf->ilf_len = ip->i_imap.im_len; +	ilf->ilf_boffset = ip->i_imap.im_boffset; +	ilf->ilf_fields = XFS_ILOG_CORE; +	ilf->ilf_size = 2; /* format + core */ +	xlog_finish_iovec(lv, vecp, sizeof(struct xfs_inode_log_format)); + +	xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ICORE, +			&ip->i_d, +			xfs_icdinode_size(ip->i_d.di_version)); + +	xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp); +	if (XFS_IFORK_Q(ip)) { +		xfs_inode_item_format_attr_fork(iip, ilf, lv, &vecp); +	} else { +		iip->ili_fields &= +			~(XFS_ILOG_ADATA | XFS_ILOG_ABROOT | XFS_ILOG_AEXT); +	} + +	/* update the format with the exact fields we actually logged */ +	ilf->ilf_fields |= (iip->ili_fields & ~XFS_ILOG_TIMESTAMP); +}  /*   * This is called to pin the inode associated with the inode log @@ -563,27 +485,6 @@ xfs_inode_item_unlock(  	ASSERT(ip->i_itemp != NULL);  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); -	/* -	 * If the inode needed a separate buffer with which to log -	 * its extents, then free it now. -	 */ -	if (iip->ili_extents_buf != NULL) { -		ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS); -		ASSERT(ip->i_d.di_nextents > 0); -		ASSERT(iip->ili_fields & XFS_ILOG_DEXT); -		ASSERT(ip->i_df.if_bytes > 0); -		kmem_free(iip->ili_extents_buf); -		iip->ili_extents_buf = NULL; -	} -	if (iip->ili_aextents_buf != NULL) { -		ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_EXTENTS); -		ASSERT(ip->i_d.di_anextents > 0); -		ASSERT(iip->ili_fields & XFS_ILOG_AEXT); -		ASSERT(ip->i_afp->if_bytes > 0); -		kmem_free(iip->ili_aextents_buf); -		iip->ili_aextents_buf = NULL; -	} -  	lock_flags = iip->ili_lock_flags;  	iip->ili_lock_flags = 0;  	if (lock_flags) @@ -670,11 +571,6 @@ xfs_inode_item_init(  	iip->ili_inode = ip;  	xfs_log_item_init(mp, &iip->ili_item, XFS_LI_INODE,  						&xfs_inode_item_ops); -	iip->ili_format.ilf_type = XFS_LI_INODE; -	iip->ili_format.ilf_ino = ip->i_ino; -	iip->ili_format.ilf_blkno = ip->i_imap.im_blkno; -	iip->ili_format.ilf_len = ip->i_imap.im_len; -	iip->ili_format.ilf_boffset = ip->i_imap.im_boffset;  }  /* diff --git a/fs/xfs/xfs_inode_item.h b/fs/xfs/xfs_inode_item.h index dce4d656768..488d81254e2 100644 --- a/fs/xfs/xfs_inode_item.h +++ b/fs/xfs/xfs_inode_item.h @@ -34,11 +34,6 @@ typedef struct xfs_inode_log_item {  	unsigned short		ili_logged;	   /* flushed logged data */  	unsigned int		ili_last_fields;   /* fields when flushed */  	unsigned int		ili_fields;	   /* fields to be logged */ -	struct xfs_bmbt_rec	*ili_extents_buf;  /* array of logged -						      data exts */ -	struct xfs_bmbt_rec	*ili_aextents_buf; /* array of logged -						      attr exts */ -	xfs_inode_log_format_t	ili_format;	   /* logged structure */  } xfs_inode_log_item_t;  static inline int xfs_inode_clean(xfs_inode_t *ip) diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 668e8f4ccf5..8bc1bbce745 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -17,32 +17,31 @@   */  #include "xfs.h"  #include "xfs_fs.h" +#include "xfs_shared.h"  #include "xfs_format.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include 
"xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h" -#include "xfs_alloc.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h"  #include "xfs_ioctl.h" +#include "xfs_alloc.h"  #include "xfs_rtalloc.h"  #include "xfs_itable.h"  #include "xfs_error.h"  #include "xfs_attr.h"  #include "xfs_bmap.h"  #include "xfs_bmap_util.h" -#include "xfs_buf_item.h"  #include "xfs_fsops.h"  #include "xfs_discard.h"  #include "xfs_quota.h" -#include "xfs_inode_item.h"  #include "xfs_export.h"  #include "xfs_trace.h"  #include "xfs_icache.h"  #include "xfs_symlink.h" +#include "xfs_dinode.h" +#include "xfs_trans.h"  #include <linux/capability.h>  #include <linux/dcache.h> @@ -113,15 +112,11 @@ xfs_find_handle(  		memset(&handle.ha_fid, 0, sizeof(handle.ha_fid));  		hsize = sizeof(xfs_fsid_t);  	} else { -		int		lock_mode; - -		lock_mode = xfs_ilock_map_shared(ip);  		handle.ha_fid.fid_len = sizeof(xfs_fid_t) -  					sizeof(handle.ha_fid.fid_len);  		handle.ha_fid.fid_pad = 0;  		handle.ha_fid.fid_gen = ip->i_d.di_gen;  		handle.ha_fid.fid_ino = ip->i_ino; -		xfs_iunlock_map_shared(ip, lock_mode);  		hsize = XFS_HSIZE(handle);  	} @@ -276,32 +271,6 @@ xfs_open_by_handle(  	return error;  } -/* - * This is a copy from fs/namei.c:vfs_readlink(), except for removing it's - * unused first argument. - */ -STATIC int -do_readlink( -	char __user		*buffer, -	int			buflen, -	const char		*link) -{ -        int len; - -	len = PTR_ERR(link); -	if (IS_ERR(link)) -		goto out; - -	len = strlen(link); -	if (len > (unsigned) buflen) -		len = buflen; -	if (copy_to_user(buffer, link, len)) -		len = -EFAULT; - out: -	return len; -} - -  int  xfs_readlink_by_handle(  	struct file		*parfilp, @@ -339,7 +308,7 @@ xfs_readlink_by_handle(  	error = -xfs_readlink(XFS_I(dentry->d_inode), link);  	if (error)  		goto out_kfree; -	error = do_readlink(hreq->ohandle, olen, link); +	error = readlink_copy(hreq->ohandle, olen, link);  	if (error)  		goto out_kfree; @@ -443,7 +412,8 @@ xfs_attrlist_by_handle(  		return -XFS_ERROR(EPERM);  	if (copy_from_user(&al_hreq, arg, sizeof(xfs_fsop_attrlist_handlereq_t)))  		return -XFS_ERROR(EFAULT); -	if (al_hreq.buflen > XATTR_LIST_MAX) +	if (al_hreq.buflen < sizeof(struct attrlist) || +	    al_hreq.buflen > XATTR_LIST_MAX)  		return -XFS_ERROR(EINVAL);  	/* @@ -573,10 +543,11 @@ xfs_attrmulti_by_handle(  	ops = memdup_user(am_hreq.ops, size);  	if (IS_ERR(ops)) { -		error = PTR_ERR(ops); +		error = -PTR_ERR(ops);  		goto out_dput;  	} +	error = ENOMEM;  	attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);  	if (!attr_name)  		goto out_kfree_ops; @@ -586,7 +557,7 @@ xfs_attrmulti_by_handle(  		ops[i].am_error = strncpy_from_user((char *)attr_name,  				ops[i].am_attrname, MAXNAMELEN);  		if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) -			error = -ERANGE; +			error = ERANGE;  		if (ops[i].am_error < 0)  			break; @@ -641,7 +612,11 @@ xfs_ioc_space(  	unsigned int		cmd,  	xfs_flock64_t		*bf)  { -	int			attr_flags = 0; +	struct xfs_mount	*mp = ip->i_mount; +	struct xfs_trans	*tp; +	struct iattr		iattr; +	bool			setprealloc = false; +	bool			clrprealloc = false;  	int			error;  	/* @@ -661,19 +636,128 @@ xfs_ioc_space(  	if (!S_ISREG(inode->i_mode))  		return -XFS_ERROR(EINVAL); -	if (filp->f_flags & (O_NDELAY|O_NONBLOCK)) -		attr_flags |= XFS_ATTR_NONBLOCK; +	error = mnt_want_write_file(filp); +	if (error) +		return error; -	if (filp->f_flags & O_DSYNC) -		attr_flags |= XFS_ATTR_SYNC; +	xfs_ilock(ip, XFS_IOLOCK_EXCL); + +	switch 
(bf->l_whence) { +	case 0: /*SEEK_SET*/ +		break; +	case 1: /*SEEK_CUR*/ +		bf->l_start += filp->f_pos; +		break; +	case 2: /*SEEK_END*/ +		bf->l_start += XFS_ISIZE(ip); +		break; +	default: +		error = XFS_ERROR(EINVAL); +		goto out_unlock; +	} -	if (ioflags & IO_INVIS) -		attr_flags |= XFS_ATTR_DMI; +	/* +	 * length of <= 0 for resv/unresv/zero is invalid.  length for +	 * alloc/free is ignored completely and we have no idea what userspace +	 * might have set it to, so set it to zero to allow range +	 * checks to pass. +	 */ +	switch (cmd) { +	case XFS_IOC_ZERO_RANGE: +	case XFS_IOC_RESVSP: +	case XFS_IOC_RESVSP64: +	case XFS_IOC_UNRESVSP: +	case XFS_IOC_UNRESVSP64: +		if (bf->l_len <= 0) { +			error = XFS_ERROR(EINVAL); +			goto out_unlock; +		} +		break; +	default: +		bf->l_len = 0; +		break; +	} + +	if (bf->l_start < 0 || +	    bf->l_start > mp->m_super->s_maxbytes || +	    bf->l_start + bf->l_len < 0 || +	    bf->l_start + bf->l_len >= mp->m_super->s_maxbytes) { +		error = XFS_ERROR(EINVAL); +		goto out_unlock; +	} + +	switch (cmd) { +	case XFS_IOC_ZERO_RANGE: +		error = xfs_zero_file_space(ip, bf->l_start, bf->l_len); +		if (!error) +			setprealloc = true; +		break; +	case XFS_IOC_RESVSP: +	case XFS_IOC_RESVSP64: +		error = xfs_alloc_file_space(ip, bf->l_start, bf->l_len, +						XFS_BMAPI_PREALLOC); +		if (!error) +			setprealloc = true; +		break; +	case XFS_IOC_UNRESVSP: +	case XFS_IOC_UNRESVSP64: +		error = xfs_free_file_space(ip, bf->l_start, bf->l_len); +		break; +	case XFS_IOC_ALLOCSP: +	case XFS_IOC_ALLOCSP64: +	case XFS_IOC_FREESP: +	case XFS_IOC_FREESP64: +		if (bf->l_start > XFS_ISIZE(ip)) { +			error = xfs_alloc_file_space(ip, XFS_ISIZE(ip), +					bf->l_start - XFS_ISIZE(ip), 0); +			if (error) +				goto out_unlock; +		} + +		iattr.ia_valid = ATTR_SIZE; +		iattr.ia_size = bf->l_start; + +		error = xfs_setattr_size(ip, &iattr); +		if (!error) +			clrprealloc = true; +		break; +	default: +		ASSERT(0); +		error = XFS_ERROR(EINVAL); +	} -	error = mnt_want_write_file(filp);  	if (error) -		return error; -	error = xfs_change_file_space(ip, cmd, bf, filp->f_pos, attr_flags); +		goto out_unlock; + +	tp = xfs_trans_alloc(mp, XFS_TRANS_WRITEID); +	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_writeid, 0, 0); +	if (error) { +		xfs_trans_cancel(tp, 0); +		goto out_unlock; +	} + +	xfs_ilock(ip, XFS_ILOCK_EXCL); +	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + +	if (!(ioflags & IO_INVIS)) { +		ip->i_d.di_mode &= ~S_ISUID; +		if (ip->i_d.di_mode & S_IXGRP) +			ip->i_d.di_mode &= ~S_ISGID; +		xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); +	} + +	if (setprealloc) +		ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC; +	else if (clrprealloc) +		ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC; + +	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); +	if (filp->f_flags & O_DSYNC) +		xfs_trans_set_sync(tp); +	error = xfs_trans_commit(tp, 0); + +out_unlock: +	xfs_iunlock(ip, XFS_IOLOCK_EXCL);  	mnt_drop_write_file(filp);  	return -error;  } @@ -1132,7 +1216,7 @@ xfs_ioctl_setattr(  		 * cleared upon successful return from chown()  		 */  		if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) && -		    !inode_capable(VFS_I(ip), CAP_FSETID)) +		    !capable_wrt_inode_uidgid(VFS_I(ip), CAP_FSETID))  			ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);  		/* @@ -1144,15 +1228,8 @@ xfs_ioctl_setattr(  				olddquot = xfs_qm_vop_chown(tp, ip,  							&ip->i_pdquot, pdqp);  			} +			ASSERT(ip->i_d.di_version > 1);  			xfs_set_projid(ip, fa->fsx_projid); - -			/* -			 * We may have to rev the inode as well as -			 * the superblock 
version number since projids didn't -			 * exist before DINODE_VERSION_2 and SB_VERSION_NLINK. -			 */ -			if (ip->i_d.di_version == 1) -				xfs_bump_ino_vers2(tp, ip);  		}  	} @@ -1474,7 +1551,7 @@ xfs_file_ioctl(  			XFS_IS_REALTIME_INODE(ip) ?  			mp->m_rtdev_targp : mp->m_ddev_targp; -		da.d_mem = da.d_miniosz = 1 << target->bt_sshift; +		da.d_mem =  da.d_miniosz = target->bt_logical_sectorsize;  		da.d_maxiosz = INT_MAX & ~(da.d_miniosz - 1);  		if (copy_to_user(arg, &da, sizeof(da))) diff --git a/fs/xfs/xfs_ioctl32.c b/fs/xfs/xfs_ioctl32.c index f671f7e472a..944d5baa710 100644 --- a/fs/xfs/xfs_ioctl32.c +++ b/fs/xfs/xfs_ioctl32.c @@ -22,14 +22,13 @@  #include <asm/uaccess.h>  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h"  #include "xfs_vnode.h" -#include "xfs_dinode.h"  #include "xfs_inode.h"  #include "xfs_itable.h"  #include "xfs_error.h" @@ -357,7 +356,8 @@ xfs_compat_attrlist_by_handle(  	if (copy_from_user(&al_hreq, arg,  			   sizeof(compat_xfs_fsop_attrlist_handlereq_t)))  		return -XFS_ERROR(EFAULT); -	if (al_hreq.buflen > XATTR_LIST_MAX) +	if (al_hreq.buflen < sizeof(struct attrlist) || +	    al_hreq.buflen > XATTR_LIST_MAX)  		return -XFS_ERROR(EINVAL);  	/* @@ -424,10 +424,11 @@ xfs_compat_attrmulti_by_handle(  	ops = memdup_user(compat_ptr(am_hreq.ops), size);  	if (IS_ERR(ops)) { -		error = PTR_ERR(ops); +		error = -PTR_ERR(ops);  		goto out_dput;  	} +	error = ENOMEM;  	attr_name = kmalloc(MAXNAMELEN, GFP_KERNEL);  	if (!attr_name)  		goto out_kfree_ops; @@ -438,7 +439,7 @@ xfs_compat_attrmulti_by_handle(  				compat_ptr(ops[i].am_attrname),  				MAXNAMELEN);  		if (ops[i].am_error == 0 || ops[i].am_error == MAXNAMELEN) -			error = -ERANGE; +			error = ERANGE;  		if (ops[i].am_error < 0)  			break; diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 8d4d49b6fbf..6d3ec2b6ee2 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -17,34 +17,28 @@   */  #include "xfs.h"  #include "xfs_fs.h" +#include "xfs_shared.h"  #include "xfs_format.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h" -#include "xfs_inode_item.h"  #include "xfs_btree.h" +#include "xfs_bmap_btree.h"  #include "xfs_bmap.h"  #include "xfs_bmap_util.h" -#include "xfs_rtalloc.h"  #include "xfs_error.h" -#include "xfs_itable.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" +#include "xfs_trans.h"  #include "xfs_trans_space.h"  #include "xfs_iomap.h"  #include "xfs_trace.h"  #include "xfs_icache.h" +#include "xfs_quota.h"  #include "xfs_dquot_item.h"  #include "xfs_dquot.h" +#include "xfs_dinode.h"  #define XFS_WRITEIO_ALIGN(mp,off)	(((off) >> mp->m_writeio_log) \ @@ -110,7 +104,7 @@ xfs_alert_fsblock_zero(  	xfs_alert_tag(ip->i_mount, XFS_PTAG_FSBLOCK_ZERO,  			"Access to block zero in inode %llu "  			"start_block: %llx start_off: %llx " -			"blkcnt: %llx extent-state: %x\n", +			"blkcnt: %llx extent-state: %x",  		(unsigned long long)ip->i_ino,  		(unsigned long long)imap->br_startblock,  		(unsigned long long)imap->br_startoff, @@ -134,7 +128,6 @@ xfs_iomap_write_direct(  	
xfs_fsblock_t	firstfsb;  	xfs_extlen_t	extsz, temp;  	int		nimaps; -	int		bmapi_flag;  	int		quota_flag;  	int		rt;  	xfs_trans_t	*tp; @@ -206,18 +199,15 @@ xfs_iomap_write_direct(  	xfs_trans_ijoin(tp, ip, 0); -	bmapi_flag = 0; -	if (offset < XFS_ISIZE(ip) || extsz) -		bmapi_flag |= XFS_BMAPI_PREALLOC; -  	/*  	 * From this point onwards we overwrite the imap pointer that the  	 * caller gave to us.  	 */  	xfs_bmap_init(&free_list, &firstfsb);  	nimaps = 1; -	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, bmapi_flag, -				&firstfsb, 0, imap, &nimaps, &free_list); +	error = xfs_bmapi_write(tp, ip, offset_fsb, count_fsb, +				XFS_BMAPI_PREALLOC, &firstfsb, 0, +				imap, &nimaps, &free_list);  	if (error)  		goto out_bmap_cancel; @@ -655,7 +645,6 @@ int  xfs_iomap_write_allocate(  	xfs_inode_t	*ip,  	xfs_off_t	offset, -	size_t		count,  	xfs_bmbt_irec_t *imap)  {  	xfs_mount_t	*mp = ip->i_mount; @@ -741,7 +730,7 @@ xfs_iomap_write_allocate(  			 */  			nimaps = 1;  			end_fsb = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)); -			error = xfs_bmap_last_offset(NULL, ip, &last_block, +			error = xfs_bmap_last_offset(ip, &last_block,  							XFS_DATA_FORK);  			if (error)  				goto trans_cancel; @@ -760,8 +749,7 @@ xfs_iomap_write_allocate(  			 * pointer that the caller gave to us.  			 */  			error = xfs_bmapi_write(tp, ip, map_start_fsb, -						count_fsb, -						XFS_BMAPI_STACK_SWITCH, +						count_fsb, 0,  						&first_block, 1,  						imap, &nimaps, &free_list);  			if (error) diff --git a/fs/xfs/xfs_iomap.h b/fs/xfs/xfs_iomap.h index 80615760959..411fbb8919e 100644 --- a/fs/xfs/xfs_iomap.h +++ b/fs/xfs/xfs_iomap.h @@ -21,12 +21,12 @@  struct xfs_inode;  struct xfs_bmbt_irec; -extern int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t, +int xfs_iomap_write_direct(struct xfs_inode *, xfs_off_t, size_t,  			struct xfs_bmbt_irec *, int); -extern int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t, +int xfs_iomap_write_delay(struct xfs_inode *, xfs_off_t, size_t,  			struct xfs_bmbt_irec *); -extern int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t, size_t, +int xfs_iomap_write_allocate(struct xfs_inode *, xfs_off_t,  			struct xfs_bmbt_irec *); -extern int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t); +int xfs_iomap_write_unwritten(struct xfs_inode *, xfs_off_t, size_t);  #endif /* __XFS_IOMAP_H__*/ diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 2b8952d9199..205613a0606 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -17,32 +17,29 @@   */  #include "xfs.h"  #include "xfs_fs.h" +#include "xfs_shared.h"  #include "xfs_format.h" -#include "xfs_acl.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h" +#include "xfs_da_format.h"  #include "xfs_inode.h"  #include "xfs_bmap.h"  #include "xfs_bmap_util.h" -#include "xfs_rtalloc.h" +#include "xfs_acl.h" +#include "xfs_quota.h"  #include "xfs_error.h" -#include "xfs_itable.h"  #include "xfs_attr.h" -#include "xfs_buf_item.h" -#include "xfs_inode_item.h" +#include "xfs_trans.h"  #include "xfs_trace.h"  #include "xfs_icache.h"  #include "xfs_symlink.h"  #include "xfs_da_btree.h" -#include "xfs_dir2_format.h"  #include "xfs_dir2_priv.h" +#include "xfs_dinode.h" +#include "xfs_trans_space.h"  #include <linux/capability.h>  #include <linux/xattr.h> @@ -52,6 +49,18 @@  
#include <linux/fiemap.h>  #include <linux/slab.h> +/* + * Directories have different lock order w.r.t. mmap_sem compared to regular + * files. This is due to readdir potentially triggering page faults on a user + * buffer inside filldir(), and this happens with the ilock on the directory + * held. For regular files, the lock order is the other way around - the + * mmap_sem is taken during the page fault, and then we lock the ilock to do + * block mapping. Hence we need a different class for the directory ilock so + * that lockdep can tell them apart. + */ +static struct lock_class_key xfs_nondir_ilock_class; +static struct lock_class_key xfs_dir_ilock_class; +  static int  xfs_initxattrs(  	struct inode		*inode, @@ -63,8 +72,8 @@ xfs_initxattrs(  	int			error = 0;  	for (xattr = xattr_array; xattr->name != NULL; xattr++) { -		error = xfs_attr_set(ip, xattr->name, xattr->value, -				     xattr->value_len, ATTR_SECURE); +		error = -xfs_attr_set(ip, xattr->name, xattr->value, +				      xattr->value_len, ATTR_SECURE);  		if (error < 0)  			break;  	} @@ -84,8 +93,8 @@ xfs_init_security(  	struct inode	*dir,  	const struct qstr *qstr)  { -	return security_inode_init_security(inode, dir, qstr, -					    &xfs_initxattrs, NULL); +	return -security_inode_init_security(inode, dir, qstr, +					     &xfs_initxattrs, NULL);  }  static void @@ -115,19 +124,19 @@ xfs_cleanup_inode(  	xfs_dentry_to_name(&teardown, dentry, 0);  	xfs_remove(XFS_I(dir), &teardown, XFS_I(inode)); -	iput(inode);  }  STATIC int -xfs_vn_mknod( +xfs_generic_create(  	struct inode	*dir,  	struct dentry	*dentry,  	umode_t		mode, -	dev_t		rdev) +	dev_t		rdev, +	bool		tmpfile)	/* unnamed file */  {  	struct inode	*inode;  	struct xfs_inode *ip = NULL; -	struct posix_acl *default_acl = NULL; +	struct posix_acl *default_acl, *acl;  	struct xfs_name	name;  	int		error; @@ -143,17 +152,16 @@ xfs_vn_mknod(  		rdev = 0;  	} -	if (IS_POSIXACL(dir)) { -		default_acl = xfs_get_acl(dir, ACL_TYPE_DEFAULT); -		if (IS_ERR(default_acl)) -			return PTR_ERR(default_acl); +	error = posix_acl_create(dir, &mode, &default_acl, &acl); +	if (error) +		return error; -		if (!default_acl) -			mode &= ~current_umask(); +	if (!tmpfile) { +		xfs_dentry_to_name(&name, dentry, mode); +		error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip); +	} else { +		error = xfs_create_tmpfile(XFS_I(dir), dentry, mode, &ip);  	} - -	xfs_dentry_to_name(&name, dentry, mode); -	error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);  	if (unlikely(error))  		goto out_free_acl; @@ -163,22 +171,46 @@ xfs_vn_mknod(  	if (unlikely(error))  		goto out_cleanup_inode; +#ifdef CONFIG_XFS_POSIX_ACL  	if (default_acl) { -		error = -xfs_inherit_acl(inode, default_acl); -		default_acl = NULL; -		if (unlikely(error)) +		error = -xfs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT); +		if (error) +			goto out_cleanup_inode; +	} +	if (acl) { +		error = -xfs_set_acl(inode, acl, ACL_TYPE_ACCESS); +		if (error)  			goto out_cleanup_inode;  	} +#endif +	if (tmpfile) +		d_tmpfile(dentry, inode); +	else +		d_instantiate(dentry, inode); -	d_instantiate(dentry, inode); + out_free_acl: +	if (default_acl) +		posix_acl_release(default_acl); +	if (acl) +		posix_acl_release(acl);  	return -error;   out_cleanup_inode: -	xfs_cleanup_inode(dir, inode, dentry); - out_free_acl: -	posix_acl_release(default_acl); -	return -error; +	if (!tmpfile) +		xfs_cleanup_inode(dir, inode, dentry); +	iput(inode); +	goto out_free_acl; +} + +STATIC int +xfs_vn_mknod( +	struct inode	*dir, +	struct dentry	*dentry, +	umode_t		
mode, +	dev_t		rdev) +{ +	return xfs_generic_create(dir, dentry, mode, rdev, false);  }  STATIC int @@ -341,6 +373,7 @@ xfs_vn_symlink(   out_cleanup_inode:  	xfs_cleanup_inode(dir, inode, dentry); +	iput(inode);   out:  	return -error;  } @@ -395,18 +428,6 @@ xfs_vn_follow_link(  	return NULL;  } -STATIC void -xfs_vn_put_link( -	struct dentry	*dentry, -	struct nameidata *nd, -	void		*p) -{ -	char		*s = nd_get_link(nd); - -	if (!IS_ERR(s)) -		kfree(s); -} -  STATIC int  xfs_vn_getattr(  	struct vfsmount		*mnt, @@ -463,14 +484,12 @@ xfs_vn_getattr(  static void  xfs_setattr_mode( -	struct xfs_trans	*tp,  	struct xfs_inode	*ip,  	struct iattr		*iattr)  { -	struct inode	*inode = VFS_I(ip); -	umode_t		mode = iattr->ia_mode; +	struct inode		*inode = VFS_I(ip); +	umode_t			mode = iattr->ia_mode; -	ASSERT(tp);  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));  	ip->i_d.di_mode &= S_IFMT; @@ -480,6 +499,32 @@ xfs_setattr_mode(  	inode->i_mode |= mode & ~S_IFMT;  } +static void +xfs_setattr_time( +	struct xfs_inode	*ip, +	struct iattr		*iattr) +{ +	struct inode		*inode = VFS_I(ip); + +	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + +	if (iattr->ia_valid & ATTR_ATIME) { +		inode->i_atime = iattr->ia_atime; +		ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; +		ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; +	} +	if (iattr->ia_valid & ATTR_CTIME) { +		inode->i_ctime = iattr->ia_ctime; +		ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; +		ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; +	} +	if (iattr->ia_valid & ATTR_MTIME) { +		inode->i_mtime = iattr->ia_mtime; +		ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; +		ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; +	} +} +  int  xfs_setattr_nonsize(  	struct xfs_inode	*ip, @@ -622,7 +667,8 @@ xfs_setattr_nonsize(  		}  		if (!gid_eq(igid, gid)) {  			if (XFS_IS_QUOTA_RUNNING(mp) && XFS_IS_GQUOTA_ON(mp)) { -				ASSERT(!XFS_IS_PQUOTA_ON(mp)); +				ASSERT(xfs_sb_version_has_pquotino(&mp->m_sb) || +				       !XFS_IS_PQUOTA_ON(mp));  				ASSERT(mask & ATTR_GID);  				ASSERT(gdqp);  				olddquot2 = xfs_qm_vop_chown(tp, ip, @@ -633,30 +679,10 @@ xfs_setattr_nonsize(  		}  	} -	/* -	 * Change file access modes. -	 */  	if (mask & ATTR_MODE) -		xfs_setattr_mode(tp, ip, iattr); - -	/* -	 * Change file access or modified times. -	 */ -	if (mask & ATTR_ATIME) { -		inode->i_atime = iattr->ia_atime; -		ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec; -		ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec; -	} -	if (mask & ATTR_CTIME) { -		inode->i_ctime = iattr->ia_ctime; -		ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; -		ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; -	} -	if (mask & ATTR_MTIME) { -		inode->i_mtime = iattr->ia_mtime; -		ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; -		ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; -	} +		xfs_setattr_mode(ip, iattr); +	if (mask & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) +		xfs_setattr_time(ip, iattr);  	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); @@ -687,7 +713,7 @@ xfs_setattr_nonsize(  	 * 	     Posix ACL code seems to care about this issue either.  	 
*/  	if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { -		error = -xfs_acl_chmod(inode); +		error = -posix_acl_chmod(inode, inode->i_mode);  		if (error)  			return XFS_ERROR(error);  	} @@ -709,12 +735,10 @@ out_dqrele:  int  xfs_setattr_size(  	struct xfs_inode	*ip, -	struct iattr		*iattr, -	int			flags) +	struct iattr		*iattr)  {  	struct xfs_mount	*mp = ip->i_mount;  	struct inode		*inode = VFS_I(ip); -	int			mask = iattr->ia_valid;  	xfs_off_t		oldsize, newsize;  	struct xfs_trans	*tp;  	int			error; @@ -733,14 +757,10 @@ xfs_setattr_size(  	if (error)  		return XFS_ERROR(error); +	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));  	ASSERT(S_ISREG(ip->i_d.di_mode)); -	ASSERT((mask & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| -			ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0); - -	if (!(flags & XFS_ATTR_NOLOCK)) { -		lock_flags |= XFS_IOLOCK_EXCL; -		xfs_ilock(ip, lock_flags); -	} +	ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET| +		ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);  	oldsize = inode->i_size;  	newsize = iattr->ia_size; @@ -749,13 +769,12 @@ xfs_setattr_size(  	 * Short circuit the truncate case for zero length files.  	 */  	if (newsize == 0 && oldsize == 0 && ip->i_d.di_nextents == 0) { -		if (!(mask & (ATTR_CTIME|ATTR_MTIME))) -			goto out_unlock; +		if (!(iattr->ia_valid & (ATTR_CTIME|ATTR_MTIME))) +			return 0;  		/*  		 * Use the regular setattr path to update the timestamps.  		 */ -		xfs_iunlock(ip, lock_flags);  		iattr->ia_valid &= ~ATTR_SIZE;  		return xfs_setattr_nonsize(ip, iattr, 0);  	} @@ -765,7 +784,7 @@ xfs_setattr_size(  	 */  	error = xfs_qm_dqattach(ip, 0);  	if (error) -		goto out_unlock; +		return error;  	/*  	 * Now we can make the changes.  Before we join the inode to the @@ -783,7 +802,7 @@ xfs_setattr_size(  		 */  		error = xfs_zero_eof(ip, newsize, oldsize);  		if (error) -			goto out_unlock; +			return error;  	}  	/* @@ -802,7 +821,7 @@ xfs_setattr_size(  		error = -filemap_write_and_wait_range(VFS_I(ip)->i_mapping,  						      ip->i_d.di_size, newsize);  		if (error) -			goto out_unlock; +			return error;  	}  	/* @@ -810,22 +829,34 @@ xfs_setattr_size(  	 */  	inode_dio_wait(inode); +	/* +	 * Do all the page cache truncate work outside the transaction context +	 * as the "lock" order is page lock->log space reservation.  i.e. +	 * locking pages inside the transaction can ABBA deadlock with +	 * writeback. We have to do the VFS inode size update before we truncate +	 * the pagecache, however, to avoid racing with page faults beyond the +	 * new EOF they are not serialised against truncate operations except by +	 * page locks and size updates. +	 * +	 * Hence we are in a situation where a truncate can fail with ENOMEM +	 * from xfs_trans_reserve(), but having already truncated the in-memory +	 * version of the file (i.e. made user visible changes). There's not +	 * much we can do about this, except to hope that the caller sees ENOMEM +	 * and retries the truncate operation. 
+	 */  	error = -block_truncate_page(inode->i_mapping, newsize, xfs_get_blocks);  	if (error) -		goto out_unlock; +		return error; +	truncate_setsize(inode, newsize);  	tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_SIZE);  	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0);  	if (error)  		goto out_trans_cancel; -	truncate_setsize(inode, newsize); -  	commit_flags = XFS_TRANS_RELEASE_LOG_RES;  	lock_flags |= XFS_ILOCK_EXCL; -  	xfs_ilock(ip, XFS_ILOCK_EXCL); -  	xfs_trans_ijoin(tp, ip, 0);  	/* @@ -838,10 +869,11 @@ xfs_setattr_size(  	 * these flags set.  For all other operations the VFS set these flags  	 * explicitly if it wants a timestamp update.  	 */ -	if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME)))) { +	if (newsize != oldsize && +	    !(iattr->ia_valid & (ATTR_CTIME | ATTR_MTIME))) {  		iattr->ia_ctime = iattr->ia_mtime =  			current_fs_time(inode->i_sb); -		mask |= ATTR_CTIME | ATTR_MTIME; +		iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;  	}  	/* @@ -877,22 +909,10 @@ xfs_setattr_size(  		xfs_inode_clear_eofblocks_tag(ip);  	} -	/* -	 * Change file access modes. -	 */ -	if (mask & ATTR_MODE) -		xfs_setattr_mode(tp, ip, iattr); - -	if (mask & ATTR_CTIME) { -		inode->i_ctime = iattr->ia_ctime; -		ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec; -		ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec; -	} -	if (mask & ATTR_MTIME) { -		inode->i_mtime = iattr->ia_mtime; -		ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec; -		ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec; -	} +	if (iattr->ia_valid & ATTR_MODE) +		xfs_setattr_mode(ip, iattr); +	if (iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME)) +		xfs_setattr_time(ip, iattr);  	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); @@ -916,12 +936,21 @@ out_trans_cancel:  STATIC int  xfs_vn_setattr( -	struct dentry	*dentry, -	struct iattr	*iattr) +	struct dentry		*dentry, +	struct iattr		*iattr)  { -	if (iattr->ia_valid & ATTR_SIZE) -		return -xfs_setattr_size(XFS_I(dentry->d_inode), iattr, 0); -	return -xfs_setattr_nonsize(XFS_I(dentry->d_inode), iattr, 0); +	struct xfs_inode	*ip = XFS_I(dentry->d_inode); +	int			error; + +	if (iattr->ia_valid & ATTR_SIZE) { +		xfs_ilock(ip, XFS_IOLOCK_EXCL); +		error = xfs_setattr_size(ip, iattr); +		xfs_iunlock(ip, XFS_IOLOCK_EXCL); +	} else { +		error = xfs_setattr_nonsize(ip, iattr, 0); +	} + +	return -error;  }  STATIC int @@ -1051,8 +1080,18 @@ xfs_vn_fiemap(  	return 0;  } +STATIC int +xfs_vn_tmpfile( +	struct inode	*dir, +	struct dentry	*dentry, +	umode_t		mode) +{ +	return xfs_generic_create(dir, dentry, mode, 0, true); +} +  static const struct inode_operations xfs_inode_operations = {  	.get_acl		= xfs_get_acl, +	.set_acl		= xfs_set_acl,  	.getattr		= xfs_vn_getattr,  	.setattr		= xfs_vn_setattr,  	.setxattr		= generic_setxattr, @@ -1080,6 +1119,7 @@ static const struct inode_operations xfs_dir_inode_operations = {  	.mknod			= xfs_vn_mknod,  	.rename			= xfs_vn_rename,  	.get_acl		= xfs_get_acl, +	.set_acl		= xfs_set_acl,  	.getattr		= xfs_vn_getattr,  	.setattr		= xfs_vn_setattr,  	.setxattr		= generic_setxattr, @@ -1087,6 +1127,7 @@ static const struct inode_operations xfs_dir_inode_operations = {  	.removexattr		= generic_removexattr,  	.listxattr		= xfs_vn_listxattr,  	.update_time		= xfs_vn_update_time, +	.tmpfile		= xfs_vn_tmpfile,  };  static const struct inode_operations xfs_dir_ci_inode_operations = { @@ -1106,6 +1147,7 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {  	.mknod			= xfs_vn_mknod,  	.rename			= xfs_vn_rename,  	.get_acl		= 
xfs_get_acl, +	.set_acl		= xfs_set_acl,  	.getattr		= xfs_vn_getattr,  	.setattr		= xfs_vn_setattr,  	.setxattr		= generic_setxattr, @@ -1113,13 +1155,13 @@ static const struct inode_operations xfs_dir_ci_inode_operations = {  	.removexattr		= generic_removexattr,  	.listxattr		= xfs_vn_listxattr,  	.update_time		= xfs_vn_update_time, +	.tmpfile		= xfs_vn_tmpfile,  };  static const struct inode_operations xfs_symlink_inode_operations = {  	.readlink		= generic_readlink,  	.follow_link		= xfs_vn_follow_link, -	.put_link		= xfs_vn_put_link, -	.get_acl		= xfs_get_acl, +	.put_link		= kfree_put_link,  	.getattr		= xfs_vn_getattr,  	.setattr		= xfs_vn_setattr,  	.setxattr		= generic_setxattr, @@ -1169,6 +1211,7 @@ xfs_setup_inode(  	struct xfs_inode	*ip)  {  	struct inode		*inode = &ip->i_vnode; +	gfp_t			gfp_mask;  	inode->i_ino = ip->i_ino;  	inode->i_state = I_NEW; @@ -1204,6 +1247,8 @@ xfs_setup_inode(  	inode->i_ctime.tv_nsec	= ip->i_d.di_ctime.t_nsec;  	xfs_diflags_to_iflags(inode, ip); +	ip->d_ops = ip->i_mount->m_nondir_inode_ops; +	lockdep_set_class(&ip->i_lock.mr_lock, &xfs_nondir_ilock_class);  	switch (inode->i_mode & S_IFMT) {  	case S_IFREG:  		inode->i_op = &xfs_inode_operations; @@ -1211,11 +1256,13 @@ xfs_setup_inode(  		inode->i_mapping->a_ops = &xfs_address_space_operations;  		break;  	case S_IFDIR: +		lockdep_set_class(&ip->i_lock.mr_lock, &xfs_dir_ilock_class);  		if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))  			inode->i_op = &xfs_dir_ci_inode_operations;  		else  			inode->i_op = &xfs_dir_inode_operations;  		inode->i_fop = &xfs_dir_file_operations; +		ip->d_ops = ip->i_mount->m_dir_inode_ops;  		break;  	case S_IFLNK:  		inode->i_op = &xfs_symlink_inode_operations; @@ -1229,6 +1276,14 @@ xfs_setup_inode(  	}  	/* +	 * Ensure all page cache allocations are done from GFP_NOFS context to +	 * prevent direct reclaim recursion back into the filesystem and blowing +	 * stacks or deadlocking. +	 */ +	gfp_mask = mapping_gfp_mask(inode->i_mapping); +	mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & ~(__GFP_FS))); + +	/*  	 * If there is no attribute fork no ACL can exist on this inode,  	 * and it can't have any file capabilities attached to it either.  	 */ diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h index d81fb41205e..1c34e433592 100644 --- a/fs/xfs/xfs_iops.h +++ b/fs/xfs/xfs_iops.h @@ -30,14 +30,10 @@ extern void xfs_setup_inode(struct xfs_inode *);  /*   * Internal setattr interfaces.   
*/ -#define	XFS_ATTR_DMI		0x01	/* invocation from a DMI function */ -#define	XFS_ATTR_NONBLOCK	0x02	/* return EAGAIN if op would block */ -#define XFS_ATTR_NOLOCK		0x04	/* Don't grab any conflicting locks */ -#define XFS_ATTR_NOACL		0x08	/* Don't call xfs_acl_chmod */ -#define XFS_ATTR_SYNC		0x10	/* synchronous operation required */ +#define XFS_ATTR_NOACL		0x01	/* Don't call posix_acl_chmod */  extern int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap,  			       int flags); -extern int xfs_setattr_size(struct xfs_inode *ip, struct iattr *vap, int flags); +extern int xfs_setattr_size(struct xfs_inode *ip, struct iattr *vap);  #endif /* __XFS_IOPS_H__ */ diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 084b3e1741f..cb64f222d60 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -17,24 +17,23 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_inum.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h" +#include "xfs_btree.h"  #include "xfs_ialloc.h" +#include "xfs_ialloc_btree.h"  #include "xfs_itable.h"  #include "xfs_error.h" -#include "xfs_btree.h"  #include "xfs_trace.h"  #include "xfs_icache.h" +#include "xfs_dinode.h"  STATIC int  xfs_internal_inum( @@ -210,9 +209,8 @@ xfs_bulkstat(  	xfs_inobt_rec_incore_t	*irbuf;	/* start of irec buffer */  	xfs_inobt_rec_incore_t	*irbufend; /* end of good irec buffer entries */  	xfs_ino_t		lastino; /* last inode number returned */ -	int			nbcluster; /* # of blocks in a cluster */ -	int			nicluster; /* # of inodes in a cluster */ -	int			nimask;	/* mask for inode clusters */ +	int			blks_per_cluster; /* # of blocks per cluster */ +	int			inodes_per_cluster;/* # of inodes per cluster */  	int			nirbuf;	/* size of irbuf */  	int			rval;	/* return value error code */  	int			tmp;	/* result value from btree calls */ @@ -244,11 +242,8 @@ xfs_bulkstat(  	*done = 0;  	fmterror = 0;  	ubufp = ubuffer; -	nicluster = mp->m_sb.sb_blocksize >= XFS_INODE_CLUSTER_SIZE(mp) ? -		mp->m_sb.sb_inopblock : -		(XFS_INODE_CLUSTER_SIZE(mp) >> mp->m_sb.sb_inodelog); -	nimask = ~(nicluster - 1); -	nbcluster = nicluster >> mp->m_sb.sb_inopblog; +	blks_per_cluster = xfs_icluster_size_fsb(mp); +	inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;  	irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);  	if (!irbuf)  		return ENOMEM; @@ -275,7 +270,8 @@ xfs_bulkstat(  		/*  		 * Allocate and initialize a btree cursor for ialloc btree.  		 
*/ -		cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); +		cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, +					    XFS_BTNUM_INO);  		irbp = irbuf;  		irbufend = irbuf + nirbuf;  		end_of_ag = 0; @@ -391,12 +387,12 @@ xfs_bulkstat(  				agbno = XFS_AGINO_TO_AGBNO(mp, r.ir_startino);  				for (chunkidx = 0;  				     chunkidx < XFS_INODES_PER_CHUNK; -				     chunkidx += nicluster, -				     agbno += nbcluster) { -					if (xfs_inobt_maskn(chunkidx, nicluster) -							& ~r.ir_free) +				     chunkidx += inodes_per_cluster, +				     agbno += blks_per_cluster) { +					if (xfs_inobt_maskn(chunkidx, +					    inodes_per_cluster) & ~r.ir_free)  						xfs_btree_reada_bufs(mp, agno, -							agbno, nbcluster, +							agbno, blks_per_cluster,  							&xfs_inode_buf_ops);  				}  				blk_finish_plug(&plug); @@ -626,7 +622,8 @@ xfs_inumbers(  				agino = 0;  				continue;  			} -			cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno); +			cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno, +						    XFS_BTNUM_INO);  			error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE,  						 &tmp);  			if (error) { diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index f9bb590acc0..825249d2dfc 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -119,6 +119,7 @@ typedef __uint64_t __psunsigned_t;  #include "xfs_iops.h"  #include "xfs_aops.h"  #include "xfs_super.h" +#include "xfs_cksum.h"  #include "xfs_buf.h"  #include "xfs_message.h" @@ -178,6 +179,7 @@ typedef __uint64_t __psunsigned_t;  #define ENOATTR		ENODATA		/* Attribute not found */  #define EWRONGFS	EINVAL		/* Mount with wrong filesystem type */  #define EFSCORRUPTED	EUCLEAN		/* Filesystem is corrupted */ +#define EFSBADCRC	EBADMSG		/* Bad CRC detected */  #define SYNCHRONIZE()	barrier()  #define __return_address __builtin_return_address(0) diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index a2dea108071..292308dede6 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -17,21 +17,19 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h"  #include "xfs_error.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" +#include "xfs_log.h"  #include "xfs_log_priv.h" -#include "xfs_buf_item.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h"  #include "xfs_log_recover.h" -#include "xfs_trans_priv.h" -#include "xfs_dinode.h"  #include "xfs_inode.h"  #include "xfs_trace.h"  #include "xfs_fsops.h" @@ -618,11 +616,13 @@ xfs_log_mount(  	int		error = 0;  	int		min_logfsbs; -	if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) -		xfs_notice(mp, "Mounting Filesystem"); -	else { +	if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) { +		xfs_notice(mp, "Mounting V%d Filesystem", +			   XFS_SB_VERSION_NUM(&mp->m_sb)); +	} else {  		xfs_notice(mp, -"Mounting filesystem in no-recovery mode.  Filesystem will be inconsistent."); +"Mounting V%d filesystem in no-recovery mode. Filesystem will be inconsistent.", +			   XFS_SB_VERSION_NUM(&mp->m_sb));  		ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);  	} @@ -1000,27 +1000,34 @@ xfs_log_space_wake(  }  /* - * Determine if we have a transaction that has gone to disk - * that needs to be covered. To begin the transition to the idle state - * firstly the log needs to be idle (no AIL and nothing in the iclogs). 
- * If we are then in a state where covering is needed, the caller is informed - * that dummy transactions are required to move the log into the idle state. + * Determine if we have a transaction that has gone to disk that needs to be + * covered. To begin the transition to the idle state firstly the log needs to + * be idle. That means the CIL, the AIL and the iclogs need to be empty before + * we start attempting to cover the log. + * + * Only if we are then in a state where covering is needed, the caller is + * informed that dummy transactions are required to move the log into the idle + * state.   * - * Because this is called as part of the sync process, we should also indicate - * that dummy transactions should be issued in anything but the covered or - * idle states. This ensures that the log tail is accurately reflected in - * the log at the end of the sync, hence if a crash occurrs avoids replay - * of transactions where the metadata is already on disk. + * If there are any items in the AIL or CIL, then we do not want to attempt to + * cover the log as we may be in a situation where there isn't log space + * available to run a dummy transaction and this can lead to deadlocks when the + * tail of the log is pinned by an item that is modified in the CIL.  Hence + * there's no point in running a dummy transaction at this point because we + * can't start trying to idle the log until both the CIL and AIL are empty.   */  int  xfs_log_need_covered(xfs_mount_t *mp)  { -	int		needed = 0;  	struct xlog	*log = mp->m_log; +	int		needed = 0;  	if (!xfs_fs_writable(mp))  		return 0; +	if (!xlog_cil_empty(log)) +		return 0; +  	spin_lock(&log->l_icloglock);  	switch (log->l_covered_state) {  	case XLOG_STATE_COVER_DONE: @@ -1029,14 +1036,17 @@ xfs_log_need_covered(xfs_mount_t *mp)  		break;  	case XLOG_STATE_COVER_NEED:  	case XLOG_STATE_COVER_NEED2: -		if (!xfs_ail_min_lsn(log->l_ailp) && -		    xlog_iclogs_empty(log)) { -			if (log->l_covered_state == XLOG_STATE_COVER_NEED) -				log->l_covered_state = XLOG_STATE_COVER_DONE; -			else -				log->l_covered_state = XLOG_STATE_COVER_DONE2; -		} -		/* FALLTHRU */ +		if (xfs_ail_min_lsn(log->l_ailp)) +			break; +		if (!xlog_iclogs_empty(log)) +			break; + +		needed = 1; +		if (log->l_covered_state == XLOG_STATE_COVER_NEED) +			log->l_covered_state = XLOG_STATE_COVER_DONE; +		else +			log->l_covered_state = XLOG_STATE_COVER_DONE2; +		break;  	default:  		needed = 1;  		break; @@ -1068,6 +1078,7 @@ xlog_assign_tail_lsn_locked(  		tail_lsn = lip->li_lsn;  	else  		tail_lsn = atomic64_read(&log->l_last_sync_lsn); +	trace_xfs_log_assign_tail_lsn(log, tail_lsn);  	atomic64_set(&log->l_tail_lsn, tail_lsn);  	return tail_lsn;  } @@ -1154,7 +1165,7 @@ xlog_iodone(xfs_buf_t *bp)  	/*  	 * Race to shutdown the filesystem if we see an error.  	 */ -	if (XFS_TEST_ERROR((xfs_buf_geterror(bp)), l->l_mp, +	if (XFS_TEST_ERROR(bp->b_error, l->l_mp,  			XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) {  		xfs_buf_ioerror_alert(bp, __func__);  		xfs_buf_stale(bp); @@ -1172,11 +1183,14 @@ xlog_iodone(xfs_buf_t *bp)  	/* log I/O is always issued ASYNC */  	ASSERT(XFS_BUF_ISASYNC(bp));  	xlog_state_done_syncing(iclog, aborted); +  	/* -	 * do not reference the buffer (bp) here as we could race -	 * with it being freed after writing the unmount record to the -	 * log. +	 * drop the buffer lock now that we are done. Nothing references +	 * the buffer after this, so an unmount waiting on this lock can now +	 * tear it down safely. 
As such, it is unsafe to reference the buffer +	 * (bp) after the unlock as we could race with it being freed.  	 */ +	xfs_buf_unlock(bp);  }  /* @@ -1359,8 +1373,16 @@ xlog_alloc_log(  	bp = xfs_buf_alloc(mp->m_logdev_targp, 0, BTOBB(log->l_iclog_size), 0);  	if (!bp)  		goto out_free_log; -	bp->b_iodone = xlog_iodone; + +	/* +	 * The iclogbuf buffer locks are held over IO but we are not going to do +	 * IO yet.  Hence unlock the buffer so that the log IO path can grab it +	 * when appropriate. +	 */  	ASSERT(xfs_buf_islocked(bp)); +	xfs_buf_unlock(bp); + +	bp->b_iodone = xlog_iodone;  	log->l_xbuf = bp;  	spin_lock_init(&log->l_icloglock); @@ -1389,6 +1411,9 @@ xlog_alloc_log(  		if (!bp)  			goto out_free_iclog; +		ASSERT(xfs_buf_islocked(bp)); +		xfs_buf_unlock(bp); +  		bp->b_iodone = xlog_iodone;  		iclog->ic_bp = bp;  		iclog->ic_data = bp->b_addr; @@ -1413,7 +1438,6 @@ xlog_alloc_log(  		iclog->ic_callback_tail = &(iclog->ic_callback);  		iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; -		ASSERT(xfs_buf_islocked(iclog->ic_bp));  		init_waitqueue_head(&iclog->ic_force_wait);  		init_waitqueue_head(&iclog->ic_write_wait); @@ -1622,6 +1646,12 @@ xlog_cksum(   * we transition the iclogs to IOERROR state *after* flushing all existing   * iclogs to disk. This is because we don't want anymore new transactions to be   * started or completed afterwards. + * + * We lock the iclogbufs here so that we can serialise against IO completion + * during unmount. We might be processing a shutdown triggered during unmount, + * and that can occur asynchronously to the unmount thread, and hence we need to + * ensure that it completes before tearing down the iclogbufs. Hence we need to + * hold the buffer lock across the log IO to achieve that.   */  STATIC int  xlog_bdstrat( @@ -1629,6 +1659,7 @@ xlog_bdstrat(  {  	struct xlog_in_core	*iclog = bp->b_fspriv; +	xfs_buf_lock(bp);  	if (iclog->ic_state & XLOG_STATE_IOERROR) {  		xfs_buf_ioerror(bp, EIO);  		xfs_buf_stale(bp); @@ -1636,7 +1667,8 @@ xlog_bdstrat(  		/*  		 * It would seem logical to return EIO here, but we rely on  		 * the log state machine to propagate I/O errors instead of -		 * doing it here. +		 * doing it here. Similarly, IO completion will unlock the +		 * buffer, so we don't do it here.  		 */  		return 0;  	} @@ -1838,14 +1870,28 @@ xlog_dealloc_log(  	xlog_cil_destroy(log);  	/* -	 * always need to ensure that the extra buffer does not point to memory -	 * owned by another log buffer before we free it. +	 * Cycle all the iclogbuf locks to make sure all log IO completion +	 * is done before we tear down these buffers.  	 */ +	iclog = log->l_iclog; +	for (i = 0; i < log->l_iclog_bufs; i++) { +		xfs_buf_lock(iclog->ic_bp); +		xfs_buf_unlock(iclog->ic_bp); +		iclog = iclog->ic_next; +	} + +	/* +	 * Always need to ensure that the extra buffer does not point to memory +	 * owned by another log buffer before we free it. Also, cycle the lock +	 * first to ensure we've completed IO on it. 
+	 */ +	xfs_buf_lock(log->l_xbuf); +	xfs_buf_unlock(log->l_xbuf);  	xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size));  	xfs_buf_free(log->l_xbuf);  	iclog = log->l_iclog; -	for (i=0; i<log->l_iclog_bufs; i++) { +	for (i = 0; i < log->l_iclog_bufs; i++) {  		xfs_buf_free(iclog->ic_bp);  		next_iclog = iclog->ic_next;  		kmem_free(iclog); @@ -1979,7 +2025,7 @@ xlog_print_tic_res(  	for (i = 0; i < ticket->t_res_num; i++) {  		uint r_type = ticket->t_res_arr[i].r_type; -		xfs_warn(mp, "region[%u]: %s - %u bytes\n", i, +		xfs_warn(mp, "region[%u]: %s - %u bytes", i,  			    ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ?  			    "bad-rtype" : res_type_str[r_type-1]),  			    ticket->t_res_arr[i].r_len); @@ -3702,11 +3748,9 @@ xlog_verify_iclog(  	/* check validity of iclog pointers */  	spin_lock(&log->l_icloglock);  	icptr = log->l_iclog; -	for (i=0; i < log->l_iclog_bufs; i++) { -		if (icptr == NULL) -			xfs_emerg(log->l_mp, "%s: invalid ptr", __func__); -		icptr = icptr->ic_next; -	} +	for (i = 0; i < log->l_iclog_bufs; i++, icptr = icptr->ic_next) +		ASSERT(icptr); +  	if (icptr != log->l_iclog)  		xfs_emerg(log->l_mp, "%s: corrupt iclog ring", __func__);  	spin_unlock(&log->l_icloglock); @@ -3908,11 +3952,14 @@ xfs_log_force_umount(  		retval = xlog_state_ioerror(log);  		spin_unlock(&log->l_icloglock);  	} +  	/* -	 * Wake up everybody waiting on xfs_log_force. -	 * Callback all log item committed functions as if the -	 * log writes were completed. +	 * Wake up everybody waiting on xfs_log_force. Wake the CIL push first +	 * as if the log writes were completed. The abort handling in the log +	 * item committed callback functions will do this again under lock to +	 * avoid races.  	 */ +	wake_up_all(&log->l_cilp->xc_commit_wait);  	xlog_state_do_callback(log, XFS_LI_ABORTED, NULL);  #ifdef XFSERRORDEBUG diff --git a/fs/xfs/xfs_log.h b/fs/xfs/xfs_log.h index 1c458487f00..84e0deb95ab 100644 --- a/fs/xfs/xfs_log.h +++ b/fs/xfs/xfs_log.h @@ -18,20 +18,71 @@  #ifndef	__XFS_LOG_H__  #define __XFS_LOG_H__ -#include "xfs_log_format.h" -  struct xfs_log_vec {  	struct xfs_log_vec	*lv_next;	/* next lv in build list */  	int			lv_niovecs;	/* number of iovecs in lv */  	struct xfs_log_iovec	*lv_iovecp;	/* iovec array */  	struct xfs_log_item	*lv_item;	/* owner */  	char			*lv_buf;	/* formatted buffer */ -	int			lv_buf_len;	/* size of formatted buffer */ +	int			lv_bytes;	/* accounted space in buffer */ +	int			lv_buf_len;	/* aligned size of buffer */  	int			lv_size;	/* size of allocated lv */  };  #define XFS_LOG_VEC_ORDERED	(-1) +static inline void * +xlog_prepare_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, +		uint type) +{ +	struct xfs_log_iovec *vec = *vecp; + +	if (vec) { +		ASSERT(vec - lv->lv_iovecp < lv->lv_niovecs); +		vec++; +	} else { +		vec = &lv->lv_iovecp[0]; +	} + +	vec->i_type = type; +	vec->i_addr = lv->lv_buf + lv->lv_buf_len; + +	ASSERT(IS_ALIGNED((unsigned long)vec->i_addr, sizeof(uint64_t))); + +	*vecp = vec; +	return vec->i_addr; +} + +/* + * We need to make sure the next buffer is naturally aligned for the biggest + * basic data type we put into it.  We already accounted for this padding when + * sizing the buffer. + * + * However, this padding does not get written into the log, and hence we have to + * track the space used by the log vectors separately to prevent log space hangs + * due to inaccurate accounting (i.e. a leak) of the used log space through the + * CIL context ticket. 
+ */ +static inline void +xlog_finish_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec *vec, int len) +{ +	lv->lv_buf_len += round_up(len, sizeof(uint64_t)); +	lv->lv_bytes += len; +	vec->i_len = len; +} + +static inline void * +xlog_copy_iovec(struct xfs_log_vec *lv, struct xfs_log_iovec **vecp, +		uint type, void *data, int len) +{ +	void *buf; + +	buf = xlog_prepare_iovec(lv, vecp, type); +	memcpy(buf, data, len); +	xlog_finish_iovec(lv, *vecp, len); +	return buf; +} +  /*   * Structure used to pass callback function and the function's argument   * to the log manager. @@ -82,11 +133,7 @@ struct xlog_ticket;  struct xfs_log_item;  struct xfs_item_ops;  struct xfs_trans; - -void	xfs_log_item_init(struct xfs_mount	*mp, -			struct xfs_log_item	*item, -			int			type, -			const struct xfs_item_ops *ops); +struct xfs_log_callback;  xfs_lsn_t xfs_log_done(struct xfs_mount *mp,  		       struct xlog_ticket *ticket, @@ -114,7 +161,7 @@ xfs_lsn_t xlog_assign_tail_lsn_locked(struct xfs_mount *mp);  void	  xfs_log_space_wake(struct xfs_mount *mp);  int	  xfs_log_notify(struct xfs_mount	*mp,  			 struct xlog_in_core	*iclog, -			 xfs_log_callback_t	*callback_entry); +			 struct xfs_log_callback *callback_entry);  int	  xfs_log_release_iclog(struct xfs_mount *mp,  			 struct xlog_in_core	 *iclog);  int	  xfs_log_reserve(struct xfs_mount *mp, @@ -135,7 +182,7 @@ void	  xlog_iodone(struct xfs_buf *);  struct xlog_ticket *xfs_log_ticket_get(struct xlog_ticket *ticket);  void	  xfs_log_ticket_put(struct xlog_ticket *ticket); -int	xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp, +void	xfs_log_commit_cil(struct xfs_mount *mp, struct xfs_trans *tp,  				xfs_lsn_t *commit_lsn, int flags);  bool	xfs_log_item_in_current_chkpt(struct xfs_log_item *lip); diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index cfe97973ba3..b3425b34e3d 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -17,11 +17,9 @@  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" -#include "xfs_log_priv.h" +#include "xfs_log_format.h" +#include "xfs_shared.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" @@ -29,6 +27,10 @@  #include "xfs_alloc.h"  #include "xfs_extent_busy.h"  #include "xfs_discard.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" +#include "xfs_log.h" +#include "xfs_log_priv.h"  /*   * Allocate a new ticket. Failing to get a new ticket makes it really hard to @@ -80,36 +82,6 @@ xlog_cil_init_post_recovery(  								log->l_curr_block);  } -STATIC int -xlog_cil_lv_item_format( -	struct xfs_log_item	*lip, -	struct xfs_log_vec	*lv) -{ -	int	index; -	char	*ptr; - -	/* format new vectors into array */ -	lip->li_ops->iop_format(lip, lv->lv_iovecp); - -	/* copy data into existing array */ -	ptr = lv->lv_buf; -	for (index = 0; index < lv->lv_niovecs; index++) { -		struct xfs_log_iovec *vec = &lv->lv_iovecp[index]; - -		memcpy(ptr, vec->i_addr, vec->i_len); -		vec->i_addr = ptr; -		ptr += vec->i_len; -	} - -	/* -	 * some size calculations for log vectors over-estimate, so the caller -	 * doesn't know the amount of space actually used by the item. Return -	 * the byte count to the caller so they can check and store it -	 * appropriately. -	 */ -	return ptr - lv->lv_buf; -} -  /*   * Prepare the log item for insertion into the CIL. 
Calculate the difference in   * log space and vectors it will consume, and if it is a new item pin it as @@ -125,7 +97,7 @@ xfs_cil_prepare_item(  {  	/* Account for the new LV being passed in */  	if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) { -		*diff_len += lv->lv_buf_len; +		*diff_len += lv->lv_bytes;  		*diff_iovecs += lv->lv_niovecs;  	} @@ -139,7 +111,7 @@ xfs_cil_prepare_item(  	else if (old_lv != lv) {  		ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED); -		*diff_len -= old_lv->lv_buf_len; +		*diff_len -= old_lv->lv_bytes;  		*diff_iovecs -= old_lv->lv_niovecs;  		kmem_free(old_lv);  	} @@ -230,12 +202,28 @@ xlog_cil_insert_format_items(  			nbytes = 0;  		} +		/* +		 * We 64-bit align the length of each iovec so that the start +		 * of the next one is naturally aligned.  We'll need to +		 * account for that slack space here. Then round nbytes up +		 * to 64-bit alignment so that the initial buffer alignment is +		 * easy to calculate and verify. +		 */ +		nbytes += niovecs * sizeof(uint64_t); +		nbytes = round_up(nbytes, sizeof(uint64_t)); +  		/* grab the old item if it exists for reservation accounting */  		old_lv = lip->li_lv; -		/* calc buffer size */ -		buf_size = sizeof(struct xfs_log_vec) + nbytes + -				niovecs * sizeof(struct xfs_log_iovec); +		/* +		 * The data buffer needs to start 64-bit aligned, so round up +		 * that space to ensure we can align it appropriately and not +		 * overrun the buffer. +		 */ +		buf_size = nbytes + +			   round_up((sizeof(struct xfs_log_vec) + +				     niovecs * sizeof(struct xfs_log_iovec)), +				    sizeof(uint64_t));  		/* compare to existing item size */  		if (lip->li_lv && buf_size <= lip->li_lv->lv_size) { @@ -251,35 +239,31 @@ xlog_cil_insert_format_items(  			 * that the space reservation accounting is correct.  			 */  			*diff_iovecs -= lv->lv_niovecs; -			*diff_len -= lv->lv_buf_len; - -			/* Ensure the lv is set up according to ->iop_size */ -			lv->lv_niovecs = niovecs; -			lv->lv_buf = (char *)lv + buf_size - nbytes; - -			lv->lv_buf_len = xlog_cil_lv_item_format(lip, lv); -			goto insert; +			*diff_len -= lv->lv_bytes; +		} else { +			/* allocate new data chunk */ +			lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS); +			lv->lv_item = lip; +			lv->lv_size = buf_size; +			if (ordered) { +				/* track as an ordered logvec */ +				ASSERT(lip->li_lv == NULL); +				lv->lv_buf_len = XFS_LOG_VEC_ORDERED; +				goto insert; +			} +			lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1];  		} -		/* allocate new data chunk */ -		lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS); -		lv->lv_item = lip; -		lv->lv_size = buf_size; +		/* Ensure the lv is set up according to ->iop_size */  		lv->lv_niovecs = niovecs; -		if (ordered) { -			/* track as an ordered logvec */ -			ASSERT(lip->li_lv == NULL); -			lv->lv_buf_len = XFS_LOG_VEC_ORDERED; -			goto insert; -		} - -		/* The allocated iovec region lies beyond the log vector. 
*/ -		lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1];  		/* The allocated data region lies beyond the iovec region */ +		lv->lv_buf_len = 0; +		lv->lv_bytes = 0;  		lv->lv_buf = (char *)lv + buf_size - nbytes; +		ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t))); -		lv->lv_buf_len = xlog_cil_lv_item_format(lip, lv); +		lip->li_ops->iop_format(lip, lv);  insert:  		ASSERT(lv->lv_buf_len <= nbytes);  		xfs_cil_prepare_item(log, lv, old_lv, diff_len, diff_iovecs); @@ -402,7 +386,15 @@ xlog_cil_committed(  	xfs_extent_busy_clear(mp, &ctx->busy_extents,  			     (mp->m_flags & XFS_MOUNT_DISCARD) && !abort); +	/* +	 * If we are aborting the commit, wake up anyone waiting on the +	 * committing list.  If we don't, then a shutdown can leave processes +	 * waiting in xlog_cil_force_lsn() waiting on a sequence commit that +	 * will never happen because we aborted it. +	 */  	spin_lock(&ctx->cil->xc_push_lock); +	if (abort) +		wake_up_all(&ctx->cil->xc_commit_wait);  	list_del(&ctx->committing);  	spin_unlock(&ctx->cil->xc_push_lock); @@ -516,13 +508,6 @@ xlog_cil_push(  	cil->xc_ctx = new_ctx;  	/* -	 * mirror the new sequence into the cil structure so that we can do -	 * unlocked checks against the current sequence in log forces without -	 * risking deferencing a freed context pointer. -	 */ -	cil->xc_current_sequence = new_ctx->sequence; - -	/*  	 * The switch is now done, so we can drop the context lock and move out  	 * of a shared context. We can't just go straight to the commit record,  	 * though - we need to synchronise with previous and future commits so @@ -540,8 +525,15 @@ xlog_cil_push(  	 * Hence we need to add this context to the committing context list so  	 * that higher sequences will wait for us to write out a commit record  	 * before they do. +	 * +	 * xfs_log_force_lsn requires us to mirror the new sequence into the cil +	 * structure atomically with the addition of this sequence to the +	 * committing list. This also ensures that we can do unlocked checks +	 * against the current sequence in log forces without risking +	 * dereferencing a freed context pointer.  	 */  	spin_lock(&cil->xc_push_lock); +	cil->xc_current_sequence = new_ctx->sequence;  	list_add(&ctx->committing, &cil->xc_committing);  	spin_unlock(&cil->xc_push_lock);  	up_write(&cil->xc_ctx_lock); @@ -581,8 +573,18 @@ restart:  	spin_lock(&cil->xc_push_lock);  	list_for_each_entry(new_ctx, &cil->xc_committing, committing) {  		/* +		 * Avoid getting stuck in this loop because we were woken by the +		 * shutdown, but then went back to sleep once already in the +		 * shutdown state. +		 */ +		if (XLOG_FORCED_SHUTDOWN(log)) { +			spin_unlock(&cil->xc_push_lock); +			goto out_abort_free_ticket; +		} + +		/*  		 * Higher sequences will wait for this one so skip them. -		 * Don't wait for own own sequence, either. +		 * Don't wait for our own sequence, either.  		 */  		if (new_ctx->sequence >= ctx->sequence)  			continue; @@ -679,8 +681,14 @@ xlog_cil_push_background(  } +/* + * xlog_cil_push_now() is used to trigger an immediate CIL push to the sequence + * number that is passed. When it returns, the work will be queued for + * @push_seq, but it won't be completed. The caller is expected to do any + * waiting for push_seq to complete if it is required. 
+ */  static void -xlog_cil_push_foreground( +xlog_cil_push_now(  	struct xlog	*log,  	xfs_lsn_t	push_seq)  { @@ -705,10 +713,22 @@ xlog_cil_push_foreground(  	}  	cil->xc_push_seq = push_seq; +	queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);  	spin_unlock(&cil->xc_push_lock); +} -	/* do the push now */ -	xlog_cil_push(log); +bool +xlog_cil_empty( +	struct xlog	*log) +{ +	struct xfs_cil	*cil = log->l_cilp; +	bool		empty = false; + +	spin_lock(&cil->xc_push_lock); +	if (list_empty(&cil->xc_cil)) +		empty = true; +	spin_unlock(&cil->xc_push_lock); +	return empty;  }  /* @@ -724,7 +744,7 @@ xlog_cil_push_foreground(   * background commit, returns without it held once background commits are   * allowed again.   */ -int +void  xfs_log_commit_cil(  	struct xfs_mount	*mp,  	struct xfs_trans	*tp, @@ -770,7 +790,6 @@ xfs_log_commit_cil(  	xlog_cil_push_background(log);  	up_read(&cil->xc_ctx_lock); -	return 0;  }  /* @@ -799,7 +818,8 @@ xlog_cil_force_lsn(  	 * xlog_cil_push() handles racing pushes for the same sequence,  	 * so no need to deal with it here.  	 */ -	xlog_cil_push_foreground(log, sequence); +restart: +	xlog_cil_push_now(log, sequence);  	/*  	 * See if we can find a previous sequence still committing. @@ -807,9 +827,15 @@ xlog_cil_force_lsn(  	 * before allowing the force of push_seq to go ahead. Hence block  	 * on commits for those as well.  	 */ -restart:  	spin_lock(&cil->xc_push_lock);  	list_for_each_entry(ctx, &cil->xc_committing, committing) {  		/* +		 * Avoid getting stuck in this loop because we were woken by the +		 * shutdown, but then went back to sleep once already in the +		 * shutdown state. +		 */ +		if (XLOG_FORCED_SHUTDOWN(log)) +			goto out_shutdown;  		if (ctx->sequence > sequence)  			continue;  		if (!ctx->commit_lsn) { @@ -825,8 +851,39 @@ restart:  		/* found it! */  		commit_lsn = ctx->commit_lsn;  	} + +	/* +	 * The call to xlog_cil_push_now() executes the push in the background. +	 * Hence by the time we have got here our sequence may not have been +	 * pushed yet. This is true if the current sequence still matches the +	 * push sequence after the above wait loop and the CIL still contains +	 * dirty objects. +	 * +	 * When the push occurs, it will empty the CIL and atomically increment +	 * the current sequence past the push sequence and move it into the +	 * committing list. Of course, if the CIL is clean at the time of the +	 * push, it won't have pushed the CIL at all, so in that case we should +	 * try the push for this sequence again from the start just in case. +	 */ +	if (sequence == cil->xc_current_sequence && +	    !list_empty(&cil->xc_cil)) { +		spin_unlock(&cil->xc_push_lock); +		goto restart; +	} +  	spin_unlock(&cil->xc_push_lock);  	return commit_lsn; + +	/* +	 * We detected a shutdown in progress. We need to trigger the log force +	 * to pass through its iclog state machine error handling, even though +	 * we are already in a shutdown state. Hence we can't return +	 * NULLCOMMITLSN here as that has special meaning to log forces (i.e. +	 * LSN is already stable), so we return a zero LSN instead. +	 */ +out_shutdown: +	spin_unlock(&cil->xc_push_lock); +	return 0;  }  /* diff --git a/fs/xfs/xfs_log_format.h b/fs/xfs/xfs_log_format.h index ca7e28a8ed3..f0969c77bdb 100644 --- a/fs/xfs/xfs_log_format.h +++ b/fs/xfs/xfs_log_format.h @@ -234,178 +234,6 @@ typedef struct xfs_trans_header {  	{ XFS_LI_ICREATE,	"XFS_LI_ICREATE" }  /* - * Transaction types.  Used to distinguish types of buffers. 
- */ -#define XFS_TRANS_SETATTR_NOT_SIZE	1 -#define XFS_TRANS_SETATTR_SIZE		2 -#define XFS_TRANS_INACTIVE		3 -#define XFS_TRANS_CREATE		4 -#define XFS_TRANS_CREATE_TRUNC		5 -#define XFS_TRANS_TRUNCATE_FILE		6 -#define XFS_TRANS_REMOVE		7 -#define XFS_TRANS_LINK			8 -#define XFS_TRANS_RENAME		9 -#define XFS_TRANS_MKDIR			10 -#define XFS_TRANS_RMDIR			11 -#define XFS_TRANS_SYMLINK		12 -#define XFS_TRANS_SET_DMATTRS		13 -#define XFS_TRANS_GROWFS		14 -#define XFS_TRANS_STRAT_WRITE		15 -#define XFS_TRANS_DIOSTRAT		16 -/* 17 was XFS_TRANS_WRITE_SYNC */ -#define	XFS_TRANS_WRITEID		18 -#define	XFS_TRANS_ADDAFORK		19 -#define	XFS_TRANS_ATTRINVAL		20 -#define	XFS_TRANS_ATRUNCATE		21 -#define	XFS_TRANS_ATTR_SET		22 -#define	XFS_TRANS_ATTR_RM		23 -#define	XFS_TRANS_ATTR_FLAG		24 -#define	XFS_TRANS_CLEAR_AGI_BUCKET	25 -#define XFS_TRANS_QM_SBCHANGE		26 -/* - * Dummy entries since we use the transaction type to index into the - * trans_type[] in xlog_recover_print_trans_head() - */ -#define XFS_TRANS_DUMMY1		27 -#define XFS_TRANS_DUMMY2		28 -#define XFS_TRANS_QM_QUOTAOFF		29 -#define XFS_TRANS_QM_DQALLOC		30 -#define XFS_TRANS_QM_SETQLIM		31 -#define XFS_TRANS_QM_DQCLUSTER		32 -#define XFS_TRANS_QM_QINOCREATE		33 -#define XFS_TRANS_QM_QUOTAOFF_END	34 -#define XFS_TRANS_SB_UNIT		35 -#define XFS_TRANS_FSYNC_TS		36 -#define	XFS_TRANS_GROWFSRT_ALLOC	37 -#define	XFS_TRANS_GROWFSRT_ZERO		38 -#define	XFS_TRANS_GROWFSRT_FREE		39 -#define	XFS_TRANS_SWAPEXT		40 -#define	XFS_TRANS_SB_COUNT		41 -#define	XFS_TRANS_CHECKPOINT		42 -#define	XFS_TRANS_ICREATE		43 -#define	XFS_TRANS_TYPE_MAX		43 -/* new transaction types need to be reflected in xfs_logprint(8) */ - -#define XFS_TRANS_TYPES \ -	{ XFS_TRANS_SETATTR_NOT_SIZE,	"SETATTR_NOT_SIZE" }, \ -	{ XFS_TRANS_SETATTR_SIZE,	"SETATTR_SIZE" }, \ -	{ XFS_TRANS_INACTIVE,		"INACTIVE" }, \ -	{ XFS_TRANS_CREATE,		"CREATE" }, \ -	{ XFS_TRANS_CREATE_TRUNC,	"CREATE_TRUNC" }, \ -	{ XFS_TRANS_TRUNCATE_FILE,	"TRUNCATE_FILE" }, \ -	{ XFS_TRANS_REMOVE,		"REMOVE" }, \ -	{ XFS_TRANS_LINK,		"LINK" }, \ -	{ XFS_TRANS_RENAME,		"RENAME" }, \ -	{ XFS_TRANS_MKDIR,		"MKDIR" }, \ -	{ XFS_TRANS_RMDIR,		"RMDIR" }, \ -	{ XFS_TRANS_SYMLINK,		"SYMLINK" }, \ -	{ XFS_TRANS_SET_DMATTRS,	"SET_DMATTRS" }, \ -	{ XFS_TRANS_GROWFS,		"GROWFS" }, \ -	{ XFS_TRANS_STRAT_WRITE,	"STRAT_WRITE" }, \ -	{ XFS_TRANS_DIOSTRAT,		"DIOSTRAT" }, \ -	{ XFS_TRANS_WRITEID,		"WRITEID" }, \ -	{ XFS_TRANS_ADDAFORK,		"ADDAFORK" }, \ -	{ XFS_TRANS_ATTRINVAL,		"ATTRINVAL" }, \ -	{ XFS_TRANS_ATRUNCATE,		"ATRUNCATE" }, \ -	{ XFS_TRANS_ATTR_SET,		"ATTR_SET" }, \ -	{ XFS_TRANS_ATTR_RM,		"ATTR_RM" }, \ -	{ XFS_TRANS_ATTR_FLAG,		"ATTR_FLAG" }, \ -	{ XFS_TRANS_CLEAR_AGI_BUCKET,	"CLEAR_AGI_BUCKET" }, \ -	{ XFS_TRANS_QM_SBCHANGE,	"QM_SBCHANGE" }, \ -	{ XFS_TRANS_QM_QUOTAOFF,	"QM_QUOTAOFF" }, \ -	{ XFS_TRANS_QM_DQALLOC,		"QM_DQALLOC" }, \ -	{ XFS_TRANS_QM_SETQLIM,		"QM_SETQLIM" }, \ -	{ XFS_TRANS_QM_DQCLUSTER,	"QM_DQCLUSTER" }, \ -	{ XFS_TRANS_QM_QINOCREATE,	"QM_QINOCREATE" }, \ -	{ XFS_TRANS_QM_QUOTAOFF_END,	"QM_QOFF_END" }, \ -	{ XFS_TRANS_SB_UNIT,		"SB_UNIT" }, \ -	{ XFS_TRANS_FSYNC_TS,		"FSYNC_TS" }, \ -	{ XFS_TRANS_GROWFSRT_ALLOC,	"GROWFSRT_ALLOC" }, \ -	{ XFS_TRANS_GROWFSRT_ZERO,	"GROWFSRT_ZERO" }, \ -	{ XFS_TRANS_GROWFSRT_FREE,	"GROWFSRT_FREE" }, \ -	{ XFS_TRANS_SWAPEXT,		"SWAPEXT" }, \ -	{ XFS_TRANS_SB_COUNT,		"SB_COUNT" }, \ -	{ XFS_TRANS_CHECKPOINT,		"CHECKPOINT" }, \ -	{ XFS_TRANS_DUMMY1,		"DUMMY1" }, \ -	{ XFS_TRANS_DUMMY2,		"DUMMY2" }, \ -	{ XLOG_UNMOUNT_REC_TYPE,	"UNMOUNT" } - -/* - * This structure is used to track log items 
associated with - * a transaction.  It points to the log item and keeps some - * flags to track the state of the log item.  It also tracks - * the amount of space needed to log the item it describes - * once we get to commit processing (see xfs_trans_commit()). - */ -struct xfs_log_item_desc { -	struct xfs_log_item	*lid_item; -	struct list_head	lid_trans; -	unsigned char		lid_flags; -}; - -#define XFS_LID_DIRTY		0x1 - -/* - * Values for t_flags. - */ -#define	XFS_TRANS_DIRTY		0x01	/* something needs to be logged */ -#define	XFS_TRANS_SB_DIRTY	0x02	/* superblock is modified */ -#define	XFS_TRANS_PERM_LOG_RES	0x04	/* xact took a permanent log res */ -#define	XFS_TRANS_SYNC		0x08	/* make commit synchronous */ -#define XFS_TRANS_DQ_DIRTY	0x10	/* at least one dquot in trx dirty */ -#define XFS_TRANS_RESERVE	0x20    /* OK to use reserved data blocks */ -#define XFS_TRANS_FREEZE_PROT	0x40	/* Transaction has elevated writer -					   count in superblock */ - -/* - * Values for call flags parameter. - */ -#define	XFS_TRANS_RELEASE_LOG_RES	0x4 -#define	XFS_TRANS_ABORT			0x8 - -/* - * Field values for xfs_trans_mod_sb. - */ -#define	XFS_TRANS_SB_ICOUNT		0x00000001 -#define	XFS_TRANS_SB_IFREE		0x00000002 -#define	XFS_TRANS_SB_FDBLOCKS		0x00000004 -#define	XFS_TRANS_SB_RES_FDBLOCKS	0x00000008 -#define	XFS_TRANS_SB_FREXTENTS		0x00000010 -#define	XFS_TRANS_SB_RES_FREXTENTS	0x00000020 -#define	XFS_TRANS_SB_DBLOCKS		0x00000040 -#define	XFS_TRANS_SB_AGCOUNT		0x00000080 -#define	XFS_TRANS_SB_IMAXPCT		0x00000100 -#define	XFS_TRANS_SB_REXTSIZE		0x00000200 -#define	XFS_TRANS_SB_RBMBLOCKS		0x00000400 -#define	XFS_TRANS_SB_RBLOCKS		0x00000800 -#define	XFS_TRANS_SB_REXTENTS		0x00001000 -#define	XFS_TRANS_SB_REXTSLOG		0x00002000 - -/* - * Here we centralize the specification of XFS meta-data buffer - * reference count values.  This determine how hard the buffer - * cache tries to hold onto the buffer. - */ -#define	XFS_AGF_REF		4 -#define	XFS_AGI_REF		4 -#define	XFS_AGFL_REF		3 -#define	XFS_INO_BTREE_REF	3 -#define	XFS_ALLOC_BTREE_REF	2 -#define	XFS_BMAP_BTREE_REF	2 -#define	XFS_DIR_BTREE_REF	2 -#define	XFS_INO_REF		2 -#define	XFS_ATTR_BTREE_REF	1 -#define	XFS_DQUOT_REF		1 - -/* - * Flags for xfs_trans_ichgtime(). - */ -#define	XFS_ICHGTIME_MOD	0x1	/* data fork modification timestamp */ -#define	XFS_ICHGTIME_CHG	0x2	/* inode field change timestamp */ -#define	XFS_ICHGTIME_CREATE	0x4	/* inode create timestamp */ - - -/*   * Inode Log Item Format definitions.   *   * This is the structure used to lay out an inode log item in the @@ -797,7 +625,6 @@ typedef struct xfs_qoff_logformat {  	char			qf_pad[12];	/* padding for future */  } xfs_qoff_logformat_t; -  /*   * Disk quotas status in m_qflags, and also sb_qflags. 16 bits.   
*/ @@ -849,8 +676,4 @@ struct xfs_icreate_log {  	__be32		icl_gen;	/* inode generation number to use */  }; -int	xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes); -int	xfs_log_calc_minimum_size(struct xfs_mount *); - -  #endif /* __XFS_LOG_FORMAT_H__ */ diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h index 136654b9400..9bc403a9e54 100644 --- a/fs/xfs/xfs_log_priv.h +++ b/fs/xfs/xfs_log_priv.h @@ -22,6 +22,7 @@ struct xfs_buf;  struct xlog;  struct xlog_ticket;  struct xfs_mount; +struct xfs_log_callback;  /*   * Flags for log structure @@ -227,8 +228,8 @@ typedef struct xlog_in_core {  	/* Callback structures need their own cacheline */  	spinlock_t		ic_callback_lock ____cacheline_aligned_in_smp; -	xfs_log_callback_t	*ic_callback; -	xfs_log_callback_t	**ic_callback_tail; +	struct xfs_log_callback	*ic_callback; +	struct xfs_log_callback	**ic_callback_tail;  	/* reference counts need their own cacheline */  	atomic_t		ic_refcnt ____cacheline_aligned_in_smp; @@ -254,7 +255,7 @@ struct xfs_cil_ctx {  	int			space_used;	/* aggregate size of regions */  	struct list_head	busy_extents;	/* busy extents in chkpt */  	struct xfs_log_vec	*lv_chain;	/* logvecs being pushed */ -	xfs_log_callback_t	log_cb;		/* completion callback hook. */ +	struct xfs_log_callback	log_cb;		/* completion callback hook. */  	struct list_head	committing;	/* ctx committing list */  }; @@ -514,12 +515,10 @@ xlog_assign_grant_head(atomic64_t *head, int cycle, int space)  /*   * Committed Item List interfaces   */ -int -xlog_cil_init(struct xlog *log); -void -xlog_cil_init_post_recovery(struct xlog *log); -void -xlog_cil_destroy(struct xlog *log); +int	xlog_cil_init(struct xlog *log); +void	xlog_cil_init_post_recovery(struct xlog *log); +void	xlog_cil_destroy(struct xlog *log); +bool	xlog_cil_empty(struct xlog *log);  /*   * CIL force routines diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index cc179878fe4..981af0f6504 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -17,42 +17,34 @@   */  #include "xfs.h"  #include "xfs_fs.h" +#include "xfs_shared.h"  #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h"  #include "xfs_inum.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_error.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_btree.h" -#include "xfs_dinode.h" +#include "xfs_da_format.h"  #include "xfs_inode.h" -#include "xfs_inode_item.h" -#include "xfs_alloc.h" -#include "xfs_ialloc.h" +#include "xfs_trans.h" +#include "xfs_log.h"  #include "xfs_log_priv.h" -#include "xfs_buf_item.h"  #include "xfs_log_recover.h" +#include "xfs_inode_item.h"  #include "xfs_extfree_item.h"  #include "xfs_trans_priv.h" +#include "xfs_alloc.h" +#include "xfs_ialloc.h"  #include "xfs_quota.h"  #include "xfs_cksum.h"  #include "xfs_trace.h"  #include "xfs_icache.h" -#include "xfs_icreate_item.h" - -/* Need all the magic numbers and buffer ops structures from these headers */ -#include "xfs_symlink.h" -#include "xfs_da_btree.h" -#include "xfs_dir2_format.h" +#include "xfs_bmap_btree.h" +#include "xfs_dinode.h" +#include "xfs_error.h"  #include "xfs_dir2.h" -#include "xfs_attr_leaf.h" -#include "xfs_attr_remote.h"  #define BLK_AVG(blk1, blk2)	((blk1+blk2) >> 1) @@ -201,7 +193,10 @@ xlog_bread_noalign(  	bp->b_io_length = nbblks;  	bp->b_error = 0; -	xfsbdstrat(log->l_mp, bp); +	if 
(XFS_FORCED_SHUTDOWN(log->l_mp)) +		return XFS_ERROR(EIO); + +	xfs_buf_iorequest(bp);  	error = xfs_buf_iowait(bp);  	if (error)  		xfs_buf_ioerror_alert(bp, __func__); @@ -305,9 +300,9 @@ xlog_header_check_dump(  	xfs_mount_t		*mp,  	xlog_rec_header_t	*head)  { -	xfs_debug(mp, "%s:  SB : uuid = %pU, fmt = %d\n", +	xfs_debug(mp, "%s:  SB : uuid = %pU, fmt = %d",  		__func__, &mp->m_sb.sb_uuid, XLOG_FMT); -	xfs_debug(mp, "    log : uuid = %pU, fmt = %d\n", +	xfs_debug(mp, "    log : uuid = %pU, fmt = %d",  		&head->h_fs_uuid, be32_to_cpu(head->h_fmt));  }  #else @@ -1585,6 +1580,7 @@ xlog_recover_add_to_trans(  		"bad number of regions (%d) in inode log format",  				  in_f->ilf_size);  			ASSERT(0); +			kmem_free(ptr);  			return XFS_ERROR(EIO);  		} @@ -1658,6 +1654,7 @@ xlog_recover_reorder_trans(  	int			pass)  {  	xlog_recover_item_t	*item, *n; +	int			error = 0;  	LIST_HEAD(sort_list);  	LIST_HEAD(cancel_list);  	LIST_HEAD(buffer_list); @@ -1699,9 +1696,17 @@ xlog_recover_reorder_trans(  				"%s: unrecognized type of log operation",  				__func__);  			ASSERT(0); -			return XFS_ERROR(EIO); +			/* +			 * return the remaining items back to the transaction +			 * item list so they can be freed in caller. +			 */ +			if (!list_empty(&sort_list)) +				list_splice_init(&sort_list, &trans->r_itemq); +			error = XFS_ERROR(EIO); +			goto out;  		}  	} +out:  	ASSERT(list_empty(&sort_list));  	if (!list_empty(&buffer_list))  		list_splice(&buffer_list, &trans->r_itemq); @@ -1711,7 +1716,7 @@ xlog_recover_reorder_trans(  		list_splice_tail(&inode_buffer_list, &trans->r_itemq);  	if (!list_empty(&cancel_list))  		list_splice_tail(&cancel_list, &trans->r_itemq); -	return 0; +	return error;  }  /* @@ -2133,7 +2138,9 @@ xlog_recover_validate_buf_type(  			bp->b_ops = &xfs_allocbt_buf_ops;  			break;  		case XFS_IBT_CRC_MAGIC: +		case XFS_FIBT_CRC_MAGIC:  		case XFS_IBT_MAGIC: +		case XFS_FIBT_MAGIC:  			bp->b_ops = &xfs_inobt_buf_ops;  			break;  		case XFS_BMAP_CRC_MAGIC: @@ -2361,7 +2368,7 @@ xlog_recover_do_reg_buffer(  					item->ri_buf[i].i_len, __func__);  				goto next;  			} -			error = xfs_qm_dqcheck(mp, item->ri_buf[i].i_addr, +			error = xfs_dqcheck(mp, item->ri_buf[i].i_addr,  					       -1, 0, XFS_QMOPT_DOWARN,  					       "dquot_buf_recover");  			if (error) @@ -2393,133 +2400,6 @@ xlog_recover_do_reg_buffer(  }  /* - * Do some primitive error checking on ondisk dquot data structures. - */ -int -xfs_qm_dqcheck( -	struct xfs_mount *mp, -	xfs_disk_dquot_t *ddq, -	xfs_dqid_t	 id, -	uint		 type,	  /* used only when IO_dorepair is true */ -	uint		 flags, -	char		 *str) -{ -	xfs_dqblk_t	 *d = (xfs_dqblk_t *)ddq; -	int		errs = 0; - -	/* -	 * We can encounter an uninitialized dquot buffer for 2 reasons: -	 * 1. If we crash while deleting the quotainode(s), and those blks got -	 *    used for user data. This is because we take the path of regular -	 *    file deletion; however, the size field of quotainodes is never -	 *    updated, so all the tricks that we play in itruncate_finish -	 *    don't quite matter. -	 * -	 * 2. We don't play the quota buffers when there's a quotaoff logitem. -	 *    But the allocation will be replayed so we'll end up with an -	 *    uninitialized quota block. -	 * -	 * This is all fine; things are still consistent, and we haven't lost -	 * any quota information. Just don't complain about bad dquot blks. 
-	 */ -	if (ddq->d_magic != cpu_to_be16(XFS_DQUOT_MAGIC)) { -		if (flags & XFS_QMOPT_DOWARN) -			xfs_alert(mp, -			"%s : XFS dquot ID 0x%x, magic 0x%x != 0x%x", -			str, id, be16_to_cpu(ddq->d_magic), XFS_DQUOT_MAGIC); -		errs++; -	} -	if (ddq->d_version != XFS_DQUOT_VERSION) { -		if (flags & XFS_QMOPT_DOWARN) -			xfs_alert(mp, -			"%s : XFS dquot ID 0x%x, version 0x%x != 0x%x", -			str, id, ddq->d_version, XFS_DQUOT_VERSION); -		errs++; -	} - -	if (ddq->d_flags != XFS_DQ_USER && -	    ddq->d_flags != XFS_DQ_PROJ && -	    ddq->d_flags != XFS_DQ_GROUP) { -		if (flags & XFS_QMOPT_DOWARN) -			xfs_alert(mp, -			"%s : XFS dquot ID 0x%x, unknown flags 0x%x", -			str, id, ddq->d_flags); -		errs++; -	} - -	if (id != -1 && id != be32_to_cpu(ddq->d_id)) { -		if (flags & XFS_QMOPT_DOWARN) -			xfs_alert(mp, -			"%s : ondisk-dquot 0x%p, ID mismatch: " -			"0x%x expected, found id 0x%x", -			str, ddq, id, be32_to_cpu(ddq->d_id)); -		errs++; -	} - -	if (!errs && ddq->d_id) { -		if (ddq->d_blk_softlimit && -		    be64_to_cpu(ddq->d_bcount) > -				be64_to_cpu(ddq->d_blk_softlimit)) { -			if (!ddq->d_btimer) { -				if (flags & XFS_QMOPT_DOWARN) -					xfs_alert(mp, -			"%s : Dquot ID 0x%x (0x%p) BLK TIMER NOT STARTED", -					str, (int)be32_to_cpu(ddq->d_id), ddq); -				errs++; -			} -		} -		if (ddq->d_ino_softlimit && -		    be64_to_cpu(ddq->d_icount) > -				be64_to_cpu(ddq->d_ino_softlimit)) { -			if (!ddq->d_itimer) { -				if (flags & XFS_QMOPT_DOWARN) -					xfs_alert(mp, -			"%s : Dquot ID 0x%x (0x%p) INODE TIMER NOT STARTED", -					str, (int)be32_to_cpu(ddq->d_id), ddq); -				errs++; -			} -		} -		if (ddq->d_rtb_softlimit && -		    be64_to_cpu(ddq->d_rtbcount) > -				be64_to_cpu(ddq->d_rtb_softlimit)) { -			if (!ddq->d_rtbtimer) { -				if (flags & XFS_QMOPT_DOWARN) -					xfs_alert(mp, -			"%s : Dquot ID 0x%x (0x%p) RTBLK TIMER NOT STARTED", -					str, (int)be32_to_cpu(ddq->d_id), ddq); -				errs++; -			} -		} -	} - -	if (!errs || !(flags & XFS_QMOPT_DQREPAIR)) -		return errs; - -	if (flags & XFS_QMOPT_DOWARN) -		xfs_notice(mp, "Re-initializing dquot ID 0x%x", id); - -	/* -	 * Typically, a repair is only requested by quotacheck. -	 */ -	ASSERT(id != -1); -	ASSERT(flags & XFS_QMOPT_DQREPAIR); -	memset(d, 0, sizeof(xfs_dqblk_t)); - -	d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); -	d->dd_diskdq.d_version = XFS_DQUOT_VERSION; -	d->dd_diskdq.d_flags = type; -	d->dd_diskdq.d_id = cpu_to_be32(id); - -	if (xfs_sb_version_hascrc(&mp->m_sb)) { -		uuid_copy(&d->dd_uuid, &mp->m_sb.sb_uuid); -		xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk), -				 XFS_DQUOT_CRC_OFF); -	} - -	return errs; -} - -/*   * Perform a dquot buffer recovery.   * Simple algorithm: if we have found a QUOTAOFF log item of the same type   * (ie. USR or GRP), then just toss this buffer away; don't recover it. @@ -2648,19 +2528,19 @@ xlog_recover_buffer_pass2(  	 *  	 * Also make sure that only inode buffers with good sizes stay in  	 * the buffer cache.  The kernel moves inodes in buffers of 1 block -	 * or XFS_INODE_CLUSTER_SIZE bytes, whichever is bigger.  The inode +	 * or mp->m_inode_cluster_size bytes, whichever is bigger.  The inode  	 * buffers in the log can be a different size if the log was generated  	 * by an older kernel using unclustered inode buffers or a newer kernel  	 * running with a different inode cluster size.  
Regardless, if the -	 * the inode buffer size isn't MAX(blocksize, XFS_INODE_CLUSTER_SIZE) -	 * for *our* value of XFS_INODE_CLUSTER_SIZE, then we need to keep +	 * the inode buffer size isn't MAX(blocksize, mp->m_inode_cluster_size) +	 * for *our* value of mp->m_inode_cluster_size, then we need to keep  	 * the buffer out of the buffer cache so that the buffer won't  	 * overlap with future reads of those inodes.  	 */  	if (XFS_DINODE_MAGIC ==  	    be16_to_cpu(*((__be16 *)xfs_buf_offset(bp, 0))) &&  	    (BBTOB(bp->b_io_length) != MAX(log->l_mp->m_sb.sb_blocksize, -			(__uint32_t)XFS_INODE_CLUSTER_SIZE(log->l_mp)))) { +			(__uint32_t)log->l_mp->m_inode_cluster_size))) {  		xfs_buf_stale(bp);  		error = xfs_bwrite(bp);  	} else { @@ -3124,7 +3004,7 @@ xlog_recover_dquot_pass2(  	 */  	dq_f = item->ri_buf[0].i_addr;  	ASSERT(dq_f); -	error = xfs_qm_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, +	error = xfs_dqcheck(mp, recddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,  			   "xlog_recover_dquot_pass2 (log copy)");  	if (error)  		return XFS_ERROR(EIO); @@ -3144,7 +3024,7 @@ xlog_recover_dquot_pass2(  	 * was among a chunk of dquots created earlier, and we did some  	 * minimal initialization then.  	 */ -	error = xfs_qm_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN, +	error = xfs_dqcheck(mp, ddq, dq_f->qlf_id, 0, XFS_QMOPT_DOWARN,  			   "xlog_recover_dquot_pass2");  	if (error) {  		xfs_buf_relse(bp); @@ -3267,7 +3147,7 @@ xlog_recover_efd_pass2(  		}  		lip = xfs_trans_ail_cursor_next(ailp, &cur);  	} -	xfs_trans_ail_cursor_done(ailp, &cur); +	xfs_trans_ail_cursor_done(&cur);  	spin_unlock(&ailp->xa_lock);  	return 0; @@ -3333,10 +3213,10 @@ xlog_recover_do_icreate_pass2(  	}  	/* existing allocation is fixed value */ -	ASSERT(count == XFS_IALLOC_INODES(mp)); -	ASSERT(length == XFS_IALLOC_BLOCKS(mp)); -	if (count != XFS_IALLOC_INODES(mp) || -	     length != XFS_IALLOC_BLOCKS(mp)) { +	ASSERT(count == mp->m_ialloc_inos); +	ASSERT(length == mp->m_ialloc_blks); +	if (count != mp->m_ialloc_inos || +	     length != mp->m_ialloc_blks) {  		xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count 2");  		return EINVAL;  	} @@ -3642,8 +3522,7 @@ out:  STATIC int  xlog_recover_unmount_trans( -	struct xlog		*log, -	struct xlog_recover	*trans) +	struct xlog		*log)  {  	/* Do nothing now */  	xfs_warn(log->l_mp, "%s: Unmount LR", __func__); @@ -3717,7 +3596,7 @@ xlog_recover_process_data(  								trans, pass);  				break;  			case XLOG_UNMOUNT_TRANS: -				error = xlog_recover_unmount_trans(log, trans); +				error = xlog_recover_unmount_trans(log);  				break;  			case XLOG_WAS_CONT_TRANS:  				error = xlog_recover_add_to_cont_trans(log, @@ -3742,8 +3621,10 @@ xlog_recover_process_data(  				error = XFS_ERROR(EIO);  				break;  			} -			if (error) +			if (error) { +				xlog_recover_free_trans(trans);  				return error; +			}  		}  		dp += be32_to_cpu(ohead->oh_len);  		num_logops--; @@ -3877,7 +3758,7 @@ xlog_recover_process_efis(  		lip = xfs_trans_ail_cursor_next(ailp, &cur);  	}  out: -	xfs_trans_ail_cursor_done(ailp, &cur); +	xfs_trans_ail_cursor_done(&cur);  	spin_unlock(&ailp->xa_lock);  	return error;  } @@ -4076,7 +3957,7 @@ xlog_unpack_data_crc(  	if (crc != rhead->h_crc) {  		if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) {  			xfs_alert(log->l_mp, -		"log record CRC mismatch: found 0x%x, expected 0x%x.\n", +		"log record CRC mismatch: found 0x%x, expected 0x%x.",  					le32_to_cpu(rhead->h_crc),  					le32_to_cpu(crc));  			xfs_hex_dump(dp, 32); @@ -4531,7 
+4412,13 @@ xlog_do_recover(  	XFS_BUF_READ(bp);  	XFS_BUF_UNASYNC(bp);  	bp->b_ops = &xfs_sb_buf_ops; -	xfsbdstrat(log->l_mp, bp); + +	if (XFS_FORCED_SHUTDOWN(log->l_mp)) { +		xfs_buf_relse(bp); +		return XFS_ERROR(EIO); +	} + +	xfs_buf_iorequest(bp);  	error = xfs_buf_iowait(bp);  	if (error) {  		xfs_buf_ioerror_alert(bp, __func__); diff --git a/fs/xfs/xfs_log_rlimit.c b/fs/xfs/xfs_log_rlimit.c index bbcec0bbc12..ee7e0e80246 100644 --- a/fs/xfs/xfs_log_rlimit.c +++ b/fs/xfs/xfs_log_rlimit.c @@ -17,16 +17,19 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_ag.h"  #include "xfs_sb.h"  #include "xfs_mount.h" +#include "xfs_da_format.h"  #include "xfs_trans_space.h" -#include "xfs_bmap_btree.h"  #include "xfs_inode.h"  #include "xfs_da_btree.h"  #include "xfs_attr_leaf.h" +#include "xfs_bmap_btree.h"  /*   * Calculate the maximum length in bytes that would be required for a local @@ -39,7 +42,7 @@ xfs_log_calc_max_attrsetm_res(  	int			size;  	int			nblks; -	size = xfs_attr_leaf_entsize_local_max(mp->m_sb.sb_blocksize) - +	size = xfs_attr_leaf_entsize_local_max(mp->m_attr_geo->blksize) -  	       MAXNAMELEN - 1;  	nblks = XFS_DAENTER_SPACE_RES(mp, XFS_ATTR_FORK);  	nblks += XFS_B_TO_FSB(mp, size); diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c index 9163dc14053..63ca2f0420b 100644 --- a/fs/xfs/xfs_message.c +++ b/fs/xfs/xfs_message.c @@ -17,9 +17,8 @@  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 5dcc68019d1..3507cd0ec40 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -17,35 +17,31 @@   */  #include "xfs.h"  #include "xfs_fs.h" +#include "xfs_shared.h"  #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h"  #include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_da_btree.h" -#include "xfs_dir2_format.h" -#include "xfs_dir2.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" +#include "xfs_da_format.h"  #include "xfs_inode.h" -#include "xfs_btree.h" +#include "xfs_dir2.h"  #include "xfs_ialloc.h"  #include "xfs_alloc.h"  #include "xfs_rtalloc.h"  #include "xfs_bmap.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" +#include "xfs_log.h"  #include "xfs_error.h"  #include "xfs_quota.h"  #include "xfs_fsops.h"  #include "xfs_trace.h"  #include "xfs_icache.h" -#include "xfs_cksum.h" -#include "xfs_buf_item.h" +#include "xfs_dinode.h"  #ifdef HAVE_PERCPU_SB @@ -286,22 +282,29 @@ xfs_readsb(  	struct xfs_sb	*sbp = &mp->m_sb;  	int		error;  	int		loud = !(flags & XFS_MFSI_QUIET); +	const struct xfs_buf_ops *buf_ops;  	ASSERT(mp->m_sb_bp == NULL);  	ASSERT(mp->m_ddev_targp != NULL);  	/* +	 * For the initial read, we must guess at the sector +	 * size based on the block device.  It's enough to +	 * get the sb_sectsize out of the superblock and +	 * then reread with the proper length. +	 * We don't verify it yet, because it may not be complete. 
+	 */ +	sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); +	buf_ops = NULL; + +	/*  	 * Allocate a (locked) buffer to hold the superblock.  	 * This will be kept around at all times to optimize  	 * access to the superblock.  	 */ -	sector_size = xfs_getsize_buftarg(mp->m_ddev_targp); -  reread:  	bp = xfs_buf_read_uncached(mp->m_ddev_targp, XFS_SB_DADDR, -				   BTOBB(sector_size), 0, -				   loud ? &xfs_sb_buf_ops -				        : &xfs_sb_quiet_buf_ops); +				   BTOBB(sector_size), 0, buf_ops);  	if (!bp) {  		if (loud)  			xfs_warn(mp, "SB buffer read failed"); @@ -311,14 +314,28 @@ reread:  		error = bp->b_error;  		if (loud)  			xfs_warn(mp, "SB validate failed with error %d.", error); +		/* bad CRC means corrupted metadata */ +		if (error == EFSBADCRC) +			error = EFSCORRUPTED;  		goto release_buf;  	}  	/*  	 * Initialize the mount structure from the superblock.  	 */ -	xfs_sb_from_disk(&mp->m_sb, XFS_BUF_TO_SBP(bp)); -	xfs_sb_quota_from_disk(&mp->m_sb); +	xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp)); +	xfs_sb_quota_from_disk(sbp); + +	/* +	 * If we haven't validated the superblock, do so now before we try +	 * to check the sector size and reread the superblock appropriately. +	 */ +	if (sbp->sb_magicnum != XFS_SB_MAGIC) { +		if (loud) +			xfs_warn(mp, "Invalid superblock magic number"); +		error = EINVAL; +		goto release_buf; +	}  	/*  	 * We must be able to do sector-sized and sector-aligned IO. @@ -331,13 +348,14 @@ reread:  		goto release_buf;  	} -	/* -	 * If device sector size is smaller than the superblock size, -	 * re-read the superblock so the buffer is correctly sized. -	 */ -	if (sector_size < sbp->sb_sectsize) { +	if (buf_ops == NULL) { +		/* +		 * Re-read the superblock so the buffer is correctly sized, +		 * and properly verified. +		 */  		xfs_buf_relse(bp);  		sector_size = sbp->sb_sectsize; +		buf_ops = loud ? &xfs_sb_buf_ops : &xfs_sb_quiet_buf_ops;  		goto reread;  	} @@ -690,6 +708,12 @@ xfs_mountfs(  			mp->m_update_flags |= XFS_SB_VERSIONNUM;  	} +	/* always use v2 inodes by default now */ +	if (!(mp->m_sb.sb_versionnum & XFS_SB_VERSION_NLINKBIT)) { +		mp->m_sb.sb_versionnum |= XFS_SB_VERSION_NLINKBIT; +		mp->m_update_flags |= XFS_SB_VERSIONNUM; +	} +  	/*  	 * Check if sb_agblocks is aligned at stripe boundary  	 * If sb_agblocks is NOT aligned turn off m_dalign since @@ -723,8 +747,20 @@ xfs_mountfs(  	 * Set the inode cluster size.  	 * This may still be overridden by the file system  	 * block size if it is larger than the chosen cluster size. +	 * +	 * For v5 filesystems, scale the cluster size with the inode size to +	 * keep a constant ratio of inode per cluster buffer, but only if mkfs +	 * has set the inode alignment value appropriately for larger cluster +	 * sizes.  	 */  	mp->m_inode_cluster_size = XFS_INODE_BIG_CLUSTER_SIZE; +	if (xfs_sb_version_hascrc(&mp->m_sb)) { +		int	new_size = mp->m_inode_cluster_size; + +		new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE; +		if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size)) +			mp->m_inode_cluster_size = new_size; +	}  	/*  	 * Set inode alignment fields @@ -755,12 +791,11 @@ xfs_mountfs(  	mp->m_dmevmask = 0;	/* not persistent; set after each mount */ -	xfs_dir_mount(mp); - -	/* -	 * Initialize the attribute manager's entries. -	 */ -	mp->m_attr_magicpct = (mp->m_sb.sb_blocksize * 37) / 100; +	error = xfs_da_mount(mp); +	if (error) { +		xfs_warn(mp, "Failed dir/attr init: %d", error); +		goto out_remove_uuid; +	}  	/*  	 * Initialize the precomputed transaction reservations values. 
@@ -775,7 +810,7 @@ xfs_mountfs(  	error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);  	if (error) {  		xfs_warn(mp, "Failed per-ag init: %d", error); -		goto out_remove_uuid; +		goto out_free_dir;  	}  	if (!sbp->sb_logblocks) { @@ -950,6 +985,8 @@ xfs_mountfs(  	xfs_wait_buftarg(mp->m_ddev_targp);   out_free_perag:  	xfs_free_perag(mp); + out_free_dir: +	xfs_da_unmount(mp);   out_remove_uuid:  	xfs_uuid_unmount(mp);   out: @@ -1027,6 +1064,7 @@ xfs_unmountfs(  				"Freespace may not be correct on next mount.");  	xfs_log_unmount(mp); +	xfs_da_unmount(mp);  	xfs_uuid_unmount(mp);  #if defined(DEBUG) diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 1fa0584b562..7295a0b7c34 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -26,6 +26,8 @@ struct xfs_mru_cache;  struct xfs_nameops;  struct xfs_ail;  struct xfs_quotainfo; +struct xfs_dir_ops; +struct xfs_da_geometry;  #ifdef HAVE_PERCPU_SB @@ -95,6 +97,8 @@ typedef struct xfs_mount {  	uint			m_readio_blocks; /* min read size blocks */  	uint			m_writeio_log;	/* min write size log bytes */  	uint			m_writeio_blocks; /* min write size blocks */ +	struct xfs_da_geometry	*m_dir_geo;	/* directory block geometry */ +	struct xfs_da_geometry	*m_attr_geo;	/* attribute block geometry */  	struct xlog		*m_log;		/* log specific stuff */  	int			m_logbufs;	/* number of log buffers */  	int			m_logbsize;	/* size of each log buffer */ @@ -111,7 +115,7 @@ typedef struct xfs_mount {  	__uint8_t		m_blkbb_log;	/* blocklog - BBSHIFT */  	__uint8_t		m_agno_log;	/* log #ag's */  	__uint8_t		m_agino_log;	/* #bits for agino in inum */ -	__uint16_t		m_inode_cluster_size;/* min inode buf size */ +	uint			m_inode_cluster_size;/* min inode buf size */  	uint			m_blockmask;	/* sb_blocksize-1 */  	uint			m_blockwsize;	/* sb_blocksize in words */  	uint			m_blockwmask;	/* blockwsize-1 */ @@ -130,8 +134,6 @@ typedef struct xfs_mount {  	int			m_fixedfsid[2];	/* unchanged for life of FS */  	uint			m_dmevmask;	/* DMI events for this FS */  	__uint64_t		m_flags;	/* global mount flags */ -	uint			m_dir_node_ents; /* #entries in a dir danode */ -	uint			m_attr_node_ents; /* #entries in attr danode */  	int			m_ialloc_inos;	/* inodes in inode allocation */  	int			m_ialloc_blks;	/* blocks in inode allocation */  	int			m_inoalign_mask;/* mask sb_inoalignmt if used */ @@ -144,15 +146,10 @@ typedef struct xfs_mount {  	int			m_dalign;	/* stripe unit */  	int			m_swidth;	/* stripe width */  	int			m_sinoalign;	/* stripe unit inode alignment */ -	int			m_attr_magicpct;/* 37% of the blocksize */ -	int			m_dir_magicpct;	/* 37% of the dir blocksize */  	__uint8_t		m_sectbb_log;	/* sectlog - BBSHIFT */  	const struct xfs_nameops *m_dirnameops;	/* vector of dir name ops */ -	int			m_dirblksize;	/* directory block sz--bytes */ -	int			m_dirblkfsbs;	/* directory block sz--fsbs */ -	xfs_dablk_t		m_dirdatablk;	/* blockno of dir data v2 */ -	xfs_dablk_t		m_dirleafblk;	/* blockno of dir non-data v2 */ -	xfs_dablk_t		m_dirfreeblk;	/* blockno of dirfreeindex v2 */ +	const struct xfs_dir_ops *m_dir_inode_ops; /* vector of dir inode ops */ +	const struct xfs_dir_ops *m_nondir_inode_ops; /* !dir inode ops */  	uint			m_chsize;	/* size of next field */  	atomic_t		m_active_trans;	/* number trans frozen */  #ifdef HAVE_PERCPU_SB diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c index 4aff5639573..f99b4933dc2 100644 --- a/fs/xfs/xfs_mru_cache.c +++ b/fs/xfs/xfs_mru_cache.c @@ -100,14 +100,20 @@   * likely result in a loop in one of the lists.  
That's a sure-fire recipe for   * an infinite loop in the code.   */ -typedef struct xfs_mru_cache_elem -{ -	struct list_head list_node; -	unsigned long	key; -	void		*value; -} xfs_mru_cache_elem_t; +struct xfs_mru_cache { +	struct radix_tree_root	store;     /* Core storage data structure.  */ +	struct list_head	*lists;    /* Array of lists, one per grp.  */ +	struct list_head	reap_list; /* Elements overdue for reaping. */ +	spinlock_t		lock;      /* Lock to protect this struct.  */ +	unsigned int		grp_count; /* Number of discrete groups.    */ +	unsigned int		grp_time;  /* Time period spanned by grps.  */ +	unsigned int		lru_grp;   /* Group containing time zero.   */ +	unsigned long		time_zero; /* Time first element was added. */ +	xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */ +	struct delayed_work	work;      /* Workqueue data for reaping.   */ +	unsigned int		queued;	   /* work has been queued */ +}; -static kmem_zone_t		*xfs_mru_elem_zone;  static struct workqueue_struct	*xfs_mru_reap_wq;  /* @@ -129,12 +135,12 @@ static struct workqueue_struct	*xfs_mru_reap_wq;   */  STATIC unsigned long  _xfs_mru_cache_migrate( -	xfs_mru_cache_t	*mru, -	unsigned long	now) +	struct xfs_mru_cache	*mru, +	unsigned long		now)  { -	unsigned int	grp; -	unsigned int	migrated = 0; -	struct list_head *lru_list; +	unsigned int		grp; +	unsigned int		migrated = 0; +	struct list_head	*lru_list;  	/* Nothing to do if the data store is empty. */  	if (!mru->time_zero) @@ -193,11 +199,11 @@ _xfs_mru_cache_migrate(   */  STATIC void  _xfs_mru_cache_list_insert( -	xfs_mru_cache_t		*mru, -	xfs_mru_cache_elem_t	*elem) +	struct xfs_mru_cache	*mru, +	struct xfs_mru_cache_elem *elem)  { -	unsigned int	grp = 0; -	unsigned long	now = jiffies; +	unsigned int		grp = 0; +	unsigned long		now = jiffies;  	/*  	 * If the data store is empty, initialise time zero, leave grp set to @@ -231,10 +237,10 @@ _xfs_mru_cache_list_insert(   */  STATIC void  _xfs_mru_cache_clear_reap_list( -	xfs_mru_cache_t		*mru) __releases(mru->lock) __acquires(mru->lock) - +	struct xfs_mru_cache	*mru) +		__releases(mru->lock) __acquires(mru->lock)  { -	xfs_mru_cache_elem_t	*elem, *next; +	struct xfs_mru_cache_elem *elem, *next;  	struct list_head	tmp;  	INIT_LIST_HEAD(&tmp); @@ -252,15 +258,8 @@ _xfs_mru_cache_clear_reap_list(  	spin_unlock(&mru->lock);  	list_for_each_entry_safe(elem, next, &tmp, list_node) { - -		/* Remove the element from the reap list. */  		list_del_init(&elem->list_node); - -		/* Call the client's free function with the key and value pointer. */ -		mru->free_func(elem->key, elem->value); - -		/* Free the element structure. 
*/ -		kmem_zone_free(xfs_mru_elem_zone, elem); +		mru->free_func(elem);  	}  	spin_lock(&mru->lock); @@ -277,7 +276,8 @@ STATIC void  _xfs_mru_cache_reap(  	struct work_struct	*work)  { -	xfs_mru_cache_t		*mru = container_of(work, xfs_mru_cache_t, work.work); +	struct xfs_mru_cache	*mru = +		container_of(work, struct xfs_mru_cache, work.work);  	unsigned long		now, next;  	ASSERT(mru && mru->lists); @@ -304,28 +304,16 @@ _xfs_mru_cache_reap(  int  xfs_mru_cache_init(void)  { -	xfs_mru_elem_zone = kmem_zone_init(sizeof(xfs_mru_cache_elem_t), -	                                 "xfs_mru_cache_elem"); -	if (!xfs_mru_elem_zone) -		goto out; -  	xfs_mru_reap_wq = alloc_workqueue("xfs_mru_cache", WQ_MEM_RECLAIM, 1);  	if (!xfs_mru_reap_wq) -		goto out_destroy_mru_elem_zone; - +		return -ENOMEM;  	return 0; - - out_destroy_mru_elem_zone: -	kmem_zone_destroy(xfs_mru_elem_zone); - out: -	return -ENOMEM;  }  void  xfs_mru_cache_uninit(void)  {  	destroy_workqueue(xfs_mru_reap_wq); -	kmem_zone_destroy(xfs_mru_elem_zone);  }  /* @@ -336,14 +324,14 @@ xfs_mru_cache_uninit(void)   */  int  xfs_mru_cache_create( -	xfs_mru_cache_t		**mrup, +	struct xfs_mru_cache	**mrup,  	unsigned int		lifetime_ms,  	unsigned int		grp_count,  	xfs_mru_cache_free_func_t free_func)  { -	xfs_mru_cache_t	*mru = NULL; -	int		err = 0, grp; -	unsigned int	grp_time; +	struct xfs_mru_cache	*mru = NULL; +	int			err = 0, grp; +	unsigned int		grp_time;  	if (mrup)  		*mrup = NULL; @@ -400,7 +388,7 @@ exit:   */  static void  xfs_mru_cache_flush( -	xfs_mru_cache_t		*mru) +	struct xfs_mru_cache	*mru)  {  	if (!mru || !mru->lists)  		return; @@ -420,7 +408,7 @@ xfs_mru_cache_flush(  void  xfs_mru_cache_destroy( -	xfs_mru_cache_t		*mru) +	struct xfs_mru_cache	*mru)  {  	if (!mru || !mru->lists)  		return; @@ -438,38 +426,30 @@ xfs_mru_cache_destroy(   */  int  xfs_mru_cache_insert( -	xfs_mru_cache_t	*mru, -	unsigned long	key, -	void		*value) +	struct xfs_mru_cache	*mru, +	unsigned long		key, +	struct xfs_mru_cache_elem *elem)  { -	xfs_mru_cache_elem_t *elem; +	int			error;  	ASSERT(mru && mru->lists);  	if (!mru || !mru->lists)  		return EINVAL; -	elem = kmem_zone_zalloc(xfs_mru_elem_zone, KM_SLEEP); -	if (!elem) +	if (radix_tree_preload(GFP_KERNEL))  		return ENOMEM; -	if (radix_tree_preload(GFP_KERNEL)) { -		kmem_zone_free(xfs_mru_elem_zone, elem); -		return ENOMEM; -	} -  	INIT_LIST_HEAD(&elem->list_node);  	elem->key = key; -	elem->value = value;  	spin_lock(&mru->lock); - -	radix_tree_insert(&mru->store, key, elem); +	error = -radix_tree_insert(&mru->store, key, elem);  	radix_tree_preload_end(); -	_xfs_mru_cache_list_insert(mru, elem); - +	if (!error) +		_xfs_mru_cache_list_insert(mru, elem);  	spin_unlock(&mru->lock); -	return 0; +	return error;  }  /* @@ -478,13 +458,12 @@ xfs_mru_cache_insert(   * the client data pointer for the removed element is returned, otherwise this   * function will return a NULL pointer.   
*/ -void * +struct xfs_mru_cache_elem *  xfs_mru_cache_remove( -	xfs_mru_cache_t	*mru, -	unsigned long	key) +	struct xfs_mru_cache	*mru, +	unsigned long		key)  { -	xfs_mru_cache_elem_t *elem; -	void		*value = NULL; +	struct xfs_mru_cache_elem *elem;  	ASSERT(mru && mru->lists);  	if (!mru || !mru->lists) @@ -492,17 +471,11 @@ xfs_mru_cache_remove(  	spin_lock(&mru->lock);  	elem = radix_tree_delete(&mru->store, key); -	if (elem) { -		value = elem->value; +	if (elem)  		list_del(&elem->list_node); -	} -  	spin_unlock(&mru->lock); -	if (elem) -		kmem_zone_free(xfs_mru_elem_zone, elem); - -	return value; +	return elem;  }  /* @@ -511,13 +484,14 @@ xfs_mru_cache_remove(   */  void  xfs_mru_cache_delete( -	xfs_mru_cache_t	*mru, -	unsigned long	key) +	struct xfs_mru_cache	*mru, +	unsigned long		key)  { -	void		*value = xfs_mru_cache_remove(mru, key); +	struct xfs_mru_cache_elem *elem; -	if (value) -		mru->free_func(key, value); +	elem = xfs_mru_cache_remove(mru, key); +	if (elem) +		mru->free_func(elem);  }  /* @@ -540,12 +514,12 @@ xfs_mru_cache_delete(   * status, we need to help it get it right by annotating the path that does   * not release the lock.   */ -void * +struct xfs_mru_cache_elem *  xfs_mru_cache_lookup( -	xfs_mru_cache_t	*mru, -	unsigned long	key) +	struct xfs_mru_cache	*mru, +	unsigned long		key)  { -	xfs_mru_cache_elem_t *elem; +	struct xfs_mru_cache_elem *elem;  	ASSERT(mru && mru->lists);  	if (!mru || !mru->lists) @@ -560,7 +534,7 @@ xfs_mru_cache_lookup(  	} else  		spin_unlock(&mru->lock); -	return elem ? elem->value : NULL; +	return elem;  }  /* @@ -570,7 +544,8 @@ xfs_mru_cache_lookup(   */  void  xfs_mru_cache_done( -	xfs_mru_cache_t	*mru) __releases(mru->lock) +	struct xfs_mru_cache	*mru) +		__releases(mru->lock)  {  	spin_unlock(&mru->lock);  } diff --git a/fs/xfs/xfs_mru_cache.h b/fs/xfs/xfs_mru_cache.h index 36dd3ec8b4e..fb5245ba5ff 100644 --- a/fs/xfs/xfs_mru_cache.h +++ b/fs/xfs/xfs_mru_cache.h @@ -18,24 +18,15 @@  #ifndef __XFS_MRU_CACHE_H__  #define __XFS_MRU_CACHE_H__ +struct xfs_mru_cache; -/* Function pointer type for callback to free a client's data pointer. */ -typedef void (*xfs_mru_cache_free_func_t)(unsigned long, void*); +struct xfs_mru_cache_elem { +	struct list_head list_node; +	unsigned long	key; +}; -typedef struct xfs_mru_cache -{ -	struct radix_tree_root	store;     /* Core storage data structure.  */ -	struct list_head	*lists;    /* Array of lists, one per grp.  */ -	struct list_head	reap_list; /* Elements overdue for reaping. */ -	spinlock_t		lock;      /* Lock to protect this struct.  */ -	unsigned int		grp_count; /* Number of discrete groups.    */ -	unsigned int		grp_time;  /* Time period spanned by grps.  */ -	unsigned int		lru_grp;   /* Group containing time zero.   */ -	unsigned long		time_zero; /* Time first element was added. */ -	xfs_mru_cache_free_func_t free_func; /* Function pointer for freeing. */ -	struct delayed_work	work;      /* Workqueue data for reaping.   */ -	unsigned int		queued;	   /* work has been queued */ -} xfs_mru_cache_t; +/* Function pointer type for callback to free a client's data pointer. 
*/ +typedef void (*xfs_mru_cache_free_func_t)(struct xfs_mru_cache_elem *elem);  int xfs_mru_cache_init(void);  void xfs_mru_cache_uninit(void); @@ -44,10 +35,12 @@ int xfs_mru_cache_create(struct xfs_mru_cache **mrup, unsigned int lifetime_ms,  			     xfs_mru_cache_free_func_t free_func);  void xfs_mru_cache_destroy(struct xfs_mru_cache *mru);  int xfs_mru_cache_insert(struct xfs_mru_cache *mru, unsigned long key, -				void *value); -void * xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key); +		struct xfs_mru_cache_elem *elem); +struct xfs_mru_cache_elem * +xfs_mru_cache_remove(struct xfs_mru_cache *mru, unsigned long key);  void xfs_mru_cache_delete(struct xfs_mru_cache *mru, unsigned long key); -void *xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key); +struct xfs_mru_cache_elem * +xfs_mru_cache_lookup(struct xfs_mru_cache *mru, unsigned long key);  void xfs_mru_cache_done(struct xfs_mru_cache *mru);  #endif /* __XFS_MRU_CACHE_H__ */ diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 3e6c2e6c9cd..6d26759c779 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -17,31 +17,28 @@   */  #include "xfs.h"  #include "xfs_fs.h" +#include "xfs_shared.h"  #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h"  #include "xfs_ialloc.h"  #include "xfs_itable.h" -#include "xfs_rtalloc.h" +#include "xfs_quota.h"  #include "xfs_error.h"  #include "xfs_bmap.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" +#include "xfs_bmap_btree.h" +#include "xfs_trans.h"  #include "xfs_trans_space.h"  #include "xfs_qm.h"  #include "xfs_trace.h"  #include "xfs_icache.h"  #include "xfs_cksum.h" +#include "xfs_dinode.h"  /*   * The global quota manager. There is only one of these for the entire @@ -137,8 +134,6 @@ xfs_qm_dqpurge(  {  	struct xfs_mount	*mp = dqp->q_mount;  	struct xfs_quotainfo	*qi = mp->m_quotainfo; -	struct xfs_dquot	*gdqp = NULL; -	struct xfs_dquot	*pdqp = NULL;  	xfs_dqlock(dqp);  	if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) { @@ -146,21 +141,6 @@ xfs_qm_dqpurge(  		return EAGAIN;  	} -	/* -	 * If this quota has a hint attached, prepare for releasing it now. -	 */ -	gdqp = dqp->q_gdquot; -	if (gdqp) { -		xfs_dqlock(gdqp); -		dqp->q_gdquot = NULL; -	} - -	pdqp = dqp->q_pdquot; -	if (pdqp) { -		xfs_dqlock(pdqp); -		dqp->q_pdquot = NULL; -	} -  	dqp->dq_flags |= XFS_DQ_FREEING;  	xfs_dqflock(dqp); @@ -209,11 +189,6 @@ xfs_qm_dqpurge(  	XFS_STATS_DEC(xs_qm_dquot_unused);  	xfs_qm_dqdestroy(dqp); - -	if (gdqp) -		xfs_qm_dqput(gdqp); -	if (pdqp) -		xfs_qm_dqput(pdqp);  	return 0;  } @@ -383,7 +358,6 @@ xfs_qm_dqattach_one(  	xfs_dqid_t	id,  	uint		type,  	uint		doalloc, -	xfs_dquot_t	*udqhint, /* hint */  	xfs_dquot_t	**IO_idqpp)  {  	xfs_dquot_t	*dqp; @@ -393,9 +367,9 @@ xfs_qm_dqattach_one(  	error = 0;  	/* -	 * See if we already have it in the inode itself. IO_idqpp is -	 * &i_udquot or &i_gdquot. This made the code look weird, but -	 * made the logic a lot simpler. +	 * See if we already have it in the inode itself. IO_idqpp is &i_udquot +	 * or &i_gdquot. This made the code look weird, but made the logic a lot +	 * simpler.  	 
*/  	dqp = *IO_idqpp;  	if (dqp) { @@ -404,49 +378,10 @@ xfs_qm_dqattach_one(  	}  	/* -	 * udqhint is the i_udquot field in inode, and is non-NULL only -	 * when the type arg is group/project. Its purpose is to save a -	 * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside -	 * the user dquot. -	 */ -	if (udqhint) { -		ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ); -		xfs_dqlock(udqhint); - -		/* -		 * No need to take dqlock to look at the id. -		 * -		 * The ID can't change until it gets reclaimed, and it won't -		 * be reclaimed as long as we have a ref from inode and we -		 * hold the ilock. -		 */ -		if (type == XFS_DQ_GROUP) -			dqp = udqhint->q_gdquot; -		else -			dqp = udqhint->q_pdquot; -		if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) { -			ASSERT(*IO_idqpp == NULL); - -			*IO_idqpp = xfs_qm_dqhold(dqp); -			xfs_dqunlock(udqhint); -			return 0; -		} - -		/* -		 * We can't hold a dquot lock when we call the dqget code. -		 * We'll deadlock in no time, because of (not conforming to) -		 * lock ordering - the inodelock comes before any dquot lock, -		 * and we may drop and reacquire the ilock in xfs_qm_dqget(). -		 */ -		xfs_dqunlock(udqhint); -	} - -	/* -	 * Find the dquot from somewhere. This bumps the -	 * reference count of dquot and returns it locked. -	 * This can return ENOENT if dquot didn't exist on -	 * disk and we didn't ask it to allocate; -	 * ESRCH if quotas got turned off suddenly. +	 * Find the dquot from somewhere. This bumps the reference count of +	 * dquot and returns it locked.  This can return ENOENT if dquot didn't +	 * exist on disk and we didn't ask it to allocate; ESRCH if quotas got +	 * turned off suddenly.  	 */  	error = xfs_qm_dqget(ip->i_mount, ip, id, type,  			     doalloc | XFS_QMOPT_DOWARN, &dqp); @@ -464,48 +399,6 @@ xfs_qm_dqattach_one(  	return 0;  } - -/* - * Given a udquot and group/project type, attach the group/project - * dquot pointer to the udquot as a hint for future lookups. 
- */ -STATIC void -xfs_qm_dqattach_hint( -	struct xfs_inode	*ip, -	int			type) -{ -	struct xfs_dquot **dqhintp; -	struct xfs_dquot *dqp; -	struct xfs_dquot *udq = ip->i_udquot; - -	ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ); - -	xfs_dqlock(udq); - -	if (type == XFS_DQ_GROUP) { -		dqp = ip->i_gdquot; -		dqhintp = &udq->q_gdquot; -	} else { -		dqp = ip->i_pdquot; -		dqhintp = &udq->q_pdquot; -	} - -	if (*dqhintp) { -		struct xfs_dquot *tmp; - -		if (*dqhintp == dqp) -			goto done; - -		tmp = *dqhintp; -		*dqhintp = NULL; -		xfs_qm_dqrele(tmp); -	} - -	*dqhintp = xfs_qm_dqhold(dqp); -done: -	xfs_dqunlock(udq); -} -  static bool  xfs_qm_need_dqattach(  	struct xfs_inode	*ip) @@ -536,7 +429,6 @@ xfs_qm_dqattach_locked(  	uint		flags)  {  	xfs_mount_t	*mp = ip->i_mount; -	uint		nquotas = 0;  	int		error = 0;  	if (!xfs_qm_need_dqattach(ip)) @@ -544,77 +436,39 @@ xfs_qm_dqattach_locked(  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); -	if (XFS_IS_UQUOTA_ON(mp)) { +	if (XFS_IS_UQUOTA_ON(mp) && !ip->i_udquot) {  		error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,  						flags & XFS_QMOPT_DQALLOC, -						NULL, &ip->i_udquot); +						&ip->i_udquot);  		if (error)  			goto done; -		nquotas++; +		ASSERT(ip->i_udquot);  	} -	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); -	if (XFS_IS_GQUOTA_ON(mp)) { +	if (XFS_IS_GQUOTA_ON(mp) && !ip->i_gdquot) {  		error = xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,  						flags & XFS_QMOPT_DQALLOC, -						ip->i_udquot, &ip->i_gdquot); -		/* -		 * Don't worry about the udquot that we may have -		 * attached above. It'll get detached, if not already. -		 */ +						&ip->i_gdquot);  		if (error)  			goto done; -		nquotas++; +		ASSERT(ip->i_gdquot);  	} -	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); -	if (XFS_IS_PQUOTA_ON(mp)) { +	if (XFS_IS_PQUOTA_ON(mp) && !ip->i_pdquot) {  		error = xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ,  						flags & XFS_QMOPT_DQALLOC, -						ip->i_udquot, &ip->i_pdquot); -		/* -		 * Don't worry about the udquot that we may have -		 * attached above. It'll get detached, if not already. -		 */ +						&ip->i_pdquot);  		if (error)  			goto done; -		nquotas++; +		ASSERT(ip->i_pdquot);  	} +done:  	/* -	 * Attach this group/project quota to the user quota as a hint. -	 * This WON'T, in general, result in a thrash. +	 * Don't worry about the dquots that we may have attached before any +	 * error - they'll get detached later if it has not already been done.  	 */ -	if (nquotas > 1 && ip->i_udquot) { -		ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); -		ASSERT(ip->i_gdquot || !XFS_IS_GQUOTA_ON(mp)); -		ASSERT(ip->i_pdquot || !XFS_IS_PQUOTA_ON(mp)); - -		/* -		 * We do not have i_udquot locked at this point, but this check -		 * is OK since we don't depend on the i_gdquot to be accurate -		 * 100% all the time. It is just a hint, and this will -		 * succeed in general. 
-		 */ -		if (ip->i_udquot->q_gdquot != ip->i_gdquot) -			xfs_qm_dqattach_hint(ip, XFS_DQ_GROUP); - -		if (ip->i_udquot->q_pdquot != ip->i_pdquot) -			xfs_qm_dqattach_hint(ip, XFS_DQ_PROJ); -	} - - done: -#ifdef DEBUG -	if (!error) { -		if (XFS_IS_UQUOTA_ON(mp)) -			ASSERT(ip->i_udquot); -		if (XFS_IS_GQUOTA_ON(mp)) -			ASSERT(ip->i_gdquot); -		if (XFS_IS_PQUOTA_ON(mp)) -			ASSERT(ip->i_pdquot); -	}  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); -#endif  	return error;  } @@ -664,20 +518,6 @@ xfs_qm_dqdetach(  	}  } -int -xfs_qm_calc_dquots_per_chunk( -	struct xfs_mount	*mp, -	unsigned int		nbblks)	/* basic block units */ -{ -	unsigned int	ndquots; - -	ASSERT(nbblks > 0); -	ndquots = BBTOB(nbblks); -	do_div(ndquots, sizeof(xfs_dqblk_t)); - -	return ndquots; -} -  struct xfs_qm_isolate {  	struct list_head	buffers;  	struct list_head	dispose; @@ -831,22 +671,17 @@ xfs_qm_init_quotainfo(  	qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP); -	if ((error = list_lru_init(&qinf->qi_lru))) { -		kmem_free(qinf); -		mp->m_quotainfo = NULL; -		return error; -	} +	error = -list_lru_init(&qinf->qi_lru); +	if (error) +		goto out_free_qinf;  	/*  	 * See if quotainodes are setup, and if not, allocate them,  	 * and change the superblock accordingly.  	 */ -	if ((error = xfs_qm_init_quotainos(mp))) { -		list_lru_destroy(&qinf->qi_lru); -		kmem_free(qinf); -		mp->m_quotainfo = NULL; -		return error; -	} +	error = xfs_qm_init_quotainos(mp); +	if (error) +		goto out_free_lru;  	INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS);  	INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS); @@ -858,8 +693,7 @@ xfs_qm_init_quotainfo(  	/* Precalc some constants */  	qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB); -	qinf->qi_dqperchunk = xfs_qm_calc_dquots_per_chunk(mp, -							qinf->qi_dqchunklen); +	qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(qinf->qi_dqchunklen);  	mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD); @@ -906,7 +740,7 @@ xfs_qm_init_quotainfo(  		qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);  		qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);  		qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit); -  +  		xfs_qm_dqdestroy(dqp);  	} else {  		qinf->qi_btimelimit = XFS_QM_BTIMELIMIT; @@ -923,6 +757,13 @@ xfs_qm_init_quotainfo(  	qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE;  	register_shrinker(&qinf->qi_shrinker);  	return 0; + +out_free_lru: +	list_lru_destroy(&qinf->qi_lru); +out_free_qinf: +	kmem_free(qinf); +	mp->m_quotainfo = NULL; +	return error;  } @@ -1092,10 +933,10 @@ xfs_qm_reset_dqcounts(  		/*  		 * Do a sanity check, and if needed, repair the dqblk. Don't  		 * output any warnings because it's perfectly possible to -		 * find uninitialised dquot blks. See comment in xfs_qm_dqcheck. +		 * find uninitialised dquot blks. See comment in xfs_dqcheck.  		 */ -		(void) xfs_qm_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, -				      "xfs_quotacheck"); +		xfs_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR, +			    "xfs_quotacheck");  		ddq->d_bcount = 0;  		ddq->d_icount = 0;  		ddq->d_rtbcount = 0; @@ -1210,16 +1051,18 @@ xfs_qm_dqiterate(  	lblkno = 0;  	maxlblkcnt = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);  	do { +		uint		lock_mode; +  		nmaps = XFS_DQITER_MAP_SIZE;  		/*  		 * We aren't changing the inode itself. Just changing  		 * some of its data. No new blocks are added here, and  		 * the inode is never added to the transaction.  		 
*/ -		xfs_ilock(qip, XFS_ILOCK_SHARED); +		lock_mode = xfs_ilock_data_map_shared(qip);  		error = xfs_bmapi_read(qip, lblkno, maxlblkcnt - lblkno,  				       map, &nmaps, 0); -		xfs_iunlock(qip, XFS_ILOCK_SHARED); +		xfs_iunlock(qip, lock_mode);  		if (error)  			break; @@ -2099,24 +1942,21 @@ xfs_qm_vop_create_dqattach(  	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));  	ASSERT(XFS_IS_QUOTA_RUNNING(mp)); -	if (udqp) { +	if (udqp && XFS_IS_UQUOTA_ON(mp)) {  		ASSERT(ip->i_udquot == NULL); -		ASSERT(XFS_IS_UQUOTA_ON(mp));  		ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));  		ip->i_udquot = xfs_qm_dqhold(udqp);  		xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);  	} -	if (gdqp) { +	if (gdqp && XFS_IS_GQUOTA_ON(mp)) {  		ASSERT(ip->i_gdquot == NULL); -		ASSERT(XFS_IS_GQUOTA_ON(mp));  		ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id));  		ip->i_gdquot = xfs_qm_dqhold(gdqp);  		xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);  	} -	if (pdqp) { +	if (pdqp && XFS_IS_PQUOTA_ON(mp)) {  		ASSERT(ip->i_pdquot == NULL); -		ASSERT(XFS_IS_PQUOTA_ON(mp));  		ASSERT(xfs_get_projid(ip) == be32_to_cpu(pdqp->q_core.d_id));  		ip->i_pdquot = xfs_qm_dqhold(pdqp); diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 2b602df9c24..797fd463627 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -20,13 +20,29 @@  #include "xfs_dquot_item.h"  #include "xfs_dquot.h" -#include "xfs_quota_priv.h"  struct xfs_inode;  extern struct kmem_zone	*xfs_qm_dqtrxzone;  /* + * Number of bmaps that we ask from bmapi when doing a quotacheck. + * We make this restriction to keep the memory usage to a minimum. + */ +#define XFS_DQITER_MAP_SIZE	10 + +#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ +	!dqp->q_core.d_blk_hardlimit && \ +	!dqp->q_core.d_blk_softlimit && \ +	!dqp->q_core.d_rtb_hardlimit && \ +	!dqp->q_core.d_rtb_softlimit && \ +	!dqp->q_core.d_ino_hardlimit && \ +	!dqp->q_core.d_ino_softlimit && \ +	!dqp->q_core.d_bcount && \ +	!dqp->q_core.d_rtbcount && \ +	!dqp->q_core.d_icount) + +/*   * This defines the unit of allocation of dquots.   * Currently, it is just one file system block, and a 4K blk contains 30   * (136 * 30 = 4080) dquots. 
It's probably not worth trying to make @@ -103,8 +119,6 @@ xfs_dq_to_quota_inode(struct xfs_dquot *dqp)  	return NULL;  } -extern int	xfs_qm_calc_dquots_per_chunk(struct xfs_mount *mp, -					     unsigned int nbblks);  extern void	xfs_trans_mod_dquot(struct xfs_trans *,  					struct xfs_dquot *, uint, long);  extern int	xfs_trans_reserve_quota_bydquots(struct xfs_trans *, diff --git a/fs/xfs/xfs_qm_bhv.c b/fs/xfs/xfs_qm_bhv.c index 3af50ccdfac..e9be63abd8d 100644 --- a/fs/xfs/xfs_qm_bhv.c +++ b/fs/xfs/xfs_qm_bhv.c @@ -18,21 +18,15 @@  #include "xfs.h"  #include "xfs_fs.h"  #include "xfs_format.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h" -#include "xfs_alloc.h"  #include "xfs_quota.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h"  #include "xfs_inode.h" -#include "xfs_itable.h" -#include "xfs_bmap.h" -#include "xfs_rtalloc.h"  #include "xfs_error.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" +#include "xfs_trans.h"  #include "xfs_qm.h" diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 8174aad0b38..bbc813caba4 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -20,24 +20,18 @@  #include "xfs.h"  #include "xfs_fs.h" +#include "xfs_shared.h"  #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h"  #include "xfs_inode.h" -#include "xfs_inode_item.h" -#include "xfs_itable.h" -#include "xfs_bmap.h" -#include "xfs_rtalloc.h" +#include "xfs_trans.h"  #include "xfs_error.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" +#include "xfs_quota.h"  #include "xfs_qm.h"  #include "xfs_trace.h"  #include "xfs_icache.h" @@ -284,22 +278,29 @@ xfs_qm_scall_trunc_qfiles(  	xfs_mount_t	*mp,  	uint		flags)  { -	int		error = 0, error2 = 0; +	int		error = EINVAL; -	if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0) { -		xfs_debug(mp, "%s: flags=%x m_qflags=%x\n", +	if (!xfs_sb_version_hasquota(&mp->m_sb) || flags == 0 || +	    (flags & ~XFS_DQ_ALLTYPES)) { +		xfs_debug(mp, "%s: flags=%x m_qflags=%x",  			__func__, flags, mp->m_qflags);  		return XFS_ERROR(EINVAL);  	} -	if (flags & XFS_DQ_USER) +	if (flags & XFS_DQ_USER) {  		error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_uquotino); -	if (flags & XFS_DQ_GROUP) -		error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino); +		if (error) +			return error; +	} +	if (flags & XFS_DQ_GROUP) { +		error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_gquotino); +		if (error) +			return error; +	}  	if (flags & XFS_DQ_PROJ) -		error2 = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_pquotino); +		error = xfs_qm_scall_trunc_qfile(mp, mp->m_sb.sb_pquotino); -	return error ? 
error : error2; +	return error;  }  /* @@ -325,7 +326,7 @@ xfs_qm_scall_quotaon(  	sbflags = 0;  	if (flags == 0) { -		xfs_debug(mp, "%s: zero flags, m_qflags=%x\n", +		xfs_debug(mp, "%s: zero flags, m_qflags=%x",  			__func__, mp->m_qflags);  		return XFS_ERROR(EINVAL);  	} @@ -348,7 +349,7 @@ xfs_qm_scall_quotaon(  	     (mp->m_sb.sb_qflags & XFS_PQUOTA_ACCT) == 0 &&  	     (flags & XFS_PQUOTA_ENFD))) {  		xfs_debug(mp, -			"%s: Can't enforce without acct, flags=%x sbflags=%x\n", +			"%s: Can't enforce without acct, flags=%x sbflags=%x",  			__func__, flags, mp->m_sb.sb_qflags);  		return XFS_ERROR(EINVAL);  	} @@ -648,7 +649,7 @@ xfs_qm_scall_setqlim(  			q->qi_bsoftlimit = soft;  		}  	} else { -		xfs_debug(mp, "blkhard %Ld < blksoft %Ld\n", hard, soft); +		xfs_debug(mp, "blkhard %Ld < blksoft %Ld", hard, soft);  	}  	hard = (newlim->d_fieldmask & FS_DQ_RTBHARD) ?  		(xfs_qcnt_t) XFS_BB_TO_FSB(mp, newlim->d_rtb_hardlimit) : @@ -664,7 +665,7 @@ xfs_qm_scall_setqlim(  			q->qi_rtbsoftlimit = soft;  		}  	} else { -		xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld\n", hard, soft); +		xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld", hard, soft);  	}  	hard = (newlim->d_fieldmask & FS_DQ_IHARD) ? @@ -681,7 +682,7 @@ xfs_qm_scall_setqlim(  			q->qi_isoftlimit = soft;  		}  	} else { -		xfs_debug(mp, "ihard %Ld < isoft %Ld\n", hard, soft); +		xfs_debug(mp, "ihard %Ld < isoft %Ld", hard, soft);  	}  	/* @@ -959,7 +960,6 @@ xfs_qm_export_flags(  STATIC int  xfs_dqrele_inode(  	struct xfs_inode	*ip, -	struct xfs_perag	*pag,  	int			flags,  	void			*args)  { diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index e7d84d2d868..5376dd406ba 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -150,10 +150,6 @@ static inline int xfs_trans_reserve_quota_bydquots(struct xfs_trans *tp,  	xfs_trans_reserve_quota_bydquots(tp, mp, ud, gd, pd, nb, ni, \  				f | XFS_QMOPT_RES_REGBLKS) -extern int xfs_qm_dqcheck(struct xfs_mount *, xfs_disk_dquot_t *, -				xfs_dqid_t, uint, uint, char *);  extern int xfs_mount_reset_sbqflags(struct xfs_mount *); -extern const struct xfs_buf_ops xfs_dquot_buf_ops; -  #endif	/* __XFS_QUOTA_H__ */ diff --git a/fs/xfs/xfs_quota_defs.h b/fs/xfs/xfs_quota_defs.h index e6b0d6e1f4f..137e2093707 100644 --- a/fs/xfs/xfs_quota_defs.h +++ b/fs/xfs/xfs_quota_defs.h @@ -154,4 +154,8 @@ typedef __uint16_t	xfs_qwarncnt_t;  		(XFS_QMOPT_UQUOTA | XFS_QMOPT_PQUOTA | XFS_QMOPT_GQUOTA)  #define XFS_QMOPT_RESBLK_MASK	(XFS_QMOPT_RES_REGBLKS | XFS_QMOPT_RES_RTBLKS) +extern int xfs_dqcheck(struct xfs_mount *mp, xfs_disk_dquot_t *ddq, +		       xfs_dqid_t id, uint type, uint flags, char *str); +extern int xfs_calc_dquots_per_chunk(unsigned int nbblks); +  #endif	/* __XFS_QUOTA_H__ */ diff --git a/fs/xfs/xfs_quota_priv.h b/fs/xfs/xfs_quota_priv.h deleted file mode 100644 index 6d86219d93d..00000000000 --- a/fs/xfs/xfs_quota_priv.h +++ /dev/null @@ -1,42 +0,0 @@ -/* - * Copyright (c) 2000-2003 Silicon Graphics, Inc. - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it would be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the - * GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write the Free Software Foundation, - * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA - */ -#ifndef __XFS_QUOTA_PRIV_H__ -#define __XFS_QUOTA_PRIV_H__ - -/* - * Number of bmaps that we ask from bmapi when doing a quotacheck. - * We make this restriction to keep the memory usage to a minimum. - */ -#define XFS_DQITER_MAP_SIZE	10 - -#define XFS_IS_DQUOT_UNINITIALIZED(dqp) ( \ -	!dqp->q_core.d_blk_hardlimit && \ -	!dqp->q_core.d_blk_softlimit && \ -	!dqp->q_core.d_rtb_hardlimit && \ -	!dqp->q_core.d_rtb_softlimit && \ -	!dqp->q_core.d_ino_hardlimit && \ -	!dqp->q_core.d_ino_softlimit && \ -	!dqp->q_core.d_bcount && \ -	!dqp->q_core.d_rtbcount && \ -	!dqp->q_core.d_icount) - -#define DQFLAGTO_TYPESTR(d)	(((d)->dq_flags & XFS_DQ_USER) ? "USR" : \ -				 (((d)->dq_flags & XFS_DQ_GROUP) ? "GRP" : \ -				 (((d)->dq_flags & XFS_DQ_PROJ) ? "PRJ":"???"))) - -#endif	/* __XFS_QUOTA_PRIV_H__ */ diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index 1326d81596c..2ad1b9822e9 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -17,15 +17,14 @@   */  #include "xfs.h"  #include "xfs_format.h" +#include "xfs_log_format.h"  #include "xfs_trans_resv.h" -#include "xfs_log.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" +#include "xfs_inode.h"  #include "xfs_quota.h"  #include "xfs_trans.h" -#include "xfs_bmap_btree.h" -#include "xfs_inode.h"  #include "xfs_qm.h"  #include <linux/quota.h> @@ -101,16 +100,36 @@ xfs_fs_set_xstate(  		if (!XFS_IS_QUOTA_ON(mp))  			return -EINVAL;  		return -xfs_qm_scall_quotaoff(mp, flags); -	case Q_XQUOTARM: -		if (XFS_IS_QUOTA_ON(mp)) -			return -EINVAL; -		return -xfs_qm_scall_trunc_qfiles(mp, flags);  	}  	return -EINVAL;  }  STATIC int +xfs_fs_rm_xquota( +	struct super_block	*sb, +	unsigned int		uflags) +{ +	struct xfs_mount	*mp = XFS_M(sb); +	unsigned int		flags = 0; +	 +	if (sb->s_flags & MS_RDONLY) +		return -EROFS; + +	if (XFS_IS_QUOTA_ON(mp)) +		return -EINVAL; + +	if (uflags & FS_USER_QUOTA) +		flags |= XFS_DQ_USER; +	if (uflags & FS_GROUP_QUOTA) +		flags |= XFS_DQ_GROUP; +	if (uflags & FS_USER_QUOTA) +		flags |= XFS_DQ_PROJ; + +	return -xfs_qm_scall_trunc_qfiles(mp, flags); +}	 + +STATIC int  xfs_fs_get_dqblk(  	struct super_block	*sb,  	struct kqid		qid, @@ -150,6 +169,7 @@ const struct quotactl_ops xfs_quotactl_operations = {  	.get_xstatev		= xfs_fs_get_xstatev,  	.get_xstate		= xfs_fs_get_xstate,  	.set_xstate		= xfs_fs_set_xstate, +	.rm_xquota		= xfs_fs_rm_xquota,  	.get_dqblk		= xfs_fs_get_dqblk,  	.set_dqblk		= xfs_fs_set_dqblk,  }; diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 6f9e63c9fc2..ec5ca65c621 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -17,172 +17,260 @@   */  #include "xfs.h"  #include "xfs_fs.h" +#include "xfs_shared.h"  #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h" -#include "xfs_alloc.h"  #include "xfs_bmap.h"  #include "xfs_bmap_util.h" -#include "xfs_rtalloc.h" -#include "xfs_fsops.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc.h"  #include "xfs_error.h" -#include "xfs_inode_item.h" +#include "xfs_trans.h"  #include "xfs_trans_space.h"  #include "xfs_trace.h"  #include "xfs_buf.h"  #include 
"xfs_icache.h" +#include "xfs_dinode.h" +#include "xfs_rtalloc.h"  /* - * Prototypes for internal functions. + * Read and return the summary information for a given extent size, + * bitmap block combination. + * Keeps track of a current summary block, so we don't keep reading + * it from the buffer cache.   */ +STATIC int				/* error */ +xfs_rtget_summary( +	xfs_mount_t	*mp,		/* file system mount structure */ +	xfs_trans_t	*tp,		/* transaction pointer */ +	int		log,		/* log2 of extent size */ +	xfs_rtblock_t	bbno,		/* bitmap block number */ +	xfs_buf_t	**rbpp,		/* in/out: summary block buffer */ +	xfs_fsblock_t	*rsb,		/* in/out: summary block number */ +	xfs_suminfo_t	*sum)		/* out: summary info for this block */ +{ +	xfs_buf_t	*bp;		/* buffer for summary block */ +	int		error;		/* error value */ +	xfs_fsblock_t	sb;		/* summary fsblock */ +	int		so;		/* index into the summary file */ +	xfs_suminfo_t	*sp;		/* pointer to returned data */ +	/* +	 * Compute entry number in the summary file. +	 */ +	so = XFS_SUMOFFS(mp, log, bbno); +	/* +	 * Compute the block number in the summary file. +	 */ +	sb = XFS_SUMOFFSTOBLOCK(mp, so); +	/* +	 * If we have an old buffer, and the block number matches, use that. +	 */ +	if (rbpp && *rbpp && *rsb == sb) +		bp = *rbpp; +	/* +	 * Otherwise we have to get the buffer. +	 */ +	else { +		/* +		 * If there was an old one, get rid of it first. +		 */ +		if (rbpp && *rbpp) +			xfs_trans_brelse(tp, *rbpp); +		error = xfs_rtbuf_get(mp, tp, sb, 1, &bp); +		if (error) { +			return error; +		} +		/* +		 * Remember this buffer and block for the next call. +		 */ +		if (rbpp) { +			*rbpp = bp; +			*rsb = sb; +		} +	} +	/* +	 * Point to the summary information & copy it out. +	 */ +	sp = XFS_SUMPTR(mp, bp, so); +	*sum = *sp; +	/* +	 * Drop the buffer if we're not asked to remember it. +	 */ +	if (!rbpp) +		xfs_trans_brelse(tp, bp); +	return 0; +} -STATIC int xfs_rtallocate_range(xfs_mount_t *, xfs_trans_t *, xfs_rtblock_t, -		xfs_extlen_t, xfs_buf_t **, xfs_fsblock_t *); -STATIC int xfs_rtany_summary(xfs_mount_t *, xfs_trans_t *, int, int, -		xfs_rtblock_t, xfs_buf_t **, xfs_fsblock_t *, int *); -STATIC int xfs_rtcheck_range(xfs_mount_t *, xfs_trans_t *, xfs_rtblock_t, -		xfs_extlen_t, int, xfs_rtblock_t *, int *); -STATIC int xfs_rtfind_back(xfs_mount_t *, xfs_trans_t *, xfs_rtblock_t, -		xfs_rtblock_t, xfs_rtblock_t *); -STATIC int xfs_rtfind_forw(xfs_mount_t *, xfs_trans_t *, xfs_rtblock_t, -		xfs_rtblock_t, xfs_rtblock_t *); -STATIC int xfs_rtget_summary( xfs_mount_t *, xfs_trans_t *, int, -		xfs_rtblock_t, xfs_buf_t **, xfs_fsblock_t *, xfs_suminfo_t *); -STATIC int xfs_rtmodify_range(xfs_mount_t *, xfs_trans_t *, xfs_rtblock_t, -		xfs_extlen_t, int); -STATIC int xfs_rtmodify_summary(xfs_mount_t *, xfs_trans_t *, int, -		xfs_rtblock_t, int, xfs_buf_t **, xfs_fsblock_t *); - -/* - * Internal functions. - */  /* - * Allocate space to the bitmap or summary file, and zero it, for growfs. + * Return whether there are any free extents in the size range given + * by low and high, for the bitmap block bbno.   
*/  STATIC int				/* error */ -xfs_growfs_rt_alloc( -	xfs_mount_t	*mp,		/* file system mount point */ -	xfs_extlen_t	oblocks,	/* old count of blocks */ -	xfs_extlen_t	nblocks,	/* new count of blocks */ -	xfs_inode_t	*ip)		/* inode (bitmap/summary) */ +xfs_rtany_summary( +	xfs_mount_t	*mp,		/* file system mount structure */ +	xfs_trans_t	*tp,		/* transaction pointer */ +	int		low,		/* low log2 extent size */ +	int		high,		/* high log2 extent size */ +	xfs_rtblock_t	bbno,		/* bitmap block number */ +	xfs_buf_t	**rbpp,		/* in/out: summary block buffer */ +	xfs_fsblock_t	*rsb,		/* in/out: summary block number */ +	int		*stat)		/* out: any good extents here? */  { -	xfs_fileoff_t	bno;		/* block number in file */ -	xfs_buf_t	*bp;		/* temporary buffer for zeroing */ -	int		committed;	/* transaction committed flag */ -	xfs_daddr_t	d;		/* disk block address */ -	int		error;		/* error return value */ -	xfs_fsblock_t	firstblock;	/* first block allocated in xaction */ -	xfs_bmap_free_t	flist;		/* list of freed blocks */ -	xfs_fsblock_t	fsbno;		/* filesystem block for bno */ -	xfs_bmbt_irec_t	map;		/* block map output */ -	int		nmap;		/* number of block maps */ -	int		resblks;	/* space reservation */ +	int		error;		/* error value */ +	int		log;		/* loop counter, log2 of ext. size */ +	xfs_suminfo_t	sum;		/* summary data */  	/* -	 * Allocate space to the file, as necessary. +	 * Loop over logs of extent sizes.  Order is irrelevant.  	 */ -	while (oblocks < nblocks) { -		int		cancelflags = 0; -		xfs_trans_t	*tp; - -		tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC); -		resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks); +	for (log = low; log <= high; log++) {  		/* -		 * Reserve space & log for one extent added to the file. +		 * Get one summary datum.  		 */ -		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growdata, -					  resblks, 0); -		if (error) -			goto error_cancel; -		cancelflags = XFS_TRANS_RELEASE_LOG_RES; +		error = xfs_rtget_summary(mp, tp, log, bbno, rbpp, rsb, &sum); +		if (error) { +			return error; +		}  		/* -		 * Lock the inode. +		 * If there are any, return success.  		 */ -		xfs_ilock(ip, XFS_ILOCK_EXCL); -		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); +		if (sum) { +			*stat = 1; +			return 0; +		} +	} +	/* +	 * Found nothing, return failure. +	 */ +	*stat = 0; +	return 0; +} -		xfs_bmap_init(&flist, &firstblock); -		/* -		 * Allocate blocks to the bitmap file. -		 */ -		nmap = 1; -		cancelflags |= XFS_TRANS_ABORT; -		error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks, -					XFS_BMAPI_METADATA, &firstblock, -					resblks, &map, &nmap, &flist); -		if (!error && nmap < 1) -			error = XFS_ERROR(ENOSPC); -		if (error) -			goto error_cancel; -		/* -		 * Free any blocks freed up in the transaction, then commit. -		 */ -		error = xfs_bmap_finish(&tp, &flist, &committed); -		if (error) -			goto error_cancel; -		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); -		if (error) -			goto error; -		/* -		 * Now we need to clear the allocated blocks. -		 * Do this one block per transaction, to keep it simple. -		 */ -		cancelflags = 0; -		for (bno = map.br_startoff, fsbno = map.br_startblock; -		     bno < map.br_startoff + map.br_blockcount; -		     bno++, fsbno++) { -			tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ZERO); -			/* -			 * Reserve log for one block zeroing. -			 */ -			error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtzero, -						  0, 0); + +/* + * Copy and transform the summary file, given the old and new + * parameters in the mount structures. 
+ */ +STATIC int				/* error */ +xfs_rtcopy_summary( +	xfs_mount_t	*omp,		/* old file system mount point */ +	xfs_mount_t	*nmp,		/* new file system mount point */ +	xfs_trans_t	*tp)		/* transaction pointer */ +{ +	xfs_rtblock_t	bbno;		/* bitmap block number */ +	xfs_buf_t	*bp;		/* summary buffer */ +	int		error;		/* error return value */ +	int		log;		/* summary level number (log length) */ +	xfs_suminfo_t	sum;		/* summary data */ +	xfs_fsblock_t	sumbno;		/* summary block number */ + +	bp = NULL; +	for (log = omp->m_rsumlevels - 1; log >= 0; log--) { +		for (bbno = omp->m_sb.sb_rbmblocks - 1; +		     (xfs_srtblock_t)bbno >= 0; +		     bbno--) { +			error = xfs_rtget_summary(omp, tp, log, bbno, &bp, +				&sumbno, &sum);  			if (error) -				goto error_cancel; -			/* -			 * Lock the bitmap inode. -			 */ -			xfs_ilock(ip, XFS_ILOCK_EXCL); -			xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); -			/* -			 * Get a buffer for the block. -			 */ -			d = XFS_FSB_TO_DADDR(mp, fsbno); -			bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, -				mp->m_bsize, 0); -			if (bp == NULL) { -				error = XFS_ERROR(EIO); -error_cancel: -				xfs_trans_cancel(tp, cancelflags); -				goto error; -			} -			memset(bp->b_addr, 0, mp->m_sb.sb_blocksize); -			xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1); -			/* -			 * Commit the transaction. -			 */ -			error = xfs_trans_commit(tp, 0); +				return error; +			if (sum == 0) +				continue; +			error = xfs_rtmodify_summary(omp, tp, log, bbno, -sum, +				&bp, &sumbno);  			if (error) -				goto error; +				return error; +			error = xfs_rtmodify_summary(nmp, tp, log, bbno, sum, +				&bp, &sumbno); +			if (error) +				return error; +			ASSERT(sum > 0);  		} -		/* -		 * Go on to the next extent, if any. -		 */ -		oblocks = map.br_startoff + map.br_blockcount;  	}  	return 0; +} +/* + * Mark an extent specified by start and len allocated. + * Updates all the summary information as well as the bitmap. + */ +STATIC int				/* error */ +xfs_rtallocate_range( +	xfs_mount_t	*mp,		/* file system mount point */ +	xfs_trans_t	*tp,		/* transaction pointer */ +	xfs_rtblock_t	start,		/* start block to allocate */ +	xfs_extlen_t	len,		/* length to allocate */ +	xfs_buf_t	**rbpp,		/* in/out: summary block buffer */ +	xfs_fsblock_t	*rsb)		/* in/out: summary block number */ +{ +	xfs_rtblock_t	end;		/* end of the allocated extent */ +	int		error;		/* error value */ +	xfs_rtblock_t	postblock = 0;	/* first block allocated > end */ +	xfs_rtblock_t	preblock = 0;	/* first block allocated < start */ -error: +	end = start + len - 1; +	/* +	 * Assume we're allocating out of the middle of a free extent. +	 * We need to find the beginning and end of the extent so we can +	 * properly update the summary. +	 */ +	error = xfs_rtfind_back(mp, tp, start, 0, &preblock); +	if (error) { +		return error; +	} +	/* +	 * Find the next allocated block (end of free extent). +	 */ +	error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, +		&postblock); +	if (error) { +		return error; +	} +	/* +	 * Decrement the summary information corresponding to the entire +	 * (old) free extent. +	 */ +	error = xfs_rtmodify_summary(mp, tp, +		XFS_RTBLOCKLOG(postblock + 1 - preblock), +		XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb); +	if (error) { +		return error; +	} +	/* +	 * If there are blocks not being allocated at the front of the +	 * old extent, add summary data for them to be free. 
+	 */ +	if (preblock < start) { +		error = xfs_rtmodify_summary(mp, tp, +			XFS_RTBLOCKLOG(start - preblock), +			XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb); +		if (error) { +			return error; +		} +	} +	/* +	 * If there are blocks not being allocated at the end of the +	 * old extent, add summary data for them to be free. +	 */ +	if (postblock > end) { +		error = xfs_rtmodify_summary(mp, tp, +			XFS_RTBLOCKLOG(postblock - end), +			XFS_BITTOBLOCK(mp, end + 1), 1, rbpp, rsb); +		if (error) { +			return error; +		} +	} +	/* +	 * Modify the bitmap to mark this extent allocated. +	 */ +	error = xfs_rtmodify_range(mp, tp, start, len, 0);  	return error;  } @@ -721,1112 +809,126 @@ xfs_rtallocate_extent_size(  }  /* - * Mark an extent specified by start and len allocated. - * Updates all the summary information as well as the bitmap. + * Allocate space to the bitmap or summary file, and zero it, for growfs.   */  STATIC int				/* error */ -xfs_rtallocate_range( +xfs_growfs_rt_alloc(  	xfs_mount_t	*mp,		/* file system mount point */ -	xfs_trans_t	*tp,		/* transaction pointer */ -	xfs_rtblock_t	start,		/* start block to allocate */ -	xfs_extlen_t	len,		/* length to allocate */ -	xfs_buf_t	**rbpp,		/* in/out: summary block buffer */ -	xfs_fsblock_t	*rsb)		/* in/out: summary block number */ +	xfs_extlen_t	oblocks,	/* old count of blocks */ +	xfs_extlen_t	nblocks,	/* new count of blocks */ +	xfs_inode_t	*ip)		/* inode (bitmap/summary) */  { -	xfs_rtblock_t	end;		/* end of the allocated extent */ -	int		error;		/* error value */ -	xfs_rtblock_t	postblock = 0;	/* first block allocated > end */ -	xfs_rtblock_t	preblock = 0;	/* first block allocated < start */ +	xfs_fileoff_t	bno;		/* block number in file */ +	xfs_buf_t	*bp;		/* temporary buffer for zeroing */ +	int		committed;	/* transaction committed flag */ +	xfs_daddr_t	d;		/* disk block address */ +	int		error;		/* error return value */ +	xfs_fsblock_t	firstblock;	/* first block allocated in xaction */ +	xfs_bmap_free_t	flist;		/* list of freed blocks */ +	xfs_fsblock_t	fsbno;		/* filesystem block for bno */ +	xfs_bmbt_irec_t	map;		/* block map output */ +	int		nmap;		/* number of block maps */ +	int		resblks;	/* space reservation */ -	end = start + len - 1; -	/* -	 * Assume we're allocating out of the middle of a free extent. -	 * We need to find the beginning and end of the extent so we can -	 * properly update the summary. -	 */ -	error = xfs_rtfind_back(mp, tp, start, 0, &preblock); -	if (error) { -		return error; -	} -	/* -	 * Find the next allocated block (end of free extent). -	 */ -	error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, -		&postblock); -	if (error) { -		return error; -	} -	/* -	 * Decrement the summary information corresponding to the entire -	 * (old) free extent. -	 */ -	error = xfs_rtmodify_summary(mp, tp, -		XFS_RTBLOCKLOG(postblock + 1 - preblock), -		XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb); -	if (error) { -		return error; -	} -	/* -	 * If there are blocks not being allocated at the front of the -	 * old extent, add summary data for them to be free. -	 */ -	if (preblock < start) { -		error = xfs_rtmodify_summary(mp, tp, -			XFS_RTBLOCKLOG(start - preblock), -			XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb); -		if (error) { -			return error; -		} -	} -	/* -	 * If there are blocks not being allocated at the end of the -	 * old extent, add summary data for them to be free. 
-	 */ -	if (postblock > end) { -		error = xfs_rtmodify_summary(mp, tp, -			XFS_RTBLOCKLOG(postblock - end), -			XFS_BITTOBLOCK(mp, end + 1), 1, rbpp, rsb); -		if (error) { -			return error; -		} -	}  	/* -	 * Modify the bitmap to mark this extent allocated. +	 * Allocate space to the file, as necessary.  	 */ -	error = xfs_rtmodify_range(mp, tp, start, len, 0); -	return error; -} - -/* - * Return whether there are any free extents in the size range given - * by low and high, for the bitmap block bbno. - */ -STATIC int				/* error */ -xfs_rtany_summary( -	xfs_mount_t	*mp,		/* file system mount structure */ -	xfs_trans_t	*tp,		/* transaction pointer */ -	int		low,		/* low log2 extent size */ -	int		high,		/* high log2 extent size */ -	xfs_rtblock_t	bbno,		/* bitmap block number */ -	xfs_buf_t	**rbpp,		/* in/out: summary block buffer */ -	xfs_fsblock_t	*rsb,		/* in/out: summary block number */ -	int		*stat)		/* out: any good extents here? */ -{ -	int		error;		/* error value */ -	int		log;		/* loop counter, log2 of ext. size */ -	xfs_suminfo_t	sum;		/* summary data */ +	while (oblocks < nblocks) { +		int		cancelflags = 0; +		xfs_trans_t	*tp; -	/* -	 * Loop over logs of extent sizes.  Order is irrelevant. -	 */ -	for (log = low; log <= high; log++) { +		tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ALLOC); +		resblks = XFS_GROWFSRT_SPACE_RES(mp, nblocks - oblocks);  		/* -		 * Get one summary datum. +		 * Reserve space & log for one extent added to the file.  		 */ -		error = xfs_rtget_summary(mp, tp, log, bbno, rbpp, rsb, &sum); -		if (error) { -			return error; -		} +		error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtalloc, +					  resblks, 0); +		if (error) +			goto error_cancel; +		cancelflags = XFS_TRANS_RELEASE_LOG_RES;  		/* -		 * If there are any, return success. +		 * Lock the inode.  		 */ -		if (sum) { -			*stat = 1; -			return 0; -		} -	} -	/* -	 * Found nothing, return failure. -	 */ -	*stat = 0; -	return 0; -} - -/* - * Get a buffer for the bitmap or summary file block specified. - * The buffer is returned read and locked. - */ -STATIC int				/* error */ -xfs_rtbuf_get( -	xfs_mount_t	*mp,		/* file system mount structure */ -	xfs_trans_t	*tp,		/* transaction pointer */ -	xfs_rtblock_t	block,		/* block number in bitmap or summary */ -	int		issum,		/* is summary not bitmap */ -	xfs_buf_t	**bpp)		/* output: buffer for the block */ -{ -	xfs_buf_t	*bp;		/* block buffer, result */ -	xfs_inode_t	*ip;		/* bitmap or summary inode */ -	xfs_bmbt_irec_t	map; -	int		nmap = 1; -	int		error;		/* error value */ - -	ip = issum ? mp->m_rsumip : mp->m_rbmip; - -	error = xfs_bmapi_read(ip, block, 1, &map, &nmap, XFS_DATA_FORK); -	if (error) -		return error; - -	ASSERT(map.br_startblock != NULLFSBLOCK); -	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, -				   XFS_FSB_TO_DADDR(mp, map.br_startblock), -				   mp->m_bsize, 0, &bp, NULL); -	if (error) -		return error; -	ASSERT(!xfs_buf_geterror(bp)); -	*bpp = bp; -	return 0; -} - -#ifdef DEBUG -/* - * Check that the given extent (block range) is allocated already. 
- */ -STATIC int				/* error */ -xfs_rtcheck_alloc_range( -	xfs_mount_t	*mp,		/* file system mount point */ -	xfs_trans_t	*tp,		/* transaction pointer */ -	xfs_rtblock_t	bno,		/* starting block number of extent */ -	xfs_extlen_t	len,		/* length of extent */ -	int		*stat)		/* out: 1 for allocated, 0 for not */ -{ -	xfs_rtblock_t	new;		/* dummy for xfs_rtcheck_range */ - -	return xfs_rtcheck_range(mp, tp, bno, len, 0, &new, stat); -} -#endif - -/* - * Check that the given range is either all allocated (val = 0) or - * all free (val = 1). - */ -STATIC int				/* error */ -xfs_rtcheck_range( -	xfs_mount_t	*mp,		/* file system mount point */ -	xfs_trans_t	*tp,		/* transaction pointer */ -	xfs_rtblock_t	start,		/* starting block number of extent */ -	xfs_extlen_t	len,		/* length of extent */ -	int		val,		/* 1 for free, 0 for allocated */ -	xfs_rtblock_t	*new,		/* out: first block not matching */ -	int		*stat)		/* out: 1 for matches, 0 for not */ -{ -	xfs_rtword_t	*b;		/* current word in buffer */ -	int		bit;		/* bit number in the word */ -	xfs_rtblock_t	block;		/* bitmap block number */ -	xfs_buf_t	*bp;		/* buf for the block */ -	xfs_rtword_t	*bufp;		/* starting word in buffer */ -	int		error;		/* error value */ -	xfs_rtblock_t	i;		/* current bit number rel. to start */ -	xfs_rtblock_t	lastbit;	/* last useful bit in word */ -	xfs_rtword_t	mask;		/* mask of relevant bits for value */ -	xfs_rtword_t	wdiff;		/* difference from wanted value */ -	int		word;		/* word number in the buffer */ +		xfs_ilock(ip, XFS_ILOCK_EXCL); +		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); -	/* -	 * Compute starting bitmap block number -	 */ -	block = XFS_BITTOBLOCK(mp, start); -	/* -	 * Read the bitmap block. -	 */ -	error = xfs_rtbuf_get(mp, tp, block, 0, &bp); -	if (error) { -		return error; -	} -	bufp = bp->b_addr; -	/* -	 * Compute the starting word's address, and starting bit. -	 */ -	word = XFS_BITTOWORD(mp, start); -	b = &bufp[word]; -	bit = (int)(start & (XFS_NBWORD - 1)); -	/* -	 * 0 (allocated) => all zero's; 1 (free) => all one's. -	 */ -	val = -val; -	/* -	 * If not starting on a word boundary, deal with the first -	 * (partial) word. -	 */ -	if (bit) { -		/* -		 * Compute first bit not examined. -		 */ -		lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); -		/* -		 * Mask of relevant bits. -		 */ -		mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; -		/* -		 * Compute difference between actual and desired value. -		 */ -		if ((wdiff = (*b ^ val) & mask)) { -			/* -			 * Different, compute first wrong bit and return. -			 */ -			xfs_trans_brelse(tp, bp); -			i = XFS_RTLOBIT(wdiff) - bit; -			*new = start + i; -			*stat = 0; -			return 0; -		} -		i = lastbit - bit; -		/* -		 * Go on to next block if that's where the next word is -		 * and we need the next word. -		 */ -		if (++word == XFS_BLOCKWSIZE(mp) && i < len) { -			/* -			 * If done with this block, get the next one. -			 */ -			xfs_trans_brelse(tp, bp); -			error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); -			if (error) { -				return error; -			} -			b = bufp = bp->b_addr; -			word = 0; -		} else { -			/* -			 * Go on to the next word in the buffer. -			 */ -			b++; -		} -	} else { -		/* -		 * Starting on a word boundary, no partial word. -		 */ -		i = 0; -	} -	/* -	 * Loop over whole words in buffers.  When we use up one buffer -	 * we move on to the next one. -	 */ -	while (len - i >= XFS_NBWORD) { -		/* -		 * Compute difference between actual and desired value. 
-		 */ -		if ((wdiff = *b ^ val)) { -			/* -			 * Different, compute first wrong bit and return. -			 */ -			xfs_trans_brelse(tp, bp); -			i += XFS_RTLOBIT(wdiff); -			*new = start + i; -			*stat = 0; -			return 0; -		} -		i += XFS_NBWORD; +		xfs_bmap_init(&flist, &firstblock);  		/* -		 * Go on to next block if that's where the next word is -		 * and we need the next word. +		 * Allocate blocks to the bitmap file.  		 */ -		if (++word == XFS_BLOCKWSIZE(mp) && i < len) { -			/* -			 * If done with this block, get the next one. -			 */ -			xfs_trans_brelse(tp, bp); -			error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); -			if (error) { -				return error; -			} -			b = bufp = bp->b_addr; -			word = 0; -		} else { -			/* -			 * Go on to the next word in the buffer. -			 */ -			b++; -		} -	} -	/* -	 * If not ending on a word boundary, deal with the last -	 * (partial) word. -	 */ -	if ((lastbit = len - i)) { +		nmap = 1; +		cancelflags |= XFS_TRANS_ABORT; +		error = xfs_bmapi_write(tp, ip, oblocks, nblocks - oblocks, +					XFS_BMAPI_METADATA, &firstblock, +					resblks, &map, &nmap, &flist); +		if (!error && nmap < 1) +			error = XFS_ERROR(ENOSPC); +		if (error) +			goto error_cancel;  		/* -		 * Mask of relevant bits. +		 * Free any blocks freed up in the transaction, then commit.  		 */ -		mask = ((xfs_rtword_t)1 << lastbit) - 1; +		error = xfs_bmap_finish(&tp, &flist, &committed); +		if (error) +			goto error_cancel; +		error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES); +		if (error) +			goto error;  		/* -		 * Compute difference between actual and desired value. +		 * Now we need to clear the allocated blocks. +		 * Do this one block per transaction, to keep it simple.  		 */ -		if ((wdiff = (*b ^ val) & mask)) { +		cancelflags = 0; +		for (bno = map.br_startoff, fsbno = map.br_startblock; +		     bno < map.br_startoff + map.br_blockcount; +		     bno++, fsbno++) { +			tp = xfs_trans_alloc(mp, XFS_TRANS_GROWFSRT_ZERO);  			/* -			 * Different, compute first wrong bit and return. +			 * Reserve log for one block zeroing.  			 */ -			xfs_trans_brelse(tp, bp); -			i += XFS_RTLOBIT(wdiff); -			*new = start + i; -			*stat = 0; -			return 0; -		} else -			i = len; -	} -	/* -	 * Successful, return. -	 */ -	xfs_trans_brelse(tp, bp); -	*new = start + i; -	*stat = 1; -	return 0; -} - -/* - * Copy and transform the summary file, given the old and new - * parameters in the mount structures. 
- */ -STATIC int				/* error */ -xfs_rtcopy_summary( -	xfs_mount_t	*omp,		/* old file system mount point */ -	xfs_mount_t	*nmp,		/* new file system mount point */ -	xfs_trans_t	*tp)		/* transaction pointer */ -{ -	xfs_rtblock_t	bbno;		/* bitmap block number */ -	xfs_buf_t	*bp;		/* summary buffer */ -	int		error;		/* error return value */ -	int		log;		/* summary level number (log length) */ -	xfs_suminfo_t	sum;		/* summary data */ -	xfs_fsblock_t	sumbno;		/* summary block number */ - -	bp = NULL; -	for (log = omp->m_rsumlevels - 1; log >= 0; log--) { -		for (bbno = omp->m_sb.sb_rbmblocks - 1; -		     (xfs_srtblock_t)bbno >= 0; -		     bbno--) { -			error = xfs_rtget_summary(omp, tp, log, bbno, &bp, -				&sumbno, &sum); -			if (error) -				return error; -			if (sum == 0) -				continue; -			error = xfs_rtmodify_summary(omp, tp, log, bbno, -sum, -				&bp, &sumbno); -			if (error) -				return error; -			error = xfs_rtmodify_summary(nmp, tp, log, bbno, sum, -				&bp, &sumbno); +			error = xfs_trans_reserve(tp, &M_RES(mp)->tr_growrtzero, +						  0, 0);  			if (error) -				return error; -			ASSERT(sum > 0); -		} -	} -	return 0; -} - -/* - * Searching backward from start to limit, find the first block whose - * allocated/free state is different from start's. - */ -STATIC int				/* error */ -xfs_rtfind_back( -	xfs_mount_t	*mp,		/* file system mount point */ -	xfs_trans_t	*tp,		/* transaction pointer */ -	xfs_rtblock_t	start,		/* starting block to look at */ -	xfs_rtblock_t	limit,		/* last block to look at */ -	xfs_rtblock_t	*rtblock)	/* out: start block found */ -{ -	xfs_rtword_t	*b;		/* current word in buffer */ -	int		bit;		/* bit number in the word */ -	xfs_rtblock_t	block;		/* bitmap block number */ -	xfs_buf_t	*bp;		/* buf for the block */ -	xfs_rtword_t	*bufp;		/* starting word in buffer */ -	int		error;		/* error value */ -	xfs_rtblock_t	firstbit;	/* first useful bit in the word */ -	xfs_rtblock_t	i;		/* current bit number rel. to start */ -	xfs_rtblock_t	len;		/* length of inspected area */ -	xfs_rtword_t	mask;		/* mask of relevant bits for value */ -	xfs_rtword_t	want;		/* mask for "good" values */ -	xfs_rtword_t	wdiff;		/* difference from wanted value */ -	int		word;		/* word number in the buffer */ - -	/* -	 * Compute and read in starting bitmap block for starting block. -	 */ -	block = XFS_BITTOBLOCK(mp, start); -	error = xfs_rtbuf_get(mp, tp, block, 0, &bp); -	if (error) { -		return error; -	} -	bufp = bp->b_addr; -	/* -	 * Get the first word's index & point to it. -	 */ -	word = XFS_BITTOWORD(mp, start); -	b = &bufp[word]; -	bit = (int)(start & (XFS_NBWORD - 1)); -	len = start - limit + 1; -	/* -	 * Compute match value, based on the bit at start: if 1 (free) -	 * then all-ones, else all-zeroes. -	 */ -	want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0; -	/* -	 * If the starting position is not word-aligned, deal with the -	 * partial word. -	 */ -	if (bit < XFS_NBWORD - 1) { -		/* -		 * Calculate first (leftmost) bit number to look at, -		 * and mask for all the relevant bits in this word. -		 */ -		firstbit = XFS_RTMAX((xfs_srtblock_t)(bit - len + 1), 0); -		mask = (((xfs_rtword_t)1 << (bit - firstbit + 1)) - 1) << -			firstbit; -		/* -		 * Calculate the difference between the value there -		 * and what we're looking for. -		 */ -		if ((wdiff = (*b ^ want) & mask)) { -			/* -			 * Different.  Mark where we are and return. 
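The xfs_rtcopy_summary removal above walks the summary levels and bitmap blocks from the top down and moves every non-zero counter from the old mount's summary file to the new one during a realtime grow. Stripped of transactions and buffers, the double loop looks roughly like this; the array sizes and values are made up for the sketch:

#include <stdio.h>

#define SUMLEVELS	4	/* log2 size classes (hypothetical) */
#define OLD_RBMBLOCKS	3	/* bitmap blocks before the grow */
#define NEW_RBMBLOCKS	5	/* bitmap blocks after the grow */

/* summary[log][bbno]: free extents of size ~2^log starting in bitmap block bbno */
static int old_sum[SUMLEVELS][OLD_RBMBLOCKS] = {
	{ 2, 0, 1 },
	{ 0, 3, 0 },
	{ 1, 0, 0 },
	{ 0, 0, 0 },
};
static int new_sum[SUMLEVELS][NEW_RBMBLOCKS];	/* starts all zero */

int main(void)
{
	int log, bbno;

	/* Walk from the largest size class and the highest block downwards. */
	for (log = SUMLEVELS - 1; log >= 0; log--) {
		for (bbno = OLD_RBMBLOCKS - 1; bbno >= 0; bbno--) {
			int sum = old_sum[log][bbno];

			if (sum == 0)
				continue;
			old_sum[log][bbno] -= sum;	/* -sum on the old fs */
			new_sum[log][bbno] += sum;	/* +sum on the new fs */
		}
	}

	for (log = 0; log < SUMLEVELS; log++)
		for (bbno = 0; bbno < NEW_RBMBLOCKS; bbno++)
			if (new_sum[log][bbno])
				printf("new_sum[%d][%d] = %d\n",
				       log, bbno, new_sum[log][bbno]);
	return 0;
}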
-			 */ -			xfs_trans_brelse(tp, bp); -			i = bit - XFS_RTHIBIT(wdiff); -			*rtblock = start - i + 1; -			return 0; -		} -		i = bit - firstbit + 1; -		/* -		 * Go on to previous block if that's where the previous word is -		 * and we need the previous word. -		 */ -		if (--word == -1 && i < len) { -			/* -			 * If done with this block, get the previous one. -			 */ -			xfs_trans_brelse(tp, bp); -			error = xfs_rtbuf_get(mp, tp, --block, 0, &bp); -			if (error) { -				return error; -			} -			bufp = bp->b_addr; -			word = XFS_BLOCKWMASK(mp); -			b = &bufp[word]; -		} else { -			/* -			 * Go on to the previous word in the buffer. -			 */ -			b--; -		} -	} else { -		/* -		 * Starting on a word boundary, no partial word. -		 */ -		i = 0; -	} -	/* -	 * Loop over whole words in buffers.  When we use up one buffer -	 * we move on to the previous one. -	 */ -	while (len - i >= XFS_NBWORD) { -		/* -		 * Compute difference between actual and desired value. -		 */ -		if ((wdiff = *b ^ want)) { -			/* -			 * Different, mark where we are and return. -			 */ -			xfs_trans_brelse(tp, bp); -			i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); -			*rtblock = start - i + 1; -			return 0; -		} -		i += XFS_NBWORD; -		/* -		 * Go on to previous block if that's where the previous word is -		 * and we need the previous word. -		 */ -		if (--word == -1 && i < len) { -			/* -			 * If done with this block, get the previous one. -			 */ -			xfs_trans_brelse(tp, bp); -			error = xfs_rtbuf_get(mp, tp, --block, 0, &bp); -			if (error) { -				return error; -			} -			bufp = bp->b_addr; -			word = XFS_BLOCKWMASK(mp); -			b = &bufp[word]; -		} else { -			/* -			 * Go on to the previous word in the buffer. -			 */ -			b--; -		} -	} -	/* -	 * If not ending on a word boundary, deal with the last -	 * (partial) word. -	 */ -	if (len - i) { -		/* -		 * Calculate first (leftmost) bit number to look at, -		 * and mask for all the relevant bits in this word. -		 */ -		firstbit = XFS_NBWORD - (len - i); -		mask = (((xfs_rtword_t)1 << (len - i)) - 1) << firstbit; -		/* -		 * Compute difference between actual and desired value. -		 */ -		if ((wdiff = (*b ^ want) & mask)) { -			/* -			 * Different, mark where we are and return. -			 */ -			xfs_trans_brelse(tp, bp); -			i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); -			*rtblock = start - i + 1; -			return 0; -		} else -			i = len; -	} -	/* -	 * No match, return that we scanned the whole area. -	 */ -	xfs_trans_brelse(tp, bp); -	*rtblock = start - i + 1; -	return 0; -} - -/* - * Searching forward from start to limit, find the first block whose - * allocated/free state is different from start's. - */ -STATIC int				/* error */ -xfs_rtfind_forw( -	xfs_mount_t	*mp,		/* file system mount point */ -	xfs_trans_t	*tp,		/* transaction pointer */ -	xfs_rtblock_t	start,		/* starting block to look at */ -	xfs_rtblock_t	limit,		/* last block to look at */ -	xfs_rtblock_t	*rtblock)	/* out: start block found */ -{ -	xfs_rtword_t	*b;		/* current word in buffer */ -	int		bit;		/* bit number in the word */ -	xfs_rtblock_t	block;		/* bitmap block number */ -	xfs_buf_t	*bp;		/* buf for the block */ -	xfs_rtword_t	*bufp;		/* starting word in buffer */ -	int		error;		/* error value */ -	xfs_rtblock_t	i;		/* current bit number rel. 
to start */ -	xfs_rtblock_t	lastbit;	/* last useful bit in the word */ -	xfs_rtblock_t	len;		/* length of inspected area */ -	xfs_rtword_t	mask;		/* mask of relevant bits for value */ -	xfs_rtword_t	want;		/* mask for "good" values */ -	xfs_rtword_t	wdiff;		/* difference from wanted value */ -	int		word;		/* word number in the buffer */ - -	/* -	 * Compute and read in starting bitmap block for starting block. -	 */ -	block = XFS_BITTOBLOCK(mp, start); -	error = xfs_rtbuf_get(mp, tp, block, 0, &bp); -	if (error) { -		return error; -	} -	bufp = bp->b_addr; -	/* -	 * Get the first word's index & point to it. -	 */ -	word = XFS_BITTOWORD(mp, start); -	b = &bufp[word]; -	bit = (int)(start & (XFS_NBWORD - 1)); -	len = limit - start + 1; -	/* -	 * Compute match value, based on the bit at start: if 1 (free) -	 * then all-ones, else all-zeroes. -	 */ -	want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0; -	/* -	 * If the starting position is not word-aligned, deal with the -	 * partial word. -	 */ -	if (bit) { -		/* -		 * Calculate last (rightmost) bit number to look at, -		 * and mask for all the relevant bits in this word. -		 */ -		lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); -		mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; -		/* -		 * Calculate the difference between the value there -		 * and what we're looking for. -		 */ -		if ((wdiff = (*b ^ want) & mask)) { -			/* -			 * Different.  Mark where we are and return. -			 */ -			xfs_trans_brelse(tp, bp); -			i = XFS_RTLOBIT(wdiff) - bit; -			*rtblock = start + i - 1; -			return 0; -		} -		i = lastbit - bit; -		/* -		 * Go on to next block if that's where the next word is -		 * and we need the next word. -		 */ -		if (++word == XFS_BLOCKWSIZE(mp) && i < len) { -			/* -			 * If done with this block, get the previous one. -			 */ -			xfs_trans_brelse(tp, bp); -			error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); -			if (error) { -				return error; -			} -			b = bufp = bp->b_addr; -			word = 0; -		} else { -			/* -			 * Go on to the previous word in the buffer. -			 */ -			b++; -		} -	} else { -		/* -		 * Starting on a word boundary, no partial word. -		 */ -		i = 0; -	} -	/* -	 * Loop over whole words in buffers.  When we use up one buffer -	 * we move on to the next one. -	 */ -	while (len - i >= XFS_NBWORD) { -		/* -		 * Compute difference between actual and desired value. -		 */ -		if ((wdiff = *b ^ want)) { +				goto error_cancel;  			/* -			 * Different, mark where we are and return. +			 * Lock the bitmap inode.  			 */ -			xfs_trans_brelse(tp, bp); -			i += XFS_RTLOBIT(wdiff); -			*rtblock = start + i - 1; -			return 0; -		} -		i += XFS_NBWORD; -		/* -		 * Go on to next block if that's where the next word is -		 * and we need the next word. -		 */ -		if (++word == XFS_BLOCKWSIZE(mp) && i < len) { +			xfs_ilock(ip, XFS_ILOCK_EXCL); +			xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);  			/* -			 * If done with this block, get the next one. +			 * Get a buffer for the block.  			 */ -			xfs_trans_brelse(tp, bp); -			error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); -			if (error) { -				return error; +			d = XFS_FSB_TO_DADDR(mp, fsbno); +			bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, +				mp->m_bsize, 0); +			if (bp == NULL) { +				error = XFS_ERROR(EIO); +error_cancel: +				xfs_trans_cancel(tp, cancelflags); +				goto error;  			} -			b = bufp = bp->b_addr; -			word = 0; -		} else { +			memset(bp->b_addr, 0, mp->m_sb.sb_blocksize); +			xfs_trans_log_buf(tp, bp, 0, mp->m_sb.sb_blocksize - 1);  			/* -			 * Go on to the next word in the buffer. 
+			 * Commit the transaction.  			 */ -			b++; +			error = xfs_trans_commit(tp, 0); +			if (error) +				goto error;  		} -	} -	/* -	 * If not ending on a word boundary, deal with the last -	 * (partial) word. -	 */ -	if ((lastbit = len - i)) {  		/* -		 * Calculate mask for all the relevant bits in this word. -		 */ -		mask = ((xfs_rtword_t)1 << lastbit) - 1; -		/* -		 * Compute difference between actual and desired value. +		 * Go on to the next extent, if any.  		 */ -		if ((wdiff = (*b ^ want) & mask)) { -			/* -			 * Different, mark where we are and return. -			 */ -			xfs_trans_brelse(tp, bp); -			i += XFS_RTLOBIT(wdiff); -			*rtblock = start + i - 1; -			return 0; -		} else -			i = len; +		oblocks = map.br_startoff + map.br_blockcount;  	} -	/* -	 * No match, return that we scanned the whole area. -	 */ -	xfs_trans_brelse(tp, bp); -	*rtblock = start + i - 1;  	return 0; -} -/* - * Mark an extent specified by start and len freed. - * Updates all the summary information as well as the bitmap. - */ -STATIC int				/* error */ -xfs_rtfree_range( -	xfs_mount_t	*mp,		/* file system mount point */ -	xfs_trans_t	*tp,		/* transaction pointer */ -	xfs_rtblock_t	start,		/* starting block to free */ -	xfs_extlen_t	len,		/* length to free */ -	xfs_buf_t	**rbpp,		/* in/out: summary block buffer */ -	xfs_fsblock_t	*rsb)		/* in/out: summary block number */ -{ -	xfs_rtblock_t	end;		/* end of the freed extent */ -	int		error;		/* error value */ -	xfs_rtblock_t	postblock;	/* first block freed > end */ -	xfs_rtblock_t	preblock;	/* first block freed < start */ - -	end = start + len - 1; -	/* -	 * Modify the bitmap to mark this extent freed. -	 */ -	error = xfs_rtmodify_range(mp, tp, start, len, 1); -	if (error) { -		return error; -	} -	/* -	 * Assume we're freeing out of the middle of an allocated extent. -	 * We need to find the beginning and end of the extent so we can -	 * properly update the summary. -	 */ -	error = xfs_rtfind_back(mp, tp, start, 0, &preblock); -	if (error) { -		return error; -	} -	/* -	 * Find the next allocated block (end of allocated extent). -	 */ -	error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, -		&postblock); -	if (error) -		return error; -	/* -	 * If there are blocks not being freed at the front of the -	 * old extent, add summary data for them to be allocated. -	 */ -	if (preblock < start) { -		error = xfs_rtmodify_summary(mp, tp, -			XFS_RTBLOCKLOG(start - preblock), -			XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb); -		if (error) { -			return error; -		} -	} -	/* -	 * If there are blocks not being freed at the end of the -	 * old extent, add summary data for them to be allocated. -	 */ -	if (postblock > end) { -		error = xfs_rtmodify_summary(mp, tp, -			XFS_RTBLOCKLOG(postblock - end), -			XFS_BITTOBLOCK(mp, end + 1), -1, rbpp, rsb); -		if (error) { -			return error; -		} -	} -	/* -	 * Increment the summary information corresponding to the entire -	 * (new) free extent. -	 */ -	error = xfs_rtmodify_summary(mp, tp, -		XFS_RTBLOCKLOG(postblock + 1 - preblock), -		XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb); +error:  	return error;  }  /* - * Read and return the summary information for a given extent size, - * bitmap block combination. - * Keeps track of a current summary block, so we don't keep reading - * it from the buffer cache. 
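The added grow path in this hunk zeroes each block newly mapped into the bitmap file in its own transaction, "one block per transaction, to keep it simple" as the in-line comment puts it. A rough stand-alone sketch of that shape; tx_begin, tx_commit and the in-memory "disk" are hypothetical stand-ins, not XFS interfaces:

#include <stdio.h>
#include <string.h>

#define BLOCKSIZE 4096

static char disk[8][BLOCKSIZE];		/* pretend device: 8 blocks */

static int tx_begin(const char *what)	{ printf("begin  %s\n", what); return 0; }
static int tx_commit(const char *what)	{ printf("commit %s\n", what); return 0; }

/* Zero blocks [start, start + count) of the pretend device. */
static int zero_new_blocks(unsigned start, unsigned count)
{
	unsigned bno;

	for (bno = start; bno < start + count; bno++) {
		int error = tx_begin("growrtzero");	/* small reservation */

		if (error)
			return error;
		memset(disk[bno], 0, BLOCKSIZE);	/* the whole block */
		error = tx_commit("growrtzero");	/* one block per tx */
		if (error)
			return error;
	}
	return 0;
}

int main(void)
{
	return zero_new_blocks(2, 3);	/* e.g. blocks 2..4 just allocated */
}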
- */ -STATIC int				/* error */ -xfs_rtget_summary( -	xfs_mount_t	*mp,		/* file system mount structure */ -	xfs_trans_t	*tp,		/* transaction pointer */ -	int		log,		/* log2 of extent size */ -	xfs_rtblock_t	bbno,		/* bitmap block number */ -	xfs_buf_t	**rbpp,		/* in/out: summary block buffer */ -	xfs_fsblock_t	*rsb,		/* in/out: summary block number */ -	xfs_suminfo_t	*sum)		/* out: summary info for this block */ -{ -	xfs_buf_t	*bp;		/* buffer for summary block */ -	int		error;		/* error value */ -	xfs_fsblock_t	sb;		/* summary fsblock */ -	int		so;		/* index into the summary file */ -	xfs_suminfo_t	*sp;		/* pointer to returned data */ - -	/* -	 * Compute entry number in the summary file. -	 */ -	so = XFS_SUMOFFS(mp, log, bbno); -	/* -	 * Compute the block number in the summary file. -	 */ -	sb = XFS_SUMOFFSTOBLOCK(mp, so); -	/* -	 * If we have an old buffer, and the block number matches, use that. -	 */ -	if (rbpp && *rbpp && *rsb == sb) -		bp = *rbpp; -	/* -	 * Otherwise we have to get the buffer. -	 */ -	else { -		/* -		 * If there was an old one, get rid of it first. -		 */ -		if (rbpp && *rbpp) -			xfs_trans_brelse(tp, *rbpp); -		error = xfs_rtbuf_get(mp, tp, sb, 1, &bp); -		if (error) { -			return error; -		} -		/* -		 * Remember this buffer and block for the next call. -		 */ -		if (rbpp) { -			*rbpp = bp; -			*rsb = sb; -		} -	} -	/* -	 * Point to the summary information & copy it out. -	 */ -	sp = XFS_SUMPTR(mp, bp, so); -	*sum = *sp; -	/* -	 * Drop the buffer if we're not asked to remember it. -	 */ -	if (!rbpp) -		xfs_trans_brelse(tp, bp); -	return 0; -} - -/* - * Set the given range of bitmap bits to the given value. - * Do whatever I/O and logging is required. - */ -STATIC int				/* error */ -xfs_rtmodify_range( -	xfs_mount_t	*mp,		/* file system mount point */ -	xfs_trans_t	*tp,		/* transaction pointer */ -	xfs_rtblock_t	start,		/* starting block to modify */ -	xfs_extlen_t	len,		/* length of extent to modify */ -	int		val)		/* 1 for free, 0 for allocated */ -{ -	xfs_rtword_t	*b;		/* current word in buffer */ -	int		bit;		/* bit number in the word */ -	xfs_rtblock_t	block;		/* bitmap block number */ -	xfs_buf_t	*bp;		/* buf for the block */ -	xfs_rtword_t	*bufp;		/* starting word in buffer */ -	int		error;		/* error value */ -	xfs_rtword_t	*first;		/* first used word in the buffer */ -	int		i;		/* current bit number rel. to start */ -	int		lastbit;	/* last useful bit in word */ -	xfs_rtword_t	mask;		/* mask o frelevant bits for value */ -	int		word;		/* word number in the buffer */ - -	/* -	 * Compute starting bitmap block number. -	 */ -	block = XFS_BITTOBLOCK(mp, start); -	/* -	 * Read the bitmap block, and point to its data. -	 */ -	error = xfs_rtbuf_get(mp, tp, block, 0, &bp); -	if (error) { -		return error; -	} -	bufp = bp->b_addr; -	/* -	 * Compute the starting word's address, and starting bit. -	 */ -	word = XFS_BITTOWORD(mp, start); -	first = b = &bufp[word]; -	bit = (int)(start & (XFS_NBWORD - 1)); -	/* -	 * 0 (allocated) => all zeroes; 1 (free) => all ones. -	 */ -	val = -val; -	/* -	 * If not starting on a word boundary, deal with the first -	 * (partial) word. -	 */ -	if (bit) { -		/* -		 * Compute first bit not changed and mask of relevant bits. -		 */ -		lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); -		mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; -		/* -		 * Set/clear the active bits. 
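xfs_rtget_summary and xfs_rtmodify_summary (removed above) thread an in/out buffer pointer and block number (rbpp/rsb) through their callers so that consecutive calls hitting the same summary block reuse one buffer instead of re-reading it. The caching idea in miniature, with read_block standing in for xfs_rtbuf_get:

#include <stdio.h>

struct buf { int blkno; };

static struct buf blocks[4] = { {0}, {1}, {2}, {3} };
static int reads;				/* how many "cache misses" */

static struct buf *read_block(int blkno)	/* stand-in for xfs_rtbuf_get */
{
	reads++;
	return &blocks[blkno];
}

/* Return the buffer for blkno, reusing *cached if it is already the one. */
static struct buf *get_cached(struct buf **cached, int *cached_blkno, int blkno)
{
	if (*cached && *cached_blkno == blkno)
		return *cached;			/* same block: no re-read */
	*cached = read_block(blkno);		/* else drop old, read new */
	*cached_blkno = blkno;
	return *cached;
}

int main(void)
{
	struct buf *bp = NULL;
	int blkno = -1;
	int want[] = { 1, 1, 1, 2, 2, 1 };
	unsigned i;

	for (i = 0; i < sizeof(want) / sizeof(want[0]); i++)
		get_cached(&bp, &blkno, want[i]);
	printf("%u lookups, %d reads\n", i, reads);	/* 6 lookups, 3 reads */
	return 0;
}

The caching pays off because callers such as xfs_rtfree_range issue several summary updates that usually land in the same summary block, which is exactly what the rbpp/rsb pair carries from call to call.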
-		 */ -		if (val) -			*b |= mask; -		else -			*b &= ~mask; -		i = lastbit - bit; -		/* -		 * Go on to the next block if that's where the next word is -		 * and we need the next word. -		 */ -		if (++word == XFS_BLOCKWSIZE(mp) && i < len) { -			/* -			 * Log the changed part of this block. -			 * Get the next one. -			 */ -			xfs_trans_log_buf(tp, bp, -				(uint)((char *)first - (char *)bufp), -				(uint)((char *)b - (char *)bufp)); -			error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); -			if (error) { -				return error; -			} -			first = b = bufp = bp->b_addr; -			word = 0; -		} else { -			/* -			 * Go on to the next word in the buffer -			 */ -			b++; -		} -	} else { -		/* -		 * Starting on a word boundary, no partial word. -		 */ -		i = 0; -	} -	/* -	 * Loop over whole words in buffers.  When we use up one buffer -	 * we move on to the next one. -	 */ -	while (len - i >= XFS_NBWORD) { -		/* -		 * Set the word value correctly. -		 */ -		*b = val; -		i += XFS_NBWORD; -		/* -		 * Go on to the next block if that's where the next word is -		 * and we need the next word. -		 */ -		if (++word == XFS_BLOCKWSIZE(mp) && i < len) { -			/* -			 * Log the changed part of this block. -			 * Get the next one. -			 */ -			xfs_trans_log_buf(tp, bp, -				(uint)((char *)first - (char *)bufp), -				(uint)((char *)b - (char *)bufp)); -			error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); -			if (error) { -				return error; -			} -			first = b = bufp = bp->b_addr; -			word = 0; -		} else { -			/* -			 * Go on to the next word in the buffer -			 */ -			b++; -		} -	} -	/* -	 * If not ending on a word boundary, deal with the last -	 * (partial) word. -	 */ -	if ((lastbit = len - i)) { -		/* -		 * Compute a mask of relevant bits. -		 */ -		bit = 0; -		mask = ((xfs_rtword_t)1 << lastbit) - 1; -		/* -		 * Set/clear the active bits. -		 */ -		if (val) -			*b |= mask; -		else -			*b &= ~mask; -		b++; -	} -	/* -	 * Log any remaining changed bytes. -	 */ -	if (b > first) -		xfs_trans_log_buf(tp, bp, (uint)((char *)first - (char *)bufp), -			(uint)((char *)b - (char *)bufp - 1)); -	return 0; -} - -/* - * Read and modify the summary information for a given extent size, - * bitmap block combination. - * Keeps track of a current summary block, so we don't keep reading - * it from the buffer cache. - */ -STATIC int				/* error */ -xfs_rtmodify_summary( -	xfs_mount_t	*mp,		/* file system mount point */ -	xfs_trans_t	*tp,		/* transaction pointer */ -	int		log,		/* log2 of extent size */ -	xfs_rtblock_t	bbno,		/* bitmap block number */ -	int		delta,		/* change to make to summary info */ -	xfs_buf_t	**rbpp,		/* in/out: summary block buffer */ -	xfs_fsblock_t	*rsb)		/* in/out: summary block number */ -{ -	xfs_buf_t	*bp;		/* buffer for the summary block */ -	int		error;		/* error value */ -	xfs_fsblock_t	sb;		/* summary fsblock */ -	int		so;		/* index into the summary file */ -	xfs_suminfo_t	*sp;		/* pointer to returned data */ - -	/* -	 * Compute entry number in the summary file. -	 */ -	so = XFS_SUMOFFS(mp, log, bbno); -	/* -	 * Compute the block number in the summary file. -	 */ -	sb = XFS_SUMOFFSTOBLOCK(mp, so); -	/* -	 * If we have an old buffer, and the block number matches, use that. -	 */ -	if (rbpp && *rbpp && *rsb == sb) -		bp = *rbpp; -	/* -	 * Otherwise we have to get the buffer. -	 */ -	else { -		/* -		 * If there was an old one, get rid of it first. 
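xfs_rtmodify_range, whose removal ends above, builds a shifted mask for a leading partial word, stores 0 or all-ones into whole words directly, masks the trailing word, and then logs only the bytes it touched. The masking arithmetic on its own; modify_range and the 32-bit word size are sketch-only assumptions:

#include <stdint.h>
#include <stdio.h>

#define NBWORD 32u

/* Set (val = 1) or clear (val = 0) bits [start, start + len) in bmp. */
static void modify_range(uint32_t *bmp, unsigned start, unsigned len, int val)
{
	unsigned bit = start, end = start + len;

	while (bit < end) {
		unsigned word = bit / NBWORD;
		unsigned lo = bit % NBWORD;
		unsigned n = end - bit;			/* bits still to change */

		if (lo == 0 && n >= NBWORD) {
			bmp[word] = val ? ~0u : 0u;	/* whole word at once */
			bit += NBWORD;
		} else {
			unsigned span = (n < NBWORD - lo) ? n : NBWORD - lo;
			uint32_t mask = (((uint32_t)1 << span) - 1) << lo;

			if (val)
				bmp[word] |= mask;	/* mark free */
			else
				bmp[word] &= ~mask;	/* mark allocated */
			bit += span;
		}
	}
}

int main(void)
{
	uint32_t bmp[2] = { 0, 0 };

	modify_range(bmp, 12, 40, 1);		/* free bits 12..51 */
	printf("%08x %08x\n", bmp[0], bmp[1]);	/* fffff000 000fffff */
	return 0;
}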
-		 */ -		if (rbpp && *rbpp) -			xfs_trans_brelse(tp, *rbpp); -		error = xfs_rtbuf_get(mp, tp, sb, 1, &bp); -		if (error) { -			return error; -		} -		/* -		 * Remember this buffer and block for the next call. -		 */ -		if (rbpp) { -			*rbpp = bp; -			*rsb = sb; -		} -	} -	/* -	 * Point to the summary information, modify and log it. -	 */ -	sp = XFS_SUMPTR(mp, bp, so); -	*sp += delta; -	xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr), -		(uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1)); -	return 0; -} - -/*   * Visible (exported) functions.   */ @@ -2129,66 +1231,6 @@ xfs_rtallocate_extent(  }  /* - * Free an extent in the realtime subvolume.  Length is expressed in - * realtime extents, as is the block number. - */ -int					/* error */ -xfs_rtfree_extent( -	xfs_trans_t	*tp,		/* transaction pointer */ -	xfs_rtblock_t	bno,		/* starting block number to free */ -	xfs_extlen_t	len)		/* length of extent freed */ -{ -	int		error;		/* error value */ -	xfs_mount_t	*mp;		/* file system mount structure */ -	xfs_fsblock_t	sb;		/* summary file block number */ -	xfs_buf_t	*sumbp;		/* summary file block buffer */ - -	mp = tp->t_mountp; - -	ASSERT(mp->m_rbmip->i_itemp != NULL); -	ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); - -#ifdef DEBUG -	/* -	 * Check to see that this whole range is currently allocated. -	 */ -	{ -		int	stat;		/* result from checking range */ - -		error = xfs_rtcheck_alloc_range(mp, tp, bno, len, &stat); -		if (error) { -			return error; -		} -		ASSERT(stat); -	} -#endif -	sumbp = NULL; -	/* -	 * Free the range of realtime blocks. -	 */ -	error = xfs_rtfree_range(mp, tp, bno, len, &sumbp, &sb); -	if (error) { -		return error; -	} -	/* -	 * Mark more blocks free in the superblock. -	 */ -	xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, (long)len); -	/* -	 * If we've now freed all the blocks, reset the file sequence -	 * number to 0. -	 */ -	if (tp->t_frextents_delta + mp->m_sb.sb_frextents == -	    mp->m_sb.sb_rextents) { -		if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) -			mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; -		*(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0; -		xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); -	} -	return 0; -} - -/*   * Initialize realtime fields in the mount structure.   
*/  int				/* error */ diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index b2a1a24c0e2..752b63d1030 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -95,6 +95,30 @@ xfs_growfs_rt(  	struct xfs_mount	*mp,	/* file system mount structure */  	xfs_growfs_rt_t		*in);	/* user supplied growfs struct */ +/* + * From xfs_rtbitmap.c + */ +int xfs_rtbuf_get(struct xfs_mount *mp, struct xfs_trans *tp, +		  xfs_rtblock_t block, int issum, struct xfs_buf **bpp); +int xfs_rtcheck_range(struct xfs_mount *mp, struct xfs_trans *tp, +		      xfs_rtblock_t start, xfs_extlen_t len, int val, +		      xfs_rtblock_t *new, int *stat); +int xfs_rtfind_back(struct xfs_mount *mp, struct xfs_trans *tp, +		    xfs_rtblock_t start, xfs_rtblock_t limit, +		    xfs_rtblock_t *rtblock); +int xfs_rtfind_forw(struct xfs_mount *mp, struct xfs_trans *tp, +		    xfs_rtblock_t start, xfs_rtblock_t limit, +		    xfs_rtblock_t *rtblock); +int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp, +		       xfs_rtblock_t start, xfs_extlen_t len, int val); +int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log, +			 xfs_rtblock_t bbno, int delta, xfs_buf_t **rbpp, +			 xfs_fsblock_t *rsb); +int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp, +		     xfs_rtblock_t start, xfs_extlen_t len, +		     struct xfs_buf **rbpp, xfs_fsblock_t *rsb); + +  #else  # define xfs_rtallocate_extent(t,b,min,max,l,a,f,p,rb)  (ENOSYS)  # define xfs_rtfree_extent(t,b,l)                       (ENOSYS) diff --git a/fs/xfs/xfs_rtbitmap.c b/fs/xfs/xfs_rtbitmap.c new file mode 100644 index 00000000000..f4dd697cac0 --- /dev/null +++ b/fs/xfs/xfs_rtbitmap.c @@ -0,0 +1,973 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_sb.h" +#include "xfs_ag.h" +#include "xfs_mount.h" +#include "xfs_inode.h" +#include "xfs_bmap.h" +#include "xfs_bmap_util.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc.h" +#include "xfs_error.h" +#include "xfs_trans.h" +#include "xfs_trans_space.h" +#include "xfs_trace.h" +#include "xfs_buf.h" +#include "xfs_icache.h" +#include "xfs_dinode.h" +#include "xfs_rtalloc.h" + + +/* + * Realtime allocator bitmap functions shared with userspace. + */ + +/* + * Get a buffer for the bitmap or summary file block specified. + * The buffer is returned read and locked. 
+ */ +int +xfs_rtbuf_get( +	xfs_mount_t	*mp,		/* file system mount structure */ +	xfs_trans_t	*tp,		/* transaction pointer */ +	xfs_rtblock_t	block,		/* block number in bitmap or summary */ +	int		issum,		/* is summary not bitmap */ +	xfs_buf_t	**bpp)		/* output: buffer for the block */ +{ +	xfs_buf_t	*bp;		/* block buffer, result */ +	xfs_inode_t	*ip;		/* bitmap or summary inode */ +	xfs_bmbt_irec_t	map; +	int		nmap = 1; +	int		error;		/* error value */ + +	ip = issum ? mp->m_rsumip : mp->m_rbmip; + +	error = xfs_bmapi_read(ip, block, 1, &map, &nmap, XFS_DATA_FORK); +	if (error) +		return error; + +	ASSERT(map.br_startblock != NULLFSBLOCK); +	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, +				   XFS_FSB_TO_DADDR(mp, map.br_startblock), +				   mp->m_bsize, 0, &bp, NULL); +	if (error) +		return error; +	*bpp = bp; +	return 0; +} + +/* + * Searching backward from start to limit, find the first block whose + * allocated/free state is different from start's. + */ +int +xfs_rtfind_back( +	xfs_mount_t	*mp,		/* file system mount point */ +	xfs_trans_t	*tp,		/* transaction pointer */ +	xfs_rtblock_t	start,		/* starting block to look at */ +	xfs_rtblock_t	limit,		/* last block to look at */ +	xfs_rtblock_t	*rtblock)	/* out: start block found */ +{ +	xfs_rtword_t	*b;		/* current word in buffer */ +	int		bit;		/* bit number in the word */ +	xfs_rtblock_t	block;		/* bitmap block number */ +	xfs_buf_t	*bp;		/* buf for the block */ +	xfs_rtword_t	*bufp;		/* starting word in buffer */ +	int		error;		/* error value */ +	xfs_rtblock_t	firstbit;	/* first useful bit in the word */ +	xfs_rtblock_t	i;		/* current bit number rel. to start */ +	xfs_rtblock_t	len;		/* length of inspected area */ +	xfs_rtword_t	mask;		/* mask of relevant bits for value */ +	xfs_rtword_t	want;		/* mask for "good" values */ +	xfs_rtword_t	wdiff;		/* difference from wanted value */ +	int		word;		/* word number in the buffer */ + +	/* +	 * Compute and read in starting bitmap block for starting block. +	 */ +	block = XFS_BITTOBLOCK(mp, start); +	error = xfs_rtbuf_get(mp, tp, block, 0, &bp); +	if (error) { +		return error; +	} +	bufp = bp->b_addr; +	/* +	 * Get the first word's index & point to it. +	 */ +	word = XFS_BITTOWORD(mp, start); +	b = &bufp[word]; +	bit = (int)(start & (XFS_NBWORD - 1)); +	len = start - limit + 1; +	/* +	 * Compute match value, based on the bit at start: if 1 (free) +	 * then all-ones, else all-zeroes. +	 */ +	want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0; +	/* +	 * If the starting position is not word-aligned, deal with the +	 * partial word. +	 */ +	if (bit < XFS_NBWORD - 1) { +		/* +		 * Calculate first (leftmost) bit number to look at, +		 * and mask for all the relevant bits in this word. +		 */ +		firstbit = XFS_RTMAX((xfs_srtblock_t)(bit - len + 1), 0); +		mask = (((xfs_rtword_t)1 << (bit - firstbit + 1)) - 1) << +			firstbit; +		/* +		 * Calculate the difference between the value there +		 * and what we're looking for. +		 */ +		if ((wdiff = (*b ^ want) & mask)) { +			/* +			 * Different.  Mark where we are and return. +			 */ +			xfs_trans_brelse(tp, bp); +			i = bit - XFS_RTHIBIT(wdiff); +			*rtblock = start - i + 1; +			return 0; +		} +		i = bit - firstbit + 1; +		/* +		 * Go on to previous block if that's where the previous word is +		 * and we need the previous word. +		 */ +		if (--word == -1 && i < len) { +			/* +			 * If done with this block, get the previous one. 
+			 */ +			xfs_trans_brelse(tp, bp); +			error = xfs_rtbuf_get(mp, tp, --block, 0, &bp); +			if (error) { +				return error; +			} +			bufp = bp->b_addr; +			word = XFS_BLOCKWMASK(mp); +			b = &bufp[word]; +		} else { +			/* +			 * Go on to the previous word in the buffer. +			 */ +			b--; +		} +	} else { +		/* +		 * Starting on a word boundary, no partial word. +		 */ +		i = 0; +	} +	/* +	 * Loop over whole words in buffers.  When we use up one buffer +	 * we move on to the previous one. +	 */ +	while (len - i >= XFS_NBWORD) { +		/* +		 * Compute difference between actual and desired value. +		 */ +		if ((wdiff = *b ^ want)) { +			/* +			 * Different, mark where we are and return. +			 */ +			xfs_trans_brelse(tp, bp); +			i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); +			*rtblock = start - i + 1; +			return 0; +		} +		i += XFS_NBWORD; +		/* +		 * Go on to previous block if that's where the previous word is +		 * and we need the previous word. +		 */ +		if (--word == -1 && i < len) { +			/* +			 * If done with this block, get the previous one. +			 */ +			xfs_trans_brelse(tp, bp); +			error = xfs_rtbuf_get(mp, tp, --block, 0, &bp); +			if (error) { +				return error; +			} +			bufp = bp->b_addr; +			word = XFS_BLOCKWMASK(mp); +			b = &bufp[word]; +		} else { +			/* +			 * Go on to the previous word in the buffer. +			 */ +			b--; +		} +	} +	/* +	 * If not ending on a word boundary, deal with the last +	 * (partial) word. +	 */ +	if (len - i) { +		/* +		 * Calculate first (leftmost) bit number to look at, +		 * and mask for all the relevant bits in this word. +		 */ +		firstbit = XFS_NBWORD - (len - i); +		mask = (((xfs_rtword_t)1 << (len - i)) - 1) << firstbit; +		/* +		 * Compute difference between actual and desired value. +		 */ +		if ((wdiff = (*b ^ want) & mask)) { +			/* +			 * Different, mark where we are and return. +			 */ +			xfs_trans_brelse(tp, bp); +			i += XFS_NBWORD - 1 - XFS_RTHIBIT(wdiff); +			*rtblock = start - i + 1; +			return 0; +		} else +			i = len; +	} +	/* +	 * No match, return that we scanned the whole area. +	 */ +	xfs_trans_brelse(tp, bp); +	*rtblock = start - i + 1; +	return 0; +} + +/* + * Searching forward from start to limit, find the first block whose + * allocated/free state is different from start's. + */ +int +xfs_rtfind_forw( +	xfs_mount_t	*mp,		/* file system mount point */ +	xfs_trans_t	*tp,		/* transaction pointer */ +	xfs_rtblock_t	start,		/* starting block to look at */ +	xfs_rtblock_t	limit,		/* last block to look at */ +	xfs_rtblock_t	*rtblock)	/* out: start block found */ +{ +	xfs_rtword_t	*b;		/* current word in buffer */ +	int		bit;		/* bit number in the word */ +	xfs_rtblock_t	block;		/* bitmap block number */ +	xfs_buf_t	*bp;		/* buf for the block */ +	xfs_rtword_t	*bufp;		/* starting word in buffer */ +	int		error;		/* error value */ +	xfs_rtblock_t	i;		/* current bit number rel. to start */ +	xfs_rtblock_t	lastbit;	/* last useful bit in the word */ +	xfs_rtblock_t	len;		/* length of inspected area */ +	xfs_rtword_t	mask;		/* mask of relevant bits for value */ +	xfs_rtword_t	want;		/* mask for "good" values */ +	xfs_rtword_t	wdiff;		/* difference from wanted value */ +	int		word;		/* word number in the buffer */ + +	/* +	 * Compute and read in starting bitmap block for starting block. +	 */ +	block = XFS_BITTOBLOCK(mp, start); +	error = xfs_rtbuf_get(mp, tp, block, 0, &bp); +	if (error) { +		return error; +	} +	bufp = bp->b_addr; +	/* +	 * Get the first word's index & point to it. 
+	 */ +	word = XFS_BITTOWORD(mp, start); +	b = &bufp[word]; +	bit = (int)(start & (XFS_NBWORD - 1)); +	len = limit - start + 1; +	/* +	 * Compute match value, based on the bit at start: if 1 (free) +	 * then all-ones, else all-zeroes. +	 */ +	want = (*b & ((xfs_rtword_t)1 << bit)) ? -1 : 0; +	/* +	 * If the starting position is not word-aligned, deal with the +	 * partial word. +	 */ +	if (bit) { +		/* +		 * Calculate last (rightmost) bit number to look at, +		 * and mask for all the relevant bits in this word. +		 */ +		lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); +		mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; +		/* +		 * Calculate the difference between the value there +		 * and what we're looking for. +		 */ +		if ((wdiff = (*b ^ want) & mask)) { +			/* +			 * Different.  Mark where we are and return. +			 */ +			xfs_trans_brelse(tp, bp); +			i = XFS_RTLOBIT(wdiff) - bit; +			*rtblock = start + i - 1; +			return 0; +		} +		i = lastbit - bit; +		/* +		 * Go on to next block if that's where the next word is +		 * and we need the next word. +		 */ +		if (++word == XFS_BLOCKWSIZE(mp) && i < len) { +			/* +			 * If done with this block, get the previous one. +			 */ +			xfs_trans_brelse(tp, bp); +			error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); +			if (error) { +				return error; +			} +			b = bufp = bp->b_addr; +			word = 0; +		} else { +			/* +			 * Go on to the previous word in the buffer. +			 */ +			b++; +		} +	} else { +		/* +		 * Starting on a word boundary, no partial word. +		 */ +		i = 0; +	} +	/* +	 * Loop over whole words in buffers.  When we use up one buffer +	 * we move on to the next one. +	 */ +	while (len - i >= XFS_NBWORD) { +		/* +		 * Compute difference between actual and desired value. +		 */ +		if ((wdiff = *b ^ want)) { +			/* +			 * Different, mark where we are and return. +			 */ +			xfs_trans_brelse(tp, bp); +			i += XFS_RTLOBIT(wdiff); +			*rtblock = start + i - 1; +			return 0; +		} +		i += XFS_NBWORD; +		/* +		 * Go on to next block if that's where the next word is +		 * and we need the next word. +		 */ +		if (++word == XFS_BLOCKWSIZE(mp) && i < len) { +			/* +			 * If done with this block, get the next one. +			 */ +			xfs_trans_brelse(tp, bp); +			error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); +			if (error) { +				return error; +			} +			b = bufp = bp->b_addr; +			word = 0; +		} else { +			/* +			 * Go on to the next word in the buffer. +			 */ +			b++; +		} +	} +	/* +	 * If not ending on a word boundary, deal with the last +	 * (partial) word. +	 */ +	if ((lastbit = len - i)) { +		/* +		 * Calculate mask for all the relevant bits in this word. +		 */ +		mask = ((xfs_rtword_t)1 << lastbit) - 1; +		/* +		 * Compute difference between actual and desired value. +		 */ +		if ((wdiff = (*b ^ want) & mask)) { +			/* +			 * Different, mark where we are and return. +			 */ +			xfs_trans_brelse(tp, bp); +			i += XFS_RTLOBIT(wdiff); +			*rtblock = start + i - 1; +			return 0; +		} else +			i = len; +	} +	/* +	 * No match, return that we scanned the whole area. +	 */ +	xfs_trans_brelse(tp, bp); +	*rtblock = start + i - 1; +	return 0; +} + +/* + * Read and modify the summary information for a given extent size, + * bitmap block combination. + * Keeps track of a current summary block, so we don't keep reading + * it from the buffer cache. 
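Taken together, xfs_rtfind_back and xfs_rtfind_forw (re-added above) bracket the run containing a given block: each derives a "want" pattern from the state of the starting bit and stops at the first bit that differs. Ignoring the word-at-a-time buffer walk, the semantics reduce to the following sketch, which models the bitmap as one byte per block purely for readability:

#include <stdio.h>

/*
 * bmp[i] is 1 if realtime block i is free, 0 if allocated.
 * Find the first and last block of the run that contains 'start'.
 */
static void find_extent(const unsigned char *bmp, long nblocks, long start,
			long *first, long *last)
{
	unsigned char want = bmp[start];	/* the state we are matching */
	long b;

	/* like xfs_rtfind_back: scan down towards block 0 for a differing block */
	for (b = start; b >= 0 && bmp[b] == want; b--)
		;
	*first = b + 1;

	/* like xfs_rtfind_forw: scan up towards the last block for a differing block */
	for (b = start; b < nblocks && bmp[b] == want; b++)
		;
	*last = b - 1;
}

int main(void)
{
	unsigned char bmp[] = { 0, 1, 1, 1, 0, 0, 1, 1 };
	long first, last;

	find_extent(bmp, 8, 2, &first, &last);
	printf("free run around block 2: %ld..%ld\n", first, last); /* 1..3 */
	return 0;
}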
+ */ +int +xfs_rtmodify_summary( +	xfs_mount_t	*mp,		/* file system mount point */ +	xfs_trans_t	*tp,		/* transaction pointer */ +	int		log,		/* log2 of extent size */ +	xfs_rtblock_t	bbno,		/* bitmap block number */ +	int		delta,		/* change to make to summary info */ +	xfs_buf_t	**rbpp,		/* in/out: summary block buffer */ +	xfs_fsblock_t	*rsb)		/* in/out: summary block number */ +{ +	xfs_buf_t	*bp;		/* buffer for the summary block */ +	int		error;		/* error value */ +	xfs_fsblock_t	sb;		/* summary fsblock */ +	int		so;		/* index into the summary file */ +	xfs_suminfo_t	*sp;		/* pointer to returned data */ + +	/* +	 * Compute entry number in the summary file. +	 */ +	so = XFS_SUMOFFS(mp, log, bbno); +	/* +	 * Compute the block number in the summary file. +	 */ +	sb = XFS_SUMOFFSTOBLOCK(mp, so); +	/* +	 * If we have an old buffer, and the block number matches, use that. +	 */ +	if (rbpp && *rbpp && *rsb == sb) +		bp = *rbpp; +	/* +	 * Otherwise we have to get the buffer. +	 */ +	else { +		/* +		 * If there was an old one, get rid of it first. +		 */ +		if (rbpp && *rbpp) +			xfs_trans_brelse(tp, *rbpp); +		error = xfs_rtbuf_get(mp, tp, sb, 1, &bp); +		if (error) { +			return error; +		} +		/* +		 * Remember this buffer and block for the next call. +		 */ +		if (rbpp) { +			*rbpp = bp; +			*rsb = sb; +		} +	} +	/* +	 * Point to the summary information, modify and log it. +	 */ +	sp = XFS_SUMPTR(mp, bp, so); +	*sp += delta; +	xfs_trans_log_buf(tp, bp, (uint)((char *)sp - (char *)bp->b_addr), +		(uint)((char *)sp - (char *)bp->b_addr + sizeof(*sp) - 1)); +	return 0; +} + +/* + * Set the given range of bitmap bits to the given value. + * Do whatever I/O and logging is required. + */ +int +xfs_rtmodify_range( +	xfs_mount_t	*mp,		/* file system mount point */ +	xfs_trans_t	*tp,		/* transaction pointer */ +	xfs_rtblock_t	start,		/* starting block to modify */ +	xfs_extlen_t	len,		/* length of extent to modify */ +	int		val)		/* 1 for free, 0 for allocated */ +{ +	xfs_rtword_t	*b;		/* current word in buffer */ +	int		bit;		/* bit number in the word */ +	xfs_rtblock_t	block;		/* bitmap block number */ +	xfs_buf_t	*bp;		/* buf for the block */ +	xfs_rtword_t	*bufp;		/* starting word in buffer */ +	int		error;		/* error value */ +	xfs_rtword_t	*first;		/* first used word in the buffer */ +	int		i;		/* current bit number rel. to start */ +	int		lastbit;	/* last useful bit in word */ +	xfs_rtword_t	mask;		/* mask o frelevant bits for value */ +	int		word;		/* word number in the buffer */ + +	/* +	 * Compute starting bitmap block number. +	 */ +	block = XFS_BITTOBLOCK(mp, start); +	/* +	 * Read the bitmap block, and point to its data. +	 */ +	error = xfs_rtbuf_get(mp, tp, block, 0, &bp); +	if (error) { +		return error; +	} +	bufp = bp->b_addr; +	/* +	 * Compute the starting word's address, and starting bit. +	 */ +	word = XFS_BITTOWORD(mp, start); +	first = b = &bufp[word]; +	bit = (int)(start & (XFS_NBWORD - 1)); +	/* +	 * 0 (allocated) => all zeroes; 1 (free) => all ones. +	 */ +	val = -val; +	/* +	 * If not starting on a word boundary, deal with the first +	 * (partial) word. +	 */ +	if (bit) { +		/* +		 * Compute first bit not changed and mask of relevant bits. +		 */ +		lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); +		mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; +		/* +		 * Set/clear the active bits. +		 */ +		if (val) +			*b |= mask; +		else +			*b &= ~mask; +		i = lastbit - bit; +		/* +		 * Go on to the next block if that's where the next word is +		 * and we need the next word. 
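xfs_rtmodify_summary, re-added above, addresses one counter in the summary file: the entry index comes from the extent-size class (log) and the bitmap block number, and that index is then split into a summary-file block plus a slot inside it. A worked example of that addressing; the geometry constants below are invented for the sketch and only approximate the XFS_SUMOFFS/XFS_SUMOFFSTOBLOCK macros:

#include <stdio.h>

/* Hypothetical geometry for the example */
#define RBMBLOCKS	11	/* bitmap blocks in the realtime bitmap file */
#define BLOCKSIZE	4096	/* filesystem block size */
#define SUMINFO_SIZE	4	/* bytes per summary counter (xfs_suminfo_t) */
#define INFO_PER_BLOCK	(BLOCKSIZE / SUMINFO_SIZE)

int main(void)
{
	int log = 3;		/* counting free extents of size ~2^3 rextents */
	int bbno = 7;		/* ...that start in bitmap block 7 */

	/* roughly XFS_SUMOFFS(): one row of counters per size class */
	int so = log * RBMBLOCKS + bbno;

	/* roughly XFS_SUMOFFSTOBLOCK()/XFS_SUMPTR(): block plus slot */
	int sumblock = so / INFO_PER_BLOCK;
	int offset   = so % INFO_PER_BLOCK;

	printf("summary entry %d lives in summary block %d, slot %d\n",
	       so, sumblock, offset);	/* entry 40, block 0, slot 40 */
	return 0;
}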
+		 */ +		if (++word == XFS_BLOCKWSIZE(mp) && i < len) { +			/* +			 * Log the changed part of this block. +			 * Get the next one. +			 */ +			xfs_trans_log_buf(tp, bp, +				(uint)((char *)first - (char *)bufp), +				(uint)((char *)b - (char *)bufp)); +			error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); +			if (error) { +				return error; +			} +			first = b = bufp = bp->b_addr; +			word = 0; +		} else { +			/* +			 * Go on to the next word in the buffer +			 */ +			b++; +		} +	} else { +		/* +		 * Starting on a word boundary, no partial word. +		 */ +		i = 0; +	} +	/* +	 * Loop over whole words in buffers.  When we use up one buffer +	 * we move on to the next one. +	 */ +	while (len - i >= XFS_NBWORD) { +		/* +		 * Set the word value correctly. +		 */ +		*b = val; +		i += XFS_NBWORD; +		/* +		 * Go on to the next block if that's where the next word is +		 * and we need the next word. +		 */ +		if (++word == XFS_BLOCKWSIZE(mp) && i < len) { +			/* +			 * Log the changed part of this block. +			 * Get the next one. +			 */ +			xfs_trans_log_buf(tp, bp, +				(uint)((char *)first - (char *)bufp), +				(uint)((char *)b - (char *)bufp)); +			error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); +			if (error) { +				return error; +			} +			first = b = bufp = bp->b_addr; +			word = 0; +		} else { +			/* +			 * Go on to the next word in the buffer +			 */ +			b++; +		} +	} +	/* +	 * If not ending on a word boundary, deal with the last +	 * (partial) word. +	 */ +	if ((lastbit = len - i)) { +		/* +		 * Compute a mask of relevant bits. +		 */ +		bit = 0; +		mask = ((xfs_rtword_t)1 << lastbit) - 1; +		/* +		 * Set/clear the active bits. +		 */ +		if (val) +			*b |= mask; +		else +			*b &= ~mask; +		b++; +	} +	/* +	 * Log any remaining changed bytes. +	 */ +	if (b > first) +		xfs_trans_log_buf(tp, bp, (uint)((char *)first - (char *)bufp), +			(uint)((char *)b - (char *)bufp - 1)); +	return 0; +} + +/* + * Mark an extent specified by start and len freed. + * Updates all the summary information as well as the bitmap. + */ +int +xfs_rtfree_range( +	xfs_mount_t	*mp,		/* file system mount point */ +	xfs_trans_t	*tp,		/* transaction pointer */ +	xfs_rtblock_t	start,		/* starting block to free */ +	xfs_extlen_t	len,		/* length to free */ +	xfs_buf_t	**rbpp,		/* in/out: summary block buffer */ +	xfs_fsblock_t	*rsb)		/* in/out: summary block number */ +{ +	xfs_rtblock_t	end;		/* end of the freed extent */ +	int		error;		/* error value */ +	xfs_rtblock_t	postblock;	/* first block freed > end */ +	xfs_rtblock_t	preblock;	/* first block freed < start */ + +	end = start + len - 1; +	/* +	 * Modify the bitmap to mark this extent freed. +	 */ +	error = xfs_rtmodify_range(mp, tp, start, len, 1); +	if (error) { +		return error; +	} +	/* +	 * Assume we're freeing out of the middle of an allocated extent. +	 * We need to find the beginning and end of the extent so we can +	 * properly update the summary. +	 */ +	error = xfs_rtfind_back(mp, tp, start, 0, &preblock); +	if (error) { +		return error; +	} +	/* +	 * Find the next allocated block (end of allocated extent). +	 */ +	error = xfs_rtfind_forw(mp, tp, end, mp->m_sb.sb_rextents - 1, +		&postblock); +	if (error) +		return error; +	/* +	 * If there are blocks not being freed at the front of the +	 * old extent, add summary data for them to be allocated. 
+	 */ +	if (preblock < start) { +		error = xfs_rtmodify_summary(mp, tp, +			XFS_RTBLOCKLOG(start - preblock), +			XFS_BITTOBLOCK(mp, preblock), -1, rbpp, rsb); +		if (error) { +			return error; +		} +	} +	/* +	 * If there are blocks not being freed at the end of the +	 * old extent, add summary data for them to be allocated. +	 */ +	if (postblock > end) { +		error = xfs_rtmodify_summary(mp, tp, +			XFS_RTBLOCKLOG(postblock - end), +			XFS_BITTOBLOCK(mp, end + 1), -1, rbpp, rsb); +		if (error) { +			return error; +		} +	} +	/* +	 * Increment the summary information corresponding to the entire +	 * (new) free extent. +	 */ +	error = xfs_rtmodify_summary(mp, tp, +		XFS_RTBLOCKLOG(postblock + 1 - preblock), +		XFS_BITTOBLOCK(mp, preblock), 1, rbpp, rsb); +	return error; +} + +/* + * Check that the given range is either all allocated (val = 0) or + * all free (val = 1). + */ +int +xfs_rtcheck_range( +	xfs_mount_t	*mp,		/* file system mount point */ +	xfs_trans_t	*tp,		/* transaction pointer */ +	xfs_rtblock_t	start,		/* starting block number of extent */ +	xfs_extlen_t	len,		/* length of extent */ +	int		val,		/* 1 for free, 0 for allocated */ +	xfs_rtblock_t	*new,		/* out: first block not matching */ +	int		*stat)		/* out: 1 for matches, 0 for not */ +{ +	xfs_rtword_t	*b;		/* current word in buffer */ +	int		bit;		/* bit number in the word */ +	xfs_rtblock_t	block;		/* bitmap block number */ +	xfs_buf_t	*bp;		/* buf for the block */ +	xfs_rtword_t	*bufp;		/* starting word in buffer */ +	int		error;		/* error value */ +	xfs_rtblock_t	i;		/* current bit number rel. to start */ +	xfs_rtblock_t	lastbit;	/* last useful bit in word */ +	xfs_rtword_t	mask;		/* mask of relevant bits for value */ +	xfs_rtword_t	wdiff;		/* difference from wanted value */ +	int		word;		/* word number in the buffer */ + +	/* +	 * Compute starting bitmap block number +	 */ +	block = XFS_BITTOBLOCK(mp, start); +	/* +	 * Read the bitmap block. +	 */ +	error = xfs_rtbuf_get(mp, tp, block, 0, &bp); +	if (error) { +		return error; +	} +	bufp = bp->b_addr; +	/* +	 * Compute the starting word's address, and starting bit. +	 */ +	word = XFS_BITTOWORD(mp, start); +	b = &bufp[word]; +	bit = (int)(start & (XFS_NBWORD - 1)); +	/* +	 * 0 (allocated) => all zero's; 1 (free) => all one's. +	 */ +	val = -val; +	/* +	 * If not starting on a word boundary, deal with the first +	 * (partial) word. +	 */ +	if (bit) { +		/* +		 * Compute first bit not examined. +		 */ +		lastbit = XFS_RTMIN(bit + len, XFS_NBWORD); +		/* +		 * Mask of relevant bits. +		 */ +		mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit; +		/* +		 * Compute difference between actual and desired value. +		 */ +		if ((wdiff = (*b ^ val) & mask)) { +			/* +			 * Different, compute first wrong bit and return. +			 */ +			xfs_trans_brelse(tp, bp); +			i = XFS_RTLOBIT(wdiff) - bit; +			*new = start + i; +			*stat = 0; +			return 0; +		} +		i = lastbit - bit; +		/* +		 * Go on to next block if that's where the next word is +		 * and we need the next word. +		 */ +		if (++word == XFS_BLOCKWSIZE(mp) && i < len) { +			/* +			 * If done with this block, get the next one. +			 */ +			xfs_trans_brelse(tp, bp); +			error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); +			if (error) { +				return error; +			} +			b = bufp = bp->b_addr; +			word = 0; +		} else { +			/* +			 * Go on to the next word in the buffer. +			 */ +			b++; +		} +	} else { +		/* +		 * Starting on a word boundary, no partial word. +		 */ +		i = 0; +	} +	/* +	 * Loop over whole words in buffers.  
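xfs_rtfree_range, which ends above, merges the freed range with the free neighbours located by the two scans and then adjusts the per-size-class summary: -1 for each pre-existing neighbouring run, +1 for the single merged run, each filed under the floor-log2 of its length. Concrete, made-up numbers make the bookkeeping easy to check:

#include <stdio.h>

static int floorlog2(long n)		/* like XFS_RTBLOCKLOG/highbit */
{
	int l = -1;

	while (n) {
		n >>= 1;
		l++;
	}
	return l;
}

int main(void)
{
	long start = 100, end = 131;	/* extent being freed: 32 blocks */
	long preblock = 96;		/* merged run starts here (4 already free) */
	long postblock = 163;		/* merged run ends here (32 already free) */

	if (preblock < start)		/* front neighbour leaves its bucket */
		printf("summary[log %d] -= 1\n", floorlog2(start - preblock));
	if (postblock > end)		/* back neighbour leaves its bucket */
		printf("summary[log %d] -= 1\n", floorlog2(postblock - end));

	/* the merged run enters one bucket: length postblock + 1 - preblock */
	printf("summary[log %d] += 1 (run of %ld blocks)\n",
	       floorlog2(postblock + 1 - preblock), postblock + 1 - preblock);
	return 0;
}

This interdependence is also why xfs_rtfind_back, xfs_rtfind_forw and xfs_rtmodify_summary are exported together with xfs_rtfree_range in the xfs_rtalloc.h hunk earlier in this diff.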
When we use up one buffer +	 * we move on to the next one. +	 */ +	while (len - i >= XFS_NBWORD) { +		/* +		 * Compute difference between actual and desired value. +		 */ +		if ((wdiff = *b ^ val)) { +			/* +			 * Different, compute first wrong bit and return. +			 */ +			xfs_trans_brelse(tp, bp); +			i += XFS_RTLOBIT(wdiff); +			*new = start + i; +			*stat = 0; +			return 0; +		} +		i += XFS_NBWORD; +		/* +		 * Go on to next block if that's where the next word is +		 * and we need the next word. +		 */ +		if (++word == XFS_BLOCKWSIZE(mp) && i < len) { +			/* +			 * If done with this block, get the next one. +			 */ +			xfs_trans_brelse(tp, bp); +			error = xfs_rtbuf_get(mp, tp, ++block, 0, &bp); +			if (error) { +				return error; +			} +			b = bufp = bp->b_addr; +			word = 0; +		} else { +			/* +			 * Go on to the next word in the buffer. +			 */ +			b++; +		} +	} +	/* +	 * If not ending on a word boundary, deal with the last +	 * (partial) word. +	 */ +	if ((lastbit = len - i)) { +		/* +		 * Mask of relevant bits. +		 */ +		mask = ((xfs_rtword_t)1 << lastbit) - 1; +		/* +		 * Compute difference between actual and desired value. +		 */ +		if ((wdiff = (*b ^ val) & mask)) { +			/* +			 * Different, compute first wrong bit and return. +			 */ +			xfs_trans_brelse(tp, bp); +			i += XFS_RTLOBIT(wdiff); +			*new = start + i; +			*stat = 0; +			return 0; +		} else +			i = len; +	} +	/* +	 * Successful, return. +	 */ +	xfs_trans_brelse(tp, bp); +	*new = start + i; +	*stat = 1; +	return 0; +} + +#ifdef DEBUG +/* + * Check that the given extent (block range) is allocated already. + */ +STATIC int				/* error */ +xfs_rtcheck_alloc_range( +	xfs_mount_t	*mp,		/* file system mount point */ +	xfs_trans_t	*tp,		/* transaction pointer */ +	xfs_rtblock_t	bno,		/* starting block number of extent */ +	xfs_extlen_t	len)		/* length of extent */ +{ +	xfs_rtblock_t	new;		/* dummy for xfs_rtcheck_range */ +	int		stat; +	int		error; + +	error = xfs_rtcheck_range(mp, tp, bno, len, 0, &new, &stat); +	if (error) +		return error; +	ASSERT(stat); +	return 0; +} +#else +#define xfs_rtcheck_alloc_range(m,t,b,l)	(0) +#endif +/* + * Free an extent in the realtime subvolume.  Length is expressed in + * realtime extents, as is the block number. + */ +int					/* error */ +xfs_rtfree_extent( +	xfs_trans_t	*tp,		/* transaction pointer */ +	xfs_rtblock_t	bno,		/* starting block number to free */ +	xfs_extlen_t	len)		/* length of extent freed */ +{ +	int		error;		/* error value */ +	xfs_mount_t	*mp;		/* file system mount structure */ +	xfs_fsblock_t	sb;		/* summary file block number */ +	xfs_buf_t	*sumbp = NULL;	/* summary file block buffer */ + +	mp = tp->t_mountp; + +	ASSERT(mp->m_rbmip->i_itemp != NULL); +	ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); + +	error = xfs_rtcheck_alloc_range(mp, tp, bno, len); +	if (error) +		return error; + +	/* +	 * Free the range of realtime blocks. +	 */ +	error = xfs_rtfree_range(mp, tp, bno, len, &sumbp, &sb); +	if (error) { +		return error; +	} +	/* +	 * Mark more blocks free in the superblock. +	 */ +	xfs_trans_mod_sb(tp, XFS_TRANS_SB_FREXTENTS, (long)len); +	/* +	 * If we've now freed all the blocks, reset the file sequence +	 * number to 0. 
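The new xfs_rtcheck_alloc_range above is compiled only for DEBUG builds; otherwise it collapses to a macro that evaluates to 0, so the caller's error check costs nothing. The same pattern in stand-alone form (check_invariant is a hypothetical name):

#include <assert.h>
#include <stdio.h>

#ifdef DEBUG
/* Debug builds really verify the invariant and may fail loudly. */
static int check_invariant(int value)
{
	assert(value >= 0);		/* the expensive or verbose check */
	return 0;			/* 0 == success, like the XFS helper */
}
#else
/* Production builds: no code and nothing worth branching on. */
#define check_invariant(value)	(0)
#endif

int main(void)
{
	int error = check_invariant(42);

	if (error)			/* dead code when !DEBUG */
		return error;
	printf("ok\n");
	return 0;
}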
+	 */ +	if (tp->t_frextents_delta + mp->m_sb.sb_frextents == +	    mp->m_sb.sb_rextents) { +		if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) +			mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM; +		*(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0; +		xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE); +	} +	return 0; +} + diff --git a/fs/xfs/xfs_sb.c b/fs/xfs/xfs_sb.c index a5b59d92eb7..7703fa6770f 100644 --- a/fs/xfs/xfs_sb.c +++ b/fs/xfs/xfs_sb.c @@ -17,34 +17,26 @@   */  #include "xfs.h"  #include "xfs_fs.h" +#include "xfs_shared.h"  #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_da_btree.h" -#include "xfs_dir2_format.h" -#include "xfs_dir2.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h" -#include "xfs_btree.h"  #include "xfs_ialloc.h"  #include "xfs_alloc.h" -#include "xfs_rtalloc.h" -#include "xfs_bmap.h"  #include "xfs_error.h" -#include "xfs_quota.h" -#include "xfs_fsops.h"  #include "xfs_trace.h"  #include "xfs_cksum.h" +#include "xfs_trans.h"  #include "xfs_buf_item.h" +#include "xfs_dinode.h" +#include "xfs_bmap_btree.h" +#include "xfs_alloc_btree.h" +#include "xfs_ialloc_btree.h"  /*   * Physical superblock buffer manipulations. Shared with libxfs in userspace. @@ -209,10 +201,6 @@ xfs_mount_validate_sb(  	 * write validation, we don't need to check feature masks.  	 */  	if (check_version && XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) { -		xfs_alert(mp, -"Version 5 superblock detected. This kernel has EXPERIMENTAL support enabled!\n" -"Use of these features in this kernel is at your own risk!"); -  		if (xfs_sb_has_compat_feature(sbp,  					XFS_SB_FEAT_COMPAT_UNKNOWN)) {  			xfs_warn(mp, @@ -249,13 +237,13 @@ xfs_mount_validate_sb(  	if (xfs_sb_version_has_pquotino(sbp)) {  		if (sbp->sb_qflags & (XFS_OQUOTA_ENFD | XFS_OQUOTA_CHKD)) {  			xfs_notice(mp, -			   "Version 5 of Super block has XFS_OQUOTA bits.\n"); +			   "Version 5 of Super block has XFS_OQUOTA bits.");  			return XFS_ERROR(EFSCORRUPTED);  		}  	} else if (sbp->sb_qflags & (XFS_PQUOTA_ENFD | XFS_GQUOTA_ENFD |  				XFS_PQUOTA_CHKD | XFS_GQUOTA_CHKD)) {  			xfs_notice(mp, -"Superblock earlier than Version 5 has XFS_[PQ]UOTA_{ENFD|CHKD} bits.\n"); +"Superblock earlier than Version 5 has XFS_[PQ]UOTA_{ENFD|CHKD} bits.");  			return XFS_ERROR(EFSCORRUPTED);  	} @@ -296,15 +284,16 @@ xfs_mount_validate_sb(  	    sbp->sb_inodelog < XFS_DINODE_MIN_LOG			||  	    sbp->sb_inodelog > XFS_DINODE_MAX_LOG			||  	    sbp->sb_inodesize != (1 << sbp->sb_inodelog)		|| +	    sbp->sb_inopblock != howmany(sbp->sb_blocksize,sbp->sb_inodesize) ||  	    (sbp->sb_blocklog - sbp->sb_inodelog != sbp->sb_inopblog)	||  	    (sbp->sb_rextsize * sbp->sb_blocksize > XFS_MAX_RTEXTSIZE)	||  	    (sbp->sb_rextsize * sbp->sb_blocksize < XFS_MIN_RTEXTSIZE)	||  	    (sbp->sb_imax_pct > 100 /* zero sb_imax_pct is valid */)	||  	    sbp->sb_dblocks == 0					||  	    sbp->sb_dblocks > XFS_MAX_DBLOCKS(sbp)			|| -	    sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp))) { -		XFS_CORRUPTION_ERROR("SB sanity check failed", -				XFS_ERRLEVEL_LOW, mp, sbp); +	    sbp->sb_dblocks < XFS_MIN_DBLOCKS(sbp)			|| +	    sbp->sb_shared_vn != 0)) { +		xfs_notice(mp, "SB sanity check failed");  		return XFS_ERROR(EFSCORRUPTED);  	} @@ -345,15 +334,6 @@ 
xfs_mount_validate_sb(  		xfs_warn(mp, "Offline file system operation in progress!");  		return XFS_ERROR(EFSCORRUPTED);  	} - -	/* -	 * Version 1 directory format has never worked on Linux. -	 */ -	if (unlikely(!xfs_sb_version_hasdirv2(sbp))) { -		xfs_warn(mp, "file system using version 1 directory format"); -		return XFS_ERROR(ENOSYS); -	} -  	return 0;  } @@ -503,10 +483,16 @@ xfs_sb_quota_to_disk(  	}  	/* -	 * GQUOTINO and PQUOTINO cannot be used together in versions -	 * of superblock that do not have pquotino. from->sb_flags -	 * tells us which quota is active and should be copied to -	 * disk. +	 * GQUOTINO and PQUOTINO cannot be used together in versions of +	 * superblock that do not have pquotino. from->sb_flags tells us which +	 * quota is active and should be copied to disk. If neither are active, +	 * make sure we write NULLFSINO to the sb_gquotino field as a quota +	 * inode value of "0" is invalid when the XFS_SB_VERSION_QUOTA feature +	 * bit is set. +	 * +	 * Note that we don't need to handle the sb_uquotino or sb_pquotino here +	 * as they do not require any translation. Hence the main sb field loop +	 * will write them appropriately from the in-core superblock.  	 */  	if ((*fields & XFS_SB_GQUOTINO) &&  				(from->sb_qflags & XFS_GQUOTA_ACCT)) @@ -514,6 +500,17 @@ xfs_sb_quota_to_disk(  	else if ((*fields & XFS_SB_PQUOTINO) &&  				(from->sb_qflags & XFS_PQUOTA_ACCT))  		to->sb_gquotino = cpu_to_be64(from->sb_pquotino); +	else { +		/* +		 * We can't rely on just the fields being logged to tell us +		 * that it is safe to write NULLFSINO - we should only do that +		 * if quotas are not actually enabled. Hence only write +		 * NULLFSINO if both in-core quota inodes are NULL. +		 */ +		if (from->sb_gquotino == NULLFSINO && +		    from->sb_pquotino == NULLFSINO) +			to->sb_gquotino = cpu_to_be64(NULLFSINO); +	}  	*fields &= ~(XFS_SB_PQUOTINO | XFS_SB_GQUOTINO);  } @@ -596,6 +593,11 @@ xfs_sb_verify(   * single bit error could clear the feature bit and unused parts of the   * superblock are supposed to be zero. Hence a non-null crc field indicates that   * we've potentially lost a feature bit and we should check it anyway. + * + * However, past bugs (i.e. in growfs) left non-zeroed regions beyond the + * last field in V4 secondary superblocks.  So for secondary superblocks, + * we are more forgiving, and ignore CRC failures if the primary doesn't + * indicate that the fs version is V5.   */  static void  xfs_sb_read_verify( @@ -614,19 +616,22 @@ xfs_sb_read_verify(  						XFS_SB_VERSION_5) ||  	     dsb->sb_crc != 0)) { -		if (!xfs_verify_cksum(bp->b_addr, be16_to_cpu(dsb->sb_sectsize), -				      offsetof(struct xfs_sb, sb_crc))) { -			error = EFSCORRUPTED; -			goto out_error; +		if (!xfs_buf_verify_cksum(bp, XFS_SB_CRC_OFF)) { +			/* Only fail bad secondaries on a known V5 filesystem */ +			if (bp->b_bn == XFS_SB_DADDR || +			    xfs_sb_version_hascrc(&mp->m_sb)) { +				error = EFSBADCRC; +				goto out_error; +			}  		}  	}  	error = xfs_sb_verify(bp, true);  out_error:  	if (error) { -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, -				     mp, bp->b_addr);  		xfs_buf_ioerror(bp, error); +		if (error == EFSCORRUPTED || error == EFSBADCRC) +			xfs_verifier_error(bp);  	}  } @@ -642,7 +647,6 @@ xfs_sb_quiet_read_verify(  {  	struct xfs_dsb	*dsb = XFS_BUF_TO_SBP(bp); -  	if (dsb->sb_magicnum == cpu_to_be32(XFS_SB_MAGIC)) {  		/* XFS filesystem, verify noisily! 
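The reworked xfs_sb_quota_to_disk hunk above folds the group and project quota inodes into the single on-disk sb_gquotino slot on pre-pquotino superblocks, and now writes NULLFSINO only when both in-core inodes are unset, because an inode number of 0 is invalid once the quota feature bit is set. A simplified decision helper capturing that rule; names are shortened and the *fields bookkeeping is omitted:

#include <stdio.h>
#include <stdint.h>

#define NULLFSINO	((uint64_t)-1)
#define GQUOTA_ACCT	0x1		/* group quota accounting active */
#define PQUOTA_ACCT	0x2		/* project quota accounting active */

/* What should the single on-disk gquotino slot contain? */
static uint64_t gquotino_to_disk(unsigned qflags,
				 uint64_t gquotino, uint64_t pquotino,
				 uint64_t old_ondisk)
{
	if (qflags & GQUOTA_ACCT)
		return gquotino;		/* group quota owns the slot */
	if (qflags & PQUOTA_ACCT)
		return pquotino;		/* else project quota uses it */
	if (gquotino == NULLFSINO && pquotino == NULLFSINO)
		return NULLFSINO;		/* nothing enabled: mark empty */
	return old_ondisk;			/* otherwise leave it untouched */
}

int main(void)
{
	printf("%llx\n", (unsigned long long)
	       gquotino_to_disk(PQUOTA_ACCT, NULLFSINO, 131, 0));   /* 83 */
	printf("%llx\n", (unsigned long long)
	       gquotino_to_disk(0, NULLFSINO, NULLFSINO, 0));       /* all f's */
	return 0;
}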
*/  		xfs_sb_read_verify(bp); @@ -662,9 +666,8 @@ xfs_sb_write_verify(  	error = xfs_sb_verify(bp, false);  	if (error) { -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, -				     mp, bp->b_addr);  		xfs_buf_ioerror(bp, error); +		xfs_verifier_error(bp);  		return;  	} @@ -674,8 +677,7 @@ xfs_sb_write_verify(  	if (bip)  		XFS_BUF_TO_SBP(bp)->sb_lsn = cpu_to_be64(bip->bli_item.li_lsn); -	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), -			 offsetof(struct xfs_sb, sb_crc)); +	xfs_buf_update_cksum(bp, XFS_SB_CRC_OFF);  }  const struct xfs_buf_ops xfs_sb_buf_ops = { diff --git a/fs/xfs/xfs_sb.h b/fs/xfs/xfs_sb.h index 6835b44f850..c43c2d609a2 100644 --- a/fs/xfs/xfs_sb.h +++ b/fs/xfs/xfs_sb.h @@ -36,8 +36,6 @@ struct xfs_trans;  #define	XFS_SB_VERSION_5	5		/* CRC enabled filesystem */  #define	XFS_SB_VERSION_NUMBITS		0x000f  #define	XFS_SB_VERSION_ALLFBITS		0xfff0 -#define	XFS_SB_VERSION_SASHFBITS	0xf000 -#define	XFS_SB_VERSION_REALFBITS	0x0ff0  #define	XFS_SB_VERSION_ATTRBIT		0x0010  #define	XFS_SB_VERSION_NLINKBIT		0x0020  #define	XFS_SB_VERSION_QUOTABIT		0x0040 @@ -50,24 +48,15 @@ struct xfs_trans;  #define	XFS_SB_VERSION_DIRV2BIT		0x2000  #define	XFS_SB_VERSION_BORGBIT		0x4000	/* ASCII only case-insens. */  #define	XFS_SB_VERSION_MOREBITSBIT	0x8000 -#define	XFS_SB_VERSION_OKSASHFBITS	\ -	(XFS_SB_VERSION_EXTFLGBIT | \ -	 XFS_SB_VERSION_DIRV2BIT | \ -	 XFS_SB_VERSION_BORGBIT) -#define	XFS_SB_VERSION_OKREALFBITS	\ -	(XFS_SB_VERSION_ATTRBIT | \ -	 XFS_SB_VERSION_NLINKBIT | \ -	 XFS_SB_VERSION_QUOTABIT | \ -	 XFS_SB_VERSION_ALIGNBIT | \ -	 XFS_SB_VERSION_DALIGNBIT | \ -	 XFS_SB_VERSION_SHAREDBIT | \ -	 XFS_SB_VERSION_LOGV2BIT | \ -	 XFS_SB_VERSION_SECTORBIT | \ -	 XFS_SB_VERSION_MOREBITSBIT) -#define	XFS_SB_VERSION_OKREALBITS	\ -	(XFS_SB_VERSION_NUMBITS | \ -	 XFS_SB_VERSION_OKREALFBITS | \ -	 XFS_SB_VERSION_OKSASHFBITS) + +/* + * Supported feature bit list is just all bits in the versionnum field because + * we've used them all up and understand them all. Except, of course, for the + * shared superblock bit, which nobody knows what it does and so is unsupported. + */ +#define	XFS_SB_VERSION_OKBITS		\ +	((XFS_SB_VERSION_NUMBITS | XFS_SB_VERSION_ALLFBITS) & \ +		~XFS_SB_VERSION_SHAREDBIT)  /*   * There are two words to hold XFS "feature" bits: the original @@ -76,7 +65,6 @@ struct xfs_trans;   *   * These defines represent bits in sb_features2.   */ -#define XFS_SB_VERSION2_REALFBITS	0x00ffffff	/* Mask: features */  #define XFS_SB_VERSION2_RESERVED1BIT	0x00000001  #define XFS_SB_VERSION2_LAZYSBCOUNTBIT	0x00000002	/* Superblk counters */  #define XFS_SB_VERSION2_RESERVED4BIT	0x00000004 @@ -86,16 +74,11 @@ struct xfs_trans;  #define XFS_SB_VERSION2_CRCBIT		0x00000100	/* metadata CRCs */  #define XFS_SB_VERSION2_FTYPE		0x00000200	/* inode type in dir */ -#define	XFS_SB_VERSION2_OKREALFBITS	\ +#define	XFS_SB_VERSION2_OKBITS		\  	(XFS_SB_VERSION2_LAZYSBCOUNTBIT	| \  	 XFS_SB_VERSION2_ATTR2BIT	| \  	 XFS_SB_VERSION2_PROJID32BIT	| \  	 XFS_SB_VERSION2_FTYPE) -#define	XFS_SB_VERSION2_OKSASHFBITS	\ -	(0) -#define XFS_SB_VERSION2_OKREALBITS	\ -	(XFS_SB_VERSION2_OKREALFBITS |	\ -	 XFS_SB_VERSION2_OKSASHFBITS )  /*   * Superblock - in core version.  Must match the ondisk version below. @@ -182,6 +165,8 @@ typedef struct xfs_sb {  	/* must be padded to 64 bit alignment */  } xfs_sb_t; +#define XFS_SB_CRC_OFF		offsetof(struct xfs_sb, sb_crc) +  /*   * Superblock - on disk version.  Must match the in core version above.   * Must be padded to 64 bit alignment. 
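The new XFS_SB_VERSION_OKBITS above replaces the OKREALBITS/OKSASHFBITS tables with "every versionnum bit except the unsupported shared-superblock bit". Evaluating the constant by hand is a quick sanity check; the SHAREDBIT value (0x0200) is taken from the unchanged part of the header, not from this hunk:

#include <stdio.h>

#define XFS_SB_VERSION_NUMBITS		0x000f
#define XFS_SB_VERSION_ALLFBITS		0xfff0
#define XFS_SB_VERSION_SHAREDBIT	0x0200	/* assumed, not shown in this hunk */

#define XFS_SB_VERSION_OKBITS \
	((XFS_SB_VERSION_NUMBITS | XFS_SB_VERSION_ALLFBITS) & \
		~XFS_SB_VERSION_SHAREDBIT)

int main(void)
{
	/* 0x000f | 0xfff0 = 0xffff; clearing 0x0200 leaves 0xfdff */
	printf("OKBITS = 0x%04x\n", XFS_SB_VERSION_OKBITS);
	return 0;
}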
@@ -343,214 +328,140 @@ typedef enum {  #define	XFS_SB_VERSION_NUM(sbp)	((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS) -static inline int xfs_sb_good_version(xfs_sb_t *sbp) -{ -	/* We always support version 1-3 */ -	if (sbp->sb_versionnum >= XFS_SB_VERSION_1 && -	    sbp->sb_versionnum <= XFS_SB_VERSION_3) -		return 1; - -	/* We support version 4 if all feature bits are supported */ -	if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) { -		if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKREALBITS) || -		    ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && -		     (sbp->sb_features2 & ~XFS_SB_VERSION2_OKREALBITS))) -			return 0; - -		if (sbp->sb_shared_vn > XFS_SB_MAX_SHARED_VN) -			return 0; -		return 1; -	} -	if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) -		return 1; - -	return 0; -} -  /* - * Detect a mismatched features2 field.  Older kernels read/wrote - * this into the wrong slot, so to be safe we keep them in sync. + * The first XFS version we support is a v4 superblock with V2 directories.   */ -static inline int xfs_sb_has_mismatched_features2(xfs_sb_t *sbp) +static inline bool xfs_sb_good_v4_features(struct xfs_sb *sbp)  { -	return (sbp->sb_bad_features2 != sbp->sb_features2); -} - -static inline unsigned xfs_sb_version_tonew(unsigned v) -{ -	if (v == XFS_SB_VERSION_1) -		return XFS_SB_VERSION_4; - -	if (v == XFS_SB_VERSION_2) -		return XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT; +	if (!(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT)) +		return false; -	return XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT | -		XFS_SB_VERSION_NLINKBIT; -} +	/* check for unknown features in the fs */ +	if ((sbp->sb_versionnum & ~XFS_SB_VERSION_OKBITS) || +	    ((sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT) && +	     (sbp->sb_features2 & ~XFS_SB_VERSION2_OKBITS))) +		return false; -static inline unsigned xfs_sb_version_toold(unsigned v) -{ -	if (v & (XFS_SB_VERSION_QUOTABIT | XFS_SB_VERSION_ALIGNBIT)) -		return 0; -	if (v & XFS_SB_VERSION_NLINKBIT) -		return XFS_SB_VERSION_3; -	if (v & XFS_SB_VERSION_ATTRBIT) -		return XFS_SB_VERSION_2; -	return XFS_SB_VERSION_1; +	return true;  } -static inline int xfs_sb_version_hasattr(xfs_sb_t *sbp) +static inline bool xfs_sb_good_version(struct xfs_sb *sbp)  { -	return sbp->sb_versionnum == XFS_SB_VERSION_2 || -		sbp->sb_versionnum == XFS_SB_VERSION_3 || -		(XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 && -		 (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT)); +	if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) +		return true; +	if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4) +		return xfs_sb_good_v4_features(sbp); +	return false;  } -static inline void xfs_sb_version_addattr(xfs_sb_t *sbp) +/* + * Detect a mismatched features2 field.  Older kernels read/wrote + * this into the wrong slot, so to be safe we keep them in sync. 
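/*
 * Standalone sketch (not kernel code) of the feature-bit check that the new
 * xfs_sb_good_v4_features() above performs: a v4 superblock is accepted only
 * if V2 directories are enabled and no bits outside the supported mask are
 * set.  The mask values are copied from the defines in this patch; the
 * features2 half of the check is omitted and the sample versionnum below is
 * made up for illustration.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SB_VERSION_NUMBITS	0x000fu
#define SB_VERSION_ALLFBITS	0xfff0u
#define SB_VERSION_SHAREDBIT	0x1000u
#define SB_VERSION_DIRV2BIT	0x2000u
#define SB_VERSION_OKBITS \
	((SB_VERSION_NUMBITS | SB_VERSION_ALLFBITS) & ~SB_VERSION_SHAREDBIT)

static bool good_v4_versionnum(uint16_t versionnum)
{
	if (!(versionnum & SB_VERSION_DIRV2BIT))
		return false;			/* v1 dirs were never supported */
	return !(versionnum & ~SB_VERSION_OKBITS);
}

int main(void)
{
	/* hypothetical versionnum: v4 + attr + dirv2 */
	uint16_t vn = 0x0004 | 0x0010 | SB_VERSION_DIRV2BIT;

	printf("OKBITS mask = 0x%04x, good = %d\n",
	       SB_VERSION_OKBITS, good_v4_versionnum(vn));
	return 0;
}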
+ */ +static inline bool xfs_sb_has_mismatched_features2(struct xfs_sb *sbp)  { -	if (sbp->sb_versionnum == XFS_SB_VERSION_1) -		sbp->sb_versionnum = XFS_SB_VERSION_2; -	else if (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4) -		sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT; -	else -		sbp->sb_versionnum = XFS_SB_VERSION_4 | XFS_SB_VERSION_ATTRBIT; +	return sbp->sb_bad_features2 != sbp->sb_features2;  } -static inline int xfs_sb_version_hasnlink(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasattr(struct xfs_sb *sbp)  { -	return sbp->sb_versionnum == XFS_SB_VERSION_3 || -		 (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 && -		  (sbp->sb_versionnum & XFS_SB_VERSION_NLINKBIT)); +	return (sbp->sb_versionnum & XFS_SB_VERSION_ATTRBIT);  } -static inline void xfs_sb_version_addnlink(xfs_sb_t *sbp) +static inline void xfs_sb_version_addattr(struct xfs_sb *sbp)  { -	if (sbp->sb_versionnum <= XFS_SB_VERSION_2) -		sbp->sb_versionnum = XFS_SB_VERSION_3; -	else -		sbp->sb_versionnum |= XFS_SB_VERSION_NLINKBIT; +	sbp->sb_versionnum |= XFS_SB_VERSION_ATTRBIT;  } -static inline int xfs_sb_version_hasquota(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasquota(struct xfs_sb *sbp)  { -	return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 && -		(sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT); +	return (sbp->sb_versionnum & XFS_SB_VERSION_QUOTABIT);  } -static inline void xfs_sb_version_addquota(xfs_sb_t *sbp) +static inline void xfs_sb_version_addquota(struct xfs_sb *sbp)  { -	if (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4) -		sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT; -	else -		sbp->sb_versionnum = xfs_sb_version_tonew(sbp->sb_versionnum) | -					XFS_SB_VERSION_QUOTABIT; +	sbp->sb_versionnum |= XFS_SB_VERSION_QUOTABIT;  } -static inline int xfs_sb_version_hasalign(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasalign(struct xfs_sb *sbp)  { -	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || -	       (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 && +	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 ||  		(sbp->sb_versionnum & XFS_SB_VERSION_ALIGNBIT));  } -static inline int xfs_sb_version_hasdalign(xfs_sb_t *sbp) -{ -	return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 && -		(sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT); -} - -static inline int xfs_sb_version_hasshared(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasdalign(struct xfs_sb *sbp)  { -	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && -		(sbp->sb_versionnum & XFS_SB_VERSION_SHAREDBIT); -} - -static inline int xfs_sb_version_hasdirv2(xfs_sb_t *sbp) -{ -	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || -	       (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && -		(sbp->sb_versionnum & XFS_SB_VERSION_DIRV2BIT)); +	return (sbp->sb_versionnum & XFS_SB_VERSION_DALIGNBIT);  } -static inline int xfs_sb_version_haslogv2(xfs_sb_t *sbp) +static inline bool xfs_sb_version_haslogv2(struct xfs_sb *sbp)  { -	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || -	       (XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 && -		(sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT)); +	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || +	       (sbp->sb_versionnum & XFS_SB_VERSION_LOGV2BIT);  } -static inline int xfs_sb_version_hasextflgbit(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasextflgbit(struct xfs_sb *sbp)  { -	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || -	       (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && -		(sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT)); +	return XFS_SB_VERSION_NUM(sbp) == 
XFS_SB_VERSION_5 || +	       (sbp->sb_versionnum & XFS_SB_VERSION_EXTFLGBIT);  } -static inline int xfs_sb_version_hassector(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hassector(struct xfs_sb *sbp)  { -	return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 && -		(sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT); +	return (sbp->sb_versionnum & XFS_SB_VERSION_SECTORBIT);  } -static inline int xfs_sb_version_hasasciici(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasasciici(struct xfs_sb *sbp)  { -	return XFS_SB_VERSION_NUM(sbp) >= XFS_SB_VERSION_4 && -		(sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT); +	return (sbp->sb_versionnum & XFS_SB_VERSION_BORGBIT);  } -static inline int xfs_sb_version_hasmorebits(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasmorebits(struct xfs_sb *sbp)  { -	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) || -	       (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_4 && -		(sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT)); +	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 || +	       (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT);  }  /*   * sb_features2 bit version macros. - * - * For example, for a bit defined as XFS_SB_VERSION2_FUNBIT, has a macro: - * - * SB_VERSION_HASFUNBIT(xfs_sb_t *sbp) - *	((xfs_sb_version_hasmorebits(sbp) && - *	 ((sbp)->sb_features2 & XFS_SB_VERSION2_FUNBIT)   */ - -static inline int xfs_sb_version_haslazysbcount(xfs_sb_t *sbp) +static inline bool xfs_sb_version_haslazysbcount(struct xfs_sb *sbp)  {  	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||  	       (xfs_sb_version_hasmorebits(sbp) &&  		(sbp->sb_features2 & XFS_SB_VERSION2_LAZYSBCOUNTBIT));  } -static inline int xfs_sb_version_hasattr2(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasattr2(struct xfs_sb *sbp)  {  	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||  	       (xfs_sb_version_hasmorebits(sbp) &&  		(sbp->sb_features2 & XFS_SB_VERSION2_ATTR2BIT));  } -static inline void xfs_sb_version_addattr2(xfs_sb_t *sbp) +static inline void xfs_sb_version_addattr2(struct xfs_sb *sbp)  {  	sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT;  	sbp->sb_features2 |= XFS_SB_VERSION2_ATTR2BIT; +	sbp->sb_bad_features2 |= XFS_SB_VERSION2_ATTR2BIT;  } -static inline void xfs_sb_version_removeattr2(xfs_sb_t *sbp) +static inline void xfs_sb_version_removeattr2(struct xfs_sb *sbp)  {  	sbp->sb_features2 &= ~XFS_SB_VERSION2_ATTR2BIT; +	sbp->sb_bad_features2 &= ~XFS_SB_VERSION2_ATTR2BIT;  	if (!sbp->sb_features2)  		sbp->sb_versionnum &= ~XFS_SB_VERSION_MOREBITSBIT;  } -static inline int xfs_sb_version_hasprojid32bit(xfs_sb_t *sbp) +static inline bool xfs_sb_version_hasprojid32bit(struct xfs_sb *sbp)  {  	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) ||  	       (xfs_sb_version_hasmorebits(sbp) &&  		(sbp->sb_features2 & XFS_SB_VERSION2_PROJID32BIT));  } -static inline void xfs_sb_version_addprojid32bit(xfs_sb_t *sbp) +static inline void xfs_sb_version_addprojid32bit(struct xfs_sb *sbp)  {  	sbp->sb_versionnum |= XFS_SB_VERSION_MOREBITSBIT;  	sbp->sb_features2 |= XFS_SB_VERSION2_PROJID32BIT; @@ -585,7 +496,9 @@ xfs_sb_has_compat_feature(  	return (sbp->sb_features_compat & feature) != 0;  } -#define XFS_SB_FEAT_RO_COMPAT_ALL 0 +#define XFS_SB_FEAT_RO_COMPAT_FINOBT   (1 << 0)		/* free inode btree */ +#define XFS_SB_FEAT_RO_COMPAT_ALL \ +		(XFS_SB_FEAT_RO_COMPAT_FINOBT)  #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN	~XFS_SB_FEAT_RO_COMPAT_ALL  static inline bool  xfs_sb_has_ro_compat_feature( @@ -621,12 +534,12 @@ xfs_sb_has_incompat_log_feature(  /*   * V5 
superblock specific feature checks   */ -static inline int xfs_sb_version_hascrc(xfs_sb_t *sbp) +static inline int xfs_sb_version_hascrc(struct xfs_sb *sbp)  {  	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;  } -static inline int xfs_sb_version_has_pquotino(xfs_sb_t *sbp) +static inline int xfs_sb_version_has_pquotino(struct xfs_sb *sbp)  {  	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;  } @@ -639,6 +552,12 @@ static inline int xfs_sb_version_hasftype(struct xfs_sb *sbp)  		 (sbp->sb_features2 & XFS_SB_VERSION2_FTYPE));  } +static inline int xfs_sb_version_hasfinobt(xfs_sb_t *sbp) +{ +	return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) && +		(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FINOBT); +} +  /*   * end of superblock version macros   */ @@ -699,7 +618,4 @@ extern void	xfs_sb_from_disk(struct xfs_sb *, struct xfs_dsb *);  extern void	xfs_sb_to_disk(struct xfs_dsb *, struct xfs_sb *, __int64_t);  extern void	xfs_sb_quota_from_disk(struct xfs_sb *sbp); -extern const struct xfs_buf_ops xfs_sb_buf_ops; -extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops; -  #endif	/* __XFS_SB_H__ */ diff --git a/fs/xfs/xfs_shared.h b/fs/xfs/xfs_shared.h new file mode 100644 index 00000000000..82404da2ca6 --- /dev/null +++ b/fs/xfs/xfs_shared.h @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2000-2005 Silicon Graphics, Inc. + * Copyright (c) 2013 Red Hat, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA + */ +#ifndef __XFS_SHARED_H__ +#define __XFS_SHARED_H__ + +/* + * Definitions shared between kernel and userspace that don't fit into any other + * header file that is shared with userspace. + */ +struct xfs_ifork; +struct xfs_buf; +struct xfs_buf_ops; +struct xfs_mount; +struct xfs_trans; +struct xfs_inode; + +/* + * Buffer verifier operations are widely used, including userspace tools + */ +extern const struct xfs_buf_ops xfs_agf_buf_ops; +extern const struct xfs_buf_ops xfs_agi_buf_ops; +extern const struct xfs_buf_ops xfs_agf_buf_ops; +extern const struct xfs_buf_ops xfs_agfl_buf_ops; +extern const struct xfs_buf_ops xfs_allocbt_buf_ops; +extern const struct xfs_buf_ops xfs_attr3_leaf_buf_ops; +extern const struct xfs_buf_ops xfs_attr3_rmt_buf_ops; +extern const struct xfs_buf_ops xfs_bmbt_buf_ops; +extern const struct xfs_buf_ops xfs_da3_node_buf_ops; +extern const struct xfs_buf_ops xfs_dquot_buf_ops; +extern const struct xfs_buf_ops xfs_symlink_buf_ops; +extern const struct xfs_buf_ops xfs_agi_buf_ops; +extern const struct xfs_buf_ops xfs_inobt_buf_ops; +extern const struct xfs_buf_ops xfs_inode_buf_ops; +extern const struct xfs_buf_ops xfs_inode_buf_ra_ops; +extern const struct xfs_buf_ops xfs_dquot_buf_ops; +extern const struct xfs_buf_ops xfs_sb_buf_ops; +extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops; +extern const struct xfs_buf_ops xfs_symlink_buf_ops; + +/* + * Transaction types.  Used to distinguish types of buffers. 
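/*
 * Standalone sketch (not kernel code) of the read-only-compatible feature
 * convention that the new XFS_SB_FEAT_RO_COMPAT_FINOBT bit above follows: a
 * kernel that does not recognise an ro-compat bit can still mount the
 * filesystem, but should only do so read-only, while a known bit (here the
 * free inode btree) simply enables the feature.  The exact mount-time policy
 * lives elsewhere in XFS; this only illustrates the mask logic, and the
 * extra "future" bit below is made up.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define RO_COMPAT_FINOBT	(1u << 0)		/* from this patch */
#define RO_COMPAT_ALL		(RO_COMPAT_FINOBT)
#define RO_COMPAT_UNKNOWN	(~RO_COMPAT_ALL)

static bool must_mount_readonly(uint32_t sb_features_ro_compat)
{
	return (sb_features_ro_compat & RO_COMPAT_UNKNOWN) != 0;
}

int main(void)
{
	uint32_t feat = RO_COMPAT_FINOBT | (1u << 5);	/* made-up future bit */

	printf("finobt enabled: %d, force read-only: %d\n",
	       !!(feat & RO_COMPAT_FINOBT), must_mount_readonly(feat));
	return 0;
}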
These never reach + * the log. + */ +#define XFS_TRANS_SETATTR_NOT_SIZE	1 +#define XFS_TRANS_SETATTR_SIZE		2 +#define XFS_TRANS_INACTIVE		3 +#define XFS_TRANS_CREATE		4 +#define XFS_TRANS_CREATE_TRUNC		5 +#define XFS_TRANS_TRUNCATE_FILE		6 +#define XFS_TRANS_REMOVE		7 +#define XFS_TRANS_LINK			8 +#define XFS_TRANS_RENAME		9 +#define XFS_TRANS_MKDIR			10 +#define XFS_TRANS_RMDIR			11 +#define XFS_TRANS_SYMLINK		12 +#define XFS_TRANS_SET_DMATTRS		13 +#define XFS_TRANS_GROWFS		14 +#define XFS_TRANS_STRAT_WRITE		15 +#define XFS_TRANS_DIOSTRAT		16 +/* 17 was XFS_TRANS_WRITE_SYNC */ +#define	XFS_TRANS_WRITEID		18 +#define	XFS_TRANS_ADDAFORK		19 +#define	XFS_TRANS_ATTRINVAL		20 +#define	XFS_TRANS_ATRUNCATE		21 +#define	XFS_TRANS_ATTR_SET		22 +#define	XFS_TRANS_ATTR_RM		23 +#define	XFS_TRANS_ATTR_FLAG		24 +#define	XFS_TRANS_CLEAR_AGI_BUCKET	25 +#define XFS_TRANS_QM_SBCHANGE		26 +/* + * Dummy entries since we use the transaction type to index into the + * trans_type[] in xlog_recover_print_trans_head() + */ +#define XFS_TRANS_DUMMY1		27 +#define XFS_TRANS_DUMMY2		28 +#define XFS_TRANS_QM_QUOTAOFF		29 +#define XFS_TRANS_QM_DQALLOC		30 +#define XFS_TRANS_QM_SETQLIM		31 +#define XFS_TRANS_QM_DQCLUSTER		32 +#define XFS_TRANS_QM_QINOCREATE		33 +#define XFS_TRANS_QM_QUOTAOFF_END	34 +#define XFS_TRANS_SB_UNIT		35 +#define XFS_TRANS_FSYNC_TS		36 +#define	XFS_TRANS_GROWFSRT_ALLOC	37 +#define	XFS_TRANS_GROWFSRT_ZERO		38 +#define	XFS_TRANS_GROWFSRT_FREE		39 +#define	XFS_TRANS_SWAPEXT		40 +#define	XFS_TRANS_SB_COUNT		41 +#define	XFS_TRANS_CHECKPOINT		42 +#define	XFS_TRANS_ICREATE		43 +#define	XFS_TRANS_CREATE_TMPFILE	44 +#define	XFS_TRANS_TYPE_MAX		44 +/* new transaction types need to be reflected in xfs_logprint(8) */ + +#define XFS_TRANS_TYPES \ +	{ XFS_TRANS_SETATTR_NOT_SIZE,	"SETATTR_NOT_SIZE" }, \ +	{ XFS_TRANS_SETATTR_SIZE,	"SETATTR_SIZE" }, \ +	{ XFS_TRANS_INACTIVE,		"INACTIVE" }, \ +	{ XFS_TRANS_CREATE,		"CREATE" }, \ +	{ XFS_TRANS_CREATE_TMPFILE,	"CREATE_TMPFILE" }, \ +	{ XFS_TRANS_CREATE_TRUNC,	"CREATE_TRUNC" }, \ +	{ XFS_TRANS_TRUNCATE_FILE,	"TRUNCATE_FILE" }, \ +	{ XFS_TRANS_REMOVE,		"REMOVE" }, \ +	{ XFS_TRANS_LINK,		"LINK" }, \ +	{ XFS_TRANS_RENAME,		"RENAME" }, \ +	{ XFS_TRANS_MKDIR,		"MKDIR" }, \ +	{ XFS_TRANS_RMDIR,		"RMDIR" }, \ +	{ XFS_TRANS_SYMLINK,		"SYMLINK" }, \ +	{ XFS_TRANS_SET_DMATTRS,	"SET_DMATTRS" }, \ +	{ XFS_TRANS_GROWFS,		"GROWFS" }, \ +	{ XFS_TRANS_STRAT_WRITE,	"STRAT_WRITE" }, \ +	{ XFS_TRANS_DIOSTRAT,		"DIOSTRAT" }, \ +	{ XFS_TRANS_WRITEID,		"WRITEID" }, \ +	{ XFS_TRANS_ADDAFORK,		"ADDAFORK" }, \ +	{ XFS_TRANS_ATTRINVAL,		"ATTRINVAL" }, \ +	{ XFS_TRANS_ATRUNCATE,		"ATRUNCATE" }, \ +	{ XFS_TRANS_ATTR_SET,		"ATTR_SET" }, \ +	{ XFS_TRANS_ATTR_RM,		"ATTR_RM" }, \ +	{ XFS_TRANS_ATTR_FLAG,		"ATTR_FLAG" }, \ +	{ XFS_TRANS_CLEAR_AGI_BUCKET,	"CLEAR_AGI_BUCKET" }, \ +	{ XFS_TRANS_QM_SBCHANGE,	"QM_SBCHANGE" }, \ +	{ XFS_TRANS_QM_QUOTAOFF,	"QM_QUOTAOFF" }, \ +	{ XFS_TRANS_QM_DQALLOC,		"QM_DQALLOC" }, \ +	{ XFS_TRANS_QM_SETQLIM,		"QM_SETQLIM" }, \ +	{ XFS_TRANS_QM_DQCLUSTER,	"QM_DQCLUSTER" }, \ +	{ XFS_TRANS_QM_QINOCREATE,	"QM_QINOCREATE" }, \ +	{ XFS_TRANS_QM_QUOTAOFF_END,	"QM_QOFF_END" }, \ +	{ XFS_TRANS_SB_UNIT,		"SB_UNIT" }, \ +	{ XFS_TRANS_FSYNC_TS,		"FSYNC_TS" }, \ +	{ XFS_TRANS_GROWFSRT_ALLOC,	"GROWFSRT_ALLOC" }, \ +	{ XFS_TRANS_GROWFSRT_ZERO,	"GROWFSRT_ZERO" }, \ +	{ XFS_TRANS_GROWFSRT_FREE,	"GROWFSRT_FREE" }, \ +	{ XFS_TRANS_SWAPEXT,		"SWAPEXT" }, \ +	{ XFS_TRANS_SB_COUNT,		"SB_COUNT" }, \ +	{ XFS_TRANS_CHECKPOINT,		"CHECKPOINT" }, \ +	{ XFS_TRANS_DUMMY1,		"DUMMY1" }, \ +	{ 
XFS_TRANS_DUMMY2,		"DUMMY2" }, \ +	{ XLOG_UNMOUNT_REC_TYPE,	"UNMOUNT" } + +/* + * This structure is used to track log items associated with + * a transaction.  It points to the log item and keeps some + * flags to track the state of the log item.  It also tracks + * the amount of space needed to log the item it describes + * once we get to commit processing (see xfs_trans_commit()). + */ +struct xfs_log_item_desc { +	struct xfs_log_item	*lid_item; +	struct list_head	lid_trans; +	unsigned char		lid_flags; +}; + +#define XFS_LID_DIRTY		0x1 + +/* log size calculation functions */ +int	xfs_log_calc_unit_res(struct xfs_mount *mp, int unit_bytes); +int	xfs_log_calc_minimum_size(struct xfs_mount *); + + +/* + * Values for t_flags. + */ +#define	XFS_TRANS_DIRTY		0x01	/* something needs to be logged */ +#define	XFS_TRANS_SB_DIRTY	0x02	/* superblock is modified */ +#define	XFS_TRANS_PERM_LOG_RES	0x04	/* xact took a permanent log res */ +#define	XFS_TRANS_SYNC		0x08	/* make commit synchronous */ +#define XFS_TRANS_DQ_DIRTY	0x10	/* at least one dquot in trx dirty */ +#define XFS_TRANS_RESERVE	0x20    /* OK to use reserved data blocks */ +#define XFS_TRANS_FREEZE_PROT	0x40	/* Transaction has elevated writer +					   count in superblock */ +/* + * Values for call flags parameter. + */ +#define	XFS_TRANS_RELEASE_LOG_RES	0x4 +#define	XFS_TRANS_ABORT			0x8 + +/* + * Field values for xfs_trans_mod_sb. + */ +#define	XFS_TRANS_SB_ICOUNT		0x00000001 +#define	XFS_TRANS_SB_IFREE		0x00000002 +#define	XFS_TRANS_SB_FDBLOCKS		0x00000004 +#define	XFS_TRANS_SB_RES_FDBLOCKS	0x00000008 +#define	XFS_TRANS_SB_FREXTENTS		0x00000010 +#define	XFS_TRANS_SB_RES_FREXTENTS	0x00000020 +#define	XFS_TRANS_SB_DBLOCKS		0x00000040 +#define	XFS_TRANS_SB_AGCOUNT		0x00000080 +#define	XFS_TRANS_SB_IMAXPCT		0x00000100 +#define	XFS_TRANS_SB_REXTSIZE		0x00000200 +#define	XFS_TRANS_SB_RBMBLOCKS		0x00000400 +#define	XFS_TRANS_SB_RBLOCKS		0x00000800 +#define	XFS_TRANS_SB_REXTENTS		0x00001000 +#define	XFS_TRANS_SB_REXTSLOG		0x00002000 + +/* + * Here we centralize the specification of XFS meta-data buffer reference count + * values.  This determines how hard the buffer cache tries to hold onto the + * buffer. + */ +#define	XFS_AGF_REF		4 +#define	XFS_AGI_REF		4 +#define	XFS_AGFL_REF		3 +#define	XFS_INO_BTREE_REF	3 +#define	XFS_ALLOC_BTREE_REF	2 +#define	XFS_BMAP_BTREE_REF	2 +#define	XFS_DIR_BTREE_REF	2 +#define	XFS_INO_REF		2 +#define	XFS_ATTR_BTREE_REF	1 +#define	XFS_DQUOT_REF		1 + +/* + * Flags for xfs_trans_ichgtime(). 
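/*
 * The XFS_TRANS_TYPES pairs above map each numeric transaction type to a
 * printable name; as the comment above notes, the type value is also used to
 * index trans_type[] in xlog_recover_print_trans_head(), which is why the
 * dummy entries must keep the numbering dense.  A minimal user-space sketch
 * of the same type-to-name idea, using a few of the values defined in this
 * header; the lookup helper is purely illustrative.
 */
#include <stdio.h>

struct type_name { int type; const char *name; };

static const struct type_name trans_names[] = {
	{  3, "INACTIVE" },		/* XFS_TRANS_INACTIVE */
	{ 12, "SYMLINK"  },		/* XFS_TRANS_SYMLINK */
	{ 44, "CREATE_TMPFILE" },	/* XFS_TRANS_CREATE_TMPFILE */
};

static const char *trans_type_name(int type)
{
	for (unsigned i = 0; i < sizeof(trans_names) / sizeof(trans_names[0]); i++)
		if (trans_names[i].type == type)
			return trans_names[i].name;
	return "UNKNOWN";
}

int main(void)
{
	printf("type 12 -> %s\n", trans_type_name(12));
	return 0;
}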
+ */ +#define	XFS_ICHGTIME_MOD	0x1	/* data fork modification timestamp */ +#define	XFS_ICHGTIME_CHG	0x2	/* inode field change timestamp */ +#define	XFS_ICHGTIME_CREATE	0x4	/* inode create timestamp */ + + +/* + * Symlink decoding/encoding functions + */ +int xfs_symlink_blocks(struct xfs_mount *mp, int pathlen); +int xfs_symlink_hdr_set(struct xfs_mount *mp, xfs_ino_t ino, uint32_t offset, +			uint32_t size, struct xfs_buf *bp); +bool xfs_symlink_hdr_ok(xfs_ino_t ino, uint32_t offset, +			uint32_t size, struct xfs_buf *bp); +void xfs_symlink_local_to_remote(struct xfs_trans *tp, struct xfs_buf *bp, +				 struct xfs_inode *ip, struct xfs_ifork *ifp); + +#endif /* __XFS_SHARED_H__ */ diff --git a/fs/xfs/xfs_stats.c b/fs/xfs/xfs_stats.c index ce372b7d564..f2240383d4b 100644 --- a/fs/xfs/xfs_stats.c +++ b/fs/xfs/xfs_stats.c @@ -59,6 +59,7 @@ static int xfs_stat_proc_show(struct seq_file *m, void *v)  		{ "abtc2",		XFSSTAT_END_ABTC_V2		},  		{ "bmbt2",		XFSSTAT_END_BMBT_V2		},  		{ "ibt2",		XFSSTAT_END_IBT_V2		}, +		{ "fibt2",		XFSSTAT_END_FIBT_V2		},  		/* we print both series of quota information together */  		{ "qm",			XFSSTAT_END_QM			},  	}; diff --git a/fs/xfs/xfs_stats.h b/fs/xfs/xfs_stats.h index c03ad38ceae..c8f238b8299 100644 --- a/fs/xfs/xfs_stats.h +++ b/fs/xfs/xfs_stats.h @@ -183,7 +183,23 @@ struct xfsstats {  	__uint32_t		xs_ibt_2_alloc;  	__uint32_t		xs_ibt_2_free;  	__uint32_t		xs_ibt_2_moves; -#define XFSSTAT_END_XQMSTAT		(XFSSTAT_END_IBT_V2+6) +#define XFSSTAT_END_FIBT_V2		(XFSSTAT_END_IBT_V2+15) +	__uint32_t		xs_fibt_2_lookup; +	__uint32_t		xs_fibt_2_compare; +	__uint32_t		xs_fibt_2_insrec; +	__uint32_t		xs_fibt_2_delrec; +	__uint32_t		xs_fibt_2_newroot; +	__uint32_t		xs_fibt_2_killroot; +	__uint32_t		xs_fibt_2_increment; +	__uint32_t		xs_fibt_2_decrement; +	__uint32_t		xs_fibt_2_lshift; +	__uint32_t		xs_fibt_2_rshift; +	__uint32_t		xs_fibt_2_split; +	__uint32_t		xs_fibt_2_join; +	__uint32_t		xs_fibt_2_alloc; +	__uint32_t		xs_fibt_2_free; +	__uint32_t		xs_fibt_2_moves; +#define XFSSTAT_END_XQMSTAT		(XFSSTAT_END_FIBT_V2+6)  	__uint32_t		xs_qm_dqreclaims;  	__uint32_t		xs_qm_dqreclaim_misses;  	__uint32_t		xs_qm_dquot_dups; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 15188cc9944..8f0333b3f7a 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -17,34 +17,26 @@   */  #include "xfs.h" +#include "xfs_shared.h"  #include "xfs_format.h" -#include "xfs_log.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_inum.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" +#include "xfs_da_format.h"  #include "xfs_inode.h"  #include "xfs_btree.h" -#include "xfs_ialloc.h"  #include "xfs_bmap.h" -#include "xfs_rtalloc.h" +#include "xfs_alloc.h"  #include "xfs_error.h" -#include "xfs_itable.h"  #include "xfs_fsops.h" -#include "xfs_attr.h" +#include "xfs_trans.h"  #include "xfs_buf_item.h" +#include "xfs_log.h"  #include "xfs_log_priv.h" -#include "xfs_trans_priv.h" -#include "xfs_filestream.h"  #include "xfs_da_btree.h" -#include "xfs_dir2_format.h"  #include "xfs_dir2.h"  #include "xfs_extfree_item.h"  #include "xfs_mru_cache.h" @@ -52,6 +44,9 @@  #include "xfs_icache.h"  #include "xfs_trace.h"  #include "xfs_icreate_item.h" +#include "xfs_dinode.h" +#include "xfs_filestream.h" +#include "xfs_quota.h"  #include <linux/namei.h>  #include 
<linux/init.h> @@ -770,20 +765,18 @@ xfs_open_devices(  	 * Setup xfs_mount buffer target pointers  	 */  	error = ENOMEM; -	mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname); +	mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev);  	if (!mp->m_ddev_targp)  		goto out_close_rtdev;  	if (rtdev) { -		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1, -							mp->m_fsname); +		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev);  		if (!mp->m_rtdev_targp)  			goto out_free_ddev_targ;  	}  	if (logdev && logdev != ddev) { -		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1, -							mp->m_fsname); +		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev);  		if (!mp->m_logdev_targp)  			goto out_free_rtdev_targ;  	} else { @@ -816,8 +809,7 @@ xfs_setup_devices(  {  	int			error; -	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize, -				    mp->m_sb.sb_sectsize); +	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_sectsize);  	if (error)  		return error; @@ -827,14 +819,12 @@ xfs_setup_devices(  		if (xfs_sb_version_hassector(&mp->m_sb))  			log_sector_size = mp->m_sb.sb_logsectsize;  		error = xfs_setsize_buftarg(mp->m_logdev_targp, -					    mp->m_sb.sb_blocksize,  					    log_sector_size);  		if (error)  			return error;  	}  	if (mp->m_rtdev_targp) {  		error = xfs_setsize_buftarg(mp->m_rtdev_targp, -					    mp->m_sb.sb_blocksize,  					    mp->m_sb.sb_sectsize);  		if (error)  			return error; @@ -946,10 +936,6 @@ xfs_fs_destroy_inode(  	XFS_STATS_INC(vn_reclaim); -	/* bad inode, get out here ASAP */ -	if (is_bad_inode(inode)) -		goto out_reclaim; -  	ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);  	/* @@ -965,7 +951,6 @@ xfs_fs_destroy_inode(  	 * this more efficiently than we can here, so simply let background  	 * reclaim tear down all inodes.  	 */ -out_reclaim:  	xfs_inode_set_reclaim_tag(ip);  } @@ -1006,7 +991,7 @@ xfs_fs_evict_inode(  	trace_xfs_evict_inode(ip); -	truncate_inode_pages(&inode->i_data, 0); +	truncate_inode_pages_final(&inode->i_data);  	clear_inode(inode);  	XFS_STATS_INC(vn_rele);  	XFS_STATS_INC(vn_remove); @@ -1165,7 +1150,7 @@ xfs_restore_resvblks(struct xfs_mount *mp)   * Note: xfs_log_quiesce() stops background log work - the callers must ensure   * it is started again when appropriate.   
*/ -void +static void  xfs_quiesce_attr(  	struct xfs_mount	*mp)  { @@ -1207,6 +1192,7 @@ xfs_fs_remount(  	char			*p;  	int			error; +	sync_filesystem(sb);  	while ((p = strsep(&options, ",")) != NULL) {  		int token; @@ -1246,7 +1232,7 @@ xfs_fs_remount(  			 */  #if 0  			xfs_info(mp, -		"mount option \"%s\" not supported for remount\n", p); +		"mount option \"%s\" not supported for remount", p);  			return -EINVAL;  #else  			break; @@ -1442,11 +1428,11 @@ xfs_fs_fill_super(  	if (error)  		goto out_free_fsname; -	error = xfs_init_mount_workqueues(mp); +	error = -xfs_init_mount_workqueues(mp);  	if (error)  		goto out_close_devices; -	error = xfs_icsb_init_counters(mp); +	error = -xfs_icsb_init_counters(mp);  	if (error)  		goto out_destroy_workqueues; @@ -1491,10 +1477,6 @@ xfs_fs_fill_super(  		error = ENOENT;  		goto out_unmount;  	} -	if (is_bad_inode(root)) { -		error = EINVAL; -		goto out_unmount; -	}  	sb->s_root = d_make_root(root);  	if (!sb->s_root) {  		error = ENOMEM; @@ -1767,13 +1749,9 @@ init_xfs_fs(void)  	if (error)  		goto out_destroy_wq; -	error = xfs_filestream_init(); -	if (error) -		goto out_mru_cache_uninit; -  	error = xfs_buf_init();  	if (error) -		goto out_filestream_uninit; +		goto out_mru_cache_uninit;  	error = xfs_init_procfs();  	if (error) @@ -1800,8 +1778,6 @@ init_xfs_fs(void)  	xfs_cleanup_procfs();   out_buf_terminate:  	xfs_buf_terminate(); - out_filestream_uninit: -	xfs_filestream_uninit();   out_mru_cache_uninit:  	xfs_mru_cache_uninit();   out_destroy_wq: @@ -1820,7 +1796,6 @@ exit_xfs_fs(void)  	xfs_sysctl_unregister();  	xfs_cleanup_procfs();  	xfs_buf_terminate(); -	xfs_filestream_uninit();  	xfs_mru_cache_uninit();  	xfs_destroy_workqueues();  	xfs_destroy_zones(); diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index f622a97a7e3..d69363c833e 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -17,31 +17,32 @@   * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA   */  #include "xfs.h" +#include "xfs_shared.h"  #include "xfs_fs.h"  #include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" +#include "xfs_da_format.h"  #include "xfs_da_btree.h" -#include "xfs_dir2_format.h"  #include "xfs_dir2.h" -#include "xfs_bmap_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h"  #include "xfs_ialloc.h"  #include "xfs_alloc.h"  #include "xfs_bmap.h" +#include "xfs_bmap_btree.h"  #include "xfs_bmap_util.h"  #include "xfs_error.h"  #include "xfs_quota.h"  #include "xfs_trans_space.h"  #include "xfs_trace.h"  #include "xfs_symlink.h" -#include "xfs_buf_item.h" +#include "xfs_trans.h" +#include "xfs_log.h" +#include "xfs_dinode.h"  /* ----- Kernel only functions below ----- */  STATIC int @@ -80,6 +81,10 @@ xfs_readlink_bmap(  		if (error) {  			xfs_buf_ioerror_alert(bp, __func__);  			xfs_buf_relse(bp); + +			/* bad CRC means corrupted metadata */ +			if (error == EFSBADCRC) +				error = EFSCORRUPTED;  			goto out;  		}  		byte_cnt = XFS_SYMLINK_BUF_SPACE(mp, byte_cnt); @@ -88,7 +93,7 @@ xfs_readlink_bmap(  		cur_chunk = bp->b_addr;  		if (xfs_sb_version_hascrc(&mp->m_sb)) { -			if (!xfs_symlink_hdr_ok(mp, ip->i_ino, offset, +			if (!xfs_symlink_hdr_ok(ip->i_ino, offset,  							byte_cnt, bp)) {  				error = EFSCORRUPTED;  				xfs_alert(mp, @@ -208,10 +213,7 @@ xfs_symlink(  		return XFS_ERROR(ENAMETOOLONG);  	udqp = gdqp = NULL; -	
if (dp->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) -		prid = xfs_get_projid(dp); -	else -		prid = XFS_PROJID_DEFAULT; +	prid = xfs_get_initial_prid(dp);  	/*  	 * Make sure that we have allocated dquot(s) on disk. @@ -424,8 +426,7 @@ xfs_symlink(   */  STATIC int  xfs_inactive_symlink_rmt( -	xfs_inode_t	*ip, -	xfs_trans_t	**tpp) +	struct xfs_inode *ip)  {  	xfs_buf_t	*bp;  	int		committed; @@ -437,11 +438,9 @@ xfs_inactive_symlink_rmt(  	xfs_mount_t	*mp;  	xfs_bmbt_irec_t	mval[XFS_SYMLINK_MAPS];  	int		nmaps; -	xfs_trans_t	*ntp;  	int		size;  	xfs_trans_t	*tp; -	tp = *tpp;  	mp = ip->i_mount;  	ASSERT(ip->i_df.if_flags & XFS_IFEXTENTS);  	/* @@ -453,6 +452,16 @@ xfs_inactive_symlink_rmt(  	 */  	ASSERT(ip->i_d.di_nextents > 0 && ip->i_d.di_nextents <= 2); +	tp = xfs_trans_alloc(mp, XFS_TRANS_INACTIVE); +	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); +	if (error) { +		xfs_trans_cancel(tp, 0); +		return error; +	} + +	xfs_ilock(ip, XFS_ILOCK_EXCL); +	xfs_trans_ijoin(tp, ip, 0); +  	/*  	 * Lock the inode, fix the size, and join it to the transaction.  	 * Hold it so in the normal path, we still have it locked for @@ -471,7 +480,7 @@ xfs_inactive_symlink_rmt(  	error = xfs_bmapi_read(ip, 0, xfs_symlink_blocks(mp, size),  				mval, &nmaps, 0);  	if (error) -		goto error0; +		goto error_trans_cancel;  	/*  	 * Invalidate the block(s). No validation is done.  	 */ @@ -481,22 +490,24 @@ xfs_inactive_symlink_rmt(  			XFS_FSB_TO_BB(mp, mval[i].br_blockcount), 0);  		if (!bp) {  			error = ENOMEM; -			goto error1; +			goto error_bmap_cancel;  		}  		xfs_trans_binval(tp, bp);  	}  	/*  	 * Unmap the dead block(s) to the free_list.  	 */ -	if ((error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps, -			&first_block, &free_list, &done))) -		goto error1; +	error = xfs_bunmapi(tp, ip, 0, size, XFS_BMAPI_METADATA, nmaps, +			    &first_block, &free_list, &done); +	if (error) +		goto error_bmap_cancel;  	ASSERT(done);  	/*  	 * Commit the first transaction.  This logs the EFI and the inode.  	 */ -	if ((error = xfs_bmap_finish(&tp, &free_list, &committed))) -		goto error1; +	error = xfs_bmap_finish(&tp, &free_list, &committed); +	if (error) +		goto error_bmap_cancel;  	/*  	 * The transaction must have been committed, since there were  	 * actually extents freed by xfs_bunmapi.  See xfs_bmap_finish. @@ -511,26 +522,13 @@ xfs_inactive_symlink_rmt(  	xfs_trans_ijoin(tp, ip, 0);  	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);  	/* -	 * Get a new, empty transaction to return to our caller. -	 */ -	ntp = xfs_trans_dup(tp); -	/*  	 * Commit the transaction containing extent freeing and EFDs. -	 * If we get an error on the commit here or on the reserve below, -	 * we need to unlock the inode since the new transaction doesn't -	 * have the inode attached.  	 */ -	error = xfs_trans_commit(tp, 0); -	tp = ntp; +	error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);  	if (error) {  		ASSERT(XFS_FORCED_SHUTDOWN(mp)); -		goto error0; +		goto error_unlock;  	} -	/* -	 * transaction commit worked ok so we can drop the extra ticket -	 * reference that we gained in xfs_trans_dup() -	 */ -	xfs_log_ticket_put(tp->t_ticket);  	/*  	 * Remove the memory for extent descriptions (just bookkeeping). @@ -538,23 +536,16 @@ xfs_inactive_symlink_rmt(  	if (ip->i_df.if_bytes)  		xfs_idata_realloc(ip, -ip->i_df.if_bytes, XFS_DATA_FORK);  	ASSERT(ip->i_df.if_bytes == 0); -	/* -	 * Put an itruncate log reservation in the new transaction -	 * for our caller. 
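/*
 * The rework above makes xfs_inactive_symlink_rmt() own its transaction:
 * allocate and reserve, lock and join the inode, then unwind via goto to
 * labelled error paths in reverse order of what was acquired.  A toy,
 * user-space model of that goto-unwinding shape; the function names are
 * stand-ins, not kernel APIs, and the failure is simulated.
 */
#include <stdio.h>

static int reserve_log_space(void)	{ return 0; }	/* 0 == success */
static int map_extents(void)		{ return -1; }	/* simulate failure */
static void cancel_transaction(void)	{ puts("cancel transaction"); }
static void unlock_inode(void)		{ puts("unlock inode"); }

static int remove_remote_symlink(void)
{
	int error;

	error = reserve_log_space();
	if (error)
		return error;			/* nothing held yet */

	puts("lock inode, join to transaction");

	error = map_extents();
	if (error)
		goto error_trans_cancel;	/* undo in reverse order */

	puts("commit");
	unlock_inode();
	return 0;

error_trans_cancel:
	cancel_transaction();
	unlock_inode();
	return error;
}

int main(void)
{
	printf("result = %d\n", remove_remote_symlink());
	return 0;
}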
-	 */ -	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_itruncate, 0, 0); -	if (error) { -		ASSERT(XFS_FORCED_SHUTDOWN(mp)); -		goto error0; -	} -	xfs_trans_ijoin(tp, ip, 0); -	*tpp = tp; +	xfs_iunlock(ip, XFS_ILOCK_EXCL);  	return 0; - error1: +error_bmap_cancel:  	xfs_bmap_cancel(&free_list); - error0: +error_trans_cancel: +	xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT); +error_unlock: +	xfs_iunlock(ip, XFS_ILOCK_EXCL);  	return error;  } @@ -563,41 +554,46 @@ xfs_inactive_symlink_rmt(   */  int  xfs_inactive_symlink( -	struct xfs_inode	*ip, -	struct xfs_trans	**tp) +	struct xfs_inode	*ip)  {  	struct xfs_mount	*mp = ip->i_mount;  	int			pathlen;  	trace_xfs_inactive_symlink(ip); -	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); -  	if (XFS_FORCED_SHUTDOWN(mp))  		return XFS_ERROR(EIO); +	xfs_ilock(ip, XFS_ILOCK_EXCL); +  	/*  	 * Zero length symlinks _can_ exist.  	 */  	pathlen = (int)ip->i_d.di_size; -	if (!pathlen) +	if (!pathlen) { +		xfs_iunlock(ip, XFS_ILOCK_EXCL);  		return 0; +	}  	if (pathlen < 0 || pathlen > MAXPATHLEN) {  		xfs_alert(mp, "%s: inode (0x%llx) bad symlink length (%d)",  			 __func__, (unsigned long long)ip->i_ino, pathlen); +		xfs_iunlock(ip, XFS_ILOCK_EXCL);  		ASSERT(0);  		return XFS_ERROR(EFSCORRUPTED);  	}  	if (ip->i_df.if_flags & XFS_IFINLINE) { -		if (ip->i_df.if_bytes > 0) +		if (ip->i_df.if_bytes > 0)   			xfs_idata_realloc(ip, -(ip->i_df.if_bytes),  					  XFS_DATA_FORK); +		xfs_iunlock(ip, XFS_ILOCK_EXCL);  		ASSERT(ip->i_df.if_bytes == 0);  		return 0;  	} +	xfs_iunlock(ip, XFS_ILOCK_EXCL); +  	/* remove the remote symlink */ -	return xfs_inactive_symlink_rmt(ip, tp); +	return xfs_inactive_symlink_rmt(ip);  } diff --git a/fs/xfs/xfs_symlink.h b/fs/xfs/xfs_symlink.h index 99338ba666a..e75245d0911 100644 --- a/fs/xfs/xfs_symlink.h +++ b/fs/xfs/xfs_symlink.h @@ -22,6 +22,6 @@  int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,  		const char *target_path, umode_t mode, struct xfs_inode **ipp);  int xfs_readlink(struct xfs_inode *ip, char *link); -int xfs_inactive_symlink(struct xfs_inode *ip, struct xfs_trans **tpp); +int xfs_inactive_symlink(struct xfs_inode *ip);  #endif /* __XFS_SYMLINK_H */ diff --git a/fs/xfs/xfs_symlink_remote.c b/fs/xfs/xfs_symlink_remote.c index 01c85e3f647..23c2f2577c8 100644 --- a/fs/xfs/xfs_symlink_remote.c +++ b/fs/xfs/xfs_symlink_remote.c @@ -19,8 +19,9 @@  #include "xfs.h"  #include "xfs_fs.h"  #include "xfs_format.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_shared.h" +#include "xfs_trans_resv.h"  #include "xfs_ag.h"  #include "xfs_sb.h"  #include "xfs_mount.h" @@ -30,6 +31,7 @@  #include "xfs_trace.h"  #include "xfs_symlink.h"  #include "xfs_cksum.h" +#include "xfs_trans.h"  #include "xfs_buf_item.h" @@ -78,7 +80,6 @@ xfs_symlink_hdr_set(   */  bool  xfs_symlink_hdr_ok( -	struct xfs_mount	*mp,  	xfs_ino_t		ino,  	uint32_t		offset,  	uint32_t		size, @@ -131,12 +132,13 @@ xfs_symlink_read_verify(  	if (!xfs_sb_version_hascrc(&mp->m_sb))  		return; -	if (!xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -				  offsetof(struct xfs_dsymlink_hdr, sl_crc)) || -	    !xfs_symlink_verify(bp)) { -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr); +	if (!xfs_buf_verify_cksum(bp, XFS_SYMLINK_CRC_OFF)) +		xfs_buf_ioerror(bp, EFSBADCRC); +	else if (!xfs_symlink_verify(bp))  		xfs_buf_ioerror(bp, EFSCORRUPTED); -	} + +	if (bp->b_error) +		xfs_verifier_error(bp);  }  static void @@ -151,8 +153,8 @@ xfs_symlink_write_verify(  		return;  	if 
(!xfs_symlink_verify(bp)) { -		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, bp->b_addr);  		xfs_buf_ioerror(bp, EFSCORRUPTED); +		xfs_verifier_error(bp);  		return;  	} @@ -160,8 +162,7 @@ xfs_symlink_write_verify(  		struct xfs_dsymlink_hdr *dsl = bp->b_addr;  		dsl->sl_lsn = cpu_to_be64(bip->bli_item.li_lsn);  	} -	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), -			 offsetof(struct xfs_dsymlink_hdr, sl_crc)); +	xfs_buf_update_cksum(bp, XFS_SYMLINK_CRC_OFF);  }  const struct xfs_buf_ops xfs_symlink_buf_ops = { diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index 5d7b3e40705..1e85bcd0e41 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -17,19 +17,16 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_shared.h"  #include "xfs_format.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h" +#include "xfs_mount.h" +#include "xfs_da_format.h"  #include "xfs_inode.h"  #include "xfs_btree.h" -#include "xfs_mount.h"  #include "xfs_da_btree.h"  #include "xfs_ialloc.h"  #include "xfs_itable.h" @@ -37,6 +34,8 @@  #include "xfs_bmap.h"  #include "xfs_attr.h"  #include "xfs_attr_leaf.h" +#include "xfs_trans.h" +#include "xfs_log.h"  #include "xfs_log_priv.h"  #include "xfs_buf_item.h"  #include "xfs_quota.h" @@ -46,6 +45,8 @@  #include "xfs_dquot.h"  #include "xfs_log_recover.h"  #include "xfs_inode_item.h" +#include "xfs_bmap_btree.h" +#include "xfs_filestream.h"  /*   * We include this last to have the helpers above available for the trace diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 47910e638c1..152f8278263 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -31,8 +31,8 @@ struct xfs_da_args;  struct xfs_da_node_entry;  struct xfs_dquot;  struct xfs_log_item; -struct xlog_ticket;  struct xlog; +struct xlog_ticket;  struct xlog_recover;  struct xlog_recover_item;  struct xfs_buf_log_format; @@ -135,6 +135,31 @@ DEFINE_PERAG_REF_EVENT(xfs_perag_clear_reclaim);  DEFINE_PERAG_REF_EVENT(xfs_perag_set_eofblocks);  DEFINE_PERAG_REF_EVENT(xfs_perag_clear_eofblocks); +DECLARE_EVENT_CLASS(xfs_ag_class, +	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno), +	TP_ARGS(mp, agno), +	TP_STRUCT__entry( +		__field(dev_t, dev) +		__field(xfs_agnumber_t, agno) +	), +	TP_fast_assign( +		__entry->dev = mp->m_super->s_dev; +		__entry->agno = agno; +	), +	TP_printk("dev %d:%d agno %u", +		  MAJOR(__entry->dev), MINOR(__entry->dev), +		  __entry->agno) +); +#define DEFINE_AG_EVENT(name)	\ +DEFINE_EVENT(xfs_ag_class, name,	\ +	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno),	\ +	TP_ARGS(mp, agno)) + +DEFINE_AG_EVENT(xfs_read_agf); +DEFINE_AG_EVENT(xfs_alloc_read_agf); +DEFINE_AG_EVENT(xfs_read_agi); +DEFINE_AG_EVENT(xfs_ialloc_read_agi); +  TRACE_EVENT(xfs_attr_list_node_descend,  	TP_PROTO(struct xfs_attr_list_context *ctx,  		 struct xfs_da_node_entry *btree), @@ -513,6 +538,64 @@ DEFINE_BUF_ITEM_EVENT(xfs_trans_bhold_release);  DEFINE_BUF_ITEM_EVENT(xfs_trans_binval);  DEFINE_BUF_ITEM_EVENT(xfs_trans_buf_ordered); +DECLARE_EVENT_CLASS(xfs_filestream_class, +	TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno), +	TP_ARGS(ip, agno), +	TP_STRUCT__entry( +		__field(dev_t, dev) +		__field(xfs_ino_t, ino) +		__field(xfs_agnumber_t, agno) +		__field(int, streams) +	), +	TP_fast_assign( +		__entry->dev = VFS_I(ip)->i_sb->s_dev; +		
__entry->ino = ip->i_ino; +		__entry->agno = agno; +		__entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno); +	), +	TP_printk("dev %d:%d ino 0x%llx agno %u streams %d", +		  MAJOR(__entry->dev), MINOR(__entry->dev), +		  __entry->ino, +		  __entry->agno, +		  __entry->streams) +) +#define DEFINE_FILESTREAM_EVENT(name) \ +DEFINE_EVENT(xfs_filestream_class, name, \ +	TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno), \ +	TP_ARGS(ip, agno)) +DEFINE_FILESTREAM_EVENT(xfs_filestream_free); +DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup); +DEFINE_FILESTREAM_EVENT(xfs_filestream_scan); + +TRACE_EVENT(xfs_filestream_pick, +	TP_PROTO(struct xfs_inode *ip, xfs_agnumber_t agno, +		 xfs_extlen_t free, int nscan), +	TP_ARGS(ip, agno, free, nscan), +	TP_STRUCT__entry( +		__field(dev_t, dev) +		__field(xfs_ino_t, ino) +		__field(xfs_agnumber_t, agno) +		__field(int, streams) +		__field(xfs_extlen_t, free) +		__field(int, nscan) +	), +	TP_fast_assign( +		__entry->dev = VFS_I(ip)->i_sb->s_dev; +		__entry->ino = ip->i_ino; +		__entry->agno = agno; +		__entry->streams = xfs_filestream_peek_ag(ip->i_mount, agno); +		__entry->free = free; +		__entry->nscan = nscan; +	), +	TP_printk("dev %d:%d ino 0x%llx agno %u streams %d free %d nscan %d", +		  MAJOR(__entry->dev), MINOR(__entry->dev), +		  __entry->ino, +		  __entry->agno, +		  __entry->streams, +		  __entry->free, +		  __entry->nscan) +); +  DECLARE_EVENT_CLASS(xfs_lock_class,  	TP_PROTO(struct xfs_inode *ip, unsigned lock_flags,  		 unsigned long caller_ip), @@ -578,6 +661,8 @@ DEFINE_INODE_EVENT(xfs_readlink);  DEFINE_INODE_EVENT(xfs_inactive_symlink);  DEFINE_INODE_EVENT(xfs_alloc_file_space);  DEFINE_INODE_EVENT(xfs_free_file_space); +DEFINE_INODE_EVENT(xfs_zero_file_space); +DEFINE_INODE_EVENT(xfs_collapse_file_space);  DEFINE_INODE_EVENT(xfs_readdir);  #ifdef CONFIG_XFS_POSIX_ACL  DEFINE_INODE_EVENT(xfs_get_acl); @@ -938,6 +1023,63 @@ DEFINE_LOG_ITEM_EVENT(xfs_ail_pinned);  DEFINE_LOG_ITEM_EVENT(xfs_ail_locked);  DEFINE_LOG_ITEM_EVENT(xfs_ail_flushing); +DECLARE_EVENT_CLASS(xfs_ail_class, +	TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn), +	TP_ARGS(lip, old_lsn, new_lsn), +	TP_STRUCT__entry( +		__field(dev_t, dev) +		__field(void *, lip) +		__field(uint, type) +		__field(uint, flags) +		__field(xfs_lsn_t, old_lsn) +		__field(xfs_lsn_t, new_lsn) +	), +	TP_fast_assign( +		__entry->dev = lip->li_mountp->m_super->s_dev; +		__entry->lip = lip; +		__entry->type = lip->li_type; +		__entry->flags = lip->li_flags; +		__entry->old_lsn = old_lsn; +		__entry->new_lsn = new_lsn; +	), +	TP_printk("dev %d:%d lip 0x%p old lsn %d/%d new lsn %d/%d type %s flags %s", +		  MAJOR(__entry->dev), MINOR(__entry->dev), +		  __entry->lip, +		  CYCLE_LSN(__entry->old_lsn), BLOCK_LSN(__entry->old_lsn), +		  CYCLE_LSN(__entry->new_lsn), BLOCK_LSN(__entry->new_lsn), +		  __print_symbolic(__entry->type, XFS_LI_TYPE_DESC), +		  __print_flags(__entry->flags, "|", XFS_LI_FLAGS)) +) + +#define DEFINE_AIL_EVENT(name) \ +DEFINE_EVENT(xfs_ail_class, name, \ +	TP_PROTO(struct xfs_log_item *lip, xfs_lsn_t old_lsn, xfs_lsn_t new_lsn), \ +	TP_ARGS(lip, old_lsn, new_lsn)) +DEFINE_AIL_EVENT(xfs_ail_insert); +DEFINE_AIL_EVENT(xfs_ail_move); +DEFINE_AIL_EVENT(xfs_ail_delete); + +TRACE_EVENT(xfs_log_assign_tail_lsn, +	TP_PROTO(struct xlog *log, xfs_lsn_t new_lsn), +	TP_ARGS(log, new_lsn), +	TP_STRUCT__entry( +		__field(dev_t, dev) +		__field(xfs_lsn_t, new_lsn) +		__field(xfs_lsn_t, old_lsn) +		__field(xfs_lsn_t, last_sync_lsn) +	), +	TP_fast_assign( +		
__entry->dev = log->l_mp->m_super->s_dev; +		__entry->new_lsn = new_lsn; +		__entry->old_lsn = atomic64_read(&log->l_tail_lsn); +		__entry->last_sync_lsn = atomic64_read(&log->l_last_sync_lsn); +	), +	TP_printk("dev %d:%d new tail lsn %d/%d, old lsn %d/%d, last sync %d/%d", +		  MAJOR(__entry->dev), MINOR(__entry->dev), +		  CYCLE_LSN(__entry->new_lsn), BLOCK_LSN(__entry->new_lsn), +		  CYCLE_LSN(__entry->old_lsn), BLOCK_LSN(__entry->old_lsn), +		  CYCLE_LSN(__entry->last_sync_lsn), BLOCK_LSN(__entry->last_sync_lsn)) +)  DECLARE_EVENT_CLASS(xfs_file_class,  	TP_PROTO(struct xfs_inode *ip, size_t count, loff_t offset, int flags), @@ -976,7 +1118,6 @@ DEFINE_RW_EVENT(xfs_file_read);  DEFINE_RW_EVENT(xfs_file_buffered_write);  DEFINE_RW_EVENT(xfs_file_direct_write);  DEFINE_RW_EVENT(xfs_file_splice_read); -DEFINE_RW_EVENT(xfs_file_splice_write);  DECLARE_EVENT_CLASS(xfs_page_class,  	TP_PROTO(struct inode *inode, struct page *page, unsigned long off, diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index 5411e01ab45..d03932564cc 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -18,32 +18,21 @@   */  #include "xfs.h"  #include "xfs_fs.h" +#include "xfs_shared.h"  #include "xfs_format.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_error.h" -#include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h" -#include "xfs_btree.h" -#include "xfs_ialloc.h" -#include "xfs_alloc.h"  #include "xfs_extent_busy.h" -#include "xfs_bmap.h"  #include "xfs_quota.h" -#include "xfs_qm.h" +#include "xfs_trans.h"  #include "xfs_trans_priv.h" -#include "xfs_trans_space.h" -#include "xfs_inode_item.h" -#include "xfs_log_priv.h" -#include "xfs_buf_item.h" +#include "xfs_log.h"  #include "xfs_trace.h" +#include "xfs_error.h"  kmem_zone_t	*xfs_trans_zone;  kmem_zone_t	*xfs_log_item_desc_zone; @@ -838,7 +827,7 @@ xfs_trans_committed_bulk(  		xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);  	spin_lock(&ailp->xa_lock); -	xfs_trans_ail_cursor_done(ailp, &cur); +	xfs_trans_ail_cursor_done(&cur);  	spin_unlock(&ailp->xa_lock);  } @@ -898,12 +887,7 @@ xfs_trans_commit(  		xfs_trans_apply_sb_deltas(tp);  	xfs_trans_apply_dquot_deltas(tp); -	error = xfs_log_commit_cil(mp, tp, &commit_lsn, flags); -	if (error == ENOMEM) { -		xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); -		error = XFS_ERROR(EIO); -		goto out_unreserve; -	} +	xfs_log_commit_cil(mp, tp, &commit_lsn, flags);  	current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);  	xfs_trans_free(tp); @@ -913,10 +897,7 @@ xfs_trans_commit(  	 * log out now and wait for it.  	 
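/*
 * After the xfs_trans_commit() hunk above, xfs_log_commit_cil() can no
 * longer fail, so the only error a synchronous commit can return comes from
 * forcing the log to the commit LSN.  Toy model of that split between
 * "commit into the in-memory CIL" (always succeeds) and "wait for it to
 * reach stable storage" (may fail); the names below are illustrative, not
 * the kernel API.
 */
#include <stdbool.h>
#include <stdio.h>

static long commit_to_cil(void)		{ return 42; }	/* returns commit LSN */
static int  force_log_to_lsn(long lsn)	{ (void)lsn; return 0; }

static int toy_trans_commit(bool sync)
{
	long commit_lsn = commit_to_cil();		/* cannot fail */

	if (sync)
		return force_log_to_lsn(commit_lsn);	/* only error source */
	return 0;					/* async: done */
}

int main(void)
{
	printf("sync commit: %d, async commit: %d\n",
	       toy_trans_commit(true), toy_trans_commit(false));
	return 0;
}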
*/  	if (sync) { -		if (!error) { -			error = _xfs_log_force_lsn(mp, commit_lsn, -				      XFS_LOG_SYNC, NULL); -		} +		error = _xfs_log_force_lsn(mp, commit_lsn, XFS_LOG_SYNC, NULL);  		XFS_STATS_INC(xs_trans_sync);  	} else {  		XFS_STATS_INC(xs_trans_async); diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 09cf40b89e8..b5bc1ab3c4d 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -18,10 +18,6 @@  #ifndef	__XFS_TRANS_H__  #define	__XFS_TRANS_H__ -struct xfs_log_item; - -#include "xfs_trans_resv.h" -  /* kernel only transaction subsystem defines */  struct xfs_buf; @@ -68,7 +64,7 @@ typedef struct xfs_log_item {  struct xfs_item_ops {  	void (*iop_size)(xfs_log_item_t *, int *, int *); -	void (*iop_format)(xfs_log_item_t *, struct xfs_log_iovec *); +	void (*iop_format)(xfs_log_item_t *, struct xfs_log_vec *);  	void (*iop_pin)(xfs_log_item_t *);  	void (*iop_unpin)(xfs_log_item_t *, int remove);  	uint (*iop_push)(struct xfs_log_item *, struct list_head *); @@ -77,6 +73,9 @@ struct xfs_item_ops {  	void (*iop_committing)(xfs_log_item_t *, xfs_lsn_t);  }; +void	xfs_log_item_init(struct xfs_mount *mp, struct xfs_log_item *item, +			  int type, const struct xfs_item_ops *ops); +  /*   * Return values for the iop_push() routines.   */ @@ -85,18 +84,12 @@ struct xfs_item_ops {  #define XFS_ITEM_LOCKED		2  #define XFS_ITEM_FLUSHING	3 -/* - * This is the type of function which can be given to xfs_trans_callback() - * to be called upon the transaction's commit to disk. - */ -typedef void (*xfs_trans_callback_t)(struct xfs_trans *, void *);  /*   * This is the structure maintained for every active transaction.   */  typedef struct xfs_trans {  	unsigned int		t_magic;	/* magic number */ -	xfs_log_callback_t	t_logcb;	/* log callback struct */  	unsigned int		t_type;		/* transaction type */  	unsigned int		t_log_res;	/* amt of log space resvd */  	unsigned int		t_log_count;	/* count for perm log res */ @@ -132,7 +125,6 @@ typedef struct xfs_trans {  	int64_t			t_rextents_delta;/* superblocks rextents chg */  	int64_t			t_rextslog_delta;/* superblocks rextslog chg */  	struct list_head	t_items;	/* log item descriptors */ -	xfs_trans_header_t	t_header;	/* header for in-log trans */  	struct list_head	t_busy;		/* list of busy extents */  	unsigned long		t_pflags;	/* saved process flags state */  } xfs_trans_t; @@ -237,10 +229,16 @@ void		xfs_trans_log_efd_extent(xfs_trans_t *,  					 xfs_fsblock_t,  					 xfs_extlen_t);  int		xfs_trans_commit(xfs_trans_t *, uint flags); +int		xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);  void		xfs_trans_cancel(xfs_trans_t *, int);  int		xfs_trans_ail_init(struct xfs_mount *);  void		xfs_trans_ail_destroy(struct xfs_mount *); +void		xfs_trans_buf_set_type(struct xfs_trans *, struct xfs_buf *, +				       enum xfs_blft); +void		xfs_trans_buf_copy_type(struct xfs_buf *dst_bp, +					struct xfs_buf *src_bp); +  extern kmem_zone_t	*xfs_trans_zone;  extern kmem_zone_t	*xfs_log_item_desc_zone; diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c index 21c6d7ddbc0..cb0f3a84cc6 100644 --- a/fs/xfs/xfs_trans_ail.c +++ b/fs/xfs/xfs_trans_ail.c @@ -18,15 +18,16 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" +#include "xfs_trans.h"  #include "xfs_trans_priv.h"  #include "xfs_trace.h"  #include "xfs_error.h" +#include "xfs_log.h"  #ifdef DEBUG  /* @@ 
-172,7 +173,6 @@ xfs_trans_ail_cursor_next(   */  void  xfs_trans_ail_cursor_done( -	struct xfs_ail		*ailp,  	struct xfs_ail_cursor	*cur)  {  	cur->item = NULL; @@ -367,7 +367,7 @@ xfsaild_push(  		 * If the AIL is empty or our push has reached the end we are  		 * done now.  		 */ -		xfs_trans_ail_cursor_done(ailp, &cur); +		xfs_trans_ail_cursor_done(&cur);  		spin_unlock(&ailp->xa_lock);  		goto out_done;  	} @@ -452,7 +452,7 @@ xfsaild_push(  			break;  		lsn = lip->li_lsn;  	} -	xfs_trans_ail_cursor_done(ailp, &cur); +	xfs_trans_ail_cursor_done(&cur);  	spin_unlock(&ailp->xa_lock);  	if (xfs_buf_delwri_submit_nowait(&ailp->xa_buf_list)) @@ -658,11 +658,13 @@ xfs_trans_ail_update_bulk(  			if (XFS_LSN_CMP(lsn, lip->li_lsn) <= 0)  				continue; +			trace_xfs_ail_move(lip, lip->li_lsn, lsn);  			xfs_ail_delete(ailp, lip);  			if (mlip == lip)  				mlip_changed = 1;  		} else {  			lip->li_flags |= XFS_LI_IN_AIL; +			trace_xfs_ail_insert(lip, 0, lsn);  		}  		lip->li_lsn = lsn;  		list_add(&lip->li_ail, &tmp); @@ -731,6 +733,7 @@ xfs_trans_ail_delete_bulk(  			return;  		} +		trace_xfs_ail_delete(lip, mlip->li_lsn, lip->li_lsn);  		xfs_ail_delete(ailp, lip);  		lip->li_flags &= ~XFS_LI_IN_AIL;  		lip->li_lsn = 0; diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 8c75b8f6727..b8eef0549f3 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -17,17 +17,15 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h" +#include "xfs_trans.h"  #include "xfs_buf_item.h"  #include "xfs_trans_priv.h"  #include "xfs_error.h" @@ -277,6 +275,10 @@ xfs_trans_read_buf_map(  			XFS_BUF_UNDONE(bp);  			xfs_buf_stale(bp);  			xfs_buf_relse(bp); + +			/* bad CRC means corrupted metadata */ +			if (error == EFSBADCRC) +				error = EFSCORRUPTED;  			return error;  		}  #ifdef DEBUG @@ -316,7 +318,18 @@ xfs_trans_read_buf_map(  			ASSERT(bp->b_iodone == NULL);  			XFS_BUF_READ(bp);  			bp->b_ops = ops; -			xfsbdstrat(tp->t_mountp, bp); + +			/* +			 * XXX(hch): clean up the error handling here to be less +			 * of a mess.. 
+			 */ +			if (XFS_FORCED_SHUTDOWN(mp)) { +				trace_xfs_bdstrat_shut(bp, _RET_IP_); +				xfs_bioerror_relse(bp); +			} else { +				xfs_buf_iorequest(bp); +			} +  			error = xfs_buf_iowait(bp);  			if (error) {  				xfs_buf_ioerror_alert(bp, __func__); @@ -329,6 +342,9 @@ xfs_trans_read_buf_map(  				if (tp->t_flags & XFS_TRANS_DIRTY)  					xfs_force_shutdown(tp->t_mountp,  							SHUTDOWN_META_IO_ERROR); +				/* bad CRC means corrupted metadata */ +				if (error == EFSBADCRC) +					error = EFSCORRUPTED;  				return error;  			}  		} @@ -366,6 +382,10 @@ xfs_trans_read_buf_map(  		if (tp->t_flags & XFS_TRANS_DIRTY)  			xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR);  		xfs_buf_relse(bp); + +		/* bad CRC means corrupted metadata */ +		if (error == EFSBADCRC) +			error = EFSCORRUPTED;  		return error;  	}  #ifdef DEBUG diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 54ee3c5dee7..41172861e85 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -17,23 +17,18 @@   */  #include "xfs.h"  #include "xfs_fs.h" +#include "xfs_shared.h"  #include "xfs_format.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h" -#include "xfs_alloc.h" -#include "xfs_quota.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h"  #include "xfs_inode.h" -#include "xfs_itable.h" -#include "xfs_bmap.h" -#include "xfs_rtalloc.h"  #include "xfs_error.h" -#include "xfs_attr.h" -#include "xfs_buf_item.h" +#include "xfs_trans.h"  #include "xfs_trans_priv.h" +#include "xfs_quota.h"  #include "xfs_qm.h"  STATIC void	xfs_trans_alloc_dqinfo(xfs_trans_t *); @@ -300,8 +295,8 @@ xfs_trans_mod_dquot(  /*   * Given an array of dqtrx structures, lock all the dquots associated and join   * them to the transaction, provided they have been modified.  We know that the - * highest number of dquots of one type - usr, grp OR prj - involved in a - * transaction is 2 so we don't need to make this very generic. + * highest number of dquots of one type - usr, grp and prj - involved in a + * transaction is 3 so we don't need to make this very generic.   
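/*
 * Several hunks above convert EFSBADCRC to EFSCORRUPTED before returning
 * ("bad CRC means corrupted metadata"): the distinct CRC error code is
 * useful inside verifiers and for logging, but callers of a read path only
 * need to know the metadata cannot be trusted.  Minimal sketch of that
 * mapping; the numeric values below are stand-ins, not the kernel's errno
 * aliases.
 */
#include <stdio.h>

#define TOY_EFSCORRUPTED	117	/* stand-in value */
#define TOY_EFSBADCRC		74	/* stand-in value */

static int metadata_read_error(int error)
{
	/* bad CRC means corrupted metadata as far as callers are concerned */
	if (error == TOY_EFSBADCRC)
		error = TOY_EFSCORRUPTED;
	return error;
}

int main(void)
{
	printf("%d -> %d\n", TOY_EFSBADCRC, metadata_read_error(TOY_EFSBADCRC));
	return 0;
}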
*/  STATIC void  xfs_trans_dqlockedjoin( diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c index 8d71b16ecca..47978ba89da 100644 --- a/fs/xfs/xfs_trans_extfree.c +++ b/fs/xfs/xfs_trans_extfree.c @@ -17,12 +17,13 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_shared.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" +#include "xfs_trans.h"  #include "xfs_trans_priv.h"  #include "xfs_extfree_item.h" diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c index 53dfe46f368..50c3f561428 100644 --- a/fs/xfs/xfs_trans_inode.c +++ b/fs/xfs/xfs_trans_inode.c @@ -17,18 +17,15 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h" -#include "xfs_btree.h" +#include "xfs_trans.h"  #include "xfs_trans_priv.h"  #include "xfs_inode_item.h"  #include "xfs_trace.h" @@ -114,12 +111,14 @@ xfs_trans_log_inode(  	/*  	 * First time we log the inode in a transaction, bump the inode change -	 * counter if it is configured for this to occur. +	 * counter if it is configured for this to occur. We don't use +	 * inode_inc_version() because there is no need for extra locking around +	 * i_version as we already hold the inode locked exclusively for +	 * metadata modification.  	 */  	if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) &&  	    IS_I_VERSION(VFS_I(ip))) { -		inode_inc_iversion(VFS_I(ip)); -		ip->i_d.di_changecount = VFS_I(ip)->i_version; +		ip->i_d.di_changecount = ++VFS_I(ip)->i_version;  		flags |= XFS_ILOG_CORE;  	} diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h index c52def0b441..bd1281862ad 100644 --- a/fs/xfs/xfs_trans_priv.h +++ b/fs/xfs/xfs_trans_priv.h @@ -27,7 +27,6 @@ struct xfs_log_vec;  void	xfs_trans_init(struct xfs_mount *); -int	xfs_trans_roll(struct xfs_trans **, struct xfs_inode *);  void	xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);  void	xfs_trans_del_item(struct xfs_log_item *);  void	xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn, @@ -134,8 +133,7 @@ struct xfs_log_item *	xfs_trans_ail_cursor_last(struct xfs_ail *ailp,  					xfs_lsn_t lsn);  struct xfs_log_item *	xfs_trans_ail_cursor_next(struct xfs_ail *ailp,  					struct xfs_ail_cursor *cur); -void			xfs_trans_ail_cursor_done(struct xfs_ail *ailp, -					struct xfs_ail_cursor *cur); +void			xfs_trans_ail_cursor_done(struct xfs_ail_cursor *cur);  #if BITS_PER_LONG != 64  static inline void diff --git a/fs/xfs/xfs_trans_resv.c b/fs/xfs/xfs_trans_resv.c index a65a3cc4061..f2bda7c76b8 100644 --- a/fs/xfs/xfs_trans_resv.c +++ b/fs/xfs/xfs_trans_resv.c @@ -18,27 +18,20 @@   */  #include "xfs.h"  #include "xfs_fs.h" +#include "xfs_shared.h"  #include "xfs_format.h" -#include "xfs_log.h" +#include "xfs_log_format.h"  #include "xfs_trans_resv.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_error.h" +#include "xfs_da_format.h"  #include "xfs_da_btree.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h"  
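/*
 * The xfs_trans_log_inode() hunk above bumps i_version with a plain
 * pre-increment instead of inode_inc_iversion(): the inode is already held
 * exclusively for the metadata modification, so no extra serialisation is
 * needed, and the new value is mirrored into di_changecount in the same
 * statement.  Toy, user-space model of that "bump once, record the same
 * value" step; the struct is a stand-in, not the kernel inode.
 */
#include <stdint.h>
#include <stdio.h>

struct toy_inode {
	uint64_t i_version;		/* VFS change counter */
	uint64_t di_changecount;	/* on-disk copy logged with the core */
};

static void toy_log_inode(struct toy_inode *ip)
{
	/* caller is assumed to hold the inode locked exclusively */
	ip->di_changecount = ++ip->i_version;
}

int main(void)
{
	struct toy_inode ino = { .i_version = 7 };

	toy_log_inode(&ino);
	printf("i_version=%llu di_changecount=%llu\n",
	       (unsigned long long)ino.i_version,
	       (unsigned long long)ino.di_changecount);
	return 0;
}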
 #include "xfs_inode.h"
-#include "xfs_btree.h"
+#include "xfs_bmap_btree.h"
 #include "xfs_ialloc.h"
-#include "xfs_alloc.h"
-#include "xfs_extent_busy.h"
-#include "xfs_bmap.h"
-#include "xfs_bmap_util.h"
 #include "xfs_quota.h"
+#include "xfs_trans.h"
 #include "xfs_qm.h"
 #include "xfs_trans_space.h"
 #include "xfs_trace.h"
@@ -89,20 +82,69 @@ xfs_calc_buf_res(
  * on disk. Hence we need an inode reservation function that calculates all this
  * correctly. So, we log:
  *
- * - log op headers for object
+ * - 4 log op headers for object
+ *	- for the ilf, the inode core and 2 forks
  * - inode log format object
- * - the entire inode contents (core + 2 forks)
- * - two bmap btree block headers
+ * - the inode core
+ * - two inode forks containing bmap btree root blocks.
+ *	- the btree data contained by both forks will fit into the inode size,
+ *	  hence when combined with the inode core above, we have a total of the
+ *	  actual inode size.
+ *	- the BMBT headers need to be accounted separately, as they are
+ *	  additional to the records and pointers that fit inside the inode
+ *	  forks.
  */
 STATIC uint
 xfs_calc_inode_res(
 	struct xfs_mount	*mp,
 	uint			ninodes)
 {
-	return ninodes * (sizeof(struct xlog_op_header) +
-			  sizeof(struct xfs_inode_log_format) +
-			  mp->m_sb.sb_inodesize +
-			  2 * XFS_BMBT_BLOCK_LEN(mp));
+	return ninodes *
+		(4 * sizeof(struct xlog_op_header) +
+		 sizeof(struct xfs_inode_log_format) +
+		 mp->m_sb.sb_inodesize +
+		 2 * XFS_BMBT_BLOCK_LEN(mp));
+}
+
+/*
+ * The free inode btree is a conditional feature and the log reservation
+ * requirements differ slightly from that of the traditional inode allocation
+ * btree. The finobt tracks records for inode chunks with at least one free
+ * inode. A record can be removed from the tree for an inode allocation
+ * or free and thus the finobt reservation is unconditional across:
+ *
+ * 	- inode allocation
+ * 	- inode free
+ * 	- inode chunk allocation
+ *
+ * The 'modify' param indicates to include the record modification scenario. The
+ * 'alloc' param indicates to include the reservation for free space btree
+ * modifications on behalf of finobt modifications. This is required only for
+ * transactions that do not already account for free space btree modifications.
+ *
+ * the free inode btree: max depth * block size
+ * the allocation btrees: 2 trees * (max depth - 1) * block size
+ * the free inode btree entry: block size
+ */
+STATIC uint
+xfs_calc_finobt_res(
+	struct xfs_mount 	*mp,
+	int			alloc,
+	int			modify)
+{
+	uint res;
+
+	if (!xfs_sb_version_hasfinobt(&mp->m_sb))
+		return 0;
+
+	res = xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1));
+	if (alloc)
+		res += xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
+					XFS_FSB_TO_B(mp, 1));
+	if (modify)
+		res += (uint)XFS_FSB_TO_B(mp, 1);
+
+	return res;
 }
 /*
@@ -182,7 +224,7 @@ xfs_calc_itruncate_reservation(
 		    xfs_calc_buf_res(5, 0) +
 		    xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
 				     XFS_FSB_TO_B(mp, 1)) +
-		    xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
+		    xfs_calc_buf_res(2 + mp->m_ialloc_blks +
 				     mp->m_in_maxlevels, 0)));
 }
@@ -212,6 +254,19 @@ xfs_calc_rename_reservation(
 }
 /*
+ * For removing an inode from unlinked list at first, we can modify:
+ *    the agi hash list and counters: sector size
+ *    the on disk inode before ours in the agi hash list: inode cluster size
+ */
+STATIC uint
+xfs_calc_iunlink_remove_reservation(
+	struct xfs_mount        *mp)
+{
+	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+	       max_t(uint, XFS_FSB_TO_B(mp, 1), mp->m_inode_cluster_size);
+}
+
+/*
  * For creating a link to an inode:
  *    the parent directory inode: inode size
  *    the linked inode: inode size
@@ -228,6 +283,7 @@ xfs_calc_link_reservation(
 	struct xfs_mount	*mp)
 {
 	return XFS_DQUOT_LOGRES(mp) +
+		xfs_calc_iunlink_remove_reservation(mp) +
 		MAX((xfs_calc_inode_res(mp, 2) +
 		     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
 				      XFS_FSB_TO_B(mp, 1))),
@@ -237,6 +293,18 @@ xfs_calc_link_reservation(
 }
 /*
+ * For adding an inode to unlinked list we can modify:
+ *    the agi hash list: sector size
+ *    the unlinked inode: inode size
+ */
+STATIC uint
+xfs_calc_iunlink_add_reservation(xfs_mount_t *mp)
+{
+	return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
+		xfs_calc_inode_res(mp, 1);
+}
+
+/*
  * For removing a directory entry we can modify:
  *    the parent directory inode: inode size
  *    the removed inode: inode size
@@ -253,10 +321,11 @@ xfs_calc_remove_reservation(
 	struct xfs_mount	*mp)
 {
 	return XFS_DQUOT_LOGRES(mp) +
-		MAX((xfs_calc_inode_res(mp, 2) +
+		xfs_calc_iunlink_add_reservation(mp) +
+		MAX((xfs_calc_inode_res(mp, 1) +
 		     xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp),
 				      XFS_FSB_TO_B(mp, 1))),
-		    (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) +
+		    (xfs_calc_buf_res(4, mp->m_sb.sb_sectsize) +
 		     xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2),
 				      XFS_FSB_TO_B(mp, 1))));
 }
@@ -275,6 +344,7 @@ xfs_calc_remove_reservation(
  *    the superblock for the nlink flag: sector size
  *    the directory btree: (max depth + v2) * dir block size
  *    the directory inode's bmap btree: (max depth + v2) * block size
+ *    the finobt (record modification and allocation btrees)
  */
 STATIC uint
 xfs_calc_create_resv_modify(
@@ -283,14 +353,15 @@ xfs_calc_create_resv_modify(
 	return xfs_calc_inode_res(mp, 2) +
 		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
 		(uint)XFS_FSB_TO_B(mp, 1) +
-		xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1));
+		xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)) +
+		xfs_calc_finobt_res(mp, 1, 1);
 }
 /*
  * For create we can allocate some inodes giving:
  *    the agi and agf of the ag getting the new inodes: 2 * sectorsize
  *    the superblock for the nlink flag: sector size
- *    the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize
+ *    the inode blocks allocated: mp->m_ialloc_blks * blocksize
  *    the inode btree: max depth * blocksize
  *    the allocation btrees: 2 trees * (max depth - 1) * block size
  */
@@ -300,7 +371,7 @@ xfs_calc_create_resv_alloc(
 {
 	return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
 		mp->m_sb.sb_sectsize +
-		xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp), XFS_FSB_TO_B(mp, 1)) +
+		xfs_calc_buf_res(mp->m_ialloc_blks, XFS_FSB_TO_B(mp, 1)) +
 		xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
 		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
 				 XFS_FSB_TO_B(mp, 1));
@@ -321,6 +392,7 @@ __xfs_calc_create_reservation(
  *    the superblock for the nlink flag: sector size
  *    the inode btree: max depth * blocksize
  *    the allocation btrees: 2 trees * (max depth - 1) * block size
+ *    the finobt (record insertion)
  */
 STATIC uint
 xfs_calc_icreate_resv_alloc(
@@ -330,7 +402,8 @@ xfs_calc_icreate_resv_alloc(
 		mp->m_sb.sb_sectsize +
 		xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) +
 		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
-				 XFS_FSB_TO_B(mp, 1));
+				 XFS_FSB_TO_B(mp, 1)) +
+		xfs_calc_finobt_res(mp, 0, 0);
 }
 STATIC uint
@@ -351,6 +424,20 @@ xfs_calc_create_reservation(
 }
+STATIC uint
+xfs_calc_create_tmpfile_reservation(
+	struct xfs_mount        *mp)
+{
+	uint	res = XFS_DQUOT_LOGRES(mp);
+
+	if (xfs_sb_version_hascrc(&mp->m_sb))
+		res += xfs_calc_icreate_resv_alloc(mp);
+	else
+		res += xfs_calc_create_resv_alloc(mp);
+
+	return res + xfs_calc_iunlink_add_reservation(mp);
+}
+
 /*
  * Making a new directory is the same as creating a new file.
  */
@@ -384,6 +471,7 @@ xfs_calc_symlink_reservation(
  *    the on disk inode before ours in the agi hash list: inode cluster size
  *    the inode btree: max depth * blocksize
  *    the allocation btrees: 2 trees * (max depth - 1) * block size
+ *    the finobt (record insertion, removal or modification)
  */
 STATIC uint
 xfs_calc_ifree_reservation(
@@ -391,15 +479,15 @@ xfs_calc_ifree_reservation(
 {
 	return XFS_DQUOT_LOGRES(mp) +
 		xfs_calc_inode_res(mp, 1) +
-		xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
+		xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) +
 		xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) +
-		MAX((__uint16_t)XFS_FSB_TO_B(mp, 1),
-		    XFS_INODE_CLUSTER_SIZE(mp)) +
+		xfs_calc_iunlink_remove_reservation(mp) +
 		xfs_calc_buf_res(1, 0) +
-		xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) +
+		xfs_calc_buf_res(2 + mp->m_ialloc_blks +
 				 mp->m_in_maxlevels, 0) +
 		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
-				 XFS_FSB_TO_B(mp, 1));
+				 XFS_FSB_TO_B(mp, 1)) +
+		xfs_calc_finobt_res(mp, 0, 1);
 }
 /*
@@ -522,7 +610,7 @@ xfs_calc_addafork_reservation(
 	return XFS_DQUOT_LOGRES(mp) +
 		xfs_calc_inode_res(mp, 1) +
 		xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) +
-		xfs_calc_buf_res(1, mp->m_dirblksize) +
+		xfs_calc_buf_res(1, mp->m_dir_geo->blksize) +
 		xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1,
 				 XFS_FSB_TO_B(mp, 1)) +
 		xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1),
@@ -653,15 +741,14 @@ xfs_calc_qm_setqlim_reservation(
 /*
  * Allocating quota on disk if needed.
- *	the write transaction log space: M_RES(mp)->tr_write.tr_logres
+ *	the write transaction log space for quota file extent allocation
  *	the unit of quota allocation: one system block size
  */
 STATIC uint
 xfs_calc_qm_dqalloc_reservation(
 	struct xfs_mount	*mp)
 {
-	ASSERT(M_RES(mp)->tr_write.tr_logres);
-	return M_RES(mp)->tr_write.tr_logres +
+	return xfs_calc_write_reservation(mp) +
 		xfs_calc_buf_res(1,
 			XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1);
 }
@@ -738,6 +825,11 @@ xfs_trans_resv_calc(
 	resp->tr_create.tr_logcount = XFS_CREATE_LOG_COUNT;
 	resp->tr_create.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+	resp->tr_create_tmpfile.tr_logres =
+			xfs_calc_create_tmpfile_reservation(mp);
+	resp->tr_create_tmpfile.tr_logcount = XFS_CREATE_TMPFILE_LOG_COUNT;
+	resp->tr_create_tmpfile.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
+
 	resp->tr_mkdir.tr_logres = xfs_calc_mkdir_reservation(mp);
 	resp->tr_mkdir.tr_logcount = XFS_MKDIR_LOG_COUNT;
 	resp->tr_mkdir.tr_logflags |= XFS_TRANS_PERM_LOG_RES;
@@ -793,7 +885,6 @@ xfs_trans_resv_calc(
 	/* The following transaction are logged in logical format */
 	resp->tr_ichange.tr_logres = xfs_calc_ichange_reservation(mp);
 	resp->tr_growdata.tr_logres = xfs_calc_growdata_reservation(mp);
-	resp->tr_swrite.tr_logres = xfs_calc_swrite_reservation(mp);
 	resp->tr_fsyncts.tr_logres = xfs_calc_swrite_reservation(mp);
 	resp->tr_writeid.tr_logres = xfs_calc_writeid_reservation(mp);
 	resp->tr_attrsetrt.tr_logres = xfs_calc_attrsetrt_reservation(mp);
diff --git a/fs/xfs/xfs_trans_resv.h b/fs/xfs/xfs_trans_resv.h
index de7de9aaad8..1097d14cd58 100644
--- a/fs/xfs/xfs_trans_resv.h
+++ b/fs/xfs/xfs_trans_resv.h
@@ -38,11 +38,11 @@ struct xfs_trans_resv {
 	struct xfs_trans_res	tr_remove;	/* unlink trans */
 	struct xfs_trans_res	tr_symlink;	/* symlink trans */
 	struct xfs_trans_res	tr_create;	/* create trans */
+	struct xfs_trans_res	tr_create_tmpfile; /* create O_TMPFILE trans */
 	struct xfs_trans_res	tr_mkdir;	/* mkdir trans */
 	struct xfs_trans_res	tr_ifree;	/* inode free trans */
 	struct xfs_trans_res	tr_ichange;	/* inode update trans */
 	struct xfs_trans_res	tr_growdata;	/* fs data section grow trans */
-	struct xfs_trans_res	tr_swrite;	/* sync write inode trans */
 	struct xfs_trans_res	tr_addafork;	/* add inode attr fork trans */
 	struct xfs_trans_res	tr_writeid;	/* write setuid/setgid file */
 	struct xfs_trans_res	tr_attrinval;	/* attr fork buffer
@@ -100,6 +100,7 @@ struct xfs_trans_resv {
 #define	XFS_ITRUNCATE_LOG_COUNT		2
 #define XFS_INACTIVE_LOG_COUNT		2
 #define	XFS_CREATE_LOG_COUNT		2
+#define	XFS_CREATE_TMPFILE_LOG_COUNT	2
 #define	XFS_MKDIR_LOG_COUNT		3
 #define	XFS_SYMLINK_LOG_COUNT		3
 #define	XFS_REMOVE_LOG_COUNT		2
diff --git a/fs/xfs/xfs_trans_space.h b/fs/xfs/xfs_trans_space.h
index 7d2c920dfb9..bf9c4579334 100644
--- a/fs/xfs/xfs_trans_space.h
+++ b/fs/xfs/xfs_trans_space.h
@@ -28,7 +28,8 @@
 	(((b + XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp) - 1) / \
 	  XFS_MAX_CONTIG_EXTENTS_PER_BLOCK(mp)) * \
 	  XFS_EXTENTADD_SPACE_RES(mp,w))
-#define	XFS_DAENTER_1B(mp,w)	((w) == XFS_DATA_FORK ? (mp)->m_dirblkfsbs : 1)
+#define	XFS_DAENTER_1B(mp,w)	\
+	((w) == XFS_DATA_FORK ? (mp)->m_dir_geo->fsbcount : 1)
 #define	XFS_DAENTER_DBS(mp,w)	\
 	(XFS_DA_NODE_MAXDEPTH + (((w) == XFS_DATA_FORK) ? 2 : 0))
 #define	XFS_DAENTER_BLOCKS(mp,w)	\
@@ -47,13 +48,15 @@
 #define	XFS_DIRREMOVE_SPACE_RES(mp)	\
 	XFS_DAREMOVE_SPACE_RES(mp, XFS_DATA_FORK)
 #define	XFS_IALLOC_SPACE_RES(mp)	\
-	(XFS_IALLOC_BLOCKS(mp) + (mp)->m_in_maxlevels - 1)
+	((mp)->m_ialloc_blks + \
+	 (xfs_sb_version_hasfinobt(&mp->m_sb) ? 2 : 1 * \
+	  ((mp)->m_in_maxlevels - 1)))
 /*
  * Space reservation values for various transactions.
  */
 #define	XFS_ADDAFORK_SPACE_RES(mp)	\
-	((mp)->m_dirblkfsbs + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK))
+	((mp)->m_dir_geo->fsbcount + XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK))
 #define	XFS_ATTRRM_SPACE_RES(mp)	\
 	XFS_DAREMOVE_SPACE_RES(mp, XFS_ATTR_FORK)
 /* This macro is not used - see inline code in xfs_attr_set */
@@ -82,5 +85,8 @@
 	(XFS_DIRREMOVE_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl))
 #define	XFS_SYMLINK_SPACE_RES(mp,nl,b)	\
 	(XFS_IALLOC_SPACE_RES(mp) + XFS_DIRENTER_SPACE_RES(mp,nl) + (b))
+#define XFS_IFREE_SPACE_RES(mp)		\
+	(xfs_sb_version_hasfinobt(&mp->m_sb) ? (mp)->m_in_maxlevels : 0)
+
 #endif	/* __XFS_TRANS_SPACE_H__ */
diff --git a/fs/xfs/xfs_types.h b/fs/xfs/xfs_types.h
index 82bbc34d54a..65c6e6650b1 100644
--- a/fs/xfs/xfs_types.h
+++ b/fs/xfs/xfs_types.h
@@ -134,7 +134,7 @@ typedef enum {
 typedef enum {
 	XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi,
-	XFS_BTNUM_MAX
+	XFS_BTNUM_FINOi, XFS_BTNUM_MAX
 } xfs_btnum_t;
 struct xfs_name {
diff --git a/fs/xfs/xfs_vnode.h b/fs/xfs/xfs_vnode.h
index db14d0c0868..e8a77383c0d 100644
--- a/fs/xfs/xfs_vnode.h
+++ b/fs/xfs/xfs_vnode.h
@@ -25,14 +25,6 @@ struct xfs_inode;
 struct attrlist_cursor_kern;
 /*
- * Return values for xfs_inactive.  A return value of
- * VN_INACTIVE_NOCACHE implies that the file system behavior
- * has disassociated its state and bhv_desc_t from the vnode.
- */
-#define	VN_INACTIVE_CACHE	0
-#define	VN_INACTIVE_NOCACHE	1
-
-/*
  * Flags for read/write calls - same values as IRIX
  */
 #define IO_ISDIRECT	0x00004		/* bypass page cache */
@@ -43,15 +35,6 @@ struct attrlist_cursor_kern;
 	{ IO_INVIS,	"INVIS"}
 /*
- * Flush/Invalidate options for vop_toss/flush/flushinval_pages.
- */
-#define FI_NONE			0	/* none */
-#define FI_REMAPF		1	/* Do a remapf prior to the operation */
-#define FI_REMAPF_LOCKED	2	/* Do a remapf prior to the operation.
-					   Prevent VM access to the pages until
-					   the operation completes. */
-
 /*
  * Some useful predicates.
  */
 #define VN_MAPPED(vp)	mapping_mapped(vp->i_mapping)
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index e01f35ea76b..78ed92a46fd 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -17,9 +17,13 @@
  */
 #include "xfs.h"
+#include "xfs_format.h"
 #include "xfs_log_format.h"
-#include "xfs_da_btree.h"
-#include "xfs_bmap_btree.h"
+#include "xfs_trans_resv.h"
+#include "xfs_sb.h"
+#include "xfs_ag.h"
+#include "xfs_mount.h"
+#include "xfs_da_format.h"
 #include "xfs_inode.h"
 #include "xfs_attr.h"
 #include "xfs_attr_leaf.h"
@@ -98,8 +102,8 @@ const struct xattr_handler *xfs_xattr_handlers[] = {
 	&xfs_xattr_trusted_handler,
 	&xfs_xattr_security_handler,
#ifdef CONFIG_XFS_POSIX_ACL
-	&xfs_xattr_acl_access_handler,
-	&xfs_xattr_acl_default_handler,
+	&posix_acl_access_xattr_handler,
+	&posix_acl_default_xattr_handler,
 #endif
 	NULL
 };
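
Aside (not part of the patches above): a rough user-space sketch of how the new finobt
reservation term composes, for sanity-checking the arithmetic described in the
xfs_calc_finobt_res() comment in xfs_trans_resv.c. The helpers calc_buf_res() and
calc_finobt_res(), the per-buffer log overhead and the geometry numbers below are
illustrative assumptions, not values taken from the kernel sources.

/*
 * Illustrative sketch only: mirrors the shape of xfs_calc_finobt_res()
 * with made-up constants standing in for the real mount geometry.
 * Build with: cc -o finobt_res finobt_res.c
 */
#include <stdio.h>

/* crude stand-in for xfs_calc_buf_res(): nbufs * (log overhead + payload) */
static unsigned int calc_buf_res(unsigned int nbufs, unsigned int size)
{
	const unsigned int buf_log_overhead = 128;	/* assumed per-buffer log overhead */

	return nbufs * (buf_log_overhead + size);
}

/*
 * Shape of xfs_calc_finobt_res(mp, alloc, modify):
 *   free inode btree:               max depth * block size
 *   allocation btrees (if alloc):   2 trees * (max depth - 1) * block size
 *   record modification (if modify): one block
 */
static unsigned int calc_finobt_res(int has_finobt, unsigned int in_maxlevels,
				    unsigned int ag_maxlevels,
				    unsigned int blocksize, int alloc, int modify)
{
	unsigned int res;

	if (!has_finobt)
		return 0;

	res = calc_buf_res(in_maxlevels, blocksize);
	if (alloc)
		res += calc_buf_res(2 * (ag_maxlevels - 1), blocksize);
	if (modify)
		res += blocksize;
	return res;
}

int main(void)
{
	const unsigned int blocksize = 4096;	/* assumed fs block size */
	const unsigned int in_maxlevels = 3;	/* assumed inobt/finobt max depth */
	const unsigned int ag_maxlevels = 5;	/* assumed free space btree max depth */

	/* inode create (icreate): bare finobt record insertion */
	printf("icreate finobt term:       %u bytes\n",
	       calc_finobt_res(1, in_maxlevels, ag_maxlevels, blocksize, 0, 0));
	/* inode free (ifree): record modification included */
	printf("ifree finobt term:         %u bytes\n",
	       calc_finobt_res(1, in_maxlevels, ag_maxlevels, blocksize, 0, 1));
	/* create_resv_modify: allocation btrees and modification included */
	printf("create-modify finobt term: %u bytes\n",
	       calc_finobt_res(1, in_maxlevels, ag_maxlevels, blocksize, 1, 1));
	return 0;
}

With these assumed numbers the bare insertion term is 3 * (4096 + 128) = 12672 bytes;
in the real reservations this term is added on top of XFS_DQUOT_LOGRES() and the other
per-transaction terms shown in xfs_trans_resv.c above.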
