Diffstat (limited to 'fs/xfs/xfs_btree.c')
-rw-r--r--	fs/xfs/xfs_btree.c	717
1 file changed, 553 insertions(+), 164 deletions(-)
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c
index 04f9cca8da7..cf893bc1e37 100644
--- a/fs/xfs/xfs_btree.c
+++ b/fs/xfs/xfs_btree.c
@@ -17,24 +17,23 @@
  */
 #include "xfs.h"
 #include "xfs_fs.h"
-#include "xfs_types.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
 #include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
 #include "xfs_sb.h"
 #include "xfs_ag.h"
 #include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dinode.h"
 #include "xfs_inode.h"
+#include "xfs_trans.h"
 #include "xfs_inode_item.h"
+#include "xfs_buf_item.h"
 #include "xfs_btree.h"
-#include "xfs_btree_trace.h"
 #include "xfs_error.h"
 #include "xfs_trace.h"
+#include "xfs_cksum.h"
+#include "xfs_alloc.h"
 
 /*
  * Cursor allocation zone.
@@ -44,9 +43,14 @@ kmem_zone_t	*xfs_btree_cur_zone;
 /*
  * Btree magic numbers.
  */
-const __uint32_t xfs_magics[XFS_BTNUM_MAX] = {
-	XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC
+static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = {
+	{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC,
+	  XFS_FIBT_MAGIC },
+	{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC,
+	  XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }
 };
+#define xfs_btree_magic(cur) \
+	xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum]
 
 
 STATIC int				/* error (0 or EFSCORRUPTED) */
@@ -56,30 +60,38 @@ xfs_btree_check_lblock(
 	int			level,	/* level of the btree block */
 	struct xfs_buf		*bp)	/* buffer for block, if any */
 {
-	int			lblock_ok; /* block passes checks */
+	int			lblock_ok = 1; /* block passes checks */
 	struct xfs_mount	*mp;	/* file system mount point */
 
 	mp = cur->bc_mp;
-	lblock_ok =
-		be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
+
+	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+		lblock_ok = lblock_ok &&
+			uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid) &&
+			block->bb_u.l.bb_blkno == cpu_to_be64(
+				bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
+	}
+
+	lblock_ok = lblock_ok &&
+		be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
 		be16_to_cpu(block->bb_level) == level &&
 		be16_to_cpu(block->bb_numrecs) <=
 			cur->bc_ops->get_maxrecs(cur, level) &&
 		block->bb_u.l.bb_leftsib &&
-		(be64_to_cpu(block->bb_u.l.bb_leftsib) == NULLDFSBNO ||
+		(block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) ||
 		 XFS_FSB_SANITY_CHECK(mp,
-		 	be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
+			be64_to_cpu(block->bb_u.l.bb_leftsib))) &&
 		block->bb_u.l.bb_rightsib &&
-		(be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO ||
+		(block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) ||
 		 XFS_FSB_SANITY_CHECK(mp,
-		 	be64_to_cpu(block->bb_u.l.bb_rightsib)));
+			be64_to_cpu(block->bb_u.l.bb_rightsib)));
+
 	if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp,
 			XFS_ERRTAG_BTREE_CHECK_LBLOCK,
 			XFS_RANDOM_BTREE_CHECK_LBLOCK))) {
 		if (bp)
 			trace_xfs_btree_corrupt(bp, _RET_IP_);
-		XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW,
-				 mp);
+		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
 		return XFS_ERROR(EFSCORRUPTED);
 	}
 	return 0;
@@ -92,32 +104,42 @@ xfs_btree_check_sblock(
 	int			level,	/* level of the btree block */
 	struct xfs_buf		*bp)	/* buffer containing block */
 {
+	struct xfs_mount	*mp;	/* file system mount point */
 	struct xfs_buf		*agbp;	/* buffer for ag. freespace struct */
 	struct xfs_agf		*agf;	/* ag. freespace structure */
 	xfs_agblock_t		agflen;	/* native ag. freespace length */
-	int			sblock_ok; /* block passes checks */
+	int			sblock_ok = 1; /* block passes checks */
 
+	mp = cur->bc_mp;
 	agbp = cur->bc_private.a.agbp;
 	agf = XFS_BUF_TO_AGF(agbp);
 	agflen = be32_to_cpu(agf->agf_length);
-	sblock_ok =
-		be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] &&
+
+	if (xfs_sb_version_hascrc(&mp->m_sb)) {
+		sblock_ok = sblock_ok &&
+			uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid) &&
+			block->bb_u.s.bb_blkno == cpu_to_be64(
+				bp ? bp->b_bn : XFS_BUF_DADDR_NULL);
+	}
+
+	sblock_ok = sblock_ok &&
+		be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) &&
 		be16_to_cpu(block->bb_level) == level &&
 		be16_to_cpu(block->bb_numrecs) <=
 			cur->bc_ops->get_maxrecs(cur, level) &&
-		(be32_to_cpu(block->bb_u.s.bb_leftsib) == NULLAGBLOCK ||
+		(block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) ||
 		 be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) &&
 		block->bb_u.s.bb_leftsib &&
-		(be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK ||
+		(block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) ||
 		 be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) &&
 		block->bb_u.s.bb_rightsib;
-	if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp,
+
+	if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp,
 			XFS_ERRTAG_BTREE_CHECK_SBLOCK,
 			XFS_RANDOM_BTREE_CHECK_SBLOCK))) {
 		if (bp)
 			trace_xfs_btree_corrupt(bp, _RET_IP_);
-		XFS_CORRUPTION_ERROR("xfs_btree_check_sblock",
-			XFS_ERRLEVEL_LOW, cur->bc_mp, block);
+		XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp);
 		return XFS_ERROR(EFSCORRUPTED);
 	}
 	return 0;
@@ -196,6 +218,70 @@ xfs_btree_check_ptr(
 #endif
 
 /*
+ * Calculate CRC on the whole btree block and stuff it into the
+ * long-form btree header.
+ *
+ * Prior to calculating the CRC, pull the LSN out of the buffer log item and put
+ * it into the buffer so recovery knows what the last modification was that made
+ * it to disk.
+ */
+void
+xfs_btree_lblock_calc_crc(
+	struct xfs_buf		*bp)
+{
+	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
+	struct xfs_buf_log_item	*bip = bp->b_fspriv;
+
+	if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+		return;
+	if (bip)
+		block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+	xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
+}
+
+bool
+xfs_btree_lblock_verify_crc(
+	struct xfs_buf		*bp)
+{
+	if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+		return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);
+
+	return true;
+}
+
+/*
+ * Calculate CRC on the whole btree block and stuff it into the
+ * short-form btree header.
+ *
+ * Prior to calculating the CRC, pull the LSN out of the buffer log item and put
+ * it into the buffer so recovery knows what the last modification was that made
+ * it to disk.
+ */
+void
+xfs_btree_sblock_calc_crc(
+	struct xfs_buf		*bp)
+{
+	struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
+	struct xfs_buf_log_item	*bip = bp->b_fspriv;
+
+	if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+		return;
+	if (bip)
+		block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn);
+	xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
+}
+
+bool
+xfs_btree_sblock_verify_crc(
+	struct xfs_buf		*bp)
+{
+	if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb))
+		return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);
+
+	return true;
+}
+
+/*
  * Delete the btree cursor.
  */
 void
@@ -268,18 +354,19 @@ xfs_btree_dup_cursor(
 	for (i = 0; i < new->bc_nlevels; i++) {
 		new->bc_ptrs[i] = cur->bc_ptrs[i];
 		new->bc_ra[i] = cur->bc_ra[i];
-		if ((bp = cur->bc_bufs[i])) {
-			if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
-				XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) {
+		bp = cur->bc_bufs[i];
+		if (bp) {
+			error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
+						   XFS_BUF_ADDR(bp), mp->m_bsize,
+						   0, &bp,
+						   cur->bc_ops->buf_ops);
+			if (error) {
 				xfs_btree_del_cursor(new, error);
 				*ncur = NULL;
 				return error;
 			}
-			new->bc_bufs[i] = bp;
-			ASSERT(bp);
-			ASSERT(!XFS_BUF_GETERROR(bp));
-		} else
-			new->bc_bufs[i] = NULL;
+		}
+		new->bc_bufs[i] = bp;
 	}
 	*ncur = new;
 	return 0;
@@ -320,9 +407,14 @@ xfs_btree_dup_cursor(
  */
 static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur)
 {
-	return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
-		XFS_BTREE_LBLOCK_LEN :
-		XFS_BTREE_SBLOCK_LEN;
+	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+		if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
+			return XFS_BTREE_LBLOCK_CRC_LEN;
+		return XFS_BTREE_LBLOCK_LEN;
+	}
+	if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS)
+		return XFS_BTREE_SBLOCK_CRC_LEN;
+	return XFS_BTREE_SBLOCK_LEN;
 }
 
 /*
@@ -416,7 +508,7 @@ xfs_btree_ptr_addr(
 }
 
 /*
- * Get a the root block which is stored in the inode.
+ * Get the root block which is stored in the inode.
  *
  * For now this btree implementation assumes the btree root is always
  * stored in the if_broot field of an inode fork.
@@ -462,15 +554,11 @@ xfs_btree_get_bufl(
 	xfs_fsblock_t	fsbno,		/* file system block number */
 	uint		lock)		/* lock flags for get_buf */
 {
-	xfs_buf_t	*bp;		/* buffer pointer (return value) */
 	xfs_daddr_t		d;		/* real disk block address */
 
 	ASSERT(fsbno != NULLFSBLOCK);
 	d = XFS_FSB_TO_DADDR(mp, fsbno);
-	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
-	ASSERT(bp);
-	ASSERT(!XFS_BUF_GETERROR(bp));
-	return bp;
+	return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
 }
 
 /*
@@ -485,16 +573,12 @@ xfs_btree_get_bufs(
 	xfs_agblock_t	agbno,		/* allocation group block number */
 	uint		lock)		/* lock flags for get_buf */
 {
-	xfs_buf_t	*bp;		/* buffer pointer (return value) */
 	xfs_daddr_t		d;		/* real disk block address */
 
 	ASSERT(agno != NULLAGNUMBER);
 	ASSERT(agbno != NULLAGBLOCK);
 	d = XFS_AGB_TO_DADDR(mp, agno, agbno);
-	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
-	ASSERT(bp);
-	ASSERT(!XFS_BUF_GETERROR(bp));
-	return bp;
+	return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);
 }
 
 /*
@@ -511,9 +595,9 @@ xfs_btree_islastblock(
 	block = xfs_btree_get_block(cur, level, &bp);
 	xfs_btree_check_block(cur, block, level, bp);
 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
-		return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO;
+		return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO);
 	else
-		return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK;
+		return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK);
 }
 
 /*
@@ -614,29 +698,28 @@ xfs_btree_offsets(
  * Get a buffer for the block, return it read in.
  * Long-form addressing.
  */
-int					/* error */
+int
 xfs_btree_read_bufl(
-	xfs_mount_t	*mp,		/* file system mount point */
-	xfs_trans_t	*tp,		/* transaction pointer */
-	xfs_fsblock_t	fsbno,		/* file system block number */
-	uint		lock,		/* lock flags for read_buf */
-	xfs_buf_t	**bpp,		/* buffer for fsbno */
-	int		refval)		/* ref count value for buffer */
-{
-	xfs_buf_t	*bp;		/* return value */
+	struct xfs_mount	*mp,		/* file system mount point */
+	struct xfs_trans	*tp,		/* transaction pointer */
+	xfs_fsblock_t		fsbno,		/* file system block number */
+	uint			lock,		/* lock flags for read_buf */
+	struct xfs_buf		**bpp,		/* buffer for fsbno */
+	int			refval,		/* ref count value for buffer */
+	const struct xfs_buf_ops *ops)
+{
+	struct xfs_buf		*bp;		/* return value */
 	xfs_daddr_t		d;		/* real disk block address */
-	int		error;
+	int			error;
 
 	ASSERT(fsbno != NULLFSBLOCK);
 	d = XFS_FSB_TO_DADDR(mp, fsbno);
-	if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
-			mp->m_bsize, lock, &bp))) {
+	error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d,
+				   mp->m_bsize, lock, &bp, ops);
+	if (error)
 		return error;
-	}
-	ASSERT(!bp || !XFS_BUF_GETERROR(bp));
-	if (bp != NULL) {
-		XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval);
-	}
+	if (bp)
+		xfs_buf_set_ref(bp, refval);
 	*bpp = bp;
 	return 0;
 }
@@ -648,15 +731,16 @@ xfs_btree_read_bufl(
 /* ARGSUSED */
 void
 xfs_btree_reada_bufl(
-	xfs_mount_t	*mp,		/* file system mount point */
-	xfs_fsblock_t	fsbno,		/* file system block number */
-	xfs_extlen_t	count)		/* count of filesystem blocks */
+	struct xfs_mount	*mp,		/* file system mount point */
+	xfs_fsblock_t		fsbno,		/* file system block number */
+	xfs_extlen_t		count,		/* count of filesystem blocks */
+	const struct xfs_buf_ops *ops)
 {
 	xfs_daddr_t		d;
 
 	ASSERT(fsbno != NULLFSBLOCK);
 	d = XFS_FSB_TO_DADDR(mp, fsbno);
-	xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count);
+	xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
 }
 
 /*
@@ -666,17 +750,18 @@ xfs_btree_reada_bufl(
 /* ARGSUSED */
 void
 xfs_btree_reada_bufs(
-	xfs_mount_t	*mp,		/* file system mount point */
-	xfs_agnumber_t	agno,		/* allocation group number */
-	xfs_agblock_t	agbno,		/* allocation group block number */
-	xfs_extlen_t	count)		/* count of filesystem blocks */
+	struct xfs_mount	*mp,		/* file system mount point */
+	xfs_agnumber_t		agno,		/* allocation group number */
+	xfs_agblock_t		agbno,		/* allocation group block number */
+	xfs_extlen_t		count,		/* count of filesystem blocks */
+	const struct xfs_buf_ops *ops)
{
 	xfs_daddr_t		d;
 
 	ASSERT(agno != NULLAGNUMBER);
 	ASSERT(agbno != NULLAGBLOCK);
 	d = XFS_AGB_TO_DADDR(mp, agno, agbno);
-	xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count);
+	xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops);
 }
 
 STATIC int
@@ -690,12 +775,14 @@ xfs_btree_readahead_lblock(
 	xfs_dfsbno_t		right = be64_to_cpu(block->bb_u.l.bb_rightsib);
 
 	if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) {
-		xfs_btree_reada_bufl(cur->bc_mp, left, 1);
+		xfs_btree_reada_bufl(cur->bc_mp, left, 1,
+				     cur->bc_ops->buf_ops);
 		rval++;
 	}
 
 	if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) {
-		xfs_btree_reada_bufl(cur->bc_mp, right, 1);
+		xfs_btree_reada_bufl(cur->bc_mp, right, 1,
+				     cur->bc_ops->buf_ops);
 		rval++;
 	}
 
@@ -715,13 +802,13 @@ xfs_btree_readahead_sblock(
 
 	if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) {
 		xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
-				     left, 1);
+				     left, 1, cur->bc_ops->buf_ops);
 		rval++;
 	}
 
 	if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) {
 		xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno,
-				     right, 1);
+				     right, 1, cur->bc_ops->buf_ops);
 		rval++;
 	}
 
@@ -759,6 +846,41 @@ xfs_btree_readahead(
 	return xfs_btree_readahead_sblock(cur, lr, block);
 }
 
+STATIC xfs_daddr_t
+xfs_btree_ptr_to_daddr(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_ptr	*ptr)
+{
+	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+		ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
+
+		return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
+	} else {
+		ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
+		ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
+
+		return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
+					be32_to_cpu(ptr->s));
+	}
+}
+
+/*
+ * Readahead @count btree blocks at the given @ptr location.
+ *
+ * We don't need to care about long or short form btrees here as we have a
+ * method of converting the ptr directly to a daddr available to us.
+ */
+STATIC void
+xfs_btree_readahead_ptr(
+	struct xfs_btree_cur	*cur,
+	union xfs_btree_ptr	*ptr,
+	xfs_extlen_t		count)
+{
+	xfs_buf_readahead(cur->bc_mp->m_ddev_targp,
+			  xfs_btree_ptr_to_daddr(cur, ptr),
+			  cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
+}
+
 /*
  * Set the buffer for level "lev" in the cursor to bp, releasing
  * any previous buffer.
@@ -778,14 +900,14 @@ xfs_btree_setbuf(
 
 	b = XFS_BUF_TO_BLOCK(bp);
 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
-		if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO)
+		if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO))
 			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
-		if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO)
+		if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO))
 			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
 	} else {
-		if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK)
+		if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK))
 			cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA;
-		if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK)
+		if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK))
 			cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA;
 	}
 }
@@ -796,9 +918,9 @@ xfs_btree_ptr_is_null(
 	union xfs_btree_ptr	*ptr)
 {
 	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
-		return be64_to_cpu(ptr->l) == NULLDFSBNO;
+		return ptr->l == cpu_to_be64(NULLDFSBNO);
 	else
-		return be32_to_cpu(ptr->s) == NULLAGBLOCK;
+		return ptr->s == cpu_to_be32(NULLAGBLOCK);
 }
 
 STATIC void
@@ -859,29 +981,88 @@ xfs_btree_set_sibling(
 	}
 }
 
-STATIC void
+void
+xfs_btree_init_block_int(
+	struct xfs_mount	*mp,
+	struct xfs_btree_block	*buf,
+	xfs_daddr_t		blkno,
+	__u32			magic,
+	__u16			level,
+	__u16			numrecs,
+	__u64			owner,
+	unsigned int		flags)
+{
+	buf->bb_magic = cpu_to_be32(magic);
+	buf->bb_level = cpu_to_be16(level);
+	buf->bb_numrecs = cpu_to_be16(numrecs);
+
+	if (flags & XFS_BTREE_LONG_PTRS) {
+		buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
+		buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
+		if (flags & XFS_BTREE_CRC_BLOCKS) {
+			buf->bb_u.l.bb_blkno = cpu_to_be64(blkno);
+			buf->bb_u.l.bb_owner = cpu_to_be64(owner);
+			uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid);
+			buf->bb_u.l.bb_pad = 0;
+			buf->bb_u.l.bb_lsn = 0;
+		}
+	} else {
+		/* owner is a 32 bit value on short blocks */
+		__u32 __owner = (__u32)owner;
+
+		buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
+		buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
+		if (flags & XFS_BTREE_CRC_BLOCKS) {
+			buf->bb_u.s.bb_blkno = cpu_to_be64(blkno);
+			buf->bb_u.s.bb_owner = cpu_to_be32(__owner);
+			uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid);
+			buf->bb_u.s.bb_lsn = 0;
+		}
+	}
+}
+
+void
 xfs_btree_init_block(
+	struct xfs_mount *mp,
+	struct xfs_buf	*bp,
+	__u32		magic,
+	__u16		level,
+	__u16		numrecs,
+	__u64		owner,
+	unsigned int	flags)
+{
+	xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
+				 magic, level, numrecs, owner, flags);
+}
+
+STATIC void
+xfs_btree_init_block_cur(
 	struct xfs_btree_cur	*cur,
+	struct xfs_buf		*bp,
 	int			level,
-	int			numrecs,
-	struct xfs_btree_block	*new)	/* new block */
+	int			numrecs)
 {
-	new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]);
-	new->bb_level = cpu_to_be16(level);
-	new->bb_numrecs = cpu_to_be16(numrecs);
+	__u64 owner;
 
-	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
-		new->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO);
-		new->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO);
-	} else {
-		new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK);
-		new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK);
-	}
+	/*
+	 * we can pull the owner from the cursor right now as the different
+	 * owners align directly with the pointer size of the btree. This may
+	 * change in future, but is safe for current users of the generic btree
+	 * code.
+	 */
+	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+		owner = cur->bc_private.b.ip->i_ino;
+	else
+		owner = cur->bc_private.a.agno;
+
+	xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn,
+				 xfs_btree_magic(cur), level, numrecs,
+				 owner, cur->bc_flags);
 }
 
 /*
  * Return true if ptr is the last record in the btree and
- * we need to track updateѕ to this record.  The decision
+ * we need to track updates to this record.  The decision
  * will be further refined in the update_lastrec method.
  */
 STATIC int
@@ -918,24 +1099,6 @@ xfs_btree_buf_to_ptr(
 	}
 }
 
-STATIC xfs_daddr_t
-xfs_btree_ptr_to_daddr(
-	struct xfs_btree_cur	*cur,
-	union xfs_btree_ptr	*ptr)
-{
-	if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
-		ASSERT(be64_to_cpu(ptr->l) != NULLDFSBNO);
-
-		return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
-	} else {
-		ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
-		ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK);
-
-		return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
-					be32_to_cpu(ptr->s));
-	}
-}
-
 STATIC void
 xfs_btree_set_refs(
 	struct xfs_btree_cur	*cur,
@@ -944,13 +1107,14 @@ xfs_btree_set_refs(
 	switch (cur->bc_btnum) {
 	case XFS_BTNUM_BNO:
 	case XFS_BTNUM_CNT:
-		XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF);
+		xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);
 		break;
 	case XFS_BTNUM_INO:
-		XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF);
+	case XFS_BTNUM_FINO:
+		xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);
 		break;
 	case XFS_BTNUM_BMAP:
-		XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF);
+		xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF);
 		break;
 	default:
 		ASSERT(0);
@@ -975,9 +1139,10 @@ xfs_btree_get_buf_block(
 
 	*bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d,
 				 mp->m_bsize, flags);
-	ASSERT(*bpp);
-	ASSERT(!XFS_BUF_GETERROR(*bpp));
+	if (!*bpp)
+		return ENOMEM;
 
+	(*bpp)->b_ops = cur->bc_ops->buf_ops;
 	*block = XFS_BUF_TO_BLOCK(*bpp);
 	return 0;
 }
 
 /*
@@ -990,7 +1155,6 @@ STATIC int
 xfs_btree_read_buf_block(
 	struct xfs_btree_cur	*cur,
 	union xfs_btree_ptr	*ptr,
-	int			level,
 	int			flags,
 	struct xfs_btree_block	**block,
 	struct xfs_buf		**bpp)
@@ -1004,20 +1168,14 @@ xfs_btree_read_buf_block(
 
 	d = xfs_btree_ptr_to_daddr(cur, ptr);
 	error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d,
-				   mp->m_bsize, flags, bpp);
+				   mp->m_bsize, flags, bpp,
+				   cur->bc_ops->buf_ops);
 	if (error)
 		return error;
 
-	ASSERT(*bpp != NULL);
-	ASSERT(!XFS_BUF_GETERROR(*bpp));
-
 	xfs_btree_set_refs(cur, *bpp);
 	*block = XFS_BUF_TO_BLOCK(*bpp);
-
-	error = xfs_btree_check_block(cur, *block, level, *bpp);
-	if (error)
-		xfs_trans_brelse(cur->bc_tp, *bpp);
-	return error;
+	return 0;
 }
 
 /*
@@ -1133,6 +1291,7 @@ xfs_btree_log_keys(
 	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
 
 	if (bp) {
+		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
 		xfs_trans_log_buf(cur->bc_tp, bp,
 				  xfs_btree_key_offset(cur, first),
 				  xfs_btree_key_offset(cur, last + 1) - 1);
@@ -1157,6 +1316,7 @@ xfs_btree_log_recs(
 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
 	XFS_BTREE_TRACE_ARGBII(cur, bp, first, last);
 
+	xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
 	xfs_trans_log_buf(cur->bc_tp, bp,
 			  xfs_btree_rec_offset(cur, first),
 			  xfs_btree_rec_offset(cur, last + 1) - 1);
@@ -1181,6 +1341,7 @@ xfs_btree_log_ptrs(
 		struct xfs_btree_block	*block = XFS_BUF_TO_BLOCK(bp);
 		int			level = xfs_btree_get_level(block);
 
+		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
 		xfs_trans_log_buf(cur->bc_tp, bp,
 				xfs_btree_ptr_offset(cur, first, level),
 				xfs_btree_ptr_offset(cur, last + 1, level) - 1);
@@ -1209,7 +1370,12 @@ xfs_btree_log_block(
 		offsetof(struct xfs_btree_block, bb_numrecs),
 		offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib),
 		offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib),
-		XFS_BTREE_SBLOCK_LEN
+		offsetof(struct xfs_btree_block, bb_u.s.bb_blkno),
+		offsetof(struct xfs_btree_block, bb_u.s.bb_lsn),
+		offsetof(struct xfs_btree_block, bb_u.s.bb_uuid),
+		offsetof(struct xfs_btree_block, bb_u.s.bb_owner),
+		offsetof(struct xfs_btree_block, bb_u.s.bb_crc),
+		XFS_BTREE_SBLOCK_CRC_LEN
 	};
 	static const short	loffsets[] = {	/* table of offsets (long) */
 		offsetof(struct xfs_btree_block, bb_magic),
@@ -1217,17 +1383,40 @@ xfs_btree_log_block(
 		offsetof(struct xfs_btree_block, bb_numrecs),
 		offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib),
 		offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib),
-		XFS_BTREE_LBLOCK_LEN
+		offsetof(struct xfs_btree_block, bb_u.l.bb_blkno),
+		offsetof(struct xfs_btree_block, bb_u.l.bb_lsn),
+		offsetof(struct xfs_btree_block, bb_u.l.bb_uuid),
+		offsetof(struct xfs_btree_block, bb_u.l.bb_owner),
+		offsetof(struct xfs_btree_block, bb_u.l.bb_crc),
+		offsetof(struct xfs_btree_block, bb_u.l.bb_pad),
+		XFS_BTREE_LBLOCK_CRC_LEN
 	};
 
 	XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY);
 	XFS_BTREE_TRACE_ARGBI(cur, bp, fields);
 
 	if (bp) {
+		int nbits;
+
+		if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
+			/*
+			 * We don't log the CRC when updating a btree
+			 * block but instead recreate it during log
+			 * recovery.  As the log buffers have checksums
+			 * of their own this is safe and avoids logging a crc
+			 * update in a lot of places.
+			 */
+			if (fields == XFS_BB_ALL_BITS)
+				fields = XFS_BB_ALL_BITS_CRC;
+			nbits = XFS_BB_NUM_BITS_CRC;
+		} else {
+			nbits = XFS_BB_NUM_BITS;
+		}
 		xfs_btree_offsets(fields,
 				  (cur->bc_flags & XFS_BTREE_LONG_PTRS) ?
 					loffsets : soffsets,
-				  XFS_BB_NUM_BITS, &first, &last);
+				  nbits, &first, &last);
+		xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF);
 		xfs_trans_log_buf(cur->bc_tp, bp, first, last);
 	} else {
 		xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip,
@@ -1322,8 +1511,8 @@ xfs_btree_increment(
 		union xfs_btree_ptr	*ptrp;
 
 		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
-		error = xfs_btree_read_buf_block(cur, ptrp, --lev,
-							0, &block, &bp);
+		--lev;
+		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
 		if (error)
 			goto error0;
 
@@ -1421,8 +1610,8 @@ xfs_btree_decrement(
 		union xfs_btree_ptr	*ptrp;
 
 		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block);
-		error = xfs_btree_read_buf_block(cur, ptrp, --lev,
-							0, &block, &bp);
+		--lev;
+		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);
 		if (error)
 			goto error0;
 		xfs_btree_setbuf(cur, lev, bp);
@@ -1472,7 +1661,7 @@ xfs_btree_lookup_get_block(
 		return 0;
 	}
 
-	error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp);
+	error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);
 	if (error)
 		return error;
 
@@ -1504,7 +1693,7 @@ xfs_lookup_get_search_key(
 
 /*
  * Lookup the record.  The cursor is made to point to it, based on dir.
- * Return 0 if can't find any such record, 1 for success.
+ * stat is set to 0 if can't find any such record, 1 for success.
  */
 int					/* error */
 xfs_btree_lookup(
@@ -1823,7 +2012,7 @@ xfs_btree_lshift(
 		goto out0;
 
 	/* Set up the left neighbor as "left". */
-	error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp);
+	error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
 	if (error)
 		goto error0;
 
@@ -2007,7 +2196,7 @@ xfs_btree_rshift(
 		goto out0;
 
 	/* Set up the right neighbor as "right". */
-	error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp);
+	error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
 	if (error)
 		goto error0;
 
@@ -2135,7 +2324,7 @@ error1:
  * record (to be inserted into parent).
  */
 STATIC int					/* error */
-xfs_btree_split(
+__xfs_btree_split(
 	struct xfs_btree_cur	*cur,
 	int			level,
 	union xfs_btree_ptr	*ptrp,
@@ -2177,7 +2366,7 @@ xfs_btree_split(
 	xfs_btree_buf_to_ptr(cur, lbp, &lptr);
 
 	/* Allocate the new block. If we can't do it, we're toast. Give up. */
-	error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat);
+	error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat);
 	if (error)
 		goto error0;
 	if (*stat == 0)
@@ -2190,7 +2379,7 @@ xfs_btree_split(
 		goto error0;
 
 	/* Fill in the btree header for the new right block. */
-	xfs_btree_init_block(cur, xfs_btree_get_level(left), 0, right);
+	xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0);
 
 	/*
 	 * Split the entries between the old and the new block evenly.
@@ -2275,7 +2464,7 @@ xfs_btree_split(
 	 * point back to right instead of to left.
 	 */
 	if (!xfs_btree_ptr_is_null(cur, &rrptr)) {
-		error = xfs_btree_read_buf_block(cur, &rrptr, level,
+		error = xfs_btree_read_buf_block(cur, &rrptr,
							0, &rrblock, &rrbp);
 		if (error)
 			goto error0;
@@ -2315,6 +2504,85 @@ error0:
 	return error;
 }
 
+struct xfs_btree_split_args {
+	struct xfs_btree_cur	*cur;
+	int			level;
+	union xfs_btree_ptr	*ptrp;
+	union xfs_btree_key	*key;
+	struct xfs_btree_cur	**curp;
+	int			*stat;		/* success/failure */
+	int			result;
+	bool			kswapd;	/* allocation in kswapd context */
+	struct completion	*done;
+	struct work_struct	work;
+};
+
+/*
+ * Stack switching interfaces for allocation
+ */
+static void
+xfs_btree_split_worker(
+	struct work_struct	*work)
+{
+	struct xfs_btree_split_args	*args = container_of(work,
+						struct xfs_btree_split_args, work);
+	unsigned long		pflags;
+	unsigned long		new_pflags = PF_FSTRANS;
+
+	/*
+	 * we are in a transaction context here, but may also be doing work
+	 * in kswapd context, and hence we may need to inherit that state
+	 * temporarily to ensure that we don't block waiting for memory reclaim
+	 * in any way.
+	 */
+	if (args->kswapd)
+		new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
+
+	current_set_flags_nested(&pflags, new_pflags);
+
+	args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
+					 args->key, args->curp, args->stat);
+	complete(args->done);
+
+	current_restore_flags_nested(&pflags, new_pflags);
+}
+
+/*
+ * BMBT split requests often come in with little stack to work on. Push
+ * them off to a worker thread so there is lots of stack to use. For the other
+ * btree types, just call directly to avoid the context switch overhead here.
+ */
+STATIC int					/* error */
+xfs_btree_split(
+	struct xfs_btree_cur	*cur,
+	int			level,
+	union xfs_btree_ptr	*ptrp,
+	union xfs_btree_key	*key,
+	struct xfs_btree_cur	**curp,
+	int			*stat)		/* success/failure */
+{
+	struct xfs_btree_split_args	args;
+	DECLARE_COMPLETION_ONSTACK(done);
+
+	if (cur->bc_btnum != XFS_BTNUM_BMAP)
+		return __xfs_btree_split(cur, level, ptrp, key, curp, stat);
+
+	args.cur = cur;
+	args.level = level;
+	args.ptrp = ptrp;
+	args.key = key;
+	args.curp = curp;
+	args.stat = stat;
+	args.done = &done;
+	args.kswapd = current_is_kswapd();
+	INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
+	queue_work(xfs_alloc_wq, &args.work);
+	wait_for_completion(&done);
+	destroy_work_on_stack(&args.work);
+	return args.result;
+}
+
 /*
  * Copy the old inode root contents into a real block and make the
  * broot point to it.
@@ -2350,7 +2618,7 @@ xfs_btree_new_iroot(
 	pp = xfs_btree_ptr_addr(cur, 1, block);
 
 	/* Allocate the new block. If we can't do it, we're toast. Give up. */
-	error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat);
+	error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);
 	if (error)
 		goto error0;
 	if (*stat == 0) {
@@ -2364,7 +2632,17 @@ xfs_btree_new_iroot(
 	if (error)
 		goto error0;
 
+	/*
+	 * we can't just memcpy() the root in for CRC enabled btree blocks.
+	 * In that case we also have to ensure the blkno remains correct.
+	 */
 	memcpy(cblock, block, xfs_btree_block_len(cur));
+	if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) {
+		if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+			cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn);
+		else
+			cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn);
+	}
 
 	be16_add_cpu(&block->bb_level, 1);
 	xfs_btree_set_numrecs(block, 1);
@@ -2444,7 +2722,7 @@ xfs_btree_new_root(
 	cur->bc_ops->init_ptr_from_cur(cur, &rptr);
 
 	/* Allocate the new block. If we can't do it, we're toast. Give up. */
-	error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat);
+	error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat);
 	if (error)
 		goto error0;
 	if (*stat == 0)
@@ -2479,8 +2757,7 @@ xfs_btree_new_root(
 		lbp = bp;
 		xfs_btree_buf_to_ptr(cur, lbp, &lptr);
 		left = block;
-		error = xfs_btree_read_buf_block(cur, &rptr,
-					cur->bc_nlevels - 1, 0, &right, &rbp);
+		error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
 		if (error)
 			goto error0;
 		bp = rbp;
@@ -2491,15 +2768,14 @@ xfs_btree_new_root(
 		xfs_btree_buf_to_ptr(cur, rbp, &rptr);
 		right = block;
 		xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB);
-		error = xfs_btree_read_buf_block(cur, &lptr,
-					cur->bc_nlevels - 1, 0, &left, &lbp);
+		error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
 		if (error)
 			goto error0;
 		bp = lbp;
 		nptr = 2;
 	}
 	/* Fill in the new block's btree header and log it. */
-	xfs_btree_init_block(cur, cur->bc_nlevels, 2, new);
+	xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2);
 	xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS);
 	ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) &&
 			!xfs_btree_ptr_is_null(cur, &rptr));
@@ -2566,7 +2842,6 @@ xfs_btree_make_block_unfull(
 
 		if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) {
 			/* A root block that can be made bigger. */
-
 			xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork);
 		} else {
 			/* A root block that needs replacing */
@@ -3445,8 +3720,7 @@ xfs_btree_delrec(
 		rptr = cptr;
 		right = block;
 		rbp = bp;
-		error = xfs_btree_read_buf_block(cur, &lptr, level,
-							0, &left, &lbp);
+		error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);
 		if (error)
 			goto error0;
 
@@ -3463,8 +3737,7 @@ xfs_btree_delrec(
 		lptr = cptr;
 		left = block;
 		lbp = bp;
-		error = xfs_btree_read_buf_block(cur, &rptr, level,
-							0, &right, &rbp);
+		error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);
 		if (error)
 			goto error0;
 
@@ -3536,8 +3809,7 @@ xfs_btree_delrec(
 	/* If there is a right sibling, point it to the remaining block. */
 	xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
 	if (!xfs_btree_ptr_is_null(cur, &cptr)) {
-		error = xfs_btree_read_buf_block(cur, &cptr, level,
-							0, &rrblock, &rrbp);
+		error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);
 		if (error)
 			goto error0;
 		xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);
@@ -3678,3 +3950,120 @@ xfs_btree_get_rec(
 	*stat = 1;
 	return 0;
 }
+
+/*
+ * Change the owner of a btree.
+ *
+ * The mechanism we use here is ordered buffer logging. Because we don't know
+ * how many buffers we are going to need to modify, we don't really want to
+ * have to make transaction reservations for the worst case of every buffer in a
+ * full size btree as that may be more space than we can fit in the log....
+ *
+ * We do the btree walk in the most optimal manner possible - we have sibling
+ * pointers so we can just walk all the blocks on each level from left to right
+ * in a single pass, and then move to the next level and do the same. We can
+ * also do readahead on the sibling pointers to get IO moving more quickly,
+ * though for slow disks this is unlikely to make much difference to performance
+ * as the amount of CPU work we have to do before moving to the next block is
+ * relatively small.
+ *
+ * For each btree block that we load, modify the owner appropriately, set the
+ * buffer as an ordered buffer and log it appropriately. We need to ensure that
+ * we mark the region we change dirty so that if the buffer is relogged in
+ * a subsequent transaction the changes we make here as an ordered buffer are
+ * correctly relogged in that transaction.  If we are in recovery context, then
+ * just queue the modified buffer as a delayed write buffer so the transaction
+ * recovery completion writes the changes to disk.
+ */
+static int
+xfs_btree_block_change_owner(
+	struct xfs_btree_cur	*cur,
+	int			level,
+	__uint64_t		new_owner,
+	struct list_head	*buffer_list)
+{
+	struct xfs_btree_block	*block;
+	struct xfs_buf		*bp;
+	union xfs_btree_ptr     rptr;
+
+	/* do right sibling readahead */
+	xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
+
+	/* modify the owner */
+	block = xfs_btree_get_block(cur, level, &bp);
+	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+		block->bb_u.l.bb_owner = cpu_to_be64(new_owner);
+	else
+		block->bb_u.s.bb_owner = cpu_to_be32(new_owner);
+
+	/*
+	 * If the block is a root block hosted in an inode, we might not have a
+	 * buffer pointer here and we shouldn't attempt to log the change as the
+	 * information is already held in the inode and discarded when the root
+	 * block is formatted into the on-disk inode fork. We still change it,
+	 * though, so everything is consistent in memory.
+	 */
+	if (bp) {
+		if (cur->bc_tp) {
+			xfs_trans_ordered_buf(cur->bc_tp, bp);
+			xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
+		} else {
+			xfs_buf_delwri_queue(bp, buffer_list);
+		}
+	} else {
+		ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
+		ASSERT(level == cur->bc_nlevels - 1);
+	}
+
+	/* now read rh sibling block for next iteration */
+	xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
+	if (xfs_btree_ptr_is_null(cur, &rptr))
+		return ENOENT;
+
+	return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
+}
+
+int
+xfs_btree_change_owner(
+	struct xfs_btree_cur	*cur,
+	__uint64_t		new_owner,
+	struct list_head	*buffer_list)
+{
+	union xfs_btree_ptr     lptr;
+	int			level;
+	struct xfs_btree_block	*block = NULL;
+	int			error = 0;
+
+	cur->bc_ops->init_ptr_from_cur(cur, &lptr);
+
+	/* for each level */
+	for (level = cur->bc_nlevels - 1; level >= 0; level--) {
+		/* grab the left hand block */
+		error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
+		if (error)
+			return error;
+
+		/* readahead the left most block for the next level down */
+		if (level > 0) {
+			union xfs_btree_ptr     *ptr;
+
+			ptr = xfs_btree_ptr_addr(cur, 1, block);
+			xfs_btree_readahead_ptr(cur, ptr, 1);
+
+			/* save for the next iteration of the loop */
+			lptr = *ptr;
+		}
+
+		/* for each buffer in the level */
+		do {
+			error = xfs_btree_block_change_owner(cur, level,
+							     new_owner,
+							     buffer_list);
+		} while (!error);
+
+		if (error != ENOENT)
+			return error;
+	}
+
+	return 0;
+}
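
Note: the read and readahead paths in this patch now carry a const struct xfs_buf_ops pointer so that verification happens at I/O completion instead of in xfs_btree_read_buf_block() (which is why its structural check and level argument could be dropped). The patch itself does not show a verifier, so the sketch below is only illustrative of how the CRC helpers added above are meant to be consumed by a per-btree ops vector; the function names and the EFSBADCRC plumbing are assumptions, not code from this change.

/*
 * Hypothetical sketch -- not part of this diff. A short-form btree
 * verifier pair: check the CRC on read before any structural checks,
 * and recompute the CRC (stamping the buffer's LSN) just before write.
 */
static void
example_sblock_read_verify(
	struct xfs_buf		*bp)
{
	/* CRC check first; a bad checksum makes structure checks moot */
	if (!xfs_btree_sblock_verify_crc(bp))
		xfs_buf_ioerror(bp, EFSBADCRC);	/* errno choice is assumed */
	/* ... per-btree structural sanity checks would follow here ... */
}

static void
example_sblock_write_verify(
	struct xfs_buf		*bp)
{
	/* recompute the checksum over the whole block at I/O time */
	xfs_btree_sblock_calc_crc(bp);
}

static const struct xfs_buf_ops example_sblock_buf_ops = {
	.verify_read	= example_sblock_read_verify,
	.verify_write	= example_sblock_write_verify,
};

Wiring a vector like this into cur->bc_ops->buf_ops is what lets xfs_trans_read_buf() and xfs_buf_readahead() verify blocks as they arrive from disk.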
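Similarly, xfs_btree_change_owner() is generic and no caller appears in this diff. As a hedged sketch of how a consumer might drive it for an inode-rooted (long-pointer) btree: xfs_bmbt_init_cursor() and xfs_btree_del_cursor() are the existing cursor API, while the function itself and its error handling are illustrative assumptions.

/*
 * Hypothetical caller -- not in this diff. Walks every level of the
 * inode's BMBT left to right, relabelling bb_owner in each block.
 */
int
example_change_bmbt_owner(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	__uint64_t		new_owner,
	struct list_head	*buffer_list)
{
	struct xfs_btree_cur	*cur;
	int			error;

	cur = xfs_bmbt_init_cursor(tp->t_mountp, tp, ip, XFS_DATA_FORK);

	/* buffer_list is only used outside transaction (recovery) context */
	error = xfs_btree_change_owner(cur, new_owner, buffer_list);
	xfs_btree_del_cursor(cur, error ?
				  XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
	return error;
}

Note the ENOENT convention inside xfs_btree_block_change_owner(): it signals "no right sibling, end of this level" to the do/while walk, and xfs_btree_change_owner() treats it as success rather than an error.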
