diff options
Diffstat (limited to 'fs/xfs/xfs_btree.c')
| -rw-r--r-- | fs/xfs/xfs_btree.c | 164 | 
1 files changed, 114 insertions, 50 deletions
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 5690e102243..cf893bc1e37 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -17,24 +17,23 @@   */  #include "xfs.h"  #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h"  #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_trans.h"  #include "xfs_sb.h"  #include "xfs_ag.h"  #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dinode.h"  #include "xfs_inode.h" +#include "xfs_trans.h"  #include "xfs_inode_item.h"  #include "xfs_buf_item.h"  #include "xfs_btree.h"  #include "xfs_error.h"  #include "xfs_trace.h"  #include "xfs_cksum.h" +#include "xfs_alloc.h"  /*   * Cursor allocation zone. @@ -45,9 +44,10 @@ kmem_zone_t	*xfs_btree_cur_zone;   * Btree magic numbers.   */  static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { -	{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC }, +	{ XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, +	  XFS_FIBT_MAGIC },  	{ XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, -	  XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC } +	  XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC }  };  #define xfs_btree_magic(cur) \  	xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum] @@ -236,8 +236,7 @@ xfs_btree_lblock_calc_crc(  		return;  	if (bip)  		block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); -	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), -			 XFS_BTREE_LBLOCK_CRC_OFF); +	xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF);  }  bool @@ -245,8 +244,8 @@ xfs_btree_lblock_verify_crc(  	struct xfs_buf		*bp)  {  	if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) -		return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -					XFS_BTREE_LBLOCK_CRC_OFF); +		return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); +  	return true;  } @@ -269,8 +268,7 @@ xfs_btree_sblock_calc_crc(  		return;  	if (bip)  		block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); -	xfs_update_cksum(bp->b_addr, BBTOB(bp->b_length), -			 XFS_BTREE_SBLOCK_CRC_OFF); +	xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF);  }  bool @@ -278,8 +276,8 @@ xfs_btree_sblock_verify_crc(  	struct xfs_buf		*bp)  {  	if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) -		return xfs_verify_cksum(bp->b_addr, BBTOB(bp->b_length), -					XFS_BTREE_SBLOCK_CRC_OFF); +		return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); +  	return true;  } @@ -556,14 +554,11 @@ xfs_btree_get_bufl(  	xfs_fsblock_t	fsbno,		/* file system block number */  	uint		lock)		/* lock flags for get_buf */  { -	xfs_buf_t	*bp;		/* buffer pointer (return value) */  	xfs_daddr_t		d;		/* real disk block address */  	ASSERT(fsbno != NULLFSBLOCK);  	d = XFS_FSB_TO_DADDR(mp, fsbno); -	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); -	ASSERT(!xfs_buf_geterror(bp)); -	return bp; +	return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);  }  /* @@ -578,15 +573,12 @@ xfs_btree_get_bufs(  	xfs_agblock_t	agbno,		/* allocation group block number */  	uint		lock)		/* lock flags for get_buf */  { -	xfs_buf_t	*bp;		/* buffer pointer (return value) */  	xfs_daddr_t		d;		/* real disk block address */  	ASSERT(agno != NULLAGNUMBER);  	ASSERT(agbno != NULLAGBLOCK);  	d = XFS_AGB_TO_DADDR(mp, agno, agbno); -	bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); -	ASSERT(!xfs_buf_geterror(bp)); -	return bp; +	return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock);  }  /* @@ -726,7 +718,6 @@ xfs_btree_read_bufl(  				   mp->m_bsize, lock, &bp, ops);  	if (error)  		return error; -	ASSERT(!xfs_buf_geterror(bp));  	if (bp)  		xfs_buf_set_ref(bp, refval);  	*bpp = bp; @@ -1119,6 +1110,7 @@ xfs_btree_set_refs(  		xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF);  		break;  	case XFS_BTNUM_INO: +	case XFS_BTNUM_FINO:  		xfs_buf_set_ref(bp, XFS_INO_BTREE_REF);  		break;  	case XFS_BTNUM_BMAP: @@ -1163,7 +1155,6 @@ STATIC int  xfs_btree_read_buf_block(  	struct xfs_btree_cur	*cur,  	union xfs_btree_ptr	*ptr, -	int			level,  	int			flags,  	struct xfs_btree_block	**block,  	struct xfs_buf		**bpp) @@ -1182,7 +1173,6 @@ xfs_btree_read_buf_block(  	if (error)  		return error; -	ASSERT(!xfs_buf_geterror(*bpp));  	xfs_btree_set_refs(cur, *bpp);  	*block = XFS_BUF_TO_BLOCK(*bpp);  	return 0; @@ -1521,8 +1511,8 @@ xfs_btree_increment(  		union xfs_btree_ptr	*ptrp;  		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); -		error = xfs_btree_read_buf_block(cur, ptrp, --lev, -							0, &block, &bp); +		--lev; +		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);  		if (error)  			goto error0; @@ -1620,8 +1610,8 @@ xfs_btree_decrement(  		union xfs_btree_ptr	*ptrp;  		ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); -		error = xfs_btree_read_buf_block(cur, ptrp, --lev, -							0, &block, &bp); +		--lev; +		error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp);  		if (error)  			goto error0;  		xfs_btree_setbuf(cur, lev, bp); @@ -1671,7 +1661,7 @@ xfs_btree_lookup_get_block(  		return 0;  	} -	error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp); +	error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp);  	if (error)  		return error; @@ -2022,7 +2012,7 @@ xfs_btree_lshift(  		goto out0;  	/* Set up the left neighbor as "left". */ -	error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp); +	error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);  	if (error)  		goto error0; @@ -2206,7 +2196,7 @@ xfs_btree_rshift(  		goto out0;  	/* Set up the right neighbor as "right". */ -	error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp); +	error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);  	if (error)  		goto error0; @@ -2334,7 +2324,7 @@ error1:   * record (to be inserted into parent).   */  STATIC int					/* error */ -xfs_btree_split( +__xfs_btree_split(  	struct xfs_btree_cur	*cur,  	int			level,  	union xfs_btree_ptr	*ptrp, @@ -2376,7 +2366,7 @@ xfs_btree_split(  	xfs_btree_buf_to_ptr(cur, lbp, &lptr);  	/* Allocate the new block. If we can't do it, we're toast. Give up. */ -	error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat); +	error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat);  	if (error)  		goto error0;  	if (*stat == 0) @@ -2474,7 +2464,7 @@ xfs_btree_split(  	 * point back to right instead of to left.  	 */  	if (!xfs_btree_ptr_is_null(cur, &rrptr)) { -		error = xfs_btree_read_buf_block(cur, &rrptr, level, +		error = xfs_btree_read_buf_block(cur, &rrptr,  							0, &rrblock, &rrbp);  		if (error)  			goto error0; @@ -2514,6 +2504,85 @@ error0:  	return error;  } +struct xfs_btree_split_args { +	struct xfs_btree_cur	*cur; +	int			level; +	union xfs_btree_ptr	*ptrp; +	union xfs_btree_key	*key; +	struct xfs_btree_cur	**curp; +	int			*stat;		/* success/failure */ +	int			result; +	bool			kswapd;	/* allocation in kswapd context */ +	struct completion	*done; +	struct work_struct	work; +}; + +/* + * Stack switching interfaces for allocation + */ +static void +xfs_btree_split_worker( +	struct work_struct	*work) +{ +	struct xfs_btree_split_args	*args = container_of(work, +						struct xfs_btree_split_args, work); +	unsigned long		pflags; +	unsigned long		new_pflags = PF_FSTRANS; + +	/* +	 * we are in a transaction context here, but may also be doing work +	 * in kswapd context, and hence we may need to inherit that state +	 * temporarily to ensure that we don't block waiting for memory reclaim +	 * in any way. +	 */ +	if (args->kswapd) +		new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; + +	current_set_flags_nested(&pflags, new_pflags); + +	args->result = __xfs_btree_split(args->cur, args->level, args->ptrp, +					 args->key, args->curp, args->stat); +	complete(args->done); + +	current_restore_flags_nested(&pflags, new_pflags); +} + +/* + * BMBT split requests often come in with little stack to work on. Push + * them off to a worker thread so there is lots of stack to use. For the other + * btree types, just call directly to avoid the context switch overhead here. + */ +STATIC int					/* error */ +xfs_btree_split( +	struct xfs_btree_cur	*cur, +	int			level, +	union xfs_btree_ptr	*ptrp, +	union xfs_btree_key	*key, +	struct xfs_btree_cur	**curp, +	int			*stat)		/* success/failure */ +{ +	struct xfs_btree_split_args	args; +	DECLARE_COMPLETION_ONSTACK(done); + +	if (cur->bc_btnum != XFS_BTNUM_BMAP) +		return __xfs_btree_split(cur, level, ptrp, key, curp, stat); + +	args.cur = cur; +	args.level = level; +	args.ptrp = ptrp; +	args.key = key; +	args.curp = curp; +	args.stat = stat; +	args.done = &done; +	args.kswapd = current_is_kswapd(); +	INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker); +	queue_work(xfs_alloc_wq, &args.work); +	wait_for_completion(&done); +	destroy_work_on_stack(&args.work); +	return args.result; +} + +  /*   * Copy the old inode root contents into a real block and make the   * broot point to it. @@ -2549,7 +2618,7 @@ xfs_btree_new_iroot(  	pp = xfs_btree_ptr_addr(cur, 1, block);  	/* Allocate the new block. If we can't do it, we're toast. Give up. */ -	error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat); +	error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat);  	if (error)  		goto error0;  	if (*stat == 0) { @@ -2653,7 +2722,7 @@ xfs_btree_new_root(  	cur->bc_ops->init_ptr_from_cur(cur, &rptr);  	/* Allocate the new block. If we can't do it, we're toast. Give up. */ -	error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat); +	error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat);  	if (error)  		goto error0;  	if (*stat == 0) @@ -2688,8 +2757,7 @@ xfs_btree_new_root(  		lbp = bp;  		xfs_btree_buf_to_ptr(cur, lbp, &lptr);  		left = block; -		error = xfs_btree_read_buf_block(cur, &rptr, -					cur->bc_nlevels - 1, 0, &right, &rbp); +		error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);  		if (error)  			goto error0;  		bp = rbp; @@ -2700,8 +2768,7 @@ xfs_btree_new_root(  		xfs_btree_buf_to_ptr(cur, rbp, &rptr);  		right = block;  		xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); -		error = xfs_btree_read_buf_block(cur, &lptr, -					cur->bc_nlevels - 1, 0, &left, &lbp); +		error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);  		if (error)  			goto error0;  		bp = lbp; @@ -3653,8 +3720,7 @@ xfs_btree_delrec(  		rptr = cptr;  		right = block;  		rbp = bp; -		error = xfs_btree_read_buf_block(cur, &lptr, level, -							0, &left, &lbp); +		error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp);  		if (error)  			goto error0; @@ -3671,8 +3737,7 @@ xfs_btree_delrec(  		lptr = cptr;  		left = block;  		lbp = bp; -		error = xfs_btree_read_buf_block(cur, &rptr, level, -							0, &right, &rbp); +		error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp);  		if (error)  			goto error0; @@ -3744,8 +3809,7 @@ xfs_btree_delrec(  	/* If there is a right sibling, point it to the remaining block. */  	xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);  	if (!xfs_btree_ptr_is_null(cur, &cptr)) { -		error = xfs_btree_read_buf_block(cur, &cptr, level, -							0, &rrblock, &rrbp); +		error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp);  		if (error)  			goto error0;  		xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB);  | 
