diff options
Diffstat (limited to 'fs/xfs/xfs_btree.c')
| -rw-r--r-- | fs/xfs/xfs_btree.c | 778 |
1 files changed, 598 insertions, 180 deletions
diff --git a/fs/xfs/xfs_btree.c b/fs/xfs/xfs_btree.c index 36a0992dd66..cf893bc1e37 100644 --- a/fs/xfs/xfs_btree.c +++ b/fs/xfs/xfs_btree.c @@ -17,29 +17,23 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" #include "xfs_inode.h" +#include "xfs_trans.h" #include "xfs_inode_item.h" +#include "xfs_buf_item.h" #include "xfs_btree.h" -#include "xfs_btree_trace.h" -#include "xfs_ialloc.h" #include "xfs_error.h" #include "xfs_trace.h" +#include "xfs_cksum.h" +#include "xfs_alloc.h" /* * Cursor allocation zone. @@ -49,9 +43,14 @@ kmem_zone_t *xfs_btree_cur_zone; /* * Btree magic numbers. */ -const __uint32_t xfs_magics[XFS_BTNUM_MAX] = { - XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC +static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { + { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, + XFS_FIBT_MAGIC }, + { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, + XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC } }; +#define xfs_btree_magic(cur) \ + xfs_magics[!!((cur)->bc_flags & XFS_BTREE_CRC_BLOCKS)][cur->bc_btnum] STATIC int /* error (0 or EFSCORRUPTED) */ @@ -61,30 +60,38 @@ xfs_btree_check_lblock( int level, /* level of the btree block */ struct xfs_buf *bp) /* buffer for block, if any */ { - int lblock_ok; /* block passes checks */ + int lblock_ok = 1; /* block passes checks */ struct xfs_mount *mp; /* file system mount point */ mp = cur->bc_mp; - lblock_ok = - be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] && + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + lblock_ok = lblock_ok && + uuid_equal(&block->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid) && + block->bb_u.l.bb_blkno == cpu_to_be64( + bp ? bp->b_bn : XFS_BUF_DADDR_NULL); + } + + lblock_ok = lblock_ok && + be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) && be16_to_cpu(block->bb_level) == level && be16_to_cpu(block->bb_numrecs) <= cur->bc_ops->get_maxrecs(cur, level) && block->bb_u.l.bb_leftsib && - (be64_to_cpu(block->bb_u.l.bb_leftsib) == NULLDFSBNO || + (block->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO) || XFS_FSB_SANITY_CHECK(mp, - be64_to_cpu(block->bb_u.l.bb_leftsib))) && + be64_to_cpu(block->bb_u.l.bb_leftsib))) && block->bb_u.l.bb_rightsib && - (be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO || + (block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO) || XFS_FSB_SANITY_CHECK(mp, - be64_to_cpu(block->bb_u.l.bb_rightsib))); + be64_to_cpu(block->bb_u.l.bb_rightsib))); + if (unlikely(XFS_TEST_ERROR(!lblock_ok, mp, XFS_ERRTAG_BTREE_CHECK_LBLOCK, XFS_RANDOM_BTREE_CHECK_LBLOCK))) { if (bp) trace_xfs_btree_corrupt(bp, _RET_IP_); - XFS_ERROR_REPORT("xfs_btree_check_lblock", XFS_ERRLEVEL_LOW, - mp); + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); return XFS_ERROR(EFSCORRUPTED); } return 0; @@ -97,32 +104,42 @@ xfs_btree_check_sblock( int level, /* level of the btree block */ struct xfs_buf *bp) /* buffer containing block */ { + struct xfs_mount *mp; /* file system mount point */ struct xfs_buf *agbp; /* buffer for ag. freespace struct */ struct xfs_agf *agf; /* ag. freespace structure */ xfs_agblock_t agflen; /* native ag. freespace length */ - int sblock_ok; /* block passes checks */ + int sblock_ok = 1; /* block passes checks */ + mp = cur->bc_mp; agbp = cur->bc_private.a.agbp; agf = XFS_BUF_TO_AGF(agbp); agflen = be32_to_cpu(agf->agf_length); - sblock_ok = - be32_to_cpu(block->bb_magic) == xfs_magics[cur->bc_btnum] && + + if (xfs_sb_version_hascrc(&mp->m_sb)) { + sblock_ok = sblock_ok && + uuid_equal(&block->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid) && + block->bb_u.s.bb_blkno == cpu_to_be64( + bp ? bp->b_bn : XFS_BUF_DADDR_NULL); + } + + sblock_ok = sblock_ok && + be32_to_cpu(block->bb_magic) == xfs_btree_magic(cur) && be16_to_cpu(block->bb_level) == level && be16_to_cpu(block->bb_numrecs) <= cur->bc_ops->get_maxrecs(cur, level) && - (be32_to_cpu(block->bb_u.s.bb_leftsib) == NULLAGBLOCK || + (block->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK) || be32_to_cpu(block->bb_u.s.bb_leftsib) < agflen) && block->bb_u.s.bb_leftsib && - (be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK || + (block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK) || be32_to_cpu(block->bb_u.s.bb_rightsib) < agflen) && block->bb_u.s.bb_rightsib; - if (unlikely(XFS_TEST_ERROR(!sblock_ok, cur->bc_mp, + + if (unlikely(XFS_TEST_ERROR(!sblock_ok, mp, XFS_ERRTAG_BTREE_CHECK_SBLOCK, XFS_RANDOM_BTREE_CHECK_SBLOCK))) { if (bp) trace_xfs_btree_corrupt(bp, _RET_IP_); - XFS_CORRUPTION_ERROR("xfs_btree_check_sblock", - XFS_ERRLEVEL_LOW, cur->bc_mp, block); + XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_LOW, mp); return XFS_ERROR(EFSCORRUPTED); } return 0; @@ -201,6 +218,70 @@ xfs_btree_check_ptr( #endif /* + * Calculate CRC on the whole btree block and stuff it into the + * long-form btree header. + * + * Prior to calculting the CRC, pull the LSN out of the buffer log item and put + * it into the buffer so recovery knows what the last modifcation was that made + * it to disk. + */ +void +xfs_btree_lblock_calc_crc( + struct xfs_buf *bp) +{ + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + struct xfs_buf_log_item *bip = bp->b_fspriv; + + if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) + return; + if (bip) + block->bb_u.l.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); + xfs_buf_update_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); +} + +bool +xfs_btree_lblock_verify_crc( + struct xfs_buf *bp) +{ + if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) + return xfs_buf_verify_cksum(bp, XFS_BTREE_LBLOCK_CRC_OFF); + + return true; +} + +/* + * Calculate CRC on the whole btree block and stuff it into the + * short-form btree header. + * + * Prior to calculting the CRC, pull the LSN out of the buffer log item and put + * it into the buffer so recovery knows what the last modifcation was that made + * it to disk. + */ +void +xfs_btree_sblock_calc_crc( + struct xfs_buf *bp) +{ + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); + struct xfs_buf_log_item *bip = bp->b_fspriv; + + if (!xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) + return; + if (bip) + block->bb_u.s.bb_lsn = cpu_to_be64(bip->bli_item.li_lsn); + xfs_buf_update_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); +} + +bool +xfs_btree_sblock_verify_crc( + struct xfs_buf *bp) +{ + if (xfs_sb_version_hascrc(&bp->b_target->bt_mount->m_sb)) + return xfs_buf_verify_cksum(bp, XFS_BTREE_SBLOCK_CRC_OFF); + + return true; +} + +/* * Delete the btree cursor. */ void @@ -222,7 +303,7 @@ xfs_btree_del_cursor( */ for (i = 0; i < cur->bc_nlevels; i++) { if (cur->bc_bufs[i]) - xfs_btree_setbuf(cur, i, NULL); + xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[i]); else if (!error) break; } @@ -273,18 +354,19 @@ xfs_btree_dup_cursor( for (i = 0; i < new->bc_nlevels; i++) { new->bc_ptrs[i] = cur->bc_ptrs[i]; new->bc_ra[i] = cur->bc_ra[i]; - if ((bp = cur->bc_bufs[i])) { - if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, - XFS_BUF_ADDR(bp), mp->m_bsize, 0, &bp))) { + bp = cur->bc_bufs[i]; + if (bp) { + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, + XFS_BUF_ADDR(bp), mp->m_bsize, + 0, &bp, + cur->bc_ops->buf_ops); + if (error) { xfs_btree_del_cursor(new, error); *ncur = NULL; return error; } - new->bc_bufs[i] = bp; - ASSERT(bp); - ASSERT(!XFS_BUF_GETERROR(bp)); - } else - new->bc_bufs[i] = NULL; + } + new->bc_bufs[i] = bp; } *ncur = new; return 0; @@ -325,9 +407,14 @@ xfs_btree_dup_cursor( */ static inline size_t xfs_btree_block_len(struct xfs_btree_cur *cur) { - return (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? - XFS_BTREE_LBLOCK_LEN : - XFS_BTREE_SBLOCK_LEN; + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) + return XFS_BTREE_LBLOCK_CRC_LEN; + return XFS_BTREE_LBLOCK_LEN; + } + if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) + return XFS_BTREE_SBLOCK_CRC_LEN; + return XFS_BTREE_SBLOCK_LEN; } /* @@ -421,7 +508,7 @@ xfs_btree_ptr_addr( } /* - * Get a the root block which is stored in the inode. + * Get the root block which is stored in the inode. * * For now this btree implementation assumes the btree root is always * stored in the if_broot field of an inode fork. @@ -467,15 +554,11 @@ xfs_btree_get_bufl( xfs_fsblock_t fsbno, /* file system block number */ uint lock) /* lock flags for get_buf */ { - xfs_buf_t *bp; /* buffer pointer (return value) */ xfs_daddr_t d; /* real disk block address */ ASSERT(fsbno != NULLFSBLOCK); d = XFS_FSB_TO_DADDR(mp, fsbno); - bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); - ASSERT(bp); - ASSERT(!XFS_BUF_GETERROR(bp)); - return bp; + return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); } /* @@ -490,16 +573,12 @@ xfs_btree_get_bufs( xfs_agblock_t agbno, /* allocation group block number */ uint lock) /* lock flags for get_buf */ { - xfs_buf_t *bp; /* buffer pointer (return value) */ xfs_daddr_t d; /* real disk block address */ ASSERT(agno != NULLAGNUMBER); ASSERT(agbno != NULLAGBLOCK); d = XFS_AGB_TO_DADDR(mp, agno, agbno); - bp = xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); - ASSERT(bp); - ASSERT(!XFS_BUF_GETERROR(bp)); - return bp; + return xfs_trans_get_buf(tp, mp->m_ddev_targp, d, mp->m_bsize, lock); } /* @@ -516,9 +595,9 @@ xfs_btree_islastblock( block = xfs_btree_get_block(cur, level, &bp); xfs_btree_check_block(cur, block, level, bp); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - return be64_to_cpu(block->bb_u.l.bb_rightsib) == NULLDFSBNO; + return block->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO); else - return be32_to_cpu(block->bb_u.s.bb_rightsib) == NULLAGBLOCK; + return block->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK); } /* @@ -619,29 +698,28 @@ xfs_btree_offsets( * Get a buffer for the block, return it read in. * Long-form addressing. */ -int /* error */ +int xfs_btree_read_bufl( - xfs_mount_t *mp, /* file system mount point */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_fsblock_t fsbno, /* file system block number */ - uint lock, /* lock flags for read_buf */ - xfs_buf_t **bpp, /* buffer for fsbno */ - int refval) /* ref count value for buffer */ -{ - xfs_buf_t *bp; /* return value */ + struct xfs_mount *mp, /* file system mount point */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_fsblock_t fsbno, /* file system block number */ + uint lock, /* lock flags for read_buf */ + struct xfs_buf **bpp, /* buffer for fsbno */ + int refval, /* ref count value for buffer */ + const struct xfs_buf_ops *ops) +{ + struct xfs_buf *bp; /* return value */ xfs_daddr_t d; /* real disk block address */ - int error; + int error; ASSERT(fsbno != NULLFSBLOCK); d = XFS_FSB_TO_DADDR(mp, fsbno); - if ((error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, - mp->m_bsize, lock, &bp))) { + error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, d, + mp->m_bsize, lock, &bp, ops); + if (error) return error; - } - ASSERT(!bp || !XFS_BUF_GETERROR(bp)); - if (bp != NULL) { - XFS_BUF_SET_VTYPE_REF(bp, B_FS_MAP, refval); - } + if (bp) + xfs_buf_set_ref(bp, refval); *bpp = bp; return 0; } @@ -653,15 +731,16 @@ xfs_btree_read_bufl( /* ARGSUSED */ void xfs_btree_reada_bufl( - xfs_mount_t *mp, /* file system mount point */ - xfs_fsblock_t fsbno, /* file system block number */ - xfs_extlen_t count) /* count of filesystem blocks */ + struct xfs_mount *mp, /* file system mount point */ + xfs_fsblock_t fsbno, /* file system block number */ + xfs_extlen_t count, /* count of filesystem blocks */ + const struct xfs_buf_ops *ops) { xfs_daddr_t d; ASSERT(fsbno != NULLFSBLOCK); d = XFS_FSB_TO_DADDR(mp, fsbno); - xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count); + xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops); } /* @@ -671,17 +750,18 @@ xfs_btree_reada_bufl( /* ARGSUSED */ void xfs_btree_reada_bufs( - xfs_mount_t *mp, /* file system mount point */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_agblock_t agbno, /* allocation group block number */ - xfs_extlen_t count) /* count of filesystem blocks */ + struct xfs_mount *mp, /* file system mount point */ + xfs_agnumber_t agno, /* allocation group number */ + xfs_agblock_t agbno, /* allocation group block number */ + xfs_extlen_t count, /* count of filesystem blocks */ + const struct xfs_buf_ops *ops) { xfs_daddr_t d; ASSERT(agno != NULLAGNUMBER); ASSERT(agbno != NULLAGBLOCK); d = XFS_AGB_TO_DADDR(mp, agno, agbno); - xfs_baread(mp->m_ddev_targp, d, mp->m_bsize * count); + xfs_buf_readahead(mp->m_ddev_targp, d, mp->m_bsize * count, ops); } STATIC int @@ -695,12 +775,14 @@ xfs_btree_readahead_lblock( xfs_dfsbno_t right = be64_to_cpu(block->bb_u.l.bb_rightsib); if ((lr & XFS_BTCUR_LEFTRA) && left != NULLDFSBNO) { - xfs_btree_reada_bufl(cur->bc_mp, left, 1); + xfs_btree_reada_bufl(cur->bc_mp, left, 1, + cur->bc_ops->buf_ops); rval++; } if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLDFSBNO) { - xfs_btree_reada_bufl(cur->bc_mp, right, 1); + xfs_btree_reada_bufl(cur->bc_mp, right, 1, + cur->bc_ops->buf_ops); rval++; } @@ -720,13 +802,13 @@ xfs_btree_readahead_sblock( if ((lr & XFS_BTCUR_LEFTRA) && left != NULLAGBLOCK) { xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, - left, 1); + left, 1, cur->bc_ops->buf_ops); rval++; } if ((lr & XFS_BTCUR_RIGHTRA) && right != NULLAGBLOCK) { xfs_btree_reada_bufs(cur->bc_mp, cur->bc_private.a.agno, - right, 1); + right, 1, cur->bc_ops->buf_ops); rval++; } @@ -764,36 +846,68 @@ xfs_btree_readahead( return xfs_btree_readahead_sblock(cur, lr, block); } +STATIC xfs_daddr_t +xfs_btree_ptr_to_daddr( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr) +{ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { + ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO)); + + return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l)); + } else { + ASSERT(cur->bc_private.a.agno != NULLAGNUMBER); + ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK)); + + return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno, + be32_to_cpu(ptr->s)); + } +} + +/* + * Readahead @count btree blocks at the given @ptr location. + * + * We don't need to care about long or short form btrees here as we have a + * method of converting the ptr directly to a daddr available to us. + */ +STATIC void +xfs_btree_readahead_ptr( + struct xfs_btree_cur *cur, + union xfs_btree_ptr *ptr, + xfs_extlen_t count) +{ + xfs_buf_readahead(cur->bc_mp->m_ddev_targp, + xfs_btree_ptr_to_daddr(cur, ptr), + cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops); +} + /* * Set the buffer for level "lev" in the cursor to bp, releasing * any previous buffer. */ -void +STATIC void xfs_btree_setbuf( xfs_btree_cur_t *cur, /* btree cursor */ int lev, /* level in btree */ xfs_buf_t *bp) /* new buffer to set */ { struct xfs_btree_block *b; /* btree block */ - xfs_buf_t *obp; /* old buffer pointer */ - obp = cur->bc_bufs[lev]; - if (obp) - xfs_trans_brelse(cur->bc_tp, obp); + if (cur->bc_bufs[lev]) + xfs_trans_brelse(cur->bc_tp, cur->bc_bufs[lev]); cur->bc_bufs[lev] = bp; cur->bc_ra[lev] = 0; - if (!bp) - return; + b = XFS_BUF_TO_BLOCK(bp); if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - if (be64_to_cpu(b->bb_u.l.bb_leftsib) == NULLDFSBNO) + if (b->bb_u.l.bb_leftsib == cpu_to_be64(NULLDFSBNO)) cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; - if (be64_to_cpu(b->bb_u.l.bb_rightsib) == NULLDFSBNO) + if (b->bb_u.l.bb_rightsib == cpu_to_be64(NULLDFSBNO)) cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; } else { - if (be32_to_cpu(b->bb_u.s.bb_leftsib) == NULLAGBLOCK) + if (b->bb_u.s.bb_leftsib == cpu_to_be32(NULLAGBLOCK)) cur->bc_ra[lev] |= XFS_BTCUR_LEFTRA; - if (be32_to_cpu(b->bb_u.s.bb_rightsib) == NULLAGBLOCK) + if (b->bb_u.s.bb_rightsib == cpu_to_be32(NULLAGBLOCK)) cur->bc_ra[lev] |= XFS_BTCUR_RIGHTRA; } } @@ -804,9 +918,9 @@ xfs_btree_ptr_is_null( union xfs_btree_ptr *ptr) { if (cur->bc_flags & XFS_BTREE_LONG_PTRS) - return be64_to_cpu(ptr->l) == NULLDFSBNO; + return ptr->l == cpu_to_be64(NULLDFSBNO); else - return be32_to_cpu(ptr->s) == NULLAGBLOCK; + return ptr->s == cpu_to_be32(NULLAGBLOCK); } STATIC void @@ -867,29 +981,88 @@ xfs_btree_set_sibling( } } -STATIC void +void +xfs_btree_init_block_int( + struct xfs_mount *mp, + struct xfs_btree_block *buf, + xfs_daddr_t blkno, + __u32 magic, + __u16 level, + __u16 numrecs, + __u64 owner, + unsigned int flags) +{ + buf->bb_magic = cpu_to_be32(magic); + buf->bb_level = cpu_to_be16(level); + buf->bb_numrecs = cpu_to_be16(numrecs); + + if (flags & XFS_BTREE_LONG_PTRS) { + buf->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); + buf->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); + if (flags & XFS_BTREE_CRC_BLOCKS) { + buf->bb_u.l.bb_blkno = cpu_to_be64(blkno); + buf->bb_u.l.bb_owner = cpu_to_be64(owner); + uuid_copy(&buf->bb_u.l.bb_uuid, &mp->m_sb.sb_uuid); + buf->bb_u.l.bb_pad = 0; + buf->bb_u.l.bb_lsn = 0; + } + } else { + /* owner is a 32 bit value on short blocks */ + __u32 __owner = (__u32)owner; + + buf->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); + buf->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); + if (flags & XFS_BTREE_CRC_BLOCKS) { + buf->bb_u.s.bb_blkno = cpu_to_be64(blkno); + buf->bb_u.s.bb_owner = cpu_to_be32(__owner); + uuid_copy(&buf->bb_u.s.bb_uuid, &mp->m_sb.sb_uuid); + buf->bb_u.s.bb_lsn = 0; + } + } +} + +void xfs_btree_init_block( + struct xfs_mount *mp, + struct xfs_buf *bp, + __u32 magic, + __u16 level, + __u16 numrecs, + __u64 owner, + unsigned int flags) +{ + xfs_btree_init_block_int(mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, + magic, level, numrecs, owner, flags); +} + +STATIC void +xfs_btree_init_block_cur( struct xfs_btree_cur *cur, + struct xfs_buf *bp, int level, - int numrecs, - struct xfs_btree_block *new) /* new block */ + int numrecs) { - new->bb_magic = cpu_to_be32(xfs_magics[cur->bc_btnum]); - new->bb_level = cpu_to_be16(level); - new->bb_numrecs = cpu_to_be16(numrecs); + __u64 owner; - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - new->bb_u.l.bb_leftsib = cpu_to_be64(NULLDFSBNO); - new->bb_u.l.bb_rightsib = cpu_to_be64(NULLDFSBNO); - } else { - new->bb_u.s.bb_leftsib = cpu_to_be32(NULLAGBLOCK); - new->bb_u.s.bb_rightsib = cpu_to_be32(NULLAGBLOCK); - } + /* + * we can pull the owner from the cursor right now as the different + * owners align directly with the pointer size of the btree. This may + * change in future, but is safe for current users of the generic btree + * code. + */ + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + owner = cur->bc_private.b.ip->i_ino; + else + owner = cur->bc_private.a.agno; + + xfs_btree_init_block_int(cur->bc_mp, XFS_BUF_TO_BLOCK(bp), bp->b_bn, + xfs_btree_magic(cur), level, numrecs, + owner, cur->bc_flags); } /* * Return true if ptr is the last record in the btree and - * we need to track updateѕ to this record. The decision + * we need to track updates to this record. The decision * will be further refined in the update_lastrec method. */ STATIC int @@ -926,24 +1099,6 @@ xfs_btree_buf_to_ptr( } } -STATIC xfs_daddr_t -xfs_btree_ptr_to_daddr( - struct xfs_btree_cur *cur, - union xfs_btree_ptr *ptr) -{ - if (cur->bc_flags & XFS_BTREE_LONG_PTRS) { - ASSERT(be64_to_cpu(ptr->l) != NULLDFSBNO); - - return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l)); - } else { - ASSERT(cur->bc_private.a.agno != NULLAGNUMBER); - ASSERT(be32_to_cpu(ptr->s) != NULLAGBLOCK); - - return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno, - be32_to_cpu(ptr->s)); - } -} - STATIC void xfs_btree_set_refs( struct xfs_btree_cur *cur, @@ -952,13 +1107,14 @@ xfs_btree_set_refs( switch (cur->bc_btnum) { case XFS_BTNUM_BNO: case XFS_BTNUM_CNT: - XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_ALLOC_BTREE_REF); + xfs_buf_set_ref(bp, XFS_ALLOC_BTREE_REF); break; case XFS_BTNUM_INO: - XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_INOMAP, XFS_INO_BTREE_REF); + case XFS_BTNUM_FINO: + xfs_buf_set_ref(bp, XFS_INO_BTREE_REF); break; case XFS_BTNUM_BMAP: - XFS_BUF_SET_VTYPE_REF(*bpp, B_FS_MAP, XFS_BMAP_BTREE_REF); + xfs_buf_set_ref(bp, XFS_BMAP_BTREE_REF); break; default: ASSERT(0); @@ -977,15 +1133,16 @@ xfs_btree_get_buf_block( xfs_daddr_t d; /* need to sort out how callers deal with failures first */ - ASSERT(!(flags & XFS_BUF_TRYLOCK)); + ASSERT(!(flags & XBF_TRYLOCK)); d = xfs_btree_ptr_to_daddr(cur, ptr); *bpp = xfs_trans_get_buf(cur->bc_tp, mp->m_ddev_targp, d, mp->m_bsize, flags); - ASSERT(*bpp); - ASSERT(!XFS_BUF_GETERROR(*bpp)); + if (!*bpp) + return ENOMEM; + (*bpp)->b_ops = cur->bc_ops->buf_ops; *block = XFS_BUF_TO_BLOCK(*bpp); return 0; } @@ -998,7 +1155,6 @@ STATIC int xfs_btree_read_buf_block( struct xfs_btree_cur *cur, union xfs_btree_ptr *ptr, - int level, int flags, struct xfs_btree_block **block, struct xfs_buf **bpp) @@ -1008,24 +1164,18 @@ xfs_btree_read_buf_block( int error; /* need to sort out how callers deal with failures first */ - ASSERT(!(flags & XFS_BUF_TRYLOCK)); + ASSERT(!(flags & XBF_TRYLOCK)); d = xfs_btree_ptr_to_daddr(cur, ptr); error = xfs_trans_read_buf(mp, cur->bc_tp, mp->m_ddev_targp, d, - mp->m_bsize, flags, bpp); + mp->m_bsize, flags, bpp, + cur->bc_ops->buf_ops); if (error) return error; - ASSERT(*bpp != NULL); - ASSERT(!XFS_BUF_GETERROR(*bpp)); - xfs_btree_set_refs(cur, *bpp); *block = XFS_BUF_TO_BLOCK(*bpp); - - error = xfs_btree_check_block(cur, *block, level, *bpp); - if (error) - xfs_trans_brelse(cur->bc_tp, *bpp); - return error; + return 0; } /* @@ -1141,6 +1291,7 @@ xfs_btree_log_keys( XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); if (bp) { + xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); xfs_trans_log_buf(cur->bc_tp, bp, xfs_btree_key_offset(cur, first), xfs_btree_key_offset(cur, last + 1) - 1); @@ -1165,6 +1316,7 @@ xfs_btree_log_recs( XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); XFS_BTREE_TRACE_ARGBII(cur, bp, first, last); + xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); xfs_trans_log_buf(cur->bc_tp, bp, xfs_btree_rec_offset(cur, first), xfs_btree_rec_offset(cur, last + 1) - 1); @@ -1189,6 +1341,7 @@ xfs_btree_log_ptrs( struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); int level = xfs_btree_get_level(block); + xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); xfs_trans_log_buf(cur->bc_tp, bp, xfs_btree_ptr_offset(cur, first, level), xfs_btree_ptr_offset(cur, last + 1, level) - 1); @@ -1217,7 +1370,12 @@ xfs_btree_log_block( offsetof(struct xfs_btree_block, bb_numrecs), offsetof(struct xfs_btree_block, bb_u.s.bb_leftsib), offsetof(struct xfs_btree_block, bb_u.s.bb_rightsib), - XFS_BTREE_SBLOCK_LEN + offsetof(struct xfs_btree_block, bb_u.s.bb_blkno), + offsetof(struct xfs_btree_block, bb_u.s.bb_lsn), + offsetof(struct xfs_btree_block, bb_u.s.bb_uuid), + offsetof(struct xfs_btree_block, bb_u.s.bb_owner), + offsetof(struct xfs_btree_block, bb_u.s.bb_crc), + XFS_BTREE_SBLOCK_CRC_LEN }; static const short loffsets[] = { /* table of offsets (long) */ offsetof(struct xfs_btree_block, bb_magic), @@ -1225,17 +1383,40 @@ xfs_btree_log_block( offsetof(struct xfs_btree_block, bb_numrecs), offsetof(struct xfs_btree_block, bb_u.l.bb_leftsib), offsetof(struct xfs_btree_block, bb_u.l.bb_rightsib), - XFS_BTREE_LBLOCK_LEN + offsetof(struct xfs_btree_block, bb_u.l.bb_blkno), + offsetof(struct xfs_btree_block, bb_u.l.bb_lsn), + offsetof(struct xfs_btree_block, bb_u.l.bb_uuid), + offsetof(struct xfs_btree_block, bb_u.l.bb_owner), + offsetof(struct xfs_btree_block, bb_u.l.bb_crc), + offsetof(struct xfs_btree_block, bb_u.l.bb_pad), + XFS_BTREE_LBLOCK_CRC_LEN }; XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); XFS_BTREE_TRACE_ARGBI(cur, bp, fields); if (bp) { + int nbits; + + if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) { + /* + * We don't log the CRC when updating a btree + * block but instead recreate it during log + * recovery. As the log buffers have checksums + * of their own this is safe and avoids logging a crc + * update in a lot of places. + */ + if (fields == XFS_BB_ALL_BITS) + fields = XFS_BB_ALL_BITS_CRC; + nbits = XFS_BB_NUM_BITS_CRC; + } else { + nbits = XFS_BB_NUM_BITS; + } xfs_btree_offsets(fields, (cur->bc_flags & XFS_BTREE_LONG_PTRS) ? loffsets : soffsets, - XFS_BB_NUM_BITS, &first, &last); + nbits, &first, &last); + xfs_trans_buf_set_type(cur->bc_tp, bp, XFS_BLFT_BTREE_BUF); xfs_trans_log_buf(cur->bc_tp, bp, first, last); } else { xfs_trans_log_inode(cur->bc_tp, cur->bc_private.b.ip, @@ -1330,8 +1511,8 @@ xfs_btree_increment( union xfs_btree_ptr *ptrp; ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); - error = xfs_btree_read_buf_block(cur, ptrp, --lev, - 0, &block, &bp); + --lev; + error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); if (error) goto error0; @@ -1429,8 +1610,8 @@ xfs_btree_decrement( union xfs_btree_ptr *ptrp; ptrp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[lev], block); - error = xfs_btree_read_buf_block(cur, ptrp, --lev, - 0, &block, &bp); + --lev; + error = xfs_btree_read_buf_block(cur, ptrp, 0, &block, &bp); if (error) goto error0; xfs_btree_setbuf(cur, lev, bp); @@ -1480,7 +1661,7 @@ xfs_btree_lookup_get_block( return 0; } - error = xfs_btree_read_buf_block(cur, pp, level, 0, blkp, &bp); + error = xfs_btree_read_buf_block(cur, pp, 0, blkp, &bp); if (error) return error; @@ -1512,7 +1693,7 @@ xfs_lookup_get_search_key( /* * Lookup the record. The cursor is made to point to it, based on dir. - * Return 0 if can't find any such record, 1 for success. + * stat is set to 0 if can't find any such record, 1 for success. */ int /* error */ xfs_btree_lookup( @@ -1831,7 +2012,7 @@ xfs_btree_lshift( goto out0; /* Set up the left neighbor as "left". */ - error = xfs_btree_read_buf_block(cur, &lptr, level, 0, &left, &lbp); + error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); if (error) goto error0; @@ -2015,7 +2196,7 @@ xfs_btree_rshift( goto out0; /* Set up the right neighbor as "right". */ - error = xfs_btree_read_buf_block(cur, &rptr, level, 0, &right, &rbp); + error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); if (error) goto error0; @@ -2143,7 +2324,7 @@ error1: * record (to be inserted into parent). */ STATIC int /* error */ -xfs_btree_split( +__xfs_btree_split( struct xfs_btree_cur *cur, int level, union xfs_btree_ptr *ptrp, @@ -2185,7 +2366,7 @@ xfs_btree_split( xfs_btree_buf_to_ptr(cur, lbp, &lptr); /* Allocate the new block. If we can't do it, we're toast. Give up. */ - error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, 1, stat); + error = cur->bc_ops->alloc_block(cur, &lptr, &rptr, stat); if (error) goto error0; if (*stat == 0) @@ -2198,7 +2379,7 @@ xfs_btree_split( goto error0; /* Fill in the btree header for the new right block. */ - xfs_btree_init_block(cur, xfs_btree_get_level(left), 0, right); + xfs_btree_init_block_cur(cur, rbp, xfs_btree_get_level(left), 0); /* * Split the entries between the old and the new block evenly. @@ -2283,7 +2464,7 @@ xfs_btree_split( * point back to right instead of to left. */ if (!xfs_btree_ptr_is_null(cur, &rrptr)) { - error = xfs_btree_read_buf_block(cur, &rrptr, level, + error = xfs_btree_read_buf_block(cur, &rrptr, 0, &rrblock, &rrbp); if (error) goto error0; @@ -2323,6 +2504,85 @@ error0: return error; } +struct xfs_btree_split_args { + struct xfs_btree_cur *cur; + int level; + union xfs_btree_ptr *ptrp; + union xfs_btree_key *key; + struct xfs_btree_cur **curp; + int *stat; /* success/failure */ + int result; + bool kswapd; /* allocation in kswapd context */ + struct completion *done; + struct work_struct work; +}; + +/* + * Stack switching interfaces for allocation + */ +static void +xfs_btree_split_worker( + struct work_struct *work) +{ + struct xfs_btree_split_args *args = container_of(work, + struct xfs_btree_split_args, work); + unsigned long pflags; + unsigned long new_pflags = PF_FSTRANS; + + /* + * we are in a transaction context here, but may also be doing work + * in kswapd context, and hence we may need to inherit that state + * temporarily to ensure that we don't block waiting for memory reclaim + * in any way. + */ + if (args->kswapd) + new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; + + current_set_flags_nested(&pflags, new_pflags); + + args->result = __xfs_btree_split(args->cur, args->level, args->ptrp, + args->key, args->curp, args->stat); + complete(args->done); + + current_restore_flags_nested(&pflags, new_pflags); +} + +/* + * BMBT split requests often come in with little stack to work on. Push + * them off to a worker thread so there is lots of stack to use. For the other + * btree types, just call directly to avoid the context switch overhead here. + */ +STATIC int /* error */ +xfs_btree_split( + struct xfs_btree_cur *cur, + int level, + union xfs_btree_ptr *ptrp, + union xfs_btree_key *key, + struct xfs_btree_cur **curp, + int *stat) /* success/failure */ +{ + struct xfs_btree_split_args args; + DECLARE_COMPLETION_ONSTACK(done); + + if (cur->bc_btnum != XFS_BTNUM_BMAP) + return __xfs_btree_split(cur, level, ptrp, key, curp, stat); + + args.cur = cur; + args.level = level; + args.ptrp = ptrp; + args.key = key; + args.curp = curp; + args.stat = stat; + args.done = &done; + args.kswapd = current_is_kswapd(); + INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker); + queue_work(xfs_alloc_wq, &args.work); + wait_for_completion(&done); + destroy_work_on_stack(&args.work); + return args.result; +} + + /* * Copy the old inode root contents into a real block and make the * broot point to it. @@ -2358,7 +2618,7 @@ xfs_btree_new_iroot( pp = xfs_btree_ptr_addr(cur, 1, block); /* Allocate the new block. If we can't do it, we're toast. Give up. */ - error = cur->bc_ops->alloc_block(cur, pp, &nptr, 1, stat); + error = cur->bc_ops->alloc_block(cur, pp, &nptr, stat); if (error) goto error0; if (*stat == 0) { @@ -2372,7 +2632,17 @@ xfs_btree_new_iroot( if (error) goto error0; + /* + * we can't just memcpy() the root in for CRC enabled btree blocks. + * In that case have to also ensure the blkno remains correct + */ memcpy(cblock, block, xfs_btree_block_len(cur)); + if (cur->bc_flags & XFS_BTREE_CRC_BLOCKS) { + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + cblock->bb_u.l.bb_blkno = cpu_to_be64(cbp->b_bn); + else + cblock->bb_u.s.bb_blkno = cpu_to_be64(cbp->b_bn); + } be16_add_cpu(&block->bb_level, 1); xfs_btree_set_numrecs(block, 1); @@ -2452,7 +2722,7 @@ xfs_btree_new_root( cur->bc_ops->init_ptr_from_cur(cur, &rptr); /* Allocate the new block. If we can't do it, we're toast. Give up. */ - error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, 1, stat); + error = cur->bc_ops->alloc_block(cur, &rptr, &lptr, stat); if (error) goto error0; if (*stat == 0) @@ -2487,8 +2757,7 @@ xfs_btree_new_root( lbp = bp; xfs_btree_buf_to_ptr(cur, lbp, &lptr); left = block; - error = xfs_btree_read_buf_block(cur, &rptr, - cur->bc_nlevels - 1, 0, &right, &rbp); + error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); if (error) goto error0; bp = rbp; @@ -2499,15 +2768,14 @@ xfs_btree_new_root( xfs_btree_buf_to_ptr(cur, rbp, &rptr); right = block; xfs_btree_get_sibling(cur, right, &lptr, XFS_BB_LEFTSIB); - error = xfs_btree_read_buf_block(cur, &lptr, - cur->bc_nlevels - 1, 0, &left, &lbp); + error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); if (error) goto error0; bp = lbp; nptr = 2; } /* Fill in the new block's btree header and log it. */ - xfs_btree_init_block(cur, cur->bc_nlevels, 2, new); + xfs_btree_init_block_cur(cur, nbp, cur->bc_nlevels, 2); xfs_btree_log_block(cur, nbp, XFS_BB_ALL_BITS); ASSERT(!xfs_btree_ptr_is_null(cur, &lptr) && !xfs_btree_ptr_is_null(cur, &rptr)); @@ -2574,7 +2842,6 @@ xfs_btree_make_block_unfull( if (numrecs < cur->bc_ops->get_dmaxrecs(cur, level)) { /* A root block that can be made bigger. */ - xfs_iroot_realloc(ip, 1, cur->bc_private.b.whichfork); } else { /* A root block that needs replacing */ @@ -3016,6 +3283,43 @@ out0: return 0; } +/* + * Kill the current root node, and replace it with it's only child node. + */ +STATIC int +xfs_btree_kill_root( + struct xfs_btree_cur *cur, + struct xfs_buf *bp, + int level, + union xfs_btree_ptr *newroot) +{ + int error; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_ENTRY); + XFS_BTREE_STATS_INC(cur, killroot); + + /* + * Update the root pointer, decreasing the level by 1 and then + * free the old root. + */ + cur->bc_ops->set_root(cur, newroot, -1); + + error = cur->bc_ops->free_block(cur, bp); + if (error) { + XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR); + return error; + } + + XFS_BTREE_STATS_INC(cur, free); + + cur->bc_bufs[level] = NULL; + cur->bc_ra[level] = 0; + cur->bc_nlevels--; + + XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); + return 0; +} + STATIC int xfs_btree_dec_cursor( struct xfs_btree_cur *cur, @@ -3200,7 +3504,7 @@ xfs_btree_delrec( * Make it the new root of the btree. */ pp = xfs_btree_ptr_addr(cur, 1, block); - error = cur->bc_ops->kill_root(cur, bp, level, pp); + error = xfs_btree_kill_root(cur, bp, level, pp); if (error) goto error0; } else if (level > 0) { @@ -3416,8 +3720,7 @@ xfs_btree_delrec( rptr = cptr; right = block; rbp = bp; - error = xfs_btree_read_buf_block(cur, &lptr, level, - 0, &left, &lbp); + error = xfs_btree_read_buf_block(cur, &lptr, 0, &left, &lbp); if (error) goto error0; @@ -3434,8 +3737,7 @@ xfs_btree_delrec( lptr = cptr; left = block; lbp = bp; - error = xfs_btree_read_buf_block(cur, &rptr, level, - 0, &right, &rbp); + error = xfs_btree_read_buf_block(cur, &rptr, 0, &right, &rbp); if (error) goto error0; @@ -3507,8 +3809,7 @@ xfs_btree_delrec( /* If there is a right sibling, point it to the remaining block. */ xfs_btree_get_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); if (!xfs_btree_ptr_is_null(cur, &cptr)) { - error = xfs_btree_read_buf_block(cur, &cptr, level, - 0, &rrblock, &rrbp); + error = xfs_btree_read_buf_block(cur, &cptr, 0, &rrblock, &rrbp); if (error) goto error0; xfs_btree_set_sibling(cur, rrblock, &lptr, XFS_BB_LEFTSIB); @@ -3649,3 +3950,120 @@ xfs_btree_get_rec( *stat = 1; return 0; } + +/* + * Change the owner of a btree. + * + * The mechanism we use here is ordered buffer logging. Because we don't know + * how many buffers were are going to need to modify, we don't really want to + * have to make transaction reservations for the worst case of every buffer in a + * full size btree as that may be more space that we can fit in the log.... + * + * We do the btree walk in the most optimal manner possible - we have sibling + * pointers so we can just walk all the blocks on each level from left to right + * in a single pass, and then move to the next level and do the same. We can + * also do readahead on the sibling pointers to get IO moving more quickly, + * though for slow disks this is unlikely to make much difference to performance + * as the amount of CPU work we have to do before moving to the next block is + * relatively small. + * + * For each btree block that we load, modify the owner appropriately, set the + * buffer as an ordered buffer and log it appropriately. We need to ensure that + * we mark the region we change dirty so that if the buffer is relogged in + * a subsequent transaction the changes we make here as an ordered buffer are + * correctly relogged in that transaction. If we are in recovery context, then + * just queue the modified buffer as delayed write buffer so the transaction + * recovery completion writes the changes to disk. + */ +static int +xfs_btree_block_change_owner( + struct xfs_btree_cur *cur, + int level, + __uint64_t new_owner, + struct list_head *buffer_list) +{ + struct xfs_btree_block *block; + struct xfs_buf *bp; + union xfs_btree_ptr rptr; + + /* do right sibling readahead */ + xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA); + + /* modify the owner */ + block = xfs_btree_get_block(cur, level, &bp); + if (cur->bc_flags & XFS_BTREE_LONG_PTRS) + block->bb_u.l.bb_owner = cpu_to_be64(new_owner); + else + block->bb_u.s.bb_owner = cpu_to_be32(new_owner); + + /* + * If the block is a root block hosted in an inode, we might not have a + * buffer pointer here and we shouldn't attempt to log the change as the + * information is already held in the inode and discarded when the root + * block is formatted into the on-disk inode fork. We still change it, + * though, so everything is consistent in memory. + */ + if (bp) { + if (cur->bc_tp) { + xfs_trans_ordered_buf(cur->bc_tp, bp); + xfs_btree_log_block(cur, bp, XFS_BB_OWNER); + } else { + xfs_buf_delwri_queue(bp, buffer_list); + } + } else { + ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE); + ASSERT(level == cur->bc_nlevels - 1); + } + + /* now read rh sibling block for next iteration */ + xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB); + if (xfs_btree_ptr_is_null(cur, &rptr)) + return ENOENT; + + return xfs_btree_lookup_get_block(cur, level, &rptr, &block); +} + +int +xfs_btree_change_owner( + struct xfs_btree_cur *cur, + __uint64_t new_owner, + struct list_head *buffer_list) +{ + union xfs_btree_ptr lptr; + int level; + struct xfs_btree_block *block = NULL; + int error = 0; + + cur->bc_ops->init_ptr_from_cur(cur, &lptr); + + /* for each level */ + for (level = cur->bc_nlevels - 1; level >= 0; level--) { + /* grab the left hand block */ + error = xfs_btree_lookup_get_block(cur, level, &lptr, &block); + if (error) + return error; + + /* readahead the left most block for the next level down */ + if (level > 0) { + union xfs_btree_ptr *ptr; + + ptr = xfs_btree_ptr_addr(cur, 1, block); + xfs_btree_readahead_ptr(cur, ptr, 1); + + /* save for the next iteration of the loop */ + lptr = *ptr; + } + + /* for each buffer in the level */ + do { + error = xfs_btree_block_change_owner(cur, level, + new_owner, + buffer_list); + } while (!error); + + if (error != ENOENT) + return error; + } + + return 0; +} |
