diff options
Diffstat (limited to 'fs/xfs/xfs_alloc.c')
| -rw-r--r-- | fs/xfs/xfs_alloc.c | 1690 |
1 files changed, 848 insertions, 842 deletions
diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index bdbfbbee495..d43813267a8 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -17,106 +17,153 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_shared.h" +#include "xfs_trans_resv.h" #include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" -#include "xfs_dir2.h" -#include "xfs_dmapi.h" #include "xfs_mount.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" -#include "xfs_dir2_sf.h" -#include "xfs_attr_sf.h" -#include "xfs_dinode.h" #include "xfs_inode.h" #include "xfs_btree.h" -#include "xfs_ialloc.h" +#include "xfs_alloc_btree.h" #include "xfs_alloc.h" +#include "xfs_extent_busy.h" #include "xfs_error.h" +#include "xfs_cksum.h" +#include "xfs_trace.h" +#include "xfs_trans.h" +#include "xfs_buf_item.h" +#include "xfs_log.h" +struct workqueue_struct *xfs_alloc_wq; #define XFS_ABSDIFF(a,b) (((a) <= (b)) ? ((b) - (a)) : ((a) - (b))) #define XFSA_FIXUP_BNO_OK 1 #define XFSA_FIXUP_CNT_OK 2 -STATIC int -xfs_alloc_search_busy(xfs_trans_t *tp, - xfs_agnumber_t agno, - xfs_agblock_t bno, - xfs_extlen_t len); - -#if defined(XFS_ALLOC_TRACE) -ktrace_t *xfs_alloc_trace_buf; - -#define TRACE_ALLOC(s,a) \ - xfs_alloc_trace_alloc(__FUNCTION__, s, a, __LINE__) -#define TRACE_FREE(s,a,b,x,f) \ - xfs_alloc_trace_free(__FUNCTION__, s, mp, a, b, x, f, __LINE__) -#define TRACE_MODAGF(s,a,f) \ - xfs_alloc_trace_modagf(__FUNCTION__, s, mp, a, f, __LINE__) -#define TRACE_BUSY(__FUNCTION__,s,ag,agb,l,sl,tp) \ - xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSY, __LINE__) -#define TRACE_UNBUSY(__FUNCTION__,s,ag,sl,tp) \ - xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, -1, -1, sl, tp, XFS_ALLOC_KTRACE_UNBUSY, __LINE__) -#define TRACE_BUSYSEARCH(__FUNCTION__,s,ag,agb,l,sl,tp) \ - xfs_alloc_trace_busy(__FUNCTION__, s, mp, ag, agb, l, sl, tp, XFS_ALLOC_KTRACE_BUSYSEARCH, __LINE__) -#else -#define TRACE_ALLOC(s,a) -#define TRACE_FREE(s,a,b,x,f) -#define TRACE_MODAGF(s,a,f) -#define TRACE_BUSY(s,a,ag,agb,l,sl,tp) -#define TRACE_UNBUSY(fname,s,ag,sl,tp) -#define TRACE_BUSYSEARCH(fname,s,ag,agb,l,sl,tp) -#endif /* XFS_ALLOC_TRACE */ - -/* - * Prototypes for per-ag allocation routines - */ - STATIC int xfs_alloc_ag_vextent_exact(xfs_alloc_arg_t *); STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, - xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); + xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); + +/* + * Lookup the record equal to [bno, len] in the btree given by cur. + */ +STATIC int /* error */ +xfs_alloc_lookup_eq( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat) /* success/failure */ +{ + cur->bc_rec.a.ar_startblock = bno; + cur->bc_rec.a.ar_blockcount = len; + return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat); +} + +/* + * Lookup the first record greater than or equal to [bno, len] + * in the btree given by cur. + */ +int /* error */ +xfs_alloc_lookup_ge( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat) /* success/failure */ +{ + cur->bc_rec.a.ar_startblock = bno; + cur->bc_rec.a.ar_blockcount = len; + return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat); +} + +/* + * Lookup the first record less than or equal to [bno, len] + * in the btree given by cur. + */ +int /* error */ +xfs_alloc_lookup_le( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len, /* length of extent */ + int *stat) /* success/failure */ +{ + cur->bc_rec.a.ar_startblock = bno; + cur->bc_rec.a.ar_blockcount = len; + return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat); +} /* - * Internal functions. + * Update the record referred to by cur to the value given + * by [bno, len]. + * This either works (return 0) or gets an EFSCORRUPTED error. */ +STATIC int /* error */ +xfs_alloc_update( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t bno, /* starting block of extent */ + xfs_extlen_t len) /* length of extent */ +{ + union xfs_btree_rec rec; + + rec.alloc.ar_startblock = cpu_to_be32(bno); + rec.alloc.ar_blockcount = cpu_to_be32(len); + return xfs_btree_update(cur, &rec); +} + +/* + * Get the data from the pointed-to record. + */ +int /* error */ +xfs_alloc_get_rec( + struct xfs_btree_cur *cur, /* btree cursor */ + xfs_agblock_t *bno, /* output: starting block of extent */ + xfs_extlen_t *len, /* output: length of extent */ + int *stat) /* output: success/failure */ +{ + union xfs_btree_rec *rec; + int error; + + error = xfs_btree_get_rec(cur, &rec, stat); + if (!error && *stat == 1) { + *bno = be32_to_cpu(rec->alloc.ar_startblock); + *len = be32_to_cpu(rec->alloc.ar_blockcount); + } + return error; +} /* * Compute aligned version of the found extent. * Takes alignment and min length into account. */ -STATIC int /* success (>= minlen) */ +STATIC void xfs_alloc_compute_aligned( + xfs_alloc_arg_t *args, /* allocation argument structure */ xfs_agblock_t foundbno, /* starting block in found extent */ xfs_extlen_t foundlen, /* length in found extent */ - xfs_extlen_t alignment, /* alignment for allocation */ - xfs_extlen_t minlen, /* minimum length for allocation */ xfs_agblock_t *resbno, /* result block number */ xfs_extlen_t *reslen) /* result length */ { xfs_agblock_t bno; - xfs_extlen_t diff; xfs_extlen_t len; - if (alignment > 1 && foundlen >= minlen) { - bno = roundup(foundbno, alignment); - diff = bno - foundbno; - len = diff >= foundlen ? 0 : foundlen - diff; + /* Trim busy sections out of found extent */ + xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len); + + if (args->alignment > 1 && len >= args->minlen) { + xfs_agblock_t aligned_bno = roundup(bno, args->alignment); + xfs_extlen_t diff = aligned_bno - bno; + + *resbno = aligned_bno; + *reslen = diff >= len ? 0 : len - diff; } else { - bno = foundbno; - len = foundlen; + *resbno = bno; + *reslen = len; } - *resbno = bno; - *reslen = len; - return len >= minlen; } /* @@ -128,6 +175,7 @@ xfs_alloc_compute_diff( xfs_agblock_t wantbno, /* target starting block */ xfs_extlen_t wantlen, /* target length */ xfs_extlen_t alignment, /* target alignment */ + char userdata, /* are we allocating data? */ xfs_agblock_t freebno, /* freespace's starting block */ xfs_extlen_t freelen, /* freespace's length */ xfs_agblock_t *newbnop) /* result: best start block from free */ @@ -142,7 +190,14 @@ xfs_alloc_compute_diff( ASSERT(freelen >= wantlen); freeend = freebno + freelen; wantend = wantbno + wantlen; - if (freebno >= wantbno) { + /* + * We want to allocate from the start of a free extent if it is past + * the desired block or if we are allocating user data and the free + * extent is before desired block. The second case is there to allow + * for contiguous allocation from the remaining free space if the file + * grows in the short term. + */ + if (freebno >= wantbno || (userdata && freeend < wantend)) { if ((newbno1 = roundup(freebno, alignment)) >= freeend) newbno1 = NULLAGBLOCK; } else if (freeend >= wantend && alignment > 1) { @@ -202,16 +257,14 @@ xfs_alloc_fix_len( k = rlen % args->prod; if (k == args->mod) return; - if (k > args->mod) { - if ((int)(rlen = rlen - k - args->mod) < (int)args->minlen) - return; - } else { - if ((int)(rlen = rlen - args->prod - (args->mod - k)) < - (int)args->minlen) - return; - } - ASSERT(rlen >= args->minlen); - ASSERT(rlen <= args->maxlen); + if (k > args->mod) + rlen = rlen - (k - args->mod); + else + rlen = rlen - args->prod + (args->mod - k); + if ((int)rlen < (int)args->minlen) + return; + ASSERT(rlen >= args->minlen && rlen <= args->maxlen); + ASSERT(rlen % args->prod == args->mod); args->len = rlen; } @@ -230,7 +283,6 @@ xfs_alloc_fix_minleft( return 1; agf = XFS_BUF_TO_AGF(args->agbp); diff = be32_to_cpu(agf->agf_freeblks) - + be32_to_cpu(agf->agf_flcount) - args->len - args->minleft; if (diff >= 0) return 1; @@ -295,21 +347,20 @@ xfs_alloc_fixup_trees( return error; XFS_WANT_CORRUPTED_RETURN(i == 1); } + #ifdef DEBUG - { - xfs_alloc_block_t *bnoblock; - xfs_alloc_block_t *cntblock; - - if (bno_cur->bc_nlevels == 1 && - cnt_cur->bc_nlevels == 1) { - bnoblock = XFS_BUF_TO_ALLOC_BLOCK(bno_cur->bc_bufs[0]); - cntblock = XFS_BUF_TO_ALLOC_BLOCK(cnt_cur->bc_bufs[0]); - XFS_WANT_CORRUPTED_RETURN( - be16_to_cpu(bnoblock->bb_numrecs) == - be16_to_cpu(cntblock->bb_numrecs)); - } + if (bno_cur->bc_nlevels == 1 && cnt_cur->bc_nlevels == 1) { + struct xfs_btree_block *bnoblock; + struct xfs_btree_block *cntblock; + + bnoblock = XFS_BUF_TO_BLOCK(bno_cur->bc_bufs[0]); + cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]); + + XFS_WANT_CORRUPTED_RETURN( + bnoblock->bb_numrecs == cntblock->bb_numrecs); } #endif + /* * Deal with all four cases: the allocated record is contained * within the freespace record, so we can have new freespace @@ -334,7 +385,7 @@ xfs_alloc_fixup_trees( /* * Delete the entry from the by-size btree. */ - if ((error = xfs_alloc_delete(cnt_cur, &i))) + if ((error = xfs_btree_delete(cnt_cur, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 1); /* @@ -344,7 +395,7 @@ xfs_alloc_fixup_trees( if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 0); - if ((error = xfs_alloc_insert(cnt_cur, &i))) + if ((error = xfs_btree_insert(cnt_cur, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 1); } @@ -352,7 +403,7 @@ xfs_alloc_fixup_trees( if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 0); - if ((error = xfs_alloc_insert(cnt_cur, &i))) + if ((error = xfs_btree_insert(cnt_cur, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 1); } @@ -363,7 +414,7 @@ xfs_alloc_fixup_trees( /* * No remaining freespace, just delete the by-block tree entry. */ - if ((error = xfs_alloc_delete(bno_cur, &i))) + if ((error = xfs_btree_delete(bno_cur, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 1); } else { @@ -380,13 +431,94 @@ xfs_alloc_fixup_trees( if ((error = xfs_alloc_lookup_eq(bno_cur, nfbno2, nflen2, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 0); - if ((error = xfs_alloc_insert(bno_cur, &i))) + if ((error = xfs_btree_insert(bno_cur, &i))) return error; XFS_WANT_CORRUPTED_RETURN(i == 1); } return 0; } +static bool +xfs_agfl_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); + int i; + + if (!uuid_equal(&agfl->agfl_uuid, &mp->m_sb.sb_uuid)) + return false; + if (be32_to_cpu(agfl->agfl_magicnum) != XFS_AGFL_MAGIC) + return false; + /* + * during growfs operations, the perag is not fully initialised, + * so we can't use it for any useful checking. growfs ensures we can't + * use it by using uncached buffers that don't have the perag attached + * so we can detect and avoid this problem. + */ + if (bp->b_pag && be32_to_cpu(agfl->agfl_seqno) != bp->b_pag->pag_agno) + return false; + + for (i = 0; i < XFS_AGFL_SIZE(mp); i++) { + if (be32_to_cpu(agfl->agfl_bno[i]) != NULLAGBLOCK && + be32_to_cpu(agfl->agfl_bno[i]) >= mp->m_sb.sb_agblocks) + return false; + } + return true; +} + +static void +xfs_agfl_read_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + + /* + * There is no verification of non-crc AGFLs because mkfs does not + * initialise the AGFL to zero or NULL. Hence the only valid part of the + * AGFL is what the AGF says is active. We can't get to the AGF, so we + * can't verify just those entries are valid. + */ + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return; + + if (!xfs_buf_verify_cksum(bp, XFS_AGFL_CRC_OFF)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (!xfs_agfl_verify(bp)) + xfs_buf_ioerror(bp, EFSCORRUPTED); + + if (bp->b_error) + xfs_verifier_error(bp); +} + +static void +xfs_agfl_write_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_fspriv; + + /* no verification of non-crc AGFLs */ + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return; + + if (!xfs_agfl_verify(bp)) { + xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); + return; + } + + if (bip) + XFS_BUF_TO_AGFL(bp)->agfl_lsn = cpu_to_be64(bip->bli_item.li_lsn); + + xfs_buf_update_cksum(bp, XFS_AGFL_CRC_OFF); +} + +const struct xfs_buf_ops xfs_agfl_buf_ops = { + .verify_read = xfs_agfl_read_verify, + .verify_write = xfs_agfl_write_verify, +}; + /* * Read in the allocation group free block array. */ @@ -404,133 +536,34 @@ xfs_alloc_read_agfl( error = xfs_trans_read_buf( mp, tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), 0, &bp); + XFS_FSS_TO_BB(mp, 1), 0, &bp, &xfs_agfl_buf_ops); if (error) return error; - ASSERT(bp); - ASSERT(!XFS_BUF_GETERROR(bp)); - XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGFL, XFS_AGFL_REF); + xfs_buf_set_ref(bp, XFS_AGFL_REF); *bpp = bp; return 0; } -#if defined(XFS_ALLOC_TRACE) -/* - * Add an allocation trace entry for an alloc call. - */ -STATIC void -xfs_alloc_trace_alloc( - const char *name, /* function tag string */ - char *str, /* additional string */ - xfs_alloc_arg_t *args, /* allocation argument structure */ - int line) /* source line number */ +STATIC int +xfs_alloc_update_counters( + struct xfs_trans *tp, + struct xfs_perag *pag, + struct xfs_buf *agbp, + long len) { - ktrace_enter(xfs_alloc_trace_buf, - (void *)(__psint_t)(XFS_ALLOC_KTRACE_ALLOC | (line << 16)), - (void *)name, - (void *)str, - (void *)args->mp, - (void *)(__psunsigned_t)args->agno, - (void *)(__psunsigned_t)args->agbno, - (void *)(__psunsigned_t)args->minlen, - (void *)(__psunsigned_t)args->maxlen, - (void *)(__psunsigned_t)args->mod, - (void *)(__psunsigned_t)args->prod, - (void *)(__psunsigned_t)args->minleft, - (void *)(__psunsigned_t)args->total, - (void *)(__psunsigned_t)args->alignment, - (void *)(__psunsigned_t)args->len, - (void *)((((__psint_t)args->type) << 16) | - (__psint_t)args->otype), - (void *)(__psint_t)((args->wasdel << 3) | - (args->wasfromfl << 2) | - (args->isfl << 1) | - (args->userdata << 0))); -} + struct xfs_agf *agf = XFS_BUF_TO_AGF(agbp); -/* - * Add an allocation trace entry for a free call. - */ -STATIC void -xfs_alloc_trace_free( - const char *name, /* function tag string */ - char *str, /* additional string */ - xfs_mount_t *mp, /* file system mount point */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_agblock_t agbno, /* a.g. relative block number */ - xfs_extlen_t len, /* length of extent */ - int isfl, /* set if is freelist allocation/free */ - int line) /* source line number */ -{ - ktrace_enter(xfs_alloc_trace_buf, - (void *)(__psint_t)(XFS_ALLOC_KTRACE_FREE | (line << 16)), - (void *)name, - (void *)str, - (void *)mp, - (void *)(__psunsigned_t)agno, - (void *)(__psunsigned_t)agbno, - (void *)(__psunsigned_t)len, - (void *)(__psint_t)isfl, - NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL); -} + pag->pagf_freeblks += len; + be32_add_cpu(&agf->agf_freeblks, len); -/* - * Add an allocation trace entry for modifying an agf. - */ -STATIC void -xfs_alloc_trace_modagf( - const char *name, /* function tag string */ - char *str, /* additional string */ - xfs_mount_t *mp, /* file system mount point */ - xfs_agf_t *agf, /* new agf value */ - int flags, /* logging flags for agf */ - int line) /* source line number */ -{ - ktrace_enter(xfs_alloc_trace_buf, - (void *)(__psint_t)(XFS_ALLOC_KTRACE_MODAGF | (line << 16)), - (void *)name, - (void *)str, - (void *)mp, - (void *)(__psint_t)flags, - (void *)(__psunsigned_t)be32_to_cpu(agf->agf_seqno), - (void *)(__psunsigned_t)be32_to_cpu(agf->agf_length), - (void *)(__psunsigned_t)be32_to_cpu(agf->agf_roots[XFS_BTNUM_BNO]), - (void *)(__psunsigned_t)be32_to_cpu(agf->agf_roots[XFS_BTNUM_CNT]), - (void *)(__psunsigned_t)be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNO]), - (void *)(__psunsigned_t)be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]), - (void *)(__psunsigned_t)be32_to_cpu(agf->agf_flfirst), - (void *)(__psunsigned_t)be32_to_cpu(agf->agf_fllast), - (void *)(__psunsigned_t)be32_to_cpu(agf->agf_flcount), - (void *)(__psunsigned_t)be32_to_cpu(agf->agf_freeblks), - (void *)(__psunsigned_t)be32_to_cpu(agf->agf_longest)); -} + xfs_trans_agblocks_delta(tp, len); + if (unlikely(be32_to_cpu(agf->agf_freeblks) > + be32_to_cpu(agf->agf_length))) + return EFSCORRUPTED; -STATIC void -xfs_alloc_trace_busy( - const char *name, /* function tag string */ - char *str, /* additional string */ - xfs_mount_t *mp, /* file system mount point */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_agblock_t agbno, /* a.g. relative block number */ - xfs_extlen_t len, /* length of extent */ - int slot, /* perag Busy slot */ - xfs_trans_t *tp, - int trtype, /* type: add, delete, search */ - int line) /* source line number */ -{ - ktrace_enter(xfs_alloc_trace_buf, - (void *)(__psint_t)(trtype | (line << 16)), - (void *)name, - (void *)str, - (void *)mp, - (void *)(__psunsigned_t)agno, - (void *)(__psunsigned_t)agbno, - (void *)(__psunsigned_t)len, - (void *)(__psint_t)slot, - (void *)tp, - NULL, NULL, NULL, NULL, NULL, NULL, NULL); + xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); + return 0; } -#endif /* XFS_ALLOC_TRACE */ /* * Allocation group level functions. @@ -573,46 +606,36 @@ xfs_alloc_ag_vextent( ASSERT(0); /* NOTREACHED */ } - if (error) + + if (error || args->agbno == NULLAGBLOCK) return error; - /* - * If the allocation worked, need to change the agf structure - * (and log it), and the superblock. - */ - if (args->agbno != NULLAGBLOCK) { - xfs_agf_t *agf; /* allocation group freelist header */ -#ifdef XFS_ALLOC_TRACE - xfs_mount_t *mp = args->mp; -#endif - long slen = (long)args->len; - ASSERT(args->len >= args->minlen && args->len <= args->maxlen); - ASSERT(!(args->wasfromfl) || !args->isfl); - ASSERT(args->agbno % args->alignment == 0); - if (!(args->wasfromfl)) { - - agf = XFS_BUF_TO_AGF(args->agbp); - be32_add_cpu(&agf->agf_freeblks, -(args->len)); - xfs_trans_agblocks_delta(args->tp, - -((long)(args->len))); - args->pag->pagf_freeblks -= args->len; - ASSERT(be32_to_cpu(agf->agf_freeblks) <= - be32_to_cpu(agf->agf_length)); - TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS); - xfs_alloc_log_agf(args->tp, args->agbp, - XFS_AGF_FREEBLKS); - /* search the busylist for these blocks */ - xfs_alloc_search_busy(args->tp, args->agno, - args->agbno, args->len); - } - if (!args->isfl) - xfs_trans_mod_sb(args->tp, - args->wasdel ? XFS_TRANS_SB_RES_FDBLOCKS : - XFS_TRANS_SB_FDBLOCKS, -slen); - XFS_STATS_INC(xs_allocx); - XFS_STATS_ADD(xs_allocb, args->len); + ASSERT(args->len >= args->minlen); + ASSERT(args->len <= args->maxlen); + ASSERT(!args->wasfromfl || !args->isfl); + ASSERT(args->agbno % args->alignment == 0); + + if (!args->wasfromfl) { + error = xfs_alloc_update_counters(args->tp, args->pag, + args->agbp, + -((long)(args->len))); + if (error) + return error; + + ASSERT(!xfs_extent_busy_search(args->mp, args->agno, + args->agbno, args->len)); } - return 0; + + if (!args->isfl) { + xfs_trans_mod_sb(args->tp, args->wasdel ? + XFS_TRANS_SB_RES_FDBLOCKS : + XFS_TRANS_SB_FDBLOCKS, + -((long)(args->len))); + } + + XFS_STATS_INC(xs_allocx); + XFS_STATS_ADD(xs_allocb, args->len); + return error; } /* @@ -627,94 +650,194 @@ xfs_alloc_ag_vextent_exact( { xfs_btree_cur_t *bno_cur;/* by block-number btree cursor */ xfs_btree_cur_t *cnt_cur;/* by count btree cursor */ - xfs_agblock_t end; /* end of allocated extent */ int error; xfs_agblock_t fbno; /* start block of found extent */ - xfs_agblock_t fend; /* end block of found extent */ xfs_extlen_t flen; /* length of found extent */ + xfs_agblock_t tbno; /* start block of trimmed extent */ + xfs_extlen_t tlen; /* length of trimmed extent */ + xfs_agblock_t tend; /* end block of trimmed extent */ int i; /* success/failure of operation */ - xfs_agblock_t maxend; /* end of maximal extent */ - xfs_agblock_t minend; /* end of minimal extent */ - xfs_extlen_t rlen; /* length of returned extent */ ASSERT(args->alignment == 1); + /* * Allocate/initialize a cursor for the by-number freespace btree. */ - bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_BNO, NULL, 0); + bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_BNO); + /* * Lookup bno and minlen in the btree (minlen is irrelevant, really). * Look for the closest free block <= bno, it must contain bno * if any free block does. */ - if ((error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i))) + error = xfs_alloc_lookup_le(bno_cur, args->agbno, args->minlen, &i); + if (error) goto error0; - if (!i) { - /* - * Didn't find it, return null. - */ - xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); - args->agbno = NULLAGBLOCK; - return 0; - } + if (!i) + goto not_found; + /* * Grab the freespace record. */ - if ((error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i))) + error = xfs_alloc_get_rec(bno_cur, &fbno, &flen, &i); + if (error) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); ASSERT(fbno <= args->agbno); - minend = args->agbno + args->minlen; - maxend = args->agbno + args->maxlen; - fend = fbno + flen; + /* - * Give up if the freespace isn't long enough for the minimum request. + * Check for overlapping busy extents. */ - if (fend < minend) { - xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); - args->agbno = NULLAGBLOCK; - return 0; - } + xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen); + /* - * End of extent will be smaller of the freespace end and the - * maximal requested end. + * Give up if the start of the extent is busy, or the freespace isn't + * long enough for the minimum request. */ - end = XFS_AGBLOCK_MIN(fend, maxend); + if (tbno > args->agbno) + goto not_found; + if (tlen < args->minlen) + goto not_found; + tend = tbno + tlen; + if (tend < args->agbno + args->minlen) + goto not_found; + /* + * End of extent will be smaller of the freespace end and the + * maximal requested end. + * * Fix the length according to mod and prod if given. */ - args->len = end - args->agbno; + args->len = XFS_AGBLOCK_MIN(tend, args->agbno + args->maxlen) + - args->agbno; xfs_alloc_fix_len(args); - if (!xfs_alloc_fix_minleft(args)) { - xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); - return 0; - } - rlen = args->len; - ASSERT(args->agbno + rlen <= fend); - end = args->agbno + rlen; + if (!xfs_alloc_fix_minleft(args)) + goto not_found; + + ASSERT(args->agbno + args->len <= tend); + /* - * We are allocating agbno for rlen [agbno .. end] + * We are allocating agbno for args->len * Allocate/initialize a cursor for the by-size btree. */ - cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_CNT, NULL, 0); + cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_CNT); ASSERT(args->agbno + args->len <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); - if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, - args->agbno, args->len, XFSA_FIXUP_BNO_OK))) { + error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno, + args->len, XFSA_FIXUP_BNO_OK); + if (error) { xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); goto error0; } + xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - TRACE_ALLOC("normal", args); + args->wasfromfl = 0; + trace_xfs_alloc_exact_done(args); + return 0; + +not_found: + /* Didn't find it, return null. */ + xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR); + args->agbno = NULLAGBLOCK; + trace_xfs_alloc_exact_notfound(args); return 0; error0: xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); - TRACE_ALLOC("error", args); + trace_xfs_alloc_exact_error(args); + return error; +} + +/* + * Search the btree in a given direction via the search cursor and compare + * the records found against the good extent we've already found. + */ +STATIC int +xfs_alloc_find_best_extent( + struct xfs_alloc_arg *args, /* allocation argument structure */ + struct xfs_btree_cur **gcur, /* good cursor */ + struct xfs_btree_cur **scur, /* searching cursor */ + xfs_agblock_t gdiff, /* difference for search comparison */ + xfs_agblock_t *sbno, /* extent found by search */ + xfs_extlen_t *slen, /* extent length */ + xfs_agblock_t *sbnoa, /* aligned extent found by search */ + xfs_extlen_t *slena, /* aligned extent length */ + int dir) /* 0 = search right, 1 = search left */ +{ + xfs_agblock_t new; + xfs_agblock_t sdiff; + int error; + int i; + + /* The good extent is perfect, no need to search. */ + if (!gdiff) + goto out_use_good; + + /* + * Look until we find a better one, run out of space or run off the end. + */ + do { + error = xfs_alloc_get_rec(*scur, sbno, slen, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + xfs_alloc_compute_aligned(args, *sbno, *slen, sbnoa, slena); + + /* + * The good extent is closer than this one. + */ + if (!dir) { + if (*sbnoa >= args->agbno + gdiff) + goto out_use_good; + } else { + if (*sbnoa <= args->agbno - gdiff) + goto out_use_good; + } + + /* + * Same distance, compare length and pick the best. + */ + if (*slena >= args->minlen) { + args->len = XFS_EXTLEN_MIN(*slena, args->maxlen); + xfs_alloc_fix_len(args); + + sdiff = xfs_alloc_compute_diff(args->agbno, args->len, + args->alignment, + args->userdata, *sbnoa, + *slena, &new); + + /* + * Choose closer size and invalidate other cursor. + */ + if (sdiff < gdiff) + goto out_use_search; + goto out_use_good; + } + + if (!dir) + error = xfs_btree_increment(*scur, 0, &i); + else + error = xfs_btree_decrement(*scur, 0, &i); + if (error) + goto error0; + } while (i); + +out_use_good: + xfs_btree_del_cursor(*scur, XFS_BTREE_NOERROR); + *scur = NULL; + return 0; + +out_use_search: + xfs_btree_del_cursor(*gcur, XFS_BTREE_NOERROR); + *gcur = NULL; + return 0; + +error0: + /* caller invalidates cursors */ return error; } @@ -743,27 +866,33 @@ xfs_alloc_ag_vextent_near( xfs_agblock_t ltbno; /* start bno of left side entry */ xfs_agblock_t ltbnoa; /* aligned ... */ xfs_extlen_t ltdiff; /* difference to left side entry */ - /*REFERENCED*/ - xfs_agblock_t ltend; /* end bno of left side entry */ xfs_extlen_t ltlen; /* length of left side entry */ xfs_extlen_t ltlena; /* aligned ... */ xfs_agblock_t ltnew; /* useful start bno of left side */ xfs_extlen_t rlen; /* length of returned extent */ -#if defined(DEBUG) && defined(__KERNEL__) + int forced = 0; +#ifdef DEBUG /* * Randomly don't execute the first algorithm. */ int dofirst; /* set to do first algorithm */ - dofirst = random32() & 1; + dofirst = prandom_u32() & 1; #endif + +restart: + bno_cur_lt = NULL; + bno_cur_gt = NULL; + ltlen = 0; + gtlena = 0; + ltlena = 0; + /* * Get a cursor for the by-size btree. */ - cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_CNT, NULL, 0); - ltlen = 0; - bno_cur_lt = bno_cur_gt = NULL; + cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_CNT); + /* * See if there are any free extents as big as maxlen. */ @@ -779,11 +908,13 @@ xfs_alloc_ag_vextent_near( goto error0; if (i == 0 || ltlen == 0) { xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + trace_xfs_alloc_near_noentry(args); return 0; } ASSERT(i == 1); } args->wasfromfl = 0; + /* * First algorithm. * If the requested extent is large wrt the freespaces available @@ -800,8 +931,8 @@ xfs_alloc_ag_vextent_near( xfs_extlen_t blen=0; xfs_agblock_t bnew=0; -#if defined(DEBUG) && defined(__KERNEL__) - if (!dofirst) +#ifdef DEBUG + if (dofirst) break; #endif /* @@ -819,7 +950,7 @@ xfs_alloc_ag_vextent_near( XFS_WANT_CORRUPTED_GOTO(i == 1, error0); if (ltlen >= args->minlen) break; - if ((error = xfs_alloc_increment(cnt_cur, 0, &i))) + if ((error = xfs_btree_increment(cnt_cur, 0, &i))) goto error0; } while (i); ASSERT(ltlen >= args->minlen); @@ -829,7 +960,7 @@ xfs_alloc_ag_vextent_near( i = cnt_cur->bc_ptrs[0]; for (j = 1, blen = 0, bdiff = 0; !error && j && (blen < args->maxlen || bdiff > 0); - error = xfs_alloc_increment(cnt_cur, 0, &j)) { + error = xfs_btree_increment(cnt_cur, 0, &j)) { /* * For each entry, decide if it's better than * the previous best entry. @@ -837,9 +968,9 @@ xfs_alloc_ag_vextent_near( if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if (!xfs_alloc_compute_aligned(ltbno, ltlen, - args->alignment, args->minlen, - <bnoa, <lena)) + xfs_alloc_compute_aligned(args, ltbno, ltlen, + <bnoa, <lena); + if (ltlena < args->minlen) continue; args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); xfs_alloc_fix_len(args); @@ -847,7 +978,8 @@ xfs_alloc_ag_vextent_near( if (args->len < blen) continue; ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, - args->alignment, ltbno, ltlen, <new); + args->alignment, args->userdata, ltbnoa, + ltlena, <new); if (ltnew != NULLAGBLOCK && (args->len > blen || ltdiff < bdiff)) { bdiff = ltdiff; @@ -869,12 +1001,11 @@ xfs_alloc_ag_vextent_near( if ((error = xfs_alloc_get_rec(cnt_cur, <bno, <len, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - ltend = ltbno + ltlen; - ASSERT(ltend <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); + ASSERT(ltbno + ltlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); args->len = blen; if (!xfs_alloc_fix_minleft(args)) { xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - TRACE_ALLOC("nominleft", args); + trace_xfs_alloc_near_nominleft(args); return 0; } blen = args->len; @@ -883,12 +1014,12 @@ xfs_alloc_ag_vextent_near( */ args->agbno = bnew; ASSERT(bnew >= ltbno); - ASSERT(bnew + blen <= ltend); + ASSERT(bnew + blen <= ltbno + ltlen); /* * Set up a cursor for the by-bno tree. */ - bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp, - args->agbp, args->agno, XFS_BTNUM_BNO, NULL, 0); + bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, + args->agbp, args->agno, XFS_BTNUM_BNO); /* * Fix up the btree entries. */ @@ -897,7 +1028,8 @@ xfs_alloc_ag_vextent_near( goto error0; xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); - TRACE_ALLOC("first", args); + + trace_xfs_alloc_near_first(args); return 0; } /* @@ -915,8 +1047,8 @@ xfs_alloc_ag_vextent_near( /* * Allocate and initialize the cursor for the leftward search. */ - bno_cur_lt = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_BNO, NULL, 0); + bno_cur_lt = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_BNO); /* * Lookup <= bno to find the leftward search's starting point. */ @@ -939,7 +1071,7 @@ xfs_alloc_ag_vextent_near( * Increment the cursor, so we will point at the entry just right * of the leftward entry if any, or to the leftmost entry. */ - if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i))) + if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) goto error0; if (!i) { /* @@ -958,11 +1090,11 @@ xfs_alloc_ag_vextent_near( if ((error = xfs_alloc_get_rec(bno_cur_lt, <bno, <len, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if (xfs_alloc_compute_aligned(ltbno, ltlen, - args->alignment, args->minlen, - <bnoa, <lena)) + xfs_alloc_compute_aligned(args, ltbno, ltlen, + <bnoa, <lena); + if (ltlena >= args->minlen) break; - if ((error = xfs_alloc_decrement(bno_cur_lt, 0, &i))) + if ((error = xfs_btree_decrement(bno_cur_lt, 0, &i))) goto error0; if (!i) { xfs_btree_del_cursor(bno_cur_lt, @@ -974,11 +1106,11 @@ xfs_alloc_ag_vextent_near( if ((error = xfs_alloc_get_rec(bno_cur_gt, >bno, >len, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if (xfs_alloc_compute_aligned(gtbno, gtlen, - args->alignment, args->minlen, - >bnoa, >lena)) + xfs_alloc_compute_aligned(args, gtbno, gtlen, + >bnoa, >lena); + if (gtlena >= args->minlen) break; - if ((error = xfs_alloc_increment(bno_cur_gt, 0, &i))) + if ((error = xfs_btree_increment(bno_cur_gt, 0, &i))) goto error0; if (!i) { xfs_btree_del_cursor(bno_cur_gt, @@ -987,211 +1119,65 @@ xfs_alloc_ag_vextent_near( } } } while (bno_cur_lt || bno_cur_gt); + /* * Got both cursors still active, need to find better entry. */ if (bno_cur_lt && bno_cur_gt) { - /* - * Left side is long enough, look for a right side entry. - */ if (ltlena >= args->minlen) { /* - * Fix up the length. + * Left side is good, look for a right side entry. */ args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); xfs_alloc_fix_len(args); - rlen = args->len; - ltdiff = xfs_alloc_compute_diff(args->agbno, rlen, - args->alignment, ltbno, ltlen, <new); - /* - * Not perfect. - */ - if (ltdiff) { - /* - * Look until we find a better one, run out of - * space, or run off the end. - */ - while (bno_cur_lt && bno_cur_gt) { - if ((error = xfs_alloc_get_rec( - bno_cur_gt, >bno, - >len, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - xfs_alloc_compute_aligned(gtbno, gtlen, - args->alignment, args->minlen, - >bnoa, >lena); - /* - * The left one is clearly better. - */ - if (gtbnoa >= args->agbno + ltdiff) { - xfs_btree_del_cursor( - bno_cur_gt, - XFS_BTREE_NOERROR); - bno_cur_gt = NULL; - break; - } - /* - * If we reach a big enough entry, - * compare the two and pick the best. - */ - if (gtlena >= args->minlen) { - args->len = - XFS_EXTLEN_MIN(gtlena, - args->maxlen); - xfs_alloc_fix_len(args); - rlen = args->len; - gtdiff = xfs_alloc_compute_diff( - args->agbno, rlen, - args->alignment, - gtbno, gtlen, >new); - /* - * Right side is better. - */ - if (gtdiff < ltdiff) { - xfs_btree_del_cursor( - bno_cur_lt, - XFS_BTREE_NOERROR); - bno_cur_lt = NULL; - } - /* - * Left side is better. - */ - else { - xfs_btree_del_cursor( - bno_cur_gt, - XFS_BTREE_NOERROR); - bno_cur_gt = NULL; - } - break; - } - /* - * Fell off the right end. - */ - if ((error = xfs_alloc_increment( - bno_cur_gt, 0, &i))) - goto error0; - if (!i) { - xfs_btree_del_cursor( - bno_cur_gt, - XFS_BTREE_NOERROR); - bno_cur_gt = NULL; - break; - } - } - } - /* - * The left side is perfect, trash the right side. - */ - else { - xfs_btree_del_cursor(bno_cur_gt, - XFS_BTREE_NOERROR); - bno_cur_gt = NULL; - } - } - /* - * It's the right side that was found first, look left. - */ - else { + ltdiff = xfs_alloc_compute_diff(args->agbno, args->len, + args->alignment, args->userdata, ltbnoa, + ltlena, <new); + + error = xfs_alloc_find_best_extent(args, + &bno_cur_lt, &bno_cur_gt, + ltdiff, >bno, >len, + >bnoa, >lena, + 0 /* search right */); + } else { + ASSERT(gtlena >= args->minlen); + /* - * Fix up the length. + * Right side is good, look for a left side entry. */ args->len = XFS_EXTLEN_MIN(gtlena, args->maxlen); xfs_alloc_fix_len(args); - rlen = args->len; - gtdiff = xfs_alloc_compute_diff(args->agbno, rlen, - args->alignment, gtbno, gtlen, >new); - /* - * Right side entry isn't perfect. - */ - if (gtdiff) { - /* - * Look until we find a better one, run out of - * space, or run off the end. - */ - while (bno_cur_lt && bno_cur_gt) { - if ((error = xfs_alloc_get_rec( - bno_cur_lt, <bno, - <len, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - xfs_alloc_compute_aligned(ltbno, ltlen, - args->alignment, args->minlen, - <bnoa, <lena); - /* - * The right one is clearly better. - */ - if (ltbnoa <= args->agbno - gtdiff) { - xfs_btree_del_cursor( - bno_cur_lt, - XFS_BTREE_NOERROR); - bno_cur_lt = NULL; - break; - } - /* - * If we reach a big enough entry, - * compare the two and pick the best. - */ - if (ltlena >= args->minlen) { - args->len = XFS_EXTLEN_MIN( - ltlena, args->maxlen); - xfs_alloc_fix_len(args); - rlen = args->len; - ltdiff = xfs_alloc_compute_diff( - args->agbno, rlen, - args->alignment, - ltbno, ltlen, <new); - /* - * Left side is better. - */ - if (ltdiff < gtdiff) { - xfs_btree_del_cursor( - bno_cur_gt, - XFS_BTREE_NOERROR); - bno_cur_gt = NULL; - } - /* - * Right side is better. - */ - else { - xfs_btree_del_cursor( - bno_cur_lt, - XFS_BTREE_NOERROR); - bno_cur_lt = NULL; - } - break; - } - /* - * Fell off the left end. - */ - if ((error = xfs_alloc_decrement( - bno_cur_lt, 0, &i))) - goto error0; - if (!i) { - xfs_btree_del_cursor(bno_cur_lt, - XFS_BTREE_NOERROR); - bno_cur_lt = NULL; - break; - } - } - } - /* - * The right side is perfect, trash the left side. - */ - else { - xfs_btree_del_cursor(bno_cur_lt, - XFS_BTREE_NOERROR); - bno_cur_lt = NULL; - } + gtdiff = xfs_alloc_compute_diff(args->agbno, args->len, + args->alignment, args->userdata, gtbnoa, + gtlena, >new); + + error = xfs_alloc_find_best_extent(args, + &bno_cur_gt, &bno_cur_lt, + gtdiff, <bno, <len, + <bnoa, <lena, + 1 /* search left */); } + + if (error) + goto error0; } + /* * If we couldn't get anything, give up. */ if (bno_cur_lt == NULL && bno_cur_gt == NULL) { - TRACE_ALLOC("neither", args); + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + + if (!forced++) { + trace_xfs_alloc_near_busy(args); + xfs_log_force(args->mp, XFS_LOG_SYNC); + goto restart; + } + trace_xfs_alloc_size_neither(args); args->agbno = NULLAGBLOCK; return 0; } + /* * At this point we have selected a freespace entry, either to the * left or to the right. If it's on the right, copy all the @@ -1208,35 +1194,41 @@ xfs_alloc_ag_vextent_near( j = 1; } else j = 0; + /* * Fix up the length and compute the useful address. */ - ltend = ltbno + ltlen; args->len = XFS_EXTLEN_MIN(ltlena, args->maxlen); xfs_alloc_fix_len(args); if (!xfs_alloc_fix_minleft(args)) { - TRACE_ALLOC("nominleft", args); + trace_xfs_alloc_near_nominleft(args); xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); return 0; } rlen = args->len; - (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, ltbno, - ltlen, <new); + (void)xfs_alloc_compute_diff(args->agbno, rlen, args->alignment, + args->userdata, ltbnoa, ltlena, <new); ASSERT(ltnew >= ltbno); - ASSERT(ltnew + rlen <= ltend); + ASSERT(ltnew + rlen <= ltbnoa + ltlena); ASSERT(ltnew + rlen <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); args->agbno = ltnew; + if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, ltnew, rlen, XFSA_FIXUP_BNO_OK))) goto error0; - TRACE_ALLOC(j ? "gt" : "lt", args); + + if (j) + trace_xfs_alloc_near_greater(args); + else + trace_xfs_alloc_near_lesser(args); + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); return 0; error0: - TRACE_ALLOC("error", args); + trace_xfs_alloc_near_error(args); if (cnt_cur != NULL) xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); if (bno_cur_lt != NULL) @@ -1264,50 +1256,92 @@ xfs_alloc_ag_vextent_size( int i; /* temp status variable */ xfs_agblock_t rbno; /* returned block number */ xfs_extlen_t rlen; /* length of returned extent */ + int forced = 0; +restart: /* * Allocate and initialize a cursor for the by-size btree. */ - cnt_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_CNT, NULL, 0); + cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_CNT); bno_cur = NULL; + /* * Look for an entry >= maxlen+alignment-1 blocks. */ if ((error = xfs_alloc_lookup_ge(cnt_cur, 0, args->maxlen + args->alignment - 1, &i))) goto error0; + /* - * If none, then pick up the last entry in the tree unless the - * tree is empty. + * If none or we have busy extents that we cannot allocate from, then + * we have to settle for a smaller extent. In the case that there are + * no large extents, this will return the last entry in the tree unless + * the tree is empty. In the case that there are only busy large + * extents, this will return the largest small extent unless there + * are no smaller extents available. */ - if (!i) { - if ((error = xfs_alloc_ag_vextent_small(args, cnt_cur, &fbno, - &flen, &i))) + if (!i || forced > 1) { + error = xfs_alloc_ag_vextent_small(args, cnt_cur, + &fbno, &flen, &i); + if (error) goto error0; if (i == 0 || flen == 0) { xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - TRACE_ALLOC("noentry", args); + trace_xfs_alloc_size_noentry(args); return 0; } ASSERT(i == 1); + xfs_alloc_compute_aligned(args, fbno, flen, &rbno, &rlen); + } else { + /* + * Search for a non-busy extent that is large enough. + * If we are at low space, don't check, or if we fall of + * the end of the btree, turn off the busy check and + * restart. + */ + for (;;) { + error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i); + if (error) + goto error0; + XFS_WANT_CORRUPTED_GOTO(i == 1, error0); + + xfs_alloc_compute_aligned(args, fbno, flen, + &rbno, &rlen); + + if (rlen >= args->maxlen) + break; + + error = xfs_btree_increment(cnt_cur, 0, &i); + if (error) + goto error0; + if (i == 0) { + /* + * Our only valid extents must have been busy. + * Make it unbusy by forcing the log out and + * retrying. If we've been here before, forcing + * the log isn't making the extents available, + * which means they have probably been freed in + * this transaction. In that case, we have to + * give up on them and we'll attempt a minlen + * allocation the next time around. + */ + xfs_btree_del_cursor(cnt_cur, + XFS_BTREE_NOERROR); + trace_xfs_alloc_size_busy(args); + if (!forced++) + xfs_log_force(args->mp, XFS_LOG_SYNC); + goto restart; + } + } } - /* - * There's a freespace as big as maxlen+alignment-1, get it. - */ - else { - if ((error = xfs_alloc_get_rec(cnt_cur, &fbno, &flen, &i))) - goto error0; - XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - } + /* * In the first case above, we got the last entry in the * by-size btree. Now we check to see if the space hits maxlen * once aligned; if not, we search left for something better. * This can't happen in the second case above. */ - xfs_alloc_compute_aligned(fbno, flen, args->alignment, args->minlen, - &rbno, &rlen); rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); XFS_WANT_CORRUPTED_GOTO(rlen == 0 || (rlen <= flen && rbno + rlen <= fbno + flen), error0); @@ -1322,7 +1356,7 @@ xfs_alloc_ag_vextent_size( bestflen = flen; bestfbno = fbno; for (;;) { - if ((error = xfs_alloc_decrement(cnt_cur, 0, &i))) + if ((error = xfs_btree_decrement(cnt_cur, 0, &i))) goto error0; if (i == 0) break; @@ -1332,8 +1366,8 @@ xfs_alloc_ag_vextent_size( XFS_WANT_CORRUPTED_GOTO(i == 1, error0); if (flen < bestrlen) break; - xfs_alloc_compute_aligned(fbno, flen, args->alignment, - args->minlen, &rbno, &rlen); + xfs_alloc_compute_aligned(args, fbno, flen, + &rbno, &rlen); rlen = XFS_EXTLEN_MIN(args->maxlen, rlen); XFS_WANT_CORRUPTED_GOTO(rlen == 0 || (rlen <= flen && rbno + rlen <= fbno + flen), @@ -1361,20 +1395,26 @@ xfs_alloc_ag_vextent_size( * Fix up the length. */ args->len = rlen; - xfs_alloc_fix_len(args); - if (rlen < args->minlen || !xfs_alloc_fix_minleft(args)) { - xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); - TRACE_ALLOC("nominleft", args); - args->agbno = NULLAGBLOCK; - return 0; + if (rlen < args->minlen) { + if (!forced++) { + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + trace_xfs_alloc_size_busy(args); + xfs_log_force(args->mp, XFS_LOG_SYNC); + goto restart; + } + goto out_nominleft; } + xfs_alloc_fix_len(args); + + if (!xfs_alloc_fix_minleft(args)) + goto out_nominleft; rlen = args->len; XFS_WANT_CORRUPTED_GOTO(rlen <= flen, error0); /* * Allocate and initialize a cursor for the by-block tree. */ - bno_cur = xfs_btree_init_cursor(args->mp, args->tp, args->agbp, - args->agno, XFS_BTNUM_BNO, NULL, 0); + bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, + args->agno, XFS_BTNUM_BNO); if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, rbno, rlen, XFSA_FIXUP_CNT_OK))) goto error0; @@ -1387,16 +1427,22 @@ xfs_alloc_ag_vextent_size( args->agbno + args->len <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), error0); - TRACE_ALLOC("normal", args); + trace_xfs_alloc_size_done(args); return 0; error0: - TRACE_ALLOC("error", args); + trace_xfs_alloc_size_error(args); if (cnt_cur) xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); if (bno_cur) xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); return error; + +out_nominleft: + xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); + trace_xfs_alloc_size_nominleft(args); + args->agbno = NULLAGBLOCK; + return 0; } /* @@ -1417,7 +1463,7 @@ xfs_alloc_ag_vextent_small( xfs_extlen_t flen; int i; - if ((error = xfs_alloc_decrement(ccur, 0, &i))) + if ((error = xfs_btree_decrement(ccur, 0, &i))) goto error0; if (i) { if ((error = xfs_alloc_get_rec(ccur, &fbno, &flen, &i))) @@ -1436,6 +1482,9 @@ xfs_alloc_ag_vextent_small( if (error) goto error0; if (fbno != NULLAGBLOCK) { + xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1, + args->userdata); + if (args->userdata) { xfs_buf_t *bp; @@ -1450,7 +1499,7 @@ xfs_alloc_ag_vextent_small( be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length), error0); args->wasfromfl = 1; - TRACE_ALLOC("freelist", args); + trace_xfs_alloc_small_freelist(args); *stat = 0; return 0; } @@ -1472,17 +1521,17 @@ xfs_alloc_ag_vextent_small( */ if (flen < args->minlen) { args->agbno = NULLAGBLOCK; - TRACE_ALLOC("notenough", args); + trace_xfs_alloc_small_notenough(args); flen = 0; } *fbnop = fbno; *flenp = flen; *stat = 1; - TRACE_ALLOC("normal", args); + trace_xfs_alloc_small_done(args); return 0; error0: - TRACE_ALLOC("error", args); + trace_xfs_alloc_small_error(args); return error; } @@ -1511,13 +1560,13 @@ xfs_free_ag_extent( xfs_mount_t *mp; /* mount point struct for filesystem */ xfs_agblock_t nbno; /* new starting block of freespace */ xfs_extlen_t nlen; /* new length of freespace */ + xfs_perag_t *pag; /* per allocation group data */ mp = tp->t_mountp; /* * Allocate and initialize a cursor for the by-block btree. */ - bno_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO, NULL, - 0); + bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO); cnt_cur = NULL; /* * Look for a neighboring block on the left (lower block numbers) @@ -1550,7 +1599,7 @@ xfs_free_ag_extent( * Look for a neighboring block on the right (higher block numbers) * that is contiguous with this space. */ - if ((error = xfs_alloc_increment(bno_cur, 0, &haveright))) + if ((error = xfs_btree_increment(bno_cur, 0, &haveright))) goto error0; if (haveright) { /* @@ -1576,8 +1625,7 @@ xfs_free_ag_extent( /* * Now allocate and initialize a cursor for the by-size tree. */ - cnt_cur = xfs_btree_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT, NULL, - 0); + cnt_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_CNT); /* * Have both left and right contiguous neighbors. * Merge all three into a single free block. @@ -1589,7 +1637,7 @@ xfs_free_ag_extent( if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_alloc_delete(cnt_cur, &i))) + if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); /* @@ -1598,19 +1646,19 @@ xfs_free_ag_extent( if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_alloc_delete(cnt_cur, &i))) + if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); /* * Delete the old by-block entry for the right block. */ - if ((error = xfs_alloc_delete(bno_cur, &i))) + if ((error = xfs_btree_delete(bno_cur, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); /* * Move the by-block cursor back to the left neighbor. */ - if ((error = xfs_alloc_decrement(bno_cur, 0, &i))) + if ((error = xfs_btree_decrement(bno_cur, 0, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); #ifdef DEBUG @@ -1649,14 +1697,14 @@ xfs_free_ag_extent( if ((error = xfs_alloc_lookup_eq(cnt_cur, ltbno, ltlen, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_alloc_delete(cnt_cur, &i))) + if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); /* * Back up the by-block cursor to the left neighbor, and * update its length. */ - if ((error = xfs_alloc_decrement(bno_cur, 0, &i))) + if ((error = xfs_btree_decrement(bno_cur, 0, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); nbno = ltbno; @@ -1675,7 +1723,7 @@ xfs_free_ag_extent( if ((error = xfs_alloc_lookup_eq(cnt_cur, gtbno, gtlen, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); - if ((error = xfs_alloc_delete(cnt_cur, &i))) + if ((error = xfs_btree_delete(cnt_cur, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); /* @@ -1694,7 +1742,7 @@ xfs_free_ag_extent( else { nbno = bno; nlen = len; - if ((error = xfs_alloc_insert(bno_cur, &i))) + if ((error = xfs_btree_insert(bno_cur, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); } @@ -1706,55 +1754,32 @@ xfs_free_ag_extent( if ((error = xfs_alloc_lookup_eq(cnt_cur, nbno, nlen, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 0, error0); - if ((error = xfs_alloc_insert(cnt_cur, &i))) + if ((error = xfs_btree_insert(cnt_cur, &i))) goto error0; XFS_WANT_CORRUPTED_GOTO(i == 1, error0); xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); cnt_cur = NULL; + /* * Update the freespace totals in the ag and superblock. */ - { - xfs_agf_t *agf; - xfs_perag_t *pag; /* per allocation group data */ - - agf = XFS_BUF_TO_AGF(agbp); - pag = &mp->m_perag[agno]; - be32_add_cpu(&agf->agf_freeblks, len); - xfs_trans_agblocks_delta(tp, len); - pag->pagf_freeblks += len; - XFS_WANT_CORRUPTED_GOTO( - be32_to_cpu(agf->agf_freeblks) <= - be32_to_cpu(agf->agf_length), - error0); - TRACE_MODAGF(NULL, agf, XFS_AGF_FREEBLKS); - xfs_alloc_log_agf(tp, agbp, XFS_AGF_FREEBLKS); - if (!isfl) - xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); - XFS_STATS_INC(xs_freex); - XFS_STATS_ADD(xs_freeb, len); - } - TRACE_FREE(haveleft ? - (haveright ? "both" : "left") : - (haveright ? "right" : "none"), - agno, bno, len, isfl); - - /* - * Since blocks move to the free list without the coordination - * used in xfs_bmap_finish, we can't allow block to be available - * for reallocation and non-transaction writing (user data) - * until we know that the transaction that moved it to the free - * list is permanently on disk. We track the blocks by declaring - * these blocks as "busy"; the busy list is maintained on a per-ag - * basis and each transaction records which entries should be removed - * when the iclog commits to disk. If a busy block is allocated, - * the iclog is pushed up to the LSN that freed the block. - */ - xfs_alloc_mark_busy(tp, agno, bno, len); + pag = xfs_perag_get(mp, agno); + error = xfs_alloc_update_counters(tp, pag, agbp, len); + xfs_perag_put(pag); + if (error) + goto error0; + + if (!isfl) + xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, (long)len); + XFS_STATS_INC(xs_freex); + XFS_STATS_ADD(xs_freeb, len); + + trace_xfs_free_extent(mp, agno, bno, len, isfl, haveleft, haveright); + return 0; error0: - TRACE_FREE("error", agno, bno, len, isfl); + trace_xfs_free_extent(mp, agno, bno, len, isfl, -1, -1); if (bno_cur) xfs_btree_del_cursor(bno_cur, XFS_BTREE_ERROR); if (cnt_cur) @@ -1790,6 +1815,25 @@ xfs_alloc_compute_maxlevels( } /* + * Find the length of the longest extent in an AG. + */ +xfs_extlen_t +xfs_alloc_longest_free_extent( + struct xfs_mount *mp, + struct xfs_perag *pag) +{ + xfs_extlen_t need, delta = 0; + + need = XFS_MIN_FREELIST_PAG(pag, mp); + if (need > pag->pagf_flcount) + delta = need - pag->pagf_flcount; + + if (pag->pagf_longest > delta) + return pag->pagf_longest - delta; + return pag->pagf_flcount > 0 || pag->pagf_longest > 0; +} + +/* * Decide whether to use this allocation group for this allocation. * If so, fix up the btree freelist's size. */ @@ -1841,15 +1885,12 @@ xfs_alloc_fix_freelist( } if (!(flags & XFS_ALLOC_FLAG_FREEING)) { - need = XFS_MIN_FREELIST_PAG(pag, mp); - delta = need > pag->pagf_flcount ? need - pag->pagf_flcount : 0; /* * If it looks like there isn't a long enough extent, or enough * total blocks, reject it. */ - longest = (pag->pagf_longest > delta) ? - (pag->pagf_longest - delta) : - (pag->pagf_flcount > 0 || pag->pagf_longest > 0); + need = XFS_MIN_FREELIST_PAG(pag, mp); + longest = xfs_alloc_longest_free_extent(mp, pag); if ((args->minlen + args->alignment + args->minalignslop - 1) > longest || ((int)(pag->pagf_freeblks + pag->pagf_flcount - @@ -1917,12 +1958,11 @@ xfs_alloc_fix_freelist( /* * Initialize the args structure. */ + memset(&targs, 0, sizeof(targs)); targs.tp = tp; targs.mp = mp; targs.agbp = agbp; targs.agno = args->agno; - targs.mod = targs.minleft = targs.wasdel = targs.userdata = - targs.minalignslop = 0; targs.alignment = targs.minlen = targs.prod = targs.isfl = 1; targs.type = XFS_ALLOCTYPE_THIS_AG; targs.pag = pag; @@ -1980,18 +2020,18 @@ xfs_alloc_get_freelist( int btreeblk) /* destination is a AGF btree */ { xfs_agf_t *agf; /* a.g. freespace structure */ - xfs_agfl_t *agfl; /* a.g. freelist structure */ xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */ xfs_agblock_t bno; /* block number returned */ + __be32 *agfl_bno; int error; int logflags; - xfs_mount_t *mp; /* mount structure */ + xfs_mount_t *mp = tp->t_mountp; xfs_perag_t *pag; /* per allocation group data */ - agf = XFS_BUF_TO_AGF(agbp); /* * Freelist is empty, give up. */ + agf = XFS_BUF_TO_AGF(agbp); if (!agf->agf_flcount) { *bnop = NULLAGBLOCK; return 0; @@ -1999,23 +2039,27 @@ xfs_alloc_get_freelist( /* * Read the array of free blocks. */ - mp = tp->t_mountp; - if ((error = xfs_alloc_read_agfl(mp, tp, - be32_to_cpu(agf->agf_seqno), &agflbp))) + error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno), + &agflbp); + if (error) return error; - agfl = XFS_BUF_TO_AGFL(agflbp); + + /* * Get the block number and update the data structures. */ - bno = be32_to_cpu(agfl->agfl_bno[be32_to_cpu(agf->agf_flfirst)]); + agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); + bno = be32_to_cpu(agfl_bno[be32_to_cpu(agf->agf_flfirst)]); be32_add_cpu(&agf->agf_flfirst, 1); xfs_trans_brelse(tp, agflbp); if (be32_to_cpu(agf->agf_flfirst) == XFS_AGFL_SIZE(mp)) agf->agf_flfirst = 0; - pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)]; + + pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); be32_add_cpu(&agf->agf_flcount, -1); xfs_trans_agflist_delta(tp, -1); pag->pagf_flcount--; + xfs_perag_put(pag); logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT; if (btreeblk) { @@ -2024,19 +2068,9 @@ xfs_alloc_get_freelist( logflags |= XFS_AGF_BTREEBLKS; } - TRACE_MODAGF(NULL, agf, logflags); xfs_alloc_log_agf(tp, agbp, logflags); *bnop = bno; - /* - * As blocks are freed, they are added to the per-ag busy list - * and remain there until the freeing transaction is committed to - * disk. Now that we have allocated blocks, this list must be - * searched to see if a block is being reused. If one is, then - * the freeing transaction must be pushed to disk NOW by forcing - * to disk all iclogs up that transaction's LSN. - */ - xfs_alloc_search_busy(tp, be32_to_cpu(agf->agf_seqno), bno, 1); return 0; } @@ -2064,9 +2098,14 @@ xfs_alloc_log_agf( offsetof(xfs_agf_t, agf_freeblks), offsetof(xfs_agf_t, agf_longest), offsetof(xfs_agf_t, agf_btreeblks), + offsetof(xfs_agf_t, agf_uuid), sizeof(xfs_agf_t) }; + trace_xfs_agf(tp->t_mountp, XFS_BUF_TO_AGF(bp), fields, _RET_IP_); + + xfs_trans_buf_set_type(tp, bp, XFS_BLFT_AGF_BUF); + xfs_btree_offsets(fields, offsets, XFS_AGF_NUM_BITS, &first, &last); xfs_trans_log_buf(tp, bp, (uint)first, (uint)last); } @@ -2103,12 +2142,13 @@ xfs_alloc_put_freelist( int btreeblk) /* block came from a AGF btree */ { xfs_agf_t *agf; /* a.g. freespace structure */ - xfs_agfl_t *agfl; /* a.g. free block array */ __be32 *blockp;/* pointer to array entry */ int error; int logflags; xfs_mount_t *mp; /* mount structure */ xfs_perag_t *pag; /* per allocation group data */ + __be32 *agfl_bno; + int startoff; agf = XFS_BUF_TO_AGF(agbp); mp = tp->t_mountp; @@ -2116,11 +2156,11 @@ xfs_alloc_put_freelist( if (!agflbp && (error = xfs_alloc_read_agfl(mp, tp, be32_to_cpu(agf->agf_seqno), &agflbp))) return error; - agfl = XFS_BUF_TO_AGFL(agflbp); be32_add_cpu(&agf->agf_fllast, 1); if (be32_to_cpu(agf->agf_fllast) == XFS_AGFL_SIZE(mp)) agf->agf_fllast = 0; - pag = &mp->m_perag[be32_to_cpu(agf->agf_seqno)]; + + pag = xfs_perag_get(mp, be32_to_cpu(agf->agf_seqno)); be32_add_cpu(&agf->agf_flcount, 1); xfs_trans_agflist_delta(tp, 1); pag->pagf_flcount++; @@ -2131,72 +2171,165 @@ xfs_alloc_put_freelist( pag->pagf_btreeblks--; logflags |= XFS_AGF_BTREEBLKS; } + xfs_perag_put(pag); - TRACE_MODAGF(NULL, agf, logflags); xfs_alloc_log_agf(tp, agbp, logflags); ASSERT(be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp)); - blockp = &agfl->agfl_bno[be32_to_cpu(agf->agf_fllast)]; + + agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); + blockp = &agfl_bno[be32_to_cpu(agf->agf_fllast)]; *blockp = cpu_to_be32(bno); - TRACE_MODAGF(NULL, agf, logflags); + startoff = (char *)blockp - (char *)agflbp->b_addr; + xfs_alloc_log_agf(tp, agbp, logflags); - xfs_trans_log_buf(tp, agflbp, - (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl), - (int)((xfs_caddr_t)blockp - (xfs_caddr_t)agfl + - sizeof(xfs_agblock_t) - 1)); + + xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF); + xfs_trans_log_buf(tp, agflbp, startoff, + startoff + sizeof(xfs_agblock_t) - 1); return 0; } +static bool +xfs_agf_verify( + struct xfs_mount *mp, + struct xfs_buf *bp) + { + struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); + + if (xfs_sb_version_hascrc(&mp->m_sb) && + !uuid_equal(&agf->agf_uuid, &mp->m_sb.sb_uuid)) + return false; + + if (!(agf->agf_magicnum == cpu_to_be32(XFS_AGF_MAGIC) && + XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && + be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && + be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && + be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && + be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp))) + return false; + + /* + * during growfs operations, the perag is not fully initialised, + * so we can't use it for any useful checking. growfs ensures we can't + * use it by using uncached buffers that don't have the perag attached + * so we can detect and avoid this problem. + */ + if (bp->b_pag && be32_to_cpu(agf->agf_seqno) != bp->b_pag->pag_agno) + return false; + + if (xfs_sb_version_haslazysbcount(&mp->m_sb) && + be32_to_cpu(agf->agf_btreeblks) > be32_to_cpu(agf->agf_length)) + return false; + + return true;; + +} + +static void +xfs_agf_read_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + + if (xfs_sb_version_hascrc(&mp->m_sb) && + !xfs_buf_verify_cksum(bp, XFS_AGF_CRC_OFF)) + xfs_buf_ioerror(bp, EFSBADCRC); + else if (XFS_TEST_ERROR(!xfs_agf_verify(mp, bp), mp, + XFS_ERRTAG_ALLOC_READ_AGF, + XFS_RANDOM_ALLOC_READ_AGF)) + xfs_buf_ioerror(bp, EFSCORRUPTED); + + if (bp->b_error) + xfs_verifier_error(bp); +} + +static void +xfs_agf_write_verify( + struct xfs_buf *bp) +{ + struct xfs_mount *mp = bp->b_target->bt_mount; + struct xfs_buf_log_item *bip = bp->b_fspriv; + + if (!xfs_agf_verify(mp, bp)) { + xfs_buf_ioerror(bp, EFSCORRUPTED); + xfs_verifier_error(bp); + return; + } + + if (!xfs_sb_version_hascrc(&mp->m_sb)) + return; + + if (bip) + XFS_BUF_TO_AGF(bp)->agf_lsn = cpu_to_be64(bip->bli_item.li_lsn); + + xfs_buf_update_cksum(bp, XFS_AGF_CRC_OFF); +} + +const struct xfs_buf_ops xfs_agf_buf_ops = { + .verify_read = xfs_agf_read_verify, + .verify_write = xfs_agf_write_verify, +}; + /* * Read in the allocation group header (free/alloc section). */ int /* error */ -xfs_alloc_read_agf( - xfs_mount_t *mp, /* mount point structure */ - xfs_trans_t *tp, /* transaction pointer */ - xfs_agnumber_t agno, /* allocation group number */ - int flags, /* XFS_ALLOC_FLAG_... */ - xfs_buf_t **bpp) /* buffer for the ag freelist header */ +xfs_read_agf( + struct xfs_mount *mp, /* mount point structure */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + int flags, /* XFS_BUF_ */ + struct xfs_buf **bpp) /* buffer for the ag freelist header */ { - xfs_agf_t *agf; /* ag freelist header */ - int agf_ok; /* set if agf is consistent */ - xfs_buf_t *bp; /* return value */ - xfs_perag_t *pag; /* per allocation group data */ int error; + trace_xfs_read_agf(mp, agno); + ASSERT(agno != NULLAGNUMBER); error = xfs_trans_read_buf( mp, tp, mp->m_ddev_targp, XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)), - XFS_FSS_TO_BB(mp, 1), - (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XFS_BUF_TRYLOCK : 0U, - &bp); + XFS_FSS_TO_BB(mp, 1), flags, bpp, &xfs_agf_buf_ops); if (error) return error; - ASSERT(!bp || !XFS_BUF_GETERROR(bp)); - if (!bp) { - *bpp = NULL; + if (!*bpp) return 0; - } - /* - * Validate the magic number of the agf block. - */ - agf = XFS_BUF_TO_AGF(bp); - agf_ok = - be32_to_cpu(agf->agf_magicnum) == XFS_AGF_MAGIC && - XFS_AGF_GOOD_VERSION(be32_to_cpu(agf->agf_versionnum)) && - be32_to_cpu(agf->agf_freeblks) <= be32_to_cpu(agf->agf_length) && - be32_to_cpu(agf->agf_flfirst) < XFS_AGFL_SIZE(mp) && - be32_to_cpu(agf->agf_fllast) < XFS_AGFL_SIZE(mp) && - be32_to_cpu(agf->agf_flcount) <= XFS_AGFL_SIZE(mp); - if (unlikely(XFS_TEST_ERROR(!agf_ok, mp, XFS_ERRTAG_ALLOC_READ_AGF, - XFS_RANDOM_ALLOC_READ_AGF))) { - XFS_CORRUPTION_ERROR("xfs_alloc_read_agf", - XFS_ERRLEVEL_LOW, mp, agf); - xfs_trans_brelse(tp, bp); - return XFS_ERROR(EFSCORRUPTED); - } - pag = &mp->m_perag[agno]; + + ASSERT(!(*bpp)->b_error); + xfs_buf_set_ref(*bpp, XFS_AGF_REF); + return 0; +} + +/* + * Read in the allocation group header (free/alloc section). + */ +int /* error */ +xfs_alloc_read_agf( + struct xfs_mount *mp, /* mount point structure */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_agnumber_t agno, /* allocation group number */ + int flags, /* XFS_ALLOC_FLAG_... */ + struct xfs_buf **bpp) /* buffer for the ag freelist header */ +{ + struct xfs_agf *agf; /* ag freelist header */ + struct xfs_perag *pag; /* per allocation group data */ + int error; + + trace_xfs_alloc_read_agf(mp, agno); + + ASSERT(agno != NULLAGNUMBER); + error = xfs_read_agf(mp, tp, agno, + (flags & XFS_ALLOC_FLAG_TRYLOCK) ? XBF_TRYLOCK : 0, + bpp); + if (error) + return error; + if (!*bpp) + return 0; + ASSERT(!(*bpp)->b_error); + + agf = XFS_BUF_TO_AGF(*bpp); + pag = xfs_perag_get(mp, agno); if (!pag->pagf_init) { pag->pagf_freeblks = be32_to_cpu(agf->agf_freeblks); pag->pagf_btreeblks = be32_to_cpu(agf->agf_btreeblks); @@ -2207,13 +2340,14 @@ xfs_alloc_read_agf( pag->pagf_levels[XFS_BTNUM_CNTi] = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); spin_lock_init(&pag->pagb_lock); - pag->pagb_list = kmem_zalloc(XFS_PAGB_NUM_SLOTS * - sizeof(xfs_perag_busy_t), KM_SLEEP); + pag->pagb_count = 0; + pag->pagb_tree = RB_ROOT; pag->pagf_init = 1; } #ifdef DEBUG else if (!XFS_FORCED_SHUTDOWN(mp)) { ASSERT(pag->pagf_freeblks == be32_to_cpu(agf->agf_freeblks)); + ASSERT(pag->pagf_btreeblks == be32_to_cpu(agf->agf_btreeblks)); ASSERT(pag->pagf_flcount == be32_to_cpu(agf->agf_flcount)); ASSERT(pag->pagf_longest == be32_to_cpu(agf->agf_longest)); ASSERT(pag->pagf_levels[XFS_BTNUM_BNOi] == @@ -2222,8 +2356,7 @@ xfs_alloc_read_agf( be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi])); } #endif - XFS_BUF_SET_VTYPE_REF(bp, B_FS_AGF, XFS_AGF_REF); - *bpp = bp; + xfs_perag_put(pag); return 0; } @@ -2270,7 +2403,7 @@ xfs_alloc_vextent( args->minlen > args->maxlen || args->minlen > agsize || args->mod >= args->prod) { args->fsbno = NULLFSBLOCK; - TRACE_ALLOC("badargs", args); + trace_xfs_alloc_vextent_badargs(args); return 0; } minleft = args->minleft; @@ -2283,24 +2416,21 @@ xfs_alloc_vextent( * These three force us into a single a.g. */ args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); - down_read(&mp->m_peraglock); - args->pag = &mp->m_perag[args->agno]; + args->pag = xfs_perag_get(mp, args->agno); args->minleft = 0; error = xfs_alloc_fix_freelist(args, 0); args->minleft = minleft; if (error) { - TRACE_ALLOC("nofix", args); + trace_xfs_alloc_vextent_nofix(args); goto error0; } if (!args->agbp) { - up_read(&mp->m_peraglock); - TRACE_ALLOC("noagbp", args); + trace_xfs_alloc_vextent_noagbp(args); break; } args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); if ((error = xfs_alloc_ag_vextent(args))) goto error0; - up_read(&mp->m_peraglock); break; case XFS_ALLOCTYPE_START_BNO: /* @@ -2352,14 +2482,13 @@ xfs_alloc_vextent( * Loop over allocation groups twice; first time with * trylock set, second time without. */ - down_read(&mp->m_peraglock); for (;;) { - args->pag = &mp->m_perag[args->agno]; + args->pag = xfs_perag_get(mp, args->agno); if (no_min) args->minleft = 0; error = xfs_alloc_fix_freelist(args, flags); args->minleft = minleft; if (error) { - TRACE_ALLOC("nofix", args); + trace_xfs_alloc_vextent_nofix(args); goto error0; } /* @@ -2370,7 +2499,9 @@ xfs_alloc_vextent( goto error0; break; } - TRACE_ALLOC("loopfailed", args); + + trace_xfs_alloc_vextent_loopfailed(args); + /* * Didn't work, figure out the next iteration. */ @@ -2397,7 +2528,7 @@ xfs_alloc_vextent( if (args->agno == sagno) { if (no_min == 1) { args->agbno = NULLAGBLOCK; - TRACE_ALLOC("allfailed", args); + trace_xfs_alloc_vextent_allfailed(args); break; } if (flags == 0) { @@ -2411,8 +2542,8 @@ xfs_alloc_vextent( } } } + xfs_perag_put(args->pag); } - up_read(&mp->m_peraglock); if (bump_rotor || (type == XFS_ALLOCTYPE_ANY_AG)) { if (args->agno == sagno) mp->m_agfrotor = (mp->m_agfrotor + 1) % @@ -2438,9 +2569,10 @@ xfs_alloc_vextent( args->len); #endif } + xfs_perag_put(args->pag); return 0; error0: - up_read(&mp->m_peraglock); + xfs_perag_put(args->pag); return error; } @@ -2462,163 +2594,37 @@ xfs_free_extent( memset(&args, 0, sizeof(xfs_alloc_arg_t)); args.tp = tp; args.mp = tp->t_mountp; - args.agno = XFS_FSB_TO_AGNO(args.mp, bno); - ASSERT(args.agno < args.mp->m_sb.sb_agcount); - args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); - down_read(&args.mp->m_peraglock); - args.pag = &args.mp->m_perag[args.agno]; - if ((error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING))) - goto error0; -#ifdef DEBUG - ASSERT(args.agbp != NULL); - ASSERT((args.agbno + len) <= - be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)); -#endif - error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); -error0: - up_read(&args.mp->m_peraglock); - return error; -} - -/* - * AG Busy list management - * The busy list contains block ranges that have been freed but whose - * transactions have not yet hit disk. If any block listed in a busy - * list is reused, the transaction that freed it must be forced to disk - * before continuing to use the block. - * - * xfs_alloc_mark_busy - add to the per-ag busy list - * xfs_alloc_clear_busy - remove an item from the per-ag busy list - */ -void -xfs_alloc_mark_busy(xfs_trans_t *tp, - xfs_agnumber_t agno, - xfs_agblock_t bno, - xfs_extlen_t len) -{ - xfs_mount_t *mp; - xfs_perag_busy_t *bsy; - int n; - - mp = tp->t_mountp; - spin_lock(&mp->m_perag[agno].pagb_lock); - - /* search pagb_list for an open slot */ - for (bsy = mp->m_perag[agno].pagb_list, n = 0; - n < XFS_PAGB_NUM_SLOTS; - bsy++, n++) { - if (bsy->busy_tp == NULL) { - break; - } - } - - if (n < XFS_PAGB_NUM_SLOTS) { - bsy = &mp->m_perag[agno].pagb_list[n]; - mp->m_perag[agno].pagb_count++; - TRACE_BUSY("xfs_alloc_mark_busy", "got", agno, bno, len, n, tp); - bsy->busy_start = bno; - bsy->busy_length = len; - bsy->busy_tp = tp; - xfs_trans_add_busy(tp, agno, n); - } else { - TRACE_BUSY("xfs_alloc_mark_busy", "FULL", agno, bno, len, -1, tp); - /* - * The busy list is full! Since it is now not possible to - * track the free block, make this a synchronous transaction - * to insure that the block is not reused before this - * transaction commits. - */ - xfs_trans_set_sync(tp); - } - - spin_unlock(&mp->m_perag[agno].pagb_lock); -} - -void -xfs_alloc_clear_busy(xfs_trans_t *tp, - xfs_agnumber_t agno, - int idx) -{ - xfs_mount_t *mp; - xfs_perag_busy_t *list; - - mp = tp->t_mountp; - - spin_lock(&mp->m_perag[agno].pagb_lock); - list = mp->m_perag[agno].pagb_list; - - ASSERT(idx < XFS_PAGB_NUM_SLOTS); - if (list[idx].busy_tp == tp) { - TRACE_UNBUSY("xfs_alloc_clear_busy", "found", agno, idx, tp); - list[idx].busy_tp = NULL; - mp->m_perag[agno].pagb_count--; - } else { - TRACE_UNBUSY("xfs_alloc_clear_busy", "missing", agno, idx, tp); - } - - spin_unlock(&mp->m_perag[agno].pagb_lock); -} - - -/* - * returns non-zero if any of (agno,bno):len is in a busy list - */ -STATIC int -xfs_alloc_search_busy(xfs_trans_t *tp, - xfs_agnumber_t agno, - xfs_agblock_t bno, - xfs_extlen_t len) -{ - xfs_mount_t *mp; - xfs_perag_busy_t *bsy; - int n; - xfs_agblock_t uend, bend; - xfs_lsn_t lsn; - int cnt; - - mp = tp->t_mountp; - - spin_lock(&mp->m_perag[agno].pagb_lock); - cnt = mp->m_perag[agno].pagb_count; + /* + * validate that the block number is legal - the enables us to detect + * and handle a silent filesystem corruption rather than crashing. + */ + args.agno = XFS_FSB_TO_AGNO(args.mp, bno); + if (args.agno >= args.mp->m_sb.sb_agcount) + return EFSCORRUPTED; - uend = bno + len - 1; + args.agbno = XFS_FSB_TO_AGBNO(args.mp, bno); + if (args.agbno >= args.mp->m_sb.sb_agblocks) + return EFSCORRUPTED; - /* search pagb_list for this slot, skipping open slots */ - for (bsy = mp->m_perag[agno].pagb_list, n = 0; - cnt; bsy++, n++) { + args.pag = xfs_perag_get(args.mp, args.agno); + ASSERT(args.pag); - /* - * (start1,length1) within (start2, length2) - */ - if (bsy->busy_tp != NULL) { - bend = bsy->busy_start + bsy->busy_length - 1; - if ((bno > bend) || - (uend < bsy->busy_start)) { - cnt--; - } else { - TRACE_BUSYSEARCH("xfs_alloc_search_busy", - "found1", agno, bno, len, n, - tp); - break; - } - } - } + error = xfs_alloc_fix_freelist(&args, XFS_ALLOC_FLAG_FREEING); + if (error) + goto error0; - /* - * If a block was found, force the log through the LSN of the - * transaction that freed the block - */ - if (cnt) { - TRACE_BUSYSEARCH("xfs_alloc_search_busy", "found", agno, bno, len, n, tp); - lsn = bsy->busy_tp->t_commit_lsn; - spin_unlock(&mp->m_perag[agno].pagb_lock); - xfs_log_force(mp, lsn, XFS_LOG_FORCE|XFS_LOG_SYNC); - } else { - TRACE_BUSYSEARCH("xfs_alloc_search_busy", "not-found", agno, bno, len, n, tp); - n = -1; - spin_unlock(&mp->m_perag[agno].pagb_lock); + /* validate the extent size is legal now we have the agf locked */ + if (args.agbno + len > + be32_to_cpu(XFS_BUF_TO_AGF(args.agbp)->agf_length)) { + error = EFSCORRUPTED; + goto error0; } - return n; + error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); + if (!error) + xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0); +error0: + xfs_perag_put(args.pag); + return error; } |
