diff options
80 files changed, 2459 insertions, 2852 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index eaff0392eb3..150a29f3cd3 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7623,7 +7623,7 @@ XFS FILESYSTEM P: Silicon Graphics Inc M: Ben Myers <bpm@sgi.com> M: Alex Elder <elder@kernel.org> -M: xfs-masters@oss.sgi.com +M: xfs@oss.sgi.com L: xfs@oss.sgi.com W: http://oss.sgi.com/projects/xfs T: git git://oss.sgi.com/xfs/xfs.git diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 0a9977983f9..d2bf974b1a2 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -33,6 +33,7 @@ xfs-y += xfs_aops.o \ xfs_discard.o \ xfs_error.o \ xfs_export.o \ + xfs_extent_busy.o \ xfs_file.o \ xfs_filestream.o \ xfs_fsops.o \ @@ -49,7 +50,6 @@ xfs-y += xfs_aops.o \ xfs_sync.o \ xfs_xattr.o \ xfs_rename.o \ - xfs_rw.o \ xfs_utils.o \ xfs_vnodeops.o \ kmem.o \ diff --git a/fs/xfs/xfs_ag.h b/fs/xfs/xfs_ag.h index 4805f009f92..44d65c1533c 100644 --- a/fs/xfs/xfs_ag.h +++ b/fs/xfs/xfs_ag.h @@ -175,24 +175,6 @@ typedef struct xfs_agfl { } xfs_agfl_t; /* - * Busy block/extent entry. Indexed by a rbtree in perag to mark blocks that - * have been freed but whose transactions aren't committed to disk yet. - * - * Note that we use the transaction ID to record the transaction, not the - * transaction structure itself. See xfs_alloc_busy_insert() for details. - */ -struct xfs_busy_extent { - struct rb_node rb_node; /* ag by-bno indexed search tree */ - struct list_head list; /* transaction busy extent list */ - xfs_agnumber_t agno; - xfs_agblock_t bno; - xfs_extlen_t length; - unsigned int flags; -#define XFS_ALLOC_BUSY_DISCARDED 0x01 /* undergoing a discard op. */ -#define XFS_ALLOC_BUSY_SKIP_DISCARD 0x02 /* do not discard */ -}; - -/* * Per-ag incore structure, copies of information in agf and agi, * to improve the performance of allocation group selection. */ diff --git a/fs/xfs/xfs_alloc.c b/fs/xfs/xfs_alloc.c index 0f0df2759b0..229641fb8e6 100644 --- a/fs/xfs/xfs_alloc.c +++ b/fs/xfs/xfs_alloc.c @@ -20,7 +20,6 @@ #include "xfs_types.h" #include "xfs_bit.h" #include "xfs_log.h" -#include "xfs_inum.h" #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_ag.h" @@ -32,6 +31,7 @@ #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_alloc.h" +#include "xfs_extent_busy.h" #include "xfs_error.h" #include "xfs_trace.h" @@ -47,8 +47,6 @@ STATIC int xfs_alloc_ag_vextent_near(xfs_alloc_arg_t *); STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); -STATIC void xfs_alloc_busy_trim(struct xfs_alloc_arg *, - xfs_agblock_t, xfs_extlen_t, xfs_agblock_t *, xfs_extlen_t *); /* * Lookup the record equal to [bno, len] in the btree given by cur. @@ -152,7 +150,7 @@ xfs_alloc_compute_aligned( xfs_extlen_t len; /* Trim busy sections out of found extent */ - xfs_alloc_busy_trim(args, foundbno, foundlen, &bno, &len); + xfs_extent_busy_trim(args, foundbno, foundlen, &bno, &len); if (args->alignment > 1 && len >= args->minlen) { xfs_agblock_t aligned_bno = roundup(bno, args->alignment); @@ -536,7 +534,7 @@ xfs_alloc_ag_vextent( if (error) return error; - ASSERT(!xfs_alloc_busy_search(args->mp, args->agno, + ASSERT(!xfs_extent_busy_search(args->mp, args->agno, args->agbno, args->len)); } @@ -603,7 +601,7 @@ xfs_alloc_ag_vextent_exact( /* * Check for overlapping busy extents. */ - xfs_alloc_busy_trim(args, fbno, flen, &tbno, &tlen); + xfs_extent_busy_trim(args, fbno, flen, &tbno, &tlen); /* * Give up if the start of the extent is busy, or the freespace isn't @@ -1391,7 +1389,7 @@ xfs_alloc_ag_vextent_small( if (error) goto error0; if (fbno != NULLAGBLOCK) { - xfs_alloc_busy_reuse(args->mp, args->agno, fbno, 1, + xfs_extent_busy_reuse(args->mp, args->agno, fbno, 1, args->userdata); if (args->userdata) { @@ -2496,579 +2494,8 @@ xfs_free_extent( error = xfs_free_ag_extent(tp, args.agbp, args.agno, args.agbno, len, 0); if (!error) - xfs_alloc_busy_insert(tp, args.agno, args.agbno, len, 0); + xfs_extent_busy_insert(tp, args.agno, args.agbno, len, 0); error0: xfs_perag_put(args.pag); return error; } - -void -xfs_alloc_busy_insert( - struct xfs_trans *tp, - xfs_agnumber_t agno, - xfs_agblock_t bno, - xfs_extlen_t len, - unsigned int flags) -{ - struct xfs_busy_extent *new; - struct xfs_busy_extent *busyp; - struct xfs_perag *pag; - struct rb_node **rbp; - struct rb_node *parent = NULL; - - new = kmem_zalloc(sizeof(struct xfs_busy_extent), KM_MAYFAIL); - if (!new) { - /* - * No Memory! Since it is now not possible to track the free - * block, make this a synchronous transaction to insure that - * the block is not reused before this transaction commits. - */ - trace_xfs_alloc_busy_enomem(tp->t_mountp, agno, bno, len); - xfs_trans_set_sync(tp); - return; - } - - new->agno = agno; - new->bno = bno; - new->length = len; - INIT_LIST_HEAD(&new->list); - new->flags = flags; - - /* trace before insert to be able to see failed inserts */ - trace_xfs_alloc_busy(tp->t_mountp, agno, bno, len); - - pag = xfs_perag_get(tp->t_mountp, new->agno); - spin_lock(&pag->pagb_lock); - rbp = &pag->pagb_tree.rb_node; - while (*rbp) { - parent = *rbp; - busyp = rb_entry(parent, struct xfs_busy_extent, rb_node); - - if (new->bno < busyp->bno) { - rbp = &(*rbp)->rb_left; - ASSERT(new->bno + new->length <= busyp->bno); - } else if (new->bno > busyp->bno) { - rbp = &(*rbp)->rb_right; - ASSERT(bno >= busyp->bno + busyp->length); - } else { - ASSERT(0); - } - } - - rb_link_node(&new->rb_node, parent, rbp); - rb_insert_color(&new->rb_node, &pag->pagb_tree); - - list_add(&new->list, &tp->t_busy); - spin_unlock(&pag->pagb_lock); - xfs_perag_put(pag); -} - -/* - * Search for a busy extent within the range of the extent we are about to - * allocate. You need to be holding the busy extent tree lock when calling - * xfs_alloc_busy_search(). This function returns 0 for no overlapping busy - * extent, -1 for an overlapping but not exact busy extent, and 1 for an exact - * match. This is done so that a non-zero return indicates an overlap that - * will require a synchronous transaction, but it can still be - * used to distinguish between a partial or exact match. - */ -int -xfs_alloc_busy_search( - struct xfs_mount *mp, - xfs_agnumber_t agno, - xfs_agblock_t bno, - xfs_extlen_t len) -{ - struct xfs_perag *pag; - struct rb_node *rbp; - struct xfs_busy_extent *busyp; - int match = 0; - - pag = xfs_perag_get(mp, agno); - spin_lock(&pag->pagb_lock); - - rbp = pag->pagb_tree.rb_node; - - /* find closest start bno overlap */ - while (rbp) { - busyp = rb_entry(rbp, struct xfs_busy_extent, rb_node); - if (bno < busyp->bno) { - /* may overlap, but exact start block is lower */ - if (bno + len > busyp->bno) - match = -1; - rbp = rbp->rb_left; - } else if (bno > busyp->bno) { - /* may overlap, but exact start block is higher */ - if (bno < busyp->bno + busyp->length) - match = -1; - rbp = rbp->rb_right; - } else { - /* bno matches busyp, length determines exact match */ - match = (busyp->length == len) ? 1 : -1; - break; - } - } - spin_unlock(&pag->pagb_lock); - xfs_perag_put(pag); - return match; -} - -/* - * The found free extent [fbno, fend] overlaps part or all of the given busy - * extent. If the overlap covers the beginning, the end, or all of the busy - * extent, the overlapping portion can be made unbusy and used for the - * allocation. We can't split a busy extent because we can't modify a - * transaction/CIL context busy list, but we can update an entries block - * number or length. - * - * Returns true if the extent can safely be reused, or false if the search - * needs to be restarted. - */ -STATIC bool -xfs_alloc_busy_update_extent( - struct xfs_mount *mp, - struct xfs_perag *pag, - struct xfs_busy_extent *busyp, - xfs_agblock_t fbno, - xfs_extlen_t flen, - bool userdata) -{ - xfs_agblock_t fend = fbno + flen; - xfs_agblock_t bbno = busyp->bno; - xfs_agblock_t bend = bbno + busyp->length; - - /* - * This extent is currently being discarded. Give the thread - * performing the discard a chance to mark the extent unbusy - * and retry. - */ - if (busyp->flags & XFS_ALLOC_BUSY_DISCARDED) { - spin_unlock(&pag->pagb_lock); - delay(1); - spin_lock(&pag->pagb_lock); - return false; - } - - /* - * If there is a busy extent overlapping a user allocation, we have - * no choice but to force the log and retry the search. - * - * Fortunately this does not happen during normal operation, but - * only if the filesystem is very low on space and has to dip into - * the AGFL for normal allocations. - */ - if (userdata) - goto out_force_log; - - if (bbno < fbno && bend > fend) { - /* - * Case 1: - * bbno bend - * +BBBBBBBBBBBBBBBBB+ - * +---------+ - * fbno fend - */ - - /* - * We would have to split the busy extent to be able to track - * it correct, which we cannot do because we would have to - * modify the list of busy extents attached to the transaction - * or CIL context, which is immutable. - * - * Force out the log to clear the busy extent and retry the - * search. - */ - goto out_force_log; - } else if (bbno >= fbno && bend <= fend) { - /* - * Case 2: - * bbno bend - * +BBBBBBBBBBBBBBBBB+ - * +-----------------+ - * fbno fend - * - * Case 3: - * bbno bend - * +BBBBBBBBBBBBBBBBB+ - * +--------------------------+ - * fbno fend - * - * Case 4: - * bbno bend - * +BBBBBBBBBBBBBBBBB+ - * +--------------------------+ - * fbno fend - * - * Case 5: - * bbno bend - * +BBBBBBBBBBBBBBBBB+ - * +-----------------------------------+ - * fbno fend - * - */ - - /* - * The busy extent is fully covered by the extent we are - * allocating, and can simply be removed from the rbtree. - * However we cannot remove it from the immutable list - * tracking busy extents in the transaction or CIL context, - * so set the length to zero to mark it invalid. - * - * We also need to restart the busy extent search from the - * tree root, because erasing the node can rearrange the - * tree topology. - */ - rb_erase(&busyp->rb_node, &pag->pagb_tree); - busyp->length = 0; - return false; - } else if (fend < bend) { - /* - * Case 6: - * bbno bend - * +BBBBBBBBBBBBBBBBB+ - * +---------+ - * fbno fend - * - * Case 7: - * bbno bend - * +BBBBBBBBBBBBBBBBB+ - * +------------------+ - * fbno fend - * - */ - busyp->bno = fend; - } else if (bbno < fbno) { - /* - * Case 8: - * bbno bend - * +BBBBBBBBBBBBBBBBB+ - * +-------------+ - * fbno fend - * - * Case 9: - * bbno bend - * +BBBBBBBBBBBBBBBBB+ - * +----------------------+ - * fbno fend - */ - busyp->length = fbno - busyp->bno; - } else { - ASSERT(0); - } - - trace_xfs_alloc_busy_reuse(mp, pag->pag_agno, fbno, flen); - return true; - -out_force_log: - spin_unlock(&pag->pagb_lock); - xfs_log_force(mp, XFS_LOG_SYNC); - trace_xfs_alloc_busy_force(mp, pag->pag_agno, fbno, flen); - spin_lock(&pag->pagb_lock); - return false; -} - - -/* - * For a given extent [fbno, flen], make sure we can reuse it safely. - */ -void -xfs_alloc_busy_reuse( - struct xfs_mount *mp, - xfs_agnumber_t agno, - xfs_agblock_t fbno, - xfs_extlen_t flen, - bool userdata) -{ - struct xfs_perag *pag; - struct rb_node *rbp; - - ASSERT(flen > 0); - - pag = xfs_perag_get(mp, agno); - spin_lock(&pag->pagb_lock); -restart: - rbp = pag->pagb_tree.rb_node; - while (rbp) { - struct xfs_busy_extent *busyp = - rb_entry(rbp, struct xfs_busy_extent, rb_node); - xfs_agblock_t bbno = busyp->bno; - xfs_agblock_t bend = bbno + busyp->length; - - if (fbno + flen <= bbno) { - rbp = rbp->rb_left; - continue; - } else if (fbno >= bend) { - rbp = rbp->rb_right; - continue; - } - - if (!xfs_alloc_busy_update_extent(mp, pag, busyp, fbno, flen, - userdata)) - goto restart; - } - spin_unlock(&pag->pagb_lock); - xfs_perag_put(pag); -} - -/* - * For a given extent [fbno, flen], search the busy extent list to find a - * subset of the extent that is not busy. If *rlen is smaller than - * args->minlen no suitable extent could be found, and the higher level - * code needs to force out the log and retry the allocation. - */ -STATIC void -xfs_alloc_busy_trim( - struct xfs_alloc_arg *args, - xfs_agblock_t bno, - xfs_extlen_t len, - xfs_agblock_t *rbno, - xfs_extlen_t *rlen) -{ - xfs_agblock_t fbno; - xfs_extlen_t flen; - struct rb_node *rbp; - - ASSERT(len > 0); - - spin_lock(&args->pag->pagb_lock); -restart: - fbno = bno; - flen = len; - rbp = args->pag->pagb_tree.rb_node; - while (rbp && flen >= args->minlen) { - struct xfs_busy_extent *busyp = - rb_entry(rbp, struct xfs_busy_extent, rb_node); - xfs_agblock_t fend = fbno + flen; - xfs_agblock_t bbno = busyp->bno; - xfs_agblock_t bend = bbno + busyp->length; - - if (fend <= bbno) { - rbp = rbp->rb_left; - continue; - } else if (fbno >= bend) { - rbp = rbp->rb_right; - continue; - } - - /* - * If this is a metadata allocation, try to reuse the busy - * extent instead of trimming the allocation. - */ - if (!args->userdata && - !(busyp->flags & XFS_ALLOC_BUSY_DISCARDED)) { - if (!xfs_alloc_busy_update_extent(args->mp, args->pag, - busyp, fbno, flen, - false)) - goto restart; - continue; - } - - if (bbno <= fbno) { - /* start overlap */ - - /* - * Case 1: - * bbno bend - * +BBBBBBBBBBBBBBBBB+ - * +---------+ - * fbno fend - * - * Case 2: - * bbno bend - * +BBBBBBBBBBBBBBBBB+ - * +-------------+ - * fbno fend - * - * Case 3: - * bbno bend - * +BBBBBBBBBBBBBBBBB+ - * +-------------+ - * fbno fend - * - * Case 4: - * bbno bend - * +BBBBBBBBBBBBBBBBB+ - * +-----------------+ - * fbno fend - * - * No unbusy region in extent, return failure. - */ - if (fend <= bend) - goto fail; - - /* - * Case 5: - * bbno bend - * +BBBBBBBBBBBBBBBBB+ - * +----------------------+ - * fbno fend - * - * Case 6: - * bbno bend - * +BBBBBBBBBBBBBBBBB+ - * +--------------------------+ - * fbno fend - * - * Needs to be trimmed to: - * +-------+ - * fbno fend - */ - fbno = bend; - } else if (bend >= fend) { - /* end overlap */ - - /* |