Diffstat (limited to 'fs/xfs/xfs_trans_buf.c')
fs/xfs/xfs_trans_buf.c | 521
1 file changed, 263 insertions(+), 258 deletions(-)
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index c47918c302a..b8eef0549f3 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -17,23 +17,18 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
-#include "xfs_dinode.h"
#include "xfs_inode.h"
+#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
#include "xfs_error.h"
-#include "xfs_rw.h"
#include "xfs_trace.h"
/*
@@ -44,20 +39,26 @@ STATIC struct xfs_buf *
xfs_trans_buf_item_match(
struct xfs_trans *tp,
struct xfs_buftarg *target,
- xfs_daddr_t blkno,
- int len)
+ struct xfs_buf_map *map,
+ int nmaps)
{
struct xfs_log_item_desc *lidp;
struct xfs_buf_log_item *blip;
+ int len = 0;
+ int i;
+
+ for (i = 0; i < nmaps; i++)
+ len += map[i].bm_len;
- len = BBTOB(len);
list_for_each_entry(lidp, &tp->t_items, lid_trans) {
blip = (struct xfs_buf_log_item *)lidp->lid_item;
if (blip->bli_item.li_type == XFS_LI_BUF &&
- XFS_BUF_TARGET(blip->bli_buf) == target &&
- XFS_BUF_ADDR(blip->bli_buf) == blkno &&
- XFS_BUF_COUNT(blip->bli_buf) == len)
+ blip->bli_buf->b_target == target &&
+ XFS_BUF_ADDR(blip->bli_buf) == map[0].bm_bn &&
+ blip->bli_buf->b_length == len) {
+ ASSERT(blip->bli_buf->b_map_count == nmaps);
return blip->bli_buf;
+ }
}
return NULL;
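
[Note: xfs_trans_buf_item_match() now matches on an array of xfs_buf_map segments rather than a single (blkno, len) pair: it sums bm_len over all maps and keys the lookup on the first segment's bm_bn. A minimal sketch of the map structure and a hypothetical two-extent lookup, assuming the xfs_buf_map layout from xfs_buf.h of this era:

	struct xfs_buf_map {
		xfs_daddr_t	bm_bn;	/* block number for I/O */
		int		bm_len;	/* size of I/O in basic blocks */
	};

	/* hypothetical discontiguous buffer: two extents, 4 + 8 basic blocks */
	struct xfs_buf_map map[2] = {
		{ .bm_bn = blkno,      .bm_len = 4 },
		{ .bm_bn = blkno + 16, .bm_len = 8 },
	};
	bp = xfs_trans_buf_item_match(tp, target, map, 2);	/* matches len == 12 */
]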
@@ -80,8 +81,7 @@ _xfs_trans_bjoin(
{
struct xfs_buf_log_item *bip;
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
+ ASSERT(bp->b_transp == NULL);
/*
* The xfs_buf_log_item pointer is stored in b_fsprivate. If
@@ -89,9 +89,9 @@ _xfs_trans_bjoin(
* The checks to see if one is there are in xfs_buf_item_init().
*/
xfs_buf_item_init(bp, tp->t_mountp);
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ bip = bp->b_fspriv;
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
- ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
+ ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
ASSERT(!(bip->bli_flags & XFS_BLI_LOGGED));
if (reset_recur)
bip->bli_recur = 0;
@@ -110,7 +110,7 @@ _xfs_trans_bjoin(
* Initialize b_fsprivate2 so we can find it with incore_match()
* in xfs_trans_get_buf() and friends above.
*/
- XFS_BUF_SET_FSPRIVATE2(bp, tp);
+ bp->b_transp = tp;
}
@@ -132,25 +132,19 @@ xfs_trans_bjoin(
* If the transaction pointer is NULL, make this just a normal
* get_buf() call.
*/
-xfs_buf_t *
-xfs_trans_get_buf(xfs_trans_t *tp,
- xfs_buftarg_t *target_dev,
- xfs_daddr_t blkno,
- int len,
- uint flags)
+struct xfs_buf *
+xfs_trans_get_buf_map(
+ struct xfs_trans *tp,
+ struct xfs_buftarg *target,
+ struct xfs_buf_map *map,
+ int nmaps,
+ xfs_buf_flags_t flags)
{
xfs_buf_t *bp;
xfs_buf_log_item_t *bip;
- if (flags == 0)
- flags = XBF_LOCK | XBF_MAPPED;
-
- /*
- * Default to a normal get_buf() call if the tp is NULL.
- */
- if (tp == NULL)
- return xfs_buf_get(target_dev, blkno, len,
- flags | XBF_DONT_BLOCK);
+ if (!tp)
+ return xfs_buf_get_map(target, map, nmaps, flags);
/*
* If we find the buffer in the cache with this transaction
@@ -158,22 +152,16 @@ xfs_trans_get_buf(xfs_trans_t *tp,
* have it locked. In this case we just increment the lock
* recursion count and return the buffer to the caller.
*/
- bp = xfs_trans_buf_item_match(tp, target_dev, blkno, len);
+ bp = xfs_trans_buf_item_match(tp, target, map, nmaps);
if (bp != NULL) {
- ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
- if (XFS_FORCED_SHUTDOWN(tp->t_mountp))
- XFS_BUF_SUPER_STALE(bp);
-
- /*
- * If the buffer is stale then it was binval'ed
- * since last read. This doesn't matter since the
- * caller isn't allowed to use the data anyway.
- */
- else if (XFS_BUF_ISSTALE(bp))
- ASSERT(!XFS_BUF_ISDELAYWRITE(bp));
+ ASSERT(xfs_buf_islocked(bp));
+ if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) {
+ xfs_buf_stale(bp);
+ XFS_BUF_DONE(bp);
+ }
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ ASSERT(bp->b_transp == tp);
+ bip = bp->b_fspriv;
ASSERT(bip != NULL);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_recur++;
@@ -181,20 +169,12 @@ xfs_trans_get_buf(xfs_trans_t *tp,
return (bp);
}
- /*
- * We always specify the XBF_DONT_BLOCK flag within a transaction
- * so that get_buf does not try to push out a delayed write buffer
- * which might cause another transaction to take place (if the
- * buffer was delayed alloc). Such recursive transactions can
- * easily deadlock with our current transaction as well as cause
- * us to run out of stack space.
- */
- bp = xfs_buf_get(target_dev, blkno, len, flags | XBF_DONT_BLOCK);
+ bp = xfs_buf_get_map(target, map, nmaps, flags);
if (bp == NULL) {
return NULL;
}
- ASSERT(!XFS_BUF_GETERROR(bp));
+ ASSERT(!bp->b_error);
_xfs_trans_bjoin(tp, bp, 1);
trace_xfs_trans_get_buf(bp->b_fspriv);
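
[Note: single-extent callers are unaffected by the new signature; the old xfs_trans_get_buf() survives as a thin inline wrapper that builds a one-entry map. A sketch of that wrapper, paraphrased from xfs_trans.h of this era:

	static inline struct xfs_buf *
	xfs_trans_get_buf(
		struct xfs_trans	*tp,
		struct xfs_buftarg	*target,
		xfs_daddr_t		blkno,
		int			numblks,
		xfs_buf_flags_t		flags)
	{
		DEFINE_SINGLE_BUF_MAP(map, blkno, numblks);
		return xfs_trans_get_buf_map(tp, target, &map, 1, flags);
	}
]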
@@ -232,8 +212,8 @@ xfs_trans_getsb(xfs_trans_t *tp,
* recursion count and return the buffer to the caller.
*/
bp = mp->m_sb_bp;
- if (XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp) {
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
+ if (bp->b_transp == tp) {
+ bip = bp->b_fspriv;
ASSERT(bip != NULL);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_recur++;
@@ -268,36 +248,37 @@ int xfs_error_mod = 33;
* read_buf() call.
*/
int
-xfs_trans_read_buf(
- xfs_mount_t *mp,
- xfs_trans_t *tp,
- xfs_buftarg_t *target,
- xfs_daddr_t blkno,
- int len,
- uint flags,
- xfs_buf_t **bpp)
+xfs_trans_read_buf_map(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_buftarg *target,
+ struct xfs_buf_map *map,
+ int nmaps,
+ xfs_buf_flags_t flags,
+ struct xfs_buf **bpp,
+ const struct xfs_buf_ops *ops)
{
xfs_buf_t *bp;
xfs_buf_log_item_t *bip;
int error;
- if (flags == 0)
- flags = XBF_LOCK | XBF_MAPPED;
-
- /*
- * Default to a normal get_buf() call if the tp is NULL.
- */
- if (tp == NULL) {
- bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK);
+ *bpp = NULL;
+ if (!tp) {
+ bp = xfs_buf_read_map(target, map, nmaps, flags, ops);
if (!bp)
return (flags & XBF_TRYLOCK) ?
EAGAIN : XFS_ERROR(ENOMEM);
- if (XFS_BUF_GETERROR(bp) != 0) {
- xfs_ioerror_alert("xfs_trans_read_buf", mp,
- bp, blkno);
- error = XFS_BUF_GETERROR(bp);
+ if (bp->b_error) {
+ error = bp->b_error;
+ xfs_buf_ioerror_alert(bp, __func__);
+ XFS_BUF_UNDONE(bp);
+ xfs_buf_stale(bp);
xfs_buf_relse(bp);
+
+ /* bad CRC means corrupted metadata */
+ if (error == EFSBADCRC)
+ error = EFSCORRUPTED;
return error;
}
#ifdef DEBUG
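
[Note: the new ops argument threads a metadata verifier down to the buffer cache; a verifier that rejects the buffer sets b_error, and a checksum failure (EFSBADCRC) is reported to callers as EFSCORRUPTED, as above. The ops structure is just a pair of callbacks; a sketch assuming the two-member form of this era (later kernels add further fields):

	struct xfs_buf_ops {
		void (*verify_read)(struct xfs_buf *);		/* check contents after read */
		void (*verify_write)(struct xfs_buf *);		/* check contents before write */
	};
]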
@@ -305,7 +286,7 @@ xfs_trans_read_buf(
if (xfs_error_target == target) {
if (((xfs_req_num++) % xfs_error_mod) == 0) {
xfs_buf_relse(bp);
- cmn_err(CE_DEBUG, "Returning error!\n");
+ xfs_debug(mp, "Returning error!");
return XFS_ERROR(EIO);
}
}
@@ -325,21 +306,33 @@ xfs_trans_read_buf(
* If the buffer is not yet read in, then we read it in, increment
* the lock recursion count, and return it to the caller.
*/
- bp = xfs_trans_buf_item_match(tp, target, blkno, len);
+ bp = xfs_trans_buf_item_match(tp, target, map, nmaps);
if (bp != NULL) {
- ASSERT(XFS_BUF_VALUSEMA(bp) <= 0);
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
- ASSERT((XFS_BUF_ISERROR(bp)) == 0);
+ ASSERT(xfs_buf_islocked(bp));
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bp->b_fspriv != NULL);
+ ASSERT(!bp->b_error);
if (!(XFS_BUF_ISDONE(bp))) {
trace_xfs_trans_read_buf_io(bp, _RET_IP_);
ASSERT(!XFS_BUF_ISASYNC(bp));
+ ASSERT(bp->b_iodone == NULL);
XFS_BUF_READ(bp);
- xfsbdstrat(tp->t_mountp, bp);
+ bp->b_ops = ops;
+
+ /*
+ * XXX(hch): clean up the error handling here to be less
+ * of a mess..
+ */
+ if (XFS_FORCED_SHUTDOWN(mp)) {
+ trace_xfs_bdstrat_shut(bp, _RET_IP_);
+ xfs_bioerror_relse(bp);
+ } else {
+ xfs_buf_iorequest(bp);
+ }
+
error = xfs_buf_iowait(bp);
if (error) {
- xfs_ioerror_alert("xfs_trans_read_buf", mp,
- bp, blkno);
+ xfs_buf_ioerror_alert(bp, __func__);
xfs_buf_relse(bp);
/*
* We can gracefully recover from most read
@@ -349,6 +342,9 @@ xfs_trans_read_buf(
if (tp->t_flags & XFS_TRANS_DIRTY)
xfs_force_shutdown(tp->t_mountp,
SHUTDOWN_META_IO_ERROR);
+ /* bad CRC means corrupted metadata */
+ if (error == EFSBADCRC)
+ error = EFSCORRUPTED;
return error;
}
}
@@ -363,7 +359,7 @@ xfs_trans_read_buf(
}
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t*);
+ bip = bp->b_fspriv;
bip->bli_recur++;
ASSERT(atomic_read(&bip->bli_refcount) > 0);
@@ -372,28 +368,24 @@ xfs_trans_read_buf(
return 0;
}
- /*
- * We always specify the XBF_DONT_BLOCK flag within a transaction
- * so that get_buf does not try to push out a delayed write buffer
- * which might cause another transaction to take place (if the
- * buffer was delayed alloc). Such recursive transactions can
- * easily deadlock with our current transaction as well as cause
- * us to run out of stack space.
- */
- bp = xfs_buf_read(target, blkno, len, flags | XBF_DONT_BLOCK);
+ bp = xfs_buf_read_map(target, map, nmaps, flags, ops);
if (bp == NULL) {
*bpp = NULL;
- return 0;
+ return (flags & XBF_TRYLOCK) ?
+ 0 : XFS_ERROR(ENOMEM);
}
- if (XFS_BUF_GETERROR(bp) != 0) {
- XFS_BUF_SUPER_STALE(bp);
- error = XFS_BUF_GETERROR(bp);
-
- xfs_ioerror_alert("xfs_trans_read_buf", mp,
- bp, blkno);
+ if (bp->b_error) {
+ error = bp->b_error;
+ xfs_buf_stale(bp);
+ XFS_BUF_DONE(bp);
+ xfs_buf_ioerror_alert(bp, __func__);
if (tp->t_flags & XFS_TRANS_DIRTY)
xfs_force_shutdown(tp->t_mountp, SHUTDOWN_META_IO_ERROR);
xfs_buf_relse(bp);
+
+ /* bad CRC means corrupted metadata */
+ if (error == EFSBADCRC)
+ error = EFSCORRUPTED;
return error;
}
#ifdef DEBUG
@@ -403,7 +395,7 @@ xfs_trans_read_buf(
xfs_force_shutdown(tp->t_mountp,
SHUTDOWN_META_IO_ERROR);
xfs_buf_relse(bp);
- cmn_err(CE_DEBUG, "Returning trans error!\n");
+ xfs_debug(mp, "Returning trans error!");
return XFS_ERROR(EIO);
}
}
@@ -419,26 +411,12 @@ xfs_trans_read_buf(
return 0;
shutdown_abort:
- /*
- * the theory here is that buffer is good but we're
- * bailing out because the filesystem is being forcibly
- * shut down. So we should leave the b_flags alone since
- * the buffer's not staled and just get out.
- */
-#if defined(DEBUG)
- if (XFS_BUF_ISSTALE(bp) && XFS_BUF_ISDELAYWRITE(bp))
- cmn_err(CE_NOTE, "about to pop assert, bp == 0x%p", bp);
-#endif
- ASSERT((XFS_BUF_BFLAGS(bp) & (XBF_STALE|XBF_DELWRI)) !=
- (XBF_STALE|XBF_DELWRI));
-
trace_xfs_trans_read_buf_shut(bp, _RET_IP_);
xfs_buf_relse(bp);
*bpp = NULL;
return XFS_ERROR(EIO);
}
-
/*
* Release the buffer bp which was previously acquired with one of the
* xfs_trans_... buffer allocation routines if the buffer has not
@@ -459,35 +437,21 @@ xfs_trans_brelse(xfs_trans_t *tp,
xfs_buf_t *bp)
{
xfs_buf_log_item_t *bip;
- xfs_log_item_t *lip;
/*
* Default to a normal brelse() call if the tp is NULL.
*/
if (tp == NULL) {
- ASSERT(XFS_BUF_FSPRIVATE2(bp, void *) == NULL);
- /*
- * If there's a buf log item attached to the buffer,
- * then let the AIL know that the buffer is being
- * unlocked.
- */
- if (XFS_BUF_FSPRIVATE(bp, void *) != NULL) {
- lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
- if (lip->li_type == XFS_LI_BUF) {
- bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*);
- xfs_trans_unlocked_item(bip->bli_item.li_ailp,
- lip);
- }
- }
+ ASSERT(bp->b_transp == NULL);
xfs_buf_relse(bp);
return;
}
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ ASSERT(bp->b_transp == tp);
+ bip = bp->b_fspriv;
ASSERT(bip->bli_item.li_type == XFS_LI_BUF);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
- ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
+ ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
ASSERT(atomic_read(&bip->bli_refcount) > 0);
trace_xfs_trans_brelse(bip);
@@ -553,26 +517,15 @@ xfs_trans_brelse(xfs_trans_t *tp,
ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
ASSERT(!(bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF));
xfs_buf_item_relse(bp);
- bip = NULL;
- }
- XFS_BUF_SET_FSPRIVATE2(bp, NULL);
-
- /*
- * If we've still got a buf log item on the buffer, then
- * tell the AIL that the buffer is being unlocked.
- */
- if (bip != NULL) {
- xfs_trans_unlocked_item(bip->bli_item.li_ailp,
- (xfs_log_item_t*)bip);
}
+ bp->b_transp = NULL;
xfs_buf_relse(bp);
- return;
}
/*
* Mark the buffer as not needing to be unlocked when the buf item's
- * IOP_UNLOCK() routine is called. The buffer must already be locked
+ * iop_unlock() routine is called. The buffer must already be locked
* and associated with the given transaction.
*/
/* ARGSUSED */
@@ -580,16 +533,14 @@ void
xfs_trans_bhold(xfs_trans_t *tp,
xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip;
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
-
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
- ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
+ ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
ASSERT(atomic_read(&bip->bli_refcount) > 0);
+
bip->bli_flags |= XFS_BLI_HOLD;
trace_xfs_trans_bhold(bip);
}
@@ -602,19 +553,16 @@ void
xfs_trans_bhold_release(xfs_trans_t *tp,
xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip;
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
-
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
ASSERT(!(bip->bli_flags & XFS_BLI_STALE));
- ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_CANCEL));
+ ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_CANCEL));
ASSERT(atomic_read(&bip->bli_refcount) > 0);
ASSERT(bip->bli_flags & XFS_BLI_HOLD);
- bip->bli_flags &= ~XFS_BLI_HOLD;
+ bip->bli_flags &= ~XFS_BLI_HOLD;
trace_xfs_trans_bhold_release(bip);
}
@@ -633,14 +581,13 @@ xfs_trans_log_buf(xfs_trans_t *tp,
uint first,
uint last)
{
- xfs_buf_log_item_t *bip;
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
- ASSERT((first <= last) && (last < XFS_BUF_COUNT(bp)));
- ASSERT((XFS_BUF_IODONE_FUNC(bp) == NULL) ||
- (XFS_BUF_IODONE_FUNC(bp) == xfs_buf_iodone_callbacks));
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
+ ASSERT(first <= last && last < BBTOB(bp->b_length));
+ ASSERT(bp->b_iodone == NULL ||
+ bp->b_iodone == xfs_buf_iodone_callbacks);
/*
* Mark the buffer as needing to be written out eventually,
@@ -652,12 +599,10 @@ xfs_trans_log_buf(xfs_trans_t *tp,
* inside the b_bdstrat callback so that this won't get written to
* disk.
*/
- XFS_BUF_DELAYWRITE(bp);
XFS_BUF_DONE(bp);
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
- XFS_BUF_SET_IODONE_FUNC(bp, xfs_buf_iodone_callbacks);
+ bp->b_iodone = xfs_buf_iodone_callbacks;
bip->bli_item.li_cb = xfs_buf_iodone;
trace_xfs_trans_log_buf(bip);
@@ -672,46 +617,61 @@ xfs_trans_log_buf(xfs_trans_t *tp,
bip->bli_flags &= ~XFS_BLI_STALE;
ASSERT(XFS_BUF_ISSTALE(bp));
XFS_BUF_UNSTALE(bp);
- bip->bli_format.blf_flags &= ~XFS_BLF_CANCEL;
+ bip->__bli_format.blf_flags &= ~XFS_BLF_CANCEL;
}
tp->t_flags |= XFS_TRANS_DIRTY;
bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
- bip->bli_flags |= XFS_BLI_LOGGED;
- xfs_buf_item_log(bip, first, last);
+
+ /*
+ * If we have an ordered buffer we are not logging any dirty range, but
+ * the buffer still needs to be marked dirty and flagged as logged.
+ */
+ bip->bli_flags |= XFS_BLI_DIRTY | XFS_BLI_LOGGED;
+ if (!(bip->bli_flags & XFS_BLI_ORDERED))
+ xfs_buf_item_log(bip, first, last);
}
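
[Note: first and last are inclusive byte offsets within the buffer, so a caller logs only the region it actually modified. A hypothetical example dirtying the first 128 bytes:

	xfs_trans_log_buf(tp, bp, 0, 127);	/* requires first <= last < BBTOB(bp->b_length) */
]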
/*
- * This called to invalidate a buffer that is being used within
- * a transaction. Typically this is because the blocks in the
- * buffer are being freed, so we need to prevent it from being
- * written out when we're done. Allowing it to be written again
- * might overwrite data in the free blocks if they are reallocated
- * to a file.
+ * Invalidate a buffer that is being used within a transaction.
+ *
+ * Typically this is because the blocks in the buffer are being freed, so we
+ * need to prevent it from being written out when we're done. Allowing it
+ * to be written again might overwrite data in the free blocks if they are
+ * reallocated to a file.
+ *
+ * We prevent the buffer from being written out by marking it stale. We can't
+ * get rid of the buf log item at this point because the buffer may still be
+ * pinned by another transaction. If that is the case, then we'll wait until
+ * the buffer is committed to disk for the last time (we can tell by the ref
+ * count) and free it in xfs_buf_item_unpin(). Until that happens we will
+ * keep the buffer locked so that the buffer and buf log item are not reused.
+ *
+ * We also set the XFS_BLF_CANCEL flag in the buf log format structure and log
+ * the buf item. This will be used at recovery time to determine that copies
+ * of the buffer in the log before this should not be replayed.
*
- * We prevent the buffer from being written out by clearing the
- * B_DELWRI flag. We can't always
- * get rid of the buf log item at this point, though, because
- * the buffer may still be pinned by another transaction. If that
- * is the case, then we'll wait until the buffer is committed to
- * disk for the last time (we can tell by the ref count) and
- * free it in xfs_buf_item_unpin(). Until it is cleaned up we
- * will keep the buffer locked so that the buffer and buf log item
- * are not reused.
+ * We mark the item descriptor and the transaction dirty so that we'll hold
+ * the buffer until after the commit.
+ *
+ * Since we're invalidating the buffer, we also clear the state about which
+ * parts of the buffer have been logged. We also clear the flag indicating
+ * that this is an inode buffer since the data in the buffer will no longer
+ * be valid.
+ *
+ * We set the stale bit in the buffer as well since we're getting rid of it.
*/
void
xfs_trans_binval(
xfs_trans_t *tp,
xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip;
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
+ int i;
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
-
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
trace_xfs_trans_binval(bip);
@@ -721,44 +681,27 @@ xfs_trans_binval(
* If the buffer is already invalidated, then
* just return.
*/
- ASSERT(!(XFS_BUF_ISDELAYWRITE(bp)));
ASSERT(XFS_BUF_ISSTALE(bp));
ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));
- ASSERT(!(bip->bli_format.blf_flags & XFS_BLF_INODE_BUF));
- ASSERT(bip->bli_format.blf_flags & XFS_BLF_CANCEL);
+ ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_INODE_BUF));
+ ASSERT(!(bip->__bli_format.blf_flags & XFS_BLFT_MASK));
+ ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
ASSERT(bip->bli_item.li_desc->lid_flags & XFS_LID_DIRTY);
ASSERT(tp->t_flags & XFS_TRANS_DIRTY);
return;
}
- /*
- * Clear the dirty bit in the buffer and set the STALE flag
- * in the buf log item. The STALE flag will be used in
- * xfs_buf_item_unpin() to determine if it should clean up
- * when the last reference to the buf item is given up.
- * We set the XFS_BLF_CANCEL flag in the buf log format structure
- * and log the buf item. This will be used at recovery time
- * to determine that copies of the buffer in the log before
- * this should not be replayed.
- * We mark the item descriptor and the transaction dirty so
- * that we'll hold the buffer until after the commit.
- *
- * Since we're invalidating the buffer, we also clear the state
- * about which parts of the buffer have been logged. We also
- * clear the flag indicating that this is an inode buffer since
- * the data in the buffer will no longer be valid.
- *
- * We set the stale bit in the buffer as well since we're getting
- * rid of it.
- */
- XFS_BUF_UNDELAYWRITE(bp);
- XFS_BUF_STALE(bp);
+ xfs_buf_stale(bp);
+
bip->bli_flags |= XFS_BLI_STALE;
bip->bli_flags &= ~(XFS_BLI_INODE_BUF | XFS_BLI_LOGGED | XFS_BLI_DIRTY);
- bip->bli_format.blf_flags &= ~XFS_BLF_INODE_BUF;
- bip->bli_format.blf_flags |= XFS_BLF_CANCEL;
- memset((char *)(bip->bli_format.blf_data_map), 0,
- (bip->bli_format.blf_map_size * sizeof(uint)));
+ bip->__bli_format.blf_flags &= ~XFS_BLF_INODE_BUF;
+ bip->__bli_format.blf_flags |= XFS_BLF_CANCEL;
+ bip->__bli_format.blf_flags &= ~XFS_BLFT_MASK;
+ for (i = 0; i < bip->bli_format_count; i++) {
+ memset(bip->bli_formats[i].blf_data_map, 0,
+ (bip->bli_formats[i].blf_map_size * sizeof(uint)));
+ }
bip->bli_item.li_desc->lid_flags |= XFS_LID_DIRTY;
tp->t_flags |= XFS_TRANS_DIRTY;
}
@@ -779,22 +722,20 @@ xfs_trans_inode_buf(
xfs_trans_t *tp,
xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip;
-
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_flags |= XFS_BLI_INODE_BUF;
+ xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
}
/*
* This call is used to indicate that the buffer is going to
* be staled and was an inode buffer. This means it gets
- * special processing during unpin - where any inodes
+ * special processing during unpin - where any inodes
* associated with the buffer should be removed from ail.
* There is also special processing during recovery,
* any replay of the inodes in the buffer needs to be
@@ -805,17 +746,15 @@ xfs_trans_stale_inode_buf(
xfs_trans_t *tp,
xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip;
-
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_flags |= XFS_BLI_STALE_INODE;
bip->bli_item.li_cb = xfs_buf_iodone;
+ xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
}
/*
@@ -832,18 +771,73 @@ xfs_trans_inode_alloc_buf(
xfs_trans_t *tp,
xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip;
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
-
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bip->bli_flags |= XFS_BLI_INODE_ALLOC_BUF;
+ xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF);
+}
+
+/*
+ * Mark the buffer as ordered for this transaction. This means
+ * that the contents of the buffer are not recorded in the transaction
+ * but it is tracked in the AIL as though it were. This allows us
+ * to record logical changes in transactions rather than the physical
+ * changes we make to the buffer without changing writeback ordering
+ * constraints of metadata buffers.
+ */
+void
+xfs_trans_ordered_buf(
+ struct xfs_trans *tp,
+ struct xfs_buf *bp)
+{
+ struct xfs_buf_log_item *bip = bp->b_fspriv;
+
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
+ ASSERT(atomic_read(&bip->bli_refcount) > 0);
+
+ bip->bli_flags |= XFS_BLI_ORDERED;
+ trace_xfs_buf_item_ordered(bip);
+}
+
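+
[Note: an ordered buffer is committed with no logged data regions; the buf item is still tracked in the AIL so writeback ordering is preserved, while the change itself is described logically elsewhere in the transaction. A hypothetical usage, assuming the buffer is already joined to the transaction:

	xfs_trans_ordered_buf(tp, bp);		/* sets XFS_BLI_ORDERED */
	xfs_trans_log_buf(tp, bp, 0, 0);	/* marks it dirty/logged, logs no range */
]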
+/*
+ * Set the type of the buffer for log recovery so that it can correctly identify
+ * and hence attach the correct buffer ops to the buffer after replay.
+ */
+void
+xfs_trans_buf_set_type(
+ struct xfs_trans *tp,
+ struct xfs_buf *bp,
+ enum xfs_blft type)
+{
+ struct xfs_buf_log_item *bip = bp->b_fspriv;
+
+ if (!tp)
+ return;
+
+ ASSERT(bp->b_transp == tp);
+ ASSERT(bip != NULL);
+ ASSERT(atomic_read(&bip->bli_refcount) > 0);
+
+ xfs_blft_to_flags(&bip->__bli_format, type);
}
+void
+xfs_trans_buf_copy_type(
+ struct xfs_buf *dst_bp,
+ struct xfs_buf *src_bp)
+{
+ struct xfs_buf_log_item *sbip = src_bp->b_fspriv;
+ struct xfs_buf_log_item *dbip = dst_bp->b_fspriv;
+ enum xfs_blft type;
+
+ type = xfs_blft_from_flags(&sbip->__bli_format);
+ xfs_blft_to_flags(&dbip->__bli_format, type);
+}
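
[Note: both helpers above lean on xfs_blft_to_flags()/xfs_blft_from_flags(), which stash the type in otherwise unused bits of blf_flags. A sketch of their likely shape, assuming XFS_BLFT_MASK and XFS_BLFT_SHIFT select the type field as defined in xfs_buf_item.h:

	static inline void
	xfs_blft_to_flags(struct xfs_buf_log_format *blf, enum xfs_blft type)
	{
		blf->blf_flags &= ~XFS_BLFT_MASK;
		blf->blf_flags |= ((type << XFS_BLFT_SHIFT) & XFS_BLFT_MASK);
	}

	static inline __uint16_t
	xfs_blft_from_flags(struct xfs_buf_log_format *blf)
	{
		return (blf->blf_flags & XFS_BLFT_MASK) >> XFS_BLFT_SHIFT;
	}
]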
/*
* Similar to xfs_trans_inode_buf(), this marks the buffer as a cluster of
@@ -862,17 +856,28 @@ xfs_trans_dquot_buf(
xfs_buf_t *bp,
uint type)
{
- xfs_buf_log_item_t *bip;
+ struct xfs_buf_log_item *bip = bp->b_fspriv;
- ASSERT(XFS_BUF_ISBUSY(bp));
- ASSERT(XFS_BUF_FSPRIVATE2(bp, xfs_trans_t *) == tp);
- ASSERT(XFS_BUF_FSPRIVATE(bp, void *) != NULL);
ASSERT(type == XFS_BLF_UDQUOT_BUF ||
type == XFS_BLF_PDQUOT_BUF ||
type == XFS_BLF_GDQUOT_BUF);
- bip = XFS_BUF_FSPRIVATE(bp, xfs_buf_log_item_t *);
- ASSERT(atomic_read(&bip->bli_refcount) > 0);
+ bip->__bli_format.blf_flags |= type;
+
+ switch (type) {
+ case XFS_BLF_UDQUOT_BUF:
+ type = XFS_BLFT_UDQUOT_BUF;
+ break;
+ case XFS_BLF_PDQUOT_BUF:
+ type = XFS_BLFT_PDQUOT_BUF;
+ break;
+ case XFS_BLF_GDQUOT_BUF:
+ type = XFS_BLFT_GDQUOT_BUF;
+ break;
+ default:
+ type = XFS_BLFT_UNKNOWN_BUF;
+ break;
+ }
- bip->bli_format.blf_flags |= type;
+ xfs_trans_buf_set_type(tp, bp, type);
}
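
[Note: callers keep passing the legacy XFS_BLF_*DQUOT_BUF format flags; the switch above translates them into the richer xfs_blft type so recovery can attach the right dquot buffer ops. A hypothetical caller marking a user-dquot buffer:

	xfs_trans_dquot_buf(tp, bp, XFS_BLF_UDQUOT_BUF);
]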