aboutsummaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_buf_item.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_buf_item.c')
-rw-r--r--fs/xfs/xfs_buf_item.c324
1 files changed, 185 insertions, 139 deletions
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index cf263476d6b..4654338b03f 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -17,17 +17,18 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_types.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
+#include "xfs_trans.h"
#include "xfs_buf_item.h"
#include "xfs_trans_priv.h"
#include "xfs_error.h"
#include "xfs_trace.h"
+#include "xfs_log.h"
kmem_zone_t *xfs_buf_item_zone;
@@ -39,6 +40,14 @@ static inline struct xfs_buf_log_item *BUF_ITEM(struct xfs_log_item *lip)
STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp);
+static inline int
+xfs_buf_log_format_size(
+ struct xfs_buf_log_format *blfp)
+{
+ return offsetof(struct xfs_buf_log_format, blf_data_map) +
+ (blfp->blf_map_size * sizeof(blfp->blf_data_map[0]));
+}
+
/*
* This returns the number of log iovecs needed to log the
* given buf log item.
@@ -49,25 +58,27 @@ STATIC void xfs_buf_do_callbacks(struct xfs_buf *bp);
*
* If the XFS_BLI_STALE flag has been set, then log nothing.
*/
-STATIC uint
+STATIC void
xfs_buf_item_size_segment(
struct xfs_buf_log_item *bip,
- struct xfs_buf_log_format *blfp)
+ struct xfs_buf_log_format *blfp,
+ int *nvecs,
+ int *nbytes)
{
struct xfs_buf *bp = bip->bli_buf;
- uint nvecs;
int next_bit;
int last_bit;
last_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
if (last_bit == -1)
- return 0;
+ return;
/*
* initial count for a dirty buffer is 2 vectors - the format structure
* and the first dirty region.
*/
- nvecs = 2;
+ *nvecs += 2;
+ *nbytes += xfs_buf_log_format_size(blfp) + XFS_BLF_CHUNK;
while (last_bit != -1) {
/*
@@ -87,18 +98,17 @@ xfs_buf_item_size_segment(
break;
} else if (next_bit != last_bit + 1) {
last_bit = next_bit;
- nvecs++;
+ (*nvecs)++;
} else if (xfs_buf_offset(bp, next_bit * XFS_BLF_CHUNK) !=
(xfs_buf_offset(bp, last_bit * XFS_BLF_CHUNK) +
XFS_BLF_CHUNK)) {
last_bit = next_bit;
- nvecs++;
+ (*nvecs)++;
} else {
last_bit++;
}
+ *nbytes += XFS_BLF_CHUNK;
}
-
- return nvecs;
}
/*
@@ -118,12 +128,13 @@ xfs_buf_item_size_segment(
* If the XFS_BLI_STALE flag has been set, then log nothing but the buf log
* format structures.
*/
-STATIC uint
+STATIC void
xfs_buf_item_size(
- struct xfs_log_item *lip)
+ struct xfs_log_item *lip,
+ int *nvecs,
+ int *nbytes)
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
- uint nvecs;
int i;
ASSERT(atomic_read(&bip->bli_refcount) > 0);
@@ -135,11 +146,26 @@ xfs_buf_item_size(
*/
trace_xfs_buf_item_size_stale(bip);
ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
- return bip->bli_format_count;
+ *nvecs += bip->bli_format_count;
+ for (i = 0; i < bip->bli_format_count; i++) {
+ *nbytes += xfs_buf_log_format_size(&bip->bli_formats[i]);
+ }
+ return;
}
ASSERT(bip->bli_flags & XFS_BLI_LOGGED);
+ if (bip->bli_flags & XFS_BLI_ORDERED) {
+ /*
+ * The buffer has been logged just to order it.
+ * It is not being included in the transaction
+ * commit, so no vectors are used at all.
+ */
+ trace_xfs_buf_item_size_ordered(bip);
+ *nvecs = XFS_LOG_VEC_ORDERED;
+ return;
+ }
+
/*
* the vector count is based on the number of buffer vectors we have
* dirty bits in. This will only be greater than one when we have a
@@ -149,30 +175,54 @@ xfs_buf_item_size(
* count for the extra buf log format structure that will need to be
* written.
*/
- nvecs = 0;
for (i = 0; i < bip->bli_format_count; i++) {
- nvecs += xfs_buf_item_size_segment(bip, &bip->bli_formats[i]);
+ xfs_buf_item_size_segment(bip, &bip->bli_formats[i],
+ nvecs, nbytes);
}
-
trace_xfs_buf_item_size(bip);
- return nvecs;
}
-static struct xfs_log_iovec *
+static inline void
+xfs_buf_item_copy_iovec(
+ struct xfs_log_vec *lv,
+ struct xfs_log_iovec **vecp,
+ struct xfs_buf *bp,
+ uint offset,
+ int first_bit,
+ uint nbits)
+{
+ offset += first_bit * XFS_BLF_CHUNK;
+ xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_BCHUNK,
+ xfs_buf_offset(bp, offset),
+ nbits * XFS_BLF_CHUNK);
+}
+
+static inline bool
+xfs_buf_item_straddle(
+ struct xfs_buf *bp,
+ uint offset,
+ int next_bit,
+ int last_bit)
+{
+ return xfs_buf_offset(bp, offset + (next_bit << XFS_BLF_SHIFT)) !=
+ (xfs_buf_offset(bp, offset + (last_bit << XFS_BLF_SHIFT)) +
+ XFS_BLF_CHUNK);
+}
+
+static void
xfs_buf_item_format_segment(
struct xfs_buf_log_item *bip,
- struct xfs_log_iovec *vecp,
+ struct xfs_log_vec *lv,
+ struct xfs_log_iovec **vecp,
uint offset,
struct xfs_buf_log_format *blfp)
{
struct xfs_buf *bp = bip->bli_buf;
uint base_size;
- uint nvecs;
int first_bit;
int last_bit;
int next_bit;
uint nbits;
- uint buffer_offset;
/* copy the flags across from the base format item */
blfp->blf_flags = bip->__bli_format.blf_flags;
@@ -182,24 +232,19 @@ xfs_buf_item_format_segment(
* the actual size of the dirty bitmap rather than the size of the in
* memory structure.
*/
- base_size = offsetof(struct xfs_buf_log_format, blf_data_map) +
- (blfp->blf_map_size * sizeof(blfp->blf_data_map[0]));
+ base_size = xfs_buf_log_format_size(blfp);
- nvecs = 0;
first_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size, 0);
if (!(bip->bli_flags & XFS_BLI_STALE) && first_bit == -1) {
/*
* If the map is not be dirty in the transaction, mark
* the size as zero and do not advance the vector pointer.
*/
- goto out;
+ return;
}
- vecp->i_addr = blfp;
- vecp->i_len = base_size;
- vecp->i_type = XLOG_REG_TYPE_BFORMAT;
- vecp++;
- nvecs = 1;
+ blfp = xlog_copy_iovec(lv, vecp, XLOG_REG_TYPE_BFORMAT, blfp, base_size);
+ blfp->blf_size = 1;
if (bip->bli_flags & XFS_BLI_STALE) {
/*
@@ -209,13 +254,13 @@ xfs_buf_item_format_segment(
*/
trace_xfs_buf_item_format_stale(bip);
ASSERT(blfp->blf_flags & XFS_BLF_CANCEL);
- goto out;
+ return;
}
+
/*
* Fill in an iovec for each set of contiguous chunks.
*/
-
last_bit = first_bit;
nbits = 1;
for (;;) {
@@ -228,47 +273,22 @@ xfs_buf_item_format_segment(
next_bit = xfs_next_bit(blfp->blf_data_map, blfp->blf_map_size,
(uint)last_bit + 1);
/*
- * If we run out of bits fill in the last iovec and get
- * out of the loop.
- * Else if we start a new set of bits then fill in the
- * iovec for the series we were looking at and start
- * counting the bits in the new one.
- * Else we're still in the same set of bits so just
- * keep counting and scanning.
+ * If we run out of bits fill in the last iovec and get out of
+ * the loop. Else if we start a new set of bits then fill in
+ * the iovec for the series we were looking at and start
+ * counting the bits in the new one. Else we're still in the
+ * same set of bits so just keep counting and scanning.
*/
if (next_bit == -1) {
- buffer_offset = offset + first_bit * XFS_BLF_CHUNK;
- vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
- vecp->i_len = nbits * XFS_BLF_CHUNK;
- vecp->i_type = XLOG_REG_TYPE_BCHUNK;
- nvecs++;
+ xfs_buf_item_copy_iovec(lv, vecp, bp, offset,
+ first_bit, nbits);
+ blfp->blf_size++;
break;
- } else if (next_bit != last_bit + 1) {
- buffer_offset = offset + first_bit * XFS_BLF_CHUNK;
- vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
- vecp->i_len = nbits * XFS_BLF_CHUNK;
- vecp->i_type = XLOG_REG_TYPE_BCHUNK;
- nvecs++;
- vecp++;
- first_bit = next_bit;
- last_bit = next_bit;
- nbits = 1;
- } else if (xfs_buf_offset(bp, offset +
- (next_bit << XFS_BLF_SHIFT)) !=
- (xfs_buf_offset(bp, offset +
- (last_bit << XFS_BLF_SHIFT)) +
- XFS_BLF_CHUNK)) {
- buffer_offset = offset + first_bit * XFS_BLF_CHUNK;
- vecp->i_addr = xfs_buf_offset(bp, buffer_offset);
- vecp->i_len = nbits * XFS_BLF_CHUNK;
- vecp->i_type = XLOG_REG_TYPE_BCHUNK;
-/*
- * You would think we need to bump the nvecs here too, but we do not
- * this number is used by recovery, and it gets confused by the boundary
- * split here
- * nvecs++;
- */
- vecp++;
+ } else if (next_bit != last_bit + 1 ||
+ xfs_buf_item_straddle(bp, offset, next_bit, last_bit)) {
+ xfs_buf_item_copy_iovec(lv, vecp, bp, offset,
+ first_bit, nbits);
+ blfp->blf_size++;
first_bit = next_bit;
last_bit = next_bit;
nbits = 1;
@@ -277,9 +297,6 @@ xfs_buf_item_format_segment(
nbits++;
}
}
-out:
- blfp->blf_size = nvecs;
- return vecp;
}
/*
@@ -291,10 +308,11 @@ out:
STATIC void
xfs_buf_item_format(
struct xfs_log_item *lip,
- struct xfs_log_iovec *vecp)
+ struct xfs_log_vec *lv)
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
struct xfs_buf *bp = bip->bli_buf;
+ struct xfs_log_iovec *vecp = NULL;
uint offset = 0;
int i;
@@ -304,21 +322,39 @@ xfs_buf_item_format(
/*
* If it is an inode buffer, transfer the in-memory state to the
- * format flags and clear the in-memory state. We do not transfer
+ * format flags and clear the in-memory state.
+ *
+ * For buffer based inode allocation, we do not transfer
* this state if the inode buffer allocation has not yet been committed
* to the log as setting the XFS_BLI_INODE_BUF flag will prevent
* correct replay of the inode allocation.
+ *
+ * For icreate item based inode allocation, the buffers aren't written
+ * to the journal during allocation, and hence we should always tag the
+ * buffer as an inode buffer so that the correct unlinked list replay
+ * occurs during recovery.
*/
if (bip->bli_flags & XFS_BLI_INODE_BUF) {
- if (!((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
+ if (xfs_sb_version_hascrc(&lip->li_mountp->m_sb) ||
+ !((bip->bli_flags & XFS_BLI_INODE_ALLOC_BUF) &&
xfs_log_item_in_current_chkpt(lip)))
bip->__bli_format.blf_flags |= XFS_BLF_INODE_BUF;
bip->bli_flags &= ~XFS_BLI_INODE_BUF;
}
+ if ((bip->bli_flags & (XFS_BLI_ORDERED|XFS_BLI_STALE)) ==
+ XFS_BLI_ORDERED) {
+ /*
+ * The buffer has been logged just to order it. It is not being
+ * included in the transaction commit, so don't format it.
+ */
+ trace_xfs_buf_item_format_ordered(bip);
+ return;
+ }
+
for (i = 0; i < bip->bli_format_count; i++) {
- vecp = xfs_buf_item_format_segment(bip, vecp, offset,
- &bip->bli_formats[i]);
+ xfs_buf_item_format_segment(bip, lv, &vecp, offset,
+ &bip->bli_formats[i]);
offset += bp->b_maps[i].bm_len;
}
@@ -345,6 +381,7 @@ xfs_buf_item_pin(
ASSERT(atomic_read(&bip->bli_refcount) > 0);
ASSERT((bip->bli_flags & XFS_BLI_LOGGED) ||
+ (bip->bli_flags & XFS_BLI_ORDERED) ||
(bip->bli_flags & XFS_BLI_STALE));
trace_xfs_buf_item_pin(bip);
@@ -458,6 +495,14 @@ xfs_buf_item_unpin(
}
}
+/*
+ * Buffer IO error rate limiting. Limit it to no more than 10 messages per 30
+ * seconds so as to not spam logs too much on repeated detection of the same
+ * buffer being bad..
+ */
+
+DEFINE_RATELIMIT_STATE(xfs_buf_write_fail_rl_state, 30 * HZ, 10);
+
STATIC uint
xfs_buf_item_push(
struct xfs_log_item *lip,
@@ -486,6 +531,14 @@ xfs_buf_item_push(
trace_xfs_buf_item_push(bip);
+ /* has a previous flush failed due to IO errors? */
+ if ((bp->b_flags & XBF_WRITE_FAIL) &&
+ ___ratelimit(&xfs_buf_write_fail_rl_state, "XFS:")) {
+ xfs_warn(bp->b_target->bt_mount,
+"Detected failing async write on buffer block 0x%llx. Retrying async write.\n",
+ (long long)bp->b_bn);
+ }
+
if (!xfs_buf_delwri_queue(bp, buffer_list))
rval = XFS_ITEM_FLUSHING;
xfs_buf_unlock(bp);
@@ -517,8 +570,9 @@ xfs_buf_item_unlock(
{
struct xfs_buf_log_item *bip = BUF_ITEM(lip);
struct xfs_buf *bp = bip->bli_buf;
- int aborted, clean, i;
- uint hold;
+ bool clean;
+ bool aborted;
+ int flags;
/* Clear the buffer's association with this transaction. */
bp->b_transp = NULL;
@@ -529,23 +583,21 @@ xfs_buf_item_unlock(
* (cancelled) buffers at unpin time, but we'll never go through the
* pin/unpin cycle if we abort inside commit.
*/
- aborted = (lip->li_flags & XFS_LI_ABORTED) != 0;
-
+ aborted = (lip->li_flags & XFS_LI_ABORTED) ? true : false;
/*
- * Before possibly freeing the buf item, determine if we should
- * release the buffer at the end of this routine.
+ * Before possibly freeing the buf item, copy the per-transaction state
+ * so we can reference it safely later after clearing it from the
+ * buffer log item.
*/
- hold = bip->bli_flags & XFS_BLI_HOLD;
-
- /* Clear the per transaction state. */
- bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD);
+ flags = bip->bli_flags;
+ bip->bli_flags &= ~(XFS_BLI_LOGGED | XFS_BLI_HOLD | XFS_BLI_ORDERED);
/*
* If the buf item is marked stale, then don't do anything. We'll
* unlock the buffer and free the buf item when the buffer is unpinned
* for the last time.
*/
- if (bip->bli_flags & XFS_BLI_STALE) {
+ if (flags & XFS_BLI_STALE) {
trace_xfs_buf_item_unlock_stale(bip);
ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
if (!aborted) {
@@ -562,26 +614,45 @@ xfs_buf_item_unlock(
* be the only reference to the buf item, so we free it anyway
* regardless of whether it is dirty or not. A dirty abort implies a
* shutdown, anyway.
+ *
+ * Ordered buffers are dirty but may have no recorded changes, so ensure
+ * we only release clean items here.
*/
- clean = 1;
- for (i = 0; i < bip->bli_format_count; i++) {
- if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map,
- bip->bli_formats[i].blf_map_size)) {
- clean = 0;
- break;
+ clean = (flags & XFS_BLI_DIRTY) ? false : true;
+ if (clean) {
+ int i;
+ for (i = 0; i < bip->bli_format_count; i++) {
+ if (!xfs_bitmap_empty(bip->bli_formats[i].blf_data_map,
+ bip->bli_formats[i].blf_map_size)) {
+ clean = false;
+ break;
+ }
}
}
- if (clean)
- xfs_buf_item_relse(bp);
- else if (aborted) {
- if (atomic_dec_and_test(&bip->bli_refcount)) {
+
+ /*
+ * Clean buffers, by definition, cannot be in the AIL. However, aborted
+ * buffers may be dirty and hence in the AIL. Therefore if we are
+ * aborting a buffer and we've just taken the last refernce away, we
+ * have to check if it is in the AIL before freeing it. We need to free
+ * it in this case, because an aborted transaction has already shut the
+ * filesystem down and this is the last chance we will have to do so.
+ */
+ if (atomic_dec_and_test(&bip->bli_refcount)) {
+ if (clean)
+ xfs_buf_item_relse(bp);
+ else if (aborted) {
ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
+ if (lip->li_flags & XFS_LI_IN_AIL) {
+ spin_lock(&lip->li_ailp->xa_lock);
+ xfs_trans_ail_delete(lip->li_ailp, lip,
+ SHUTDOWN_LOG_IO_ERROR);
+ }
xfs_buf_item_relse(bp);
}
- } else
- atomic_dec(&bip->bli_refcount);
+ }
- if (!hold)
+ if (!(flags & XFS_BLI_HOLD))
xfs_buf_relse(bp);
}
@@ -725,20 +796,6 @@ xfs_buf_item_init(
bip->bli_formats[i].blf_map_size = map_size;
}
-#ifdef XFS_TRANS_DEBUG
- /*
- * Allocate the arrays for tracking what needs to be logged
- * and what our callers request to be logged. bli_orig
- * holds a copy of the original, clean buffer for comparison
- * against, and bli_logged keeps a 1 bit flag per byte in
- * the buffer to indicate which bytes the callers have asked
- * to have logged.
- */
- bip->bli_orig = kmem_alloc(BBTOB(bp->b_length), KM_SLEEP);
- memcpy(bip->bli_orig, bp->b_addr, BBTOB(bp->b_length));
- bip->bli_logged = kmem_zalloc(BBTOB(bp->b_length) / NBBY, KM_SLEEP);
-#endif
-
/*
* Put the buf item into the list of items attached to the
* buffer at the front.
@@ -753,9 +810,8 @@ xfs_buf_item_init(
* Mark bytes first through last inclusive as dirty in the buf
* item's bitmap.
*/
-void
+static void
xfs_buf_item_log_segment(
- struct xfs_buf_log_item *bip,
uint first,
uint last,
uint *map)
@@ -847,12 +903,6 @@ xfs_buf_item_log(
struct xfs_buf *bp = bip->bli_buf;
/*
- * Mark the item as having some dirty data for
- * quick reference in xfs_buf_item_dirty.
- */
- bip->bli_flags |= XFS_BLI_DIRTY;
-
- /*
* walk each buffer segment and mark them dirty appropriately.
*/
start = 0;
@@ -869,7 +919,7 @@ xfs_buf_item_log(
if (end > last)
end = last;
- xfs_buf_item_log_segment(bip, first, end,
+ xfs_buf_item_log_segment(first, end,
&bip->bli_formats[i].blf_data_map[0]);
start += bp->b_maps[i].bm_len;
@@ -878,7 +928,7 @@ xfs_buf_item_log(
/*
- * Return 1 if the buffer has some data that has been logged (at any
+ * Return 1 if the buffer has been logged or ordered in a transaction (at any
* point, not just the current transaction) and 0 if not.
*/
uint
@@ -892,11 +942,6 @@ STATIC void
xfs_buf_item_free(
xfs_buf_log_item_t *bip)
{
-#ifdef XFS_TRANS_DEBUG
- kmem_free(bip->bli_orig);
- kmem_free(bip->bli_logged);
-#endif /* XFS_TRANS_DEBUG */
-
xfs_buf_item_free_format(bip);
kmem_zone_free(xfs_buf_item_zone, bip);
}
@@ -912,11 +957,11 @@ void
xfs_buf_item_relse(
xfs_buf_t *bp)
{
- xfs_buf_log_item_t *bip;
+ xfs_buf_log_item_t *bip = bp->b_fspriv;
trace_xfs_buf_item_relse(bp, _RET_IP_);
+ ASSERT(!(bip->bli_item.li_flags & XFS_LI_IN_AIL));
- bip = bp->b_fspriv;
bp->b_fspriv = bip->bli_item.li_bio_list;
if (bp->b_fspriv == NULL)
bp->b_iodone = NULL;
@@ -1007,7 +1052,7 @@ xfs_buf_iodone_callbacks(
static ulong lasttime;
static xfs_buftarg_t *lasttarg;
- if (likely(!xfs_buf_geterror(bp)))
+ if (likely(!bp->b_error))
goto do_callbacks;
/*
@@ -1046,8 +1091,9 @@ xfs_buf_iodone_callbacks(
xfs_buf_ioerror(bp, 0); /* errno of 0 unsets the flag */
- if (!XFS_BUF_ISSTALE(bp)) {
- bp->b_flags |= XBF_WRITE | XBF_ASYNC | XBF_DONE;
+ if (!(bp->b_flags & (XBF_STALE|XBF_WRITE_FAIL))) {
+ bp->b_flags |= XBF_WRITE | XBF_ASYNC |
+ XBF_DONE | XBF_WRITE_FAIL;
xfs_buf_iorequest(bp);
} else {
xfs_buf_relse(bp);