Diffstat (limited to 'fs/xfs/xfs_log_cil.c')
 fs/xfs/xfs_log_cil.c | 762 ++++++++++++++++++++++++++++++----------------------
 1 file changed, 466 insertions(+), 296 deletions(-)
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 23d6ceb5e97..b3425b34e3d 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -17,80 +17,20 @@
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
-#include "xfs_trans_priv.h"
-#include "xfs_log_priv.h"
+#include "xfs_log_format.h"
+#include "xfs_shared.h"
+#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_error.h"
#include "xfs_alloc.h"
-
-/*
- * Perform initial CIL structure initialisation. If the CIL is not
- * enabled in this filesystem, ensure the log->l_cilp is null so
- * we can check this conditional to determine if we are doing delayed
- * logging or not.
- */
-int
-xlog_cil_init(
- struct log *log)
-{
- struct xfs_cil *cil;
- struct xfs_cil_ctx *ctx;
-
- log->l_cilp = NULL;
- if (!(log->l_mp->m_flags & XFS_MOUNT_DELAYLOG))
- return 0;
-
- cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL);
- if (!cil)
- return ENOMEM;
-
- ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL);
- if (!ctx) {
- kmem_free(cil);
- return ENOMEM;
- }
-
- INIT_LIST_HEAD(&cil->xc_cil);
- INIT_LIST_HEAD(&cil->xc_committing);
- spin_lock_init(&cil->xc_cil_lock);
- init_rwsem(&cil->xc_ctx_lock);
- sv_init(&cil->xc_commit_wait, SV_DEFAULT, "cilwait");
-
- INIT_LIST_HEAD(&ctx->committing);
- INIT_LIST_HEAD(&ctx->busy_extents);
- ctx->sequence = 1;
- ctx->cil = cil;
- cil->xc_ctx = ctx;
- cil->xc_current_sequence = ctx->sequence;
-
- cil->xc_log = log;
- log->l_cilp = cil;
- return 0;
-}
-
-void
-xlog_cil_destroy(
- struct log *log)
-{
- if (!log->l_cilp)
- return;
-
- if (log->l_cilp->xc_ctx) {
- if (log->l_cilp->xc_ctx->ticket)
- xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket);
- kmem_free(log->l_cilp->xc_ctx);
- }
-
- ASSERT(list_empty(&log->l_cilp->xc_cil));
- kmem_free(log->l_cilp);
-}
+#include "xfs_extent_busy.h"
+#include "xfs_discard.h"
+#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
+#include "xfs_log.h"
+#include "xfs_log_priv.h"
/*
* Allocate a new ticket. Failing to get a new ticket makes it really hard to
@@ -106,7 +46,7 @@ xlog_cil_destroy(
*/
static struct xlog_ticket *
xlog_cil_ticket_alloc(
- struct log *log)
+ struct xlog *log)
{
struct xlog_ticket *tic;
@@ -134,11 +74,8 @@ xlog_cil_ticket_alloc(
*/
void
xlog_cil_init_post_recovery(
- struct log *log)
+ struct xlog *log)
{
- if (!log->l_cilp)
- return;
-
log->l_cilp->xc_ctx->ticket = xlog_cil_ticket_alloc(log);
log->l_cilp->xc_ctx->sequence = 1;
log->l_cilp->xc_ctx->commit_lsn = xlog_assign_lsn(log->l_curr_cycle,
@@ -146,6 +83,53 @@ xlog_cil_init_post_recovery(
}
/*
+ * Prepare the log item for insertion into the CIL. Calculate the difference in
+ * log space and vectors it will consume, and if it is a new item pin it as
+ * well.
+ */
+STATIC void
+xfs_cil_prepare_item(
+ struct xlog *log,
+ struct xfs_log_vec *lv,
+ struct xfs_log_vec *old_lv,
+ int *diff_len,
+ int *diff_iovecs)
+{
+ /* Account for the new LV being passed in */
+ if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) {
+ *diff_len += lv->lv_bytes;
+ *diff_iovecs += lv->lv_niovecs;
+ }
+
+ /*
+ * If there is no old LV, this is the first time we've seen the item in
+ * this CIL context and so we need to pin it. If we are replacing the
+ * old_lv, then remove the space it accounts for and free it.
+ */
+ if (!old_lv)
+ lv->lv_item->li_ops->iop_pin(lv->lv_item);
+ else if (old_lv != lv) {
+ ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED);
+
+ *diff_len -= old_lv->lv_bytes;
+ *diff_iovecs -= old_lv->lv_niovecs;
+ kmem_free(old_lv);
+ }
+
+ /* attach new log vector to log item */
+ lv->lv_item->li_lv = lv;
+
+ /*
+ * If this is the first time the item is being committed to the
+ * CIL, store the sequence number on the log item so we can
+ * tell in future commits whether this is the first checkpoint
+ * the item is being committed into.
+ */
+ if (!lv->lv_item->li_seq)
+ lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
+}
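
To make the relogging delta accounting above concrete: suppose an item already in the CIL carried a 3-iovec, 120-byte log vector and is relogged with a 4-iovec, 160-byte one. A minimal standalone sketch of the arithmetic (the numbers are hypothetical, not taken from the patch):

#include <stdio.h>

int main(void)
{
	int diff_len = 0, diff_iovecs = 0;

	/* account for the new LV being passed in */
	diff_len += 160;
	diff_iovecs += 4;

	/* replacing an old LV: remove the space it accounted for */
	diff_len -= 120;
	diff_iovecs -= 3;

	/* only the delta is charged against the checkpoint ticket */
	printf("delta: %d bytes, %d iovecs\n", diff_len, diff_iovecs);
	return 0;
}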
+
+/*
 * Format log items into flat buffers
*
* For delayed logging, we need to hold a formatted buffer containing all the
@@ -172,136 +156,171 @@ xlog_cil_init_post_recovery(
* directly out of the objects themselves.
*/
static void
-xlog_cil_format_items(
- struct log *log,
- struct xfs_log_vec *log_vector)
+xlog_cil_insert_format_items(
+ struct xlog *log,
+ struct xfs_trans *tp,
+ int *diff_len,
+ int *diff_iovecs)
{
- struct xfs_log_vec *lv;
+ struct xfs_log_item_desc *lidp;
- ASSERT(log_vector);
- for (lv = log_vector; lv; lv = lv->lv_next) {
- void *ptr;
- int index;
- int len = 0;
- /* build the vector array and calculate it's length */
- IOP_FORMAT(lv->lv_item, lv->lv_iovecp);
- for (index = 0; index < lv->lv_niovecs; index++)
- len += lv->lv_iovecp[index].i_len;
+ /* Bail out if we didn't find a log item. */
+ if (list_empty(&tp->t_items)) {
+ ASSERT(0);
+ return;
+ }
- lv->lv_buf_len = len;
- lv->lv_buf = kmem_alloc(lv->lv_buf_len, KM_SLEEP|KM_NOFS);
- ptr = lv->lv_buf;
+ list_for_each_entry(lidp, &tp->t_items, lid_trans) {
+ struct xfs_log_item *lip = lidp->lid_item;
+ struct xfs_log_vec *lv;
+ struct xfs_log_vec *old_lv;
+ int niovecs = 0;
+ int nbytes = 0;
+ int buf_size;
+ bool ordered = false;
+
+ /* Skip items which aren't dirty in this transaction. */
+ if (!(lidp->lid_flags & XFS_LID_DIRTY))
+ continue;
- for (index = 0; index < lv->lv_niovecs; index++) {
- struct xfs_log_iovec *vec = &lv->lv_iovecp[index];
+ /* get number of vecs and size of data to be stored */
+ lip->li_ops->iop_size(lip, &niovecs, &nbytes);
+
+ /* Skip items that do not have any vectors for writing */
+ if (!niovecs)
+ continue;
- memcpy(ptr, vec->i_addr, vec->i_len);
- vec->i_addr = ptr;
- ptr += vec->i_len;
+ /*
+ * Ordered items need to be tracked but we do not wish to write
+ * them. We need a logvec to track the object, but we do not
+ * need an iovec or buffer to be allocated for copying data.
+ */
+ if (niovecs == XFS_LOG_VEC_ORDERED) {
+ ordered = true;
+ niovecs = 0;
+ nbytes = 0;
}
- ASSERT(ptr == lv->lv_buf + lv->lv_buf_len);
- }
-}
-/*
- * Prepare the log item for insertion into the CIL. Calculate the difference in
- * log space and vectors it will consume, and if it is a new item pin it as
- * well.
- */
-STATIC void
-xfs_cil_prepare_item(
- struct log *log,
- struct xfs_log_vec *lv,
- int *len,
- int *diff_iovecs)
-{
- struct xfs_log_vec *old = lv->lv_item->li_lv;
-
- if (old) {
- /* existing lv on log item, space used is a delta */
- ASSERT(!list_empty(&lv->lv_item->li_cil));
- ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs);
-
- *len += lv->lv_buf_len - old->lv_buf_len;
- *diff_iovecs += lv->lv_niovecs - old->lv_niovecs;
- kmem_free(old->lv_buf);
- kmem_free(old);
- } else {
- /* new lv, must pin the log item */
- ASSERT(!lv->lv_item->li_lv);
- ASSERT(list_empty(&lv->lv_item->li_cil));
-
- *len += lv->lv_buf_len;
- *diff_iovecs += lv->lv_niovecs;
- IOP_PIN(lv->lv_item);
+ /*
+ * We 64-bit align the length of each iovec so that the start
+ * of the next one is naturally aligned. We'll need to
+ * account for that slack space here. Then round nbytes up
+ * to 64-bit alignment so that the initial buffer alignment is
+ * easy to calculate and verify.
+ */
+ nbytes += niovecs * sizeof(uint64_t);
+ nbytes = round_up(nbytes, sizeof(uint64_t));
- }
+ /* grab the old item if it exists for reservation accounting */
+ old_lv = lip->li_lv;
- /* attach new log vector to log item */
- lv->lv_item->li_lv = lv;
+ /*
+ * The data buffer needs to start 64-bit aligned, so round up
+ * that space to ensure we can align it appropriately and not
+ * overrun the buffer.
+ */
+ buf_size = nbytes +
+ round_up((sizeof(struct xfs_log_vec) +
+ niovecs * sizeof(struct xfs_log_iovec)),
+ sizeof(uint64_t));
- /*
- * If this is the first time the item is being committed to the
- * CIL, store the sequence number on the log item so we can
- * tell in future commits whether this is the first checkpoint
- * the item is being committed into.
- */
- if (!lv->lv_item->li_seq)
- lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence;
+ /* compare to existing item size */
+ if (lip->li_lv && buf_size <= lip->li_lv->lv_size) {
+ /* same or smaller, optimise common overwrite case */
+ lv = lip->li_lv;
+ lv->lv_next = NULL;
+
+ if (ordered)
+ goto insert;
+
+ /*
+ * set the item up as though it is a new insertion so
+ * that the space reservation accounting is correct.
+ */
+ *diff_iovecs -= lv->lv_niovecs;
+ *diff_len -= lv->lv_bytes;
+ } else {
+ /* allocate new data chunk */
+ lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS);
+ lv->lv_item = lip;
+ lv->lv_size = buf_size;
+ if (ordered) {
+ /* track as an ordered logvec */
+ ASSERT(lip->li_lv == NULL);
+ lv->lv_buf_len = XFS_LOG_VEC_ORDERED;
+ goto insert;
+ }
+ lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1];
+ }
+
+ /* Ensure the lv is set up according to ->iop_size */
+ lv->lv_niovecs = niovecs;
+
+ /* The allocated data region lies beyond the iovec region */
+ lv->lv_buf_len = 0;
+ lv->lv_bytes = 0;
+ lv->lv_buf = (char *)lv + buf_size - nbytes;
+ ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t)));
+
+ lip->li_ops->iop_format(lip, lv);
+insert:
+ ASSERT(lv->lv_buf_len <= nbytes);
+ xfs_cil_prepare_item(log, lv, old_lv, diff_len, diff_iovecs);
+ }
}
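
The 64-bit alignment rules above determine the single allocation backing each log vector. Here is a hedged userspace sketch of the sizing arithmetic; round_up() mirrors the kernel macro, and the two struct sizes are illustrative stand-ins, not the real xfs_log_vec/xfs_log_iovec layouts:

#include <stdio.h>
#include <stdint.h>

#define round_up(x, y)	((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
	int niovecs = 2;
	int nbytes = 13 + 100;	/* raw payload sizes from ->iop_size */
	size_t lv_hdr = 48;	/* stand-in for sizeof(struct xfs_log_vec) */
	size_t iovec_sz = 16;	/* stand-in for sizeof(struct xfs_log_iovec) */
	size_t buf_size;

	/* slack for 64-bit aligning each iovec, then align the total */
	nbytes += niovecs * sizeof(uint64_t);
	nbytes = round_up(nbytes, sizeof(uint64_t));

	/* header plus iovec array, rounded up so the data region that
	 * follows it starts 64-bit aligned */
	buf_size = nbytes +
		round_up(lv_hdr + niovecs * iovec_sz, sizeof(uint64_t));

	printf("nbytes=%d buf_size=%zu\n", nbytes, buf_size);
	return 0;
}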
/*
* Insert the log items into the CIL and calculate the difference in space
* consumed by the item. Add the space to the checkpoint ticket and calculate
* if the change requires additional log metadata. If it does, take that space
- * as well. Remove the amount of space we addded to the checkpoint ticket from
+ * as well. Remove the amount of space we added to the checkpoint ticket from
* the current transaction ticket so that the accounting works out correctly.
*/
static void
xlog_cil_insert_items(
- struct log *log,
- struct xfs_log_vec *log_vector,
- struct xlog_ticket *ticket)
+ struct xlog *log,
+ struct xfs_trans *tp)
{
struct xfs_cil *cil = log->l_cilp;
struct xfs_cil_ctx *ctx = cil->xc_ctx;
- struct xfs_log_vec *lv;
+ struct xfs_log_item_desc *lidp;
int len = 0;
int diff_iovecs = 0;
int iclog_space;
- ASSERT(log_vector);
+ ASSERT(tp);
/*
- * Do all the accounting aggregation and switching of log vectors
- * around in a separate loop to the insertion of items into the CIL.
- * Then we can do a separate loop to update the CIL within a single
- * lock/unlock pair. This reduces the number of round trips on the CIL
- * lock from O(nr_logvectors) to O(1) and greatly reduces the overall
- * hold time for the transaction commit.
- *
- * If this is the first time the item is being placed into the CIL in
- * this context, pin it so it can't be written to disk until the CIL is
- * flushed to the iclog and the iclog written to disk.
- *
* We can do this safely because the context can't checkpoint until we
* are done so it doesn't matter exactly how we update the CIL.
*/
- for (lv = log_vector; lv; lv = lv->lv_next)
- xfs_cil_prepare_item(log, lv, &len, &diff_iovecs);
-
- /* account for space used by new iovec headers */
- len += diff_iovecs * sizeof(xlog_op_header_t);
+ xlog_cil_insert_format_items(log, tp, &len, &diff_iovecs);
+ /*
+ * Now (re-)position everything modified at the tail of the CIL.
+ * We do this here so we only need to take the CIL lock once during
+ * the transaction commit.
+ */
spin_lock(&cil->xc_cil_lock);
+ list_for_each_entry(lidp, &tp->t_items, lid_trans) {
+ struct xfs_log_item *lip = lidp->lid_item;
- /* move the items to the tail of the CIL */
- for (lv = log_vector; lv; lv = lv->lv_next)
- list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil);
+ /* Skip items which aren't dirty in this transaction. */
+ if (!(lidp->lid_flags & XFS_LID_DIRTY))
+ continue;
+
+ list_move_tail(&lip->li_cil, &cil->xc_cil);
+ }
+ /* account for space used by new iovec headers */
+ len += diff_iovecs * sizeof(xlog_op_header_t);
ctx->nvecs += diff_iovecs;
+ /* attach the transaction to the CIL if it has any busy extents */
+ if (!list_empty(&tp->t_busy))
+ list_splice_init(&tp->t_busy, &ctx->busy_extents);
+
/*
* Now transfer enough transaction reservation to the context ticket
* for the checkpoint. The context ticket is special - the unit
@@ -310,10 +329,8 @@ xlog_cil_insert_items(
* during the transaction commit.
*/
if (ctx->ticket->t_curr_res == 0) {
- /* first commit in checkpoint, steal the header reservation */
- ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len);
ctx->ticket->t_curr_res = ctx->ticket->t_unit_res;
- ticket->t_curr_res -= ctx->ticket->t_unit_res;
+ tp->t_ticket->t_curr_res -= ctx->ticket->t_unit_res;
}
/* do we need space for more log record headers? */
@@ -327,10 +344,10 @@ xlog_cil_insert_items(
hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header);
ctx->ticket->t_unit_res += hdrs;
ctx->ticket->t_curr_res += hdrs;
- ticket->t_curr_res -= hdrs;
- ASSERT(ticket->t_curr_res >= len);
+ tp->t_ticket->t_curr_res -= hdrs;
+ ASSERT(tp->t_ticket->t_curr_res >= len);
}
- ticket->t_curr_res -= len;
+ tp->t_ticket->t_curr_res -= len;
ctx->space_used += len;
spin_unlock(&cil->xc_cil_lock);
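
The hunk above elides how the header count is derived before it is multiplied by the per-header size. The idea is that each iclog the checkpoint payload spills into needs its own log record header; a hedged sketch of that calculation (the exact rule is an assumption from context, not the verbatim kernel code):

/* how many extra record headers the new bytes require, given the
 * usable payload space of a single iclog */
static int
xlog_cil_extra_hdrs_example(
	int	space_used,
	int	len,
	int	iclog_space)
{
	if (space_used / iclog_space == (space_used + len) / iclog_space)
		return 0;	/* still fits within the current iclog */

	/* round up: one header per iclog the payload spills into */
	return (len + iclog_space - 1) / iclog_space;
}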
@@ -344,7 +361,6 @@ xlog_cil_free_logvec(
for (lv = log_vector; lv; ) {
struct xfs_log_vec *next = lv->lv_next;
- kmem_free(lv->lv_buf);
kmem_free(lv);
lv = next;
}
@@ -361,24 +377,36 @@ xlog_cil_committed(
int abort)
{
struct xfs_cil_ctx *ctx = args;
- struct xfs_log_vec *lv;
- int abortflag = abort ? XFS_LI_ABORTED : 0;
- struct xfs_busy_extent *busyp, *n;
+ struct xfs_mount *mp = ctx->cil->xc_log->l_mp;
- /* unpin all the log items */
- for (lv = ctx->lv_chain; lv; lv = lv->lv_next ) {
- xfs_trans_item_committed(lv->lv_item, ctx->start_lsn,
- abortflag);
- }
+ xfs_trans_committed_bulk(ctx->cil->xc_log->l_ailp, ctx->lv_chain,
+ ctx->start_lsn, abort);
- list_for_each_entry_safe(busyp, n, &ctx->busy_extents, list)
- xfs_alloc_busy_clear(ctx->cil->xc_log->l_mp, busyp);
+ xfs_extent_busy_sort(&ctx->busy_extents);
+ xfs_extent_busy_clear(mp, &ctx->busy_extents,
+ (mp->m_flags & XFS_MOUNT_DISCARD) && !abort);
- spin_lock(&ctx->cil->xc_cil_lock);
+ /*
+ * If we are aborting the commit, wake up anyone waiting on the
+ * committing list. If we don't, then during a shutdown we can leave
+ * processes waiting in xlog_cil_force_lsn() on a sequence commit that
+ * will never happen because we aborted it.
+ */
+ spin_lock(&ctx->cil->xc_push_lock);
+ if (abort)
+ wake_up_all(&ctx->cil->xc_commit_wait);
list_del(&ctx->committing);
- spin_unlock(&ctx->cil->xc_cil_lock);
+ spin_unlock(&ctx->cil->xc_push_lock);
xlog_cil_free_logvec(ctx->lv_chain);
+
+ if (!list_empty(&ctx->busy_extents)) {
+ ASSERT(mp->m_flags & XFS_MOUNT_DISCARD);
+
+ xfs_discard_extents(mp, &ctx->busy_extents);
+ xfs_extent_busy_clear(mp, &ctx->busy_extents, false);
+ }
+
kmem_free(ctx);
}
@@ -398,8 +426,7 @@ xlog_cil_committed(
*/
STATIC int
xlog_cil_push(
- struct log *log,
- xfs_lsn_t push_seq)
+ struct xlog *log)
{
struct xfs_cil *cil = log->l_cilp;
struct xfs_log_vec *lv;
@@ -407,47 +434,42 @@ xlog_cil_push(
struct xfs_cil_ctx *new_ctx;
struct xlog_in_core *commit_iclog;
struct xlog_ticket *tic;
- int num_lv;
int num_iovecs;
- int len;
int error = 0;
struct xfs_trans_header thdr;
struct xfs_log_iovec lhdr;
struct xfs_log_vec lvhdr = { NULL };
xfs_lsn_t commit_lsn;
+ xfs_lsn_t push_seq;
if (!cil)
return 0;
- ASSERT(!push_seq || push_seq <= cil->xc_ctx->sequence);
-
new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
new_ctx->ticket = xlog_cil_ticket_alloc(log);
- /*
- * Lock out transaction commit, but don't block for background pushes
- * unless we are well over the CIL space limit. See the definition of
- * XLOG_CIL_HARD_SPACE_LIMIT() for the full explanation of the logic
- * used here.
- */
- if (!down_write_trylock(&cil->xc_ctx_lock)) {
- if (!push_seq &&
- cil->xc_ctx->space_used < XLOG_CIL_HARD_SPACE_LIMIT(log))
- goto out_free_ticket;
- down_write(&cil->xc_ctx_lock);
- }
+ down_write(&cil->xc_ctx_lock);
ctx = cil->xc_ctx;
- /* check if we've anything to push */
- if (list_empty(&cil->xc_cil))
- goto out_skip;
+ spin_lock(&cil->xc_push_lock);
+ push_seq = cil->xc_push_seq;
+ ASSERT(push_seq <= ctx->sequence);
- /* check for spurious background flush */
- if (!push_seq && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
+ /*
+ * Check if we've anything to push. If there is nothing, then we don't
+ * move on to a new sequence number and so we have to be able to push
+ * this sequence again later.
+ */
+ if (list_empty(&cil->xc_cil)) {
+ cil->xc_push_seq = 0;
+ spin_unlock(&cil->xc_push_lock);
goto out_skip;
+ }
+ spin_unlock(&cil->xc_push_lock);
+
/* check for a previously pushed sequence */
- if (push_seq && push_seq < cil->xc_ctx->sequence)
+ if (push_seq < cil->xc_ctx->sequence)
goto out_skip;
/*
@@ -457,12 +479,9 @@ xlog_cil_push(
* side which is currently locked out by the flush lock.
*/
lv = NULL;
- num_lv = 0;
num_iovecs = 0;
- len = 0;
while (!list_empty(&cil->xc_cil)) {
struct xfs_log_item *item;
- int i;
item = list_first_entry(&cil->xc_cil,
struct xfs_log_item, li_cil);
@@ -473,11 +492,7 @@ xlog_cil_push(
lv->lv_next = item->li_lv;
lv = item->li_lv;
item->li_lv = NULL;
-
- num_lv++;
num_iovecs += lv->lv_niovecs;
- for (i = 0; i < lv->lv_niovecs; i++)
- len += lv->lv_iovecp[i].i_len;
}
/*
@@ -493,13 +508,6 @@ xlog_cil_push(
cil->xc_ctx = new_ctx;
/*
- * mirror the new sequence into the cil structure so that we can do
- * unlocked checks against the current sequence in log forces without
- * risking deferencing a freed context pointer.
- */
- cil->xc_current_sequence = new_ctx->sequence;
-
- /*
* The switch is now done, so we can drop the context lock and move out
* of a shared context. We can't just go straight to the commit record,
* though - we need to synchronise with previous and future commits so
@@ -517,10 +525,17 @@ xlog_cil_push(
* Hence we need to add this context to the committing context list so
* that higher sequences will wait for us to write out a commit record
* before they do.
+ *
+ * xfs_log_force_lsn requires us to mirror the new sequence into the cil
+ * structure atomically with the addition of this sequence to the
+ * committing list. This also ensures that we can do unlocked checks
+ * against the current sequence in log forces without risking
+ * dereferencing a freed context pointer.
*/
- spin_lock(&cil->xc_cil_lock);
+ spin_lock(&cil->xc_push_lock);
+ cil->xc_current_sequence = new_ctx->sequence;
list_add(&ctx->committing, &cil->xc_committing);
- spin_unlock(&cil->xc_cil_lock);
+ spin_unlock(&cil->xc_push_lock);
up_write(&cil->xc_ctx_lock);
/*
@@ -548,18 +563,28 @@ xlog_cil_push(
error = xlog_write(log, &lvhdr, tic, &ctx->start_lsn, NULL, 0);
if (error)
- goto out_abort;
+ goto out_abort_free_ticket;
/*
* now that we've written the checkpoint into the log, strictly
* order the commit records so replay will get them in the right order.
*/
restart:
- spin_lock(&cil->xc_cil_lock);
+ spin_lock(&cil->xc_push_lock);
list_for_each_entry(new_ctx, &cil->xc_committing, committing) {
/*
+ * Avoid getting stuck in this loop because we were woken by the
+ * shutdown, but then went back to sleep once already in the
+ * shutdown state.
+ */
+ if (XLOG_FORCED_SHUTDOWN(log)) {
+ spin_unlock(&cil->xc_push_lock);
+ goto out_abort_free_ticket;
+ }
+
+ /*
* Higher sequences will wait for this one so skip them.
- * Don't wait for own own sequence, either.
+ * Don't wait for our own sequence, either.
*/
if (new_ctx->sequence >= ctx->sequence)
continue;
@@ -568,14 +593,15 @@ restart:
* It is still being pushed! Wait for the push to
* complete, then start again from the beginning.
*/
- sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
+ xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock);
goto restart;
}
}
- spin_unlock(&cil->xc_cil_lock);
+ spin_unlock(&cil->xc_push_lock);
+ /* xfs_log_done always frees the ticket on error. */
commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0);
- if (error || commit_lsn == -1)
+ if (commit_lsn == -1)
goto out_abort;
/* attach all the transactions w/ busy extents to iclog */
@@ -590,26 +616,121 @@ restart:
* callbacks to the iclog we can assign the commit LSN to the context
* and wake up anyone who is waiting for the commit to complete.
*/
- spin_lock(&cil->xc_cil_lock);
+ spin_lock(&cil->xc_push_lock);
ctx->commit_lsn = commit_lsn;
- sv_broadcast(&cil->xc_commit_wait);
- spin_unlock(&cil->xc_cil_lock);
+ wake_up_all(&cil->xc_commit_wait);
+ spin_unlock(&cil->xc_push_lock);
/* release the hounds! */
return xfs_log_release_iclog(log->l_mp, commit_iclog);
out_skip:
up_write(&cil->xc_ctx_lock);
-out_free_ticket:
xfs_log_ticket_put(new_ctx->ticket);
kmem_free(new_ctx);
return 0;
+out_abort_free_ticket:
+ xfs_log_ticket_put(tic);
out_abort:
xlog_cil_committed(ctx, XFS_LI_ABORTED);
return XFS_ERROR(EIO);
}
+static void
+xlog_cil_push_work(
+ struct work_struct *work)
+{
+ struct xfs_cil *cil = container_of(work, struct xfs_cil,
+ xc_push_work);
+ xlog_cil_push(cil->xc_log);
+}
+
+/*
+ * We need to push the CIL every so often so we don't cache more than we can fit in
+ * the log. The limit really is that a checkpoint can't be more than half the
+ * log (the current checkpoint is not allowed to overwrite the previous
+ * checkpoint), but commit latency and memory usage limit this to a smaller
+ * size.
+ */
+static void
+xlog_cil_push_background(
+ struct xlog *log)
+{
+ struct xfs_cil *cil = log->l_cilp;
+
+ /*
+ * The CIL won't be empty because we are called while holding the
+ * context lock, so whatever we added to the CIL will still be there.
+ */
+ ASSERT(!list_empty(&cil->xc_cil));
+
+ /*
+ * don't do a background push if we haven't used up all the
+ * space available yet.
+ */
+ if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
+ return;
+
+ spin_lock(&cil->xc_push_lock);
+ if (cil->xc_push_seq < cil->xc_current_sequence) {
+ cil->xc_push_seq = cil->xc_current_sequence;
+ queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
+ }
+ spin_unlock(&cil->xc_push_lock);
+}
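
XLOG_CIL_SPACE_LIMIT() is defined in xfs_log_priv.h and is not part of this diff. As a hedged sketch of the threshold's shape, it is a fixed fraction of the log size, well below the hard half-the-log checkpoint ceiling (the exact shift here is an assumption):

/* assumed shape: background push once the CIL holds ~1/8 of the log */
#define XLOG_CIL_SPACE_LIMIT(log)	((log)->l_logsize >> 3)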
+
+/*
+ * xlog_cil_push_now() is used to trigger an immediate CIL push to the sequence
+ * number that is passed. When it returns, the work will be queued for
+ * @push_seq, but it won't be completed. The caller is expected to do any
+ * waiting for push_seq to complete if it is required.
+ */
+static void
+xlog_cil_push_now(
+ struct xlog *log,
+ xfs_lsn_t push_seq)
+{
+ struct xfs_cil *cil = log->l_cilp;
+
+ if (!cil)
+ return;
+
+ ASSERT(push_seq && push_seq <= cil->xc_current_sequence);
+
+ /* start on any pending background push to minimise wait time on it */
+ flush_work(&cil->xc_push_work);
+
+ /*
+ * If the CIL is empty or we've already pushed the sequence then
+ * there's no work we need to do.
+ */
+ spin_lock(&cil->xc_push_lock);
+ if (list_empty(&cil->xc_cil) || push_seq <= cil->xc_push_seq) {
+ spin_unlock(&cil->xc_push_lock);
+ return;
+ }
+
+ cil->xc_push_seq = push_seq;
+ queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work);
+ spin_unlock(&cil->xc_push_lock);
+}
+
+bool
+xlog_cil_empty(
+ struct xlog *log)
+{
+ struct xfs_cil *cil = log->l_cilp;
+ bool empty = false;
+
+ spin_lock(&cil->xc_push_lock);
+ if (list_empty(&cil->xc_cil))
+ empty = true;
+ spin_unlock(&cil->xc_push_lock);
+ return empty;
+}
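
This helper exists so callers can get a snapshot answer without holding the CIL locks across their own logic. A hedged usage sketch (log covering is the kind of caller assumed here, not one shown in this patch):

/* sketch: only consider covering the log when the CIL is empty */
if (!xlog_cil_empty(log))
	return false;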
+
/*
 * Commit a transaction to the Committed Item List.
*
@@ -619,62 +740,37 @@ out_abort:
* transaction to the checkpoint context so we carry the busy extents through
* to checkpoint completion, and then unlock all the items in the transaction.
*
- * For more specific information about the order of operations in
- * xfs_log_commit_cil() please refer to the comments in
- * xfs_trans_commit_iclog().
- *
* Called with the context lock already held in read mode to lock out
* background commit, returns without it held once background commits are
* allowed again.
*/
-int
+void
xfs_log_commit_cil(
struct xfs_mount *mp,
struct xfs_trans *tp,
- struct xfs_log_vec *log_vector,
xfs_lsn_t *commit_lsn,
int flags)
{
- struct log *log = mp->m_log;
+ struct xlog *log = mp->m_log;
+ struct xfs_cil *cil = log->l_cilp;
int log_flags = 0;
- int push = 0;
if (flags & XFS_TRANS_RELEASE_LOG_RES)
log_flags = XFS_LOG_REL_PERM_RESERV;
- if (XLOG_FORCED_SHUTDOWN(log)) {
- xlog_cil_free_logvec(log_vector);
- return XFS_ERROR(EIO);
- }
-
- /*
- * do all the hard work of formatting items (including memory
- * allocation) outside the CIL context lock. This prevents stalling CIL
- * pushes when we are low on memory and a transaction commit spends a
- * lot of time in memory reclaim.
- */
- xlog_cil_format_items(log, log_vector);
-
/* lock out background commit */
- down_read(&log->l_cilp->xc_ctx_lock);
- if (commit_lsn)
- *commit_lsn = log->l_cilp->xc_ctx->sequence;
+ down_read(&cil->xc_ctx_lock);
- xlog_cil_insert_items(log, log_vector, tp->t_ticket);
+ xlog_cil_insert_items(log, tp);
/* check we didn't blow the reservation */
if (tp->t_ticket->t_curr_res < 0)
- xlog_print_tic_res(log->l_mp, tp->t_ticket);
+ xlog_print_tic_res(mp, tp->t_ticket);
- /* attach the transaction to the CIL if it has any busy extents */
- if (!list_empty(&tp->t_busy)) {
- spin_lock(&log->l_cilp->xc_cil_lock);
- list_splice_init(&tp->t_busy,
- &log->l_cilp->xc_ctx->busy_extents);
- spin_unlock(&log->l_cilp->xc_cil_lock);
- }
+ tp->t_commit_lsn = cil->xc_ctx->sequence;
+ if (commit_lsn)
+ *commit_lsn = tp->t_commit_lsn;
- tp->t_commit_lsn = *commit_lsn;
xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
xfs_trans_unreserve_and_mod_sb(tp);
@@ -689,24 +785,11 @@ xfs_log_commit_cil(
* the log items. This affects (at least) processing of stale buffers,
* inodes and EFIs.
*/
- xfs_trans_free_items(tp, *commit_lsn, 0);
+ xfs_trans_free_items(tp, tp->t_commit_lsn, 0);
- /* check for background commit before unlock */
- if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
- push = 1;
+ xlog_cil_push_background(log);
- up_read(&log->l_cilp->xc_ctx_lock);
-
- /*
- * We need to push CIL every so often so we don't cache more than we
- * can fit in the log. The limit really is that a checkpoint can't be
- * more than half the log (the current checkpoint is not allowed to
- * overwrite the previous checkpoint), but commit latency and memory
- * usage limit this to a smaller size in most cases.
- */
- if (push)
- xlog_cil_push(log, 0);
- return 0;
+ up_read(&cil->xc_ctx_lock);
}
/*
@@ -718,13 +801,10 @@ xfs_log_commit_cil(
*
 * We return the current commit lsn to allow the callers to determine if an
* iclog flush is necessary following this call.
- *
- * XXX: Initially, just push the CIL unconditionally and return whatever
- * commit lsn is there. It'll be empty, so this is broken for now.
*/
xfs_lsn_t
xlog_cil_force_lsn(
- struct log *log,
+ struct xlog *log,
xfs_lsn_t sequence)
{
struct xfs_cil *cil = log->l_cilp;
@@ -738,8 +818,8 @@ xlog_cil_force_lsn(
* xlog_cil_push() handles racing pushes for the same sequence,
* so no need to deal with it here.
*/
- if (sequence == cil->xc_current_sequence)
- xlog_cil_push(log, sequence);
+restart:
+ xlog_cil_push_now(log, sequence);
/*
* See if we can find a previous sequence still committing.
@@ -747,9 +827,15 @@ xlog_cil_force_lsn(
* before allowing the force of push_seq to go ahead. Hence block
* on commits for those as well.
*/
-restart:
- spin_lock(&cil->xc_cil_lock);
+ spin_lock(&cil->xc_push_lock);
list_for_each_entry(ctx, &cil->xc_committing, committing) {
+ /*
+ * Avoid getting stuck in this loop because we were woken by the
+ * shutdown, but then went back to sleep once already in the
+ * shutdown state.
+ */
+ if (XLOG_FORCED_SHUTDOWN(log))
+ goto out_shutdown;
if (ctx->sequence > sequence)
continue;
if (!ctx->commit_lsn) {
@@ -757,7 +843,7 @@ restart:
* It is still being pushed! Wait for the push to
* complete, then start again from the beginning.
*/
- sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
+ xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock);
goto restart;
}
if (ctx->sequence != sequence)
@@ -765,8 +851,39 @@ restart:
/* found it! */
commit_lsn = ctx->commit_lsn;
}
- spin_unlock(&cil->xc_cil_lock);
+
+ /*
+ * The call to xlog_cil_push_now() executes the push in the background.
+ * Hence by the time we have got here our sequence may not have been
+ * pushed yet. This is true if the current sequence still matches the
+ * push sequence after the above wait loop and the CIL still contains
+ * dirty objects.
+ *
+ * When the push occurs, it will empty the CIL and atomically increment
+ * the current sequence past the push sequence and move it into the
+ * committing list. Of course, if the CIL is clean at the time of the
+ * push, it won't have pushed the CIL at all, so in that case we should
+ * try the push for this sequence again from the start just in case.
+ */
+ if (sequence == cil->xc_current_sequence &&
+ !list_empty(&cil->xc_cil)) {
+ spin_unlock(&cil->xc_push_lock);
+ goto restart;
+ }
+
+ spin_unlock(&cil->xc_push_lock);
return commit_lsn;
+
+ /*
+ * We detected a shutdown in progress. We need to trigger the log force
+ * to pass through its iclog state machine error handling, even though
+ * we are already in a shutdown state. Hence we can't return
+ * NULLCOMMITLSN here as that has special meaning to log forces (i.e.
+ * LSN is already stable), so we return a zero LSN instead.
+ */
+out_shutdown:
+ spin_unlock(&cil->xc_push_lock);
+ return 0;
}
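
For context, a hedged sketch of how a log force consumes the two special return values described above; the real consumer is xfs_log_force_lsn(), but this exact flow is illustrative rather than verbatim kernel code:

static xfs_lsn_t
xlog_cil_force_example(
	struct xlog	*log,
	xfs_lsn_t	sequence)
{
	xfs_lsn_t	lsn = xlog_cil_force_lsn(log, sequence);

	if (lsn == NULLCOMMITLSN)
		return 0;	/* already stable, nothing to flush */

	/* lsn == 0 signals shutdown; the caller still runs the iclog
	 * force so its state machine error handling executes */
	return lsn;
}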
/*
@@ -784,8 +901,6 @@ xfs_log_item_in_current_chkpt(
{
struct xfs_cil_ctx *ctx;
- if (!(lip->li_mountp->m_flags & XFS_MOUNT_DELAYLOG))
- return false;
if (list_empty(&lip->li_cil))
return false;
@@ -800,3 +915,58 @@ xfs_log_item_in_current_chkpt(
return false;
return true;
}
+
+/*
+ * Perform initial CIL structure initialisation.
+ */
+int
+xlog_cil_init(
+ struct xlog *log)
+{
+ struct xfs_cil *cil;
+ struct xfs_cil_ctx *ctx;
+
+ cil = kmem_zalloc(sizeof(*cil), KM_SLEEP|KM_MAYFAIL);
+ if (!cil)
+ return ENOMEM;
+
+ ctx = kmem_zalloc(sizeof(*ctx), KM_SLEEP|KM_MAYFAIL);
+ if (!ctx) {
+ kmem_free(cil);
+ return ENOMEM;
+ }
+
+ INIT_WORK(&cil->xc_push_work, xlog_cil_push_work);
+ INIT_LIST_HEAD(&cil->xc_cil);
+ INIT_LIST_HEAD(&cil->xc_committing);
+ spin_lock_init(&cil->xc_cil_lock);
+ spin_lock_init(&cil->xc_push_lock);
+ init_rwsem(&cil->xc_ctx_lock);
+ init_waitqueue_head(&cil->xc_commit_wait);
+
+ INIT_LIST_HEAD(&ctx->committing);
+ INIT_LIST_HEAD(&ctx->busy_extents);
+ ctx->sequence = 1;
+ ctx->cil = cil;
+ cil->xc_ctx = ctx;
+ cil->xc_current_sequence = ctx->sequence;
+
+ cil->xc_log = log;
+ log->l_cilp = cil;
+ return 0;
+}
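
xlog_cil_init() is now unconditional (the delayed-logging mount flag check is gone), so a failure here fails the mount. A hedged sketch of the call site during log allocation (the label and surrounding names are assumptions):

/* sketch: mount-time log setup (assumed shape of the caller) */
error = xlog_cil_init(log);
if (error)
	goto out_free_log;	/* positive ENOMEM, old XFS convention */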
+
+void
+xlog_cil_destroy(
+ struct xlog *log)
+{
+ if (log->l_cilp->xc_ctx) {
+ if (log->l_cilp->xc_ctx->ticket)
+ xfs_log_ticket_put(log->l_cilp->xc_ctx->ticket);
+ kmem_free(log->l_cilp->xc_ctx);
+ }
+
+ ASSERT(list_empty(&log->l_cilp->xc_cil));
+ kmem_free(log->l_cilp);
+}
+