diff options
Diffstat (limited to 'fs/xfs/xfs_log_cil.c')
| -rw-r--r-- | fs/xfs/xfs_log_cil.c | 477 |
1 files changed, 283 insertions, 194 deletions
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c index ddc4529d07d..b3425b34e3d 100644 --- a/fs/xfs/xfs_log_cil.c +++ b/fs/xfs/xfs_log_cil.c @@ -17,11 +17,9 @@ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_log.h" -#include "xfs_trans.h" -#include "xfs_trans_priv.h" -#include "xfs_log_priv.h" +#include "xfs_log_format.h" +#include "xfs_shared.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" @@ -29,6 +27,10 @@ #include "xfs_alloc.h" #include "xfs_extent_busy.h" #include "xfs_discard.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" +#include "xfs_log.h" +#include "xfs_log_priv.h" /* * Allocate a new ticket. Failing to get a new ticket makes it really hard to @@ -81,6 +83,53 @@ xlog_cil_init_post_recovery( } /* + * Prepare the log item for insertion into the CIL. Calculate the difference in + * log space and vectors it will consume, and if it is a new item pin it as + * well. + */ +STATIC void +xfs_cil_prepare_item( + struct xlog *log, + struct xfs_log_vec *lv, + struct xfs_log_vec *old_lv, + int *diff_len, + int *diff_iovecs) +{ + /* Account for the new LV being passed in */ + if (lv->lv_buf_len != XFS_LOG_VEC_ORDERED) { + *diff_len += lv->lv_bytes; + *diff_iovecs += lv->lv_niovecs; + } + + /* + * If there is no old LV, this is the first time we've seen the item in + * this CIL context and so we need to pin it. If we are replacing the + * old_lv, then remove the space it accounts for and free it. + */ + if (!old_lv) + lv->lv_item->li_ops->iop_pin(lv->lv_item); + else if (old_lv != lv) { + ASSERT(lv->lv_buf_len != XFS_LOG_VEC_ORDERED); + + *diff_len -= old_lv->lv_bytes; + *diff_iovecs -= old_lv->lv_niovecs; + kmem_free(old_lv); + } + + /* attach new log vector to log item */ + lv->lv_item->li_lv = lv; + + /* + * If this is the first time the item is being committed to the + * CIL, store the sequence number on the log item so we can + * tell in future commits whether this is the first checkpoint + * the item is being committed into. + */ + if (!lv->lv_item->li_seq) + lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence; +} + +/* * Format log item into a flat buffers * * For delayed logging, we need to hold a formatted buffer containing all the @@ -106,120 +155,119 @@ xlog_cil_init_post_recovery( * format the regions into the iclog as though they are being formatted * directly out of the objects themselves. */ -static struct xfs_log_vec * -xlog_cil_prepare_log_vecs( - struct xfs_trans *tp) +static void +xlog_cil_insert_format_items( + struct xlog *log, + struct xfs_trans *tp, + int *diff_len, + int *diff_iovecs) { struct xfs_log_item_desc *lidp; - struct xfs_log_vec *lv = NULL; - struct xfs_log_vec *ret_lv = NULL; /* Bail out if we didn't find a log item. */ if (list_empty(&tp->t_items)) { ASSERT(0); - return NULL; + return; } list_for_each_entry(lidp, &tp->t_items, lid_trans) { - struct xfs_log_vec *new_lv; - void *ptr; - int index; - int len = 0; - uint niovecs; + struct xfs_log_item *lip = lidp->lid_item; + struct xfs_log_vec *lv; + struct xfs_log_vec *old_lv; + int niovecs = 0; + int nbytes = 0; + int buf_size; + bool ordered = false; /* Skip items which aren't dirty in this transaction. */ if (!(lidp->lid_flags & XFS_LID_DIRTY)) continue; + /* get number of vecs and size of data to be stored */ + lip->li_ops->iop_size(lip, &niovecs, &nbytes); + /* Skip items that do not have any vectors for writing */ - niovecs = IOP_SIZE(lidp->lid_item); if (!niovecs) continue; - new_lv = kmem_zalloc(sizeof(*new_lv) + - niovecs * sizeof(struct xfs_log_iovec), - KM_SLEEP); + /* + * Ordered items need to be tracked but we do not wish to write + * them. We need a logvec to track the object, but we do not + * need an iovec or buffer to be allocated for copying data. + */ + if (niovecs == XFS_LOG_VEC_ORDERED) { + ordered = true; + niovecs = 0; + nbytes = 0; + } - /* The allocated iovec region lies beyond the log vector. */ - new_lv->lv_iovecp = (struct xfs_log_iovec *)&new_lv[1]; - new_lv->lv_niovecs = niovecs; - new_lv->lv_item = lidp->lid_item; + /* + * We 64-bit align the length of each iovec so that the start + * of the next one is naturally aligned. We'll need to + * account for that slack space here. Then round nbytes up + * to 64-bit alignment so that the initial buffer alignment is + * easy to calculate and verify. + */ + nbytes += niovecs * sizeof(uint64_t); + nbytes = round_up(nbytes, sizeof(uint64_t)); - /* build the vector array and calculate it's length */ - IOP_FORMAT(new_lv->lv_item, new_lv->lv_iovecp); - for (index = 0; index < new_lv->lv_niovecs; index++) - len += new_lv->lv_iovecp[index].i_len; + /* grab the old item if it exists for reservation accounting */ + old_lv = lip->li_lv; - new_lv->lv_buf_len = len; - new_lv->lv_buf = kmem_alloc(new_lv->lv_buf_len, - KM_SLEEP|KM_NOFS); - ptr = new_lv->lv_buf; + /* + * The data buffer needs to start 64-bit aligned, so round up + * that space to ensure we can align it appropriately and not + * overrun the buffer. + */ + buf_size = nbytes + + round_up((sizeof(struct xfs_log_vec) + + niovecs * sizeof(struct xfs_log_iovec)), + sizeof(uint64_t)); - for (index = 0; index < new_lv->lv_niovecs; index++) { - struct xfs_log_iovec *vec = &new_lv->lv_iovecp[index]; + /* compare to existing item size */ + if (lip->li_lv && buf_size <= lip->li_lv->lv_size) { + /* same or smaller, optimise common overwrite case */ + lv = lip->li_lv; + lv->lv_next = NULL; - memcpy(ptr, vec->i_addr, vec->i_len); - vec->i_addr = ptr; - ptr += vec->i_len; - } - ASSERT(ptr == new_lv->lv_buf + new_lv->lv_buf_len); + if (ordered) + goto insert; - if (!ret_lv) - ret_lv = new_lv; - else - lv->lv_next = new_lv; - lv = new_lv; - } + /* + * set the item up as though it is a new insertion so + * that the space reservation accounting is correct. + */ + *diff_iovecs -= lv->lv_niovecs; + *diff_len -= lv->lv_bytes; + } else { + /* allocate new data chunk */ + lv = kmem_zalloc(buf_size, KM_SLEEP|KM_NOFS); + lv->lv_item = lip; + lv->lv_size = buf_size; + if (ordered) { + /* track as an ordered logvec */ + ASSERT(lip->li_lv == NULL); + lv->lv_buf_len = XFS_LOG_VEC_ORDERED; + goto insert; + } + lv->lv_iovecp = (struct xfs_log_iovec *)&lv[1]; + } - return ret_lv; -} + /* Ensure the lv is set up according to ->iop_size */ + lv->lv_niovecs = niovecs; -/* - * Prepare the log item for insertion into the CIL. Calculate the difference in - * log space and vectors it will consume, and if it is a new item pin it as - * well. - */ -STATIC void -xfs_cil_prepare_item( - struct xlog *log, - struct xfs_log_vec *lv, - int *len, - int *diff_iovecs) -{ - struct xfs_log_vec *old = lv->lv_item->li_lv; - - if (old) { - /* existing lv on log item, space used is a delta */ - ASSERT(!list_empty(&lv->lv_item->li_cil)); - ASSERT(old->lv_buf && old->lv_buf_len && old->lv_niovecs); - - *len += lv->lv_buf_len - old->lv_buf_len; - *diff_iovecs += lv->lv_niovecs - old->lv_niovecs; - kmem_free(old->lv_buf); - kmem_free(old); - } else { - /* new lv, must pin the log item */ - ASSERT(!lv->lv_item->li_lv); - ASSERT(list_empty(&lv->lv_item->li_cil)); - - *len += lv->lv_buf_len; - *diff_iovecs += lv->lv_niovecs; - IOP_PIN(lv->lv_item); + /* The allocated data region lies beyond the iovec region */ + lv->lv_buf_len = 0; + lv->lv_bytes = 0; + lv->lv_buf = (char *)lv + buf_size - nbytes; + ASSERT(IS_ALIGNED((unsigned long)lv->lv_buf, sizeof(uint64_t))); + lip->li_ops->iop_format(lip, lv); +insert: + ASSERT(lv->lv_buf_len <= nbytes); + xfs_cil_prepare_item(log, lv, old_lv, diff_len, diff_iovecs); } - - /* attach new log vector to log item */ - lv->lv_item->li_lv = lv; - - /* - * If this is the first time the item is being committed to the - * CIL, store the sequence number on the log item so we can - * tell in future commits whether this is the first checkpoint - * the item is being committed into. - */ - if (!lv->lv_item->li_seq) - lv->lv_item->li_seq = log->l_cilp->xc_ctx->sequence; } /* @@ -232,47 +280,47 @@ xfs_cil_prepare_item( static void xlog_cil_insert_items( struct xlog *log, - struct xfs_log_vec *log_vector, - struct xlog_ticket *ticket) + struct xfs_trans *tp) { struct xfs_cil *cil = log->l_cilp; struct xfs_cil_ctx *ctx = cil->xc_ctx; - struct xfs_log_vec *lv; + struct xfs_log_item_desc *lidp; int len = 0; int diff_iovecs = 0; int iclog_space; - ASSERT(log_vector); + ASSERT(tp); /* - * Do all the accounting aggregation and switching of log vectors - * around in a separate loop to the insertion of items into the CIL. - * Then we can do a separate loop to update the CIL within a single - * lock/unlock pair. This reduces the number of round trips on the CIL - * lock from O(nr_logvectors) to O(1) and greatly reduces the overall - * hold time for the transaction commit. - * - * If this is the first time the item is being placed into the CIL in - * this context, pin it so it can't be written to disk until the CIL is - * flushed to the iclog and the iclog written to disk. - * * We can do this safely because the context can't checkpoint until we * are done so it doesn't matter exactly how we update the CIL. */ - for (lv = log_vector; lv; lv = lv->lv_next) - xfs_cil_prepare_item(log, lv, &len, &diff_iovecs); - - /* account for space used by new iovec headers */ - len += diff_iovecs * sizeof(xlog_op_header_t); + xlog_cil_insert_format_items(log, tp, &len, &diff_iovecs); + /* + * Now (re-)position everything modified at the tail of the CIL. + * We do this here so we only need to take the CIL lock once during + * the transaction commit. + */ spin_lock(&cil->xc_cil_lock); + list_for_each_entry(lidp, &tp->t_items, lid_trans) { + struct xfs_log_item *lip = lidp->lid_item; + + /* Skip items which aren't dirty in this transaction. */ + if (!(lidp->lid_flags & XFS_LID_DIRTY)) + continue; - /* move the items to the tail of the CIL */ - for (lv = log_vector; lv; lv = lv->lv_next) - list_move_tail(&lv->lv_item->li_cil, &cil->xc_cil); + list_move_tail(&lip->li_cil, &cil->xc_cil); + } + /* account for space used by new iovec headers */ + len += diff_iovecs * sizeof(xlog_op_header_t); ctx->nvecs += diff_iovecs; + /* attach the transaction to the CIL if it has any busy extents */ + if (!list_empty(&tp->t_busy)) + list_splice_init(&tp->t_busy, &ctx->busy_extents); + /* * Now transfer enough transaction reservation to the context ticket * for the checkpoint. The context ticket is special - the unit @@ -281,10 +329,8 @@ xlog_cil_insert_items( * during the transaction commit. */ if (ctx->ticket->t_curr_res == 0) { - /* first commit in checkpoint, steal the header reservation */ - ASSERT(ticket->t_curr_res >= ctx->ticket->t_unit_res + len); ctx->ticket->t_curr_res = ctx->ticket->t_unit_res; - ticket->t_curr_res -= ctx->ticket->t_unit_res; + tp->t_ticket->t_curr_res -= ctx->ticket->t_unit_res; } /* do we need space for more log record headers? */ @@ -298,10 +344,10 @@ xlog_cil_insert_items( hdrs *= log->l_iclog_hsize + sizeof(struct xlog_op_header); ctx->ticket->t_unit_res += hdrs; ctx->ticket->t_curr_res += hdrs; - ticket->t_curr_res -= hdrs; - ASSERT(ticket->t_curr_res >= len); + tp->t_ticket->t_curr_res -= hdrs; + ASSERT(tp->t_ticket->t_curr_res >= len); } - ticket->t_curr_res -= len; + tp->t_ticket->t_curr_res -= len; ctx->space_used += len; spin_unlock(&cil->xc_cil_lock); @@ -315,7 +361,6 @@ xlog_cil_free_logvec( for (lv = log_vector; lv; ) { struct xfs_log_vec *next = lv->lv_next; - kmem_free(lv->lv_buf); kmem_free(lv); lv = next; } @@ -341,9 +386,17 @@ xlog_cil_committed( xfs_extent_busy_clear(mp, &ctx->busy_extents, (mp->m_flags & XFS_MOUNT_DISCARD) && !abort); - spin_lock(&ctx->cil->xc_cil_lock); + /* + * If we are aborting the commit, wake up anyone waiting on the + * committing list. If we don't, then a shutdown we can leave processes + * waiting in xlog_cil_force_lsn() waiting on a sequence commit that + * will never happen because we aborted it. + */ + spin_lock(&ctx->cil->xc_push_lock); + if (abort) + wake_up_all(&ctx->cil->xc_commit_wait); list_del(&ctx->committing); - spin_unlock(&ctx->cil->xc_cil_lock); + spin_unlock(&ctx->cil->xc_push_lock); xlog_cil_free_logvec(ctx->lv_chain); @@ -381,9 +434,7 @@ xlog_cil_push( struct xfs_cil_ctx *new_ctx; struct xlog_in_core *commit_iclog; struct xlog_ticket *tic; - int num_lv; int num_iovecs; - int len; int error = 0; struct xfs_trans_header thdr; struct xfs_log_iovec lhdr; @@ -400,7 +451,7 @@ xlog_cil_push( down_write(&cil->xc_ctx_lock); ctx = cil->xc_ctx; - spin_lock(&cil->xc_cil_lock); + spin_lock(&cil->xc_push_lock); push_seq = cil->xc_push_seq; ASSERT(push_seq <= ctx->sequence); @@ -411,10 +462,10 @@ xlog_cil_push( */ if (list_empty(&cil->xc_cil)) { cil->xc_push_seq = 0; - spin_unlock(&cil->xc_cil_lock); + spin_unlock(&cil->xc_push_lock); goto out_skip; } - spin_unlock(&cil->xc_cil_lock); + spin_unlock(&cil->xc_push_lock); /* check for a previously pushed seqeunce */ @@ -428,12 +479,9 @@ xlog_cil_push( * side which is currently locked out by the flush lock. */ lv = NULL; - num_lv = 0; num_iovecs = 0; - len = 0; while (!list_empty(&cil->xc_cil)) { struct xfs_log_item *item; - int i; item = list_first_entry(&cil->xc_cil, struct xfs_log_item, li_cil); @@ -444,11 +492,7 @@ xlog_cil_push( lv->lv_next = item->li_lv; lv = item->li_lv; item->li_lv = NULL; - - num_lv++; num_iovecs += lv->lv_niovecs; - for (i = 0; i < lv->lv_niovecs; i++) - len += lv->lv_iovecp[i].i_len; } /* @@ -464,13 +508,6 @@ xlog_cil_push( cil->xc_ctx = new_ctx; /* - * mirror the new sequence into the cil structure so that we can do - * unlocked checks against the current sequence in log forces without - * risking deferencing a freed context pointer. - */ - cil->xc_current_sequence = new_ctx->sequence; - - /* * The switch is now done, so we can drop the context lock and move out * of a shared context. We can't just go straight to the commit record, * though - we need to synchronise with previous and future commits so @@ -488,10 +525,17 @@ xlog_cil_push( * Hence we need to add this context to the committing context list so * that higher sequences will wait for us to write out a commit record * before they do. + * + * xfs_log_force_lsn requires us to mirror the new sequence into the cil + * structure atomically with the addition of this sequence to the + * committing list. This also ensures that we can do unlocked checks + * against the current sequence in log forces without risking + * deferencing a freed context pointer. */ - spin_lock(&cil->xc_cil_lock); + spin_lock(&cil->xc_push_lock); + cil->xc_current_sequence = new_ctx->sequence; list_add(&ctx->committing, &cil->xc_committing); - spin_unlock(&cil->xc_cil_lock); + spin_unlock(&cil->xc_push_lock); up_write(&cil->xc_ctx_lock); /* @@ -526,11 +570,21 @@ xlog_cil_push( * order the commit records so replay will get them in the right order. */ restart: - spin_lock(&cil->xc_cil_lock); + spin_lock(&cil->xc_push_lock); list_for_each_entry(new_ctx, &cil->xc_committing, committing) { /* + * Avoid getting stuck in this loop because we were woken by the + * shutdown, but then went back to sleep once already in the + * shutdown state. + */ + if (XLOG_FORCED_SHUTDOWN(log)) { + spin_unlock(&cil->xc_push_lock); + goto out_abort_free_ticket; + } + + /* * Higher sequences will wait for this one so skip them. - * Don't wait for own own sequence, either. + * Don't wait for our own sequence, either. */ if (new_ctx->sequence >= ctx->sequence) continue; @@ -539,11 +593,11 @@ restart: * It is still being pushed! Wait for the push to * complete, then start again from the beginning. */ - xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock); + xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock); goto restart; } } - spin_unlock(&cil->xc_cil_lock); + spin_unlock(&cil->xc_push_lock); /* xfs_log_done always frees the ticket on error. */ commit_lsn = xfs_log_done(log->l_mp, tic, &commit_iclog, 0); @@ -562,10 +616,10 @@ restart: * callbacks to the iclog we can assign the commit LSN to the context * and wake up anyone who is waiting for the commit to complete. */ - spin_lock(&cil->xc_cil_lock); + spin_lock(&cil->xc_push_lock); ctx->commit_lsn = commit_lsn; wake_up_all(&cil->xc_commit_wait); - spin_unlock(&cil->xc_cil_lock); + spin_unlock(&cil->xc_push_lock); /* release the hounds! */ return xfs_log_release_iclog(log->l_mp, commit_iclog); @@ -618,17 +672,23 @@ xlog_cil_push_background( if (cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log)) return; - spin_lock(&cil->xc_cil_lock); + spin_lock(&cil->xc_push_lock); if (cil->xc_push_seq < cil->xc_current_sequence) { cil->xc_push_seq = cil->xc_current_sequence; queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work); } - spin_unlock(&cil->xc_cil_lock); + spin_unlock(&cil->xc_push_lock); } +/* + * xlog_cil_push_now() is used to trigger an immediate CIL push to the sequence + * number that is passed. When it returns, the work will be queued for + * @push_seq, but it won't be completed. The caller is expected to do any + * waiting for push_seq to complete if it is required. + */ static void -xlog_cil_push_foreground( +xlog_cil_push_now( struct xlog *log, xfs_lsn_t push_seq) { @@ -646,17 +706,29 @@ xlog_cil_push_foreground( * If the CIL is empty or we've already pushed the sequence then * there's no work we need to do. */ - spin_lock(&cil->xc_cil_lock); + spin_lock(&cil->xc_push_lock); if (list_empty(&cil->xc_cil) || push_seq <= cil->xc_push_seq) { - spin_unlock(&cil->xc_cil_lock); + spin_unlock(&cil->xc_push_lock); return; } cil->xc_push_seq = push_seq; - spin_unlock(&cil->xc_cil_lock); + queue_work(log->l_mp->m_cil_workqueue, &cil->xc_push_work); + spin_unlock(&cil->xc_push_lock); +} - /* do the push now */ - xlog_cil_push(log); +bool +xlog_cil_empty( + struct xlog *log) +{ + struct xfs_cil *cil = log->l_cilp; + bool empty = false; + + spin_lock(&cil->xc_push_lock); + if (list_empty(&cil->xc_cil)) + empty = true; + spin_unlock(&cil->xc_push_lock); + return empty; } /* @@ -668,15 +740,11 @@ xlog_cil_push_foreground( * transaction to the checkpoint context so we carry the busy extents through * to checkpoint completion, and then unlock all the items in the transaction. * - * For more specific information about the order of operations in - * xfs_log_commit_cil() please refer to the comments in - * xfs_trans_commit_iclog(). - * * Called with the context lock already held in read mode to lock out * background commit, returns without it held once background commits are * allowed again. */ -int +void xfs_log_commit_cil( struct xfs_mount *mp, struct xfs_trans *tp, @@ -684,42 +752,25 @@ xfs_log_commit_cil( int flags) { struct xlog *log = mp->m_log; + struct xfs_cil *cil = log->l_cilp; int log_flags = 0; - struct xfs_log_vec *log_vector; if (flags & XFS_TRANS_RELEASE_LOG_RES) log_flags = XFS_LOG_REL_PERM_RESERV; - /* - * Do all the hard work of formatting items (including memory - * allocation) outside the CIL context lock. This prevents stalling CIL - * pushes when we are low on memory and a transaction commit spends a - * lot of time in memory reclaim. - */ - log_vector = xlog_cil_prepare_log_vecs(tp); - if (!log_vector) - return ENOMEM; - /* lock out background commit */ - down_read(&log->l_cilp->xc_ctx_lock); - if (commit_lsn) - *commit_lsn = log->l_cilp->xc_ctx->sequence; + down_read(&cil->xc_ctx_lock); - xlog_cil_insert_items(log, log_vector, tp->t_ticket); + xlog_cil_insert_items(log, tp); /* check we didn't blow the reservation */ if (tp->t_ticket->t_curr_res < 0) - xlog_print_tic_res(log->l_mp, tp->t_ticket); + xlog_print_tic_res(mp, tp->t_ticket); - /* attach the transaction to the CIL if it has any busy extents */ - if (!list_empty(&tp->t_busy)) { - spin_lock(&log->l_cilp->xc_cil_lock); - list_splice_init(&tp->t_busy, - &log->l_cilp->xc_ctx->busy_extents); - spin_unlock(&log->l_cilp->xc_cil_lock); - } + tp->t_commit_lsn = cil->xc_ctx->sequence; + if (commit_lsn) + *commit_lsn = tp->t_commit_lsn; - tp->t_commit_lsn = *commit_lsn; xfs_log_done(mp, tp->t_ticket, NULL, log_flags); xfs_trans_unreserve_and_mod_sb(tp); @@ -734,12 +785,11 @@ xfs_log_commit_cil( * the log items. This affects (at least) processing of stale buffers, * inodes and EFIs. */ - xfs_trans_free_items(tp, *commit_lsn, 0); + xfs_trans_free_items(tp, tp->t_commit_lsn, 0); xlog_cil_push_background(log); - up_read(&log->l_cilp->xc_ctx_lock); - return 0; + up_read(&cil->xc_ctx_lock); } /* @@ -768,7 +818,8 @@ xlog_cil_force_lsn( * xlog_cil_push() handles racing pushes for the same sequence, * so no need to deal with it here. */ - xlog_cil_push_foreground(log, sequence); +restart: + xlog_cil_push_now(log, sequence); /* * See if we can find a previous sequence still committing. @@ -776,9 +827,15 @@ xlog_cil_force_lsn( * before allowing the force of push_seq to go ahead. Hence block * on commits for those as well. */ -restart: - spin_lock(&cil->xc_cil_lock); + spin_lock(&cil->xc_push_lock); list_for_each_entry(ctx, &cil->xc_committing, committing) { + /* + * Avoid getting stuck in this loop because we were woken by the + * shutdown, but then went back to sleep once already in the + * shutdown state. + */ + if (XLOG_FORCED_SHUTDOWN(log)) + goto out_shutdown; if (ctx->sequence > sequence) continue; if (!ctx->commit_lsn) { @@ -786,7 +843,7 @@ restart: * It is still being pushed! Wait for the push to * complete, then start again from the beginning. */ - xlog_wait(&cil->xc_commit_wait, &cil->xc_cil_lock); + xlog_wait(&cil->xc_commit_wait, &cil->xc_push_lock); goto restart; } if (ctx->sequence != sequence) @@ -794,8 +851,39 @@ restart: /* found it! */ commit_lsn = ctx->commit_lsn; } - spin_unlock(&cil->xc_cil_lock); + + /* + * The call to xlog_cil_push_now() executes the push in the background. + * Hence by the time we have got here it our sequence may not have been + * pushed yet. This is true if the current sequence still matches the + * push sequence after the above wait loop and the CIL still contains + * dirty objects. + * + * When the push occurs, it will empty the CIL and atomically increment + * the currect sequence past the push sequence and move it into the + * committing list. Of course, if the CIL is clean at the time of the + * push, it won't have pushed the CIL at all, so in that case we should + * try the push for this sequence again from the start just in case. + */ + if (sequence == cil->xc_current_sequence && + !list_empty(&cil->xc_cil)) { + spin_unlock(&cil->xc_push_lock); + goto restart; + } + + spin_unlock(&cil->xc_push_lock); return commit_lsn; + + /* + * We detected a shutdown in progress. We need to trigger the log force + * to pass through it's iclog state machine error handling, even though + * we are already in a shutdown state. Hence we can't return + * NULLCOMMITLSN here as that has special meaning to log forces (i.e. + * LSN is already stable), so we return a zero LSN instead. + */ +out_shutdown: + spin_unlock(&cil->xc_push_lock); + return 0; } /* @@ -852,6 +940,7 @@ xlog_cil_init( INIT_LIST_HEAD(&cil->xc_cil); INIT_LIST_HEAD(&cil->xc_committing); spin_lock_init(&cil->xc_cil_lock); + spin_lock_init(&cil->xc_push_lock); init_rwsem(&cil->xc_ctx_lock); init_waitqueue_head(&cil->xc_commit_wait); |
