aboutsummaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_log.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_log.c')
-rw-r--r--fs/xfs/xfs_log.c1485
1 files changed, 885 insertions, 600 deletions
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index e2cc3568c29..292308dede6 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -17,75 +17,113 @@
*/
#include "xfs.h"
#include "xfs_fs.h"
-#include "xfs_types.h"
-#include "xfs_bit.h"
-#include "xfs_log.h"
-#include "xfs_inum.h"
-#include "xfs_trans.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_mount.h"
#include "xfs_error.h"
+#include "xfs_trans.h"
+#include "xfs_trans_priv.h"
+#include "xfs_log.h"
#include "xfs_log_priv.h"
-#include "xfs_buf_item.h"
-#include "xfs_bmap_btree.h"
-#include "xfs_alloc_btree.h"
-#include "xfs_ialloc_btree.h"
#include "xfs_log_recover.h"
-#include "xfs_trans_priv.h"
-#include "xfs_dinode.h"
#include "xfs_inode.h"
-#include "xfs_rw.h"
#include "xfs_trace.h"
+#include "xfs_fsops.h"
+#include "xfs_cksum.h"
kmem_zone_t *xfs_log_ticket_zone;
/* Local miscellaneous function prototypes */
-STATIC int xlog_commit_record(struct log *log, struct xlog_ticket *ticket,
- xlog_in_core_t **, xfs_lsn_t *);
-STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp,
- xfs_buftarg_t *log_target,
- xfs_daddr_t blk_offset,
- int num_bblks);
-STATIC int xlog_space_left(struct log *log, atomic64_t *head);
-STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
-STATIC void xlog_dealloc_log(xlog_t *log);
+STATIC int
+xlog_commit_record(
+ struct xlog *log,
+ struct xlog_ticket *ticket,
+ struct xlog_in_core **iclog,
+ xfs_lsn_t *commitlsnp);
+
+STATIC struct xlog *
+xlog_alloc_log(
+ struct xfs_mount *mp,
+ struct xfs_buftarg *log_target,
+ xfs_daddr_t blk_offset,
+ int num_bblks);
+STATIC int
+xlog_space_left(
+ struct xlog *log,
+ atomic64_t *head);
+STATIC int
+xlog_sync(
+ struct xlog *log,
+ struct xlog_in_core *iclog);
+STATIC void
+xlog_dealloc_log(
+ struct xlog *log);
/* local state machine functions */
STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int);
-STATIC void xlog_state_do_callback(xlog_t *log,int aborted, xlog_in_core_t *iclog);
-STATIC int xlog_state_get_iclog_space(xlog_t *log,
- int len,
- xlog_in_core_t **iclog,
- xlog_ticket_t *ticket,
- int *continued_write,
- int *logoffsetp);
-STATIC int xlog_state_release_iclog(xlog_t *log,
- xlog_in_core_t *iclog);
-STATIC void xlog_state_switch_iclogs(xlog_t *log,
- xlog_in_core_t *iclog,
- int eventual_size);
-STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog);
-
-/* local functions to manipulate grant head */
-STATIC int xlog_grant_log_space(xlog_t *log,
- xlog_ticket_t *xtic);
-STATIC void xlog_grant_push_ail(struct log *log,
- int need_bytes);
-STATIC void xlog_regrant_reserve_log_space(xlog_t *log,
- xlog_ticket_t *ticket);
-STATIC int xlog_regrant_write_log_space(xlog_t *log,
- xlog_ticket_t *ticket);
-STATIC void xlog_ungrant_log_space(xlog_t *log,
- xlog_ticket_t *ticket);
+STATIC void
+xlog_state_do_callback(
+ struct xlog *log,
+ int aborted,
+ struct xlog_in_core *iclog);
+STATIC int
+xlog_state_get_iclog_space(
+ struct xlog *log,
+ int len,
+ struct xlog_in_core **iclog,
+ struct xlog_ticket *ticket,
+ int *continued_write,
+ int *logoffsetp);
+STATIC int
+xlog_state_release_iclog(
+ struct xlog *log,
+ struct xlog_in_core *iclog);
+STATIC void
+xlog_state_switch_iclogs(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ int eventual_size);
+STATIC void
+xlog_state_want_sync(
+ struct xlog *log,
+ struct xlog_in_core *iclog);
+
+STATIC void
+xlog_grant_push_ail(
+ struct xlog *log,
+ int need_bytes);
+STATIC void
+xlog_regrant_reserve_log_space(
+ struct xlog *log,
+ struct xlog_ticket *ticket);
+STATIC void
+xlog_ungrant_log_space(
+ struct xlog *log,
+ struct xlog_ticket *ticket);
#if defined(DEBUG)
-STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr);
-STATIC void xlog_verify_grant_tail(struct log *log);
-STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog,
- int count, boolean_t syncing);
-STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
- xfs_lsn_t tail_lsn);
+STATIC void
+xlog_verify_dest_ptr(
+ struct xlog *log,
+ char *ptr);
+STATIC void
+xlog_verify_grant_tail(
+ struct xlog *log);
+STATIC void
+xlog_verify_iclog(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ int count,
+ bool syncing);
+STATIC void
+xlog_verify_tail_lsn(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ xfs_lsn_t tail_lsn);
#else
#define xlog_verify_dest_ptr(a,b)
#define xlog_verify_grant_tail(a)
@@ -93,13 +131,15 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog,
#define xlog_verify_tail_lsn(a,b,c)
#endif
-STATIC int xlog_iclogs_empty(xlog_t *log);
+STATIC int
+xlog_iclogs_empty(
+ struct xlog *log);
static void
xlog_grant_sub_space(
- struct log *log,
- atomic64_t *head,
- int bytes)
+ struct xlog *log,
+ atomic64_t *head,
+ int bytes)
{
int64_t head_val = atomic64_read(head);
int64_t new, old;
@@ -123,9 +163,9 @@ xlog_grant_sub_space(
static void
xlog_grant_add_space(
- struct log *log,
- atomic64_t *head,
- int bytes)
+ struct xlog *log,
+ atomic64_t *head,
+ int bytes)
{
int64_t head_val = atomic64_read(head);
int64_t new, old;
@@ -150,78 +190,94 @@ xlog_grant_add_space(
} while (head_val != old);
}
-STATIC bool
-xlog_reserveq_wake(
- struct log *log,
- int *free_bytes)
+STATIC void
+xlog_grant_head_init(
+ struct xlog_grant_head *head)
+{
+ xlog_assign_grant_head(&head->grant, 1, 0);
+ INIT_LIST_HEAD(&head->waiters);
+ spin_lock_init(&head->lock);
+}
+
+STATIC void
+xlog_grant_head_wake_all(
+ struct xlog_grant_head *head)
{
struct xlog_ticket *tic;
- int need_bytes;
- list_for_each_entry(tic, &log->l_reserveq, t_queue) {
+ spin_lock(&head->lock);
+ list_for_each_entry(tic, &head->waiters, t_queue)
+ wake_up_process(tic->t_task);
+ spin_unlock(&head->lock);
+}
+
+static inline int
+xlog_ticket_reservation(
+ struct xlog *log,
+ struct xlog_grant_head *head,
+ struct xlog_ticket *tic)
+{
+ if (head == &log->l_write_head) {
+ ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
+ return tic->t_unit_res;
+ } else {
if (tic->t_flags & XLOG_TIC_PERM_RESERV)
- need_bytes = tic->t_unit_res * tic->t_cnt;
+ return tic->t_unit_res * tic->t_cnt;
else
- need_bytes = tic->t_unit_res;
-
- if (*free_bytes < need_bytes)
- return false;
- *free_bytes -= need_bytes;
-
- trace_xfs_log_grant_wake_up(log, tic);
- wake_up(&tic->t_wait);
+ return tic->t_unit_res;
}
-
- return true;
}
STATIC bool
-xlog_writeq_wake(
- struct log *log,
+xlog_grant_head_wake(
+ struct xlog *log,
+ struct xlog_grant_head *head,
int *free_bytes)
{
struct xlog_ticket *tic;
int need_bytes;
- list_for_each_entry(tic, &log->l_writeq, t_queue) {
- ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
-
- need_bytes = tic->t_unit_res;
-
+ list_for_each_entry(tic, &head->waiters, t_queue) {
+ need_bytes = xlog_ticket_reservation(log, head, tic);
if (*free_bytes < need_bytes)
return false;
- *free_bytes -= need_bytes;
- trace_xfs_log_regrant_write_wake_up(log, tic);
- wake_up(&tic->t_wait);
+ *free_bytes -= need_bytes;
+ trace_xfs_log_grant_wake_up(log, tic);
+ wake_up_process(tic->t_task);
}
return true;
}
STATIC int
-xlog_reserveq_wait(
- struct log *log,
+xlog_grant_head_wait(
+ struct xlog *log,
+ struct xlog_grant_head *head,
struct xlog_ticket *tic,
- int need_bytes)
+ int need_bytes) __releases(&head->lock)
+ __acquires(&head->lock)
{
- list_add_tail(&tic->t_queue, &log->l_reserveq);
+ list_add_tail(&tic->t_queue, &head->waiters);
do {
if (XLOG_FORCED_SHUTDOWN(log))
goto shutdown;
xlog_grant_push_ail(log, need_bytes);
+ __set_current_state(TASK_UNINTERRUPTIBLE);
+ spin_unlock(&head->lock);
+
XFS_STATS_INC(xs_sleep_logspace);
- trace_xfs_log_grant_sleep(log, tic);
- xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock);
+ trace_xfs_log_grant_sleep(log, tic);
+ schedule();
trace_xfs_log_grant_wake(log, tic);
- spin_lock(&log->l_grant_reserve_lock);
+ spin_lock(&head->lock);
if (XLOG_FORCED_SHUTDOWN(log))
goto shutdown;
- } while (xlog_space_left(log, &log->l_grant_reserve_head) < need_bytes);
+ } while (xlog_space_left(log, &head->grant) < need_bytes);
list_del_init(&tic->t_queue);
return 0;
@@ -230,35 +286,58 @@ shutdown:
return XFS_ERROR(EIO);
}
+/*
+ * Atomically get the log space required for a log ticket.
+ *
+ * Once a ticket gets put onto head->waiters, it will only return after the
+ * needed reservation is satisfied.
+ *
+ * This function is structured so that it has a lock free fast path. This is
+ * necessary because every new transaction reservation will come through this
+ * path. Hence any lock will be globally hot if we take it unconditionally on
+ * every pass.
+ *
+ * As tickets are only ever moved on and off head->waiters under head->lock, we
+ * only need to take that lock if we are going to add the ticket to the queue
+ * and sleep. We can avoid taking the lock if the ticket was never added to
+ * head->waiters because the t_queue list head will be empty and we hold the
+ * only reference to it so it can safely be checked unlocked.
+ */
STATIC int
-xlog_writeq_wait(
- struct log *log,
+xlog_grant_head_check(
+ struct xlog *log,
+ struct xlog_grant_head *head,
struct xlog_ticket *tic,
- int need_bytes)
+ int *need_bytes)
{
- list_add_tail(&tic->t_queue, &log->l_writeq);
-
- do {
- if (XLOG_FORCED_SHUTDOWN(log))
- goto shutdown;
- xlog_grant_push_ail(log, need_bytes);
-
- XFS_STATS_INC(xs_sleep_logspace);
- trace_xfs_log_regrant_write_sleep(log, tic);
+ int free_bytes;
+ int error = 0;
- xlog_wait(&tic->t_wait, &log->l_grant_write_lock);
- trace_xfs_log_regrant_write_wake(log, tic);
+ ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
- spin_lock(&log->l_grant_write_lock);
- if (XLOG_FORCED_SHUTDOWN(log))
- goto shutdown;
- } while (xlog_space_left(log, &log->l_grant_write_head) < need_bytes);
+ /*
+ * If there are other waiters on the queue then give them a chance at
+ * logspace before us. Wake up the first waiters, if we do not wake
+ * up all the waiters then go to sleep waiting for more free space,
+ * otherwise try to get some space for this transaction.
+ */
+ *need_bytes = xlog_ticket_reservation(log, head, tic);
+ free_bytes = xlog_space_left(log, &head->grant);
+ if (!list_empty_careful(&head->waiters)) {
+ spin_lock(&head->lock);
+ if (!xlog_grant_head_wake(log, head, &free_bytes) ||
+ free_bytes < *need_bytes) {
+ error = xlog_grant_head_wait(log, head, tic,
+ *need_bytes);
+ }
+ spin_unlock(&head->lock);
+ } else if (free_bytes < *need_bytes) {
+ spin_lock(&head->lock);
+ error = xlog_grant_head_wait(log, head, tic, *need_bytes);
+ spin_unlock(&head->lock);
+ }
- list_del_init(&tic->t_queue);
- return 0;
-shutdown:
- list_del_init(&tic->t_queue);
- return XFS_ERROR(EIO);
+ return error;
}
static void
@@ -286,6 +365,129 @@ xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type)
}
/*
+ * Replenish the byte reservation required by moving the grant write head.
+ */
+int
+xfs_log_regrant(
+ struct xfs_mount *mp,
+ struct xlog_ticket *tic)
+{
+ struct xlog *log = mp->m_log;
+ int need_bytes;
+ int error = 0;
+
+ if (XLOG_FORCED_SHUTDOWN(log))
+ return XFS_ERROR(EIO);
+
+ XFS_STATS_INC(xs_try_logspace);
+
+ /*
+ * This is a new transaction on the ticket, so we need to change the
+ * transaction ID so that the next transaction has a different TID in
+ * the log. Just add one to the existing tid so that we can see chains
+ * of rolling transactions in the log easily.
+ */
+ tic->t_tid++;
+
+ xlog_grant_push_ail(log, tic->t_unit_res);
+
+ tic->t_curr_res = tic->t_unit_res;
+ xlog_tic_reset_res(tic);
+
+ if (tic->t_cnt > 0)
+ return 0;
+
+ trace_xfs_log_regrant(log, tic);
+
+ error = xlog_grant_head_check(log, &log->l_write_head, tic,
+ &need_bytes);
+ if (error)
+ goto out_error;
+
+ xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes);
+ trace_xfs_log_regrant_exit(log, tic);
+ xlog_verify_grant_tail(log);
+ return 0;
+
+out_error:
+ /*
+ * If we are failing, make sure the ticket doesn't have any current
+ * reservations. We don't want to add this back when the ticket/
+ * transaction gets cancelled.
+ */
+ tic->t_curr_res = 0;
+ tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
+ return error;
+}
+
+/*
+ * Reserve log space and return a ticket corresponding the reservation.
+ *
+ * Each reservation is going to reserve extra space for a log record header.
+ * When writes happen to the on-disk log, we don't subtract the length of the
+ * log record header from any reservation. By wasting space in each
+ * reservation, we prevent over allocation problems.
+ */
+int
+xfs_log_reserve(
+ struct xfs_mount *mp,
+ int unit_bytes,
+ int cnt,
+ struct xlog_ticket **ticp,
+ __uint8_t client,
+ bool permanent,
+ uint t_type)
+{
+ struct xlog *log = mp->m_log;
+ struct xlog_ticket *tic;
+ int need_bytes;
+ int error = 0;
+
+ ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
+
+ if (XLOG_FORCED_SHUTDOWN(log))
+ return XFS_ERROR(EIO);
+
+ XFS_STATS_INC(xs_try_logspace);
+
+ ASSERT(*ticp == NULL);
+ tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent,
+ KM_SLEEP | KM_MAYFAIL);
+ if (!tic)
+ return XFS_ERROR(ENOMEM);
+
+ tic->t_trans_type = t_type;
+ *ticp = tic;
+
+ xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt
+ : tic->t_unit_res);
+
+ trace_xfs_log_reserve(log, tic);
+
+ error = xlog_grant_head_check(log, &log->l_reserve_head, tic,
+ &need_bytes);
+ if (error)
+ goto out_error;
+
+ xlog_grant_add_space(log, &log->l_reserve_head.grant, need_bytes);
+ xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes);
+ trace_xfs_log_reserve_exit(log, tic);
+ xlog_verify_grant_tail(log);
+ return 0;
+
+out_error:
+ /*
+ * If we are failing, make sure the ticket doesn't have any current
+ * reservations. We don't want to add this back when the ticket/
+ * transaction gets cancelled.
+ */
+ tic->t_curr_res = 0;
+ tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */
+ return error;
+}
+
+
+/*
* NOTES:
*
* 1. currblock field gets updated at startup and after in-core logs
@@ -313,7 +515,7 @@ xfs_log_done(
struct xlog_in_core **iclog,
uint flags)
{
- struct log *log = mp->m_log;
+ struct xlog *log = mp->m_log;
xfs_lsn_t lsn = 0;
if (XLOG_FORCED_SHUTDOWN(log) ||
@@ -395,88 +597,6 @@ xfs_log_release_iclog(
}
/*
- * 1. Reserve an amount of on-disk log space and return a ticket corresponding
- * to the reservation.
- * 2. Potentially, push buffers at tail of log to disk.
- *
- * Each reservation is going to reserve extra space for a log record header.
- * When writes happen to the on-disk log, we don't subtract the length of the
- * log record header from any reservation. By wasting space in each
- * reservation, we prevent over allocation problems.
- */
-int
-xfs_log_reserve(
- struct xfs_mount *mp,
- int unit_bytes,
- int cnt,
- struct xlog_ticket **ticket,
- __uint8_t client,
- uint flags,
- uint t_type)
-{
- struct log *log = mp->m_log;
- struct xlog_ticket *internal_ticket;
- int retval = 0;
-
- ASSERT(client == XFS_TRANSACTION || client == XFS_LOG);
-
- if (XLOG_FORCED_SHUTDOWN(log))
- return XFS_ERROR(EIO);
-
- XFS_STATS_INC(xs_try_logspace);
-
-
- if (*ticket != NULL) {
- ASSERT(flags & XFS_LOG_PERM_RESERV);
- internal_ticket = *ticket;
-
- /*
- * this is a new transaction on the ticket, so we need to
- * change the transaction ID so that the next transaction has a
- * different TID in the log. Just add one to the existing tid
- * so that we can see chains of rolling transactions in the log
- * easily.
- */
- internal_ticket->t_tid++;
-
- trace_xfs_log_reserve(log, internal_ticket);
-
- xlog_grant_push_ail(log, internal_ticket->t_unit_res);
- retval = xlog_regrant_write_log_space(log, internal_ticket);
- } else {
- /* may sleep if need to allocate more tickets */
- internal_ticket = xlog_ticket_alloc(log, unit_bytes, cnt,
- client, flags,
- KM_SLEEP|KM_MAYFAIL);
- if (!internal_ticket)
- return XFS_ERROR(ENOMEM);
- internal_ticket->t_trans_type = t_type;
- *ticket = internal_ticket;
-
- trace_xfs_log_reserve(log, internal_ticket);
-
- xlog_grant_push_ail(log,
- (internal_ticket->t_unit_res *
- internal_ticket->t_cnt));
- retval = xlog_grant_log_space(log, internal_ticket);
- }
-
- if (unlikely(retval)) {
- /*
- * If we are failing, make sure the ticket doesn't have any
- * current reservations. We don't want to add this back
- * when the ticket/ transaction gets cancelled.
- */
- internal_ticket->t_curr_res = 0;
- /* ungrant will give back unit_res * t_cnt. */
- internal_ticket->t_cnt = 0;
- }
-
- return retval;
-}
-
-
-/*
* Mount a log filesystem
*
* mp - ubiquitous xfs mount point structure
@@ -493,13 +613,16 @@ xfs_log_mount(
xfs_daddr_t blk_offset,
int num_bblks)
{
- int error;
+ int error = 0;
+ int min_logfsbs;
- if (!(mp->m_flags & XFS_MOUNT_NORECOVERY))
- xfs_notice(mp, "Mounting Filesystem");
- else {
+ if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) {
+ xfs_notice(mp, "Mounting V%d Filesystem",
+ XFS_SB_VERSION_NUM(&mp->m_sb));
+ } else {
xfs_notice(mp,
-"Mounting filesystem in no-recovery mode. Filesystem will be inconsistent.");
+"Mounting V%d filesystem in no-recovery mode. Filesystem will be inconsistent.",
+ XFS_SB_VERSION_NUM(&mp->m_sb));
ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
}
@@ -510,6 +633,50 @@ xfs_log_mount(
}
/*
+ * Validate the given log space and drop a critical message via syslog
+ * if the log size is too small that would lead to some unexpected
+ * situations in transaction log space reservation stage.
+ *
+ * Note: we can't just reject the mount if the validation fails. This
+ * would mean that people would have to downgrade their kernel just to
+ * remedy the situation as there is no way to grow the log (short of
+ * black magic surgery with xfs_db).
+ *
+ * We can, however, reject mounts for CRC format filesystems, as the
+ * mkfs binary being used to make the filesystem should never create a
+ * filesystem with a log that is too small.
+ */
+ min_logfsbs = xfs_log_calc_minimum_size(mp);
+
+ if (mp->m_sb.sb_logblocks < min_logfsbs) {
+ xfs_warn(mp,
+ "Log size %d blocks too small, minimum size is %d blocks",
+ mp->m_sb.sb_logblocks, min_logfsbs);
+ error = EINVAL;
+ } else if (mp->m_sb.sb_logblocks > XFS_MAX_LOG_BLOCKS) {
+ xfs_warn(mp,
+ "Log size %d blocks too large, maximum size is %lld blocks",
+ mp->m_sb.sb_logblocks, XFS_MAX_LOG_BLOCKS);
+ error = EINVAL;
+ } else if (XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks) > XFS_MAX_LOG_BYTES) {
+ xfs_warn(mp,
+ "log size %lld bytes too large, maximum size is %lld bytes",
+ XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks),
+ XFS_MAX_LOG_BYTES);
+ error = EINVAL;
+ }
+ if (error) {
+ if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ xfs_crit(mp, "AAIEEE! Log failed size checks. Abort!");
+ ASSERT(0);
+ goto out_free_log;
+ }
+ xfs_crit(mp,
+"Log size out of supported range. Continuing onwards, but if log hangs are\n"
+"experienced then please report this message in the bug report.");
+ }
+
+ /*
* Initialize the AIL now we have a log.
*/
error = xfs_trans_ail_init(mp);
@@ -561,25 +728,29 @@ out:
}
/*
- * Finish the recovery of the file system. This is separate from
- * the xfs_log_mount() call, because it depends on the code in
- * xfs_mountfs() to read in the root and real-time bitmap inodes
- * between calling xfs_log_mount() and here.
+ * Finish the recovery of the file system. This is separate from the
+ * xfs_log_mount() call, because it depends on the code in xfs_mountfs() to read
+ * in the root and real-time bitmap inodes between calling xfs_log_mount() and
+ * here.
*
- * mp - ubiquitous xfs mount point structure
+ * If we finish recovery successfully, start the background log work. If we are
+ * not doing recovery, then we have a RO filesystem and we don't need to start
+ * it.
*/
int
xfs_log_mount_finish(xfs_mount_t *mp)
{
- int error;
+ int error = 0;
- if (!(mp->m_flags & XFS_MOUNT_NORECOVERY))
+ if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) {
error = xlog_recover_finish(mp->m_log);
- else {
- error = 0;
+ if (!error)
+ xfs_log_work_queue(mp);
+ } else {
ASSERT(mp->m_flags & XFS_MOUNT_RDONLY);
}
+
return error;
}
@@ -595,14 +766,14 @@ xfs_log_mount_finish(xfs_mount_t *mp)
* Unmount record used to have a string "Unmount filesystem--" in the
* data section where the "Un" was really a magic number (XLOG_UNMOUNT_TYPE).
* We just write the magic number now since that particular field isn't
- * currently architecture converted and "nUmount" is a bit foo.
+ * currently architecture converted and "Unmount" is a bit foo.
* As far as I know, there weren't any dependencies on the old behaviour.
*/
int
xfs_log_unmount_write(xfs_mount_t *mp)
{
- xlog_t *log = mp->m_log;
+ struct xlog *log = mp->m_log;
xlog_in_core_t *iclog;
#ifdef DEBUG
xlog_in_core_t *first_iclog;
@@ -653,8 +824,9 @@ xfs_log_unmount_write(xfs_mount_t *mp)
.lv_iovecp = &reg,
};
- /* remove inited flag */
+ /* remove inited flag, and account for space used */
tic->t_flags = 0;
+ tic->t_curr_res -= sizeof(magic);
error = xlog_write(log, &vec, tic, &lsn,
NULL, XLOG_UNMOUNT_TRANS);
/*
@@ -731,14 +903,49 @@ xfs_log_unmount_write(xfs_mount_t *mp)
} /* xfs_log_unmount_write */
/*
- * Deallocate log structures for unmount/relocation.
+ * Empty the log for unmount/freeze.
+ *
+ * To do this, we first need to shut down the background log work so it is not
+ * trying to cover the log as we clean up. We then need to unpin all objects in
+ * the log so we can then flush them out. Once they have completed their IO and
+ * run the callbacks removing themselves from the AIL, we can write the unmount
+ * record.
+ */
+void
+xfs_log_quiesce(
+ struct xfs_mount *mp)
+{
+ cancel_delayed_work_sync(&mp->m_log->l_work);
+ xfs_log_force(mp, XFS_LOG_SYNC);
+
+ /*
+ * The superblock buffer is uncached and while xfs_ail_push_all_sync()
+ * will push it, xfs_wait_buftarg() will not wait for it. Further,
+ * xfs_buf_iowait() cannot be used because it was pushed with the
+ * XBF_ASYNC flag set, so we need to use a lock/unlock pair to wait for
+ * the IO to complete.
+ */
+ xfs_ail_push_all_sync(mp->m_ail);
+ xfs_wait_buftarg(mp->m_ddev_targp);
+ xfs_buf_lock(mp->m_sb_bp);
+ xfs_buf_unlock(mp->m_sb_bp);
+
+ xfs_log_unmount_write(mp);
+}
+
+/*
+ * Shut down and release the AIL and Log.
*
- * We need to stop the aild from running before we destroy
- * and deallocate the log as the aild references the log.
+ * During unmount, we need to ensure we flush all the dirty metadata objects
+ * from the AIL so that the log is empty before we write the unmount record to
+ * the log. Once this is done, we can tear down the AIL and the log.
*/
void
-xfs_log_unmount(xfs_mount_t *mp)
+xfs_log_unmount(
+ struct xfs_mount *mp)
{
+ xfs_log_quiesce(mp);
+
xfs_trans_ail_destroy(mp);
xlog_dealloc_log(mp->m_log);
}
@@ -760,89 +967,67 @@ xfs_log_item_init(
INIT_LIST_HEAD(&item->li_cil);
}
+/*
+ * Wake up processes waiting for log space after we have moved the log tail.
+ */
void
-xfs_log_move_tail(xfs_mount_t *mp,
- xfs_lsn_t tail_lsn)
+xfs_log_space_wake(
+ struct xfs_mount *mp)
{
- xlog_ticket_t *tic;
- xlog_t *log = mp->m_log;
- int need_bytes, free_bytes;
+ struct xlog *log = mp->m_log;
+ int free_bytes;
if (XLOG_FORCED_SHUTDOWN(log))
return;
- if (tail_lsn == 0)
- tail_lsn = atomic64_read(&log->l_last_sync_lsn);
-
- /* tail_lsn == 1 implies that we weren't passed a valid value. */
- if (tail_lsn != 1)
- atomic64_set(&log->l_tail_lsn, tail_lsn);
-
- if (!list_empty_careful(&log->l_writeq)) {
-#ifdef DEBUG
- if (log->l_flags & XLOG_ACTIVE_RECOVERY)
- panic("Recovery problem");
-#endif
- spin_lock(&log->l_grant_write_lock);
- free_bytes = xlog_space_left(log, &log->l_grant_write_head);
- list_for_each_entry(tic, &log->l_writeq, t_queue) {
- ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV);
+ if (!list_empty_careful(&log->l_write_head.waiters)) {
+ ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
- if (free_bytes < tic->t_unit_res && tail_lsn != 1)
- break;
- tail_lsn = 0;
- free_bytes -= tic->t_unit_res;
- trace_xfs_log_regrant_write_wake_up(log, tic);
- wake_up(&tic->t_wait);
- }
- spin_unlock(&log->l_grant_write_lock);
+ spin_lock(&log->l_write_head.lock);
+ free_bytes = xlog_space_left(log, &log->l_write_head.grant);
+ xlog_grant_head_wake(log, &log->l_write_head, &free_bytes);
+ spin_unlock(&log->l_write_head.lock);
}
- if (!list_empty_careful(&log->l_reserveq)) {
-#ifdef DEBUG
- if (log->l_flags & XLOG_ACTIVE_RECOVERY)
- panic("Recovery problem");
-#endif
- spin_lock(&log->l_grant_reserve_lock);
- free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
- list_for_each_entry(tic, &log->l_reserveq, t_queue) {
- if (tic->t_flags & XLOG_TIC_PERM_RESERV)
- need_bytes = tic->t_unit_res*tic->t_cnt;
- else
- need_bytes = tic->t_unit_res;
- if (free_bytes < need_bytes && tail_lsn != 1)
- break;
- tail_lsn = 0;
- free_bytes -= need_bytes;
- trace_xfs_log_grant_wake_up(log, tic);
- wake_up(&tic->t_wait);
- }
- spin_unlock(&log->l_grant_reserve_lock);
+ if (!list_empty_careful(&log->l_reserve_head.waiters)) {
+ ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
+
+ spin_lock(&log->l_reserve_head.lock);
+ free_bytes = xlog_space_left(log, &log->l_reserve_head.grant);
+ xlog_grant_head_wake(log, &log->l_reserve_head, &free_bytes);
+ spin_unlock(&log->l_reserve_head.lock);
}
}
/*
- * Determine if we have a transaction that has gone to disk
- * that needs to be covered. To begin the transition to the idle state
- * firstly the log needs to be idle (no AIL and nothing in the iclogs).
- * If we are then in a state where covering is needed, the caller is informed
- * that dummy transactions are required to move the log into the idle state.
+ * Determine if we have a transaction that has gone to disk that needs to be
+ * covered. To begin the transition to the idle state firstly the log needs to
+ * be idle. That means the CIL, the AIL and the iclogs needs to be empty before
+ * we start attempting to cover the log.
+ *
+ * Only if we are then in a state where covering is needed, the caller is
+ * informed that dummy transactions are required to move the log into the idle
+ * state.
*
- * Because this is called as part of the sync process, we should also indicate
- * that dummy transactions should be issued in anything but the covered or
- * idle states. This ensures that the log tail is accurately reflected in
- * the log at the end of the sync, hence if a crash occurrs avoids replay
- * of transactions where the metadata is already on disk.
+ * If there are any items in the AIl or CIL, then we do not want to attempt to
+ * cover the log as we may be in a situation where there isn't log space
+ * available to run a dummy transaction and this can lead to deadlocks when the
+ * tail of the log is pinned by an item that is modified in the CIL. Hence
+ * there's no point in running a dummy transaction at this point because we
+ * can't start trying to idle the log until both the CIL and AIL are empty.
*/
int
xfs_log_need_covered(xfs_mount_t *mp)
{
+ struct xlog *log = mp->m_log;
int needed = 0;
- xlog_t *log = mp->m_log;
if (!xfs_fs_writable(mp))
return 0;
+ if (!xlog_cil_empty(log))
+ return 0;
+
spin_lock(&log->l_icloglock);
switch (log->l_covered_state) {
case XLOG_STATE_COVER_DONE:
@@ -851,14 +1036,17 @@ xfs_log_need_covered(xfs_mount_t *mp)
break;
case XLOG_STATE_COVER_NEED:
case XLOG_STATE_COVER_NEED2:
- if (!xfs_ail_min_lsn(log->l_ailp) &&
- xlog_iclogs_empty(log)) {
- if (log->l_covered_state == XLOG_STATE_COVER_NEED)
- log->l_covered_state = XLOG_STATE_COVER_DONE;
- else
- log->l_covered_state = XLOG_STATE_COVER_DONE2;
- }
- /* FALLTHRU */
+ if (xfs_ail_min_lsn(log->l_ailp))
+ break;
+ if (!xlog_iclogs_empty(log))
+ break;
+
+ needed = 1;
+ if (log->l_covered_state == XLOG_STATE_COVER_NEED)
+ log->l_covered_state = XLOG_STATE_COVER_DONE;
+ else
+ log->l_covered_state = XLOG_STATE_COVER_DONE2;
+ break;
default:
needed = 1;
break;
@@ -867,38 +1055,47 @@ xfs_log_need_covered(xfs_mount_t *mp)
return needed;
}
-/******************************************************************************
- *
- * local routines
- *
- ******************************************************************************
- */
-
-/* xfs_trans_tail_ail returns 0 when there is nothing in the list.
- * The log manager must keep track of the last LR which was committed
- * to disk. The lsn of this LR will become the new tail_lsn whenever
- * xfs_trans_tail_ail returns 0. If we don't do this, we run into
- * the situation where stuff could be written into the log but nothing
- * was ever in the AIL when asked. Eventually, we panic since the
- * tail hits the head.
- *
+/*
* We may be holding the log iclog lock upon entering this routine.
*/
xfs_lsn_t
-xlog_assign_tail_lsn(
+xlog_assign_tail_lsn_locked(
struct xfs_mount *mp)
{
+ struct xlog *log = mp->m_log;
+ struct xfs_log_item *lip;
xfs_lsn_t tail_lsn;
- struct log *log = mp->m_log;
- tail_lsn = xfs_ail_min_lsn(mp->m_ail);
- if (!tail_lsn)
- tail_lsn = atomic64_read(&log->l_last_sync_lsn);
+ assert_spin_locked(&mp->m_ail->xa_lock);
+ /*
+ * To make sure we always have a valid LSN for the log tail we keep
+ * track of the last LSN which was committed in log->l_last_sync_lsn,
+ * and use that when the AIL was empty.
+ */
+ lip = xfs_ail_min(mp->m_ail);
+ if (lip)
+ tail_lsn = lip->li_lsn;
+ else
+ tail_lsn = atomic64_read(&log->l_last_sync_lsn);
+ trace_xfs_log_assign_tail_lsn(log, tail_lsn);
atomic64_set(&log->l_tail_lsn, tail_lsn);
return tail_lsn;
}
+xfs_lsn_t
+xlog_assign_tail_lsn(
+ struct xfs_mount *mp)
+{
+ xfs_lsn_t tail_lsn;
+
+ spin_lock(&mp->m_ail->xa_lock);
+ tail_lsn = xlog_assign_tail_lsn_locked(mp);
+ spin_unlock(&mp->m_ail->xa_lock);
+
+ return tail_lsn;
+}
+
/*
* Return the space in the log between the tail and the head. The head
* is passed in the cycle/bytes formal parms. In the special case where
@@ -915,7 +1112,7 @@ xlog_assign_tail_lsn(
*/
STATIC int
xlog_space_left(
- struct log *log,
+ struct xlog *log,
atomic64_t *head)
{
int free_bytes;
@@ -961,14 +1158,14 @@ xlog_space_left(
void
xlog_iodone(xfs_buf_t *bp)
{
- xlog_in_core_t *iclog = bp->b_fspriv;
- xlog_t *l = iclog->ic_log;
- int aborted = 0;
+ struct xlog_in_core *iclog = bp->b_fspriv;
+ struct xlog *l = iclog->ic_log;
+ int aborted = 0;
/*
* Race to shutdown the filesystem if we see an error.
*/
- if (XFS_TEST_ERROR((xfs_buf_geterror(bp)), l->l_mp,
+ if (XFS_TEST_ERROR(bp->b_error, l->l_mp,
XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) {
xfs_buf_ioerror_alert(bp, __func__);
xfs_buf_stale(bp);
@@ -986,13 +1183,15 @@ xlog_iodone(xfs_buf_t *bp)
/* log I/O is always issued ASYNC */
ASSERT(XFS_BUF_ISASYNC(bp));
xlog_state_done_syncing(iclog, aborted);
+
/*
- * do not reference the buffer (bp) here as we could race
- * with it being freed after writing the unmount record to the
- * log.
+ * drop the buffer lock now that we are done. Nothing references
+ * the buffer after this, so an unmount waiting on this lock can now
+ * tear it down safely. As such, it is unsafe to reference the buffer
+ * (bp) after the unlock as we could race with it being freed.
*/
-
-} /* xlog_iodone */
+ xfs_buf_unlock(bp);
+}
/*
* Return size of each in-core log record buffer.
@@ -1004,8 +1203,9 @@ xlog_iodone(xfs_buf_t *bp)
*/
STATIC void
-xlog_get_iclog_buffer_size(xfs_mount_t *mp,
- xlog_t *log)
+xlog_get_iclog_buffer_size(
+ struct xfs_mount *mp,
+ struct xlog *log)
{
int size;
int xhdrs;
@@ -1061,18 +1261,53 @@ done:
} /* xlog_get_iclog_buffer_size */
+void
+xfs_log_work_queue(
+ struct xfs_mount *mp)
+{
+ queue_delayed_work(mp->m_log_workqueue, &mp->m_log->l_work,
+ msecs_to_jiffies(xfs_syncd_centisecs * 10));
+}
+
+/*
+ * Every sync period we need to unpin all items in the AIL and push them to
+ * disk. If there is nothing dirty, then we might need to cover the log to
+ * indicate that the filesystem is idle.
+ */
+void
+xfs_log_worker(
+ struct work_struct *work)
+{
+ struct xlog *log = container_of(to_delayed_work(work),
+ struct xlog, l_work);
+ struct xfs_mount *mp = log->l_mp;
+
+ /* dgc: errors ignored - not fatal and nowhere to report them */
+ if (xfs_log_need_covered(mp))
+ xfs_fs_log_dummy(mp);
+ else
+ xfs_log_force(mp, 0);
+
+ /* start pushing all the metadata that is currently dirty */
+ xfs_ail_push_all(mp->m_ail);
+
+ /* queue us up again */
+ xfs_log_work_queue(mp);
+}
+
/*
* This routine initializes some of the log structure for a given mount point.
* Its primary purpose is to fill in enough, so recovery can occur. However,
* some other stuff may be filled in too.
*/
-STATIC xlog_t *
-xlog_alloc_log(xfs_mount_t *mp,
- xfs_buftarg_t *log_target,
- xfs_daddr_t blk_offset,
- int num_bblks)
+STATIC struct xlog *
+xlog_alloc_log(
+ struct xfs_mount *mp,
+ struct xfs_buftarg *log_target,
+ xfs_daddr_t blk_offset,
+ int num_bblks)
{
- xlog_t *log;
+ struct xlog *log;
xlog_rec_header_t *head;
xlog_in_core_t **iclogp;
xlog_in_core_t *iclog, *prev_iclog=NULL;
@@ -1081,7 +1316,7 @@ xlog_alloc_log(xfs_mount_t *mp,
int error = ENOMEM;
uint log2_size = 0;
- log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL);
+ log = kmem_zalloc(sizeof(struct xlog), KM_MAYFAIL);
if (!log) {
xfs_warn(mp, "Log allocation failed: No memory!");
goto out;
@@ -1094,18 +1329,16 @@ xlog_alloc_log(xfs_mount_t *mp,
log->l_logBBsize = num_bblks;
log->l_covered_state = XLOG_STATE_COVER_IDLE;
log->l_flags |= XLOG_ACTIVE_RECOVERY;
+ INIT_DELAYED_WORK(&log->l_work, xfs_log_worker);
log->l_prev_block = -1;
/* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */
xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0);
xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0);
log->l_curr_cycle = 1; /* 0 is bad since this is initial value */
- xlog_assign_grant_head(&log->l_grant_reserve_head, 1, 0);
- xlog_assign_grant_head(&log->l_grant_write_head, 1, 0);
- INIT_LIST_HEAD(&log->l_reserveq);
- INIT_LIST_HEAD(&log->l_writeq);
- spin_lock_init(&log->l_grant_reserve_lock);
- spin_lock_init(&log->l_grant_write_lock);
+
+ xlog_grant_head_init(&log->l_reserve_head);
+ xlog_grant_head_init(&log->l_write_head);
error = EFSCORRUPTED;
if (xfs_sb_version_hassector(&mp->m_sb)) {
@@ -1137,19 +1370,24 @@ xlog_alloc_log(xfs_mount_t *mp,
xlog_get_iclog_buffer_size(mp, log);
error = ENOMEM;
- bp = xfs_buf_alloc(mp->m_logdev_targp, 0, log->l_iclog_size, 0);
+ bp = xfs_buf_alloc(mp->m_logdev_targp, 0, BTOBB(log->l_iclog_size), 0);
if (!bp)
goto out_free_log;
- bp->b_iodone = xlog_iodone;
+
+ /*
+ * The iclogbuf buffer locks are held over IO but we are not going to do
+ * IO yet. Hence unlock the buffer so that the log IO path can grab it
+ * when appropriately.
+ */
ASSERT(xfs_buf_islocked(bp));
+ xfs_buf_unlock(bp);
+
+ bp->b_iodone = xlog_iodone;
log->l_xbuf = bp;
spin_lock_init(&log->l_icloglock);
init_waitqueue_head(&log->l_flush_wait);
- /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */
- ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0);
-
iclogp = &log->l_iclog;
/*
* The amount of memory to allocate for the iclog structure is
@@ -1169,10 +1407,13 @@ xlog_alloc_log(xfs_mount_t *mp,
prev_iclog = iclog;
bp = xfs_buf_get_uncached(mp->m_logdev_targp,
- log->l_iclog_size, 0);
+ BTOBB(log->l_iclog_size), 0);
if (!bp)
goto out_free_iclog;
+ ASSERT(xfs_buf_islocked(bp));
+ xfs_buf_unlock(bp);
+
bp->b_iodone = xlog_iodone;
iclog->ic_bp = bp;
iclog->ic_data = bp->b_addr;
@@ -1189,7 +1430,7 @@ xlog_alloc_log(xfs_mount_t *mp,
head->h_fmt = cpu_to_be32(XLOG_FMT);
memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t));
- iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize;
+ iclog->ic_size = BBTOB(bp->b_length) - log->l_iclog_hsize;
iclog->ic_state = XLOG_STATE_ACTIVE;
iclog->ic_log = log;
atomic_set(&iclog->ic_refcnt, 0);
@@ -1197,7 +1438,6 @@ xlog_alloc_log(xfs_mount_t *mp,
iclog->ic_callback_tail = &(iclog->ic_callback);
iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize;
- ASSERT(xfs_buf_islocked(iclog->ic_bp));
init_waitqueue_head(&iclog->ic_force_wait);
init_waitqueue_head(&iclog->ic_write_wait);
@@ -1233,7 +1473,7 @@ out:
*/
STATIC int
xlog_commit_record(
- struct log *log,
+ struct xlog *log,
struct xlog_ticket *ticket,
struct xlog_in_core **iclog,
xfs_lsn_t *commitlsnp)
@@ -1267,7 +1507,7 @@ xlog_commit_record(
*/
STATIC void
xlog_grant_push_ail(
- struct log *log,
+ struct xlog *log,
int need_bytes)
{
xfs_lsn_t threshold_lsn = 0;
@@ -1280,7 +1520,7 @@ xlog_grant_push_ail(
ASSERT(BTOBB(need_bytes) < log->l_logBBsize);
- free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
+ free_bytes = xlog_space_left(log, &log->l_reserve_head.grant);
free_blocks = BTOBBT(free_bytes);
/*
@@ -1322,12 +1562,96 @@ xlog_grant_push_ail(
}
/*
+ * Stamp cycle number in every block
+ */
+STATIC void
+xlog_pack_data(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ int roundoff)
+{
+ int i, j, k;
+ int size = iclog->ic_offset + roundoff;
+ __be32 cycle_lsn;
+ xfs_caddr_t dp;
+
+ cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn);
+
+ dp = iclog->ic_datap;
+ for (i = 0; i < BTOBB(size); i++) {
+ if (i >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE))
+ break;
+ iclog->ic_header.h_cycle_data[i] = *(__be32 *)dp;
+ *(__be32 *)dp = cycle_lsn;
+ dp += BBSIZE;
+ }
+
+ if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+ xlog_in_core_2_t *xhdr = iclog->ic_data;
+
+ for ( ; i < BTOBB(size); i++) {
+ j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
+ k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE);
+ xhdr[j].hic_xheader.xh_cycle_data[k] = *(__be32 *)dp;
+ *(__be32 *)dp = cycle_lsn;
+ dp += BBSIZE;
+ }
+
+ for (i = 1; i < log->l_iclog_heads; i++)
+ xhdr[i].hic_xheader.xh_cycle = cycle_lsn;
+ }
+}
+
+/*
+ * Calculate the checksum for a log buffer.
+ *
+ * This is a little more complicated than it should be because the various
+ * headers and the actual data are non-contiguous.
+ */
+__le32
+xlog_cksum(
+ struct xlog *log,
+ struct xlog_rec_header *rhead,
+ char *dp,
+ int size)
+{
+ __uint32_t crc;
+
+ /* first generate the crc for the record header ... */
+ crc = xfs_start_cksum((char *)rhead,
+ sizeof(struct xlog_rec_header),
+ offsetof(struct xlog_rec_header, h_crc));
+
+ /* ... then for additional cycle data for v2 logs ... */
+ if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) {
+ union xlog_in_core2 *xhdr = (union xlog_in_core2 *)rhead;
+ int i;
+
+ for (i = 1; i < log->l_iclog_heads; i++) {
+ crc = crc32c(crc, &xhdr[i].hic_xheader,
+ sizeof(struct xlog_rec_ext_header));
+ }
+ }
+
+ /* ... and finally for the payload */
+ crc = crc32c(crc, dp, size);
+
+ return xfs_end_cksum(crc);
+}
+
+/*
* The bdstrat callback function for log bufs. This gives us a central
* place to trap bufs in case we get hit by a log I/O error and need to
* shutdown. Actually, in practice, even when we didn't get a log error,
* we transition the iclogs to IOERROR state *after* flushing all existing
* iclogs to disk. This is because we don't want anymore new transactions to be
* started or completed afterwards.
+ *
+ * We lock the iclogbufs here so that we can serialise against IO completion
+ * during unmount. We might be processing a shutdown triggered during unmount,
+ * and that can occur asynchronously to the unmount thread, and hence we need to
+ * ensure that completes before tearing down the iclogbufs. Hence we need to
+ * hold the buffer lock across the log IO to acheive that.
*/
STATIC int
xlog_bdstrat(
@@ -1335,6 +1659,7 @@ xlog_bdstrat(
{
struct xlog_in_core *iclog = bp->b_fspriv;
+ xfs_buf_lock(bp);
if (iclog->ic_state & XLOG_STATE_IOERROR) {
xfs_buf_ioerror(bp, EIO);
xfs_buf_stale(bp);
@@ -1342,7 +1667,8 @@ xlog_bdstrat(
/*
* It would seem logical to return EIO here, but we rely on
* the log state machine to propagate I/O errors instead of
- * doing it here.
+ * doing it here. Similarly, IO completion will unlock the
+ * buffer, so we don't do it here.
*/
return 0;
}
@@ -1377,10 +1703,10 @@ xlog_bdstrat(
*/
STATIC int
-xlog_sync(xlog_t *log,
- xlog_in_core_t *iclog)
+xlog_sync(
+ struct xlog *log,
+ struct xlog_in_core *iclog)
{
- xfs_caddr_t dptr; /* pointer to byte sized element */
xfs_buf_t *bp;
int i;
uint count; /* byte count of bwrite */
@@ -1389,6 +1715,7 @@ xlog_sync(xlog_t *log,
int split = 0; /* split write into two regions */
int error;
int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb);
+ int size;
XFS_STATS_INC(xs_log_writes);
ASSERT(atomic_read(&iclog->ic_refcnt) == 0);
@@ -1412,20 +1739,17 @@ xlog_sync(xlog_t *log,
roundoff < BBTOB(1)));
/* move grant heads by roundoff in sync */
- xlog_grant_add_space(log, &log->l_grant_reserve_head, roundoff);
- xlog_grant_add_space(log, &log->l_grant_write_head, roundoff);
+ xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff);
+ xlog_grant_add_space(log, &log->l_write_head.grant, roundoff);
/* put cycle number in every block */
xlog_pack_data(log, iclog, roundoff);
/* real byte length */
- if (v2) {
- iclog->ic_header.h_len =
- cpu_to_be32(iclog->ic_offset + roundoff);
- } else {
- iclog->ic_header.h_len =
- cpu_to_be32(iclog->ic_offset);
- }
+ size = iclog->ic_offset;
+ if (v2)
+ size += roundoff;
+ iclog->ic_header.h_len = cpu_to_be32(size);
bp = iclog->ic_bp;
XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn)));
@@ -1434,13 +1758,37 @@ xlog_sync(xlog_t *log,
/* Do we need to split this write into 2 parts? */
if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) {
+ char *dptr;
+
split = count - (BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp)));
count = BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp));
- iclog->ic_bwritecnt = 2; /* split into 2 writes */
+ iclog->ic_bwritecnt = 2;
+
+ /*
+ * Bump the cycle numbers at the start of each block in the
+ * part of the iclog that ends up in the buffer that gets
+ * written to the start of the log.
+ *
+ * Watch out for the header magic number case, though.
+ */
+ dptr = (char *)&iclog->ic_header + count;
+ for (i = 0; i < split; i += BBSIZE) {
+ __uint32_t cycle = be32_to_cpu(*(__be32 *)dptr);
+ if (++cycle == XLOG_HEADER_MAGIC_NUM)
+ cycle++;
+ *(__be32 *)dptr = cpu_to_be32(cycle);
+
+ dptr += BBSIZE;
+ }
} else {
iclog->ic_bwritecnt = 1;
}
- XFS_BUF_SET_COUNT(bp, count);
+
+ /* calculcate the checksum */
+ iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header,
+ iclog->ic_datap, size);
+
+ bp->b_io_length = BTOBB(count);
bp->b_fspriv = iclog;
XFS_BUF_ZEROFLAGS(bp);
XFS_BUF_ASYNC(bp);
@@ -1467,7 +1815,7 @@ xlog_sync(xlog_t *log,
ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
- xlog_verify_iclog(log, iclog, count, B_TRUE);
+ xlog_verify_iclog(log, iclog, count, true);
/* account for log which doesn't start at block #0 */
XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
@@ -1493,19 +1841,6 @@ xlog_sync(xlog_t *log,
bp->b_flags |= XBF_SYNCIO;
if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
bp->b_flags |= XBF_FUA;
- dptr = bp->b_addr;
- /*
- * Bump the cycle numbers at the start of each block
- * since this part of the buffer is at the start of
- * a new cycle. Watch out for the header magic number
- * case, though.
- */
- for (i = 0; i < split; i += BBSIZE) {
- be32_add_cpu((__be32 *)dptr, 1);
- if (be32_to_cpu(*(__be32 *)dptr) == XLOG_HEADER_MAGIC_NUM)
- be32_add_cpu((__be32 *)dptr, 1);
- dptr += BBSIZE;
- }
ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
@@ -1522,12 +1857,12 @@ xlog_sync(xlog_t *log,
return 0;
} /* xlog_sync */
-
/*
* Deallocate a log structure
*/
STATIC void
-xlog_dealloc_log(xlog_t *log)
+xlog_dealloc_log(
+ struct xlog *log)
{
xlog_in_core_t *iclog, *next_iclog;
int i;
@@ -1535,14 +1870,28 @@ xlog_dealloc_log(xlog_t *log)
xlog_cil_destroy(log);
/*
- * always need to ensure that the extra buffer does not point to memory
- * owned by another log buffer before we free it.
+ * Cycle all the iclogbuf locks to make sure all log IO completion
+ * is done before we tear down these buffers.
+ */
+ iclog = log->l_iclog;
+ for (i = 0; i < log->l_iclog_bufs; i++) {
+ xfs_buf_lock(iclog->ic_bp);
+ xfs_buf_unlock(iclog->ic_bp);
+ iclog = iclog->ic_next;
+ }
+
+ /*
+ * Always need to ensure that the extra buffer does not point to memory
+ * owned by another log buffer before we free it. Also, cycle the lock
+ * first to ensure we've completed IO on it.
*/
- xfs_buf_set_empty(log->l_xbuf, log->l_iclog_size);
+ xfs_buf_lock(log->l_xbuf);
+ xfs_buf_unlock(log->l_xbuf);
+ xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size));
xfs_buf_free(log->l_xbuf);
iclog = log->l_iclog;
- for (i=0; i<log->l_iclog_bufs; i++) {
+ for (i = 0; i < log->l_iclog_bufs; i++) {
xfs_buf_free(iclog->ic_bp);
next_iclog = iclog->ic_next;
kmem_free(iclog);
@@ -1559,10 +1908,11 @@ xlog_dealloc_log(xlog_t *log)
*/
/* ARGSUSED */
static inline void
-xlog_state_finish_copy(xlog_t *log,
- xlog_in_core_t *iclog,
- int record_cnt,
- int copy_bytes)
+xlog_state_finish_copy(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ int record_cnt,
+ int copy_bytes)
{
spin_lock(&log->l_icloglock);
@@ -1675,7 +2025,7 @@ xlog_print_tic_res(
for (i = 0; i < ticket->t_res_num; i++) {
uint r_type = ticket->t_res_arr[i].r_type;
- xfs_warn(mp, "region[%u]: %s - %u bytes\n", i,
+ xfs_warn(mp, "region[%u]: %s - %u bytes", i,
((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ?
"bad-rtype" : res_type_str[r_type-1]),
ticket->t_res_arr[i].r_len);
@@ -1683,7 +2033,7 @@ xlog_print_tic_res(
xfs_alert_tag(mp, XFS_PTAG_LOGRES,
"xlog_write: reservation ran out. Need to up reservation");
- xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
+ xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR);
}
/*
@@ -1705,6 +2055,10 @@ xlog_write_calc_vec_length(
headers++;
for (lv = log_vector; lv; lv = lv->lv_next) {
+ /* we don't write ordered log vectors */
+ if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED)
+ continue;
+
headers += lv->lv_niovecs;
for (i = 0; i < lv->lv_niovecs; i++) {
@@ -1746,7 +2100,7 @@ xlog_write_start_rec(
static xlog_op_header_t *
xlog_write_setup_ophdr(
- struct log *log,
+ struct xlog *log,
struct xlog_op_header *ophdr,
struct xlog_ticket *ticket,
uint flags)
@@ -1782,7 +2136,7 @@ xlog_write_setup_ophdr(
* Set up the parameters of the region copy into the log. This has
* to handle region write split across multiple log buffers - this
* state is kept external to this function so that this code can
- * can be written in an obvious, self documenting manner.
+ * be written in an obvious, self documenting manner.
*/
static int
xlog_write_setup_copy(
@@ -1829,7 +2183,7 @@ xlog_write_setup_copy(
static int
xlog_write_copy_finish(
- struct log *log,
+ struct xlog *log,
struct xlog_in_core *iclog,
uint flags,
int *record_cnt,
@@ -1914,7 +2268,7 @@ xlog_write_copy_finish(
*/
int
xlog_write(
- struct log *log,
+ struct xlog *log,
struct xfs_log_vec *log_vector,
struct xlog_ticket *ticket,
xfs_lsn_t *start_lsn,
@@ -1958,7 +2312,7 @@ xlog_write(
index = 0;
lv = log_vector;
vecp = lv->lv_iovecp;
- while (lv && index < lv->lv_niovecs) {
+ while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) {
void *ptr;
int log_offset;
@@ -1978,13 +2332,22 @@ xlog_write(
* This loop writes out as many regions as can fit in the amount
* of space which was allocated by xlog_state_get_iclog_space().
*/
- while (lv && index < lv->lv_niovecs) {
- struct xfs_log_iovec *reg = &vecp[index];
+ while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) {
+ struct xfs_log_iovec *reg;
struct xlog_op_header *ophdr;
int start_rec_copy;
int copy_len;
int copy_off;
+ bool ordered = false;
+
+ /* ordered log vectors have no regions to write */
+ if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) {
+ ASSERT(lv->lv_niovecs == 0);
+ ordered = true;
+ goto next_lv;
+ }
+ reg = &vecp[index];
ASSERT(reg->i_len % sizeof(__int32_t) == 0);
ASSERT((unsigned long)ptr % sizeof(__int32_t) == 0);
@@ -2044,12 +2407,13 @@ xlog_write(
break;
if (++index == lv->lv_niovecs) {
+next_lv:
lv = lv->lv_next;
index = 0;
if (lv)
vecp = lv->lv_iovecp;
}
- if (record_cnt == 0) {
+ if (record_cnt == 0 && ordered == false) {
if (!lv)
return 0;
break;
@@ -2085,7 +2449,8 @@ xlog_write(
* State Change: DIRTY -> ACTIVE
*/
STATIC void
-xlog_state_clean_log(xlog_t *log)
+xlog_state_clean_log(
+ struct xlog *log)
{
xlog_in_core_t *iclog;
int changed = 0;
@@ -2165,7 +2530,7 @@ xlog_state_clean_log(xlog_t *log)
STATIC xfs_lsn_t
xlog_get_lowest_lsn(
- xlog_t *log)
+ struct xlog *log)
{
xlog_in_core_t *lsn_log;
xfs_lsn_t lowest_lsn, lsn;
@@ -2188,9 +2553,9 @@ xlog_get_lowest_lsn(
STATIC void
xlog_state_do_callback(
- xlog_t *log,
- int aborted,
- xlog_in_core_t *ciclog)
+ struct xlog *log,
+ int aborted,
+ struct xlog_in_core *ciclog)
{
xlog_in_core_t *iclog;
xlog_in_core_t *first_iclog; /* used to know when we've
@@ -2288,14 +2653,27 @@ xlog_state_do_callback(
/*
- * update the last_sync_lsn before we drop the
+ * Completion of a iclog IO does not imply that
+ * a transaction has completed, as transactions
+ * can be large enough to span many iclogs. We
+ * cannot change the tail of the log half way
+ * through a transaction as this may be the only
+ * transaction in the log and moving th etail to
+ * point to the middle of it will prevent
+ * recovery from finding the start of the
+ * transaction. Hence we should only update the
+ * last_sync_lsn if this iclog contains
+ * transaction completion callbacks on it.
+ *
+ * We have to do this before we drop the
* icloglock to ensure we are the only one that
* can update it.
*/
ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn),
be64_to_cpu(iclog->ic_header.h_lsn)) <= 0);
- atomic64_set(&log->l_last_sync_lsn,
- be64_to_cpu(iclog->ic_header.h_lsn));
+ if (iclog->ic_callback)
+ atomic64_set(&log->l_last_sync_lsn,
+ be64_to_cpu(iclog->ic_header.h_lsn));
} else
ioerrors++;
@@ -2410,7 +2788,7 @@ xlog_state_done_syncing(
xlog_in_core_t *iclog,
int aborted)
{
- xlog_t *log = iclog->ic_log;
+ struct xlog *log = iclog->ic_log;
spin_lock(&log->l_icloglock);
@@ -2464,12 +2842,13 @@ xlog_state_done_syncing(
* is copied.
*/
STATIC int
-xlog_state_get_iclog_space(xlog_t *log,
- int len,
- xlog_in_core_t **iclogp,
- xlog_ticket_t *ticket,
- int *continued_write,
- int *logoffsetp)
+xlog_state_get_iclog_space(
+ struct xlog *log,
+ int len,
+ struct xlog_in_core **iclogp,
+ struct xlog_ticket *ticket,
+ int *continued_write,
+ int *logoffsetp)
{
int log_offset;
xlog_rec_header_t *head;
@@ -2566,119 +2945,6 @@ restart:
return 0;
} /* xlog_state_get_iclog_space */
-/*
- * Atomically get the log space required for a log ticket.
- *
- * Once a ticket gets put onto the reserveq, it will only return after the
- * needed reservation is satisfied.
- *
- * This function is structured so that it has a lock free fast path. This is
- * necessary because every new transaction reservation will come through this
- * path. Hence any lock will be globally hot if we take it unconditionally on
- * every pass.
- *
- * As tickets are only ever moved on and off the reserveq under the
- * l_grant_reserve_lock, we only need to take that lock if we are going to add
- * the ticket to the queue and sleep. We can avoid taking the lock if the ticket
- * was never added to the reserveq because the t_queue list head will be empty
- * and we hold the only reference to it so it can safely be checked unlocked.
- */
-STATIC int
-xlog_grant_log_space(
- struct log *log,
- struct xlog_ticket *tic)
-{
- int free_bytes, need_bytes;
- int error = 0;
-
- ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
-
- trace_xfs_log_grant_enter(log, tic);
-
- /*
- * If there are other waiters on the queue then give them a chance at
- * logspace before us. Wake up the first waiters, if we do not wake
- * up all the waiters then go to sleep waiting for more free space,
- * otherwise try to get some space for this transaction.
- */
- need_bytes = tic->t_unit_res;
- if (tic->t_flags & XFS_LOG_PERM_RESERV)
- need_bytes *= tic->t_ocnt;
- free_bytes = xlog_space_left(log, &log->l_grant_reserve_head);
- if (!list_empty_careful(&log->l_reserveq)) {
- spin_lock(&log->l_grant_reserve_lock);
- if (!xlog_reserveq_wake(log, &free_bytes) ||
- free_bytes < need_bytes)
- error = xlog_reserveq_wait(log, tic, need_bytes);
- spin_unlock(&log->l_grant_reserve_lock);
- } else if (free_bytes < need_bytes) {
- spin_lock(&log->l_grant_reserve_lock);
- error = xlog_reserveq_wait(log, tic, need_bytes);
- spin_unlock(&log->l_grant_reserve_lock);
- }
- if (error)
- return error;
-
- xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes);
- xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
- trace_xfs_log_grant_exit(log, tic);
- xlog_verify_grant_tail(log);
- return 0;
-}
-
-/*
- * Replenish the byte reservation required by moving the grant write head.
- *
- * Similar to xlog_grant_log_space, the function is structured to have a lock
- * free fast path.
- */
-STATIC int
-xlog_regrant_write_log_space(
- struct log *log,
- struct xlog_ticket *tic)
-{
- int free_bytes, need_bytes;
- int error = 0;
-
- tic->t_curr_res = tic->t_unit_res;
- xlog_tic_reset_res(tic);
-
- if (tic->t_cnt > 0)
- return 0;
-
- ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY));
-
- trace_xfs_log_regrant_write_enter(log, tic);
-
- /*
- * If there are other waiters on the queue then give them a chance at
- * logspace before us. Wake up the first waiters, if we do not wake
- * up all the waiters then go to sleep waiting for more free space,
- * otherwise try to get some space for this transaction.
- */
- need_bytes = tic->t_unit_res;
- free_bytes = xlog_space_left(log, &log->l_grant_write_head);
- if (!list_empty_careful(&log->l_writeq)) {
- spin_lock(&log->l_grant_write_lock);
- if (!xlog_writeq_wake(log, &free_bytes) ||
- free_bytes < need_bytes)
- error = xlog_writeq_wait(log, tic, need_bytes);
- spin_unlock(&log->l_grant_write_lock);
- } else if (free_bytes < need_bytes) {
- spin_lock(&log->l_grant_write_lock);
- error = xlog_writeq_wait(log, tic, need_bytes);
- spin_unlock(&log->l_grant_write_lock);
- }
-
- if (error)
- return error;
-
- xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes);
- trace_xfs_log_regrant_write_exit(log, tic);
- xlog_verify_grant_tail(log);
- return 0;
-}
-
/* The first cnt-1 times through here we don't need to
* move the grant write head because the permanent
* reservation has reserved cnt times the unit amount.
@@ -2687,17 +2953,18 @@ xlog_regrant_write_log_space(
* move grant reservation head forward.
*/
STATIC void
-xlog_regrant_reserve_log_space(xlog_t *log,
- xlog_ticket_t *ticket)
+xlog_regrant_reserve_log_space(
+ struct xlog *log,
+ struct xlog_ticket *ticket)
{
trace_xfs_log_regrant_reserve_enter(log, ticket);
if (ticket->t_cnt > 0)
ticket->t_cnt--;
- xlog_grant_sub_space(log, &log->l_grant_reserve_head,
+ xlog_grant_sub_space(log, &log->l_reserve_head.grant,
ticket->t_curr_res);
- xlog_grant_sub_space(log, &log->l_grant_write_head,
+ xlog_grant_sub_space(log, &log->l_write_head.grant,
ticket->t_curr_res);
ticket->t_curr_res = ticket->t_unit_res;
xlog_tic_reset_res(ticket);
@@ -2708,7 +2975,7 @@ xlog_regrant_reserve_log_space(xlog_t *log,
if (ticket->t_cnt > 0)
return;
- xlog_grant_add_space(log, &log->l_grant_reserve_head,
+ xlog_grant_add_space(log, &log->l_reserve_head.grant,
ticket->t_unit_res);
trace_xfs_log_regrant_reserve_exit(log, ticket);
@@ -2733,8 +3000,9 @@ xlog_regrant_reserve_log_space(xlog_t *log,
* in the current reservation field.
*/
STATIC void
-xlog_ungrant_log_space(xlog_t *log,
- xlog_ticket_t *ticket)
+xlog_ungrant_log_space(
+ struct xlog *log,
+ struct xlog_ticket *ticket)
{
int bytes;
@@ -2754,14 +3022,13 @@ xlog_ungrant_log_space(xlog_t *log,
bytes += ticket->t_unit_res*ticket->t_cnt;
}
- xlog_grant_sub_space(log, &log->l_grant_reserve_head, bytes);
- xlog_grant_sub_space(log, &log->l_grant_write_head, bytes);
+ xlog_grant_sub_space(log, &log->l_reserve_head.grant, bytes);
+ xlog_grant_sub_space(log, &log->l_write_head.grant, bytes);
trace_xfs_log_ungrant_exit(log, ticket);
- xfs_log_move_tail(log->l_mp, 1);
-} /* xlog_ungrant_log_space */
-
+ xfs_log_space_wake(log->l_mp);
+}
/*
* Flush iclog to disk if this is the last reference to the given iclog and
@@ -2774,8 +3041,8 @@ xlog_ungrant_log_space(xlog_t *log,
*/
STATIC int
xlog_state_release_iclog(
- xlog_t *log,
- xlog_in_core_t *iclog)
+ struct xlog *log,
+ struct xlog_in_core *iclog)
{
int sync = 0; /* do we sync? */
@@ -2825,9 +3092,10 @@ xlog_state_release_iclog(
* that every data block. We have run out of space in this log record.
*/
STATIC void
-xlog_state_switch_iclogs(xlog_t *log,
- xlog_in_core_t *iclog,
- int eventual_size)
+xlog_state_switch_iclogs(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ int eventual_size)
{
ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE);
if (!eventual_size)
@@ -2891,7 +3159,7 @@ _xfs_log_force(
uint flags,
int *log_flushed)
{
- struct log *log = mp->m_log;
+ struct xlog *log = mp->m_log;
struct xlog_in_core *iclog;
xfs_lsn_t lsn;
@@ -3011,6 +3279,7 @@ xfs_log_force(
{
int error;
+ trace_xfs_log_force(mp, 0);
error = _xfs_log_force(mp, flags, NULL);
if (error)
xfs_warn(mp, "%s: error %d returned.", __func__, error);
@@ -3038,7 +3307,7 @@ _xfs_log_force_lsn(
uint flags,
int *log_flushed)
{
- struct log *log = mp->m_log;
+ struct xlog *log = mp->m_log;
struct xlog_in_core *iclog;
int already_slept = 0;
@@ -3159,6 +3428,7 @@ xfs_log_force_lsn(
{
int error;
+ trace_xfs_log_force(mp, lsn);
error = _xfs_log_force_lsn(mp, lsn, flags, NULL);
if (error)
xfs_warn(mp, "%s: error %d returned.", __func__, error);
@@ -3169,7 +3439,9 @@ xfs_log_force_lsn(
* disk.
*/
STATIC void
-xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog)
+xlog_state_want_sync(
+ struct xlog *log,
+ struct xlog_in_core *iclog)
{
assert_spin_locked(&log->l_icloglock);
@@ -3211,24 +3483,17 @@ xfs_log_ticket_get(
}
/*
- * Allocate and initialise a new log ticket.
+ * Figure out the total log space unit (in bytes) that would be
+ * required for a log ticket.
*/
-xlog_ticket_t *
-xlog_ticket_alloc(
- struct log *log,
- int unit_bytes,
- int cnt,
- char client,
- uint xflags,
- int alloc_flags)
+int
+xfs_log_calc_unit_res(
+ struct xfs_mount *mp,
+ int unit_bytes)
{
- struct xlog_ticket *tic;
- uint num_headers;
- int iclog_space;
-
- tic = kmem_zone_zalloc(xfs_log_ticket_zone, alloc_flags);
- if (!tic)
- return NULL;
+ struct xlog *log = mp->m_log;
+ int iclog_space;
+ uint num_headers;
/*
* Permanent reservations have up to 'cnt'-1 active log operations
@@ -3303,28 +3568,51 @@ xlog_ticket_alloc(
unit_bytes += log->l_iclog_hsize;
/* for roundoff padding for transaction data and one for commit record */
- if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) &&
- log->l_mp->m_sb.sb_logsunit > 1) {
+ if (xfs_sb_version_haslogv2(&mp->m_sb) && mp->m_sb.sb_logsunit > 1) {
/* log su roundoff */
- unit_bytes += 2*log->l_mp->m_sb.sb_logsunit;
+ unit_bytes += 2 * mp->m_sb.sb_logsunit;
} else {
/* BB roundoff */
- unit_bytes += 2*BBSIZE;
+ unit_bytes += 2 * BBSIZE;
}
+ return unit_bytes;
+}
+
+/*
+ * Allocate and initialise a new log ticket.
+ */
+struct xlog_ticket *
+xlog_ticket_alloc(
+ struct xlog *log,
+ int unit_bytes,
+ int cnt,
+ char client,
+ bool permanent,
+ xfs_km_flags_t alloc_flags)
+{
+ struct xlog_ticket *tic;
+ int unit_res;
+
+ tic = kmem_zone_zalloc(xfs_log_ticket_zone, alloc_flags);
+ if (!tic)
+ return NULL;
+
+ unit_res = xfs_log_calc_unit_res(log->l_mp, unit_bytes);
+
atomic_set(&tic->t_ref, 1);
+ tic->t_task = current;
INIT_LIST_HEAD(&tic->t_queue);
- tic->t_unit_res = unit_bytes;
- tic->t_curr_res = unit_bytes;
+ tic->t_unit_res = unit_res;
+ tic->t_curr_res = unit_res;
tic->t_cnt = cnt;
tic->t_ocnt = cnt;
- tic->t_tid = random32();
+ tic->t_tid = prandom_u32();
tic->t_clientid = client;
tic->t_flags = XLOG_TIC_INITED;
tic->t_trans_type = 0;
- if (xflags & XFS_LOG_PERM_RESERV)
+ if (permanent)
tic->t_flags |= XLOG_TIC_PERM_RESERV;
- init_waitqueue_head(&tic->t_wait);
xlog_tic_reset_res(tic);
@@ -3346,7 +3634,7 @@ xlog_ticket_alloc(
*/
void
xlog_verify_dest_ptr(
- struct log *log,
+ struct xlog *log,
char *ptr)
{
int i;
@@ -3375,12 +3663,12 @@ xlog_verify_dest_ptr(
*/
STATIC void
xlog_verify_grant_tail(
- struct log *log)
+ struct xlog *log)
{
int tail_cycle, tail_blocks;
int cycle, space;
- xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space);
+ xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &space);
xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks);
if (tail_cycle != cycle) {
if (cycle - 1 != tail_cycle &&
@@ -3401,9 +3689,10 @@ xlog_verify_grant_tail(
/* check if it will fit */
STATIC void
-xlog_verify_tail_lsn(xlog_t *log,
- xlog_in_core_t *iclog,
- xfs_lsn_t tail_lsn)
+xlog_verify_tail_lsn(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ xfs_lsn_t tail_lsn)
{
int blocks;
@@ -3440,10 +3729,11 @@ xlog_verify_tail_lsn(xlog_t *log,
* the cycle numbers agree with the current cycle number.
*/
STATIC void
-xlog_verify_iclog(xlog_t *log,
- xlog_in_core_t *iclog,
- int count,
- boolean_t syncing)
+xlog_verify_iclog(
+ struct xlog *log,
+ struct xlog_in_core *iclog,
+ int count,
+ bool syncing)
{
xlog_op_header_t *ophead;
xlog_in_core_t *icptr;
@@ -3458,11 +3748,9 @@ xlog_verify_iclog(xlog_t *log,
/* check validity of iclog pointers */
spin_lock(&log->l_icloglock);
icptr = log->l_iclog;
- for (i=0; i < log->l_iclog_bufs; i++) {
- if (icptr == NULL)
- xfs_emerg(log->l_mp, "%s: invalid ptr", __func__);
- icptr = icptr->ic_next;
- }
+ for (i = 0; i < log->l_iclog_bufs; i++, icptr = icptr->ic_next)
+ ASSERT(icptr);
+
if (icptr != log->l_iclog)
xfs_emerg(log->l_mp, "%s: corrupt iclog ring", __func__);
spin_unlock(&log->l_icloglock);
@@ -3491,7 +3779,7 @@ xlog_verify_iclog(xlog_t *log,
/* clientid is only 1 byte */
field_offset = (__psint_t)
((xfs_caddr_t)&(ophead->oh_clientid) - base_ptr);
- if (syncing == B_FALSE || (field_offset & 0x1ff)) {
+ if (!syncing || (field_offset & 0x1ff)) {
clientid = ophead->oh_clientid;
} else {
idx = BTOBBT((xfs_caddr_t)&(ophead->oh_clientid) - iclog->ic_datap);
@@ -3514,7 +3802,7 @@ xlog_verify_iclog(xlog_t *log,
/* check length */
field_offset = (__psint_t)
((xfs_caddr_t)&(ophead->oh_len) - base_ptr);
- if (syncing == B_FALSE || (field_offset & 0x1ff)) {
+ if (!syncing || (field_offset & 0x1ff)) {
op_len = be32_to_cpu(ophead->oh_len);
} else {
idx = BTOBBT((__psint_t)&ophead->oh_len -
@@ -3537,7 +3825,7 @@ xlog_verify_iclog(xlog_t *log,
*/
STATIC int
xlog_state_ioerror(
- xlog_t *log)
+ struct xlog *log)
{
xlog_in_core_t *iclog, *ic;
@@ -3582,8 +3870,7 @@ xfs_log_force_umount(
struct xfs_mount *mp,
int logerror)
{
- xlog_ticket_t *tic;
- xlog_t *log;
+ struct xlog *log;
int retval;
log = mp->m_log;
@@ -3650,15 +3937,8 @@ xfs_log_force_umount(
* we don't enqueue anything once the SHUTDOWN flag is set, and this
* action is protected by the grant locks.
*/
- spin_lock(&log->l_grant_reserve_lock);
- list_for_each_entry(tic, &log->l_reserveq, t_queue)
- wake_up(&tic->t_wait);
- spin_unlock(&log->l_grant_reserve_lock);
-
- spin_lock(&log->l_grant_write_lock);
- list_for_each_entry(tic, &log->l_writeq, t_queue)
- wake_up(&tic->t_wait);
- spin_unlock(&log->l_grant_write_lock);
+ xlog_grant_head_wake_all(&log->l_reserve_head);
+ xlog_grant_head_wake_all(&log->l_write_head);
if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) {
ASSERT(!logerror);
@@ -3672,11 +3952,14 @@ xfs_log_force_umount(
retval = xlog_state_ioerror(log);
spin_unlock(&log->l_icloglock);
}
+
/*
- * Wake up everybody waiting on xfs_log_force.
- * Callback all log item committed functions as if the
- * log writes were completed.
+ * Wake up everybody waiting on xfs_log_force. Wake the CIL push first
+ * as if the log writes were completed. The abort handling in the log
+ * item committed callback functions will do this again under lock to
+ * avoid races.
*/
+ wake_up_all(&log->l_cilp->xc_commit_wait);
xlog_state_do_callback(log, XFS_LI_ABORTED, NULL);
#ifdef XFSERRORDEBUG
@@ -3697,7 +3980,8 @@ xfs_log_force_umount(
}
STATIC int
-xlog_iclogs_empty(xlog_t *log)
+xlog_iclogs_empty(
+ struct xlog *log)
{
xlog_in_core_t *iclog;
@@ -3712,3 +3996,4 @@ xlog_iclogs_empty(xlog_t *log)
} while (iclog != log->l_iclog);
return 1;
}
+