diff options
Diffstat (limited to 'fs/xfs/xfs_log.c')
| -rw-r--r-- | fs/xfs/xfs_log.c | 1485 |
1 files changed, 885 insertions, 600 deletions
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index e2cc3568c29..292308dede6 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -17,75 +17,113 @@ */ #include "xfs.h" #include "xfs_fs.h" -#include "xfs_types.h" -#include "xfs_bit.h" -#include "xfs_log.h" -#include "xfs_inum.h" -#include "xfs_trans.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" #include "xfs_sb.h" #include "xfs_ag.h" #include "xfs_mount.h" #include "xfs_error.h" +#include "xfs_trans.h" +#include "xfs_trans_priv.h" +#include "xfs_log.h" #include "xfs_log_priv.h" -#include "xfs_buf_item.h" -#include "xfs_bmap_btree.h" -#include "xfs_alloc_btree.h" -#include "xfs_ialloc_btree.h" #include "xfs_log_recover.h" -#include "xfs_trans_priv.h" -#include "xfs_dinode.h" #include "xfs_inode.h" -#include "xfs_rw.h" #include "xfs_trace.h" +#include "xfs_fsops.h" +#include "xfs_cksum.h" kmem_zone_t *xfs_log_ticket_zone; /* Local miscellaneous function prototypes */ -STATIC int xlog_commit_record(struct log *log, struct xlog_ticket *ticket, - xlog_in_core_t **, xfs_lsn_t *); -STATIC xlog_t * xlog_alloc_log(xfs_mount_t *mp, - xfs_buftarg_t *log_target, - xfs_daddr_t blk_offset, - int num_bblks); -STATIC int xlog_space_left(struct log *log, atomic64_t *head); -STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog); -STATIC void xlog_dealloc_log(xlog_t *log); +STATIC int +xlog_commit_record( + struct xlog *log, + struct xlog_ticket *ticket, + struct xlog_in_core **iclog, + xfs_lsn_t *commitlsnp); + +STATIC struct xlog * +xlog_alloc_log( + struct xfs_mount *mp, + struct xfs_buftarg *log_target, + xfs_daddr_t blk_offset, + int num_bblks); +STATIC int +xlog_space_left( + struct xlog *log, + atomic64_t *head); +STATIC int +xlog_sync( + struct xlog *log, + struct xlog_in_core *iclog); +STATIC void +xlog_dealloc_log( + struct xlog *log); /* local state machine functions */ STATIC void xlog_state_done_syncing(xlog_in_core_t *iclog, int); -STATIC void xlog_state_do_callback(xlog_t *log,int aborted, xlog_in_core_t *iclog); -STATIC int xlog_state_get_iclog_space(xlog_t *log, - int len, - xlog_in_core_t **iclog, - xlog_ticket_t *ticket, - int *continued_write, - int *logoffsetp); -STATIC int xlog_state_release_iclog(xlog_t *log, - xlog_in_core_t *iclog); -STATIC void xlog_state_switch_iclogs(xlog_t *log, - xlog_in_core_t *iclog, - int eventual_size); -STATIC void xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog); - -/* local functions to manipulate grant head */ -STATIC int xlog_grant_log_space(xlog_t *log, - xlog_ticket_t *xtic); -STATIC void xlog_grant_push_ail(struct log *log, - int need_bytes); -STATIC void xlog_regrant_reserve_log_space(xlog_t *log, - xlog_ticket_t *ticket); -STATIC int xlog_regrant_write_log_space(xlog_t *log, - xlog_ticket_t *ticket); -STATIC void xlog_ungrant_log_space(xlog_t *log, - xlog_ticket_t *ticket); +STATIC void +xlog_state_do_callback( + struct xlog *log, + int aborted, + struct xlog_in_core *iclog); +STATIC int +xlog_state_get_iclog_space( + struct xlog *log, + int len, + struct xlog_in_core **iclog, + struct xlog_ticket *ticket, + int *continued_write, + int *logoffsetp); +STATIC int +xlog_state_release_iclog( + struct xlog *log, + struct xlog_in_core *iclog); +STATIC void +xlog_state_switch_iclogs( + struct xlog *log, + struct xlog_in_core *iclog, + int eventual_size); +STATIC void +xlog_state_want_sync( + struct xlog *log, + struct xlog_in_core *iclog); + +STATIC void +xlog_grant_push_ail( + struct xlog *log, + int need_bytes); +STATIC void +xlog_regrant_reserve_log_space( + struct xlog *log, + struct xlog_ticket *ticket); +STATIC void +xlog_ungrant_log_space( + struct xlog *log, + struct xlog_ticket *ticket); #if defined(DEBUG) -STATIC void xlog_verify_dest_ptr(xlog_t *log, char *ptr); -STATIC void xlog_verify_grant_tail(struct log *log); -STATIC void xlog_verify_iclog(xlog_t *log, xlog_in_core_t *iclog, - int count, boolean_t syncing); -STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, - xfs_lsn_t tail_lsn); +STATIC void +xlog_verify_dest_ptr( + struct xlog *log, + char *ptr); +STATIC void +xlog_verify_grant_tail( + struct xlog *log); +STATIC void +xlog_verify_iclog( + struct xlog *log, + struct xlog_in_core *iclog, + int count, + bool syncing); +STATIC void +xlog_verify_tail_lsn( + struct xlog *log, + struct xlog_in_core *iclog, + xfs_lsn_t tail_lsn); #else #define xlog_verify_dest_ptr(a,b) #define xlog_verify_grant_tail(a) @@ -93,13 +131,15 @@ STATIC void xlog_verify_tail_lsn(xlog_t *log, xlog_in_core_t *iclog, #define xlog_verify_tail_lsn(a,b,c) #endif -STATIC int xlog_iclogs_empty(xlog_t *log); +STATIC int +xlog_iclogs_empty( + struct xlog *log); static void xlog_grant_sub_space( - struct log *log, - atomic64_t *head, - int bytes) + struct xlog *log, + atomic64_t *head, + int bytes) { int64_t head_val = atomic64_read(head); int64_t new, old; @@ -123,9 +163,9 @@ xlog_grant_sub_space( static void xlog_grant_add_space( - struct log *log, - atomic64_t *head, - int bytes) + struct xlog *log, + atomic64_t *head, + int bytes) { int64_t head_val = atomic64_read(head); int64_t new, old; @@ -150,78 +190,94 @@ xlog_grant_add_space( } while (head_val != old); } -STATIC bool -xlog_reserveq_wake( - struct log *log, - int *free_bytes) +STATIC void +xlog_grant_head_init( + struct xlog_grant_head *head) +{ + xlog_assign_grant_head(&head->grant, 1, 0); + INIT_LIST_HEAD(&head->waiters); + spin_lock_init(&head->lock); +} + +STATIC void +xlog_grant_head_wake_all( + struct xlog_grant_head *head) { struct xlog_ticket *tic; - int need_bytes; - list_for_each_entry(tic, &log->l_reserveq, t_queue) { + spin_lock(&head->lock); + list_for_each_entry(tic, &head->waiters, t_queue) + wake_up_process(tic->t_task); + spin_unlock(&head->lock); +} + +static inline int +xlog_ticket_reservation( + struct xlog *log, + struct xlog_grant_head *head, + struct xlog_ticket *tic) +{ + if (head == &log->l_write_head) { + ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); + return tic->t_unit_res; + } else { if (tic->t_flags & XLOG_TIC_PERM_RESERV) - need_bytes = tic->t_unit_res * tic->t_cnt; + return tic->t_unit_res * tic->t_cnt; else - need_bytes = tic->t_unit_res; - - if (*free_bytes < need_bytes) - return false; - *free_bytes -= need_bytes; - - trace_xfs_log_grant_wake_up(log, tic); - wake_up(&tic->t_wait); + return tic->t_unit_res; } - - return true; } STATIC bool -xlog_writeq_wake( - struct log *log, +xlog_grant_head_wake( + struct xlog *log, + struct xlog_grant_head *head, int *free_bytes) { struct xlog_ticket *tic; int need_bytes; - list_for_each_entry(tic, &log->l_writeq, t_queue) { - ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); - - need_bytes = tic->t_unit_res; - + list_for_each_entry(tic, &head->waiters, t_queue) { + need_bytes = xlog_ticket_reservation(log, head, tic); if (*free_bytes < need_bytes) return false; - *free_bytes -= need_bytes; - trace_xfs_log_regrant_write_wake_up(log, tic); - wake_up(&tic->t_wait); + *free_bytes -= need_bytes; + trace_xfs_log_grant_wake_up(log, tic); + wake_up_process(tic->t_task); } return true; } STATIC int -xlog_reserveq_wait( - struct log *log, +xlog_grant_head_wait( + struct xlog *log, + struct xlog_grant_head *head, struct xlog_ticket *tic, - int need_bytes) + int need_bytes) __releases(&head->lock) + __acquires(&head->lock) { - list_add_tail(&tic->t_queue, &log->l_reserveq); + list_add_tail(&tic->t_queue, &head->waiters); do { if (XLOG_FORCED_SHUTDOWN(log)) goto shutdown; xlog_grant_push_ail(log, need_bytes); + __set_current_state(TASK_UNINTERRUPTIBLE); + spin_unlock(&head->lock); + XFS_STATS_INC(xs_sleep_logspace); - trace_xfs_log_grant_sleep(log, tic); - xlog_wait(&tic->t_wait, &log->l_grant_reserve_lock); + trace_xfs_log_grant_sleep(log, tic); + schedule(); trace_xfs_log_grant_wake(log, tic); - spin_lock(&log->l_grant_reserve_lock); + spin_lock(&head->lock); if (XLOG_FORCED_SHUTDOWN(log)) goto shutdown; - } while (xlog_space_left(log, &log->l_grant_reserve_head) < need_bytes); + } while (xlog_space_left(log, &head->grant) < need_bytes); list_del_init(&tic->t_queue); return 0; @@ -230,35 +286,58 @@ shutdown: return XFS_ERROR(EIO); } +/* + * Atomically get the log space required for a log ticket. + * + * Once a ticket gets put onto head->waiters, it will only return after the + * needed reservation is satisfied. + * + * This function is structured so that it has a lock free fast path. This is + * necessary because every new transaction reservation will come through this + * path. Hence any lock will be globally hot if we take it unconditionally on + * every pass. + * + * As tickets are only ever moved on and off head->waiters under head->lock, we + * only need to take that lock if we are going to add the ticket to the queue + * and sleep. We can avoid taking the lock if the ticket was never added to + * head->waiters because the t_queue list head will be empty and we hold the + * only reference to it so it can safely be checked unlocked. + */ STATIC int -xlog_writeq_wait( - struct log *log, +xlog_grant_head_check( + struct xlog *log, + struct xlog_grant_head *head, struct xlog_ticket *tic, - int need_bytes) + int *need_bytes) { - list_add_tail(&tic->t_queue, &log->l_writeq); - - do { - if (XLOG_FORCED_SHUTDOWN(log)) - goto shutdown; - xlog_grant_push_ail(log, need_bytes); - - XFS_STATS_INC(xs_sleep_logspace); - trace_xfs_log_regrant_write_sleep(log, tic); + int free_bytes; + int error = 0; - xlog_wait(&tic->t_wait, &log->l_grant_write_lock); - trace_xfs_log_regrant_write_wake(log, tic); + ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); - spin_lock(&log->l_grant_write_lock); - if (XLOG_FORCED_SHUTDOWN(log)) - goto shutdown; - } while (xlog_space_left(log, &log->l_grant_write_head) < need_bytes); + /* + * If there are other waiters on the queue then give them a chance at + * logspace before us. Wake up the first waiters, if we do not wake + * up all the waiters then go to sleep waiting for more free space, + * otherwise try to get some space for this transaction. + */ + *need_bytes = xlog_ticket_reservation(log, head, tic); + free_bytes = xlog_space_left(log, &head->grant); + if (!list_empty_careful(&head->waiters)) { + spin_lock(&head->lock); + if (!xlog_grant_head_wake(log, head, &free_bytes) || + free_bytes < *need_bytes) { + error = xlog_grant_head_wait(log, head, tic, + *need_bytes); + } + spin_unlock(&head->lock); + } else if (free_bytes < *need_bytes) { + spin_lock(&head->lock); + error = xlog_grant_head_wait(log, head, tic, *need_bytes); + spin_unlock(&head->lock); + } - list_del_init(&tic->t_queue); - return 0; -shutdown: - list_del_init(&tic->t_queue); - return XFS_ERROR(EIO); + return error; } static void @@ -286,6 +365,129 @@ xlog_tic_add_region(xlog_ticket_t *tic, uint len, uint type) } /* + * Replenish the byte reservation required by moving the grant write head. + */ +int +xfs_log_regrant( + struct xfs_mount *mp, + struct xlog_ticket *tic) +{ + struct xlog *log = mp->m_log; + int need_bytes; + int error = 0; + + if (XLOG_FORCED_SHUTDOWN(log)) + return XFS_ERROR(EIO); + + XFS_STATS_INC(xs_try_logspace); + + /* + * This is a new transaction on the ticket, so we need to change the + * transaction ID so that the next transaction has a different TID in + * the log. Just add one to the existing tid so that we can see chains + * of rolling transactions in the log easily. + */ + tic->t_tid++; + + xlog_grant_push_ail(log, tic->t_unit_res); + + tic->t_curr_res = tic->t_unit_res; + xlog_tic_reset_res(tic); + + if (tic->t_cnt > 0) + return 0; + + trace_xfs_log_regrant(log, tic); + + error = xlog_grant_head_check(log, &log->l_write_head, tic, + &need_bytes); + if (error) + goto out_error; + + xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes); + trace_xfs_log_regrant_exit(log, tic); + xlog_verify_grant_tail(log); + return 0; + +out_error: + /* + * If we are failing, make sure the ticket doesn't have any current + * reservations. We don't want to add this back when the ticket/ + * transaction gets cancelled. + */ + tic->t_curr_res = 0; + tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ + return error; +} + +/* + * Reserve log space and return a ticket corresponding the reservation. + * + * Each reservation is going to reserve extra space for a log record header. + * When writes happen to the on-disk log, we don't subtract the length of the + * log record header from any reservation. By wasting space in each + * reservation, we prevent over allocation problems. + */ +int +xfs_log_reserve( + struct xfs_mount *mp, + int unit_bytes, + int cnt, + struct xlog_ticket **ticp, + __uint8_t client, + bool permanent, + uint t_type) +{ + struct xlog *log = mp->m_log; + struct xlog_ticket *tic; + int need_bytes; + int error = 0; + + ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); + + if (XLOG_FORCED_SHUTDOWN(log)) + return XFS_ERROR(EIO); + + XFS_STATS_INC(xs_try_logspace); + + ASSERT(*ticp == NULL); + tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent, + KM_SLEEP | KM_MAYFAIL); + if (!tic) + return XFS_ERROR(ENOMEM); + + tic->t_trans_type = t_type; + *ticp = tic; + + xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt + : tic->t_unit_res); + + trace_xfs_log_reserve(log, tic); + + error = xlog_grant_head_check(log, &log->l_reserve_head, tic, + &need_bytes); + if (error) + goto out_error; + + xlog_grant_add_space(log, &log->l_reserve_head.grant, need_bytes); + xlog_grant_add_space(log, &log->l_write_head.grant, need_bytes); + trace_xfs_log_reserve_exit(log, tic); + xlog_verify_grant_tail(log); + return 0; + +out_error: + /* + * If we are failing, make sure the ticket doesn't have any current + * reservations. We don't want to add this back when the ticket/ + * transaction gets cancelled. + */ + tic->t_curr_res = 0; + tic->t_cnt = 0; /* ungrant will give back unit_res * t_cnt. */ + return error; +} + + +/* * NOTES: * * 1. currblock field gets updated at startup and after in-core logs @@ -313,7 +515,7 @@ xfs_log_done( struct xlog_in_core **iclog, uint flags) { - struct log *log = mp->m_log; + struct xlog *log = mp->m_log; xfs_lsn_t lsn = 0; if (XLOG_FORCED_SHUTDOWN(log) || @@ -395,88 +597,6 @@ xfs_log_release_iclog( } /* - * 1. Reserve an amount of on-disk log space and return a ticket corresponding - * to the reservation. - * 2. Potentially, push buffers at tail of log to disk. - * - * Each reservation is going to reserve extra space for a log record header. - * When writes happen to the on-disk log, we don't subtract the length of the - * log record header from any reservation. By wasting space in each - * reservation, we prevent over allocation problems. - */ -int -xfs_log_reserve( - struct xfs_mount *mp, - int unit_bytes, - int cnt, - struct xlog_ticket **ticket, - __uint8_t client, - uint flags, - uint t_type) -{ - struct log *log = mp->m_log; - struct xlog_ticket *internal_ticket; - int retval = 0; - - ASSERT(client == XFS_TRANSACTION || client == XFS_LOG); - - if (XLOG_FORCED_SHUTDOWN(log)) - return XFS_ERROR(EIO); - - XFS_STATS_INC(xs_try_logspace); - - - if (*ticket != NULL) { - ASSERT(flags & XFS_LOG_PERM_RESERV); - internal_ticket = *ticket; - - /* - * this is a new transaction on the ticket, so we need to - * change the transaction ID so that the next transaction has a - * different TID in the log. Just add one to the existing tid - * so that we can see chains of rolling transactions in the log - * easily. - */ - internal_ticket->t_tid++; - - trace_xfs_log_reserve(log, internal_ticket); - - xlog_grant_push_ail(log, internal_ticket->t_unit_res); - retval = xlog_regrant_write_log_space(log, internal_ticket); - } else { - /* may sleep if need to allocate more tickets */ - internal_ticket = xlog_ticket_alloc(log, unit_bytes, cnt, - client, flags, - KM_SLEEP|KM_MAYFAIL); - if (!internal_ticket) - return XFS_ERROR(ENOMEM); - internal_ticket->t_trans_type = t_type; - *ticket = internal_ticket; - - trace_xfs_log_reserve(log, internal_ticket); - - xlog_grant_push_ail(log, - (internal_ticket->t_unit_res * - internal_ticket->t_cnt)); - retval = xlog_grant_log_space(log, internal_ticket); - } - - if (unlikely(retval)) { - /* - * If we are failing, make sure the ticket doesn't have any - * current reservations. We don't want to add this back - * when the ticket/ transaction gets cancelled. - */ - internal_ticket->t_curr_res = 0; - /* ungrant will give back unit_res * t_cnt. */ - internal_ticket->t_cnt = 0; - } - - return retval; -} - - -/* * Mount a log filesystem * * mp - ubiquitous xfs mount point structure @@ -493,13 +613,16 @@ xfs_log_mount( xfs_daddr_t blk_offset, int num_bblks) { - int error; + int error = 0; + int min_logfsbs; - if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) - xfs_notice(mp, "Mounting Filesystem"); - else { + if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) { + xfs_notice(mp, "Mounting V%d Filesystem", + XFS_SB_VERSION_NUM(&mp->m_sb)); + } else { xfs_notice(mp, -"Mounting filesystem in no-recovery mode. Filesystem will be inconsistent."); +"Mounting V%d filesystem in no-recovery mode. Filesystem will be inconsistent.", + XFS_SB_VERSION_NUM(&mp->m_sb)); ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); } @@ -510,6 +633,50 @@ xfs_log_mount( } /* + * Validate the given log space and drop a critical message via syslog + * if the log size is too small that would lead to some unexpected + * situations in transaction log space reservation stage. + * + * Note: we can't just reject the mount if the validation fails. This + * would mean that people would have to downgrade their kernel just to + * remedy the situation as there is no way to grow the log (short of + * black magic surgery with xfs_db). + * + * We can, however, reject mounts for CRC format filesystems, as the + * mkfs binary being used to make the filesystem should never create a + * filesystem with a log that is too small. + */ + min_logfsbs = xfs_log_calc_minimum_size(mp); + + if (mp->m_sb.sb_logblocks < min_logfsbs) { + xfs_warn(mp, + "Log size %d blocks too small, minimum size is %d blocks", + mp->m_sb.sb_logblocks, min_logfsbs); + error = EINVAL; + } else if (mp->m_sb.sb_logblocks > XFS_MAX_LOG_BLOCKS) { + xfs_warn(mp, + "Log size %d blocks too large, maximum size is %lld blocks", + mp->m_sb.sb_logblocks, XFS_MAX_LOG_BLOCKS); + error = EINVAL; + } else if (XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks) > XFS_MAX_LOG_BYTES) { + xfs_warn(mp, + "log size %lld bytes too large, maximum size is %lld bytes", + XFS_FSB_TO_B(mp, mp->m_sb.sb_logblocks), + XFS_MAX_LOG_BYTES); + error = EINVAL; + } + if (error) { + if (xfs_sb_version_hascrc(&mp->m_sb)) { + xfs_crit(mp, "AAIEEE! Log failed size checks. Abort!"); + ASSERT(0); + goto out_free_log; + } + xfs_crit(mp, +"Log size out of supported range. Continuing onwards, but if log hangs are\n" +"experienced then please report this message in the bug report."); + } + + /* * Initialize the AIL now we have a log. */ error = xfs_trans_ail_init(mp); @@ -561,25 +728,29 @@ out: } /* - * Finish the recovery of the file system. This is separate from - * the xfs_log_mount() call, because it depends on the code in - * xfs_mountfs() to read in the root and real-time bitmap inodes - * between calling xfs_log_mount() and here. + * Finish the recovery of the file system. This is separate from the + * xfs_log_mount() call, because it depends on the code in xfs_mountfs() to read + * in the root and real-time bitmap inodes between calling xfs_log_mount() and + * here. * - * mp - ubiquitous xfs mount point structure + * If we finish recovery successfully, start the background log work. If we are + * not doing recovery, then we have a RO filesystem and we don't need to start + * it. */ int xfs_log_mount_finish(xfs_mount_t *mp) { - int error; + int error = 0; - if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) + if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) { error = xlog_recover_finish(mp->m_log); - else { - error = 0; + if (!error) + xfs_log_work_queue(mp); + } else { ASSERT(mp->m_flags & XFS_MOUNT_RDONLY); } + return error; } @@ -595,14 +766,14 @@ xfs_log_mount_finish(xfs_mount_t *mp) * Unmount record used to have a string "Unmount filesystem--" in the * data section where the "Un" was really a magic number (XLOG_UNMOUNT_TYPE). * We just write the magic number now since that particular field isn't - * currently architecture converted and "nUmount" is a bit foo. + * currently architecture converted and "Unmount" is a bit foo. * As far as I know, there weren't any dependencies on the old behaviour. */ int xfs_log_unmount_write(xfs_mount_t *mp) { - xlog_t *log = mp->m_log; + struct xlog *log = mp->m_log; xlog_in_core_t *iclog; #ifdef DEBUG xlog_in_core_t *first_iclog; @@ -653,8 +824,9 @@ xfs_log_unmount_write(xfs_mount_t *mp) .lv_iovecp = ®, }; - /* remove inited flag */ + /* remove inited flag, and account for space used */ tic->t_flags = 0; + tic->t_curr_res -= sizeof(magic); error = xlog_write(log, &vec, tic, &lsn, NULL, XLOG_UNMOUNT_TRANS); /* @@ -731,14 +903,49 @@ xfs_log_unmount_write(xfs_mount_t *mp) } /* xfs_log_unmount_write */ /* - * Deallocate log structures for unmount/relocation. + * Empty the log for unmount/freeze. + * + * To do this, we first need to shut down the background log work so it is not + * trying to cover the log as we clean up. We then need to unpin all objects in + * the log so we can then flush them out. Once they have completed their IO and + * run the callbacks removing themselves from the AIL, we can write the unmount + * record. + */ +void +xfs_log_quiesce( + struct xfs_mount *mp) +{ + cancel_delayed_work_sync(&mp->m_log->l_work); + xfs_log_force(mp, XFS_LOG_SYNC); + + /* + * The superblock buffer is uncached and while xfs_ail_push_all_sync() + * will push it, xfs_wait_buftarg() will not wait for it. Further, + * xfs_buf_iowait() cannot be used because it was pushed with the + * XBF_ASYNC flag set, so we need to use a lock/unlock pair to wait for + * the IO to complete. + */ + xfs_ail_push_all_sync(mp->m_ail); + xfs_wait_buftarg(mp->m_ddev_targp); + xfs_buf_lock(mp->m_sb_bp); + xfs_buf_unlock(mp->m_sb_bp); + + xfs_log_unmount_write(mp); +} + +/* + * Shut down and release the AIL and Log. * - * We need to stop the aild from running before we destroy - * and deallocate the log as the aild references the log. + * During unmount, we need to ensure we flush all the dirty metadata objects + * from the AIL so that the log is empty before we write the unmount record to + * the log. Once this is done, we can tear down the AIL and the log. */ void -xfs_log_unmount(xfs_mount_t *mp) +xfs_log_unmount( + struct xfs_mount *mp) { + xfs_log_quiesce(mp); + xfs_trans_ail_destroy(mp); xlog_dealloc_log(mp->m_log); } @@ -760,89 +967,67 @@ xfs_log_item_init( INIT_LIST_HEAD(&item->li_cil); } +/* + * Wake up processes waiting for log space after we have moved the log tail. + */ void -xfs_log_move_tail(xfs_mount_t *mp, - xfs_lsn_t tail_lsn) +xfs_log_space_wake( + struct xfs_mount *mp) { - xlog_ticket_t *tic; - xlog_t *log = mp->m_log; - int need_bytes, free_bytes; + struct xlog *log = mp->m_log; + int free_bytes; if (XLOG_FORCED_SHUTDOWN(log)) return; - if (tail_lsn == 0) - tail_lsn = atomic64_read(&log->l_last_sync_lsn); - - /* tail_lsn == 1 implies that we weren't passed a valid value. */ - if (tail_lsn != 1) - atomic64_set(&log->l_tail_lsn, tail_lsn); - - if (!list_empty_careful(&log->l_writeq)) { -#ifdef DEBUG - if (log->l_flags & XLOG_ACTIVE_RECOVERY) - panic("Recovery problem"); -#endif - spin_lock(&log->l_grant_write_lock); - free_bytes = xlog_space_left(log, &log->l_grant_write_head); - list_for_each_entry(tic, &log->l_writeq, t_queue) { - ASSERT(tic->t_flags & XLOG_TIC_PERM_RESERV); + if (!list_empty_careful(&log->l_write_head.waiters)) { + ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); - if (free_bytes < tic->t_unit_res && tail_lsn != 1) - break; - tail_lsn = 0; - free_bytes -= tic->t_unit_res; - trace_xfs_log_regrant_write_wake_up(log, tic); - wake_up(&tic->t_wait); - } - spin_unlock(&log->l_grant_write_lock); + spin_lock(&log->l_write_head.lock); + free_bytes = xlog_space_left(log, &log->l_write_head.grant); + xlog_grant_head_wake(log, &log->l_write_head, &free_bytes); + spin_unlock(&log->l_write_head.lock); } - if (!list_empty_careful(&log->l_reserveq)) { -#ifdef DEBUG - if (log->l_flags & XLOG_ACTIVE_RECOVERY) - panic("Recovery problem"); -#endif - spin_lock(&log->l_grant_reserve_lock); - free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); - list_for_each_entry(tic, &log->l_reserveq, t_queue) { - if (tic->t_flags & XLOG_TIC_PERM_RESERV) - need_bytes = tic->t_unit_res*tic->t_cnt; - else - need_bytes = tic->t_unit_res; - if (free_bytes < need_bytes && tail_lsn != 1) - break; - tail_lsn = 0; - free_bytes -= need_bytes; - trace_xfs_log_grant_wake_up(log, tic); - wake_up(&tic->t_wait); - } - spin_unlock(&log->l_grant_reserve_lock); + if (!list_empty_careful(&log->l_reserve_head.waiters)) { + ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); + + spin_lock(&log->l_reserve_head.lock); + free_bytes = xlog_space_left(log, &log->l_reserve_head.grant); + xlog_grant_head_wake(log, &log->l_reserve_head, &free_bytes); + spin_unlock(&log->l_reserve_head.lock); } } /* - * Determine if we have a transaction that has gone to disk - * that needs to be covered. To begin the transition to the idle state - * firstly the log needs to be idle (no AIL and nothing in the iclogs). - * If we are then in a state where covering is needed, the caller is informed - * that dummy transactions are required to move the log into the idle state. + * Determine if we have a transaction that has gone to disk that needs to be + * covered. To begin the transition to the idle state firstly the log needs to + * be idle. That means the CIL, the AIL and the iclogs needs to be empty before + * we start attempting to cover the log. + * + * Only if we are then in a state where covering is needed, the caller is + * informed that dummy transactions are required to move the log into the idle + * state. * - * Because this is called as part of the sync process, we should also indicate - * that dummy transactions should be issued in anything but the covered or - * idle states. This ensures that the log tail is accurately reflected in - * the log at the end of the sync, hence if a crash occurrs avoids replay - * of transactions where the metadata is already on disk. + * If there are any items in the AIl or CIL, then we do not want to attempt to + * cover the log as we may be in a situation where there isn't log space + * available to run a dummy transaction and this can lead to deadlocks when the + * tail of the log is pinned by an item that is modified in the CIL. Hence + * there's no point in running a dummy transaction at this point because we + * can't start trying to idle the log until both the CIL and AIL are empty. */ int xfs_log_need_covered(xfs_mount_t *mp) { + struct xlog *log = mp->m_log; int needed = 0; - xlog_t *log = mp->m_log; if (!xfs_fs_writable(mp)) return 0; + if (!xlog_cil_empty(log)) + return 0; + spin_lock(&log->l_icloglock); switch (log->l_covered_state) { case XLOG_STATE_COVER_DONE: @@ -851,14 +1036,17 @@ xfs_log_need_covered(xfs_mount_t *mp) break; case XLOG_STATE_COVER_NEED: case XLOG_STATE_COVER_NEED2: - if (!xfs_ail_min_lsn(log->l_ailp) && - xlog_iclogs_empty(log)) { - if (log->l_covered_state == XLOG_STATE_COVER_NEED) - log->l_covered_state = XLOG_STATE_COVER_DONE; - else - log->l_covered_state = XLOG_STATE_COVER_DONE2; - } - /* FALLTHRU */ + if (xfs_ail_min_lsn(log->l_ailp)) + break; + if (!xlog_iclogs_empty(log)) + break; + + needed = 1; + if (log->l_covered_state == XLOG_STATE_COVER_NEED) + log->l_covered_state = XLOG_STATE_COVER_DONE; + else + log->l_covered_state = XLOG_STATE_COVER_DONE2; + break; default: needed = 1; break; @@ -867,38 +1055,47 @@ xfs_log_need_covered(xfs_mount_t *mp) return needed; } -/****************************************************************************** - * - * local routines - * - ****************************************************************************** - */ - -/* xfs_trans_tail_ail returns 0 when there is nothing in the list. - * The log manager must keep track of the last LR which was committed - * to disk. The lsn of this LR will become the new tail_lsn whenever - * xfs_trans_tail_ail returns 0. If we don't do this, we run into - * the situation where stuff could be written into the log but nothing - * was ever in the AIL when asked. Eventually, we panic since the - * tail hits the head. - * +/* * We may be holding the log iclog lock upon entering this routine. */ xfs_lsn_t -xlog_assign_tail_lsn( +xlog_assign_tail_lsn_locked( struct xfs_mount *mp) { + struct xlog *log = mp->m_log; + struct xfs_log_item *lip; xfs_lsn_t tail_lsn; - struct log *log = mp->m_log; - tail_lsn = xfs_ail_min_lsn(mp->m_ail); - if (!tail_lsn) - tail_lsn = atomic64_read(&log->l_last_sync_lsn); + assert_spin_locked(&mp->m_ail->xa_lock); + /* + * To make sure we always have a valid LSN for the log tail we keep + * track of the last LSN which was committed in log->l_last_sync_lsn, + * and use that when the AIL was empty. + */ + lip = xfs_ail_min(mp->m_ail); + if (lip) + tail_lsn = lip->li_lsn; + else + tail_lsn = atomic64_read(&log->l_last_sync_lsn); + trace_xfs_log_assign_tail_lsn(log, tail_lsn); atomic64_set(&log->l_tail_lsn, tail_lsn); return tail_lsn; } +xfs_lsn_t +xlog_assign_tail_lsn( + struct xfs_mount *mp) +{ + xfs_lsn_t tail_lsn; + + spin_lock(&mp->m_ail->xa_lock); + tail_lsn = xlog_assign_tail_lsn_locked(mp); + spin_unlock(&mp->m_ail->xa_lock); + + return tail_lsn; +} + /* * Return the space in the log between the tail and the head. The head * is passed in the cycle/bytes formal parms. In the special case where @@ -915,7 +1112,7 @@ xlog_assign_tail_lsn( */ STATIC int xlog_space_left( - struct log *log, + struct xlog *log, atomic64_t *head) { int free_bytes; @@ -961,14 +1158,14 @@ xlog_space_left( void xlog_iodone(xfs_buf_t *bp) { - xlog_in_core_t *iclog = bp->b_fspriv; - xlog_t *l = iclog->ic_log; - int aborted = 0; + struct xlog_in_core *iclog = bp->b_fspriv; + struct xlog *l = iclog->ic_log; + int aborted = 0; /* * Race to shutdown the filesystem if we see an error. */ - if (XFS_TEST_ERROR((xfs_buf_geterror(bp)), l->l_mp, + if (XFS_TEST_ERROR(bp->b_error, l->l_mp, XFS_ERRTAG_IODONE_IOERR, XFS_RANDOM_IODONE_IOERR)) { xfs_buf_ioerror_alert(bp, __func__); xfs_buf_stale(bp); @@ -986,13 +1183,15 @@ xlog_iodone(xfs_buf_t *bp) /* log I/O is always issued ASYNC */ ASSERT(XFS_BUF_ISASYNC(bp)); xlog_state_done_syncing(iclog, aborted); + /* - * do not reference the buffer (bp) here as we could race - * with it being freed after writing the unmount record to the - * log. + * drop the buffer lock now that we are done. Nothing references + * the buffer after this, so an unmount waiting on this lock can now + * tear it down safely. As such, it is unsafe to reference the buffer + * (bp) after the unlock as we could race with it being freed. */ - -} /* xlog_iodone */ + xfs_buf_unlock(bp); +} /* * Return size of each in-core log record buffer. @@ -1004,8 +1203,9 @@ xlog_iodone(xfs_buf_t *bp) */ STATIC void -xlog_get_iclog_buffer_size(xfs_mount_t *mp, - xlog_t *log) +xlog_get_iclog_buffer_size( + struct xfs_mount *mp, + struct xlog *log) { int size; int xhdrs; @@ -1061,18 +1261,53 @@ done: } /* xlog_get_iclog_buffer_size */ +void +xfs_log_work_queue( + struct xfs_mount *mp) +{ + queue_delayed_work(mp->m_log_workqueue, &mp->m_log->l_work, + msecs_to_jiffies(xfs_syncd_centisecs * 10)); +} + +/* + * Every sync period we need to unpin all items in the AIL and push them to + * disk. If there is nothing dirty, then we might need to cover the log to + * indicate that the filesystem is idle. + */ +void +xfs_log_worker( + struct work_struct *work) +{ + struct xlog *log = container_of(to_delayed_work(work), + struct xlog, l_work); + struct xfs_mount *mp = log->l_mp; + + /* dgc: errors ignored - not fatal and nowhere to report them */ + if (xfs_log_need_covered(mp)) + xfs_fs_log_dummy(mp); + else + xfs_log_force(mp, 0); + + /* start pushing all the metadata that is currently dirty */ + xfs_ail_push_all(mp->m_ail); + + /* queue us up again */ + xfs_log_work_queue(mp); +} + /* * This routine initializes some of the log structure for a given mount point. * Its primary purpose is to fill in enough, so recovery can occur. However, * some other stuff may be filled in too. */ -STATIC xlog_t * -xlog_alloc_log(xfs_mount_t *mp, - xfs_buftarg_t *log_target, - xfs_daddr_t blk_offset, - int num_bblks) +STATIC struct xlog * +xlog_alloc_log( + struct xfs_mount *mp, + struct xfs_buftarg *log_target, + xfs_daddr_t blk_offset, + int num_bblks) { - xlog_t *log; + struct xlog *log; xlog_rec_header_t *head; xlog_in_core_t **iclogp; xlog_in_core_t *iclog, *prev_iclog=NULL; @@ -1081,7 +1316,7 @@ xlog_alloc_log(xfs_mount_t *mp, int error = ENOMEM; uint log2_size = 0; - log = kmem_zalloc(sizeof(xlog_t), KM_MAYFAIL); + log = kmem_zalloc(sizeof(struct xlog), KM_MAYFAIL); if (!log) { xfs_warn(mp, "Log allocation failed: No memory!"); goto out; @@ -1094,18 +1329,16 @@ xlog_alloc_log(xfs_mount_t *mp, log->l_logBBsize = num_bblks; log->l_covered_state = XLOG_STATE_COVER_IDLE; log->l_flags |= XLOG_ACTIVE_RECOVERY; + INIT_DELAYED_WORK(&log->l_work, xfs_log_worker); log->l_prev_block = -1; /* log->l_tail_lsn = 0x100000000LL; cycle = 1; current block = 0 */ xlog_assign_atomic_lsn(&log->l_tail_lsn, 1, 0); xlog_assign_atomic_lsn(&log->l_last_sync_lsn, 1, 0); log->l_curr_cycle = 1; /* 0 is bad since this is initial value */ - xlog_assign_grant_head(&log->l_grant_reserve_head, 1, 0); - xlog_assign_grant_head(&log->l_grant_write_head, 1, 0); - INIT_LIST_HEAD(&log->l_reserveq); - INIT_LIST_HEAD(&log->l_writeq); - spin_lock_init(&log->l_grant_reserve_lock); - spin_lock_init(&log->l_grant_write_lock); + + xlog_grant_head_init(&log->l_reserve_head); + xlog_grant_head_init(&log->l_write_head); error = EFSCORRUPTED; if (xfs_sb_version_hassector(&mp->m_sb)) { @@ -1137,19 +1370,24 @@ xlog_alloc_log(xfs_mount_t *mp, xlog_get_iclog_buffer_size(mp, log); error = ENOMEM; - bp = xfs_buf_alloc(mp->m_logdev_targp, 0, log->l_iclog_size, 0); + bp = xfs_buf_alloc(mp->m_logdev_targp, 0, BTOBB(log->l_iclog_size), 0); if (!bp) goto out_free_log; - bp->b_iodone = xlog_iodone; + + /* + * The iclogbuf buffer locks are held over IO but we are not going to do + * IO yet. Hence unlock the buffer so that the log IO path can grab it + * when appropriately. + */ ASSERT(xfs_buf_islocked(bp)); + xfs_buf_unlock(bp); + + bp->b_iodone = xlog_iodone; log->l_xbuf = bp; spin_lock_init(&log->l_icloglock); init_waitqueue_head(&log->l_flush_wait); - /* log record size must be multiple of BBSIZE; see xlog_rec_header_t */ - ASSERT((XFS_BUF_SIZE(bp) & BBMASK) == 0); - iclogp = &log->l_iclog; /* * The amount of memory to allocate for the iclog structure is @@ -1169,10 +1407,13 @@ xlog_alloc_log(xfs_mount_t *mp, prev_iclog = iclog; bp = xfs_buf_get_uncached(mp->m_logdev_targp, - log->l_iclog_size, 0); + BTOBB(log->l_iclog_size), 0); if (!bp) goto out_free_iclog; + ASSERT(xfs_buf_islocked(bp)); + xfs_buf_unlock(bp); + bp->b_iodone = xlog_iodone; iclog->ic_bp = bp; iclog->ic_data = bp->b_addr; @@ -1189,7 +1430,7 @@ xlog_alloc_log(xfs_mount_t *mp, head->h_fmt = cpu_to_be32(XLOG_FMT); memcpy(&head->h_fs_uuid, &mp->m_sb.sb_uuid, sizeof(uuid_t)); - iclog->ic_size = XFS_BUF_SIZE(bp) - log->l_iclog_hsize; + iclog->ic_size = BBTOB(bp->b_length) - log->l_iclog_hsize; iclog->ic_state = XLOG_STATE_ACTIVE; iclog->ic_log = log; atomic_set(&iclog->ic_refcnt, 0); @@ -1197,7 +1438,6 @@ xlog_alloc_log(xfs_mount_t *mp, iclog->ic_callback_tail = &(iclog->ic_callback); iclog->ic_datap = (char *)iclog->ic_data + log->l_iclog_hsize; - ASSERT(xfs_buf_islocked(iclog->ic_bp)); init_waitqueue_head(&iclog->ic_force_wait); init_waitqueue_head(&iclog->ic_write_wait); @@ -1233,7 +1473,7 @@ out: */ STATIC int xlog_commit_record( - struct log *log, + struct xlog *log, struct xlog_ticket *ticket, struct xlog_in_core **iclog, xfs_lsn_t *commitlsnp) @@ -1267,7 +1507,7 @@ xlog_commit_record( */ STATIC void xlog_grant_push_ail( - struct log *log, + struct xlog *log, int need_bytes) { xfs_lsn_t threshold_lsn = 0; @@ -1280,7 +1520,7 @@ xlog_grant_push_ail( ASSERT(BTOBB(need_bytes) < log->l_logBBsize); - free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); + free_bytes = xlog_space_left(log, &log->l_reserve_head.grant); free_blocks = BTOBBT(free_bytes); /* @@ -1322,12 +1562,96 @@ xlog_grant_push_ail( } /* + * Stamp cycle number in every block + */ +STATIC void +xlog_pack_data( + struct xlog *log, + struct xlog_in_core *iclog, + int roundoff) +{ + int i, j, k; + int size = iclog->ic_offset + roundoff; + __be32 cycle_lsn; + xfs_caddr_t dp; + + cycle_lsn = CYCLE_LSN_DISK(iclog->ic_header.h_lsn); + + dp = iclog->ic_datap; + for (i = 0; i < BTOBB(size); i++) { + if (i >= (XLOG_HEADER_CYCLE_SIZE / BBSIZE)) + break; + iclog->ic_header.h_cycle_data[i] = *(__be32 *)dp; + *(__be32 *)dp = cycle_lsn; + dp += BBSIZE; + } + + if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { + xlog_in_core_2_t *xhdr = iclog->ic_data; + + for ( ; i < BTOBB(size); i++) { + j = i / (XLOG_HEADER_CYCLE_SIZE / BBSIZE); + k = i % (XLOG_HEADER_CYCLE_SIZE / BBSIZE); + xhdr[j].hic_xheader.xh_cycle_data[k] = *(__be32 *)dp; + *(__be32 *)dp = cycle_lsn; + dp += BBSIZE; + } + + for (i = 1; i < log->l_iclog_heads; i++) + xhdr[i].hic_xheader.xh_cycle = cycle_lsn; + } +} + +/* + * Calculate the checksum for a log buffer. + * + * This is a little more complicated than it should be because the various + * headers and the actual data are non-contiguous. + */ +__le32 +xlog_cksum( + struct xlog *log, + struct xlog_rec_header *rhead, + char *dp, + int size) +{ + __uint32_t crc; + + /* first generate the crc for the record header ... */ + crc = xfs_start_cksum((char *)rhead, + sizeof(struct xlog_rec_header), + offsetof(struct xlog_rec_header, h_crc)); + + /* ... then for additional cycle data for v2 logs ... */ + if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { + union xlog_in_core2 *xhdr = (union xlog_in_core2 *)rhead; + int i; + + for (i = 1; i < log->l_iclog_heads; i++) { + crc = crc32c(crc, &xhdr[i].hic_xheader, + sizeof(struct xlog_rec_ext_header)); + } + } + + /* ... and finally for the payload */ + crc = crc32c(crc, dp, size); + + return xfs_end_cksum(crc); +} + +/* * The bdstrat callback function for log bufs. This gives us a central * place to trap bufs in case we get hit by a log I/O error and need to * shutdown. Actually, in practice, even when we didn't get a log error, * we transition the iclogs to IOERROR state *after* flushing all existing * iclogs to disk. This is because we don't want anymore new transactions to be * started or completed afterwards. + * + * We lock the iclogbufs here so that we can serialise against IO completion + * during unmount. We might be processing a shutdown triggered during unmount, + * and that can occur asynchronously to the unmount thread, and hence we need to + * ensure that completes before tearing down the iclogbufs. Hence we need to + * hold the buffer lock across the log IO to acheive that. */ STATIC int xlog_bdstrat( @@ -1335,6 +1659,7 @@ xlog_bdstrat( { struct xlog_in_core *iclog = bp->b_fspriv; + xfs_buf_lock(bp); if (iclog->ic_state & XLOG_STATE_IOERROR) { xfs_buf_ioerror(bp, EIO); xfs_buf_stale(bp); @@ -1342,7 +1667,8 @@ xlog_bdstrat( /* * It would seem logical to return EIO here, but we rely on * the log state machine to propagate I/O errors instead of - * doing it here. + * doing it here. Similarly, IO completion will unlock the + * buffer, so we don't do it here. */ return 0; } @@ -1377,10 +1703,10 @@ xlog_bdstrat( */ STATIC int -xlog_sync(xlog_t *log, - xlog_in_core_t *iclog) +xlog_sync( + struct xlog *log, + struct xlog_in_core *iclog) { - xfs_caddr_t dptr; /* pointer to byte sized element */ xfs_buf_t *bp; int i; uint count; /* byte count of bwrite */ @@ -1389,6 +1715,7 @@ xlog_sync(xlog_t *log, int split = 0; /* split write into two regions */ int error; int v2 = xfs_sb_version_haslogv2(&log->l_mp->m_sb); + int size; XFS_STATS_INC(xs_log_writes); ASSERT(atomic_read(&iclog->ic_refcnt) == 0); @@ -1412,20 +1739,17 @@ xlog_sync(xlog_t *log, roundoff < BBTOB(1))); /* move grant heads by roundoff in sync */ - xlog_grant_add_space(log, &log->l_grant_reserve_head, roundoff); - xlog_grant_add_space(log, &log->l_grant_write_head, roundoff); + xlog_grant_add_space(log, &log->l_reserve_head.grant, roundoff); + xlog_grant_add_space(log, &log->l_write_head.grant, roundoff); /* put cycle number in every block */ xlog_pack_data(log, iclog, roundoff); /* real byte length */ - if (v2) { - iclog->ic_header.h_len = - cpu_to_be32(iclog->ic_offset + roundoff); - } else { - iclog->ic_header.h_len = - cpu_to_be32(iclog->ic_offset); - } + size = iclog->ic_offset; + if (v2) + size += roundoff; + iclog->ic_header.h_len = cpu_to_be32(size); bp = iclog->ic_bp; XFS_BUF_SET_ADDR(bp, BLOCK_LSN(be64_to_cpu(iclog->ic_header.h_lsn))); @@ -1434,13 +1758,37 @@ xlog_sync(xlog_t *log, /* Do we need to split this write into 2 parts? */ if (XFS_BUF_ADDR(bp) + BTOBB(count) > log->l_logBBsize) { + char *dptr; + split = count - (BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp))); count = BBTOB(log->l_logBBsize - XFS_BUF_ADDR(bp)); - iclog->ic_bwritecnt = 2; /* split into 2 writes */ + iclog->ic_bwritecnt = 2; + + /* + * Bump the cycle numbers at the start of each block in the + * part of the iclog that ends up in the buffer that gets + * written to the start of the log. + * + * Watch out for the header magic number case, though. + */ + dptr = (char *)&iclog->ic_header + count; + for (i = 0; i < split; i += BBSIZE) { + __uint32_t cycle = be32_to_cpu(*(__be32 *)dptr); + if (++cycle == XLOG_HEADER_MAGIC_NUM) + cycle++; + *(__be32 *)dptr = cpu_to_be32(cycle); + + dptr += BBSIZE; + } } else { iclog->ic_bwritecnt = 1; } - XFS_BUF_SET_COUNT(bp, count); + + /* calculcate the checksum */ + iclog->ic_header.h_crc = xlog_cksum(log, &iclog->ic_header, + iclog->ic_datap, size); + + bp->b_io_length = BTOBB(count); bp->b_fspriv = iclog; XFS_BUF_ZEROFLAGS(bp); XFS_BUF_ASYNC(bp); @@ -1467,7 +1815,7 @@ xlog_sync(xlog_t *log, ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); - xlog_verify_iclog(log, iclog, count, B_TRUE); + xlog_verify_iclog(log, iclog, count, true); /* account for log which doesn't start at block #0 */ XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart); @@ -1493,19 +1841,6 @@ xlog_sync(xlog_t *log, bp->b_flags |= XBF_SYNCIO; if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) bp->b_flags |= XBF_FUA; - dptr = bp->b_addr; - /* - * Bump the cycle numbers at the start of each block - * since this part of the buffer is at the start of - * a new cycle. Watch out for the header magic number - * case, though. - */ - for (i = 0; i < split; i += BBSIZE) { - be32_add_cpu((__be32 *)dptr, 1); - if (be32_to_cpu(*(__be32 *)dptr) == XLOG_HEADER_MAGIC_NUM) - be32_add_cpu((__be32 *)dptr, 1); - dptr += BBSIZE; - } ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1); ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize); @@ -1522,12 +1857,12 @@ xlog_sync(xlog_t *log, return 0; } /* xlog_sync */ - /* * Deallocate a log structure */ STATIC void -xlog_dealloc_log(xlog_t *log) +xlog_dealloc_log( + struct xlog *log) { xlog_in_core_t *iclog, *next_iclog; int i; @@ -1535,14 +1870,28 @@ xlog_dealloc_log(xlog_t *log) xlog_cil_destroy(log); /* - * always need to ensure that the extra buffer does not point to memory - * owned by another log buffer before we free it. + * Cycle all the iclogbuf locks to make sure all log IO completion + * is done before we tear down these buffers. + */ + iclog = log->l_iclog; + for (i = 0; i < log->l_iclog_bufs; i++) { + xfs_buf_lock(iclog->ic_bp); + xfs_buf_unlock(iclog->ic_bp); + iclog = iclog->ic_next; + } + + /* + * Always need to ensure that the extra buffer does not point to memory + * owned by another log buffer before we free it. Also, cycle the lock + * first to ensure we've completed IO on it. */ - xfs_buf_set_empty(log->l_xbuf, log->l_iclog_size); + xfs_buf_lock(log->l_xbuf); + xfs_buf_unlock(log->l_xbuf); + xfs_buf_set_empty(log->l_xbuf, BTOBB(log->l_iclog_size)); xfs_buf_free(log->l_xbuf); iclog = log->l_iclog; - for (i=0; i<log->l_iclog_bufs; i++) { + for (i = 0; i < log->l_iclog_bufs; i++) { xfs_buf_free(iclog->ic_bp); next_iclog = iclog->ic_next; kmem_free(iclog); @@ -1559,10 +1908,11 @@ xlog_dealloc_log(xlog_t *log) */ /* ARGSUSED */ static inline void -xlog_state_finish_copy(xlog_t *log, - xlog_in_core_t *iclog, - int record_cnt, - int copy_bytes) +xlog_state_finish_copy( + struct xlog *log, + struct xlog_in_core *iclog, + int record_cnt, + int copy_bytes) { spin_lock(&log->l_icloglock); @@ -1675,7 +2025,7 @@ xlog_print_tic_res( for (i = 0; i < ticket->t_res_num; i++) { uint r_type = ticket->t_res_arr[i].r_type; - xfs_warn(mp, "region[%u]: %s - %u bytes\n", i, + xfs_warn(mp, "region[%u]: %s - %u bytes", i, ((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ? "bad-rtype" : res_type_str[r_type-1]), ticket->t_res_arr[i].r_len); @@ -1683,7 +2033,7 @@ xlog_print_tic_res( xfs_alert_tag(mp, XFS_PTAG_LOGRES, "xlog_write: reservation ran out. Need to up reservation"); - xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); + xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); } /* @@ -1705,6 +2055,10 @@ xlog_write_calc_vec_length( headers++; for (lv = log_vector; lv; lv = lv->lv_next) { + /* we don't write ordered log vectors */ + if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) + continue; + headers += lv->lv_niovecs; for (i = 0; i < lv->lv_niovecs; i++) { @@ -1746,7 +2100,7 @@ xlog_write_start_rec( static xlog_op_header_t * xlog_write_setup_ophdr( - struct log *log, + struct xlog *log, struct xlog_op_header *ophdr, struct xlog_ticket *ticket, uint flags) @@ -1782,7 +2136,7 @@ xlog_write_setup_ophdr( * Set up the parameters of the region copy into the log. This has * to handle region write split across multiple log buffers - this * state is kept external to this function so that this code can - * can be written in an obvious, self documenting manner. + * be written in an obvious, self documenting manner. */ static int xlog_write_setup_copy( @@ -1829,7 +2183,7 @@ xlog_write_setup_copy( static int xlog_write_copy_finish( - struct log *log, + struct xlog *log, struct xlog_in_core *iclog, uint flags, int *record_cnt, @@ -1914,7 +2268,7 @@ xlog_write_copy_finish( */ int xlog_write( - struct log *log, + struct xlog *log, struct xfs_log_vec *log_vector, struct xlog_ticket *ticket, xfs_lsn_t *start_lsn, @@ -1958,7 +2312,7 @@ xlog_write( index = 0; lv = log_vector; vecp = lv->lv_iovecp; - while (lv && index < lv->lv_niovecs) { + while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) { void *ptr; int log_offset; @@ -1978,13 +2332,22 @@ xlog_write( * This loop writes out as many regions as can fit in the amount * of space which was allocated by xlog_state_get_iclog_space(). */ - while (lv && index < lv->lv_niovecs) { - struct xfs_log_iovec *reg = &vecp[index]; + while (lv && (!lv->lv_niovecs || index < lv->lv_niovecs)) { + struct xfs_log_iovec *reg; struct xlog_op_header *ophdr; int start_rec_copy; int copy_len; int copy_off; + bool ordered = false; + + /* ordered log vectors have no regions to write */ + if (lv->lv_buf_len == XFS_LOG_VEC_ORDERED) { + ASSERT(lv->lv_niovecs == 0); + ordered = true; + goto next_lv; + } + reg = &vecp[index]; ASSERT(reg->i_len % sizeof(__int32_t) == 0); ASSERT((unsigned long)ptr % sizeof(__int32_t) == 0); @@ -2044,12 +2407,13 @@ xlog_write( break; if (++index == lv->lv_niovecs) { +next_lv: lv = lv->lv_next; index = 0; if (lv) vecp = lv->lv_iovecp; } - if (record_cnt == 0) { + if (record_cnt == 0 && ordered == false) { if (!lv) return 0; break; @@ -2085,7 +2449,8 @@ xlog_write( * State Change: DIRTY -> ACTIVE */ STATIC void -xlog_state_clean_log(xlog_t *log) +xlog_state_clean_log( + struct xlog *log) { xlog_in_core_t *iclog; int changed = 0; @@ -2165,7 +2530,7 @@ xlog_state_clean_log(xlog_t *log) STATIC xfs_lsn_t xlog_get_lowest_lsn( - xlog_t *log) + struct xlog *log) { xlog_in_core_t *lsn_log; xfs_lsn_t lowest_lsn, lsn; @@ -2188,9 +2553,9 @@ xlog_get_lowest_lsn( STATIC void xlog_state_do_callback( - xlog_t *log, - int aborted, - xlog_in_core_t *ciclog) + struct xlog *log, + int aborted, + struct xlog_in_core *ciclog) { xlog_in_core_t *iclog; xlog_in_core_t *first_iclog; /* used to know when we've @@ -2288,14 +2653,27 @@ xlog_state_do_callback( /* - * update the last_sync_lsn before we drop the + * Completion of a iclog IO does not imply that + * a transaction has completed, as transactions + * can be large enough to span many iclogs. We + * cannot change the tail of the log half way + * through a transaction as this may be the only + * transaction in the log and moving th etail to + * point to the middle of it will prevent + * recovery from finding the start of the + * transaction. Hence we should only update the + * last_sync_lsn if this iclog contains + * transaction completion callbacks on it. + * + * We have to do this before we drop the * icloglock to ensure we are the only one that * can update it. */ ASSERT(XFS_LSN_CMP(atomic64_read(&log->l_last_sync_lsn), be64_to_cpu(iclog->ic_header.h_lsn)) <= 0); - atomic64_set(&log->l_last_sync_lsn, - be64_to_cpu(iclog->ic_header.h_lsn)); + if (iclog->ic_callback) + atomic64_set(&log->l_last_sync_lsn, + be64_to_cpu(iclog->ic_header.h_lsn)); } else ioerrors++; @@ -2410,7 +2788,7 @@ xlog_state_done_syncing( xlog_in_core_t *iclog, int aborted) { - xlog_t *log = iclog->ic_log; + struct xlog *log = iclog->ic_log; spin_lock(&log->l_icloglock); @@ -2464,12 +2842,13 @@ xlog_state_done_syncing( * is copied. */ STATIC int -xlog_state_get_iclog_space(xlog_t *log, - int len, - xlog_in_core_t **iclogp, - xlog_ticket_t *ticket, - int *continued_write, - int *logoffsetp) +xlog_state_get_iclog_space( + struct xlog *log, + int len, + struct xlog_in_core **iclogp, + struct xlog_ticket *ticket, + int *continued_write, + int *logoffsetp) { int log_offset; xlog_rec_header_t *head; @@ -2566,119 +2945,6 @@ restart: return 0; } /* xlog_state_get_iclog_space */ -/* - * Atomically get the log space required for a log ticket. - * - * Once a ticket gets put onto the reserveq, it will only return after the - * needed reservation is satisfied. - * - * This function is structured so that it has a lock free fast path. This is - * necessary because every new transaction reservation will come through this - * path. Hence any lock will be globally hot if we take it unconditionally on - * every pass. - * - * As tickets are only ever moved on and off the reserveq under the - * l_grant_reserve_lock, we only need to take that lock if we are going to add - * the ticket to the queue and sleep. We can avoid taking the lock if the ticket - * was never added to the reserveq because the t_queue list head will be empty - * and we hold the only reference to it so it can safely be checked unlocked. - */ -STATIC int -xlog_grant_log_space( - struct log *log, - struct xlog_ticket *tic) -{ - int free_bytes, need_bytes; - int error = 0; - - ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); - - trace_xfs_log_grant_enter(log, tic); - - /* - * If there are other waiters on the queue then give them a chance at - * logspace before us. Wake up the first waiters, if we do not wake - * up all the waiters then go to sleep waiting for more free space, - * otherwise try to get some space for this transaction. - */ - need_bytes = tic->t_unit_res; - if (tic->t_flags & XFS_LOG_PERM_RESERV) - need_bytes *= tic->t_ocnt; - free_bytes = xlog_space_left(log, &log->l_grant_reserve_head); - if (!list_empty_careful(&log->l_reserveq)) { - spin_lock(&log->l_grant_reserve_lock); - if (!xlog_reserveq_wake(log, &free_bytes) || - free_bytes < need_bytes) - error = xlog_reserveq_wait(log, tic, need_bytes); - spin_unlock(&log->l_grant_reserve_lock); - } else if (free_bytes < need_bytes) { - spin_lock(&log->l_grant_reserve_lock); - error = xlog_reserveq_wait(log, tic, need_bytes); - spin_unlock(&log->l_grant_reserve_lock); - } - if (error) - return error; - - xlog_grant_add_space(log, &log->l_grant_reserve_head, need_bytes); - xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); - trace_xfs_log_grant_exit(log, tic); - xlog_verify_grant_tail(log); - return 0; -} - -/* - * Replenish the byte reservation required by moving the grant write head. - * - * Similar to xlog_grant_log_space, the function is structured to have a lock - * free fast path. - */ -STATIC int -xlog_regrant_write_log_space( - struct log *log, - struct xlog_ticket *tic) -{ - int free_bytes, need_bytes; - int error = 0; - - tic->t_curr_res = tic->t_unit_res; - xlog_tic_reset_res(tic); - - if (tic->t_cnt > 0) - return 0; - - ASSERT(!(log->l_flags & XLOG_ACTIVE_RECOVERY)); - - trace_xfs_log_regrant_write_enter(log, tic); - - /* - * If there are other waiters on the queue then give them a chance at - * logspace before us. Wake up the first waiters, if we do not wake - * up all the waiters then go to sleep waiting for more free space, - * otherwise try to get some space for this transaction. - */ - need_bytes = tic->t_unit_res; - free_bytes = xlog_space_left(log, &log->l_grant_write_head); - if (!list_empty_careful(&log->l_writeq)) { - spin_lock(&log->l_grant_write_lock); - if (!xlog_writeq_wake(log, &free_bytes) || - free_bytes < need_bytes) - error = xlog_writeq_wait(log, tic, need_bytes); - spin_unlock(&log->l_grant_write_lock); - } else if (free_bytes < need_bytes) { - spin_lock(&log->l_grant_write_lock); - error = xlog_writeq_wait(log, tic, need_bytes); - spin_unlock(&log->l_grant_write_lock); - } - - if (error) - return error; - - xlog_grant_add_space(log, &log->l_grant_write_head, need_bytes); - trace_xfs_log_regrant_write_exit(log, tic); - xlog_verify_grant_tail(log); - return 0; -} - /* The first cnt-1 times through here we don't need to * move the grant write head because the permanent * reservation has reserved cnt times the unit amount. @@ -2687,17 +2953,18 @@ xlog_regrant_write_log_space( * move grant reservation head forward. */ STATIC void -xlog_regrant_reserve_log_space(xlog_t *log, - xlog_ticket_t *ticket) +xlog_regrant_reserve_log_space( + struct xlog *log, + struct xlog_ticket *ticket) { trace_xfs_log_regrant_reserve_enter(log, ticket); if (ticket->t_cnt > 0) ticket->t_cnt--; - xlog_grant_sub_space(log, &log->l_grant_reserve_head, + xlog_grant_sub_space(log, &log->l_reserve_head.grant, ticket->t_curr_res); - xlog_grant_sub_space(log, &log->l_grant_write_head, + xlog_grant_sub_space(log, &log->l_write_head.grant, ticket->t_curr_res); ticket->t_curr_res = ticket->t_unit_res; xlog_tic_reset_res(ticket); @@ -2708,7 +2975,7 @@ xlog_regrant_reserve_log_space(xlog_t *log, if (ticket->t_cnt > 0) return; - xlog_grant_add_space(log, &log->l_grant_reserve_head, + xlog_grant_add_space(log, &log->l_reserve_head.grant, ticket->t_unit_res); trace_xfs_log_regrant_reserve_exit(log, ticket); @@ -2733,8 +3000,9 @@ xlog_regrant_reserve_log_space(xlog_t *log, * in the current reservation field. */ STATIC void -xlog_ungrant_log_space(xlog_t *log, - xlog_ticket_t *ticket) +xlog_ungrant_log_space( + struct xlog *log, + struct xlog_ticket *ticket) { int bytes; @@ -2754,14 +3022,13 @@ xlog_ungrant_log_space(xlog_t *log, bytes += ticket->t_unit_res*ticket->t_cnt; } - xlog_grant_sub_space(log, &log->l_grant_reserve_head, bytes); - xlog_grant_sub_space(log, &log->l_grant_write_head, bytes); + xlog_grant_sub_space(log, &log->l_reserve_head.grant, bytes); + xlog_grant_sub_space(log, &log->l_write_head.grant, bytes); trace_xfs_log_ungrant_exit(log, ticket); - xfs_log_move_tail(log->l_mp, 1); -} /* xlog_ungrant_log_space */ - + xfs_log_space_wake(log->l_mp); +} /* * Flush iclog to disk if this is the last reference to the given iclog and @@ -2774,8 +3041,8 @@ xlog_ungrant_log_space(xlog_t *log, */ STATIC int xlog_state_release_iclog( - xlog_t *log, - xlog_in_core_t *iclog) + struct xlog *log, + struct xlog_in_core *iclog) { int sync = 0; /* do we sync? */ @@ -2825,9 +3092,10 @@ xlog_state_release_iclog( * that every data block. We have run out of space in this log record. */ STATIC void -xlog_state_switch_iclogs(xlog_t *log, - xlog_in_core_t *iclog, - int eventual_size) +xlog_state_switch_iclogs( + struct xlog *log, + struct xlog_in_core *iclog, + int eventual_size) { ASSERT(iclog->ic_state == XLOG_STATE_ACTIVE); if (!eventual_size) @@ -2891,7 +3159,7 @@ _xfs_log_force( uint flags, int *log_flushed) { - struct log *log = mp->m_log; + struct xlog *log = mp->m_log; struct xlog_in_core *iclog; xfs_lsn_t lsn; @@ -3011,6 +3279,7 @@ xfs_log_force( { int error; + trace_xfs_log_force(mp, 0); error = _xfs_log_force(mp, flags, NULL); if (error) xfs_warn(mp, "%s: error %d returned.", __func__, error); @@ -3038,7 +3307,7 @@ _xfs_log_force_lsn( uint flags, int *log_flushed) { - struct log *log = mp->m_log; + struct xlog *log = mp->m_log; struct xlog_in_core *iclog; int already_slept = 0; @@ -3159,6 +3428,7 @@ xfs_log_force_lsn( { int error; + trace_xfs_log_force(mp, lsn); error = _xfs_log_force_lsn(mp, lsn, flags, NULL); if (error) xfs_warn(mp, "%s: error %d returned.", __func__, error); @@ -3169,7 +3439,9 @@ xfs_log_force_lsn( * disk. */ STATIC void -xlog_state_want_sync(xlog_t *log, xlog_in_core_t *iclog) +xlog_state_want_sync( + struct xlog *log, + struct xlog_in_core *iclog) { assert_spin_locked(&log->l_icloglock); @@ -3211,24 +3483,17 @@ xfs_log_ticket_get( } /* - * Allocate and initialise a new log ticket. + * Figure out the total log space unit (in bytes) that would be + * required for a log ticket. */ -xlog_ticket_t * -xlog_ticket_alloc( - struct log *log, - int unit_bytes, - int cnt, - char client, - uint xflags, - int alloc_flags) +int +xfs_log_calc_unit_res( + struct xfs_mount *mp, + int unit_bytes) { - struct xlog_ticket *tic; - uint num_headers; - int iclog_space; - - tic = kmem_zone_zalloc(xfs_log_ticket_zone, alloc_flags); - if (!tic) - return NULL; + struct xlog *log = mp->m_log; + int iclog_space; + uint num_headers; /* * Permanent reservations have up to 'cnt'-1 active log operations @@ -3303,28 +3568,51 @@ xlog_ticket_alloc( unit_bytes += log->l_iclog_hsize; /* for roundoff padding for transaction data and one for commit record */ - if (xfs_sb_version_haslogv2(&log->l_mp->m_sb) && - log->l_mp->m_sb.sb_logsunit > 1) { + if (xfs_sb_version_haslogv2(&mp->m_sb) && mp->m_sb.sb_logsunit > 1) { /* log su roundoff */ - unit_bytes += 2*log->l_mp->m_sb.sb_logsunit; + unit_bytes += 2 * mp->m_sb.sb_logsunit; } else { /* BB roundoff */ - unit_bytes += 2*BBSIZE; + unit_bytes += 2 * BBSIZE; } + return unit_bytes; +} + +/* + * Allocate and initialise a new log ticket. + */ +struct xlog_ticket * +xlog_ticket_alloc( + struct xlog *log, + int unit_bytes, + int cnt, + char client, + bool permanent, + xfs_km_flags_t alloc_flags) +{ + struct xlog_ticket *tic; + int unit_res; + + tic = kmem_zone_zalloc(xfs_log_ticket_zone, alloc_flags); + if (!tic) + return NULL; + + unit_res = xfs_log_calc_unit_res(log->l_mp, unit_bytes); + atomic_set(&tic->t_ref, 1); + tic->t_task = current; INIT_LIST_HEAD(&tic->t_queue); - tic->t_unit_res = unit_bytes; - tic->t_curr_res = unit_bytes; + tic->t_unit_res = unit_res; + tic->t_curr_res = unit_res; tic->t_cnt = cnt; tic->t_ocnt = cnt; - tic->t_tid = random32(); + tic->t_tid = prandom_u32(); tic->t_clientid = client; tic->t_flags = XLOG_TIC_INITED; tic->t_trans_type = 0; - if (xflags & XFS_LOG_PERM_RESERV) + if (permanent) tic->t_flags |= XLOG_TIC_PERM_RESERV; - init_waitqueue_head(&tic->t_wait); xlog_tic_reset_res(tic); @@ -3346,7 +3634,7 @@ xlog_ticket_alloc( */ void xlog_verify_dest_ptr( - struct log *log, + struct xlog *log, char *ptr) { int i; @@ -3375,12 +3663,12 @@ xlog_verify_dest_ptr( */ STATIC void xlog_verify_grant_tail( - struct log *log) + struct xlog *log) { int tail_cycle, tail_blocks; int cycle, space; - xlog_crack_grant_head(&log->l_grant_write_head, &cycle, &space); + xlog_crack_grant_head(&log->l_write_head.grant, &cycle, &space); xlog_crack_atomic_lsn(&log->l_tail_lsn, &tail_cycle, &tail_blocks); if (tail_cycle != cycle) { if (cycle - 1 != tail_cycle && @@ -3401,9 +3689,10 @@ xlog_verify_grant_tail( /* check if it will fit */ STATIC void -xlog_verify_tail_lsn(xlog_t *log, - xlog_in_core_t *iclog, - xfs_lsn_t tail_lsn) +xlog_verify_tail_lsn( + struct xlog *log, + struct xlog_in_core *iclog, + xfs_lsn_t tail_lsn) { int blocks; @@ -3440,10 +3729,11 @@ xlog_verify_tail_lsn(xlog_t *log, * the cycle numbers agree with the current cycle number. */ STATIC void -xlog_verify_iclog(xlog_t *log, - xlog_in_core_t *iclog, - int count, - boolean_t syncing) +xlog_verify_iclog( + struct xlog *log, + struct xlog_in_core *iclog, + int count, + bool syncing) { xlog_op_header_t *ophead; xlog_in_core_t *icptr; @@ -3458,11 +3748,9 @@ xlog_verify_iclog(xlog_t *log, /* check validity of iclog pointers */ spin_lock(&log->l_icloglock); icptr = log->l_iclog; - for (i=0; i < log->l_iclog_bufs; i++) { - if (icptr == NULL) - xfs_emerg(log->l_mp, "%s: invalid ptr", __func__); - icptr = icptr->ic_next; - } + for (i = 0; i < log->l_iclog_bufs; i++, icptr = icptr->ic_next) + ASSERT(icptr); + if (icptr != log->l_iclog) xfs_emerg(log->l_mp, "%s: corrupt iclog ring", __func__); spin_unlock(&log->l_icloglock); @@ -3491,7 +3779,7 @@ xlog_verify_iclog(xlog_t *log, /* clientid is only 1 byte */ field_offset = (__psint_t) ((xfs_caddr_t)&(ophead->oh_clientid) - base_ptr); - if (syncing == B_FALSE || (field_offset & 0x1ff)) { + if (!syncing || (field_offset & 0x1ff)) { clientid = ophead->oh_clientid; } else { idx = BTOBBT((xfs_caddr_t)&(ophead->oh_clientid) - iclog->ic_datap); @@ -3514,7 +3802,7 @@ xlog_verify_iclog(xlog_t *log, /* check length */ field_offset = (__psint_t) ((xfs_caddr_t)&(ophead->oh_len) - base_ptr); - if (syncing == B_FALSE || (field_offset & 0x1ff)) { + if (!syncing || (field_offset & 0x1ff)) { op_len = be32_to_cpu(ophead->oh_len); } else { idx = BTOBBT((__psint_t)&ophead->oh_len - @@ -3537,7 +3825,7 @@ xlog_verify_iclog(xlog_t *log, */ STATIC int xlog_state_ioerror( - xlog_t *log) + struct xlog *log) { xlog_in_core_t *iclog, *ic; @@ -3582,8 +3870,7 @@ xfs_log_force_umount( struct xfs_mount *mp, int logerror) { - xlog_ticket_t *tic; - xlog_t *log; + struct xlog *log; int retval; log = mp->m_log; @@ -3650,15 +3937,8 @@ xfs_log_force_umount( * we don't enqueue anything once the SHUTDOWN flag is set, and this * action is protected by the grant locks. */ - spin_lock(&log->l_grant_reserve_lock); - list_for_each_entry(tic, &log->l_reserveq, t_queue) - wake_up(&tic->t_wait); - spin_unlock(&log->l_grant_reserve_lock); - - spin_lock(&log->l_grant_write_lock); - list_for_each_entry(tic, &log->l_writeq, t_queue) - wake_up(&tic->t_wait); - spin_unlock(&log->l_grant_write_lock); + xlog_grant_head_wake_all(&log->l_reserve_head); + xlog_grant_head_wake_all(&log->l_write_head); if (!(log->l_iclog->ic_state & XLOG_STATE_IOERROR)) { ASSERT(!logerror); @@ -3672,11 +3952,14 @@ xfs_log_force_umount( retval = xlog_state_ioerror(log); spin_unlock(&log->l_icloglock); } + /* - * Wake up everybody waiting on xfs_log_force. - * Callback all log item committed functions as if the - * log writes were completed. + * Wake up everybody waiting on xfs_log_force. Wake the CIL push first + * as if the log writes were completed. The abort handling in the log + * item committed callback functions will do this again under lock to + * avoid races. */ + wake_up_all(&log->l_cilp->xc_commit_wait); xlog_state_do_callback(log, XFS_LI_ABORTED, NULL); #ifdef XFSERRORDEBUG @@ -3697,7 +3980,8 @@ xfs_log_force_umount( } STATIC int -xlog_iclogs_empty(xlog_t *log) +xlog_iclogs_empty( + struct xlog *log) { xlog_in_core_t *iclog; @@ -3712,3 +3996,4 @@ xlog_iclogs_empty(xlog_t *log) } while (iclog != log->l_iclog); return 1; } + |
