diff options
Diffstat (limited to 'fs/ocfs2/journal.c')
| -rw-r--r-- | fs/ocfs2/journal.c | 352 |
1 files changed, 176 insertions, 176 deletions
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index bf34c491ae9..4b0c68849b3 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -30,8 +30,8 @@ #include <linux/kthread.h> #include <linux/time.h> #include <linux/random.h> +#include <linux/delay.h> -#define MLOG_MASK_PREFIX ML_JOURNAL #include <cluster/masklog.h> #include "ocfs2.h" @@ -52,6 +52,7 @@ #include "quota.h" #include "buffer_head_io.h" +#include "ocfs2_trace.h" DEFINE_SPINLOCK(trans_inc_lock); @@ -301,19 +302,17 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb) { int status = 0; unsigned int flushed; - unsigned long old_id; struct ocfs2_journal *journal = NULL; - mlog_entry_void(); - journal = osb->journal; /* Flush all pending commits and checkpoint the journal. */ down_write(&journal->j_trans_barrier); - if (atomic_read(&journal->j_num_trans) == 0) { + flushed = atomic_read(&journal->j_num_trans); + trace_ocfs2_commit_cache_begin(flushed); + if (flushed == 0) { up_write(&journal->j_trans_barrier); - mlog(0, "No transactions for me to flush!\n"); goto finally; } @@ -326,25 +325,20 @@ static int ocfs2_commit_cache(struct ocfs2_super *osb) goto finally; } - old_id = ocfs2_inc_trans_id(journal); + ocfs2_inc_trans_id(journal); flushed = atomic_read(&journal->j_num_trans); atomic_set(&journal->j_num_trans, 0); up_write(&journal->j_trans_barrier); - mlog(0, "commit_thread: flushed transaction %lu (%u handles)\n", - journal->j_trans_id, flushed); + trace_ocfs2_commit_cache_end(journal->j_trans_id, flushed); ocfs2_wake_downconvert_thread(osb); wake_up(&journal->j_checkpointed); finally: - mlog_exit(status); return status; } -/* pass it NULL and it will allocate a new handle object for you. If - * you pass it a handle however, it may still return error, in which - * case it has free'd the passed handle for you. */ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) { journal_t *journal = osb->journal->j_journal; @@ -362,11 +356,14 @@ handle_t *ocfs2_start_trans(struct ocfs2_super *osb, int max_buffs) if (journal_current_handle()) return jbd2_journal_start(journal, max_buffs); + sb_start_intwrite(osb->sb); + down_read(&osb->journal->j_trans_barrier); handle = jbd2_journal_start(journal, max_buffs); if (IS_ERR(handle)) { up_read(&osb->journal->j_trans_barrier); + sb_end_intwrite(osb->sb); mlog_errno(PTR_ERR(handle)); @@ -395,16 +392,16 @@ int ocfs2_commit_trans(struct ocfs2_super *osb, if (ret < 0) mlog_errno(ret); - if (!nested) + if (!nested) { up_read(&journal->j_trans_barrier); + sb_end_intwrite(osb->sb); + } return ret; } /* - * 'nblocks' is what you want to add to the current - * transaction. extend_trans will either extend the current handle by - * nblocks, or commit it and start a new one with nblocks credits. + * 'nblocks' is what you want to add to the current transaction. * * This might call jbd2_journal_restart() which will commit dirty buffers * and then restart the transaction. Before calling @@ -422,14 +419,17 @@ int ocfs2_commit_trans(struct ocfs2_super *osb, */ int ocfs2_extend_trans(handle_t *handle, int nblocks) { - int status; + int status, old_nblocks; BUG_ON(!handle); - BUG_ON(!nblocks); + BUG_ON(nblocks < 0); - mlog_entry_void(); + if (!nblocks) + return 0; + + old_nblocks = handle->h_buffer_credits; - mlog(0, "Trying to extend transaction by %d blocks\n", nblocks); + trace_ocfs2_extend_trans(old_nblocks, nblocks); #ifdef CONFIG_OCFS2_DEBUG_FS status = 1; @@ -442,10 +442,9 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks) #endif if (status > 0) { - mlog(0, - "jbd2_journal_extend failed, trying " - "jbd2_journal_restart\n"); - status = jbd2_journal_restart(handle, nblocks); + trace_ocfs2_extend_trans_restart(old_nblocks + nblocks); + status = jbd2_journal_restart(handle, + old_nblocks + nblocks); if (status < 0) { mlog_errno(status); goto bail; @@ -454,11 +453,44 @@ int ocfs2_extend_trans(handle_t *handle, int nblocks) status = 0; bail: + return status; +} + +/* + * If we have fewer than thresh credits, extend by OCFS2_MAX_TRANS_DATA. + * If that fails, restart the transaction & regain write access for the + * buffer head which is used for metadata modifications. + * Taken from Ext4: extend_or_restart_transaction() + */ +int ocfs2_allocate_extend_trans(handle_t *handle, int thresh) +{ + int status, old_nblks; + + BUG_ON(!handle); + + old_nblks = handle->h_buffer_credits; + trace_ocfs2_allocate_extend_trans(old_nblks, thresh); + + if (old_nblks < thresh) + return 0; + + status = jbd2_journal_extend(handle, OCFS2_MAX_TRANS_DATA); + if (status < 0) { + mlog_errno(status); + goto bail; + } + + if (status > 0) { + status = jbd2_journal_restart(handle, OCFS2_MAX_TRANS_DATA); + if (status < 0) + mlog_errno(status); + } - mlog_exit(status); +bail: return status; } + struct ocfs2_triggers { struct jbd2_buffer_trigger_type ot_triggers; int ot_offset; @@ -469,7 +501,7 @@ static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger return container_of(triggers, struct ocfs2_triggers, ot_triggers); } -static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers, +static void ocfs2_frozen_trigger(struct jbd2_buffer_trigger_type *triggers, struct buffer_head *bh, void *data, size_t size) { @@ -488,7 +520,7 @@ static void ocfs2_commit_trigger(struct jbd2_buffer_trigger_type *triggers, * Quota blocks have their own trigger because the struct ocfs2_block_check * offset depends on the blocksize. */ -static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers, +static void ocfs2_dq_frozen_trigger(struct jbd2_buffer_trigger_type *triggers, struct buffer_head *bh, void *data, size_t size) { @@ -508,7 +540,7 @@ static void ocfs2_dq_commit_trigger(struct jbd2_buffer_trigger_type *triggers, * Directory blocks also have their own trigger because the * struct ocfs2_block_check offset depends on the blocksize. */ -static void ocfs2_db_commit_trigger(struct jbd2_buffer_trigger_type *triggers, +static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers, struct buffer_head *bh, void *data, size_t size) { @@ -541,7 +573,7 @@ static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers, static struct ocfs2_triggers di_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_dinode, i_check), @@ -549,7 +581,7 @@ static struct ocfs2_triggers di_triggers = { static struct ocfs2_triggers eb_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_extent_block, h_check), @@ -557,7 +589,7 @@ static struct ocfs2_triggers eb_triggers = { static struct ocfs2_triggers rb_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check), @@ -565,7 +597,7 @@ static struct ocfs2_triggers rb_triggers = { static struct ocfs2_triggers gd_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_group_desc, bg_check), @@ -573,14 +605,14 @@ static struct ocfs2_triggers gd_triggers = { static struct ocfs2_triggers db_triggers = { .ot_triggers = { - .t_commit = ocfs2_db_commit_trigger, + .t_frozen = ocfs2_db_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, }; static struct ocfs2_triggers xb_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check), @@ -588,14 +620,14 @@ static struct ocfs2_triggers xb_triggers = { static struct ocfs2_triggers dq_triggers = { .ot_triggers = { - .t_commit = ocfs2_dq_commit_trigger, + .t_frozen = ocfs2_dq_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, }; static struct ocfs2_triggers dr_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check), @@ -603,7 +635,7 @@ static struct ocfs2_triggers dr_triggers = { static struct ocfs2_triggers dl_triggers = { .ot_triggers = { - .t_commit = ocfs2_commit_trigger, + .t_frozen = ocfs2_frozen_trigger, .t_abort = ocfs2_abort_trigger, }, .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check), @@ -623,12 +655,9 @@ static int __ocfs2_journal_access(handle_t *handle, BUG_ON(!handle); BUG_ON(!bh); - mlog_entry("bh->b_blocknr=%llu, type=%d (\"%s\"), bh->b_size = %zu\n", - (unsigned long long)bh->b_blocknr, type, - (type == OCFS2_JOURNAL_ACCESS_CREATE) ? - "OCFS2_JOURNAL_ACCESS_CREATE" : - "OCFS2_JOURNAL_ACCESS_WRITE", - bh->b_size); + trace_ocfs2_journal_access( + (unsigned long long)ocfs2_metadata_cache_owner(ci), + (unsigned long long)bh->b_blocknr, type, bh->b_size); /* we can safely remove this assertion after testing. */ if (!buffer_uptodate(bh)) { @@ -669,7 +698,6 @@ static int __ocfs2_journal_access(handle_t *handle, mlog(ML_ERROR, "Error %d getting %d access to buffer!\n", status, type); - mlog_exit(status); return status; } @@ -734,22 +762,14 @@ int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci, return __ocfs2_journal_access(handle, ci, bh, NULL, type); } -int ocfs2_journal_dirty(handle_t *handle, - struct buffer_head *bh) +void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh) { int status; - mlog_entry("(bh->b_blocknr=%llu)\n", - (unsigned long long)bh->b_blocknr); + trace_ocfs2_journal_dirty((unsigned long long)bh->b_blocknr); status = jbd2_journal_dirty_metadata(handle, bh); - if (status < 0) - mlog(ML_ERROR, "Could not dirty metadata buffer. " - "(bh->b_blocknr=%llu)\n", - (unsigned long long)bh->b_blocknr); - - mlog_exit(status); - return status; + BUG_ON(status); } #define OCFS2_DEFAULT_COMMIT_INTERVAL (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE) @@ -762,13 +782,13 @@ void ocfs2_set_journal_params(struct ocfs2_super *osb) if (osb->osb_commit_interval) commit_interval = osb->osb_commit_interval; - spin_lock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); journal->j_commit_interval = commit_interval; if (osb->s_mount_opt & OCFS2_MOUNT_BARRIER) journal->j_flags |= JBD2_BARRIER; else journal->j_flags &= ~JBD2_BARRIER; - spin_unlock(&journal->j_state_lock); + write_unlock(&journal->j_state_lock); } int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) @@ -781,8 +801,6 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) struct ocfs2_super *osb; int inode_lock = 0; - mlog_entry_void(); - BUG_ON(!journal); osb = journal->j_osb; @@ -819,17 +837,16 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) inode_lock = 1; di = (struct ocfs2_dinode *)bh->b_data; - if (inode->i_size < OCFS2_MIN_JOURNAL_SIZE) { + if (i_size_read(inode) < OCFS2_MIN_JOURNAL_SIZE) { mlog(ML_ERROR, "Journal file size (%lld) is too small!\n", - inode->i_size); + i_size_read(inode)); status = -EINVAL; goto done; } - mlog(0, "inode->i_size = %lld\n", inode->i_size); - mlog(0, "inode->i_blocks = %llu\n", - (unsigned long long)inode->i_blocks); - mlog(0, "inode->ip_clusters = %u\n", OCFS2_I(inode)->ip_clusters); + trace_ocfs2_journal_init(i_size_read(inode), + (unsigned long long)inode->i_blocks, + OCFS2_I(inode)->ip_clusters); /* call the kernels journal init function now */ j_journal = jbd2_journal_init_inode(inode); @@ -839,8 +856,7 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty) goto done; } - mlog(0, "Returned from jbd2_journal_init_inode\n"); - mlog(0, "j_journal->j_maxlen = %u\n", j_journal->j_maxlen); + trace_ocfs2_journal_init_maxlen(j_journal->j_maxlen); *dirty = (le32_to_cpu(di->id1.journal1.ij_flags) & OCFS2_JOURNAL_DIRTY_FL); @@ -865,7 +881,6 @@ done: } } - mlog_exit(status); return status; } @@ -888,8 +903,6 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, struct buffer_head *bh = journal->j_bh; struct ocfs2_dinode *fe; - mlog_entry_void(); - fe = (struct ocfs2_dinode *)bh->b_data; /* The journal bh on the osb always comes from ocfs2_journal_init() @@ -912,7 +925,6 @@ static int ocfs2_journal_toggle_dirty(struct ocfs2_super *osb, if (status < 0) mlog_errno(status); - mlog_exit(status); return status; } @@ -927,8 +939,6 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) struct inode *inode = NULL; int num_running_trans = 0; - mlog_entry_void(); - BUG_ON(!osb); journal = osb->journal; @@ -945,10 +955,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) BUG(); num_running_trans = atomic_read(&(osb->journal->j_num_trans)); - if (num_running_trans > 0) - mlog(0, "Shutting down journal: must wait on %d " - "running transactions!\n", - num_running_trans); + trace_ocfs2_journal_shutdown(num_running_trans); /* Do a commit_cache here. It will flush our journal, *and* * release any locks that are still held. @@ -961,7 +968,7 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) * completely destroy the journal. */ if (osb->commit_task) { /* Wait for the commit thread */ - mlog(0, "Waiting for ocfs2commit to exit....\n"); + trace_ocfs2_journal_shutdown_wait(osb->commit_task); kthread_stop(osb->commit_task); osb->commit_task = NULL; } @@ -1004,7 +1011,6 @@ void ocfs2_journal_shutdown(struct ocfs2_super *osb) done: if (inode) iput(inode); - mlog_exit_void(); } static void ocfs2_clear_journal_error(struct super_block *sb, @@ -1030,8 +1036,6 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed) int status = 0; struct ocfs2_super *osb; - mlog_entry_void(); - BUG_ON(!journal); osb = journal->j_osb; @@ -1065,7 +1069,6 @@ int ocfs2_journal_load(struct ocfs2_journal *journal, int local, int replayed) osb->commit_task = NULL; done: - mlog_exit(status); return status; } @@ -1076,8 +1079,6 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full) { int status; - mlog_entry_void(); - BUG_ON(!journal); status = jbd2_journal_wipe(journal->j_journal, full); @@ -1091,7 +1092,6 @@ int ocfs2_journal_wipe(struct ocfs2_journal *journal, int full) mlog_errno(status); bail: - mlog_exit(status); return status; } @@ -1130,11 +1130,9 @@ static int ocfs2_force_read_journal(struct inode *inode) #define CONCURRENT_JOURNAL_FILL 32ULL struct buffer_head *bhs[CONCURRENT_JOURNAL_FILL]; - mlog_entry_void(); - memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL); - num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, inode->i_size); + num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode)); v_blkno = 0; while (v_blkno < num_blocks) { status = ocfs2_extent_map_get_blocks(inode, v_blkno, @@ -1167,7 +1165,6 @@ static int ocfs2_force_read_journal(struct inode *inode) bail: for(i = 0; i < CONCURRENT_JOURNAL_FILL; i++) brelse(bhs[i]); - mlog_exit(status); return status; } @@ -1191,7 +1188,7 @@ struct ocfs2_la_recovery_item { */ void ocfs2_complete_recovery(struct work_struct *work) { - int ret; + int ret = 0; struct ocfs2_journal *journal = container_of(work, struct ocfs2_journal, j_recovery_work); struct ocfs2_super *osb = journal->j_osb; @@ -1200,9 +1197,8 @@ void ocfs2_complete_recovery(struct work_struct *work) struct ocfs2_quota_recovery *qrec; LIST_HEAD(tmp_la_list); - mlog_entry_void(); - - mlog(0, "completing recovery from keventd\n"); + trace_ocfs2_complete_recovery( + (unsigned long long)OCFS2_I(journal->j_inode)->ip_blkno); spin_lock(&journal->j_lock); list_splice_init(&journal->j_la_cleanups, &tmp_la_list); @@ -1211,15 +1207,18 @@ void ocfs2_complete_recovery(struct work_struct *work) list_for_each_entry_safe(item, n, &tmp_la_list, lri_list) { list_del_init(&item->lri_list); - mlog(0, "Complete recovery for slot %d\n", item->lri_slot); - ocfs2_wait_on_quotas(osb); la_dinode = item->lri_la_dinode; - if (la_dinode) { - mlog(0, "Clean up local alloc %llu\n", - (unsigned long long)le64_to_cpu(la_dinode->i_blkno)); + tl_dinode = item->lri_tl_dinode; + qrec = item->lri_qrec; + + trace_ocfs2_complete_recovery_slot(item->lri_slot, + la_dinode ? le64_to_cpu(la_dinode->i_blkno) : 0, + tl_dinode ? le64_to_cpu(tl_dinode->i_blkno) : 0, + qrec); + if (la_dinode) { ret = ocfs2_complete_local_alloc_recovery(osb, la_dinode); if (ret < 0) @@ -1228,11 +1227,7 @@ void ocfs2_complete_recovery(struct work_struct *work) kfree(la_dinode); } - tl_dinode = item->lri_tl_dinode; if (tl_dinode) { - mlog(0, "Clean up truncate log %llu\n", - (unsigned long long)le64_to_cpu(tl_dinode->i_blkno)); - ret = ocfs2_complete_truncate_log_recovery(osb, tl_dinode); if (ret < 0) @@ -1245,9 +1240,7 @@ void ocfs2_complete_recovery(struct work_struct *work) if (ret < 0) mlog_errno(ret); - qrec = item->lri_qrec; if (qrec) { - mlog(0, "Recovering quota files"); ret = ocfs2_finish_quota_recovery(osb, qrec, item->lri_slot); if (ret < 0) @@ -1258,8 +1251,7 @@ void ocfs2_complete_recovery(struct work_struct *work) kfree(item); } - mlog(0, "Recovery completion\n"); - mlog_exit_void(); + trace_ocfs2_complete_recovery_end(ret); } /* NOTE: This function always eats your references to la_dinode and @@ -1278,11 +1270,8 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal, /* Though we wish to avoid it, we are in fact safe in * skipping local alloc cleanup as fsck.ocfs2 is more * than capable of reclaiming unused space. */ - if (la_dinode) - kfree(la_dinode); - - if (tl_dinode) - kfree(tl_dinode); + kfree(la_dinode); + kfree(tl_dinode); if (qrec) ocfs2_free_quota_recovery(qrec); @@ -1309,6 +1298,9 @@ void ocfs2_complete_mount_recovery(struct ocfs2_super *osb) { struct ocfs2_journal *journal = osb->journal; + if (ocfs2_is_hard_readonly(osb)) + return; + /* No need to queue up our truncate_log as regular cleanup will catch * that */ ocfs2_queue_recovery_completion(journal, osb->slot_num, @@ -1345,8 +1337,6 @@ static int __ocfs2_recovery_thread(void *arg) int rm_quota_used = 0, i; struct ocfs2_quota_recovery *qrec; - mlog_entry_void(); - status = ocfs2_wait_on_mount(osb); if (status < 0) { goto bail; @@ -1378,15 +1368,12 @@ restart: * clear it until ocfs2_recover_node() has succeeded. */ node_num = rm->rm_entries[0]; spin_unlock(&osb->osb_lock); - mlog(0, "checking node %d\n", node_num); slot_num = ocfs2_node_num_to_slot(osb, node_num); + trace_ocfs2_recovery_thread_node(node_num, slot_num); if (slot_num == -ENOENT) { status = 0; - mlog(0, "no slot for this node, so no recovery" - "required.\n"); goto skip_recovery; } - mlog(0, "node %d was using slot %d\n", node_num, slot_num); /* It is a bit subtle with quota recovery. We cannot do it * immediately because we have to obtain cluster locks from @@ -1413,7 +1400,7 @@ skip_recovery: spin_lock(&osb->osb_lock); } spin_unlock(&osb->osb_lock); - mlog(0, "All nodes recovered\n"); + trace_ocfs2_recovery_thread_end(status); /* Refresh all journal recovery generations from disk */ status = ocfs2_check_journals_nolocks(osb); @@ -1422,7 +1409,7 @@ skip_recovery: mlog_errno(status); /* Now it is right time to recover quotas... We have to do this under - * superblock lock so that noone can start using the slot (and crash) + * superblock lock so that no one can start using the slot (and crash) * before we recover it */ for (i = 0; i < rm_quota_used; i++) { qrec = ocfs2_begin_quota_recovery(osb, rm_quota[i]); @@ -1454,10 +1441,8 @@ bail: mutex_unlock(&osb->recovery_lock); - if (rm_quota) - kfree(rm_quota); + kfree(rm_quota); - mlog_exit(status); /* no one is callint kthread_stop() for us so the kthread() api * requires that we call do_exit(). And it isn't exported, but * complete_and_exit() seems to be a minimal wrapper around it. */ @@ -1467,19 +1452,15 @@ bail: void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) { - mlog_entry("(node_num=%d, osb->node_num = %d)\n", - node_num, osb->node_num); - mutex_lock(&osb->recovery_lock); - if (osb->disable_recovery) - goto out; - /* People waiting on recovery will wait on - * the recovery map to empty. */ - if (ocfs2_recovery_map_set(osb, node_num)) - mlog(0, "node %d already in recovery map.\n", node_num); + trace_ocfs2_recovery_thread(node_num, osb->node_num, + osb->disable_recovery, osb->recovery_thread_task, + osb->disable_recovery ? + -1 : ocfs2_recovery_map_set(osb, node_num)); - mlog(0, "starting recovery thread...\n"); + if (osb->disable_recovery) + goto out; if (osb->recovery_thread_task) goto out; @@ -1494,8 +1475,6 @@ void ocfs2_recovery_thread(struct ocfs2_super *osb, int node_num) out: mutex_unlock(&osb->recovery_lock); wake_up(&osb->recovery_event); - - mlog_exit_void(); } static int ocfs2_read_journal_inode(struct ocfs2_super *osb, @@ -1569,7 +1548,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, * If not, it needs recovery. */ if (osb->slot_recovery_generations[slot_num] != slot_reco_gen) { - mlog(0, "Slot %u already recovered (old/new=%u/%u)\n", slot_num, + trace_ocfs2_replay_journal_recovered(slot_num, osb->slot_recovery_generations[slot_num], slot_reco_gen); osb->slot_recovery_generations[slot_num] = slot_reco_gen; status = -EBUSY; @@ -1580,7 +1559,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, status = ocfs2_inode_lock_full(inode, &bh, 1, OCFS2_META_LOCK_RECOVERY); if (status < 0) { - mlog(0, "status returned from ocfs2_inode_lock=%d\n", status); + trace_ocfs2_replay_journal_lock_err(status); if (status != -ERESTARTSYS) mlog(ML_ERROR, "Could not lock journal!\n"); goto done; @@ -1593,7 +1572,7 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, slot_reco_gen = ocfs2_get_recovery_generation(fe); if (!(flags & OCFS2_JOURNAL_DIRTY_FL)) { - mlog(0, "No recovery required for node %d\n", node_num); + trace_ocfs2_replay_journal_skip(node_num); /* Refresh recovery generation for the slot */ osb->slot_recovery_generations[slot_num] = slot_reco_gen; goto done; @@ -1602,9 +1581,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, /* we need to run complete recovery for offline orphan slots */ ocfs2_replay_map_set_state(osb, REPLAY_NEEDED); - mlog(ML_NOTICE, "Recovering node %d from slot %d on device (%u,%u)\n", - node_num, slot_num, - MAJOR(osb->sb->s_dev), MINOR(osb->sb->s_dev)); + printk(KERN_NOTICE "ocfs2: Begin replay journal (node %d, slot %d) on "\ + "device (%u,%u)\n", node_num, slot_num, MAJOR(osb->sb->s_dev), + MINOR(osb->sb->s_dev)); OCFS2_I(inode)->ip_clusters = le32_to_cpu(fe->i_clusters); @@ -1614,7 +1593,6 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, goto done; } - mlog(0, "calling journal_init_inode\n"); journal = jbd2_journal_init_inode(inode); if (journal == NULL) { mlog(ML_ERROR, "Linux journal layer error\n"); @@ -1634,7 +1612,6 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, ocfs2_clear_journal_error(osb->sb, journal, slot_num); /* wipe the journal */ - mlog(0, "flushing the journal.\n"); jbd2_journal_lock_updates(journal); status = jbd2_journal_flush(journal); jbd2_journal_unlock_updates(journal); @@ -1661,6 +1638,9 @@ static int ocfs2_replay_journal(struct ocfs2_super *osb, jbd2_journal_destroy(journal); + printk(KERN_NOTICE "ocfs2: End replay journal (node %d, slot %d) on "\ + "device (%u,%u)\n", node_num, slot_num, MAJOR(osb->sb->s_dev), + MINOR(osb->sb->s_dev)); done: /* drop the lock on this nodes journal */ if (got_lock) @@ -1671,7 +1651,6 @@ done: brelse(bh); - mlog_exit(status); return status; } @@ -1694,8 +1673,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, struct ocfs2_dinode *la_copy = NULL; struct ocfs2_dinode *tl_copy = NULL; - mlog_entry("(node_num=%d, slot_num=%d, osb->node_num = %d)\n", - node_num, slot_num, osb->node_num); + trace_ocfs2_recover_node(node_num, slot_num, osb->node_num); /* Should not ever be called to recover ourselves -- in that * case we should've called ocfs2_journal_load instead. */ @@ -1704,9 +1682,7 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, status = ocfs2_replay_journal(osb, node_num, slot_num); if (status < 0) { if (status == -EBUSY) { - mlog(0, "Skipping recovery for slot %u (node %u) " - "as another node has recovered it\n", slot_num, - node_num); + trace_ocfs2_recover_node_skip(slot_num, node_num); status = 0; goto done; } @@ -1741,7 +1717,6 @@ static int ocfs2_recover_node(struct ocfs2_super *osb, status = 0; done: - mlog_exit(status); return status; } @@ -1814,8 +1789,8 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) spin_lock(&osb->osb_lock); osb->slot_recovery_generations[i] = gen; - mlog(0, "Slot %u recovery generation is %u\n", i, - osb->slot_recovery_generations[i]); + trace_ocfs2_mark_dead_nodes(i, + osb->slot_recovery_generations[i]); if (i == osb->slot_num) { spin_unlock(&osb->osb_lock); @@ -1851,7 +1826,6 @@ int ocfs2_mark_dead_nodes(struct ocfs2_super *osb) status = 0; bail: - mlog_exit(status); return status; } @@ -1874,6 +1848,20 @@ static inline unsigned long ocfs2_orphan_scan_timeout(void) * every slot, queuing a recovery of the slot on the ocfs2_wq thread. This * is done to catch any orphans that are left over in orphan directories. * + * It scans all slots, even ones that are in use. It does so to handle the + * case described below: + * + * Node 1 has an inode it was using. The dentry went away due to memory + * pressure. Node 1 closes the inode, but it's on the free list. The node + * has the open lock. + * Node 2 unlinks the inode. It grabs the dentry lock to notify others, + * but node 1 has no dentry and doesn't get the message. It trylocks the + * open lock, sees that another node has a PR, and does nothing. + * Later node 2 runs its orphan dir. It igets the inode, trylocks the + * open lock, sees the PR still, and does nothing. + * Basically, we have to trigger an orphan iput on node 1. The only way + * for this to happen is if node 1 runs node 2's orphan dir. + * * ocfs2_queue_orphan_scan gets called every ORPHAN_SCAN_SCHEDULE_TIMEOUT * seconds. It gets an EX lock on os_lockres and checks sequence number * stored in LVB. If the sequence number has changed, it means some other @@ -1893,6 +1881,9 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) if (atomic_read(&os->os_state) == ORPHAN_SCAN_INACTIVE) goto out; + trace_ocfs2_queue_orphan_scan_begin(os->os_count, os->os_seqno, + atomic_read(&os->os_state)); + status = ocfs2_orphan_scan_lock(osb, &seqno); if (status < 0) { if (status != -EAGAIN) @@ -1922,6 +1913,8 @@ void ocfs2_queue_orphan_scan(struct ocfs2_super *osb) unlock: ocfs2_orphan_scan_unlock(osb, seqno); out: + trace_ocfs2_queue_orphan_scan_end(os->os_count, os->os_seqno, + atomic_read(&os->os_state)); return; } @@ -1938,7 +1931,7 @@ void ocfs2_orphan_scan_work(struct work_struct *work) mutex_lock(&os->os_lock); ocfs2_queue_orphan_scan(osb); if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE) - schedule_delayed_work(&os->os_orphan_scan_work, + queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work, ocfs2_orphan_scan_timeout()); mutex_unlock(&os->os_lock); } @@ -1978,12 +1971,13 @@ void ocfs2_orphan_scan_start(struct ocfs2_super *osb) atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE); else { atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE); - schedule_delayed_work(&os->os_orphan_scan_work, - ocfs2_orphan_scan_timeout()); + queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work, + ocfs2_orphan_scan_timeout()); } } struct ocfs2_orphan_filldir_priv { + struct dir_context ctx; struct inode *head; struct ocfs2_super *osb; }; @@ -2005,8 +1999,7 @@ static int ocfs2_orphan_filldir(void *priv, const char *name, int name_len, if (IS_ERR(iter)) return 0; - mlog(0, "queue orphan %llu\n", - (unsigned long long)OCFS2_I(iter)->ip_blkno); + trace_ocfs2_orphan_filldir((unsigned long long)OCFS2_I(iter)->ip_blkno); /* No locking is required for the next_orphan queue as there * is only ever a single process doing orphan recovery. */ OCFS2_I(iter)->ip_next_orphan = p->head; @@ -2021,11 +2014,11 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb, { int status; struct inode *orphan_dir_inode = NULL; - struct ocfs2_orphan_filldir_priv priv; - loff_t pos = 0; - - priv.osb = osb; - priv.head = *head; + struct ocfs2_orphan_filldir_priv priv = { + .ctx.actor = ocfs2_orphan_filldir, + .osb = osb, + .head = *head + }; orphan_dir_inode = ocfs2_get_system_file_inode(osb, ORPHAN_DIR_SYSTEM_INODE, @@ -2034,7 +2027,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb, status = -ENOENT; mlog_errno(status); return status; - } + } mutex_lock(&orphan_dir_inode->i_mutex); status = ocfs2_inode_lock(orphan_dir_inode, NULL, 0); @@ -2043,8 +2036,7 @@ static int ocfs2_queue_orphans(struct ocfs2_super *osb, goto out; } - status = ocfs2_dir_foreach(orphan_dir_inode, &pos, &priv, - ocfs2_orphan_filldir); + status = ocfs2_dir_foreach(orphan_dir_inode, &priv.ctx); if (status) { mlog_errno(status); goto out_cluster; @@ -2122,7 +2114,7 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, struct inode *iter; struct ocfs2_inode_info *oi; - mlog(0, "Recover inodes from orphan dir in slot %d\n", slot); + trace_ocfs2_recover_orphans(slot); ocfs2_mark_recovering_orphan_dir(osb, slot); ret = ocfs2_queue_orphans(osb, slot, &inode); @@ -2135,17 +2127,12 @@ static int ocfs2_recover_orphans(struct ocfs2_super *osb, while (inode) { oi = OCFS2_I(inode); - mlog(0, "iput orphan %llu\n", (unsigned long long)oi->ip_blkno); + trace_ocfs2_recover_orphans_iput( + (unsigned long long)oi->ip_blkno); iter = oi->ip_next_orphan; spin_lock(&oi->ip_lock); - /* The remote delete code may have set these on the - * assumption that the other node would wipe them - * successfully. If they are still in the node's - * orphan dir, we need to reset that state. */ - oi->ip_flags &= ~(OCFS2_INODE_DELETED|OCFS2_INODE_SKIP_DELETE); - /* Set the proper information to get us going into * ocfs2_delete_inode. */ oi->ip_flags |= OCFS2_INODE_MAYBE_ORPHANED; @@ -2173,6 +2160,7 @@ static int __ocfs2_wait_on_mount(struct ocfs2_super *osb, int quota) * MOUNTED flag, but this is set right before * dismount_volume() so we can trust it. */ if (atomic_read(&osb->vol_state) == VOLUME_DISABLED) { + trace_ocfs2_wait_on_mount(VOLUME_DISABLED); mlog(0, "mount error, exiting!\n"); return -EBUSY; } @@ -2198,8 +2186,20 @@ static int ocfs2_commit_thread(void *arg) || kthread_should_stop()); status = ocfs2_commit_cache(osb); - if (status < 0) - mlog_errno(status); + if (status < 0) { + static unsigned long abort_warn_time; + + /* Warn about this once per minute */ + if (printk_timed_ratelimit(&abort_warn_time, 60*HZ)) + mlog(ML_ERROR, "status = %d, journal is " + "already aborted.\n", status); + /* + * After ocfs2_commit_cache() fails, j_num_trans has a + * non-zero value. Sleep here to avoid a busy-wait + * loop. + */ + msleep_interruptible(1000); + } if (kthread_should_stop() && atomic_read(&journal->j_num_trans)){ mlog(ML_KTHREAD, |
