diff options
Diffstat (limited to 'fs/jbd2')
-rw-r--r-- | fs/jbd2/Kconfig | 33 | ||||
-rw-r--r-- | fs/jbd2/checkpoint.c | 71 | ||||
-rw-r--r-- | fs/jbd2/commit.c | 35 | ||||
-rw-r--r-- | fs/jbd2/journal.c | 103 | ||||
-rw-r--r-- | fs/jbd2/recovery.c | 7 | ||||
-rw-r--r-- | fs/jbd2/transaction.c | 1 |
6 files changed, 182 insertions, 68 deletions
diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig new file mode 100644 index 00000000000..f32f346f4b0 --- /dev/null +++ b/fs/jbd2/Kconfig @@ -0,0 +1,33 @@ +config JBD2 + tristate + select CRC32 + help + This is a generic journaling layer for block devices that support + both 32-bit and 64-bit block numbers. It is currently used by + the ext4 and OCFS2 filesystems, but it could also be used to add + journal support to other file systems or block devices such + as RAID or LVM. + + If you are using ext4 or OCFS2, you need to say Y here. + If you are not using ext4 or OCFS2 then you will + probably want to say N. + + To compile this device as a module, choose M here. The module will be + called jbd2. If you are compiling ext4 or OCFS2 into the kernel, + you cannot compile this code as a module. + +config JBD2_DEBUG + bool "JBD2 (ext4) debugging support" + depends on JBD2 && DEBUG_FS + help + If you are using the ext4 journaled file system (or + potentially any other filesystem/device using JBD2), this option + allows you to enable debugging output while the system is running, + in order to help track down any problems you are having. + By default, the debugging output will be turned off. + + If you select Y here, then you will be able to turn on debugging + with "echo N > /sys/kernel/debug/jbd2/jbd2-debug", where N is a + number between 1 and 5. The higher the number, the more debugging + output is generated. To turn debugging off again, do + "echo 0 > /sys/kernel/debug/jbd2/jbd2-debug". diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 91389c8aee8..9203c3332f1 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -20,6 +20,7 @@ #include <linux/time.h> #include <linux/fs.h> #include <linux/jbd2.h> +#include <linux/marker.h> #include <linux/errno.h> #include <linux/slab.h> @@ -93,7 +94,8 @@ static int __try_to_free_cp_buf(struct journal_head *jh) int ret = 0; struct buffer_head *bh = jh2bh(jh); - if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { + if (jh->b_jlist == BJ_None && !buffer_locked(bh) && + !buffer_dirty(bh) && !buffer_write_io_error(bh)) { JBUFFER_TRACE(jh, "remove from checkpoint list"); ret = __jbd2_journal_remove_checkpoint(jh) + 1; jbd_unlock_bh_state(bh); @@ -126,14 +128,29 @@ void __jbd2_log_wait_for_space(journal_t *journal) /* * Test again, another process may have checkpointed while we - * were waiting for the checkpoint lock + * were waiting for the checkpoint lock. If there are no + * outstanding transactions there is nothing to checkpoint and + * we can't make progress. Abort the journal in this case. */ spin_lock(&journal->j_state_lock); + spin_lock(&journal->j_list_lock); nblocks = jbd_space_needed(journal); if (__jbd2_log_space_left(journal) < nblocks) { + int chkpt = journal->j_checkpoint_transactions != NULL; + + spin_unlock(&journal->j_list_lock); spin_unlock(&journal->j_state_lock); - jbd2_log_do_checkpoint(journal); + if (chkpt) { + jbd2_log_do_checkpoint(journal); + } else { + printk(KERN_ERR "%s: no transactions\n", + __func__); + jbd2_journal_abort(journal, 0); + } + spin_lock(&journal->j_state_lock); + } else { + spin_unlock(&journal->j_list_lock); } mutex_unlock(&journal->j_checkpoint_mutex); } @@ -160,21 +177,25 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) * buffers. Note that we take the buffers in the opposite ordering * from the one in which they were submitted for IO. * + * Return 0 on success, and return <0 if some buffers have failed + * to be written out. + * * Called with j_list_lock held. */ -static void __wait_cp_io(journal_t *journal, transaction_t *transaction) +static int __wait_cp_io(journal_t *journal, transaction_t *transaction) { struct journal_head *jh; struct buffer_head *bh; tid_t this_tid; int released = 0; + int ret = 0; this_tid = transaction->t_tid; restart: /* Did somebody clean up the transaction in the meanwhile? */ if (journal->j_checkpoint_transactions != transaction || transaction->t_tid != this_tid) - return; + return ret; while (!released && transaction->t_checkpoint_io_list) { jh = transaction->t_checkpoint_io_list; bh = jh2bh(jh); @@ -194,6 +215,9 @@ restart: spin_lock(&journal->j_list_lock); goto restart; } + if (unlikely(buffer_write_io_error(bh))) + ret = -EIO; + /* * Now in whatever state the buffer currently is, we know that * it has been written out and so we can drop it from the list @@ -203,6 +227,8 @@ restart: jbd2_journal_remove_journal_head(bh); __brelse(bh); } + + return ret; } #define NR_BATCH 64 @@ -226,7 +252,8 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) * Try to flush one buffer from the checkpoint list to disk. * * Return 1 if something happened which requires us to abort the current - * scan of the checkpoint list. + * scan of the checkpoint list. Return <0 if the buffer has failed to + * be written out. * * Called with j_list_lock held and drops it if 1 is returned * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it @@ -258,6 +285,9 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, jbd2_log_wait_commit(journal, tid); ret = 1; } else if (!buffer_dirty(bh)) { + ret = 1; + if (unlikely(buffer_write_io_error(bh))) + ret = -EIO; J_ASSERT_JH(jh, !buffer_jbddirty(bh)); BUFFER_TRACE(bh, "remove from checkpoint"); __jbd2_journal_remove_checkpoint(jh); @@ -265,7 +295,6 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, jbd_unlock_bh_state(bh); jbd2_journal_remove_journal_head(bh); __brelse(bh); - ret = 1; } else { /* * Important: we are about to write the buffer, and @@ -298,6 +327,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, * to disk. We submit larger chunks of data at once. * * The journal should be locked before calling this function. + * Called with j_checkpoint_mutex held. */ int jbd2_log_do_checkpoint(journal_t *journal) { @@ -313,6 +343,8 @@ int jbd2_log_do_checkpoint(journal_t *journal) * journal straight away. */ result = jbd2_cleanup_journal_tail(journal); + trace_mark(jbd2_checkpoint, "dev %s need_checkpoint %d", + journal->j_devname, result); jbd_debug(1, "cleanup_journal_tail returned %d\n", result); if (result <= 0) return result; @@ -321,6 +353,7 @@ int jbd2_log_do_checkpoint(journal_t *journal) * OK, we need to start writing disk blocks. Take one transaction * and write it. */ + result = 0; spin_lock(&journal->j_list_lock); if (!journal->j_checkpoint_transactions) goto out; @@ -339,7 +372,7 @@ restart: int batch_count = 0; struct buffer_head *bhs[NR_BATCH]; struct journal_head *jh; - int retry = 0; + int retry = 0, err; while (!retry && transaction->t_checkpoint_list) { struct buffer_head *bh; @@ -353,6 +386,8 @@ restart: } retry = __process_buffer(journal, jh, bhs, &batch_count, transaction); + if (retry < 0 && !result) + result = retry; if (!retry && (need_resched() || spin_needbreak(&journal->j_list_lock))) { spin_unlock(&journal->j_list_lock); @@ -377,14 +412,18 @@ restart: * Now we have cleaned up the first transaction's checkpoint * list. Let's clean up the second one */ - __wait_cp_io(journal, transaction); + err = __wait_cp_io(journal, transaction); + if (!result) + result = err; } out: spin_unlock(&journal->j_list_lock); - result = jbd2_cleanup_journal_tail(journal); if (result < 0) - return result; - return 0; + jbd2_journal_abort(journal, result); + else + result = jbd2_cleanup_journal_tail(journal); + + return (result < 0) ? result : 0; } /* @@ -400,8 +439,9 @@ out: * This is the only part of the journaling code which really needs to be * aware of transaction aborts. Checkpointing involves writing to the * main filesystem area rather than to the journal, so it can proceed - * even in abort state, but we must not update the journal superblock if - * we have an abort error outstanding. + * even in abort state, but we must not update the super block if + * checkpointing may have failed. Otherwise, we would lose some metadata + * buffers which should be written-back to the filesystem. */ int jbd2_cleanup_journal_tail(journal_t *journal) @@ -410,6 +450,9 @@ int jbd2_cleanup_journal_tail(journal_t *journal) tid_t first_tid; unsigned long blocknr, freed; + if (is_journal_aborted(journal)) + return 1; + /* OK, work out the oldest transaction remaining in the log, and * the log block it starts at. * diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index f2ad061e95e..8b119e16aa3 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -16,6 +16,7 @@ #include <linux/time.h> #include <linux/fs.h> #include <linux/jbd2.h> +#include <linux/marker.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/mm.h> @@ -126,8 +127,7 @@ static int journal_submit_commit_record(journal_t *journal, JBUFFER_TRACE(descriptor, "submit commit block"); lock_buffer(bh); - get_bh(bh); - set_buffer_dirty(bh); + clear_buffer_dirty(bh); set_buffer_uptodate(bh); bh->b_end_io = journal_end_buffer_io_sync; @@ -147,12 +147,9 @@ static int journal_submit_commit_record(journal_t *journal, * to remember if we sent a barrier request */ if (ret == -EOPNOTSUPP && barrier_done) { - char b[BDEVNAME_SIZE]; - printk(KERN_WARNING - "JBD: barrier-based sync failed on %s - " - "disabling barriers\n", - bdevname(journal->j_dev, b)); + "JBD: barrier-based sync failed on %s - " + "disabling barriers\n", journal->j_devname); spin_lock(&journal->j_state_lock); journal->j_flags &= ~JBD2_BARRIER; spin_unlock(&journal->j_state_lock); @@ -160,7 +157,7 @@ static int journal_submit_commit_record(journal_t *journal, /* And try again, without the barrier */ lock_buffer(bh); set_buffer_uptodate(bh); - set_buffer_dirty(bh); + clear_buffer_dirty(bh); ret = submit_bh(WRITE, bh); } *cbh = bh; @@ -371,6 +368,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) commit_transaction = journal->j_running_transaction; J_ASSERT(commit_transaction->t_state == T_RUNNING); + trace_mark(jbd2_start_commit, "dev %s transaction %d", + journal->j_devname, commit_transaction->t_tid); jbd_debug(1, "JBD: starting commit of transaction %d\n", commit_transaction->t_tid); @@ -505,9 +504,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) jh = commit_transaction->t_buffers; /* If we're in abort mode, we just un-journal the buffer and - release it for background writing. */ + release it. */ if (is_journal_aborted(journal)) { + clear_buffer_jbddirty(jh2bh(jh)); JBUFFER_TRACE(jh, "journal is aborting: refile"); jbd2_journal_refile_buffer(journal, jh); /* If that was the last one, we need to clean up @@ -681,11 +681,11 @@ start_journal_io: */ err = journal_finish_inode_data_buffers(journal, commit_transaction); if (err) { - char b[BDEVNAME_SIZE]; - printk(KERN_WARNING "JBD2: Detected IO errors while flushing file data " - "on %s\n", bdevname(journal->j_fs_dev, b)); + "on %s\n", journal->j_devname); + if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR) + jbd2_journal_abort(journal, err); err = 0; } @@ -786,6 +786,9 @@ wait_for_iobuf: /* AKPM: bforget here */ } + if (err) + jbd2_journal_abort(journal, err); + jbd_debug(3, "JBD: commit phase 5\n"); if (!JBD2_HAS_INCOMPAT_FEATURE(journal, @@ -884,6 +887,8 @@ restart_loop: if (buffer_jbddirty(bh)) { JBUFFER_TRACE(jh, "add to new checkpointing trans"); __jbd2_journal_insert_checkpoint(jh, commit_transaction); + if (is_journal_aborted(journal)) + clear_buffer_jbddirty(bh); JBUFFER_TRACE(jh, "refile for checkpoint writeback"); __jbd2_journal_refile_buffer(jh); jbd_unlock_bh_state(bh); @@ -990,6 +995,12 @@ restart_loop: } spin_unlock(&journal->j_list_lock); + if (journal->j_commit_callback) + journal->j_commit_callback(journal, commit_transaction); + + trace_mark(jbd2_end_commit, "dev %s transaction %d head %d", + journal->j_devname, commit_transaction->t_tid, + journal->j_tail_sequence); jbd_debug(1, "JBD: commit %d complete, head %d\n", journal->j_commit_sequence, journal->j_tail_sequence); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 8207a01c4ed..783de118de9 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -597,13 +597,9 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, if (ret) *retp = ret; else { - char b[BDEVNAME_SIZE]; - printk(KERN_ALERT "%s: journal block not found " "at offset %lu on %s\n", - __func__, - blocknr, - bdevname(journal->j_dev, b)); + __func__, blocknr, journal->j_devname); err = -EIO; __journal_abort_soft(journal, err); } @@ -901,10 +897,7 @@ static struct proc_dir_entry *proc_jbd2_stats; static void jbd2_stats_proc_init(journal_t *journal) { - char name[BDEVNAME_SIZE]; - - bdevname(journal->j_dev, name); - journal->j_proc_entry = proc_mkdir(name, proc_jbd2_stats); + journal->j_proc_entry = proc_mkdir(journal->j_devname, proc_jbd2_stats); if (journal->j_proc_entry) { proc_create_data("history", S_IRUGO, journal->j_proc_entry, &jbd2_seq_history_fops, journal); @@ -915,12 +908,9 @@ static void jbd2_stats_proc_init(journal_t *journal) static void jbd2_stats_proc_exit(journal_t *journal) { - char name[BDEVNAME_SIZE]; - - bdevname(journal->j_dev, name); remove_proc_entry("info", journal->j_proc_entry); remove_proc_entry("history", journal->j_proc_entry); - remove_proc_entry(name, proc_jbd2_stats); + remove_proc_entry(journal->j_devname, proc_jbd2_stats); } static void journal_init_stats(journal_t *journal) @@ -1018,6 +1008,7 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, { journal_t *journal = journal_init_common(); struct buffer_head *bh; + char *p; int n; if (!journal) @@ -1039,6 +1030,10 @@ journal_t * jbd2_journal_init_dev(struct block_device *bdev, journal->j_fs_dev = fs_dev; journal->j_blk_offset = start; journal->j_maxlen = len; + bdevname(journal->j_dev, journal->j_devname); + p = journal->j_devname; + while ((p = strchr(p, '/'))) + *p = '!'; jbd2_stats_proc_init(journal); bh = __getblk(journal->j_dev, start, journal->j_blocksize); @@ -1061,6 +1056,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) { struct buffer_head *bh; journal_t *journal = journal_init_common(); + char *p; int err; int n; unsigned long long blocknr; @@ -1070,6 +1066,12 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev; journal->j_inode = inode; + bdevname(journal->j_dev, journal->j_devname); + p = journal->j_devname; + while ((p = strchr(p, '/'))) + *p = '!'; + p = journal->j_devname + strlen(journal->j_devname); + sprintf(p, ":%lu", journal->j_inode->i_ino); jbd_debug(1, "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n", journal, inode->i_sb->s_id, inode->i_ino, @@ -1253,6 +1255,22 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) goto out; } + if (buffer_write_io_error(bh)) { + /* + * Oh, dear. A previous attempt to write the journal + * superblock failed. This could happen because the + * USB device was yanked out. Or it could happen to + * be a transient write error and maybe the block will + * be remapped. Nothing we can do but to retry the + * write and hope for the best. + */ + printk(KERN_ERR "JBD2: previous I/O error detected " + "for journal superblock update for %s.\n", + journal->j_devname); + clear_buffer_write_io_error(bh); + set_buffer_uptodate(bh); + } + spin_lock(&journal->j_state_lock); jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", journal->j_tail, journal->j_tail_sequence, journal->j_errno); @@ -1264,9 +1282,16 @@ void jbd2_journal_update_superblock(journal_t *journal, int wait) BUFFER_TRACE(bh, "marking dirty"); mark_buffer_dirty(bh); - if (wait) + if (wait) { sync_dirty_buffer(bh); - else + if (buffer_write_io_error(bh)) { + printk(KERN_ERR "JBD2: I/O error detected " + "when updating journal superblock for %s.\n", + journal->j_devname); + clear_buffer_write_io_error(bh); + set_buffer_uptodate(bh); + } + } else ll_rw_block(SWRITE, 1, &bh); out: @@ -1426,9 +1451,12 @@ recovery_error: * * Release a journal_t structure once it is no longer in use by the * journaled object. + * Return <0 if we couldn't clean up the journal. */ -void jbd2_journal_destroy(journal_t *journal) +int jbd2_journal_destroy(journal_t *journal) { + int err = 0; + /* Wait for the commit thread to wake up and die. */ journal_kill_thread(journal); @@ -1451,11 +1479,16 @@ void jbd2_journal_destroy(journal_t *journal) J_ASSERT(journal->j_checkpoint_transactions == NULL); spin_unlock(&journal->j_list_lock); - /* We can now mark the journal as empty. */ - journal->j_tail = 0; - journal->j_tail_sequence = ++journal->j_transaction_sequence; if (journal->j_sb_buffer) { - jbd2_journal_update_superblock(journal, 1); + if (!is_journal_aborted(journal)) { + /* We can now mark the journal as empty. */ + journal->j_tail = 0; + journal->j_tail_sequence = + ++journal->j_transaction_sequence; + jbd2_journal_update_superblock(journal, 1); + } else { + err = -EIO; + } brelse(journal->j_sb_buffer); } @@ -1467,6 +1500,8 @@ void jbd2_journal_destroy(journal_t *journal) jbd2_journal_destroy_revoke(journal); kfree(journal->j_wbuf); kfree(journal); + + return err; } @@ -1692,10 +1727,16 @@ int jbd2_journal_flush(journal_t *journal) spin_lock(&journal->j_list_lock); while (!err && journal->j_checkpoint_transactions != NULL) { spin_unlock(&journal->j_list_lock); + mutex_lock(&journal->j_checkpoint_mutex); err = jbd2_log_do_checkpoint(journal); + mutex_unlock(&journal->j_checkpoint_mutex); spin_lock(&journal->j_list_lock); } spin_unlock(&journal->j_list_lock); + + if (is_journal_aborted(journal)) + return -EIO; + jbd2_cleanup_journal_tail(journal); /* Finally, mark the journal as really needing no recovery. @@ -1717,7 +1758,7 @@ int jbd2_journal_flush(journal_t *journal) J_ASSERT(journal->j_head == journal->j_tail); J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); spin_unlock(&journal->j_state_lock); - return err; + return 0; } /** @@ -1761,23 +1802,6 @@ int jbd2_journal_wipe(journal_t *journal, int write) } /* - * journal_dev_name: format a character string to describe on what - * device this journal is present. - */ - -static const char *journal_dev_name(journal_t *journal, char *buffer) -{ - struct block_device *bdev; - - if (journal->j_inode) - bdev = journal->j_inode->i_sb->s_bdev; - else - bdev = journal->j_dev; - - return bdevname(bdev, buffer); -} - -/* * Journal abort has very specific semantics, which we describe * for journal abort. * @@ -1793,13 +1817,12 @@ static const char *journal_dev_name(journal_t *journal, char *buffer) void __jbd2_journal_abort_hard(journal_t *journal) { transaction_t *transaction; - char b[BDEVNAME_SIZE]; if (journal->j_flags & JBD2_ABORT) return; printk(KERN_ERR "Aborting journal on device %s.\n", - journal_dev_name(journal, b)); + journal->j_devname); spin_lock(&journal->j_state_lock); journal->j_flags |= JBD2_ABORT; diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 058f50f65b7..73063285b13 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -225,7 +225,7 @@ do { \ */ int jbd2_journal_recover(journal_t *journal) { - int err; + int err, err2; journal_superblock_t * sb; struct recovery_info info; @@ -263,7 +263,10 @@ int jbd2_journal_recover(journal_t *journal) journal->j_transaction_sequence = ++info.end_transaction; jbd2_journal_clear_revoke(journal); - sync_blockdev(journal->j_fs_dev); + err2 = sync_blockdev(journal->j_fs_dev); + if (!err) + err = err2; + return err; } diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index e5d540588fa..39b7805a599 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -52,6 +52,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) transaction->t_expires = jiffies + journal->j_commit_interval; spin_lock_init(&transaction->t_handle_lock); INIT_LIST_HEAD(&transaction->t_inode_list); + INIT_LIST_HEAD(&transaction->t_private_list); /* Set up the commit timer for the new transaction. */ journal->j_commit_timer.expires = round_jiffies(transaction->t_expires); |