diff options
Diffstat (limited to 'fs/jbd/journal.c')
| -rw-r--r-- | fs/jbd/journal.c | 599 |
1 files changed, 390 insertions, 209 deletions
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 0e081d5f32e..06fe11e0abf 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -36,6 +36,10 @@ #include <linux/poison.h> #include <linux/proc_fs.h> #include <linux/debugfs.h> +#include <linux/ratelimit.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/jbd.h> #include <asm/uaccess.h> #include <asm/page.h> @@ -68,12 +72,12 @@ EXPORT_SYMBOL(journal_set_features); EXPORT_SYMBOL(journal_create); EXPORT_SYMBOL(journal_load); EXPORT_SYMBOL(journal_destroy); -EXPORT_SYMBOL(journal_update_superblock); EXPORT_SYMBOL(journal_abort); EXPORT_SYMBOL(journal_errno); EXPORT_SYMBOL(journal_ack_err); EXPORT_SYMBOL(journal_clear_err); EXPORT_SYMBOL(log_wait_commit); +EXPORT_SYMBOL(log_start_commit); EXPORT_SYMBOL(journal_start_commit); EXPORT_SYMBOL(journal_force_commit_nested); EXPORT_SYMBOL(journal_wipe); @@ -84,6 +88,25 @@ EXPORT_SYMBOL(journal_force_commit); static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); static void __journal_abort_soft (journal_t *journal, int errno); +static const char *journal_dev_name(journal_t *journal, char *buffer); + +#ifdef CONFIG_JBD_DEBUG +void __jbd_debug(int level, const char *file, const char *func, + unsigned int line, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + if (level > journal_enable_debug) + return; + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + printk(KERN_DEBUG "%s: (%s, %u): %pV\n", file, func, line, &vaf); + va_end(args); +} +EXPORT_SYMBOL(__jbd_debug); +#endif /* * Helper function used to manage commit timeouts @@ -124,6 +147,8 @@ static int kjournald(void *arg) setup_timer(&journal->j_commit_timer, commit_timeout, (unsigned long)current); + set_freezable(); + /* Record that the journal thread is running */ journal->j_task = current; wake_up(&journal->j_wait_done_commit); @@ -161,7 +186,7 @@ loop: */ jbd_debug(1, "Now suspending kjournald\n"); spin_unlock(&journal->j_state_lock); - refrigerator(); + try_to_freeze(); spin_lock(&journal->j_state_lock); } else { /* @@ -277,7 +302,7 @@ static void journal_kill_thread(journal_t *journal) int journal_write_metadata_buffer(transaction_t *transaction, struct journal_head *jh_in, struct journal_head **jh_out, - unsigned long blocknr) + unsigned int blocknr) { int need_copy_out = 0; int done_copy_out = 0; @@ -288,6 +313,7 @@ int journal_write_metadata_buffer(transaction_t *transaction, struct page *new_page; unsigned int new_offset; struct buffer_head *bh_in = jh2bh(jh_in); + journal_t *journal = transaction->t_journal; /* * The buffer really shouldn't be locked: only the current committing @@ -301,6 +327,9 @@ int journal_write_metadata_buffer(transaction_t *transaction, J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in)); new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL); + /* keep subsequent assertions sane */ + atomic_set(&new_bh->b_count, 1); + new_jh = journal_add_journal_head(new_bh); /* This sleeps */ /* * If a new transaction has already done a buffer copy-out, then @@ -317,7 +346,7 @@ repeat: new_offset = offset_in_page(jh2bh(jh_in)->b_data); } - mapped_data = kmap_atomic(new_page, KM_USER0); + mapped_data = kmap_atomic(new_page); /* * Check for escaping */ @@ -326,7 +355,7 @@ repeat: need_copy_out = 1; do_escape = 1; } - kunmap_atomic(mapped_data, KM_USER0); + kunmap_atomic(mapped_data); /* * Do we need to do a data copy? @@ -343,9 +372,9 @@ repeat: } jh_in->b_frozen_data = tmp; - mapped_data = kmap_atomic(new_page, KM_USER0); + mapped_data = kmap_atomic(new_page); memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); - kunmap_atomic(mapped_data, KM_USER0); + kunmap_atomic(mapped_data); new_page = virt_to_page(tmp); new_offset = offset_in_page(tmp); @@ -357,19 +386,11 @@ repeat: * copying, we can finally do so. */ if (do_escape) { - mapped_data = kmap_atomic(new_page, KM_USER0); + mapped_data = kmap_atomic(new_page); *((unsigned int *)(mapped_data + new_offset)) = 0; - kunmap_atomic(mapped_data, KM_USER0); + kunmap_atomic(mapped_data); } - /* keep subsequent assertions sane */ - new_bh->b_state = 0; - init_buffer(new_bh, NULL, NULL); - atomic_set(&new_bh->b_count, 1); - jbd_unlock_bh_state(bh_in); - - new_jh = journal_add_journal_head(new_bh); /* This sleeps */ - set_bh_page(new_bh, new_page, new_offset); new_jh->b_transaction = NULL; new_bh->b_size = jh2bh(jh_in)->b_size; @@ -386,7 +407,11 @@ repeat: * copying is moved to the transaction's shadow queue. */ JBUFFER_TRACE(jh_in, "file as BJ_Shadow"); - journal_file_buffer(jh_in, transaction, BJ_Shadow); + spin_lock(&journal->j_list_lock); + __journal_file_buffer(jh_in, transaction, BJ_Shadow); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh_in); + JBUFFER_TRACE(new_jh, "file as BJ_IO"); journal_file_buffer(new_jh, transaction, BJ_IO); @@ -428,16 +453,20 @@ int __log_space_left(journal_t *journal) } /* - * Called under j_state_lock. Returns true if a transaction was started. + * Called under j_state_lock. Returns true if a transaction commit was started. */ int __log_start_commit(journal_t *journal, tid_t target) { /* - * Are we already doing a recent enough commit? + * The only transaction we can possibly wait upon is the + * currently running transaction (if it exists). Otherwise, + * the target tid must be an old one. */ - if (!tid_geq(journal->j_commit_request, target)) { + if (journal->j_commit_request != target && + journal->j_running_transaction && + journal->j_running_transaction->t_tid == target) { /* - * We want a new commit: OK, mark the request and wakup the + * We want a new commit: OK, mark the request and wakeup the * commit thread. We do _not_ do the commit ourselves. */ @@ -447,7 +476,14 @@ int __log_start_commit(journal_t *journal, tid_t target) journal->j_commit_sequence); wake_up(&journal->j_wait_commit); return 1; - } + } else if (!tid_geq(journal->j_commit_request, target)) + /* This should never happen, but if it does, preserve + the evidence before kjournald goes into a loop and + increments j_commit_sequence beyond all recognition. */ + WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n", + journal->j_commit_request, journal->j_commit_sequence, + target, journal->j_running_transaction ? + journal->j_running_transaction->t_tid : 0); return 0; } @@ -496,7 +532,8 @@ int journal_force_commit_nested(journal_t *journal) /* * Start a commit of the current running transaction (if any). Returns true - * if a transaction was started, and fills its tid in at *ptid + * if a transaction is going to be committed (or is currently already + * committing), and fills its tid in at *ptid */ int journal_start_commit(journal_t *journal, tid_t *ptid) { @@ -506,15 +543,19 @@ int journal_start_commit(journal_t *journal, tid_t *ptid) if (journal->j_running_transaction) { tid_t tid = journal->j_running_transaction->t_tid; - ret = __log_start_commit(journal, tid); - if (ret && ptid) + __log_start_commit(journal, tid); + /* There's a running transaction and we've just made sure + * it's commit has been scheduled. */ + if (ptid) *ptid = tid; - } else if (journal->j_committing_transaction && ptid) { + ret = 1; + } else if (journal->j_committing_transaction) { /* - * If ext3_write_super() recently started a commit, then we - * have to wait for completion of that transaction + * If commit has been started, then we have to wait for + * completion of that transaction. */ - *ptid = journal->j_committing_transaction->t_tid; + if (ptid) + *ptid = journal->j_committing_transaction->t_tid; ret = 1; } spin_unlock(&journal->j_state_lock); @@ -532,13 +573,25 @@ int log_wait_commit(journal_t *journal, tid_t tid) #ifdef CONFIG_JBD_DEBUG spin_lock(&journal->j_state_lock); if (!tid_geq(journal->j_commit_request, tid)) { - printk(KERN_EMERG + printk(KERN_ERR "%s: error: j_commit_request=%d, tid=%d\n", - __FUNCTION__, journal->j_commit_request, tid); + __func__, journal->j_commit_request, tid); } spin_unlock(&journal->j_state_lock); #endif spin_lock(&journal->j_state_lock); + /* + * Not running or committing trans? Must be already committed. This + * saves us from waiting for a *long* time when tid overflows. + */ + if (!((journal->j_running_transaction && + journal->j_running_transaction->t_tid == tid) || + (journal->j_committing_transaction && + journal->j_committing_transaction->t_tid == tid))) + goto out_unlock; + + if (!tid_geq(journal->j_commit_waited, tid)) + journal->j_commit_waited = tid; while (tid_gt(tid, journal->j_commit_sequence)) { jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n", tid, journal->j_commit_sequence); @@ -548,22 +601,53 @@ int log_wait_commit(journal_t *journal, tid_t tid) !tid_gt(tid, journal->j_commit_sequence)); spin_lock(&journal->j_state_lock); } +out_unlock: spin_unlock(&journal->j_state_lock); - if (unlikely(is_journal_aborted(journal))) { - printk(KERN_EMERG "journal commit I/O error\n"); + if (unlikely(is_journal_aborted(journal))) err = -EIO; - } return err; } /* + * Return 1 if a given transaction has not yet sent barrier request + * connected with a transaction commit. If 0 is returned, transaction + * may or may not have sent the barrier. Used to avoid sending barrier + * twice in common cases. + */ +int journal_trans_will_send_data_barrier(journal_t *journal, tid_t tid) +{ + int ret = 0; + transaction_t *commit_trans; + + if (!(journal->j_flags & JFS_BARRIER)) + return 0; + spin_lock(&journal->j_state_lock); + /* Transaction already committed? */ + if (tid_geq(journal->j_commit_sequence, tid)) + goto out; + /* + * Transaction is being committed and we already proceeded to + * writing commit record? + */ + commit_trans = journal->j_committing_transaction; + if (commit_trans && commit_trans->t_tid == tid && + commit_trans->t_state >= T_COMMIT_RECORD) + goto out; + ret = 1; +out: + spin_unlock(&journal->j_state_lock); + return ret; +} +EXPORT_SYMBOL(journal_trans_will_send_data_barrier); + +/* * Log buffer allocation routines: */ -int journal_next_log_block(journal_t *journal, unsigned long *retp) +int journal_next_log_block(journal_t *journal, unsigned int *retp) { - unsigned long blocknr; + unsigned int blocknr; spin_lock(&journal->j_state_lock); J_ASSERT(journal->j_free > 1); @@ -584,11 +668,11 @@ int journal_next_log_block(journal_t *journal, unsigned long *retp) * this is a no-op. If needed, we can use j_blk_offset - everything is * ready. */ -int journal_bmap(journal_t *journal, unsigned long blocknr, - unsigned long *retp) +int journal_bmap(journal_t *journal, unsigned int blocknr, + unsigned int *retp) { int err = 0; - unsigned long ret; + unsigned int ret; if (journal->j_inode) { ret = bmap(journal->j_inode, blocknr); @@ -598,8 +682,8 @@ int journal_bmap(journal_t *journal, unsigned long blocknr, char b[BDEVNAME_SIZE]; printk(KERN_ALERT "%s: journal block not found " - "at offset %lu on %s\n", - __FUNCTION__, + "at offset %u on %s\n", + __func__, blocknr, bdevname(journal->j_dev, b)); err = -EIO; @@ -624,7 +708,7 @@ int journal_bmap(journal_t *journal, unsigned long blocknr, struct journal_head *journal_get_descriptor_buffer(journal_t *journal) { struct buffer_head *bh; - unsigned long blocknr; + unsigned int blocknr; int err; err = journal_next_log_block(journal, &blocknr); @@ -633,6 +717,8 @@ struct journal_head *journal_get_descriptor_buffer(journal_t *journal) return NULL; bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); + if (!bh) + return NULL; lock_buffer(bh); memset(bh->b_data, 0, journal->j_blocksize); set_buffer_uptodate(bh); @@ -665,7 +751,6 @@ static journal_t * journal_init_common (void) init_waitqueue_head(&journal->j_wait_checkpoint); init_waitqueue_head(&journal->j_wait_commit); init_waitqueue_head(&journal->j_wait_updates); - mutex_init(&journal->j_barrier); mutex_init(&journal->j_checkpoint_mutex); spin_lock_init(&journal->j_revoke_lock); spin_lock_init(&journal->j_list_lock); @@ -727,11 +812,9 @@ journal_t * journal_init_dev(struct block_device *bdev, journal->j_wbufsize = n; journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); if (!journal->j_wbuf) { - printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", - __FUNCTION__); - kfree(journal); - journal = NULL; - goto out; + printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n", + __func__); + goto out_err; } journal->j_dev = bdev; journal->j_fs_dev = fs_dev; @@ -739,11 +822,20 @@ journal_t * journal_init_dev(struct block_device *bdev, journal->j_maxlen = len; bh = __getblk(journal->j_dev, start, journal->j_blocksize); - J_ASSERT(bh != NULL); + if (!bh) { + printk(KERN_ERR + "%s: Cannot get buffer for journal superblock\n", + __func__); + goto out_err; + } journal->j_sb_buffer = bh; journal->j_superblock = (journal_superblock_t *)bh->b_data; -out: + return journal; +out_err: + kfree(journal->j_wbuf); + kfree(journal); + return NULL; } /** @@ -760,7 +852,7 @@ journal_t * journal_init_inode (struct inode *inode) journal_t *journal = journal_init_common(); int err; int n; - unsigned long blocknr; + unsigned int blocknr; if (!journal) return NULL; @@ -781,27 +873,34 @@ journal_t * journal_init_inode (struct inode *inode) journal->j_wbufsize = n; journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL); if (!journal->j_wbuf) { - printk(KERN_ERR "%s: Cant allocate bhs for commit thread\n", - __FUNCTION__); - kfree(journal); - return NULL; + printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n", + __func__); + goto out_err; } err = journal_bmap(journal, 0, &blocknr); /* If that failed, give up */ if (err) { - printk(KERN_ERR "%s: Cannnot locate journal superblock\n", - __FUNCTION__); - kfree(journal); - return NULL; + printk(KERN_ERR "%s: Cannot locate journal superblock\n", + __func__); + goto out_err; } bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); - J_ASSERT(bh != NULL); + if (!bh) { + printk(KERN_ERR + "%s: Cannot get buffer for journal superblock\n", + __func__); + goto out_err; + } journal->j_sb_buffer = bh; journal->j_superblock = (journal_superblock_t *)bh->b_data; return journal; +out_err: + kfree(journal->j_wbuf); + kfree(journal); + return NULL; } /* @@ -826,10 +925,16 @@ static void journal_fail_superblock (journal_t *journal) static int journal_reset(journal_t *journal) { journal_superblock_t *sb = journal->j_superblock; - unsigned long first, last; + unsigned int first, last; first = be32_to_cpu(sb->s_first); last = be32_to_cpu(sb->s_maxlen); + if (first + JFS_MIN_JOURNAL_BLOCKS > last + 1) { + printk(KERN_ERR "JBD: Journal too short (blocks %u-%u).\n", + first, last); + journal_fail_superblock(journal); + return -EINVAL; + } journal->j_first = first; journal->j_last = last; @@ -844,8 +949,33 @@ static int journal_reset(journal_t *journal) journal->j_max_transaction_buffers = journal->j_maxlen / 4; - /* Add the dynamic fields and write it to disk. */ - journal_update_superblock(journal, 1); + /* + * As a special case, if the on-disk copy is already marked as needing + * no recovery (s_start == 0), then we can safely defer the superblock + * update until the next commit by setting JFS_FLUSHED. This avoids + * attempting a write to a potential-readonly device. + */ + if (sb->s_start == 0) { + jbd_debug(1,"JBD: Skipping superblock update on recovered sb " + "(start %u, seq %d, errno %d)\n", + journal->j_tail, journal->j_tail_sequence, + journal->j_errno); + journal->j_flags |= JFS_FLUSHED; + } else { + /* Lock here to make assertions happy... */ + mutex_lock(&journal->j_checkpoint_mutex); + /* + * Update log tail information. We use WRITE_FUA since new + * transaction will start reusing journal space and so we + * must make sure information about current log tail is on + * disk before that. + */ + journal_update_sb_log_tail(journal, + journal->j_tail_sequence, + journal->j_tail, + WRITE_FUA); + mutex_unlock(&journal->j_checkpoint_mutex); + } return journal_start_thread(journal); } @@ -859,7 +989,7 @@ static int journal_reset(journal_t *journal) **/ int journal_create(journal_t *journal) { - unsigned long blocknr; + unsigned int blocknr; struct buffer_head *bh; journal_superblock_t *sb; int i, err; @@ -877,7 +1007,7 @@ int journal_create(journal_t *journal) */ printk(KERN_EMERG "%s: creation of journal on external device!\n", - __FUNCTION__); + __func__); BUG(); } @@ -889,6 +1019,8 @@ int journal_create(journal_t *journal) if (err) return err; bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); + if (unlikely(!bh)) + return -ENOMEM; lock_buffer(bh); memset (bh->b_data, 0, journal->j_blocksize); BUFFER_TRACE(bh, "marking dirty"); @@ -920,62 +1052,131 @@ int journal_create(journal_t *journal) return journal_reset(journal); } +static void journal_write_superblock(journal_t *journal, int write_op) +{ + struct buffer_head *bh = journal->j_sb_buffer; + int ret; + + trace_journal_write_superblock(journal, write_op); + if (!(journal->j_flags & JFS_BARRIER)) + write_op &= ~(REQ_FUA | REQ_FLUSH); + lock_buffer(bh); + if (buffer_write_io_error(bh)) { + char b[BDEVNAME_SIZE]; + /* + * Oh, dear. A previous attempt to write the journal + * superblock failed. This could happen because the + * USB device was yanked out. Or it could happen to + * be a transient write error and maybe the block will + * be remapped. Nothing we can do but to retry the + * write and hope for the best. + */ + printk(KERN_ERR "JBD: previous I/O error detected " + "for journal superblock update for %s.\n", + journal_dev_name(journal, b)); + clear_buffer_write_io_error(bh); + set_buffer_uptodate(bh); + } + + get_bh(bh); + bh->b_end_io = end_buffer_write_sync; + ret = submit_bh(write_op, bh); + wait_on_buffer(bh); + if (buffer_write_io_error(bh)) { + clear_buffer_write_io_error(bh); + set_buffer_uptodate(bh); + ret = -EIO; + } + if (ret) { + char b[BDEVNAME_SIZE]; + printk(KERN_ERR "JBD: Error %d detected " + "when updating journal superblock for %s.\n", + ret, journal_dev_name(journal, b)); + } +} + /** - * void journal_update_superblock() - Update journal sb on disk. + * journal_update_sb_log_tail() - Update log tail in journal sb on disk. * @journal: The journal to update. - * @wait: Set to '0' if you don't want to wait for IO completion. + * @tail_tid: TID of the new transaction at the tail of the log + * @tail_block: The first block of the transaction at the tail of the log + * @write_op: With which operation should we write the journal sb * - * Update a journal's dynamic superblock fields and write it to disk, - * optionally waiting for the IO to complete. + * Update a journal's superblock information about log tail and write it to + * disk, waiting for the IO to complete. */ -void journal_update_superblock(journal_t *journal, int wait) +void journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid, + unsigned int tail_block, int write_op) { journal_superblock_t *sb = journal->j_superblock; - struct buffer_head *bh = journal->j_sb_buffer; - /* - * As a special case, if the on-disk copy is already marked as needing - * no recovery (s_start == 0) and there are no outstanding transactions - * in the filesystem, then we can safely defer the superblock update - * until the next commit by setting JFS_FLUSHED. This avoids - * attempting a write to a potential-readonly device. - */ - if (sb->s_start == 0 && journal->j_tail_sequence == - journal->j_transaction_sequence) { - jbd_debug(1,"JBD: Skipping superblock update on recovered sb " - "(start %ld, seq %d, errno %d)\n", - journal->j_tail, journal->j_tail_sequence, - journal->j_errno); - goto out; - } + BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); + jbd_debug(1,"JBD: updating superblock (start %u, seq %u)\n", + tail_block, tail_tid); + sb->s_sequence = cpu_to_be32(tail_tid); + sb->s_start = cpu_to_be32(tail_block); + + journal_write_superblock(journal, write_op); + + /* Log is no longer empty */ spin_lock(&journal->j_state_lock); - jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n", - journal->j_tail, journal->j_tail_sequence, journal->j_errno); + WARN_ON(!sb->s_sequence); + journal->j_flags &= ~JFS_FLUSHED; + spin_unlock(&journal->j_state_lock); +} + +/** + * mark_journal_empty() - Mark on disk journal as empty. + * @journal: The journal to update. + * + * Update a journal's dynamic superblock fields to show that journal is empty. + * Write updated superblock to disk waiting for IO to complete. + */ +static void mark_journal_empty(journal_t *journal) +{ + journal_superblock_t *sb = journal->j_superblock; + + BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); + spin_lock(&journal->j_state_lock); + /* Is it already empty? */ + if (sb->s_start == 0) { + spin_unlock(&journal->j_state_lock); + return; + } + jbd_debug(1, "JBD: Marking journal as empty (seq %d)\n", + journal->j_tail_sequence); sb->s_sequence = cpu_to_be32(journal->j_tail_sequence); - sb->s_start = cpu_to_be32(journal->j_tail); - sb->s_errno = cpu_to_be32(journal->j_errno); + sb->s_start = cpu_to_be32(0); spin_unlock(&journal->j_state_lock); - BUFFER_TRACE(bh, "marking dirty"); - mark_buffer_dirty(bh); - if (wait) - sync_dirty_buffer(bh); - else - ll_rw_block(SWRITE, 1, &bh); + journal_write_superblock(journal, WRITE_FUA); -out: - /* If we have just flushed the log (by marking s_start==0), then - * any future commit will have to be careful to update the - * superblock again to re-record the true start of the log. */ + spin_lock(&journal->j_state_lock); + /* Log is empty */ + journal->j_flags |= JFS_FLUSHED; + spin_unlock(&journal->j_state_lock); +} + +/** + * journal_update_sb_errno() - Update error in the journal. + * @journal: The journal to update. + * + * Update a journal's errno. Write updated superblock to disk waiting for IO + * to complete. + */ +static void journal_update_sb_errno(journal_t *journal) +{ + journal_superblock_t *sb = journal->j_superblock; spin_lock(&journal->j_state_lock); - if (sb->s_start) - journal->j_flags &= ~JFS_FLUSHED; - else - journal->j_flags |= JFS_FLUSHED; + jbd_debug(1, "JBD: updating superblock error (errno %d)\n", + journal->j_errno); + sb->s_errno = cpu_to_be32(journal->j_errno); spin_unlock(&journal->j_state_lock); + + journal_write_superblock(journal, WRITE_SYNC); } /* @@ -1031,6 +1232,14 @@ static int journal_get_superblock(journal_t *journal) goto out; } + if (be32_to_cpu(sb->s_first) == 0 || + be32_to_cpu(sb->s_first) >= journal->j_maxlen) { + printk(KERN_WARNING + "JBD: Invalid start block of journal: %u\n", + be32_to_cpu(sb->s_first)); + goto out; + } + return 0; out: @@ -1122,9 +1331,13 @@ recovery_error: * * Release a journal_t structure once it is no longer in use by the * journaled object. + * Return <0 if we couldn't clean up the journal. */ -void journal_destroy(journal_t *journal) +int journal_destroy(journal_t *journal) { + int err = 0; + + /* Wait for the commit thread to wake up and die. */ journal_kill_thread(journal); @@ -1134,6 +1347,8 @@ void journal_destroy(journal_t *journal) /* Force any old transactions to disk */ + /* We cannot race with anybody but must keep assertions happy */ + mutex_lock(&journal->j_checkpoint_mutex); /* Totally anal locking here... */ spin_lock(&journal->j_list_lock); while (journal->j_checkpoint_transactions != NULL) { @@ -1147,13 +1362,16 @@ void journal_destroy(journal_t *journal) J_ASSERT(journal->j_checkpoint_transactions == NULL); spin_unlock(&journal->j_list_lock); - /* We can now mark the journal as empty. */ - journal->j_tail = 0; - journal->j_tail_sequence = ++journal->j_transaction_sequence; if (journal->j_sb_buffer) { - journal_update_superblock(journal, 1); + if (!is_journal_aborted(journal)) { + journal->j_tail_sequence = + ++journal->j_transaction_sequence; + mark_journal_empty(journal); + } else + err = -EIO; brelse(journal->j_sb_buffer); } + mutex_unlock(&journal->j_checkpoint_mutex); if (journal->j_inode) iput(journal->j_inode); @@ -1161,6 +1379,8 @@ void journal_destroy(journal_t *journal) journal_destroy_revoke(journal); kfree(journal->j_wbuf); kfree(journal); + + return err; } @@ -1209,13 +1429,9 @@ int journal_check_used_features (journal_t *journal, unsigned long compat, int journal_check_available_features (journal_t *journal, unsigned long compat, unsigned long ro, unsigned long incompat) { - journal_superblock_t *sb; - if (!compat && !ro && !incompat) return 1; - sb = journal->j_superblock; - /* We can support any known requested features iff the * superblock is in version 2. Otherwise we fail to support any * extended sb features. */ @@ -1335,7 +1551,6 @@ int journal_flush(journal_t *journal) { int err = 0; transaction_t *transaction = NULL; - unsigned long old_tail; spin_lock(&journal->j_state_lock); @@ -1360,10 +1575,17 @@ int journal_flush(journal_t *journal) spin_lock(&journal->j_list_lock); while (!err && journal->j_checkpoint_transactions != NULL) { spin_unlock(&journal->j_list_lock); + mutex_lock(&journal->j_checkpoint_mutex); err = log_do_checkpoint(journal); + mutex_unlock(&journal->j_checkpoint_mutex); spin_lock(&journal->j_list_lock); } spin_unlock(&journal->j_list_lock); + + if (is_journal_aborted(journal)) + return -EIO; + + mutex_lock(&journal->j_checkpoint_mutex); cleanup_journal_tail(journal); /* Finally, mark the journal as really needing no recovery. @@ -1371,21 +1593,16 @@ int journal_flush(journal_t *journal) * the magic code for a fully-recovered superblock. Any future * commits of data to the journal will restore the current * s_start value. */ + mark_journal_empty(journal); + mutex_unlock(&journal->j_checkpoint_mutex); spin_lock(&journal->j_state_lock); - old_tail = journal->j_tail; - journal->j_tail = 0; - spin_unlock(&journal->j_state_lock); - journal_update_superblock(journal, 1); - spin_lock(&journal->j_state_lock); - journal->j_tail = old_tail; - J_ASSERT(!journal->j_running_transaction); J_ASSERT(!journal->j_committing_transaction); J_ASSERT(!journal->j_checkpoint_transactions); J_ASSERT(journal->j_head == journal->j_tail); J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); spin_unlock(&journal->j_state_lock); - return err; + return 0; } /** @@ -1403,7 +1620,6 @@ int journal_flush(journal_t *journal) int journal_wipe(journal_t *journal, int write) { - journal_superblock_t *sb; int err = 0; J_ASSERT (!(journal->j_flags & JFS_LOADED)); @@ -1412,8 +1628,6 @@ int journal_wipe(journal_t *journal, int write) if (err) return err; - sb = journal->j_superblock; - if (!journal->j_tail) goto no_recovery; @@ -1421,8 +1635,12 @@ int journal_wipe(journal_t *journal, int write) write ? "Clearing" : "Ignoring"); err = journal_skip_recovery(journal); - if (write) - journal_update_superblock(journal, 1); + if (write) { + /* Lock to make assertions happy... */ + mutex_lock(&journal->j_checkpoint_mutex); + mark_journal_empty(journal); + mutex_unlock(&journal->j_checkpoint_mutex); + } no_recovery: return err; @@ -1490,7 +1708,7 @@ static void __journal_abort_soft (journal_t *journal, int errno) __journal_abort_hard(journal); if (errno) - journal_update_superblock(journal, 1); + journal_update_sb_errno(journal); } /** @@ -1636,9 +1854,10 @@ static int journal_init_journal_head_cache(void) static void journal_destroy_journal_head_cache(void) { - J_ASSERT(journal_head_cache != NULL); - kmem_cache_destroy(journal_head_cache); - journal_head_cache = NULL; + if (journal_head_cache) { + kmem_cache_destroy(journal_head_cache); + journal_head_cache = NULL; + } } /* @@ -1647,22 +1866,19 @@ static void journal_destroy_journal_head_cache(void) static struct journal_head *journal_alloc_journal_head(void) { struct journal_head *ret; - static unsigned long last_warning; #ifdef CONFIG_JBD_DEBUG atomic_inc(&nr_journal_heads); #endif - ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS); + ret = kmem_cache_zalloc(journal_head_cache, GFP_NOFS); if (ret == NULL) { jbd_debug(1, "out of memory for journal_head\n"); - if (time_after(jiffies, last_warning + 5*HZ)) { - printk(KERN_NOTICE "ENOMEM in %s, retrying.\n", - __FUNCTION__); - last_warning = jiffies; - } + printk_ratelimited(KERN_NOTICE "ENOMEM in %s, retrying.\n", + __func__); + while (ret == NULL) { yield(); - ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS); + ret = kmem_cache_zalloc(journal_head_cache, GFP_NOFS); } } return ret; @@ -1691,10 +1907,9 @@ static void journal_free_journal_head(struct journal_head *jh) * When a buffer has its BH_JBD bit set it is immune from being released by * core kernel code, mainly via ->b_count. * - * A journal_head may be detached from its buffer_head when the journal_head's - * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. - * Various places in JBD call journal_remove_journal_head() to indicate that the - * journal_head can be dropped if needed. + * A journal_head is detached from its buffer_head when the journal_head's + * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint + * transaction (b_cp_transaction) hold their references to b_jcount. * * Various places in the kernel want to attach a journal_head to a buffer_head * _before_ attaching the journal_head to a transaction. To protect the @@ -1707,17 +1922,16 @@ static void journal_free_journal_head(struct journal_head *jh) * (Attach a journal_head if needed. Increments b_jcount) * struct journal_head *jh = journal_add_journal_head(bh); * ... - * jh->b_transaction = xxx; - * journal_put_journal_head(jh); - * - * Now, the journal_head's b_jcount is zero, but it is safe from being released - * because it has a non-zero b_transaction. + * (Get another reference for transaction) + * journal_grab_journal_head(bh); + * jh->b_transaction = xxx; + * (Put original reference) + * journal_put_journal_head(jh); */ /* * Give a buffer_head a journal_head. * - * Doesn't need the journal lock. * May sleep. */ struct journal_head *journal_add_journal_head(struct buffer_head *bh) @@ -1726,10 +1940,8 @@ struct journal_head *journal_add_journal_head(struct buffer_head *bh) struct journal_head *new_jh = NULL; repeat: - if (!buffer_jbd(bh)) { + if (!buffer_jbd(bh)) new_jh = journal_alloc_journal_head(); - memset(new_jh, 0, sizeof(*new_jh)); - } jbd_lock_bh_journal_head(bh); if (buffer_jbd(bh)) { @@ -1781,61 +1993,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh) struct journal_head *jh = bh2jh(bh); J_ASSERT_JH(jh, jh->b_jcount >= 0); - - get_bh(bh); - if (jh->b_jcount == 0) { - if (jh->b_transaction == NULL && - jh->b_next_transaction == NULL && - jh->b_cp_transaction == NULL) { - J_ASSERT_JH(jh, jh->b_jlist == BJ_None); - J_ASSERT_BH(bh, buffer_jbd(bh)); - J_ASSERT_BH(bh, jh2bh(jh) == bh); - BUFFER_TRACE(bh, "remove journal_head"); - if (jh->b_frozen_data) { - printk(KERN_WARNING "%s: freeing " - "b_frozen_data\n", - __FUNCTION__); - jbd_free(jh->b_frozen_data, bh->b_size); - } - if (jh->b_committed_data) { - printk(KERN_WARNING "%s: freeing " - "b_committed_data\n", - __FUNCTION__); - jbd_free(jh->b_committed_data, bh->b_size); - } - bh->b_private = NULL; - jh->b_bh = NULL; /* debug, really */ - clear_buffer_jbd(bh); - __brelse(bh); - journal_free_journal_head(jh); - } else { - BUFFER_TRACE(bh, "journal_head was locked"); - } + J_ASSERT_JH(jh, jh->b_transaction == NULL); + J_ASSERT_JH(jh, jh->b_next_transaction == NULL); + J_ASSERT_JH(jh, jh->b_cp_transaction == NULL); + J_ASSERT_JH(jh, jh->b_jlist == BJ_None); + J_ASSERT_BH(bh, buffer_jbd(bh)); + J_ASSERT_BH(bh, jh2bh(jh) == bh); + BUFFER_TRACE(bh, "remove journal_head"); + if (jh->b_frozen_data) { + printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__); + jbd_free(jh->b_frozen_data, bh->b_size); } + if (jh->b_committed_data) { + printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__); + jbd_free(jh->b_committed_data, bh->b_size); + } + bh->b_private = NULL; + jh->b_bh = NULL; /* debug, really */ + clear_buffer_jbd(bh); + journal_free_journal_head(jh); } /* - * journal_remove_journal_head(): if the buffer isn't attached to a transaction - * and has a zero b_jcount then remove and release its journal_head. If we did - * see that the buffer is not used by any transaction we also "logically" - * decrement ->b_count. - * - * We in fact take an additional increment on ->b_count as a convenience, - * because the caller usually wants to do additional things with the bh - * after calling here. - * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some - * time. Once the caller has run __brelse(), the buffer is eligible for - * reaping by try_to_free_buffers(). - */ -void journal_remove_journal_head(struct buffer_head *bh) -{ - jbd_lock_bh_journal_head(bh); - __journal_remove_journal_head(bh); - jbd_unlock_bh_journal_head(bh); -} - -/* - * Drop a reference on the passed journal_head. If it fell to zero then try to + * Drop a reference on the passed journal_head. If it fell to zero then * release the journal_head from the buffer_head. */ void journal_put_journal_head(struct journal_head *jh) @@ -1845,11 +2025,12 @@ void journal_put_journal_head(struct journal_head *jh) jbd_lock_bh_journal_head(bh); J_ASSERT_JH(jh, jh->b_jcount > 0); --jh->b_jcount; - if (!jh->b_jcount && !jh->b_transaction) { + if (!jh->b_jcount) { __journal_remove_journal_head(bh); + jbd_unlock_bh_journal_head(bh); __brelse(bh); - } - jbd_unlock_bh_journal_head(bh); + } else + jbd_unlock_bh_journal_head(bh); } /* @@ -1867,7 +2048,7 @@ static void __init jbd_create_debugfs_entry(void) { jbd_debugfs_dir = debugfs_create_dir("jbd", NULL); if (jbd_debugfs_dir) - jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO, + jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO | S_IWUSR, jbd_debugfs_dir, &journal_enable_debug); } @@ -1953,7 +2134,7 @@ static void __exit journal_exit(void) #ifdef CONFIG_JBD_DEBUG int n = atomic_read(&nr_journal_heads); if (n) - printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); + printk(KERN_ERR "JBD: leaked %d journal_heads!\n", n); #endif jbd_remove_debugfs_entry(); journal_destroy_caches(); |
