diff options
Diffstat (limited to 'fs/jbd2')
| -rw-r--r-- | fs/jbd2/Kconfig | 6 | ||||
| -rw-r--r-- | fs/jbd2/checkpoint.c | 22 | ||||
| -rw-r--r-- | fs/jbd2/commit.c | 289 | ||||
| -rw-r--r-- | fs/jbd2/journal.c | 300 | ||||
| -rw-r--r-- | fs/jbd2/recovery.c | 33 | ||||
| -rw-r--r-- | fs/jbd2/revoke.c | 49 | ||||
| -rw-r--r-- | fs/jbd2/transaction.c | 634 |
7 files changed, 752 insertions, 581 deletions
diff --git a/fs/jbd2/Kconfig b/fs/jbd2/Kconfig index 69a48c2944d..5a9f5534d57 100644 --- a/fs/jbd2/Kconfig +++ b/fs/jbd2/Kconfig @@ -20,7 +20,7 @@ config JBD2 config JBD2_DEBUG bool "JBD2 (ext4) debugging support" - depends on JBD2 && DEBUG_FS + depends on JBD2 help If you are using the ext4 journaled file system (or potentially any other filesystem/device using JBD2), this option @@ -29,7 +29,7 @@ config JBD2_DEBUG By default, the debugging output will be turned off. If you select Y here, then you will be able to turn on debugging - with "echo N > /sys/kernel/debug/jbd2/jbd2-debug", where N is a + with "echo N > /sys/module/jbd2/parameters/jbd2_debug", where N is a number between 1 and 5. The higher the number, the more debugging output is generated. To turn debugging off again, do - "echo 0 > /sys/kernel/debug/jbd2/jbd2-debug". + "echo 0 > /sys/module/jbd2/parameters/jbd2_debug". diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index c78841ee81c..7f34f471616 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -120,8 +120,8 @@ void __jbd2_log_wait_for_space(journal_t *journal) int nblocks, space_left; /* assert_spin_locked(&journal->j_state_lock); */ - nblocks = jbd_space_needed(journal); - while (__jbd2_log_space_left(journal) < nblocks) { + nblocks = jbd2_space_needed(journal); + while (jbd2_log_space_left(journal) < nblocks) { if (journal->j_flags & JBD2_ABORT) return; write_unlock(&journal->j_state_lock); @@ -140,8 +140,8 @@ void __jbd2_log_wait_for_space(journal_t *journal) */ write_lock(&journal->j_state_lock); spin_lock(&journal->j_list_lock); - nblocks = jbd_space_needed(journal); - space_left = __jbd2_log_space_left(journal); + nblocks = jbd2_space_needed(journal); + space_left = jbd2_log_space_left(journal); if (space_left < nblocks) { int chkpt = journal->j_checkpoint_transactions != NULL; tid_t tid = 0; @@ -156,7 +156,15 @@ void __jbd2_log_wait_for_space(journal_t *journal) /* We were able to recover space; yay! */ ; } else if (tid) { + /* + * jbd2_journal_commit_transaction() may want + * to take the checkpoint_mutex if JBD2_FLUSHED + * is set. So we need to temporarily drop it. + */ + mutex_unlock(&journal->j_checkpoint_mutex); jbd2_log_wait_commit(journal, tid); + write_lock(&journal->j_state_lock); + continue; } else { printk(KERN_ERR "%s: needed %d blocks and " "only had %d space available\n", @@ -625,10 +633,6 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh) __jbd2_journal_drop_transaction(journal, transaction); jbd2_journal_free_transaction(transaction); - - /* Just in case anybody was waiting for more transactions to be - checkpointed... */ - wake_up(&journal->j_wait_logspace); ret = 1; out: return ret; @@ -690,9 +694,7 @@ void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transact J_ASSERT(transaction->t_state == T_FINISHED); J_ASSERT(transaction->t_buffers == NULL); J_ASSERT(transaction->t_forget == NULL); - J_ASSERT(transaction->t_iobuf_list == NULL); J_ASSERT(transaction->t_shadow_list == NULL); - J_ASSERT(transaction->t_log_list == NULL); J_ASSERT(transaction->t_checkpoint_list == NULL); J_ASSERT(transaction->t_checkpoint_io_list == NULL); J_ASSERT(atomic_read(&transaction->t_updates) == 0); diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 3091d42992f..6fac7434985 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -30,15 +30,22 @@ #include <trace/events/jbd2.h> /* - * Default IO end handler for temporary BJ_IO buffer_heads. + * IO end handler for temporary buffer_heads handling writes to the journal. */ static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) { + struct buffer_head *orig_bh = bh->b_private; + BUFFER_TRACE(bh, ""); if (uptodate) set_buffer_uptodate(bh); else clear_buffer_uptodate(bh); + if (orig_bh) { + clear_bit_unlock(BH_Shadow, &orig_bh->b_state); + smp_mb__after_atomic(); + wake_up_bit(&orig_bh->b_state, BH_Shadow); + } unlock_buffer(bh); } @@ -85,8 +92,7 @@ nope: __brelse(bh); } -static void jbd2_commit_block_csum_set(journal_t *j, - struct journal_head *descriptor) +static void jbd2_commit_block_csum_set(journal_t *j, struct buffer_head *bh) { struct commit_header *h; __u32 csum; @@ -94,12 +100,11 @@ static void jbd2_commit_block_csum_set(journal_t *j, if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) return; - h = (struct commit_header *)(jh2bh(descriptor)->b_data); + h = (struct commit_header *)(bh->b_data); h->h_chksum_type = 0; h->h_chksum_size = 0; h->h_chksum[0] = 0; - csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, - j->j_blocksize); + csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); h->h_chksum[0] = cpu_to_be32(csum); } @@ -116,7 +121,6 @@ static int journal_submit_commit_record(journal_t *journal, struct buffer_head **cbh, __u32 crc32_sum) { - struct journal_head *descriptor; struct commit_header *tmp; struct buffer_head *bh; int ret; @@ -127,12 +131,10 @@ static int journal_submit_commit_record(journal_t *journal, if (is_journal_aborted(journal)) return 0; - descriptor = jbd2_journal_get_descriptor_buffer(journal); - if (!descriptor) + bh = jbd2_journal_get_descriptor_buffer(journal); + if (!bh) return 1; - bh = jh2bh(descriptor); - tmp = (struct commit_header *)bh->b_data; tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); @@ -146,9 +148,9 @@ static int journal_submit_commit_record(journal_t *journal, tmp->h_chksum_size = JBD2_CRC32_CHKSUM_SIZE; tmp->h_chksum[0] = cpu_to_be32(crc32_sum); } - jbd2_commit_block_csum_set(journal, descriptor); + jbd2_commit_block_csum_set(journal, bh); - JBUFFER_TRACE(descriptor, "submit commit block"); + BUFFER_TRACE(bh, "submit commit block"); lock_buffer(bh); clear_buffer_dirty(bh); set_buffer_uptodate(bh); @@ -180,7 +182,6 @@ static int journal_wait_on_commit_record(journal_t *journal, if (unlikely(!buffer_uptodate(bh))) ret = -EIO; put_bh(bh); /* One for getblk() */ - jbd2_journal_put_journal_head(bh2jh(bh)); return ret; } @@ -238,7 +239,7 @@ static int journal_submit_data_buffers(journal_t *journal, spin_lock(&journal->j_list_lock); J_ASSERT(jinode->i_transaction == commit_transaction); clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); } spin_unlock(&journal->j_list_lock); @@ -276,7 +277,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal, } spin_lock(&journal->j_list_lock); clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags); - smp_mb__after_clear_bit(); + smp_mb__after_atomic(); wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); } @@ -321,7 +322,7 @@ static void write_tag_block(int tag_bytes, journal_block_tag_t *tag, } static void jbd2_descr_block_csum_set(journal_t *j, - struct journal_head *descriptor) + struct buffer_head *bh) { struct jbd2_journal_block_tail *tail; __u32 csum; @@ -329,12 +330,10 @@ static void jbd2_descr_block_csum_set(journal_t *j, if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) return; - tail = (struct jbd2_journal_block_tail *) - (jh2bh(descriptor)->b_data + j->j_blocksize - + tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize - sizeof(struct jbd2_journal_block_tail)); tail->t_checksum = 0; - csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, - j->j_blocksize); + csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); tail->t_checksum = cpu_to_be32(csum); } @@ -343,20 +342,21 @@ static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag, { struct page *page = bh->b_page; __u8 *addr; - __u32 csum; + __u32 csum32; + __be32 seq; if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) return; - sequence = cpu_to_be32(sequence); + seq = cpu_to_be32(sequence); addr = kmap_atomic(page); - csum = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, - sizeof(sequence)); - csum = jbd2_chksum(j, csum, addr + offset_in_page(bh->b_data), - bh->b_size); + csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); + csum32 = jbd2_chksum(j, csum32, addr + offset_in_page(bh->b_data), + bh->b_size); kunmap_atomic(addr); - tag->t_checksum = cpu_to_be32(csum); + /* We only have space to store the lower 16 bits of the crc32c. */ + tag->t_checksum = cpu_to_be16(csum32); } /* * jbd2_journal_commit_transaction @@ -368,7 +368,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) { struct transaction_stats_s stats; transaction_t *commit_transaction; - struct journal_head *jh, *new_jh, *descriptor; + struct journal_head *jh; + struct buffer_head *descriptor; struct buffer_head **wbuf = journal->j_wbuf; int bufs; int flags; @@ -382,7 +383,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) int space_left = 0; int first_tag = 0; int tag_flag; - int i, to_free = 0; + int i; int tag_bytes = journal_tag_bytes(journal); struct buffer_head *cbh = NULL; /* For transactional checksums */ __u32 crc32_sum = ~0; @@ -392,6 +393,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) tid_t first_tid; int update_tail; int csum_size = 0; + LIST_HEAD(io_bufs); + LIST_HEAD(log_bufs); if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) csum_size = sizeof(struct jbd2_journal_block_tail); @@ -424,18 +427,23 @@ void jbd2_journal_commit_transaction(journal_t *journal) J_ASSERT(journal->j_committing_transaction == NULL); commit_transaction = journal->j_running_transaction; - J_ASSERT(commit_transaction->t_state == T_RUNNING); trace_jbd2_start_commit(journal, commit_transaction); jbd_debug(1, "JBD2: starting commit of transaction %d\n", commit_transaction->t_tid); write_lock(&journal->j_state_lock); + J_ASSERT(commit_transaction->t_state == T_RUNNING); commit_transaction->t_state = T_LOCKED; trace_jbd2_commit_locking(journal, commit_transaction); stats.run.rs_wait = commit_transaction->t_max_wait; + stats.run.rs_request_delay = 0; stats.run.rs_locked = jiffies; + if (commit_transaction->t_requested) + stats.run.rs_request_delay = + jbd2_time_diff(commit_transaction->t_requested, + stats.run.rs_locked); stats.run.rs_running = jbd2_time_diff(commit_transaction->t_start, stats.run.rs_locked); @@ -515,6 +523,12 @@ void jbd2_journal_commit_transaction(journal_t *journal) */ jbd2_journal_switch_revoke_table(journal); + /* + * Reserved credits cannot be claimed anymore, free them + */ + atomic_sub(atomic_read(&journal->j_reserved_credits), + &commit_transaction->t_outstanding_credits); + trace_jbd2_commit_flushing(journal, commit_transaction); stats.run.rs_flushing = jiffies; stats.run.rs_locked = jbd2_time_diff(stats.run.rs_locked, @@ -528,7 +542,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) wake_up(&journal->j_wait_transaction_locked); write_unlock(&journal->j_state_lock); - jbd_debug(3, "JBD2: commit phase 2\n"); + jbd_debug(3, "JBD2: commit phase 2a\n"); /* * Now start flushing things to disk, in the order they appear @@ -540,10 +554,9 @@ void jbd2_journal_commit_transaction(journal_t *journal) blk_start_plug(&plug); jbd2_journal_write_revoke_records(journal, commit_transaction, - WRITE_SYNC); - blk_finish_plug(&plug); + &log_bufs, WRITE_SYNC); - jbd_debug(3, "JBD2: commit phase 2\n"); + jbd_debug(3, "JBD2: commit phase 2b\n"); /* * Way to go: we have now written out all of the data for a @@ -566,9 +579,8 @@ void jbd2_journal_commit_transaction(journal_t *journal) atomic_read(&commit_transaction->t_outstanding_credits)); err = 0; - descriptor = NULL; bufs = 0; - blk_start_plug(&plug); + descriptor = NULL; while (commit_transaction->t_buffers) { /* Find the next buffer to be journaled... */ @@ -599,8 +611,6 @@ void jbd2_journal_commit_transaction(journal_t *journal) record the metadata buffer. */ if (!descriptor) { - struct buffer_head *bh; - J_ASSERT (bufs == 0); jbd_debug(4, "JBD2: get descriptor\n"); @@ -611,26 +621,26 @@ void jbd2_journal_commit_transaction(journal_t *journal) continue; } - bh = jh2bh(descriptor); jbd_debug(4, "JBD2: got buffer %llu (%p)\n", - (unsigned long long)bh->b_blocknr, bh->b_data); - header = (journal_header_t *)&bh->b_data[0]; + (unsigned long long)descriptor->b_blocknr, + descriptor->b_data); + header = (journal_header_t *)descriptor->b_data; header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK); header->h_sequence = cpu_to_be32(commit_transaction->t_tid); - tagp = &bh->b_data[sizeof(journal_header_t)]; - space_left = bh->b_size - sizeof(journal_header_t); + tagp = &descriptor->b_data[sizeof(journal_header_t)]; + space_left = descriptor->b_size - + sizeof(journal_header_t); first_tag = 1; - set_buffer_jwrite(bh); - set_buffer_dirty(bh); - wbuf[bufs++] = bh; + set_buffer_jwrite(descriptor); + set_buffer_dirty(descriptor); + wbuf[bufs++] = descriptor; /* Record it so that we can wait for IO completion later */ - BUFFER_TRACE(bh, "ph3: file as descriptor"); - jbd2_journal_file_buffer(descriptor, commit_transaction, - BJ_LogCtl); + BUFFER_TRACE(descriptor, "ph3: file as descriptor"); + jbd2_file_log_bh(&log_bufs, descriptor); } /* Where is the buffer to be written? */ @@ -653,29 +663,22 @@ void jbd2_journal_commit_transaction(journal_t *journal) /* Bump b_count to prevent truncate from stumbling over the shadowed buffer! @@@ This can go if we ever get - rid of the BJ_IO/BJ_Shadow pairing of buffers. */ + rid of the shadow pairing of buffers. */ atomic_inc(&jh2bh(jh)->b_count); - /* Make a temporary IO buffer with which to write it out - (this will requeue both the metadata buffer and the - temporary IO buffer). new_bh goes on BJ_IO*/ - - set_bit(BH_JWrite, &jh2bh(jh)->b_state); /* - * akpm: jbd2_journal_write_metadata_buffer() sets - * new_bh->b_transaction to commit_transaction. - * We need to clean this up before we release new_bh - * (which is of type BJ_IO) + * Make a temporary IO buffer with which to write it out + * (this will requeue the metadata buffer to BJ_Shadow). */ + set_bit(BH_JWrite, &jh2bh(jh)->b_state); JBUFFER_TRACE(jh, "ph3: write metadata"); flags = jbd2_journal_write_metadata_buffer(commit_transaction, - jh, &new_jh, blocknr); + jh, &wbuf[bufs], blocknr); if (flags < 0) { jbd2_journal_abort(journal, flags); continue; } - set_bit(BH_JWrite, &jh2bh(new_jh)->b_state); - wbuf[bufs++] = jh2bh(new_jh); + jbd2_file_log_bh(&io_bufs, wbuf[bufs]); /* Record the new block's tag in the current descriptor buffer */ @@ -689,10 +692,11 @@ void jbd2_journal_commit_transaction(journal_t *journal) tag = (journal_block_tag_t *) tagp; write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); tag->t_flags = cpu_to_be16(tag_flag); - jbd2_block_tag_csum_set(journal, tag, jh2bh(new_jh), + jbd2_block_tag_csum_set(journal, tag, wbuf[bufs], commit_transaction->t_tid); tagp += tag_bytes; space_left -= tag_bytes; + bufs++; if (first_tag) { memcpy (tagp, journal->j_uuid, 16); @@ -804,7 +808,7 @@ start_journal_io: the log. Before we can commit it, wait for the IO so far to complete. Control buffers being written are on the transaction's t_log_list queue, and metadata buffers are on - the t_iobuf_list queue. + the io_bufs list. Wait for the buffers in reverse order. That way we are less likely to be woken up until all IOs have completed, and @@ -813,47 +817,33 @@ start_journal_io: jbd_debug(3, "JBD2: commit phase 3\n"); - /* - * akpm: these are BJ_IO, and j_list_lock is not needed. - * See __journal_try_to_free_buffer. - */ -wait_for_iobuf: - while (commit_transaction->t_iobuf_list != NULL) { - struct buffer_head *bh; + while (!list_empty(&io_bufs)) { + struct buffer_head *bh = list_entry(io_bufs.prev, + struct buffer_head, + b_assoc_buffers); - jh = commit_transaction->t_iobuf_list->b_tprev; - bh = jh2bh(jh); - if (buffer_locked(bh)) { - wait_on_buffer(bh); - goto wait_for_iobuf; - } - if (cond_resched()) - goto wait_for_iobuf; + wait_on_buffer(bh); + cond_resched(); if (unlikely(!buffer_uptodate(bh))) err = -EIO; - - clear_buffer_jwrite(bh); - - JBUFFER_TRACE(jh, "ph4: unfile after journal write"); - jbd2_journal_unfile_buffer(journal, jh); + jbd2_unfile_log_bh(bh); /* - * ->t_iobuf_list should contain only dummy buffer_heads - * which were created by jbd2_journal_write_metadata_buffer(). + * The list contains temporary buffer heads created by + * jbd2_journal_write_metadata_buffer(). */ BUFFER_TRACE(bh, "dumping temporary bh"); - jbd2_journal_put_journal_head(jh); __brelse(bh); J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0); free_buffer_head(bh); - /* We also have to unlock and free the corresponding - shadowed buffer */ + /* We also have to refile the corresponding shadowed buffer */ jh = commit_transaction->t_shadow_list->b_tprev; bh = jh2bh(jh); - clear_bit(BH_JWrite, &bh->b_state); + clear_buffer_jwrite(bh); J_ASSERT_BH(bh, buffer_jbddirty(bh)); + J_ASSERT_BH(bh, !buffer_shadow(bh)); /* The metadata is now released for reuse, but we need to remember it against this transaction so that when @@ -861,14 +851,6 @@ wait_for_iobuf: required. */ JBUFFER_TRACE(jh, "file as BJ_Forget"); jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget); - /* - * Wake up any transactions which were waiting for this IO to - * complete. The barrier must be here so that changes by - * jbd2_journal_file_buffer() take effect before wake_up_bit() - * does the waitqueue check. - */ - smp_mb(); - wake_up_bit(&bh->b_state, BH_Unshadow); JBUFFER_TRACE(jh, "brelse shadowed buffer"); __brelse(bh); } @@ -878,26 +860,19 @@ wait_for_iobuf: jbd_debug(3, "JBD2: commit phase 4\n"); /* Here we wait for the revoke record and descriptor record buffers */ - wait_for_ctlbuf: - while (commit_transaction->t_log_list != NULL) { + while (!list_empty(&log_bufs)) { struct buffer_head *bh; - jh = commit_transaction->t_log_list->b_tprev; - bh = jh2bh(jh); - if (buffer_locked(bh)) { - wait_on_buffer(bh); - goto wait_for_ctlbuf; - } - if (cond_resched()) - goto wait_for_ctlbuf; + bh = list_entry(log_bufs.prev, struct buffer_head, b_assoc_buffers); + wait_on_buffer(bh); + cond_resched(); if (unlikely(!buffer_uptodate(bh))) err = -EIO; BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile"); clear_buffer_jwrite(bh); - jbd2_journal_unfile_buffer(journal, jh); - jbd2_journal_put_journal_head(jh); + jbd2_unfile_log_bh(bh); __brelse(bh); /* One for getblk */ /* AKPM: bforget here */ } @@ -947,9 +922,7 @@ wait_for_iobuf: J_ASSERT(list_empty(&commit_transaction->t_inode_list)); J_ASSERT(commit_transaction->t_buffers == NULL); J_ASSERT(commit_transaction->t_checkpoint_list == NULL); - J_ASSERT(commit_transaction->t_iobuf_list == NULL); J_ASSERT(commit_transaction->t_shadow_list == NULL); - J_ASSERT(commit_transaction->t_log_list == NULL); restart_loop: /* @@ -1092,6 +1065,25 @@ restart_loop: goto restart_loop; } + /* Add the transaction to the checkpoint list + * __journal_remove_checkpoint() can not destroy transaction + * under us because it is not marked as T_FINISHED yet */ + if (journal->j_checkpoint_transactions == NULL) { + journal->j_checkpoint_transactions = commit_transaction; + commit_transaction->t_cpnext = commit_transaction; + commit_transaction->t_cpprev = commit_transaction; + } else { + commit_transaction->t_cpnext = + journal->j_checkpoint_transactions; + commit_transaction->t_cpprev = + commit_transaction->t_cpnext->t_cpprev; + commit_transaction->t_cpnext->t_cpprev = + commit_transaction; + commit_transaction->t_cpprev->t_cpnext = + commit_transaction; + } + spin_unlock(&journal->j_list_lock); + /* Done with this transaction! */ jbd_debug(3, "JBD2: commit phase 7\n"); @@ -1110,23 +1102,9 @@ restart_loop: atomic_read(&commit_transaction->t_handle_count); trace_jbd2_run_stats(journal->j_fs_dev->bd_dev, commit_transaction->t_tid, &stats.run); + stats.ts_requested = (commit_transaction->t_requested) ? 1 : 0; - /* - * Calculate overall stats - */ - spin_lock(&journal->j_history_lock); - journal->j_stats.ts_tid++; - journal->j_stats.run.rs_wait += stats.run.rs_wait; - journal->j_stats.run.rs_running += stats.run.rs_running; - journal->j_stats.run.rs_locked += stats.run.rs_locked; - journal->j_stats.run.rs_flushing += stats.run.rs_flushing; - journal->j_stats.run.rs_logging += stats.run.rs_logging; - journal->j_stats.run.rs_handle_count += stats.run.rs_handle_count; - journal->j_stats.run.rs_blocks += stats.run.rs_blocks; - journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged; - spin_unlock(&journal->j_history_lock); - - commit_transaction->t_state = T_FINISHED; + commit_transaction->t_state = T_COMMIT_CALLBACK; J_ASSERT(commit_transaction == journal->j_committing_transaction); journal->j_commit_sequence = commit_transaction->t_tid; journal->j_committing_transaction = NULL; @@ -1141,29 +1119,8 @@ restart_loop: journal->j_average_commit_time*3) / 4; else journal->j_average_commit_time = commit_time; - write_unlock(&journal->j_state_lock); - if (commit_transaction->t_checkpoint_list == NULL && - commit_transaction->t_checkpoint_io_list == NULL) { - __jbd2_journal_drop_transaction(journal, commit_transaction); - to_free = 1; - } else { - if (journal->j_checkpoint_transactions == NULL) { - journal->j_checkpoint_transactions = commit_transaction; - commit_transaction->t_cpnext = commit_transaction; - commit_transaction->t_cpprev = commit_transaction; - } else { - commit_transaction->t_cpnext = - journal->j_checkpoint_transactions; - commit_transaction->t_cpprev = - commit_transaction->t_cpnext->t_cpprev; - commit_transaction->t_cpnext->t_cpprev = - commit_transaction; - commit_transaction->t_cpprev->t_cpnext = - commit_transaction; - } - } - spin_unlock(&journal->j_list_lock); + write_unlock(&journal->j_state_lock); if (journal->j_commit_callback) journal->j_commit_callback(journal, commit_transaction); @@ -1171,8 +1128,34 @@ restart_loop: trace_jbd2_end_commit(journal, commit_transaction); jbd_debug(1, "JBD2: commit %d complete, head %d\n", journal->j_commit_sequence, journal->j_tail_sequence); - if (to_free) - jbd2_journal_free_transaction(commit_transaction); + write_lock(&journal->j_state_lock); + spin_lock(&journal->j_list_lock); + commit_transaction->t_state = T_FINISHED; + /* Check if the transaction can be dropped now that we are finished */ + if (commit_transaction->t_checkpoint_list == NULL && + commit_transaction->t_checkpoint_io_list == NULL) { + __jbd2_journal_drop_transaction(journal, commit_transaction); + jbd2_journal_free_transaction(commit_transaction); + } + spin_unlock(&journal->j_list_lock); + write_unlock(&journal->j_state_lock); wake_up(&journal->j_wait_done_commit); + + /* + * Calculate overall stats + */ + spin_lock(&journal->j_history_lock); + journal->j_stats.ts_tid++; + journal->j_stats.ts_requested += stats.ts_requested; + journal->j_stats.run.rs_wait += stats.run.rs_wait; + journal->j_stats.run.rs_request_delay += stats.run.rs_request_delay; + journal->j_stats.run.rs_running += stats.run.rs_running; + journal->j_stats.run.rs_locked += stats.run.rs_locked; + journal->j_stats.run.rs_flushing += stats.run.rs_flushing; + journal->j_stats.run.rs_logging += stats.run.rs_logging; + journal->j_stats.run.rs_handle_count += stats.run.rs_handle_count; + journal->j_stats.run.rs_blocks += stats.run.rs_blocks; + journal->j_stats.run.rs_blocks_logged += stats.run.rs_blocks_logged; + spin_unlock(&journal->j_history_lock); } diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index dbf41f9452d..67b8e303946 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -35,7 +35,6 @@ #include <linux/kthread.h> #include <linux/poison.h> #include <linux/proc_fs.h> -#include <linux/debugfs.h> #include <linux/seq_file.h> #include <linux/math64.h> #include <linux/hash.h> @@ -51,6 +50,14 @@ #include <asm/uaccess.h> #include <asm/page.h> +#ifdef CONFIG_JBD2_DEBUG +ushort jbd2_journal_enable_debug __read_mostly; +EXPORT_SYMBOL(jbd2_journal_enable_debug); + +module_param_named(jbd2_debug, jbd2_journal_enable_debug, ushort, 0644); +MODULE_PARM_DESC(jbd2_debug, "Debugging level for jbd2"); +#endif + EXPORT_SYMBOL(jbd2_journal_extend); EXPORT_SYMBOL(jbd2_journal_stop); EXPORT_SYMBOL(jbd2_journal_lock_updates); @@ -96,8 +103,26 @@ EXPORT_SYMBOL(jbd2_inode_cache); static void __journal_abort_soft (journal_t *journal, int errno); static int jbd2_journal_create_slab(size_t slab_size); +#ifdef CONFIG_JBD2_DEBUG +void __jbd2_debug(int level, const char *file, const char *func, + unsigned int line, const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + if (level > jbd2_journal_enable_debug) + return; + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + printk(KERN_DEBUG "%s: (%s, %u): %pV\n", file, func, line, &vaf); + va_end(args); +} +EXPORT_SYMBOL(__jbd2_debug); +#endif + /* Checksumming functions */ -int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) +static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) { if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) return 1; @@ -105,9 +130,10 @@ int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb) return sb->s_checksum_type == JBD2_CRC32C_CHKSUM; } -static __u32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) +static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) { - __u32 csum, old_csum; + __u32 csum; + __be32 old_csum; old_csum = sb->s_checksum; sb->s_checksum = 0; @@ -117,7 +143,7 @@ static __u32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb) return cpu_to_be32(csum); } -int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) +static int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) { if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) return 1; @@ -125,7 +151,7 @@ int jbd2_superblock_csum_verify(journal_t *j, journal_superblock_t *sb) return sb->s_checksum == jbd2_superblock_csum(j, sb); } -void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) +static void jbd2_superblock_csum_set(journal_t *j, journal_superblock_t *sb) { if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) return; @@ -276,8 +302,8 @@ static void journal_kill_thread(journal_t *journal) journal->j_flags |= JBD2_UNMOUNT; while (journal->j_task) { - wake_up(&journal->j_wait_commit); write_unlock(&journal->j_state_lock); + wake_up(&journal->j_wait_commit); wait_event(journal->j_wait_done_commit, journal->j_task == NULL); write_lock(&journal->j_state_lock); } @@ -303,14 +329,12 @@ static void journal_kill_thread(journal_t *journal) * * If the source buffer has already been modified by a new transaction * since we took the last commit snapshot, we use the frozen copy of - * that data for IO. If we end up using the existing buffer_head's data - * for the write, then we *have* to lock the buffer to prevent anyone - * else from using and possibly modifying it while the IO is in - * progress. + * that data for IO. If we end up using the existing buffer_head's data + * for the write, then we have to make sure nobody modifies it while the + * IO is in progress. do_get_write_access() handles this. * - * The function returns a pointer to the buffer_heads to be used for IO. - * - * We assume that the journal has already been locked in this function. + * The function returns a pointer to the buffer_head to be used for IO. + * * * Return value: * <0: Error @@ -323,15 +347,14 @@ static void journal_kill_thread(journal_t *journal) int jbd2_journal_write_metadata_buffer(transaction_t *transaction, struct journal_head *jh_in, - struct journal_head **jh_out, - unsigned long long blocknr) + struct buffer_head **bh_out, + sector_t blocknr) { int need_copy_out = 0; int done_copy_out = 0; int do_escape = 0; char *mapped_data; struct buffer_head *new_bh; - struct journal_head *new_jh; struct page *new_page; unsigned int new_offset; struct buffer_head *bh_in = jh2bh(jh_in); @@ -360,17 +383,14 @@ retry_alloc: } /* keep subsequent assertions sane */ - new_bh->b_state = 0; - init_buffer(new_bh, NULL, NULL); atomic_set(&new_bh->b_count, 1); - new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */ + jbd_lock_bh_state(bh_in); +repeat: /* * If a new transaction has already done a buffer copy-out, then * we use that version of the data for the commit. */ - jbd_lock_bh_state(bh_in); -repeat: if (jh_in->b_frozen_data) { done_copy_out = 1; new_page = virt_to_page(jh_in->b_frozen_data); @@ -410,7 +430,7 @@ repeat: jbd_unlock_bh_state(bh_in); tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS); if (!tmp) { - jbd2_journal_put_journal_head(new_jh); + brelse(new_bh); return -ENOMEM; } jbd_lock_bh_state(bh_in); @@ -421,7 +441,7 @@ repeat: jh_in->b_frozen_data = tmp; mapped_data = kmap_atomic(new_page); - memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size); + memcpy(tmp, mapped_data + new_offset, bh_in->b_size); kunmap_atomic(mapped_data); new_page = virt_to_page(tmp); @@ -447,14 +467,14 @@ repeat: } set_bh_page(new_bh, new_page, new_offset); - new_jh->b_transaction = NULL; - new_bh->b_size = jh2bh(jh_in)->b_size; - new_bh->b_bdev = transaction->t_journal->j_dev; + new_bh->b_size = bh_in->b_size; + new_bh->b_bdev = journal->j_dev; new_bh->b_blocknr = blocknr; + new_bh->b_private = bh_in; set_buffer_mapped(new_bh); set_buffer_dirty(new_bh); - *jh_out = new_jh; + *bh_out = new_bh; /* * The to-be-written buffer needs to get moved to the io queue, @@ -465,11 +485,9 @@ repeat: spin_lock(&journal->j_list_lock); __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); spin_unlock(&journal->j_list_lock); + set_buffer_shadow(bh_in); jbd_unlock_bh_state(bh_in); - JBUFFER_TRACE(new_jh, "file as BJ_IO"); - jbd2_journal_file_buffer(new_jh, transaction, BJ_IO); - return do_escape | (done_copy_out << 1); } @@ -479,40 +497,15 @@ repeat: */ /* - * __jbd2_log_space_left: Return the number of free blocks left in the journal. - * - * Called with the journal already locked. - * - * Called under j_state_lock - */ - -int __jbd2_log_space_left(journal_t *journal) -{ - int left = journal->j_free; - - /* assert_spin_locked(&journal->j_state_lock); */ - - /* - * Be pessimistic here about the number of those free blocks which - * might be required for log descriptor control blocks. - */ - -#define MIN_LOG_RESERVED_BLOCKS 32 /* Allow for rounding errors */ - - left -= MIN_LOG_RESERVED_BLOCKS; - - if (left <= 0) - return 0; - left -= (left >> 3); - return left; -} - -/* * Called with j_state_lock locked for writing. * Returns true if a transaction commit was started. */ int __jbd2_log_start_commit(journal_t *journal, tid_t target) { + /* Return if the txn has already requested to be committed */ + if (journal->j_commit_request == target) + return 0; + /* * The only transaction we can possibly wait upon is the * currently running transaction (if it exists). Otherwise, @@ -529,6 +522,7 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target) jbd_debug(1, "JBD2: requesting commit %d/%d\n", journal->j_commit_request, journal->j_commit_sequence); + journal->j_running_transaction->t_requested = jiffies; wake_up(&journal->j_wait_commit); return 1; } else if (!tid_geq(journal->j_commit_request, target)) @@ -554,20 +548,17 @@ int jbd2_log_start_commit(journal_t *journal, tid_t tid) } /* - * Force and wait upon a commit if the calling process is not within - * transaction. This is used for forcing out undo-protected data which contains - * bitmaps, when the fs is running out of space. - * - * We can only force the running transaction if we don't have an active handle; - * otherwise, we will deadlock. - * - * Returns true if a transaction was started. + * Force and wait any uncommitted transactions. We can only force the running + * transaction if we don't have an active handle, otherwise, we will deadlock. + * Returns: <0 in case of error, + * 0 if nothing to commit, + * 1 if transaction was successfully committed. */ -int jbd2_journal_force_commit_nested(journal_t *journal) +static int __jbd2_journal_force_commit(journal_t *journal) { transaction_t *transaction = NULL; tid_t tid; - int need_to_start = 0; + int need_to_start = 0, ret = 0; read_lock(&journal->j_state_lock); if (journal->j_running_transaction && !current->journal_info) { @@ -578,16 +569,53 @@ int jbd2_journal_force_commit_nested(journal_t *journal) transaction = journal->j_committing_transaction; if (!transaction) { + /* Nothing to commit */ read_unlock(&journal->j_state_lock); - return 0; /* Nothing to retry */ + return 0; } - tid = transaction->t_tid; read_unlock(&journal->j_state_lock); if (need_to_start) jbd2_log_start_commit(journal, tid); - jbd2_log_wait_commit(journal, tid); - return 1; + ret = jbd2_log_wait_commit(journal, tid); + if (!ret) + ret = 1; + + return ret; +} + +/** + * Force and wait upon a commit if the calling process is not within + * transaction. This is used for forcing out undo-protected data which contains + * bitmaps, when the fs is running out of space. + * + * @journal: journal to force + * Returns true if progress was made. + */ +int jbd2_journal_force_commit_nested(journal_t *journal) +{ + int ret; + + ret = __jbd2_journal_force_commit(journal); + return ret > 0; +} + +/** + * int journal_force_commit() - force any uncommitted transactions + * @journal: journal to force + * + * Caller want unconditional commit. We can only force the running transaction + * if we don't have an active handle, otherwise, we will deadlock. + */ +int jbd2_journal_force_commit(journal_t *journal) +{ + int ret; + + J_ASSERT(!current->journal_info); + ret = __jbd2_journal_force_commit(journal); + if (ret > 0) + ret = 0; + return ret; } /* @@ -674,7 +702,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) read_lock(&journal->j_state_lock); #ifdef CONFIG_JBD2_DEBUG if (!tid_geq(journal->j_commit_request, tid)) { - printk(KERN_EMERG + printk(KERN_ERR "%s: error: j_commit_request=%d, tid=%d\n", __func__, journal->j_commit_request, tid); } @@ -682,22 +710,51 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) while (tid_gt(tid, journal->j_commit_sequence)) { jbd_debug(1, "JBD2: want %d, j_commit_sequence=%d\n", tid, journal->j_commit_sequence); - wake_up(&journal->j_wait_commit); read_unlock(&journal->j_state_lock); + wake_up(&journal->j_wait_commit); wait_event(journal->j_wait_done_commit, !tid_gt(tid, journal->j_commit_sequence)); read_lock(&journal->j_state_lock); } read_unlock(&journal->j_state_lock); - if (unlikely(is_journal_aborted(journal))) { - printk(KERN_EMERG "journal commit I/O error\n"); + if (unlikely(is_journal_aborted(journal))) err = -EIO; - } return err; } /* + * When this function returns the transaction corresponding to tid + * will be completed. If the transaction has currently running, start + * committing that transaction before waiting for it to complete. If + * the transaction id is stale, it is by definition already completed, + * so just return SUCCESS. + */ +int jbd2_complete_transaction(journal_t *journal, tid_t tid) +{ + int need_to_wait = 1; + + read_lock(&journal->j_state_lock); + if (journal->j_running_transaction && + journal->j_running_transaction->t_tid == tid) { + if (journal->j_commit_request != tid) { + /* transaction not yet started, so request it */ + read_unlock(&journal->j_state_lock); + jbd2_log_start_commit(journal, tid); + goto wait_commit; + } + } else if (!(journal->j_committing_transaction && + journal->j_committing_transaction->t_tid == tid)) + need_to_wait = 0; + read_unlock(&journal->j_state_lock); + if (!need_to_wait) + return 0; +wait_commit: + return jbd2_log_wait_commit(journal, tid); +} +EXPORT_SYMBOL(jbd2_complete_transaction); + +/* * Log buffer allocation routines: */ @@ -757,7 +814,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, * But we don't bother doing that, so there will be coherency problems with * mmaps of blockdevs which hold live JBD-controlled filesystems. */ -struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) +struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) { struct buffer_head *bh; unsigned long long blocknr; @@ -776,7 +833,7 @@ struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) set_buffer_uptodate(bh); unlock_buffer(bh); BUFFER_TRACE(bh, "return this buffer"); - return jbd2_journal_add_journal_head(bh); + return bh; } /* @@ -894,13 +951,18 @@ static int jbd2_seq_info_show(struct seq_file *seq, void *v) if (v != SEQ_START_TOKEN) return 0; - seq_printf(seq, "%lu transaction, each up to %u blocks\n", - s->stats->ts_tid, - s->journal->j_max_transaction_buffers); + seq_printf(seq, "%lu transactions (%lu requested), " + "each up to %u blocks\n", + s->stats->ts_tid, s->stats->ts_requested, + s->journal->j_max_transaction_buffers); if (s->stats->ts_tid == 0) return 0; seq_printf(seq, "average: \n %ums waiting for transaction\n", jiffies_to_msecs(s->stats->run.rs_wait / s->stats->ts_tid)); + seq_printf(seq, " %ums request delay\n", + (s->stats->ts_requested == 0) ? 0 : + jiffies_to_msecs(s->stats->run.rs_request_delay / + s->stats->ts_requested)); seq_printf(seq, " %ums running transaction\n", jiffies_to_msecs(s->stats->run.rs_running / s->stats->ts_tid)); seq_printf(seq, " %ums transaction was being locked\n", @@ -933,7 +995,7 @@ static const struct seq_operations jbd2_seq_info_ops = { static int jbd2_seq_info_open(struct inode *inode, struct file *file) { - journal_t *journal = PDE(inode)->data; + journal_t *journal = PDE_DATA(inode); struct jbd2_stats_proc_session *s; int rc, size; @@ -1016,11 +1078,10 @@ static journal_t * journal_init_common (void) return NULL; init_waitqueue_head(&journal->j_wait_transaction_locked); - init_waitqueue_head(&journal->j_wait_logspace); init_waitqueue_head(&journal->j_wait_done_commit); - init_waitqueue_head(&journal->j_wait_checkpoint); init_waitqueue_head(&journal->j_wait_commit); init_waitqueue_head(&journal->j_wait_updates); + init_waitqueue_head(&journal->j_wait_reserved); mutex_init(&journal->j_barrier); mutex_init(&journal->j_checkpoint_mutex); spin_lock_init(&journal->j_revoke_lock); @@ -1030,6 +1091,7 @@ static journal_t * journal_init_common (void) journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE); journal->j_min_batch_time = 0; journal->j_max_batch_time = 15000; /* 15ms */ + atomic_set(&journal->j_reserved_credits, 0); /* The journal is marked for error until we succeed with recovery! */ journal->j_flags = JBD2_ABORT; @@ -1272,6 +1334,7 @@ static int journal_reset(journal_t *journal) static void jbd2_write_superblock(journal_t *journal, int write_op) { struct buffer_head *bh = journal->j_sb_buffer; + journal_superblock_t *sb = journal->j_superblock; int ret; trace_jbd2_write_superblock(journal, write_op); @@ -1293,6 +1356,7 @@ static void jbd2_write_superblock(journal_t *journal, int write_op) clear_buffer_write_io_error(bh); set_buffer_uptodate(bh); } + jbd2_superblock_csum_set(journal, sb); get_bh(bh); bh->b_end_io = end_buffer_write_sync; ret = submit_bh(write_op, bh); @@ -1389,7 +1453,6 @@ void jbd2_journal_update_sb_errno(journal_t *journal) jbd_debug(1, "JBD2: updating superblock error (errno %d)\n", journal->j_errno); sb->s_errno = cpu_to_be32(journal->j_errno); - jbd2_superblock_csum_set(journal, sb); read_unlock(&journal->j_state_lock); jbd2_write_superblock(journal, WRITE_SYNC); @@ -1462,13 +1525,13 @@ static int journal_get_superblock(journal_t *journal) if (JBD2_HAS_COMPAT_FEATURE(journal, JBD2_FEATURE_COMPAT_CHECKSUM) && JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { /* Can't have checksum v1 and v2 on at the same time! */ - printk(KERN_ERR "JBD: Can't enable checksumming v1 and v2 " + printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2 " "at the same time!\n"); goto out; } if (!jbd2_verify_csum_type(journal, sb)) { - printk(KERN_ERR "JBD: Unknown checksum type\n"); + printk(KERN_ERR "JBD2: Unknown checksum type\n"); goto out; } @@ -1476,7 +1539,7 @@ static int journal_get_superblock(journal_t *journal) if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2)) { journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); if (IS_ERR(journal->j_chksum_driver)) { - printk(KERN_ERR "JBD: Cannot load crc32c driver.\n"); + printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n"); err = PTR_ERR(journal->j_chksum_driver); journal->j_chksum_driver = NULL; goto out; @@ -1485,7 +1548,7 @@ static int journal_get_superblock(journal_t *journal) /* Check superblock checksum */ if (!jbd2_superblock_csum_verify(journal, sb)) { - printk(KERN_ERR "JBD: journal checksum error\n"); + printk(KERN_ERR "JBD2: journal checksum error\n"); goto out; } @@ -1771,7 +1834,7 @@ int jbd2_journal_set_features (journal_t *journal, unsigned long compat, journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0); if (IS_ERR(journal->j_chksum_driver)) { - printk(KERN_ERR "JBD: Cannot load crc32c " + printk(KERN_ERR "JBD2: Cannot load crc32c " "driver.\n"); journal->j_chksum_driver = NULL; return 0; @@ -2279,13 +2342,13 @@ static struct journal_head *journal_alloc_journal_head(void) #ifdef CONFIG_JBD2_DEBUG atomic_inc(&nr_journal_heads); #endif - ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); + ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS); if (!ret) { jbd_debug(1, "out of memory for journal_head\n"); pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__); while (!ret) { yield(); - ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); + ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS); } } return ret; @@ -2347,10 +2410,8 @@ struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh) struct journal_head *new_jh = NULL; repeat: - if (!buffer_jbd(bh)) { + if (!buffer_jbd(bh)) new_jh = journal_alloc_journal_head(); - memset(new_jh, 0, sizeof(*new_jh)); - } jbd_lock_bh_journal_head(bh); if (buffer_jbd(bh)) { @@ -2485,45 +2546,6 @@ restart: spin_unlock(&journal->j_list_lock); } -/* - * debugfs tunables - */ -#ifdef CONFIG_JBD2_DEBUG -u8 jbd2_journal_enable_debug __read_mostly; -EXPORT_SYMBOL(jbd2_journal_enable_debug); - -#define JBD2_DEBUG_NAME "jbd2-debug" - -static struct dentry *jbd2_debugfs_dir; -static struct dentry *jbd2_debug; - -static void __init jbd2_create_debugfs_entry(void) -{ - jbd2_debugfs_dir = debugfs_create_dir("jbd2", NULL); - if (jbd2_debugfs_dir) - jbd2_debug = debugfs_create_u8(JBD2_DEBUG_NAME, - S_IRUGO | S_IWUSR, - jbd2_debugfs_dir, - &jbd2_journal_enable_debug); -} - -static void __exit jbd2_remove_debugfs_entry(void) -{ - debugfs_remove(jbd2_debug); - debugfs_remove(jbd2_debugfs_dir); -} - -#else - -static void __init jbd2_create_debugfs_entry(void) -{ -} - -static void __exit jbd2_remove_debugfs_entry(void) -{ -} - -#endif #ifdef CONFIG_PROC_FS @@ -2609,7 +2631,6 @@ static int __init journal_init(void) ret = journal_init_caches(); if (ret == 0) { - jbd2_create_debugfs_entry(); jbd2_create_jbd_stats_proc_entry(); } else { jbd2_journal_destroy_caches(); @@ -2622,9 +2643,8 @@ static void __exit journal_exit(void) #ifdef CONFIG_JBD2_DEBUG int n = atomic_read(&nr_journal_heads); if (n) - printk(KERN_EMERG "JBD2: leaked %d journal_heads!\n", n); + printk(KERN_ERR "JBD2: leaked %d journal_heads!\n", n); #endif - jbd2_remove_debugfs_entry(); jbd2_remove_jbd_stats_proc_entry(); jbd2_journal_destroy_caches(); } diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 626846bac32..3b6bb19d60b 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -178,7 +178,8 @@ static int jbd2_descr_block_csum_verify(journal_t *j, void *buf) { struct jbd2_journal_block_tail *tail; - __u32 provided, calculated; + __be32 provided; + __u32 calculated; if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) return 1; @@ -190,8 +191,7 @@ static int jbd2_descr_block_csum_verify(journal_t *j, calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); tail->t_checksum = provided; - provided = be32_to_cpu(provided); - return provided == calculated; + return provided == cpu_to_be32(calculated); } /* @@ -381,7 +381,8 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh, static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) { struct commit_header *h; - __u32 provided, calculated; + __be32 provided; + __u32 calculated; if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) return 1; @@ -392,25 +393,23 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); h->h_chksum[0] = provided; - provided = be32_to_cpu(provided); - return provided == calculated; + return provided == cpu_to_be32(calculated); } static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, void *buf, __u32 sequence) { - __u32 provided, calculated; + __u32 csum32; + __be32 seq; if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) return 1; - sequence = cpu_to_be32(sequence); - calculated = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&sequence, - sizeof(sequence)); - calculated = jbd2_chksum(j, calculated, buf, j->j_blocksize); - provided = be32_to_cpu(tag->t_checksum); + seq = cpu_to_be32(sequence); + csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); + csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); - return provided == cpu_to_be32(calculated); + return tag->t_checksum == cpu_to_be16(csum32); } static int do_one_pass(journal_t *journal, @@ -595,7 +594,7 @@ static int do_one_pass(journal_t *journal, be32_to_cpu(tmp->h_sequence))) { brelse(obh); success = -EIO; - printk(KERN_ERR "JBD: Invalid " + printk(KERN_ERR "JBD2: Invalid " "checksum recovering " "block %llu in log\n", blocknr); @@ -809,7 +808,8 @@ static int jbd2_revoke_block_csum_verify(journal_t *j, void *buf) { struct jbd2_journal_revoke_tail *tail; - __u32 provided, calculated; + __be32 provided; + __u32 calculated; if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) return 1; @@ -821,8 +821,7 @@ static int jbd2_revoke_block_csum_verify(journal_t *j, calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); tail->r_checksum = provided; - provided = be32_to_cpu(provided); - return provided == calculated; + return provided == cpu_to_be32(calculated); } /* Scan a revoke record, marking all blocks mentioned as revoked. */ diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index f30b80b4ce8..198c9c10276 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -122,9 +122,10 @@ struct jbd2_revoke_table_s #ifdef __KERNEL__ static void write_one_revoke_record(journal_t *, transaction_t *, - struct journal_head **, int *, + struct list_head *, + struct buffer_head **, int *, struct jbd2_revoke_record_s *, int); -static void flush_descriptor(journal_t *, struct journal_head *, int, int); +static void flush_descriptor(journal_t *, struct buffer_head *, int, int); #endif /* Utility functions to maintain the revoke table */ @@ -531,9 +532,10 @@ void jbd2_journal_switch_revoke_table(journal_t *journal) */ void jbd2_journal_write_revoke_records(journal_t *journal, transaction_t *transaction, + struct list_head *log_bufs, int write_op) { - struct journal_head *descriptor; + struct buffer_head *descriptor; struct jbd2_revoke_record_s *record; struct jbd2_revoke_table_s *revoke; struct list_head *hash_list; @@ -553,7 +555,7 @@ void jbd2_journal_write_revoke_records(journal_t *journal, while (!list_empty(hash_list)) { record = (struct jbd2_revoke_record_s *) hash_list->next; - write_one_revoke_record(journal, transaction, + write_one_revoke_record(journal, transaction, log_bufs, &descriptor, &offset, record, write_op); count++; @@ -573,13 +575,14 @@ void jbd2_journal_write_revoke_records(journal_t *journal, static void write_one_revoke_record(journal_t *journal, transaction_t *transaction, - struct journal_head **descriptorp, + struct list_head *log_bufs, + struct buffer_head **descriptorp, int *offsetp, struct jbd2_revoke_record_s *record, int write_op) { int csum_size = 0; - struct journal_head *descriptor; + struct buffer_head *descriptor; int offset; journal_header_t *header; @@ -609,26 +612,26 @@ static void write_one_revoke_record(journal_t *journal, descriptor = jbd2_journal_get_descriptor_buffer(journal); if (!descriptor) return; - header = (journal_header_t *) &jh2bh(descriptor)->b_data[0]; + header = (journal_header_t *)descriptor->b_data; header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); header->h_blocktype = cpu_to_be32(JBD2_REVOKE_BLOCK); header->h_sequence = cpu_to_be32(transaction->t_tid); /* Record it so that we can wait for IO completion later */ - JBUFFER_TRACE(descriptor, "file as BJ_LogCtl"); - jbd2_journal_file_buffer(descriptor, transaction, BJ_LogCtl); + BUFFER_TRACE(descriptor, "file in log_bufs"); + jbd2_file_log_bh(log_bufs, descriptor); offset = sizeof(jbd2_journal_revoke_header_t); *descriptorp = descriptor; } if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) { - * ((__be64 *)(&jh2bh(descriptor)->b_data[offset])) = + * ((__be64 *)(&descriptor->b_data[offset])) = cpu_to_be64(record->blocknr); offset += 8; } else { - * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = + * ((__be32 *)(&descriptor->b_data[offset])) = cpu_to_be32(record->blocknr); offset += 4; } @@ -636,8 +639,7 @@ static void write_one_revoke_record(journal_t *journal, *offsetp = offset; } -static void jbd2_revoke_csum_set(journal_t *j, - struct journal_head *descriptor) +static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh) { struct jbd2_journal_revoke_tail *tail; __u32 csum; @@ -645,12 +647,10 @@ static void jbd2_revoke_csum_set(journal_t *j, if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2)) return; - tail = (struct jbd2_journal_revoke_tail *) - (jh2bh(descriptor)->b_data + j->j_blocksize - + tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize - sizeof(struct jbd2_journal_revoke_tail)); tail->r_checksum = 0; - csum = jbd2_chksum(j, j->j_csum_seed, jh2bh(descriptor)->b_data, - j->j_blocksize); + csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize); tail->r_checksum = cpu_to_be32(csum); } @@ -662,25 +662,24 @@ static void jbd2_revoke_csum_set(journal_t *j, */ static void flush_descriptor(journal_t *journal, - struct journal_head *descriptor, + struct buffer_head *descriptor, int offset, int write_op) { jbd2_journal_revoke_header_t *header; - struct buffer_head *bh = jh2bh(descriptor); if (is_journal_aborted(journal)) { - put_bh(bh); + put_bh(descriptor); return; } - header = (jbd2_journal_revoke_header_t *) jh2bh(descriptor)->b_data; + header = (jbd2_journal_revoke_header_t *)descriptor->b_data; header->r_count = cpu_to_be32(offset); jbd2_revoke_csum_set(journal, descriptor); - set_buffer_jwrite(bh); - BUFFER_TRACE(bh, "write"); - set_buffer_dirty(bh); - write_dirty_buffer(bh, write_op); + set_buffer_jwrite(descriptor); + BUFFER_TRACE(descriptor, "write"); + set_buffer_dirty(descriptor); + write_dirty_buffer(descriptor, write_op); } #endif diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index df9f29760ef..6f0f590cc5a 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -30,6 +30,8 @@ #include <linux/bug.h> #include <linux/module.h> +#include <trace/events/jbd2.h> + static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); static void __jbd2_journal_unfile_buffer(struct journal_head *jh); @@ -87,7 +89,8 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) transaction->t_expires = jiffies + journal->j_commit_interval; spin_lock_init(&transaction->t_handle_lock); atomic_set(&transaction->t_updates, 0); - atomic_set(&transaction->t_outstanding_credits, 0); + atomic_set(&transaction->t_outstanding_credits, + atomic_read(&journal->j_reserved_credits)); atomic_set(&transaction->t_handle_count, 0); INIT_LIST_HEAD(&transaction->t_inode_list); INIT_LIST_HEAD(&transaction->t_private_list); @@ -100,6 +103,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction) journal->j_running_transaction = transaction; transaction->t_max_wait = 0; transaction->t_start = jiffies; + transaction->t_requested = 0; return transaction; } @@ -138,6 +142,112 @@ static inline void update_t_max_wait(transaction_t *transaction, } /* + * Wait until running transaction passes T_LOCKED state. Also starts the commit + * if needed. The function expects running transaction to exist and releases + * j_state_lock. + */ +static void wait_transaction_locked(journal_t *journal) + __releases(journal->j_state_lock) +{ + DEFINE_WAIT(wait); + int need_to_start; + tid_t tid = journal->j_running_transaction->t_tid; + + prepare_to_wait(&journal->j_wait_transaction_locked, &wait, + TASK_UNINTERRUPTIBLE); + need_to_start = !tid_geq(journal->j_commit_request, tid); + read_unlock(&journal->j_state_lock); + if (need_to_start) + jbd2_log_start_commit(journal, tid); + schedule(); + finish_wait(&journal->j_wait_transaction_locked, &wait); +} + +static void sub_reserved_credits(journal_t *journal, int blocks) +{ + atomic_sub(blocks, &journal->j_reserved_credits); + wake_up(&journal->j_wait_reserved); +} + +/* + * Wait until we can add credits for handle to the running transaction. Called + * with j_state_lock held for reading. Returns 0 if handle joined the running + * transaction. Returns 1 if we had to wait, j_state_lock is dropped, and + * caller must retry. + */ +static int add_transaction_credits(journal_t *journal, int blocks, + int rsv_blocks) +{ + transaction_t *t = journal->j_running_transaction; + int needed; + int total = blocks + rsv_blocks; + + /* + * If the current transaction is locked down for commit, wait + * for the lock to be released. + */ + if (t->t_state == T_LOCKED) { + wait_transaction_locked(journal); + return 1; + } + + /* + * If there is not enough space left in the log to write all + * potential buffers requested by this operation, we need to + * stall pending a log checkpoint to free some more log space. + */ + needed = atomic_add_return(total, &t->t_outstanding_credits); + if (needed > journal->j_max_transaction_buffers) { + /* + * If the current transaction is already too large, + * then start to commit it: we can then go back and + * attach this handle to a new transaction. + */ + atomic_sub(total, &t->t_outstanding_credits); + wait_transaction_locked(journal); + return 1; + } + + /* + * The commit code assumes that it can get enough log space + * without forcing a checkpoint. This is *critical* for + * correctness: a checkpoint of a buffer which is also + * associated with a committing transaction creates a deadlock, + * so commit simply cannot force through checkpoints. + * + * We must therefore ensure the necessary space in the journal + * *before* starting to dirty potentially checkpointed buffers + * in the new transaction. + */ + if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) { + atomic_sub(total, &t->t_outstanding_credits); + read_unlock(&journal->j_state_lock); + write_lock(&journal->j_state_lock); + if (jbd2_log_space_left(journal) < jbd2_space_needed(journal)) + __jbd2_log_wait_for_space(journal); + write_unlock(&journal->j_state_lock); + return 1; + } + + /* No reservation? We are done... */ + if (!rsv_blocks) + return 0; + + needed = atomic_add_return(rsv_blocks, &journal->j_reserved_credits); + /* We allow at most half of a transaction to be reserved */ + if (needed > journal->j_max_transaction_buffers / 2) { + sub_reserved_credits(journal, rsv_blocks); + atomic_sub(total, &t->t_outstanding_credits); + read_unlock(&journal->j_state_lock); + wait_event(journal->j_wait_reserved, + atomic_read(&journal->j_reserved_credits) + rsv_blocks + <= journal->j_max_transaction_buffers / 2); + return 1; + } + return 0; +} + +/* * start_this_handle: Given a handle, deal with any locking or stalling * needed to make sure that there is enough journal space for the handle * to begin. Attach the handle to a transaction and set up the @@ -148,18 +258,24 @@ static int start_this_handle(journal_t *journal, handle_t *handle, gfp_t gfp_mask) { transaction_t *transaction, *new_transaction = NULL; - tid_t tid; - int needed, need_to_start; - int nblocks = handle->h_buffer_credits; + int blocks = handle->h_buffer_credits; + int rsv_blocks = 0; unsigned long ts = jiffies; - if (nblocks > journal->j_max_transaction_buffers) { + /* + * 1/2 of transaction can be reserved so we can practically handle + * only 1/2 of maximum transaction size per operation + */ + if (WARN_ON(blocks > journal->j_max_transaction_buffers / 2)) { printk(KERN_ERR "JBD2: %s wants too many credits (%d > %d)\n", - current->comm, nblocks, - journal->j_max_transaction_buffers); + current->comm, blocks, + journal->j_max_transaction_buffers / 2); return -ENOSPC; } + if (handle->h_rsv_handle) + rsv_blocks = handle->h_rsv_handle->h_buffer_credits; + alloc_transaction: if (!journal->j_running_transaction) { new_transaction = kmem_cache_zalloc(transaction_cache, @@ -196,8 +312,12 @@ repeat: return -EROFS; } - /* Wait on the journal's transaction barrier if necessary */ - if (journal->j_barrier_count) { + /* + * Wait on the journal's transaction barrier if necessary. Specifically + * we allow reserved handles to proceed because otherwise commit could + * deadlock on page writeback not being able to complete. + */ + if (!handle->h_reserved && journal->j_barrier_count) { read_unlock(&journal->j_state_lock); wait_event(journal->j_wait_transaction_locked, journal->j_barrier_count == 0); @@ -210,7 +330,7 @@ repeat: goto alloc_transaction; write_lock(&journal->j_state_lock); if (!journal->j_running_transaction && - !journal->j_barrier_count) { + (handle->h_reserved || !journal->j_barrier_count)) { jbd2_get_transaction(journal, new_transaction); new_transaction = NULL; } @@ -220,85 +340,18 @@ repeat: transaction = journal->j_running_transaction; - /* - * If the current transaction is locked down for commit, wait for the - * lock to be released. - */ - if (transaction->t_state == T_LOCKED) { - DEFINE_WAIT(wait); - - prepare_to_wait(&journal->j_wait_transaction_locked, - &wait, TASK_UNINTERRUPTIBLE); - read_unlock(&journal->j_state_lock); - schedule(); - finish_wait(&journal->j_wait_transaction_locked, &wait); - goto repeat; - } - - /* - * If there is not enough space left in the log to write all potential - * buffers requested by this operation, we need to stall pending a log - * checkpoint to free some more log space. - */ - needed = atomic_add_return(nblocks, - &transaction->t_outstanding_credits); - - if (needed > journal->j_max_transaction_buffers) { + if (!handle->h_reserved) { + /* We may have dropped j_state_lock - restart in that case */ + if (add_transaction_credits(journal, blocks, rsv_blocks)) + goto repeat; + } else { /* - * If the current transaction is already too large, then start - * to commit it: we can then go back and attach this handle to - * a new transaction. + * We have handle reserved so we are allowed to join T_LOCKED + * transaction and we don't have to check for transaction size + * and journal space. */ - DEFINE_WAIT(wait); - - jbd_debug(2, "Handle %p starting new commit...\n", handle); - atomic_sub(nblocks, &transaction->t_outstanding_credits); - prepare_to_wait(&journal->j_wait_transaction_locked, &wait, - TASK_UNINTERRUPTIBLE); - tid = transaction->t_tid; - need_to_start = !tid_geq(journal->j_commit_request, tid); - read_unlock(&journal->j_state_lock); - if (need_to_start) - jbd2_log_start_commit(journal, tid); - schedule(); - finish_wait(&journal->j_wait_transaction_locked, &wait); - goto repeat; - } - - /* - * The commit code assumes that it can get enough log space - * without forcing a checkpoint. This is *critical* for - * correctness: a checkpoint of a buffer which is also - * associated with a committing transaction creates a deadlock, - * so commit simply cannot force through checkpoints. - * - * We must therefore ensure the necessary space in the journal - * *before* starting to dirty potentially checkpointed buffers - * in the new transaction. - * - * The worst part is, any transaction currently committing can - * reduce the free space arbitrarily. Be careful to account for - * those buffers when checkpointing. - */ - - /* - * @@@ AKPM: This seems rather over-defensive. We're giving commit - * a _lot_ of headroom: 1/4 of the journal plus the size of - * the committing transaction. Really, we only need to give it - * committing_transaction->t_outstanding_credits plus "enough" for - * the log control blocks. - * Also, this test is inconsistent with the matching one in - * jbd2_journal_extend(). - */ - if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) { - jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); - atomic_sub(nblocks, &transaction->t_outstanding_credits); - read_unlock(&journal->j_state_lock); - write_lock(&journal->j_state_lock); - if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) - __jbd2_log_wait_for_space(journal); - write_unlock(&journal->j_state_lock); - goto repeat; + sub_reserved_credits(journal, blocks); + handle->h_reserved = 0; } /* OK, account for the buffers that this operation expects to @@ -306,13 +359,16 @@ repeat: */ update_t_max_wait(transaction, ts); handle->h_transaction = transaction; + handle->h_requested_credits = blocks; + handle->h_start_jiffies = jiffies; atomic_inc(&transaction->t_updates); atomic_inc(&transaction->t_handle_count); - jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n", - handle, nblocks, + jbd_debug(4, "Handle %p given %d credits (total %d, free %lu)\n", + handle, blocks, atomic_read(&transaction->t_outstanding_credits), - __jbd2_log_space_left(journal)); + jbd2_log_space_left(journal)); read_unlock(&journal->j_state_lock); + current->journal_info = handle; lock_map_acquire(&handle->h_lockdep_map); jbd2_journal_free_transaction(new_transaction); @@ -327,7 +383,6 @@ static handle_t *new_handle(int nblocks) handle_t *handle = jbd2_alloc_handle(GFP_NOFS); if (!handle) return NULL; - memset(handle, 0, sizeof(*handle)); handle->h_buffer_credits = nblocks; handle->h_ref = 1; @@ -344,15 +399,21 @@ static handle_t *new_handle(int nblocks) * * We make sure that the transaction can guarantee at least nblocks of * modified buffers in the log. We block until the log can guarantee - * that much space. - * - * This function is visible to journal users (like ext3fs), so is not - * called with the journal already locked. + * that much space. Additionally, if rsv_blocks > 0, we also create another + * handle with rsv_blocks reserved blocks in the journal. This handle is + * is stored in h_rsv_handle. It is not attached to any particular transaction + * and thus doesn't block transaction commit. If the caller uses this reserved + * handle, it has to set h_rsv_handle to NULL as otherwise jbd2_journal_stop() + * on the parent handle will dispose the reserved one. Reserved handle has to + * be converted to a normal handle using jbd2_journal_start_reserved() before + * it can be used. * * Return a pointer to a newly allocated handle, or an ERR_PTR() value * on failure. */ -handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask) +handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int rsv_blocks, + gfp_t gfp_mask, unsigned int type, + unsigned int line_no) { handle_t *handle = journal_current_handle(); int err; @@ -369,15 +430,31 @@ handle_t *jbd2__journal_start(journal_t *journal, int nblocks, gfp_t gfp_mask) handle = new_handle(nblocks); if (!handle) return ERR_PTR(-ENOMEM); + if (rsv_blocks) { + handle_t *rsv_handle; - current->journal_info = handle; + rsv_handle = new_handle(rsv_blocks); + if (!rsv_handle) { + jbd2_free_handle(handle); + return ERR_PTR(-ENOMEM); + } + rsv_handle->h_reserved = 1; + rsv_handle->h_journal = journal; + handle->h_rsv_handle = rsv_handle; + } err = start_this_handle(journal, handle, gfp_mask); if (err < 0) { + if (handle->h_rsv_handle) + jbd2_free_handle(handle->h_rsv_handle); jbd2_free_handle(handle); - current->journal_info = NULL; - handle = ERR_PTR(err); + return ERR_PTR(err); } + handle->h_type = type; + handle->h_line_no = line_no; + trace_jbd2_handle_start(journal->j_fs_dev->bd_dev, + handle->h_transaction->t_tid, type, + line_no, nblocks); return handle; } EXPORT_SYMBOL(jbd2__journal_start); @@ -385,10 +462,67 @@ EXPORT_SYMBOL(jbd2__journal_start); handle_t *jbd2_journal_start(journal_t *journal, int nblocks) { - return jbd2__journal_start(journal, nblocks, GFP_NOFS); + return jbd2__journal_start(journal, nblocks, 0, GFP_NOFS, 0, 0); } EXPORT_SYMBOL(jbd2_journal_start); +void jbd2_journal_free_reserved(handle_t *handle) +{ + journal_t *journal = handle->h_journal; + + WARN_ON(!handle->h_reserved); + sub_reserved_credits(journal, handle->h_buffer_credits); + jbd2_free_handle(handle); +} +EXPORT_SYMBOL(jbd2_journal_free_reserved); + +/** + * int jbd2_journal_start_reserved(handle_t *handle) - start reserved handle + * @handle: handle to start + * + * Start handle that has been previously reserved with jbd2_journal_reserve(). + * This attaches @handle to the running transaction (or creates one if there's + * not transaction running). Unlike jbd2_journal_start() this function cannot + * block on journal commit, checkpointing, or similar stuff. It can block on + * memory allocation or frozen journal though. + * + * Return 0 on success, non-zero on error - handle is freed in that case. + */ +int jbd2_journal_start_reserved(handle_t *handle, unsigned int type, + unsigned int line_no) +{ + journal_t *journal = handle->h_journal; + int ret = -EIO; + + if (WARN_ON(!handle->h_reserved)) { + /* Someone passed in normal handle? Just stop it. */ + jbd2_journal_stop(handle); + return ret; + } + /* + * Usefulness of mixing of reserved and unreserved handles is + * questionable. So far nobody seems to need it so just error out. + */ + if (WARN_ON(current->journal_info)) { + jbd2_journal_free_reserved(handle); + return ret; + } + + handle->h_journal = NULL; + /* + * GFP_NOFS is here because callers are likely from writeback or + * similarly constrained call sites + */ + ret = start_this_handle(journal, handle, GFP_NOFS); + if (ret < 0) { + jbd2_journal_free_reserved(handle); + return ret; + } + handle->h_type = type; + handle->h_line_no = line_no; + return 0; +} +EXPORT_SYMBOL(jbd2_journal_start_reserved); /** * int jbd2_journal_extend() - extend buffer credits. @@ -413,42 +547,53 @@ EXPORT_SYMBOL(jbd2_journal_start); int jbd2_journal_extend(handle_t *handle, int nblocks) { transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; + journal_t *journal; int result; int wanted; - result = -EIO; + WARN_ON(!transaction); if (is_handle_aborted(handle)) - goto out; + return -EROFS; + journal = transaction->t_journal; result = 1; read_lock(&journal->j_state_lock); /* Don't extend a locked-down transaction! */ - if (handle->h_transaction->t_state != T_RUNNING) { + if (transaction->t_state != T_RUNNING) { jbd_debug(3, "denied handle %p %d blocks: " "transaction not running\n", handle, nblocks); goto error_out; } spin_lock(&transaction->t_handle_lock); - wanted = atomic_read(&transaction->t_outstanding_credits) + nblocks; + wanted = atomic_add_return(nblocks, + &transaction->t_outstanding_credits); if (wanted > journal->j_max_transaction_buffers) { jbd_debug(3, "denied handle %p %d blocks: " "transaction too large\n", handle, nblocks); + atomic_sub(nblocks, &transaction->t_outstanding_credits); goto unlock; } - if (wanted > __jbd2_log_space_left(journal)) { + if (wanted + (wanted >> JBD2_CONTROL_BLOCKS_SHIFT) > + jbd2_log_space_left(journal)) { jbd_debug(3, "denied handle %p %d blocks: " "insufficient log space\n", handle, nblocks); + atomic_sub(nblocks, &transaction->t_outstanding_credits); goto unlock; } + trace_jbd2_handle_extend(journal->j_fs_dev->bd_dev, + transaction->t_tid, + handle->h_type, handle->h_line_no, + handle->h_buffer_credits, + nblocks); + handle->h_buffer_credits += nblocks; - atomic_add(nblocks, &transaction->t_outstanding_credits); + handle->h_requested_credits += nblocks; result = 0; jbd_debug(3, "extended handle %p by %d\n", handle, nblocks); @@ -456,7 +601,6 @@ unlock: spin_unlock(&transaction->t_handle_lock); error_out: read_unlock(&journal->j_state_lock); -out: return result; } @@ -473,19 +617,22 @@ out: * to a running handle, a call to jbd2_journal_restart will commit the * handle's transaction so far and reattach the handle to a new * transaction capabable of guaranteeing the requested number of - * credits. + * credits. We preserve reserved handle if there's any attached to the + * passed in handle. */ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) { transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; + journal_t *journal; tid_t tid; int need_to_start, ret; + WARN_ON(!transaction); /* If we've had an abort of any type, don't even think about * actually doing the restart! */ if (is_handle_aborted(handle)) return 0; + journal = transaction->t_journal; /* * First unlink the handle from its current transaction, and start the @@ -498,12 +645,18 @@ int jbd2__journal_restart(handle_t *handle, int nblocks, gfp_t gfp_mask) spin_lock(&transaction->t_handle_lock); atomic_sub(handle->h_buffer_credits, &transaction->t_outstanding_credits); + if (handle->h_rsv_handle) { + sub_reserved_credits(journal, + handle->h_rsv_handle->h_buffer_credits); + } if (atomic_dec_and_test(&transaction->t_updates)) wake_up(&journal->j_wait_updates); + tid = transaction->t_tid; spin_unlock(&transaction->t_handle_lock); + handle->h_transaction = NULL; + current->journal_info = NULL; jbd_debug(2, "restarting handle %p\n", handle); - tid = transaction->t_tid; need_to_start = !tid_geq(journal->j_commit_request, tid); read_unlock(&journal->j_state_lock); if (need_to_start) @@ -540,6 +693,14 @@ void jbd2_journal_lock_updates(journal_t *journal) write_lock(&journal->j_state_lock); ++journal->j_barrier_count; + /* Wait until there are no reserved handles */ + if (atomic_read(&journal->j_reserved_credits)) { + write_unlock(&journal->j_state_lock); + wait_event(journal->j_wait_reserved, + atomic_read(&journal->j_reserved_credits) == 0); + write_lock(&journal->j_state_lock); + } + /* Wait until there are no running updates */ while (1) { transaction_t *transaction = journal->j_running_transaction; @@ -602,6 +763,12 @@ static void warn_dirty_buffer(struct buffer_head *bh) bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr); } +static int sleep_on_shadow_bh(void *word) +{ + io_schedule(); + return 0; +} + /* * If the buffer is already part of the current transaction, then there * is nothing we need to do. If it is already part of a prior @@ -617,16 +784,16 @@ do_get_write_access(handle_t *handle, struct journal_head *jh, int force_copy) { struct buffer_head *bh; - transaction_t *transaction; + transaction_t *transaction = handle->h_transaction; journal_t *journal; int error; char *frozen_buffer = NULL; int need_copy = 0; + unsigned long start_lock, time_lock; + WARN_ON(!transaction); if (is_handle_aborted(handle)) return -EROFS; - - transaction = handle->h_transaction; journal = transaction->t_journal; jbd_debug(5, "journal_head %p, force_copy %d\n", jh, force_copy); @@ -637,9 +804,16 @@ repeat: /* @@@ Need to check for errors here at some point. */ + start_lock = jiffies; lock_buffer(bh); jbd_lock_bh_state(bh); + /* If it takes too long to lock the buffer, trace it */ + time_lock = jbd2_time_diff(start_lock, jiffies); + if (time_lock > HZ/10) + trace_jbd2_lock_buffer_stall(bh->b_bdev->bd_dev, + jiffies_to_msecs(time_lock)); + /* We now hold the buffer lock so it is safe to query the buffer * state. Is the buffer dirty? * @@ -729,41 +903,29 @@ repeat: * journaled. If the primary copy is already going to * disk then we cannot do copy-out here. */ - if (jh->b_jlist == BJ_Shadow) { - DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow); - wait_queue_head_t *wqh; - - wqh = bit_waitqueue(&bh->b_state, BH_Unshadow); - + if (buffer_shadow(bh)) { JBUFFER_TRACE(jh, "on shadow: sleep"); jbd_unlock_bh_state(bh); - /* commit wakes up all shadow buffers after IO */ - for ( ; ; ) { - prepare_to_wait(wqh, &wait.wait, - TASK_UNINTERRUPTIBLE); - if (jh->b_jlist != BJ_Shadow) - break; - schedule(); - } - finish_wait(wqh, &wait.wait); + wait_on_bit(&bh->b_state, BH_Shadow, + sleep_on_shadow_bh, TASK_UNINTERRUPTIBLE); goto repeat; } - /* Only do the copy if the currently-owning transaction - * still needs it. If it is on the Forget list, the - * committing transaction is past that stage. The - * buffer had better remain locked during the kmalloc, - * but that should be true --- we hold the journal lock - * still and the buffer is already on the BUF_JOURNAL - * list so won't be flushed. + /* + * Only do the copy if the currently-owning transaction still + * needs it. If buffer isn't on BJ_Metadata list, the + * committing transaction is past that stage (here we use the + * fact that BH_Shadow is set under bh_state lock together with + * refiling to BJ_Shadow list and at this point we know the + * buffer doesn't have BH_Shadow set). * * Subtle point, though: if this is a get_undo_access, * then we will be relying on the frozen_data to contain * the new value of the committed_data record after the * transaction, so we HAVE to force the frozen_data copy - * in that case. */ - - if (jh->b_jlist != BJ_Forget || force_copy) { + * in that case. + */ + if (jh->b_jlist == BJ_Metadata || force_copy) { JBUFFER_TRACE(jh, "generate frozen data"); if (!frozen_buffer) { JBUFFER_TRACE(jh, "allocate memory for buffer"); @@ -772,7 +934,7 @@ repeat: jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); if (!frozen_buffer) { - printk(KERN_EMERG + printk(KERN_ERR "%s: OOM for frozen_buffer\n", __func__); JBUFFER_TRACE(jh, "oom!"); @@ -890,14 +1052,16 @@ int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh) int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) { transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; + journal_t *journal; struct journal_head *jh = jbd2_journal_add_journal_head(bh); int err; jbd_debug(5, "journal_head %p\n", jh); + WARN_ON(!transaction); err = -EROFS; if (is_handle_aborted(handle)) goto out; + journal = transaction->t_journal; err = 0; JBUFFER_TRACE(jh, "entry"); @@ -909,7 +1073,6 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) * reused here. */ jbd_lock_bh_state(bh); - spin_lock(&journal->j_list_lock); J_ASSERT_JH(jh, (jh->b_transaction == transaction || jh->b_transaction == NULL || (jh->b_transaction == journal->j_committing_transaction && @@ -932,12 +1095,14 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) jh->b_modified = 0; JBUFFER_TRACE(jh, "file as BJ_Reserved"); + spin_lock(&journal->j_list_lock); __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); } else if (jh->b_transaction == journal->j_committing_transaction) { /* first access by this transaction */ jh->b_modified = 0; JBUFFER_TRACE(jh, "set next transaction"); + spin_lock(&journal->j_list_lock); jh->b_next_transaction = transaction; } spin_unlock(&journal->j_list_lock); @@ -1004,7 +1169,7 @@ repeat: if (!jh->b_committed_data) { committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS); if (!committed_data) { - printk(KERN_EMERG "%s: No memory for committed data\n", + printk(KERN_ERR "%s: No memory for committed data\n", __func__); err = -ENOMEM; goto out; @@ -1047,9 +1212,12 @@ out: void jbd2_journal_set_triggers(struct buffer_head *bh, struct jbd2_buffer_trigger_type *type) { - struct journal_head *jh = bh2jh(bh); + struct journal_head *jh = jbd2_journal_grab_journal_head(bh); + if (WARN_ON(!jh)) + return; jh->b_triggers = type; + jbd2_journal_put_journal_head(jh); } void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data, @@ -1100,18 +1268,21 @@ void jbd2_buffer_abort_trigger(struct journal_head *jh, int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) { transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; - struct journal_head *jh = bh2jh(bh); + journal_t *journal; + struct journal_head *jh; int ret = 0; - jbd_debug(5, "journal_head %p\n", jh); - JBUFFER_TRACE(jh, "entry"); + WARN_ON(!transaction); if (is_handle_aborted(handle)) - goto out; - if (!buffer_jbd(bh)) { + return -EROFS; + journal = transaction->t_journal; + jh = jbd2_journal_grab_journal_head(bh); + if (!jh) { ret = -EUCLEAN; goto out; } + jbd_debug(5, "journal_head %p\n", jh); + JBUFFER_TRACE(jh, "entry"); jbd_lock_bh_state(bh); @@ -1122,7 +1293,10 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) * once a transaction -bzzz */ jh->b_modified = 1; - J_ASSERT_JH(jh, handle->h_buffer_credits > 0); + if (handle->h_buffer_credits <= 0) { + ret = -ENOSPC; + goto out_unlock_bh; + } handle->h_buffer_credits--; } @@ -1137,9 +1311,9 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) JBUFFER_TRACE(jh, "fastpath"); if (unlikely(jh->b_transaction != journal->j_running_transaction)) { - printk(KERN_EMERG "JBD: %s: " + printk(KERN_ERR "JBD2: %s: " "jh->b_transaction (%llu, %p, %u) != " - "journal->j_running_transaction (%p, %u)", + "journal->j_running_transaction (%p, %u)\n", journal->j_devname, (unsigned long long) bh->b_blocknr, jh->b_transaction, @@ -1162,30 +1336,25 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) */ if (jh->b_transaction != transaction) { JBUFFER_TRACE(jh, "already on other transaction"); - if (unlikely(jh->b_transaction != - journal->j_committing_transaction)) { - printk(KERN_EMERG "JBD: %s: " - "jh->b_transaction (%llu, %p, %u) != " - "journal->j_committing_transaction (%p, %u)", + if (unlikely(((jh->b_transaction != + journal->j_committing_transaction)) || + (jh->b_next_transaction != transaction))) { + printk(KERN_ERR "jbd2_journal_dirty_metadata: %s: " + "bad jh for block %llu: " + "transaction (%p, %u), " + "jh->b_transaction (%p, %u), " + "jh->b_next_transaction (%p, %u), jlist %u\n", journal->j_devname, (unsigned long long) bh->b_blocknr, + transaction, transaction->t_tid, jh->b_transaction, - jh->b_transaction ? jh->b_transaction->t_tid : 0, - journal->j_committing_transaction, - journal->j_committing_transaction ? - journal->j_committing_transaction->t_tid : 0); - ret = -EINVAL; - } - if (unlikely(jh->b_next_transaction != transaction)) { - printk(KERN_EMERG "JBD: %s: " - "jh->b_next_transaction (%llu, %p, %u) != " - "transaction (%p, %u)", - journal->j_devname, - (unsigned long long) bh->b_blocknr, + jh->b_transaction ? + jh->b_transaction->t_tid : 0, jh->b_next_transaction, jh->b_next_transaction ? jh->b_next_transaction->t_tid : 0, - transaction, transaction->t_tid); + jh->b_jlist); + WARN_ON(1); ret = -EINVAL; } /* And this case is illegal: we can't reuse another @@ -1198,13 +1367,13 @@ int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) JBUFFER_TRACE(jh, "file as BJ_Metadata"); spin_lock(&journal->j_list_lock); - __jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_Metadata); + __jbd2_journal_file_buffer(jh, transaction, BJ_Metadata); spin_unlock(&journal->j_list_lock); out_unlock_bh: jbd_unlock_bh_state(bh); + jbd2_journal_put_journal_head(jh); out: JBUFFER_TRACE(jh, "exit"); - WARN_ON(ret); /* All errors are bugs, so dump the stack */ return ret; } @@ -1228,16 +1397,20 @@ out: int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) { transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; + journal_t *journal; struct journal_head *jh; int drop_reserve = 0; int err = 0; int was_modified = 0; + WARN_ON(!transaction); + if (is_handle_aborted(handle)) + return -EROFS; + journal = transaction->t_journal; + BUFFER_TRACE(bh, "entry"); jbd_lock_bh_state(bh); - spin_lock(&journal->j_list_lock); if (!buffer_jbd(bh)) goto not_jbd; @@ -1260,7 +1433,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) */ jh->b_modified = 0; - if (jh->b_transaction == handle->h_transaction) { + if (jh->b_transaction == transaction) { J_ASSERT_JH(jh, !jh->b_frozen_data); /* If we are forgetting a buffer which is already part @@ -1290,6 +1463,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) * we know to remove the checkpoint after we commit. */ + spin_lock(&journal->j_list_lock); if (jh->b_cp_transaction) { __jbd2_journal_temp_unlink_buffer(jh); __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); @@ -1302,6 +1476,7 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) goto drop; } } + spin_unlock(&journal->j_list_lock); } else if (jh->b_transaction) { J_ASSERT_JH(jh, (jh->b_transaction == journal->j_committing_transaction)); @@ -1313,7 +1488,9 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) if (jh->b_next_transaction) { J_ASSERT(jh->b_next_transaction == transaction); + spin_lock(&journal->j_list_lock); jh->b_next_transaction = NULL; + spin_unlock(&journal->j_list_lock); /* * only drop a reference if this transaction modified @@ -1325,7 +1502,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) } not_jbd: - spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); __brelse(bh); drop: @@ -1355,19 +1531,21 @@ drop: int jbd2_journal_stop(handle_t *handle) { transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; - int err, wait_for_commit = 0; + journal_t *journal; + int err = 0, wait_for_commit = 0; tid_t tid; pid_t pid; + if (!transaction) + goto free_and_exit; + journal = transaction->t_journal; + J_ASSERT(journal_current_handle() == handle); if (is_handle_aborted(handle)) err = -EIO; - else { + else J_ASSERT(atomic_read(&transaction->t_updates) > 0); - err = 0; - } if (--handle->h_ref > 0) { jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1, @@ -1376,6 +1554,13 @@ int jbd2_journal_stop(handle_t *handle) } jbd_debug(4, "Handle %p going down\n", handle); + trace_jbd2_handle_stats(journal->j_fs_dev->bd_dev, + transaction->t_tid, + handle->h_type, handle->h_line_no, + jiffies - handle->h_start_jiffies, + handle->h_sync, handle->h_requested_credits, + (handle->h_requested_credits - + handle->h_buffer_credits)); /* * Implement synchronous transaction batching. If the handle @@ -1403,9 +1588,12 @@ int jbd2_journal_stop(handle_t *handle) * to perform a synchronous write. We do this to detect the * case where a single process is doing a stream of sync * writes. No point in waiting for joiners in that case. + * + * Setting max_batch_time to 0 disables this completely. */ pid = current->pid; - if (handle->h_sync && journal->j_last_sync_writer != pid) { + if (handle->h_sync && journal->j_last_sync_writer != pid && + journal->j_max_batch_time) { u64 commit_time, trans_time; journal->j_last_sync_writer = pid; @@ -1481,33 +1669,13 @@ int jbd2_journal_stop(handle_t *handle) lock_map_release(&handle->h_lockdep_map); + if (handle->h_rsv_handle) + jbd2_journal_free_reserved(handle->h_rsv_handle); +free_and_exit: jbd2_free_handle(handle); return err; } -/** - * int jbd2_journal_force_commit() - force any uncommitted transactions - * @journal: journal to force - * - * For synchronous operations: force any uncommitted transactions - * to disk. May seem kludgy, but it reuses all the handle batching - * code in a very simple manner. - */ -int jbd2_journal_force_commit(journal_t *journal) -{ - handle_t *handle; - int ret; - - handle = jbd2_journal_start(journal, 1); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - } else { - handle->h_sync = 1; - ret = jbd2_journal_stop(handle); - } - return ret; -} - /* * * List management code snippets: various functions for manipulating the @@ -1564,10 +1732,10 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh) * Remove a buffer from the appropriate transaction list. * * Note that this function can *change* the value of - * bh->b_transaction->t_buffers, t_forget, t_iobuf_list, t_shadow_list, - * t_log_list or t_reserved_list. If the caller is holding onto a copy of one - * of these pointers, it could go bad. Generally the caller needs to re-read - * the pointer from the transaction_t. + * bh->b_transaction->t_buffers, t_forget, t_shadow_list, t_log_list or + * t_reserved_list. If the caller is holding onto a copy of one of these + * pointers, it could go bad. Generally the caller needs to re-read the + * pointer from the transaction_t. * * Called under j_list_lock. */ @@ -1597,15 +1765,9 @@ static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) case BJ_Forget: list = &transaction->t_forget; break; - case BJ_IO: - list = &transaction->t_iobuf_list; - break; case BJ_Shadow: list = &transaction->t_shadow_list; break; - case BJ_LogCtl: - list = &transaction->t_log_list; - break; case BJ_Reserved: list = &transaction->t_reserved_list; break; @@ -1660,11 +1822,11 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) if (buffer_locked(bh) || buffer_dirty(bh)) goto out; - if (jh->b_next_transaction != NULL) + if (jh->b_next_transaction != NULL || jh->b_transaction != NULL) goto out; spin_lock(&journal->j_list_lock); - if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) { + if (jh->b_cp_transaction != NULL) { /* written-back checkpointed metadata buffer */ JBUFFER_TRACE(jh, "remove from checkpoint list"); __jbd2_journal_remove_checkpoint(jh); @@ -1997,18 +2159,23 @@ zap_buffer_unlocked: * void jbd2_journal_invalidatepage() * @journal: journal to use for flush... * @page: page to flush - * @offset: length of page to invalidate. + * @offset: start of the range to invalidate + * @length: length of the range to invalidate * - * Reap page buffers containing data after offset in page. Can return -EBUSY - * if buffers are part of the committing transaction and the page is straddling - * i_size. Caller then has to wait for current commit and try again. + * Reap page buffers containing data after in the specified range in page. + * Can return -EBUSY if buffers are part of the committing transaction and + * the page is straddling i_size. Caller then has to wait for current commit + * and try again. */ int jbd2_journal_invalidatepage(journal_t *journal, struct page *page, - unsigned long offset) + unsigned int offset, + unsigned int length) { struct buffer_head *head, *bh, *next; + unsigned int stop = offset + length; unsigned int curr_off = 0; + int partial_page = (offset || length < PAGE_CACHE_SIZE); int may_free = 1; int ret = 0; @@ -2017,6 +2184,8 @@ int jbd2_journal_invalidatepage(journal_t *journal, if (!page_has_buffers(page)) return 0; + BUG_ON(stop > PAGE_CACHE_SIZE || stop < length); + /* We will potentially be playing with lists other than just the * data lists (especially for journaled data mode), so be * cautious in our locking. */ @@ -2026,10 +2195,13 @@ int jbd2_journal_invalidatepage(journal_t *journal, unsigned int next_off = curr_off + bh->b_size; next = bh->b_this_page; + if (next_off > stop) + return 0; + if (offset <= curr_off) { /* This block is wholly outside the truncation point */ lock_buffer(bh); - ret = journal_unmap_buffer(journal, bh, offset > 0); + ret = journal_unmap_buffer(journal, bh, partial_page); unlock_buffer(bh); if (ret < 0) return ret; @@ -2040,7 +2212,7 @@ int jbd2_journal_invalidatepage(journal_t *journal, } while (bh != head); - if (!offset) { + if (!partial_page) { if (may_free && try_to_free_buffers(page)) J_ASSERT(!page_has_buffers(page)); } @@ -2101,15 +2273,9 @@ void __jbd2_journal_file_buffer(struct journal_head *jh, case BJ_Forget: list = &transaction->t_forget; break; - case BJ_IO: - list = &transaction->t_iobuf_list; - break; case BJ_Shadow: list = &transaction->t_shadow_list; break; - case BJ_LogCtl: - list = &transaction->t_log_list; - break; case BJ_Reserved: list = &transaction->t_reserved_list; break; @@ -2211,10 +2377,12 @@ void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode) { transaction_t *transaction = handle->h_transaction; - journal_t *journal = transaction->t_journal; + journal_t *journal; + WARN_ON(!transaction); if (is_handle_aborted(handle)) - return -EIO; + return -EROFS; + journal = transaction->t_journal; jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino, transaction->t_tid); |
