aboutsummaryrefslogtreecommitdiff
path: root/fs/jbd2
diff options
context:
space:
mode:
Diffstat (limited to 'fs/jbd2')
-rw-r--r--fs/jbd2/checkpoint.c28
-rw-r--r--fs/jbd2/commit.c61
-rw-r--r--fs/jbd2/journal.c149
-rw-r--r--fs/jbd2/transaction.c91
4 files changed, 186 insertions, 143 deletions
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 6a79fd0a1a3..2c62c5aae82 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -97,10 +97,14 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
!buffer_dirty(bh) && !buffer_write_io_error(bh)) {
+ /*
+ * Get our reference so that bh cannot be freed before
+ * we unlock it
+ */
+ get_bh(bh);
JBUFFER_TRACE(jh, "remove from checkpoint list");
ret = __jbd2_journal_remove_checkpoint(jh) + 1;
jbd_unlock_bh_state(bh);
- jbd2_journal_remove_journal_head(bh);
BUFFER_TRACE(bh, "release");
__brelse(bh);
} else {
@@ -223,8 +227,8 @@ restart:
spin_lock(&journal->j_list_lock);
goto restart;
}
+ get_bh(bh);
if (buffer_locked(bh)) {
- atomic_inc(&bh->b_count);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
wait_on_buffer(bh);
@@ -243,7 +247,6 @@ restart:
*/
released = __jbd2_journal_remove_checkpoint(jh);
jbd_unlock_bh_state(bh);
- jbd2_journal_remove_journal_head(bh);
__brelse(bh);
}
@@ -284,7 +287,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
int ret = 0;
if (buffer_locked(bh)) {
- atomic_inc(&bh->b_count);
+ get_bh(bh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
wait_on_buffer(bh);
@@ -316,12 +319,12 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh,
ret = 1;
if (unlikely(buffer_write_io_error(bh)))
ret = -EIO;
+ get_bh(bh);
J_ASSERT_JH(jh, !buffer_jbddirty(bh));
BUFFER_TRACE(bh, "remove from checkpoint");
__jbd2_journal_remove_checkpoint(jh);
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
- jbd2_journal_remove_journal_head(bh);
__brelse(bh);
} else {
/*
@@ -554,7 +557,8 @@ int jbd2_cleanup_journal_tail(journal_t *journal)
/*
* journal_clean_one_cp_list
*
- * Find all the written-back checkpoint buffers in the given list and release them.
+ * Find all the written-back checkpoint buffers in the given list and
+ * release them.
*
* Called with the journal locked.
* Called with j_list_lock held.
@@ -663,8 +667,8 @@ out:
* checkpoint lists.
*
* The function returns 1 if it frees the transaction, 0 otherwise.
+ * The function can free jh and bh.
*
- * This function is called with the journal locked.
* This function is called with j_list_lock held.
* This function is called with jbd_lock_bh_state(jh2bh(jh))
*/
@@ -684,13 +688,14 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
}
journal = transaction->t_journal;
+ JBUFFER_TRACE(jh, "removing from transaction");
__buffer_unlink(jh);
jh->b_cp_transaction = NULL;
+ jbd2_journal_put_journal_head(jh);
if (transaction->t_checkpoint_list != NULL ||
transaction->t_checkpoint_io_list != NULL)
goto out;
- JBUFFER_TRACE(jh, "transaction has no more buffers");
/*
* There is one special case to worry about: if we have just pulled the
@@ -701,10 +706,8 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
* The locking here around t_state is a bit sleazy.
* See the comment at the end of jbd2_journal_commit_transaction().
*/
- if (transaction->t_state != T_FINISHED) {
- JBUFFER_TRACE(jh, "belongs to running/committing transaction");
+ if (transaction->t_state != T_FINISHED)
goto out;
- }
/* OK, that was the last buffer for the transaction: we can now
safely remove this transaction from the log */
@@ -723,7 +726,6 @@ int __jbd2_journal_remove_checkpoint(struct journal_head *jh)
wake_up(&journal->j_wait_logspace);
ret = 1;
out:
- JBUFFER_TRACE(jh, "exit");
return ret;
}
@@ -742,6 +744,8 @@ void __jbd2_journal_insert_checkpoint(struct journal_head *jh,
J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
+ /* Get reference for checkpointing transaction */
+ jbd2_journal_grab_journal_head(jh2bh(jh));
jh->b_cp_transaction = transaction;
if (!transaction->t_checkpoint_list) {
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 6e28000a4b2..eef6979821a 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -219,7 +219,6 @@ static int journal_submit_data_buffers(journal_t *journal,
ret = err;
spin_lock(&journal->j_list_lock);
J_ASSERT(jinode->i_transaction == commit_transaction);
- commit_transaction->t_flushed_data_blocks = 1;
clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
smp_mb__after_clear_bit();
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
@@ -338,12 +337,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
* all outstanding updates to complete.
*/
-#ifdef COMMIT_STATS
- spin_lock(&journal->j_list_lock);
- summarise_journal_usage(journal);
- spin_unlock(&journal->j_list_lock);
-#endif
-
/* Do we need to erase the effects of a prior jbd2_journal_flush? */
if (journal->j_flags & JBD2_FLUSHED) {
jbd_debug(3, "super block updated\n");
@@ -678,12 +671,16 @@ start_journal_io:
err = 0;
}
+ write_lock(&journal->j_state_lock);
+ J_ASSERT(commit_transaction->t_state == T_COMMIT);
+ commit_transaction->t_state = T_COMMIT_DFLUSH;
+ write_unlock(&journal->j_state_lock);
/*
* If the journal is not located on the file system device,
* then we must flush the file system device before we issue
* the commit record
*/
- if (commit_transaction->t_flushed_data_blocks &&
+ if (commit_transaction->t_need_data_flush &&
(journal->j_fs_dev != journal->j_dev) &&
(journal->j_flags & JBD2_BARRIER))
blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
@@ -760,8 +757,13 @@ wait_for_iobuf:
required. */
JBUFFER_TRACE(jh, "file as BJ_Forget");
jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget);
- /* Wake up any transactions which were waiting for this
- IO to complete */
+ /*
+ * Wake up any transactions which were waiting for this IO to
+ * complete. The barrier must be here so that changes by
+ * jbd2_journal_file_buffer() take effect before wake_up_bit()
+ * does the waitqueue check.
+ */
+ smp_mb();
wake_up_bit(&bh->b_state, BH_Unshadow);
JBUFFER_TRACE(jh, "brelse shadowed buffer");
__brelse(bh);
@@ -800,6 +802,10 @@ wait_for_iobuf:
jbd2_journal_abort(journal, err);
jbd_debug(3, "JBD: commit phase 5\n");
+ write_lock(&journal->j_state_lock);
+ J_ASSERT(commit_transaction->t_state == T_COMMIT_DFLUSH);
+ commit_transaction->t_state = T_COMMIT_JFLUSH;
+ write_unlock(&journal->j_state_lock);
if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
@@ -842,10 +848,16 @@ restart_loop:
while (commit_transaction->t_forget) {
transaction_t *cp_transaction;
struct buffer_head *bh;
+ int try_to_free = 0;
jh = commit_transaction->t_forget;
spin_unlock(&journal->j_list_lock);
bh = jh2bh(jh);
+ /*
+ * Get a reference so that bh cannot be freed before we are
+ * done with it.
+ */
+ get_bh(bh);
jbd_lock_bh_state(bh);
J_ASSERT_JH(jh, jh->b_transaction == commit_transaction);
@@ -908,28 +920,27 @@ restart_loop:
__jbd2_journal_insert_checkpoint(jh, commit_transaction);
if (is_journal_aborted(journal))
clear_buffer_jbddirty(bh);
- JBUFFER_TRACE(jh, "refile for checkpoint writeback");
- __jbd2_journal_refile_buffer(jh);
- jbd_unlock_bh_state(bh);
} else {
J_ASSERT_BH(bh, !buffer_dirty(bh));
- /* The buffer on BJ_Forget list and not jbddirty means
+ /*
+ * The buffer on BJ_Forget list and not jbddirty means
* it has been freed by this transaction and hence it
* could not have been reallocated until this
* transaction has committed. *BUT* it could be
* reallocated once we have written all the data to
* disk and before we process the buffer on BJ_Forget
- * list. */
- JBUFFER_TRACE(jh, "refile or unfile freed buffer");
- __jbd2_journal_refile_buffer(jh);
- if (!jh->b_transaction) {
- jbd_unlock_bh_state(bh);
- /* needs a brelse */
- jbd2_journal_remove_journal_head(bh);
- release_buffer_page(bh);
- } else
- jbd_unlock_bh_state(bh);
+ * list.
+ */
+ if (!jh->b_next_transaction)
+ try_to_free = 1;
}
+ JBUFFER_TRACE(jh, "refile or unfile buffer");
+ __jbd2_journal_refile_buffer(jh);
+ jbd_unlock_bh_state(bh);
+ if (try_to_free)
+ release_buffer_page(bh); /* Drops bh reference */
+ else
+ __brelse(bh);
cond_resched_lock(&journal->j_list_lock);
}
spin_unlock(&journal->j_list_lock);
@@ -955,7 +966,7 @@ restart_loop:
jbd_debug(3, "JBD: commit phase 7\n");
- J_ASSERT(commit_transaction->t_state == T_COMMIT);
+ J_ASSERT(commit_transaction->t_state == T_COMMIT_JFLUSH);
commit_transaction->t_start = jiffies;
stats.run.rs_logging = jbd2_time_diff(stats.run.rs_logging,
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index e0ec3db1c39..0dfa5b598e6 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -479,9 +479,12 @@ int __jbd2_log_space_left(journal_t *journal)
int __jbd2_log_start_commit(journal_t *journal, tid_t target)
{
/*
- * Are we already doing a recent enough commit?
+ * The only transaction we can possibly wait upon is the
+ * currently running transaction (if it exists). Otherwise,
+ * the target tid must be an old one.
*/
- if (!tid_geq(journal->j_commit_request, target)) {
+ if (journal->j_running_transaction &&
+ journal->j_running_transaction->t_tid == target) {
/*
* We want a new commit: OK, mark the request and wakeup the
* commit thread. We do _not_ do the commit ourselves.
@@ -493,7 +496,15 @@ int __jbd2_log_start_commit(journal_t *journal, tid_t target)
journal->j_commit_sequence);
wake_up(&journal->j_wait_commit);
return 1;
- }
+ } else if (!tid_geq(journal->j_commit_request, target))
+ /* This should never happen, but if it does, preserve
+ the evidence before kjournald goes into a loop and
+ increments j_commit_sequence beyond all recognition. */
+ WARN_ONCE(1, "jbd: bad log_start_commit: %u %u %u %u\n",
+ journal->j_commit_request,
+ journal->j_commit_sequence,
+ target, journal->j_running_transaction ?
+ journal->j_running_transaction->t_tid : 0);
return 0;
}
@@ -577,6 +588,47 @@ int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
}
/*
+ * Return 1 if a given transaction has not yet sent barrier request
+ * connected with a transaction commit. If 0 is returned, transaction
+ * may or may not have sent the barrier. Used to avoid sending barrier
+ * twice in common cases.
+ */
+int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid)
+{
+ int ret = 0;
+ transaction_t *commit_trans;
+
+ if (!(journal->j_flags & JBD2_BARRIER))
+ return 0;
+ read_lock(&journal->j_state_lock);
+ /* Transaction already committed? */
+ if (tid_geq(journal->j_commit_sequence, tid))
+ goto out;
+ commit_trans = journal->j_committing_transaction;
+ if (!commit_trans || commit_trans->t_tid != tid) {
+ ret = 1;
+ goto out;
+ }
+ /*
+ * Transaction is being committed and we already proceeded to
+ * submitting a flush to fs partition?
+ */
+ if (journal->j_fs_dev != journal->j_dev) {
+ if (!commit_trans->t_need_data_flush ||
+ commit_trans->t_state >= T_COMMIT_DFLUSH)
+ goto out;
+ } else {
+ if (commit_trans->t_state >= T_COMMIT_JFLUSH)
+ goto out;
+ }
+ ret = 1;
+out:
+ read_unlock(&journal->j_state_lock);
+ return ret;
+}
+EXPORT_SYMBOL(jbd2_trans_will_send_data_barrier);
+
+/*
* Wait for a specified commit to complete.
* The caller may not hold the journal lock.
*/
@@ -2026,10 +2078,9 @@ static void journal_free_journal_head(struct journal_head *jh)
* When a buffer has its BH_JBD bit set it is immune from being released by
* core kernel code, mainly via ->b_count.
*
- * A journal_head may be detached from its buffer_head when the journal_head's
- * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL.
- * Various places in JBD call jbd2_journal_remove_journal_head() to indicate that the
- * journal_head can be dropped if needed.
+ * A journal_head is detached from its buffer_head when the journal_head's
+ * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint
+ * transaction (b_cp_transaction) hold their references to b_jcount.
*
* Various places in the kernel want to attach a journal_head to a buffer_head
* _before_ attaching the journal_head to a transaction. To protect the
@@ -2042,17 +2093,16 @@ static void journal_free_journal_head(struct journal_head *jh)
* (Attach a journal_head if needed. Increments b_jcount)
* struct journal_head *jh = jbd2_journal_add_journal_head(bh);
* ...
+ * (Get another reference for transaction)
+ * jbd2_journal_grab_journal_head(bh);
* jh->b_transaction = xxx;
+ * (Put original reference)
* jbd2_journal_put_journal_head(jh);
- *
- * Now, the journal_head's b_jcount is zero, but it is safe from being released
- * because it has a non-zero b_transaction.
*/
/*
* Give a buffer_head a journal_head.
*
- * Doesn't need the journal lock.
* May sleep.
*/
struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh)
@@ -2116,61 +2166,29 @@ static void __journal_remove_journal_head(struct buffer_head *bh)
struct journal_head *jh = bh2jh(bh);
J_ASSERT_JH(jh, jh->b_jcount >= 0);
-
- get_bh(bh);
- if (jh->b_jcount == 0) {
- if (jh->b_transaction == NULL &&
- jh->b_next_transaction == NULL &&
- jh->b_cp_transaction == NULL) {
- J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
- J_ASSERT_BH(bh, buffer_jbd(bh));
- J_ASSERT_BH(bh, jh2bh(jh) == bh);
- BUFFER_TRACE(bh, "remove journal_head");
- if (jh->b_frozen_data) {
- printk(KERN_WARNING "%s: freeing "
- "b_frozen_data\n",
- __func__);
- jbd2_free(jh->b_frozen_data, bh->b_size);
- }
- if (jh->b_committed_data) {
- printk(KERN_WARNING "%s: freeing "
- "b_committed_data\n",
- __func__);
- jbd2_free(jh->b_committed_data, bh->b_size);
- }
- bh->b_private = NULL;
- jh->b_bh = NULL; /* debug, really */
- clear_buffer_jbd(bh);
- __brelse(bh);
- journal_free_journal_head(jh);
- } else {
- BUFFER_TRACE(bh, "journal_head was locked");
- }
+ J_ASSERT_JH(jh, jh->b_transaction == NULL);
+ J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
+ J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
+ J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
+ J_ASSERT_BH(bh, buffer_jbd(bh));
+ J_ASSERT_BH(bh, jh2bh(jh) == bh);
+ BUFFER_TRACE(bh, "remove journal_head");
+ if (jh->b_frozen_data) {
+ printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
+ jbd2_free(jh->b_frozen_data, bh->b_size);
}
+ if (jh->b_committed_data) {
+ printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
+ jbd2_free(jh->b_committed_data, bh->b_size);
+ }
+ bh->b_private = NULL;
+ jh->b_bh = NULL; /* debug, really */
+ clear_buffer_jbd(bh);
+ journal_free_journal_head(jh);
}
/*
- * jbd2_journal_remove_journal_head(): if the buffer isn't attached to a transaction
- * and has a zero b_jcount then remove and release its journal_head. If we did
- * see that the buffer is not used by any transaction we also "logically"
- * decrement ->b_count.
- *
- * We in fact take an additional increment on ->b_count as a convenience,
- * because the caller usually wants to do additional things with the bh
- * after calling here.
- * The caller of jbd2_journal_remove_journal_head() *must* run __brelse(bh) at some
- * time. Once the caller has run __brelse(), the buffer is eligible for
- * reaping by try_to_free_buffers().
- */
-void jbd2_journal_remove_journal_head(struct buffer_head *bh)
-{
- jbd_lock_bh_journal_head(bh);
- __journal_remove_journal_head(bh);
- jbd_unlock_bh_journal_head(bh);
-}
-
-/*
- * Drop a reference on the passed journal_head. If it fell to zero then try to
+ * Drop a reference on the passed journal_head. If it fell to zero then
* release the journal_head from the buffer_head.
*/
void jbd2_journal_put_journal_head(struct journal_head *jh)
@@ -2180,11 +2198,12 @@ void jbd2_journal_put_journal_head(struct journal_head *jh)
jbd_lock_bh_journal_head(bh);
J_ASSERT_JH(jh, jh->b_jcount > 0);
--jh->b_jcount;
- if (!jh->b_jcount && !jh->b_transaction) {
+ if (!jh->b_jcount) {
__journal_remove_journal_head(bh);
+ jbd_unlock_bh_journal_head(bh);
__brelse(bh);
- }
- jbd_unlock_bh_journal_head(bh);
+ } else
+ jbd_unlock_bh_journal_head(bh);
}
/*
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 05fa77a2371..2d7109414cd 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -30,6 +30,7 @@
#include <linux/module.h>
static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
+static void __jbd2_journal_unfile_buffer(struct journal_head *jh);
/*
* jbd2_get_transaction: obtain a new transaction_t object.
@@ -82,7 +83,7 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
*/
/*
- * Update transiaction's maximum wait time, if debugging is enabled.
+ * Update transaction's maximum wait time, if debugging is enabled.
*
* In order for t_max_wait to be reliable, it must be protected by a
* lock. But doing so will mean that start_this_handle() can not be
@@ -91,11 +92,10 @@ jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
* means that maximum wait time reported by the jbd2_run_stats
* tracepoint will always be zero.
*/
-static inline void update_t_max_wait(transaction_t *transaction)
+static inline void update_t_max_wait(transaction_t *transaction,
+ unsigned long ts)
{
#ifdef CONFIG_JBD2_DEBUG
- unsigned long ts = jiffies;
-
if (jbd2_journal_enable_debug &&
time_after(transaction->t_start, ts)) {
ts = jbd2_time_diff(ts, transaction->t_start);
@@ -121,6 +121,7 @@ static int start_this_handle(journal_t *journal, handle_t *handle,
tid_t tid;
int needed, need_to_start;
int nblocks = handle->h_buffer_credits;
+ unsigned long ts = jiffies;
if (nblocks > journal->j_max_transaction_buffers) {
printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
@@ -271,7 +272,7 @@ repeat:
/* OK, account for the buffers that this operation expects to
* use and add the handle to the running transaction.
*/
- update_t_max_wait(transaction);
+ update_t_max_wait(transaction, ts);
handle->h_transaction = transaction;
atomic_inc(&transaction->t_updates);
atomic_inc(&transaction->t_handle_count);
@@ -316,7 +317,8 @@ static handle_t *new_handle(int nblocks)
* This function is visible to journal users (like ext3fs), so is not
* called with the journal already locked.
*
- * Return a pointer to a newly allocated handle, or NULL on failure
+ * Return a pointer to a newly allocated handle, or an ERR_PTR() value
+ * on failure.
*/
handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask)
{
@@ -763,7 +765,6 @@ repeat:
if (!jh->b_transaction) {
JBUFFER_TRACE(jh, "no transaction");
J_ASSERT_JH(jh, !jh->b_next_transaction);
- jh->b_transaction = transaction;
JBUFFER_TRACE(jh, "file as BJ_Reserved");
spin_lock(&journal->j_list_lock);
__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
@@ -813,7 +814,6 @@ out:
* int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
* @handle: transaction to add buffer modifications to
* @bh: bh to be used for metadata writes
- * @credits: variable that will receive credits for the buffer
*
* Returns an error code or 0 on success.
*
@@ -895,8 +895,6 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
* committed and so it's safe to clear the dirty bit.
*/
clear_buffer_dirty(jh2bh(jh));
- jh->b_transaction = transaction;
-
/* first access by this transaction */
jh->b_modified = 0;
@@ -921,8 +919,8 @@ int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
*/
JBUFFER_TRACE(jh, "cancelling revoke");
jbd2_journal_cancel_revoke(handle, jh);
- jbd2_journal_put_journal_head(jh);
out:
+ jbd2_journal_put_journal_head(jh);
return err;
}
@@ -931,7 +929,6 @@ out:
* non-rewindable consequences
* @handle: transaction
* @bh: buffer to undo
- * @credits: store the number of taken credits here (if not NULL)
*
* Sometimes there is a need to distinguish between metadata which has
* been committed to disk and that which has not. The ext3fs code uses
@@ -1231,8 +1228,6 @@ int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
} else {
__jbd2_journal_unfile_buffer(jh);
- jbd2_journal_remove_journal_head(bh);
- __brelse(bh);
if (!buffer_jbd(bh)) {
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
@@ -1555,19 +1550,32 @@ void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
mark_buffer_dirty(bh); /* Expose it to the VM */
}
-void __jbd2_journal_unfile_buffer(struct journal_head *jh)
+/*
+ * Remove buffer from all transactions.
+ *
+ * Called with bh_state lock and j_list_lock
+ *
+ * jh and bh may be already freed when this function returns.
+ */
+static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
{
__jbd2_journal_temp_unlink_buffer(jh);
jh->b_transaction = NULL;
+ jbd2_journal_put_journal_head(jh);
}
void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
{
- jbd_lock_bh_state(jh2bh(jh));
+ struct buffer_head *bh = jh2bh(jh);
+
+ /* Get reference so that buffer cannot be freed before we unlock it */
+ get_bh(bh);
+ jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock);
__jbd2_journal_unfile_buffer(jh);
spin_unlock(&journal->j_list_lock);
- jbd_unlock_bh_state(jh2bh(jh));
+ jbd_unlock_bh_state(bh);
+ __brelse(bh);
}
/*
@@ -1594,8 +1602,6 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
if (jh->b_jlist == BJ_None) {
JBUFFER_TRACE(jh, "remove from checkpoint list");
__jbd2_journal_remove_checkpoint(jh);
- jbd2_journal_remove_journal_head(bh);
- __brelse(bh);
}
}
spin_unlock(&journal->j_list_lock);
@@ -1658,7 +1664,6 @@ int jbd2_journal_try_to_free_buffers(journal_t *journal,
/*
* We take our own ref against the journal_head here to avoid
* having to add tons of locking around each instance of
- * jbd2_journal_remove_journal_head() and
* jbd2_journal_put_journal_head().
*/
jh = jbd2_journal_grab_journal_head(bh);
@@ -1696,10 +1701,9 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
int may_free = 1;
struct buffer_head *bh = jh2bh(jh);
- __jbd2_journal_unfile_buffer(jh);
-
if (jh->b_cp_transaction) {
JBUFFER_TRACE(jh, "on running+cp transaction");
+ __jbd2_journal_temp_unlink_buffer(jh);
/*
* We don't want to write the buffer anymore, clear the
* bit so that we don't confuse checks in
@@ -1710,8 +1714,7 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
may_free = 0;
} else {
JBUFFER_TRACE(jh, "on running transaction");
- jbd2_journal_remove_journal_head(bh);
- __brelse(bh);
+ __jbd2_journal_unfile_buffer(jh);
}
return may_free;
}
@@ -1989,6 +1992,8 @@ void __jbd2_journal_file_buffer(struct journal_head *jh,
if (jh->b_transaction)
__jbd2_journal_temp_unlink_buffer(jh);
+ else
+ jbd2_journal_grab_journal_head(bh);
jh->b_transaction = transaction;
switch (jlist) {
@@ -2040,9 +2045,10 @@ void jbd2_journal_file_buffer(struct journal_head *jh,
* already started to be used by a subsequent transaction, refile the
* buffer on that transaction's metadata list.
*
- * Called under journal->j_list_lock
- *
+ * Called under j_list_lock
* Called under jbd_lock_bh_state(jh2bh(jh))
+ *
+ * jh and bh may be already free when this function returns
*/
void __jbd2_journal_refile_buffer(struct journal_head *jh)
{
@@ -2066,6 +2072,11 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
was_dirty = test_clear_buffer_jbddirty(bh);
__jbd2_journal_temp_unlink_buffer(jh);
+ /*
+ * We set b_transaction here because b_next_transaction will inherit
+ * our jh reference and thus __jbd2_journal_file_buffer() must not
+ * take a new one.
+ */
jh->b_transaction = jh->b_next_transaction;
jh->b_next_transaction = NULL;
if (buffer_freed(bh))
@@ -2082,30 +2093,21 @@ void __jbd2_journal_refile_buffer(struct journal_head *jh)
}
/*
- * For the unlocked version of this call, also make sure that any
- * hanging journal_head is cleaned up if necessary.
- *
- * __jbd2_journal_refile_buffer is usually called as part of a single locked
- * operation on a buffer_head, in which the caller is probably going to
- * be hooking the journal_head onto other lists. In that case it is up
- * to the caller to remove the journal_head if necessary. For the
- * unlocked jbd2_journal_refile_buffer call, the caller isn't going to be
- * doing anything else to the buffer so we need to do the cleanup
- * ourselves to avoid a jh leak.
- *
- * *** The journal_head may be freed by this call! ***
+ * __jbd2_journal_refile_buffer() with necessary locking added. We take our
+ * bh reference so that we can safely unlock bh.
+ *
+ * The jh and bh may be freed by this call.
*/
void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
{
struct buffer_head *bh = jh2bh(jh);
+ /* Get reference so that buffer cannot be freed before we unlock it */
+ get_bh(bh);
jbd_lock_bh_state(bh);
spin_lock(&journal->j_list_lock);
-
__jbd2_journal_refile_buffer(jh);
jbd_unlock_bh_state(bh);
- jbd2_journal_remove_journal_head(bh);
-
spin_unlock(&journal->j_list_lock);
__brelse(bh);
}
@@ -2147,6 +2149,13 @@ int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode)
jinode->i_next_transaction == transaction)
goto done;
+ /*
+ * We only ever set this variable to 1 so the test is safe. Since
+ * t_need_data_flush is likely to be set, we do the test to save some
+ * cacheline bouncing
+ */
+ if (!transaction->t_need_data_flush)
+ transaction->t_need_data_flush = 1;
/* On some different transaction's list - should be
* the committing one */
if (jinode->i_transaction) {