aboutsummaryrefslogtreecommitdiff
path: root/fs/jbd2/recovery.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/jbd2/recovery.c')
-rw-r--r--fs/jbd2/recovery.c332
1 files changed, 292 insertions, 40 deletions
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index b50be8a044e..3b6bb19d60b 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -20,7 +20,8 @@
#include <linux/fs.h>
#include <linux/jbd2.h>
#include <linux/errno.h>
-#include <linux/slab.h>
+#include <linux/crc32.h>
+#include <linux/blkdev.h>
#endif
/*
@@ -89,7 +90,7 @@ static int do_readahead(journal_t *journal, unsigned int start)
err = jbd2_journal_bmap(journal, next, &blocknr);
if (err) {
- printk (KERN_ERR "JBD: bad block at offset %u\n",
+ printk(KERN_ERR "JBD2: bad block at offset %u\n",
next);
goto failed;
}
@@ -138,14 +139,14 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
*bhp = NULL;
if (offset >= journal->j_maxlen) {
- printk(KERN_ERR "JBD: corrupted journal superblock\n");
+ printk(KERN_ERR "JBD2: corrupted journal superblock\n");
return -EIO;
}
err = jbd2_journal_bmap(journal, offset, &blocknr);
if (err) {
- printk (KERN_ERR "JBD: bad block at offset %u\n",
+ printk(KERN_ERR "JBD2: bad block at offset %u\n",
offset);
return err;
}
@@ -163,7 +164,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
}
if (!buffer_uptodate(bh)) {
- printk (KERN_ERR "JBD: Failed to read block at offset %u\n",
+ printk(KERN_ERR "JBD2: Failed to read block at offset %u\n",
offset);
brelse(bh);
return -EIO;
@@ -173,6 +174,25 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
return 0;
}
+static int jbd2_descr_block_csum_verify(journal_t *j,
+ void *buf)
+{
+ struct jbd2_journal_block_tail *tail;
+ __be32 provided;
+ __u32 calculated;
+
+ if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+ return 1;
+
+ tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize -
+ sizeof(struct jbd2_journal_block_tail));
+ provided = tail->t_checksum;
+ tail->t_checksum = 0;
+ calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
+ tail->t_checksum = provided;
+
+ return provided == cpu_to_be32(calculated);
+}
/*
* Count the number of in-use tags in a journal descriptor block.
@@ -185,6 +205,9 @@ static int count_tags(journal_t *journal, struct buffer_head *bh)
int nr = 0, size = journal->j_blocksize;
int tag_bytes = journal_tag_bytes(journal);
+ if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+ size -= sizeof(struct jbd2_journal_block_tail);
+
tagp = &bh->b_data[sizeof(journal_header_t)];
while ((tagp - bh->b_data + tag_bytes) <= size) {
@@ -192,10 +215,10 @@ static int count_tags(journal_t *journal, struct buffer_head *bh)
nr++;
tagp += tag_bytes;
- if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID)))
+ if (!(tag->t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID)))
tagp += 16;
- if (tag->t_flags & cpu_to_be32(JBD2_FLAG_LAST_TAG))
+ if (tag->t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG))
break;
}
@@ -224,7 +247,7 @@ do { \
*/
int jbd2_journal_recover(journal_t *journal)
{
- int err;
+ int err, err2;
journal_superblock_t * sb;
struct recovery_info info;
@@ -251,10 +274,10 @@ int jbd2_journal_recover(journal_t *journal)
if (!err)
err = do_one_pass(journal, &info, PASS_REPLAY);
- jbd_debug(1, "JBD: recovery, exit status %d, "
+ jbd_debug(1, "JBD2: recovery, exit status %d, "
"recovered transactions %u to %u\n",
err, info.start_transaction, info.end_transaction);
- jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n",
+ jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
/* Restart the log at the next transaction ID, thus invalidating
@@ -262,7 +285,15 @@ int jbd2_journal_recover(journal_t *journal)
journal->j_transaction_sequence = ++info.end_transaction;
jbd2_journal_clear_revoke(journal);
- sync_blockdev(journal->j_fs_dev);
+ err2 = sync_blockdev(journal->j_fs_dev);
+ if (!err)
+ err = err2;
+ /* Make sure all replayed data is on permanent storage */
+ if (journal->j_flags & JBD2_BARRIER) {
+ err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
+ if (!err)
+ err = err2;
+ }
return err;
}
@@ -282,25 +313,24 @@ int jbd2_journal_recover(journal_t *journal)
int jbd2_journal_skip_recovery(journal_t *journal)
{
int err;
- journal_superblock_t * sb;
struct recovery_info info;
memset (&info, 0, sizeof(info));
- sb = journal->j_superblock;
err = do_one_pass(journal, &info, PASS_SCAN);
if (err) {
- printk(KERN_ERR "JBD: error %d scanning journal\n", err);
+ printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
++journal->j_transaction_sequence;
} else {
#ifdef CONFIG_JBD2_DEBUG
- int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence);
-#endif
+ int dropped = info.end_transaction -
+ be32_to_cpu(journal->j_superblock->s_sequence);
jbd_debug(1,
- "JBD: ignoring %d transaction%s from the journal.\n",
+ "JBD2: ignoring %d transaction%s from the journal.\n",
dropped, (dropped == 1) ? "" : "s");
+#endif
journal->j_transaction_sequence = ++info.end_transaction;
}
@@ -311,11 +341,77 @@ int jbd2_journal_skip_recovery(journal_t *journal)
static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag)
{
unsigned long long block = be32_to_cpu(tag->t_blocknr);
- if (tag_bytes > JBD_TAG_SIZE32)
+ if (tag_bytes > JBD2_TAG_SIZE32)
block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
return block;
}
+/*
+ * calc_chksums calculates the checksums for the blocks described in the
+ * descriptor block.
+ */
+static int calc_chksums(journal_t *journal, struct buffer_head *bh,
+ unsigned long *next_log_block, __u32 *crc32_sum)
+{
+ int i, num_blks, err;
+ unsigned long io_block;
+ struct buffer_head *obh;
+
+ num_blks = count_tags(journal, bh);
+ /* Calculate checksum of the descriptor block. */
+ *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
+
+ for (i = 0; i < num_blks; i++) {
+ io_block = (*next_log_block)++;
+ wrap(journal, *next_log_block);
+ err = jread(&obh, journal, io_block);
+ if (err) {
+ printk(KERN_ERR "JBD2: IO error %d recovering block "
+ "%lu in log\n", err, io_block);
+ return 1;
+ } else {
+ *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
+ obh->b_size);
+ }
+ put_bh(obh);
+ }
+ return 0;
+}
+
+static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
+{
+ struct commit_header *h;
+ __be32 provided;
+ __u32 calculated;
+
+ if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+ return 1;
+
+ h = buf;
+ provided = h->h_chksum[0];
+ h->h_chksum[0] = 0;
+ calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
+ h->h_chksum[0] = provided;
+
+ return provided == cpu_to_be32(calculated);
+}
+
+static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
+ void *buf, __u32 sequence)
+{
+ __u32 csum32;
+ __be32 seq;
+
+ if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+ return 1;
+
+ seq = cpu_to_be32(sequence);
+ csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
+ csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize);
+
+ return tag->t_checksum == cpu_to_be16(csum32);
+}
+
static int do_one_pass(journal_t *journal,
struct recovery_info *info, enum passtype pass)
{
@@ -328,11 +424,8 @@ static int do_one_pass(journal_t *journal,
unsigned int sequence;
int blocktype;
int tag_bytes = journal_tag_bytes(journal);
-
- /* Precompute the maximum metadata descriptors in a descriptor block */
- int MAX_BLOCKS_PER_DESC;
- MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t))
- / tag_bytes);
+ __u32 crc32_sum = ~0; /* Transactional Checksums */
+ int descr_csum_size = 0;
/*
* First thing is to establish what we expect to find in the log
@@ -364,7 +457,7 @@ static int do_one_pass(journal_t *journal,
struct buffer_head * obh;
struct buffer_head * nbh;
- cond_resched(); /* We're under lock_kernel() */
+ cond_resched();
/* If we already know where to stop the log traversal,
* check right now that we haven't gone past the end of
@@ -381,7 +474,7 @@ static int do_one_pass(journal_t *journal,
* either the next descriptor block or the final commit
* record. */
- jbd_debug(3, "JBD: checking block %ld\n", next_log_block);
+ jbd_debug(3, "JBD2: checking block %ld\n", next_log_block);
err = jread(&bh, journal, next_log_block);
if (err)
goto failed;
@@ -418,13 +511,39 @@ static int do_one_pass(journal_t *journal,
switch(blocktype) {
case JBD2_DESCRIPTOR_BLOCK:
+ /* Verify checksum first */
+ if (JBD2_HAS_INCOMPAT_FEATURE(journal,
+ JBD2_FEATURE_INCOMPAT_CSUM_V2))
+ descr_csum_size =
+ sizeof(struct jbd2_journal_block_tail);
+ if (descr_csum_size > 0 &&
+ !jbd2_descr_block_csum_verify(journal,
+ bh->b_data)) {
+ err = -EIO;
+ goto failed;
+ }
+
/* If it is a valid descriptor block, replay it
- * in pass REPLAY; otherwise, just skip over the
- * blocks it describes. */
+ * in pass REPLAY; if journal_checksums enabled, then
+ * calculate checksums in PASS_SCAN, otherwise,
+ * just skip over the blocks it describes. */
if (pass != PASS_REPLAY) {
+ if (pass == PASS_SCAN &&
+ JBD2_HAS_COMPAT_FEATURE(journal,
+ JBD2_FEATURE_COMPAT_CHECKSUM) &&
+ !info->end_transaction) {
+ if (calc_chksums(journal, bh,
+ &next_log_block,
+ &crc32_sum)) {
+ put_bh(bh);
+ break;
+ }
+ put_bh(bh);
+ continue;
+ }
next_log_block += count_tags(journal, bh);
wrap(journal, next_log_block);
- brelse(bh);
+ put_bh(bh);
continue;
}
@@ -434,11 +553,11 @@ static int do_one_pass(journal_t *journal,
tagp = &bh->b_data[sizeof(journal_header_t)];
while ((tagp - bh->b_data + tag_bytes)
- <= journal->j_blocksize) {
+ <= journal->j_blocksize - descr_csum_size) {
unsigned long io_block;
tag = (journal_block_tag_t *) tagp;
- flags = be32_to_cpu(tag->t_flags);
+ flags = be16_to_cpu(tag->t_flags);
io_block = next_log_block++;
wrap(journal, next_log_block);
@@ -447,8 +566,8 @@ static int do_one_pass(journal_t *journal,
/* Recover what we can, but
* report failure at the end. */
success = err;
- printk (KERN_ERR
- "JBD: IO error %d recovering "
+ printk(KERN_ERR
+ "JBD2: IO error %d recovering "
"block %ld in log\n",
err, io_block);
} else {
@@ -469,6 +588,19 @@ static int do_one_pass(journal_t *journal,
goto skip_write;
}
+ /* Look for block corruption */
+ if (!jbd2_block_tag_csum_verify(
+ journal, tag, obh->b_data,
+ be32_to_cpu(tmp->h_sequence))) {
+ brelse(obh);
+ success = -EIO;
+ printk(KERN_ERR "JBD2: Invalid "
+ "checksum recovering "
+ "block %llu in log\n",
+ blocknr);
+ continue;
+ }
+
/* Find a buffer for the new
* data being restored */
nbh = __getblk(journal->j_fs_dev,
@@ -476,7 +608,7 @@ static int do_one_pass(journal_t *journal,
journal->j_blocksize);
if (nbh == NULL) {
printk(KERN_ERR
- "JBD: Out of memory "
+ "JBD2: Out of memory "
"during recovery.\n");
err = -ENOMEM;
brelse(bh);
@@ -488,7 +620,7 @@ static int do_one_pass(journal_t *journal,
memcpy(nbh->b_data, obh->b_data,
journal->j_blocksize);
if (flags & JBD2_FLAG_ESCAPE) {
- *((__be32 *)bh->b_data) =
+ *((__be32 *)nbh->b_data) =
cpu_to_be32(JBD2_MAGIC_NUMBER);
}
@@ -516,9 +648,106 @@ static int do_one_pass(journal_t *journal,
continue;
case JBD2_COMMIT_BLOCK:
- /* Found an expected commit block: not much to
- * do other than move on to the next sequence
+ /* How to differentiate between interrupted commit
+ * and journal corruption ?
+ *
+ * {nth transaction}
+ * Checksum Verification Failed
+ * |
+ * ____________________
+ * | |
+ * async_commit sync_commit
+ * | |
+ * | GO TO NEXT "Journal Corruption"
+ * | TRANSACTION
+ * |
+ * {(n+1)th transanction}
+ * |
+ * _______|______________
+ * | |
+ * Commit block found Commit block not found
+ * | |
+ * "Journal Corruption" |
+ * _____________|_________
+ * | |
+ * nth trans corrupt OR nth trans
+ * and (n+1)th interrupted interrupted
+ * before commit block
+ * could reach the disk.
+ * (Cannot find the difference in above
+ * mentioned conditions. Hence assume
+ * "Interrupted Commit".)
+ */
+
+ /* Found an expected commit block: if checksums
+ * are present verify them in PASS_SCAN; else not
+ * much to do other than move on to the next sequence
* number. */
+ if (pass == PASS_SCAN &&
+ JBD2_HAS_COMPAT_FEATURE(journal,
+ JBD2_FEATURE_COMPAT_CHECKSUM)) {
+ int chksum_err, chksum_seen;
+ struct commit_header *cbh =
+ (struct commit_header *)bh->b_data;
+ unsigned found_chksum =
+ be32_to_cpu(cbh->h_chksum[0]);
+
+ chksum_err = chksum_seen = 0;
+
+ if (info->end_transaction) {
+ journal->j_failed_commit =
+ info->end_transaction;
+ brelse(bh);
+ break;
+ }
+
+ if (crc32_sum == found_chksum &&
+ cbh->h_chksum_type == JBD2_CRC32_CHKSUM &&
+ cbh->h_chksum_size ==
+ JBD2_CRC32_CHKSUM_SIZE)
+ chksum_seen = 1;
+ else if (!(cbh->h_chksum_type == 0 &&
+ cbh->h_chksum_size == 0 &&
+ found_chksum == 0 &&
+ !chksum_seen))
+ /*
+ * If fs is mounted using an old kernel and then
+ * kernel with journal_chksum is used then we
+ * get a situation where the journal flag has
+ * checksum flag set but checksums are not
+ * present i.e chksum = 0, in the individual
+ * commit blocks.
+ * Hence to avoid checksum failures, in this
+ * situation, this extra check is added.
+ */
+ chksum_err = 1;
+
+ if (chksum_err) {
+ info->end_transaction = next_commit_ID;
+
+ if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
+ JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)){
+ journal->j_failed_commit =
+ next_commit_ID;
+ brelse(bh);
+ break;
+ }
+ }
+ crc32_sum = ~0;
+ }
+ if (pass == PASS_SCAN &&
+ !jbd2_commit_block_csum_verify(journal,
+ bh->b_data)) {
+ info->end_transaction = next_commit_ID;
+
+ if (!JBD2_HAS_INCOMPAT_FEATURE(journal,
+ JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
+ journal->j_failed_commit =
+ next_commit_ID;
+ brelse(bh);
+ break;
+ }
+ }
brelse(bh);
next_commit_ID++;
continue;
@@ -554,13 +783,14 @@ static int do_one_pass(journal_t *journal,
* transaction marks the end of the valid log.
*/
- if (pass == PASS_SCAN)
- info->end_transaction = next_commit_ID;
- else {
+ if (pass == PASS_SCAN) {
+ if (!info->end_transaction)
+ info->end_transaction = next_commit_ID;
+ } else {
/* It's really bad news if different passes end up at
* different places (but possible due to IO errors). */
if (info->end_transaction != next_commit_ID) {
- printk (KERN_ERR "JBD: recovery pass %d ended at "
+ printk(KERN_ERR "JBD2: recovery pass %d ended at "
"transaction %u, expected %u\n",
pass, next_commit_ID, info->end_transaction);
if (!success)
@@ -574,6 +804,25 @@ static int do_one_pass(journal_t *journal,
return err;
}
+static int jbd2_revoke_block_csum_verify(journal_t *j,
+ void *buf)
+{
+ struct jbd2_journal_revoke_tail *tail;
+ __be32 provided;
+ __u32 calculated;
+
+ if (!JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2))
+ return 1;
+
+ tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize -
+ sizeof(struct jbd2_journal_revoke_tail));
+ provided = tail->r_checksum;
+ tail->r_checksum = 0;
+ calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
+ tail->r_checksum = provided;
+
+ return provided == cpu_to_be32(calculated);
+}
/* Scan a revoke record, marking all blocks mentioned as revoked. */
@@ -588,6 +837,9 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
offset = sizeof(jbd2_journal_revoke_header_t);
max = be32_to_cpu(header->r_count);
+ if (!jbd2_revoke_block_csum_verify(journal, header))
+ return -EINVAL;
+
if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT))
record_len = 8;