diff options
Diffstat (limited to 'fs/ocfs2/buffer_head_io.c')
| -rw-r--r-- | fs/ocfs2/buffer_head_io.c | 208 |
1 files changed, 146 insertions, 62 deletions
diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index f136639f5b4..1edcb141f63 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -25,7 +25,6 @@ #include <linux/fs.h> #include <linux/types.h> -#include <linux/slab.h> #include <linux/highmem.h> #include <cluster/masklog.h> @@ -36,16 +35,27 @@ #include "inode.h" #include "journal.h" #include "uptodate.h" - #include "buffer_head_io.h" +#include "ocfs2_trace.h" + +/* + * Bits on bh->b_state used by ocfs2. + * + * These MUST be after the JBD2 bits. Hence, we use BH_JBDPrivateStart. + */ +enum ocfs2_state_bits { + BH_NeedsValidate = BH_JBDPrivateStart, +}; + +/* Expand the magic b_state functions */ +BUFFER_FNS(NeedsValidate, needs_validate); int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, - struct inode *inode) + struct ocfs2_caching_info *ci) { int ret = 0; - mlog_entry("(bh->b_blocknr = %llu, inode=%p)\n", - (unsigned long long)bh->b_blocknr, inode); + trace_ocfs2_write_block((unsigned long long)bh->b_blocknr, ci); BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO); BUG_ON(buffer_jbd(bh)); @@ -55,10 +65,11 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, * can get modified during recovery even if read-only. */ if (ocfs2_is_hard_readonly(osb)) { ret = -EROFS; + mlog_errno(ret); goto out; } - mutex_lock(&OCFS2_I(inode)->ip_io_mutex); + ocfs2_metadata_cache_io_lock(ci); lock_buffer(bh); set_buffer_uptodate(bh); @@ -66,44 +77,119 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, /* remove from dirty list before I/O. */ clear_buffer_dirty(bh); - get_bh(bh); /* for end_buffer_write_sync() */ + get_bh(bh); /* for end_buffer_write_sync() */ bh->b_end_io = end_buffer_write_sync; submit_bh(WRITE, bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) { - ocfs2_set_buffer_uptodate(inode, bh); + ocfs2_set_buffer_uptodate(ci, bh); } else { /* We don't need to remove the clustered uptodate * information for this bh as it's not marked locally * uptodate. */ ret = -EIO; - put_bh(bh); + mlog_errno(ret); } - mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); + ocfs2_metadata_cache_io_unlock(ci); out: - mlog_exit(ret); return ret; } -int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, +int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, + unsigned int nr, struct buffer_head *bhs[]) +{ + int status = 0; + unsigned int i; + struct buffer_head *bh; + + trace_ocfs2_read_blocks_sync((unsigned long long)block, nr); + + if (!nr) + goto bail; + + for (i = 0 ; i < nr ; i++) { + if (bhs[i] == NULL) { + bhs[i] = sb_getblk(osb->sb, block++); + if (bhs[i] == NULL) { + status = -ENOMEM; + mlog_errno(status); + goto bail; + } + } + bh = bhs[i]; + + if (buffer_jbd(bh)) { + trace_ocfs2_read_blocks_sync_jbd( + (unsigned long long)bh->b_blocknr); + continue; + } + + if (buffer_dirty(bh)) { + /* This should probably be a BUG, or + * at least return an error. */ + mlog(ML_ERROR, + "trying to sync read a dirty " + "buffer! (blocknr = %llu), skipping\n", + (unsigned long long)bh->b_blocknr); + continue; + } + + lock_buffer(bh); + if (buffer_jbd(bh)) { + mlog(ML_ERROR, + "block %llu had the JBD bit set " + "while I was in lock_buffer!", + (unsigned long long)bh->b_blocknr); + BUG(); + } + + clear_buffer_uptodate(bh); + get_bh(bh); /* for end_buffer_read_sync() */ + bh->b_end_io = end_buffer_read_sync; + submit_bh(READ, bh); + } + + for (i = nr; i > 0; i--) { + bh = bhs[i - 1]; + + /* No need to wait on the buffer if it's managed by JBD. */ + if (!buffer_jbd(bh)) + wait_on_buffer(bh); + + if (!buffer_uptodate(bh)) { + /* Status won't be cleared from here on out, + * so we can safely record this and loop back + * to cleanup the other buffers. */ + status = -EIO; + put_bh(bh); + bhs[i - 1] = NULL; + } + } + +bail: + return status; +} + +int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, struct buffer_head *bhs[], int flags, - struct inode *inode) + int (*validate)(struct super_block *sb, + struct buffer_head *bh)) { int status = 0; - struct super_block *sb; int i, ignore_cache = 0; struct buffer_head *bh; + struct super_block *sb = ocfs2_metadata_cache_get_super(ci); - mlog_entry("(block=(%llu), nr=(%d), flags=%d, inode=%p)\n", - (unsigned long long)block, nr, flags, inode); + trace_ocfs2_read_blocks_begin(ci, (unsigned long long)block, nr, flags); + BUG_ON(!ci); BUG_ON((flags & OCFS2_BH_READAHEAD) && - (!inode || !(flags & OCFS2_BH_CACHED))); + (flags & OCFS2_BH_IGNORE_CACHE)); - if (osb == NULL || osb->sb == NULL || bhs == NULL) { + if (bhs == NULL) { status = -EINVAL; mlog_errno(status); goto bail; @@ -117,31 +203,23 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, } if (nr == 0) { - mlog(ML_BH_IO, "No buffers will be read!\n"); status = 0; goto bail; } - sb = osb->sb; - - if (flags & OCFS2_BH_CACHED && !inode) - flags &= ~OCFS2_BH_CACHED; - - if (inode) - mutex_lock(&OCFS2_I(inode)->ip_io_mutex); + ocfs2_metadata_cache_io_lock(ci); for (i = 0 ; i < nr ; i++) { if (bhs[i] == NULL) { bhs[i] = sb_getblk(sb, block++); if (bhs[i] == NULL) { - if (inode) - mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); - status = -EIO; + ocfs2_metadata_cache_io_unlock(ci); + status = -ENOMEM; mlog_errno(status); goto bail; } } bh = bhs[i]; - ignore_cache = 0; + ignore_cache = (flags & OCFS2_BH_IGNORE_CACHE); /* There are three read-ahead cases here which we need to * be concerned with. All three assume a buffer has @@ -167,32 +245,26 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, * before our is-it-in-flight check. */ - if (flags & OCFS2_BH_CACHED && - !ocfs2_buffer_uptodate(inode, bh)) { - mlog(ML_UPTODATE, - "bh (%llu), inode %llu not uptodate\n", + if (!ignore_cache && !ocfs2_buffer_uptodate(ci, bh)) { + trace_ocfs2_read_blocks_from_disk( (unsigned long long)bh->b_blocknr, - (unsigned long long)OCFS2_I(inode)->ip_blkno); + (unsigned long long)ocfs2_metadata_cache_owner(ci)); + /* We're using ignore_cache here to say + * "go to disk" */ ignore_cache = 1; } - /* XXX: Can we ever get this and *not* have the cached - * flag set? */ + trace_ocfs2_read_blocks_bh((unsigned long long)bh->b_blocknr, + ignore_cache, buffer_jbd(bh), buffer_dirty(bh)); + if (buffer_jbd(bh)) { - if (!(flags & OCFS2_BH_CACHED) || ignore_cache) - mlog(ML_BH_IO, "trying to sync read a jbd " - "managed bh (blocknr = %llu)\n", - (unsigned long long)bh->b_blocknr); continue; } - if (!(flags & OCFS2_BH_CACHED) || ignore_cache) { + if (ignore_cache) { if (buffer_dirty(bh)) { /* This should probably be a BUG, or * at least return an error. */ - mlog(ML_BH_IO, "asking me to sync read a dirty " - "buffer! (blocknr = %llu)\n", - (unsigned long long)bh->b_blocknr); continue; } @@ -201,7 +273,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, * previously submitted request than we are * done here. */ if ((flags & OCFS2_BH_READAHEAD) - && ocfs2_buffer_read_ahead(inode, bh)) + && ocfs2_buffer_read_ahead(ci, bh)) continue; lock_buffer(bh); @@ -221,15 +293,17 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, * previously read-ahead buffer may have * completed I/O while we were waiting for the * buffer lock. */ - if ((flags & OCFS2_BH_CACHED) + if (!(flags & OCFS2_BH_IGNORE_CACHE) && !(flags & OCFS2_BH_READAHEAD) - && ocfs2_buffer_uptodate(inode, bh)) { + && ocfs2_buffer_uptodate(ci, bh)) { unlock_buffer(bh); continue; } clear_buffer_uptodate(bh); get_bh(bh); /* for end_buffer_read_sync() */ + if (validate) + set_buffer_needs_validate(bh); bh->b_end_io = end_buffer_read_sync; submit_bh(READ, bh); continue; @@ -243,7 +317,7 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, if (!(flags & OCFS2_BH_READAHEAD)) { /* We know this can't have changed as we hold the - * inode sem. Avoid doing any work on the bh if the + * owner sem. Avoid doing any work on the bh if the * journal has it. */ if (!buffer_jbd(bh)) wait_on_buffer(bh); @@ -260,24 +334,34 @@ int ocfs2_read_blocks(struct ocfs2_super *osb, u64 block, int nr, bhs[i] = NULL; continue; } + + if (buffer_needs_validate(bh)) { + /* We never set NeedsValidate if the + * buffer was held by the journal, so + * that better not have changed */ + BUG_ON(buffer_jbd(bh)); + clear_buffer_needs_validate(bh); + status = validate(sb, bh); + if (status) { + put_bh(bh); + bhs[i] = NULL; + continue; + } + } } /* Always set the buffer in the cache, even if it was * a forced read, or read-ahead which hasn't yet * completed. */ - if (inode) - ocfs2_set_buffer_uptodate(inode, bh); + ocfs2_set_buffer_uptodate(ci, bh); } - if (inode) - mutex_unlock(&OCFS2_I(inode)->ip_io_mutex); + ocfs2_metadata_cache_io_unlock(ci); - mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n", - (unsigned long long)block, nr, - (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags); + trace_ocfs2_read_blocks_end((unsigned long long)block, nr, + flags, ignore_cache); bail: - mlog_exit(status); return status; } @@ -302,21 +386,21 @@ static void ocfs2_check_super_or_backup(struct super_block *sb, /* * Write super block and backups doesn't need to collaborate with journal, - * so we don't need to lock ip_io_mutex and inode doesn't need to bea passed + * so we don't need to lock ip_io_mutex and ci doesn't need to bea passed * into this function. */ int ocfs2_write_super_or_backup(struct ocfs2_super *osb, struct buffer_head *bh) { int ret = 0; - - mlog_entry_void(); + struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data; BUG_ON(buffer_jbd(bh)); ocfs2_check_super_or_backup(osb->sb, bh->b_blocknr); if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) { ret = -EROFS; + mlog_errno(ret); goto out; } @@ -328,16 +412,16 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb, get_bh(bh); /* for end_buffer_write_sync() */ bh->b_end_io = end_buffer_write_sync; + ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check); submit_bh(WRITE, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { ret = -EIO; - put_bh(bh); + mlog_errno(ret); } out: - mlog_exit(ret); return ret; } |
