From c969f58ca43fc403c75f5d3da4cf1e21de7afaa0 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 7 Apr 2009 14:13:01 +0100 Subject: GFS2: Update the rw flags After Jens recent updates: http://git.kernel.org/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=a1f242524c3c1f5d40f1c9c343427e34d1aadd6e et al. this is a patch to bring gfs2 uptodate with the core code. Also I've managed to squash another call to ll_rw_block() along the way. There is still one part of the GFS2 I/O paths which are not correctly annotated and that is due to the sharing of the writeback code between the data and metadata address spaces. I would like to change that too, but this patch is still worth doing on its own, I think. Signed-off-by: Steven Whitehouse --- fs/gfs2/log.c | 6 +++--- fs/gfs2/lops.c | 14 ++++++++------ fs/gfs2/meta_io.c | 38 +++++++++++++++++++++++++++----------- 3 files changed, 38 insertions(+), 20 deletions(-) diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 98918a75641..aa62cf5976e 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -120,7 +120,7 @@ __acquires(&sdp->sd_log_lock) lock_buffer(bh); if (test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; - submit_bh(WRITE, bh); + submit_bh(WRITE_SYNC_PLUG, bh); } else { unlock_buffer(bh); brelse(bh); @@ -604,7 +604,7 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) goto skip_barrier; get_bh(bh); - submit_bh(WRITE_BARRIER | (1 << BIO_RW_META), bh); + submit_bh(WRITE_SYNC | (1 << BIO_RW_BARRIER) | (1 << BIO_RW_META), bh); wait_on_buffer(bh); if (buffer_eopnotsupp(bh)) { clear_buffer_eopnotsupp(bh); @@ -664,7 +664,7 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp) lock_buffer(bh); if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; - submit_bh(WRITE, bh); + submit_bh(WRITE_SYNC_PLUG, bh); } else { unlock_buffer(bh); brelse(bh); diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 80e4f5f898b..00315f50fa4 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -13,6 +13,8 @@ #include #include #include +#include +#include #include "gfs2.h" #include "incore.h" @@ -189,7 +191,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp) } gfs2_log_unlock(sdp); - submit_bh(WRITE, bh); + submit_bh(WRITE_SYNC_PLUG, bh); gfs2_log_lock(sdp); n = 0; @@ -199,7 +201,7 @@ static void buf_lo_before_commit(struct gfs2_sbd *sdp) gfs2_log_unlock(sdp); lock_buffer(bd2->bd_bh); bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); - submit_bh(WRITE, bh); + submit_bh(WRITE_SYNC_PLUG, bh); gfs2_log_lock(sdp); if (++n >= num) break; @@ -341,7 +343,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) sdp->sd_log_num_revoke--; if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { - submit_bh(WRITE, bh); + submit_bh(WRITE_SYNC_PLUG, bh); bh = gfs2_log_get_buf(sdp); mh = (struct gfs2_meta_header *)bh->b_data; @@ -358,7 +360,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp) } gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); - submit_bh(WRITE, bh); + submit_bh(WRITE_SYNC_PLUG, bh); } static void revoke_lo_before_scan(struct gfs2_jdesc *jd, @@ -560,7 +562,7 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh, ptr = bh_log_ptr(bh); get_bh(bh); - submit_bh(WRITE, bh); + submit_bh(WRITE_SYNC_PLUG, bh); gfs2_log_lock(sdp); while(!list_empty(list)) { bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list); @@ -586,7 +588,7 @@ static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh, } else { bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh); } - submit_bh(WRITE, bh1); + submit_bh(WRITE_SYNC_PLUG, bh1); gfs2_log_lock(sdp); ptr += 2; } diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 8d6f13256b2..75b2aec06f8 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -201,16 +201,32 @@ struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, struct buffer_head **bhp) { - *bhp = gfs2_getbuf(gl, blkno, CREATE); - if (!buffer_uptodate(*bhp)) { - ll_rw_block(READ_META, 1, bhp); - if (flags & DIO_WAIT) { - int error = gfs2_meta_wait(gl->gl_sbd, *bhp); - if (error) { - brelse(*bhp); - return error; - } - } + struct gfs2_sbd *sdp = gl->gl_sbd; + struct buffer_head *bh; + + if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) + return -EIO; + + *bhp = bh = gfs2_getbuf(gl, blkno, CREATE); + + lock_buffer(bh); + if (buffer_uptodate(bh)) { + unlock_buffer(bh); + return 0; + } + bh->b_end_io = end_buffer_read_sync; + get_bh(bh); + submit_bh(READ_SYNC | (1 << BIO_RW_META), bh); + if (!(flags & DIO_WAIT)) + return 0; + + wait_on_buffer(bh); + if (unlikely(!buffer_uptodate(bh))) { + struct gfs2_trans *tr = current->journal_info; + if (tr && tr->tr_touched) + gfs2_io_error_bh(sdp, bh); + brelse(bh); + return -EIO; } return 0; @@ -404,7 +420,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) if (buffer_uptodate(first_bh)) goto out; if (!buffer_locked(first_bh)) - ll_rw_block(READ_META, 1, &first_bh); + ll_rw_block(READ_SYNC | (1 << BIO_RW_META), 1, &first_bh); dblock++; extlen--; -- cgit v1.2.3-18-g5258 From 4a0f9a321a113392b448e477018311d14fba2b34 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Mon, 20 Apr 2009 08:16:26 +0100 Subject: GFS2: Optimise writepage for metadata This adds a GFS2 specific writepage for metadata, rather than continuing to use the VFS function. As a result we now tag all our metadata I/O with the correct flag so that blktraces will now be less confusing. Also, the generic function was checking for a number of corner cases which cannot happen on the metadata address spaces so that this should be faster too. Signed-off-by: Steven Whitehouse --- fs/gfs2/meta_io.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 57 insertions(+), 9 deletions(-) diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 75b2aec06f8..78a5f431266 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -33,17 +33,65 @@ #include "util.h" #include "ops_address.h" -static int aspace_get_block(struct inode *inode, sector_t lblock, - struct buffer_head *bh_result, int create) +static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc) { - gfs2_assert_warn(inode->i_sb->s_fs_info, 0); - return -EOPNOTSUPP; -} + int err; + struct buffer_head *bh, *head; + int nr_underway = 0; + int write_op = (1 << BIO_RW_META) | ((wbc->sync_mode == WB_SYNC_ALL ? + WRITE_SYNC_PLUG : WRITE)); + + BUG_ON(!PageLocked(page)); + BUG_ON(!page_has_buffers(page)); + + head = page_buffers(page); + bh = head; + + do { + if (!buffer_mapped(bh)) + continue; + /* + * If it's a fully non-blocking write attempt and we cannot + * lock the buffer then redirty the page. Note that this can + * potentially cause a busy-wait loop from pdflush and kswapd + * activity, but those code paths have their own higher-level + * throttling. + */ + if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { + lock_buffer(bh); + } else if (!trylock_buffer(bh)) { + redirty_page_for_writepage(wbc, page); + continue; + } + if (test_clear_buffer_dirty(bh)) { + mark_buffer_async_write(bh); + } else { + unlock_buffer(bh); + } + } while ((bh = bh->b_this_page) != head); + + /* + * The page and its buffers are protected by PageWriteback(), so we can + * drop the bh refcounts early. + */ + BUG_ON(PageWriteback(page)); + set_page_writeback(page); + + do { + struct buffer_head *next = bh->b_this_page; + if (buffer_async_write(bh)) { + submit_bh(write_op, bh); + nr_underway++; + } + bh = next; + } while (bh != head); + unlock_page(page); -static int gfs2_aspace_writepage(struct page *page, - struct writeback_control *wbc) -{ - return block_write_full_page(page, aspace_get_block, wbc); + err = 0; + if (nr_underway == 0) + end_page_writeback(page); + + return err; } static const struct address_space_operations aspace_aops = { -- cgit v1.2.3-18-g5258 From 48bf2b1711dc498494e77705c415ee46bb508fd9 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 29 Apr 2009 13:59:35 +0100 Subject: GFS2: Something nonlinear this way comes! For some reason GFS2 has been missing support for non-linear mappings. This patch fixes that, and also avoids taking any locks for mmap in the O_NOATIME case. In fact we don't actually need to take the lock here at all - just doing file_accessed() would be enough, but we have to take the lock eventually and this helps it hit disk (and thus be seen by other nodes) faster. Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_file.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c index 5d82e91887e..0ee7bd287c5 100644 --- a/fs/gfs2/ops_file.c +++ b/fs/gfs2/ops_file.c @@ -425,33 +425,36 @@ static struct vm_operations_struct gfs2_vm_ops = { .page_mkwrite = gfs2_page_mkwrite, }; - /** * gfs2_mmap - * @file: The file to map * @vma: The VMA which described the mapping * - * Returns: 0 or error code + * There is no need to get a lock here unless we should be updating + * atime. We ignore any locking errors since the only consequence is + * a missed atime update (which will just be deferred until later). + * + * Returns: 0 */ static int gfs2_mmap(struct file *file, struct vm_area_struct *vma) { struct gfs2_inode *ip = GFS2_I(file->f_mapping->host); - struct gfs2_holder i_gh; - int error; - gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &i_gh); - error = gfs2_glock_nq(&i_gh); - if (error) { - gfs2_holder_uninit(&i_gh); - return error; - } + if (!(file->f_flags & O_NOATIME)) { + struct gfs2_holder i_gh; + int error; + gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &i_gh); + error = gfs2_glock_nq(&i_gh); + file_accessed(file); + if (error == 0) + gfs2_glock_dq_uninit(&i_gh); + } vma->vm_ops = &gfs2_vm_ops; + vma->vm_flags |= VM_CAN_NONLINEAR; - gfs2_glock_dq_uninit(&i_gh); - - return error; + return 0; } /** -- cgit v1.2.3-18-g5258 From 7537d81aa7b7cd31b0caeac8091456e93d96fa8d Mon Sep 17 00:00:00 2001 From: Abhijith Das Date: Tue, 12 May 2009 11:16:20 -0500 Subject: GFS2: Fix timestamps on write This patch copies the timestamps from the vfs inode into gfs2 and syncs it to the disk inode during writes. Signed-off-by: Abhijith Das Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_address.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c index a6dde1751e1..e5664210f0d 100644 --- a/fs/gfs2/ops_address.c +++ b/fs/gfs2/ops_address.c @@ -781,10 +781,12 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, unlock_page(page); page_cache_release(page); - if (inode->i_size < to) { - i_size_write(inode, to); - ip->i_disksize = inode->i_size; - di->di_size = cpu_to_be64(inode->i_size); + if (copied) { + if (inode->i_size < to) { + i_size_write(inode, to); + ip->i_disksize = inode->i_size; + } + gfs2_dinode_out(ip, di); mark_inode_dirty(inode); } @@ -824,7 +826,6 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, struct gfs2_sbd *sdp = GFS2_SB(inode); struct buffer_head *dibh; struct gfs2_alloc *al = ip->i_alloc; - struct gfs2_dinode *di; unsigned int from = pos & (PAGE_CACHE_SIZE - 1); unsigned int to = from + len; int ret; @@ -847,11 +848,10 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, gfs2_page_add_databufs(ip, page, from, to); ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); - - if (likely(ret >= 0) && (inode->i_size > ip->i_disksize)) { - di = (struct gfs2_dinode *)dibh->b_data; - ip->i_disksize = inode->i_size; - di->di_size = cpu_to_be64(inode->i_size); + if (ret > 0) { + if (inode->i_size > ip->i_disksize) + ip->i_disksize = inode->i_size; + gfs2_dinode_out(ip, dibh->b_data); mark_inode_dirty(inode); } -- cgit v1.2.3-18-g5258 From a1c0643ff9f360a30644f6e3cd643ca2a5083aea Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 13 May 2009 10:56:52 +0100 Subject: GFS2: Move journal live test at transaction start There seems little point grabbing the transaction glock only to have to release it again if the journal isn't live. This moves the test earlier to avoid grabbing the lock when we don't need it in the first place. Signed-off-by: Steven Whitehouse --- fs/gfs2/trans.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/fs/gfs2/trans.c b/fs/gfs2/trans.c index 053752d4b27..4ef0e9fa354 100644 --- a/fs/gfs2/trans.c +++ b/fs/gfs2/trans.c @@ -33,6 +33,9 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, BUG_ON(current->journal_info); BUG_ON(blocks == 0 && revokes == 0); + if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) + return -EROFS; + tr = kzalloc(sizeof(struct gfs2_trans), GFP_NOFS); if (!tr) return -ENOMEM; @@ -54,12 +57,6 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks, if (error) goto fail_holder_uninit; - if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) { - tr->tr_t_gh.gh_flags |= GL_NOCACHE; - error = -EROFS; - goto fail_gunlock; - } - error = gfs2_log_reserve(sdp, tr->tr_reserved); if (error) goto fail_gunlock; -- cgit v1.2.3-18-g5258 From 48c2b613616235d7c97fda5982f50100a6c79166 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 13 May 2009 14:49:48 +0100 Subject: GFS2: Add commit= mount option It has always been possible to adjust the gfs2 log commit interval, but only from the sysfs interface. This adds a mount option, commit=, which will be familar to ext3 users. The sysfs interface continues to be available as well, although this might be removed in the future. Also this patch cleans up some duplicated structures in the GFS2 sysfs code. Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 1 + fs/gfs2/mount.c | 10 ++++++ fs/gfs2/ops_fstype.c | 4 ++- fs/gfs2/ops_super.c | 13 +++++++- fs/gfs2/sys.c | 92 +++++++++++++++++++--------------------------------- 5 files changed, 60 insertions(+), 60 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 399d1b97804..65f438e9537 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -418,6 +418,7 @@ struct gfs2_args { unsigned int ar_data:2; /* ordered/writeback */ unsigned int ar_meta:1; /* mount metafs */ unsigned int ar_discard:1; /* discard requests */ + int ar_commit; /* Commit interval */ }; struct gfs2_tune { diff --git a/fs/gfs2/mount.c b/fs/gfs2/mount.c index f7e8527a21e..947af151fa2 100644 --- a/fs/gfs2/mount.c +++ b/fs/gfs2/mount.c @@ -45,6 +45,7 @@ enum { Opt_meta, Opt_discard, Opt_nodiscard, + Opt_commit, Opt_err, }; @@ -73,6 +74,7 @@ static const match_table_t tokens = { {Opt_meta, "meta"}, {Opt_discard, "discard"}, {Opt_nodiscard, "nodiscard"}, + {Opt_commit, "commit=%d"}, {Opt_err, NULL} }; @@ -89,6 +91,7 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) char *o; int token; substring_t tmp[MAX_OPT_ARGS]; + int rv; /* Split the options into tokens with the "," character and process them */ @@ -173,6 +176,13 @@ int gfs2_mount_args(struct gfs2_sbd *sdp, struct gfs2_args *args, char *options) case Opt_nodiscard: args->ar_discard = 0; break; + case Opt_commit: + rv = match_int(&tmp[0], &args->ar_commit); + if (rv || args->ar_commit <= 0) { + fs_info(sdp, "commit mount option requires a positive numeric argument\n"); + return rv ? rv : -EINVAL; + } + break; case Opt_err: default: fs_info(sdp, "invalid mount option: %s\n", o); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 1ff9473ea75..7981fbc9fc3 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -55,7 +55,6 @@ static void gfs2_tune_init(struct gfs2_tune *gt) spin_lock_init(>->gt_spin); gt->gt_incore_log_blocks = 1024; - gt->gt_log_flush_secs = 60; gt->gt_recoverd_secs = 60; gt->gt_logd_secs = 1; gt->gt_quota_simul_sync = 64; @@ -1165,6 +1164,7 @@ static int fill_super(struct super_block *sb, void *data, int silent) sdp->sd_args.ar_quota = GFS2_QUOTA_DEFAULT; sdp->sd_args.ar_data = GFS2_DATA_DEFAULT; + sdp->sd_args.ar_commit = 60; error = gfs2_mount_args(sdp, &sdp->sd_args, data); if (error) { @@ -1191,6 +1191,8 @@ static int fill_super(struct super_block *sb, void *data, int silent) GFS2_BASIC_BLOCK_SHIFT; sdp->sd_fsb2bb = 1 << sdp->sd_fsb2bb_shift; + sdp->sd_tune.gt_log_flush_secs = sdp->sd_args.ar_commit; + error = init_names(sdp, silent); if (error) goto fail; diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index 458019569dc..0677a837856 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -436,8 +436,12 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) { struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_args args = sdp->sd_args; /* Default to current settings */ + struct gfs2_tune *gt = &sdp->sd_tune; int error; + spin_lock(>->gt_spin); + args.ar_commit = gt->gt_log_flush_secs; + spin_unlock(>->gt_spin); error = gfs2_mount_args(sdp, &args, data); if (error) return error; @@ -473,6 +477,10 @@ static int gfs2_remount_fs(struct super_block *sb, int *flags, char *data) sb->s_flags |= MS_POSIXACL; else sb->s_flags &= ~MS_POSIXACL; + spin_lock(>->gt_spin); + gt->gt_log_flush_secs = args.ar_commit; + spin_unlock(>->gt_spin); + return 0; } @@ -550,6 +558,7 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) { struct gfs2_sbd *sdp = mnt->mnt_sb->s_fs_info; struct gfs2_args *args = &sdp->sd_args; + int lfsecs; if (is_ancestor(mnt->mnt_root, sdp->sd_master_dir)) seq_printf(s, ",meta"); @@ -610,7 +619,9 @@ static int gfs2_show_options(struct seq_file *s, struct vfsmount *mnt) } if (args->ar_discard) seq_printf(s, ",discard"); - + lfsecs = sdp->sd_tune.gt_log_flush_secs; + if (lfsecs != 60) + seq_printf(s, ",commit=%d", lfsecs); return 0; } diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 7655f5025fe..d53b22edc98 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -26,6 +26,36 @@ #include "util.h" #include "glops.h" +struct gfs2_attr { + struct attribute attr; + ssize_t (*show)(struct gfs2_sbd *, char *); + ssize_t (*store)(struct gfs2_sbd *, const char *, size_t); +}; + +static ssize_t gfs2_attr_show(struct kobject *kobj, struct attribute *attr, + char *buf) +{ + struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); + struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr); + return a->show ? a->show(sdp, buf) : 0; +} + +static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t len) +{ + struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); + struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr); + return a->store ? a->store(sdp, buf, len) : len; +} + +static struct sysfs_ops gfs2_attr_ops = { + .show = gfs2_attr_show, + .store = gfs2_attr_store, +}; + + +static struct kset *gfs2_kset; + static ssize_t id_show(struct gfs2_sbd *sdp, char *buf) { return snprintf(buf, PAGE_SIZE, "%u:%u\n", @@ -212,11 +242,6 @@ static ssize_t demote_rq_store(struct gfs2_sbd *sdp, const char *buf, size_t len return len; } -struct gfs2_attr { - struct attribute attr; - ssize_t (*show)(struct gfs2_sbd *, char *); - ssize_t (*store)(struct gfs2_sbd *, const char *, size_t); -}; #define GFS2_ATTR(name, mode, show, store) \ static struct gfs2_attr gfs2_attr_##name = __ATTR(name, mode, show, store) @@ -246,49 +271,21 @@ static struct attribute *gfs2_attrs[] = { NULL, }; -static ssize_t gfs2_attr_show(struct kobject *kobj, struct attribute *attr, - char *buf) -{ - struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); - struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr); - return a->show ? a->show(sdp, buf) : 0; -} - -static ssize_t gfs2_attr_store(struct kobject *kobj, struct attribute *attr, - const char *buf, size_t len) -{ - struct gfs2_sbd *sdp = container_of(kobj, struct gfs2_sbd, sd_kobj); - struct gfs2_attr *a = container_of(attr, struct gfs2_attr, attr); - return a->store ? a->store(sdp, buf, len) : len; -} - -static struct sysfs_ops gfs2_attr_ops = { - .show = gfs2_attr_show, - .store = gfs2_attr_store, -}; - static struct kobj_type gfs2_ktype = { .default_attrs = gfs2_attrs, .sysfs_ops = &gfs2_attr_ops, }; -static struct kset *gfs2_kset; - /* * display struct lm_lockstruct fields */ -struct lockstruct_attr { - struct attribute attr; - ssize_t (*show)(struct gfs2_sbd *, char *); -}; - #define LOCKSTRUCT_ATTR(name, fmt) \ static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \ { \ return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_lockstruct.ls_##name); \ } \ -static struct lockstruct_attr lockstruct_attr_##name = __ATTR_RO(name) +static struct gfs2_attr lockstruct_attr_##name = __ATTR_RO(name) LOCKSTRUCT_ATTR(jid, "%u\n"); LOCKSTRUCT_ATTR(first, "%u\n"); @@ -401,14 +398,8 @@ static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf) return sprintf(buf, "%d\n", ls->ls_recover_jid_status); } -struct gdlm_attr { - struct attribute attr; - ssize_t (*show)(struct gfs2_sbd *sdp, char *); - ssize_t (*store)(struct gfs2_sbd *sdp, const char *, size_t); -}; - #define GDLM_ATTR(_name,_mode,_show,_store) \ -static struct gdlm_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) +static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); GDLM_ATTR(block, 0644, block_show, block_store); @@ -434,21 +425,12 @@ static struct attribute *lock_module_attrs[] = { NULL, }; -/* - * display struct gfs2_args fields - */ - -struct args_attr { - struct attribute attr; - ssize_t (*show)(struct gfs2_sbd *, char *); -}; - #define ARGS_ATTR(name, fmt) \ static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \ { \ return snprintf(buf, PAGE_SIZE, fmt, sdp->sd_args.ar_##name); \ } \ -static struct args_attr args_attr_##name = __ATTR_RO(name) +static struct gfs2_attr args_attr_##name = __ATTR_RO(name) ARGS_ATTR(lockproto, "%s\n"); ARGS_ATTR(locktable, "%s\n"); @@ -531,14 +513,8 @@ static ssize_t tune_set(struct gfs2_sbd *sdp, unsigned int *field, return len; } -struct tune_attr { - struct attribute attr; - ssize_t (*show)(struct gfs2_sbd *, char *); - ssize_t (*store)(struct gfs2_sbd *, const char *, size_t); -}; - #define TUNE_ATTR_3(name, show, store) \ -static struct tune_attr tune_attr_##name = __ATTR(name, 0644, show, store) +static struct gfs2_attr tune_attr_##name = __ATTR(name, 0644, show, store) #define TUNE_ATTR_2(name, store) \ static ssize_t name##_show(struct gfs2_sbd *sdp, char *buf) \ -- cgit v1.2.3-18-g5258 From 9582d41135c0d362f04ed6bf3dc8d693a7eafee2 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 13 May 2009 15:06:25 +0100 Subject: GFS2: Remove a couple of unused sysfs entries These two tunables are pointless and would never need to be changed anyway. There is also a race between them and umount as the deamons which they refer to might have gone away. The easiest way to fix the race is to remove the interface. Signed-off-by: Steven Whitehouse --- fs/gfs2/sys.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index d53b22edc98..894bf773ec9 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -530,15 +530,6 @@ static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\ } \ TUNE_ATTR_2(name, name##_store) -#define TUNE_ATTR_DAEMON(name, process) \ -static ssize_t name##_store(struct gfs2_sbd *sdp, const char *buf, size_t len)\ -{ \ - ssize_t r = tune_set(sdp, &sdp->sd_tune.gt_##name, 1, buf, len); \ - wake_up_process(sdp->sd_##process); \ - return r; \ -} \ -TUNE_ATTR_2(name, name##_store) - TUNE_ATTR(incore_log_blocks, 0); TUNE_ATTR(log_flush_secs, 0); TUNE_ATTR(quota_warn_period, 0); @@ -550,8 +541,6 @@ TUNE_ATTR(new_files_jdata, 0); TUNE_ATTR(quota_simul_sync, 1); TUNE_ATTR(stall_secs, 1); TUNE_ATTR(statfs_quantum, 1); -TUNE_ATTR_DAEMON(recoverd_secs, recoverd_process); -TUNE_ATTR_DAEMON(logd_secs, logd_process); TUNE_ATTR_3(quota_scale, quota_scale_show, quota_scale_store); static struct attribute *tune_attrs[] = { @@ -565,8 +554,6 @@ static struct attribute *tune_attrs[] = { &tune_attr_quota_simul_sync.attr, &tune_attr_stall_secs.attr, &tune_attr_statfs_quantum.attr, - &tune_attr_recoverd_secs.attr, - &tune_attr_logd_secs.attr, &tune_attr_quota_scale.attr, &tune_attr_new_files_jdata.attr, NULL, -- cgit v1.2.3-18-g5258 From fe64d517df0970a68417184a12fcd4ba0589cc28 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 19 May 2009 10:01:18 +0100 Subject: GFS2: Umount recovery race fix This patch fixes a race condition where we can receive recovery requests part way through processing a umount. This was causing problems since the recovery thread had already gone away. Looking in more detail at the recovery code, it was really trying to implement a slight variation on a work queue, and that happens to align nicely with the recently introduced slow-work subsystem. As a result I've updated the code to use slow-work, rather than its own home grown variety of work queue. When using the wait_on_bit() function, I noticed that the wait function that was supplied as an argument was appearing in the WCHAN field, so I've updated the function names in order to produce more meaningful output. Signed-off-by: Steven Whitehouse --- fs/gfs2/Kconfig | 1 + fs/gfs2/glock.c | 21 +++++++++-- fs/gfs2/incore.h | 14 +++---- fs/gfs2/main.c | 8 ++++ fs/gfs2/ops_fstype.c | 20 +++------- fs/gfs2/ops_super.c | 25 ++++++++++++- fs/gfs2/recovery.c | 102 +++++++++++++++++---------------------------------- fs/gfs2/recovery.h | 2 +- fs/gfs2/sys.c | 53 +++++++++++++------------- 9 files changed, 122 insertions(+), 124 deletions(-) diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index 3a981b7f64c..cad957cdb1e 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig @@ -7,6 +7,7 @@ config GFS2_FS select IP_SCTP if DLM_SCTP select FS_POSIX_ACL select CRC32 + select SLOW_WORK help A cluster filesystem. diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index ff498109048..2bf62bcc518 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -796,22 +796,37 @@ void gfs2_holder_uninit(struct gfs2_holder *gh) gh->gh_ip = 0; } -static int just_schedule(void *word) +/** + * gfs2_glock_holder_wait + * @word: unused + * + * This function and gfs2_glock_demote_wait both show up in the WCHAN + * field. Thus I've separated these otherwise identical functions in + * order to be more informative to the user. + */ + +static int gfs2_glock_holder_wait(void *word) { schedule(); return 0; } +static int gfs2_glock_demote_wait(void *word) +{ + schedule(); + return 0; +} + static void wait_on_holder(struct gfs2_holder *gh) { might_sleep(); - wait_on_bit(&gh->gh_iflags, HIF_WAIT, just_schedule, TASK_UNINTERRUPTIBLE); + wait_on_bit(&gh->gh_iflags, HIF_WAIT, gfs2_glock_holder_wait, TASK_UNINTERRUPTIBLE); } static void wait_on_demote(struct gfs2_glock *gl) { might_sleep(); - wait_on_bit(&gl->gl_flags, GLF_DEMOTE, just_schedule, TASK_UNINTERRUPTIBLE); + wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); } /** diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 65f438e9537..0060e9564bb 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -12,6 +12,7 @@ #include #include +#include #include #include @@ -376,11 +377,11 @@ struct gfs2_journal_extent { struct gfs2_jdesc { struct list_head jd_list; struct list_head extent_list; - + struct slow_work jd_work; struct inode *jd_inode; + unsigned long jd_flags; +#define JDF_RECOVERY 1 unsigned int jd_jid; - int jd_dirty; - unsigned int jd_blocks; }; @@ -390,9 +391,6 @@ struct gfs2_statfs_change_host { s64 sc_dinodes; }; -#define GFS2_GLOCKD_DEFAULT 1 -#define GFS2_GLOCKD_MAX 16 - #define GFS2_QUOTA_DEFAULT GFS2_QUOTA_OFF #define GFS2_QUOTA_OFF 0 #define GFS2_QUOTA_ACCOUNT 1 @@ -427,7 +425,6 @@ struct gfs2_tune { unsigned int gt_incore_log_blocks; unsigned int gt_log_flush_secs; - unsigned int gt_recoverd_secs; unsigned int gt_logd_secs; unsigned int gt_quota_simul_sync; /* Max quotavals to sync at once */ @@ -448,6 +445,7 @@ enum { SDF_JOURNAL_LIVE = 1, SDF_SHUTDOWN = 2, SDF_NOBARRIERS = 3, + SDF_NORECOVERY = 4, }; #define GFS2_FSNAME_LEN 256 @@ -494,7 +492,6 @@ struct lm_lockstruct { unsigned long ls_flags; dlm_lockspace_t *ls_dlm; - int ls_recover_jid; int ls_recover_jid_done; int ls_recover_jid_status; }; @@ -583,7 +580,6 @@ struct gfs2_sbd { /* Daemon stuff */ - struct task_struct *sd_recoverd_process; struct task_struct *sd_logd_process; struct task_struct *sd_quotad_process; diff --git a/fs/gfs2/main.c b/fs/gfs2/main.c index a6892ed0840..eacd78a5d08 100644 --- a/fs/gfs2/main.c +++ b/fs/gfs2/main.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -113,12 +114,18 @@ static int __init init_gfs2_fs(void) if (error) goto fail_unregister; + error = slow_work_register_user(); + if (error) + goto fail_slow; + gfs2_register_debugfs(); printk("GFS2 (built %s %s) installed\n", __DATE__, __TIME__); return 0; +fail_slow: + unregister_filesystem(&gfs2meta_fs_type); fail_unregister: unregister_filesystem(&gfs2_fs_type); fail: @@ -156,6 +163,7 @@ static void __exit exit_gfs2_fs(void) gfs2_unregister_debugfs(); unregister_filesystem(&gfs2_fs_type); unregister_filesystem(&gfs2meta_fs_type); + slow_work_unregister_user(); kmem_cache_destroy(gfs2_quotad_cachep); kmem_cache_destroy(gfs2_rgrpd_cachep); diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 7981fbc9fc3..2cd1164c88d 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "gfs2.h" #include "incore.h" @@ -55,7 +56,6 @@ static void gfs2_tune_init(struct gfs2_tune *gt) spin_lock_init(>->gt_spin); gt->gt_incore_log_blocks = 1024; - gt->gt_recoverd_secs = 60; gt->gt_logd_secs = 1; gt->gt_quota_simul_sync = 64; gt->gt_quota_warn_period = 10; @@ -675,6 +675,7 @@ static int gfs2_jindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ji_gh) break; INIT_LIST_HEAD(&jd->extent_list); + slow_work_init(&jd->jd_work, &gfs2_recover_ops); jd->jd_inode = gfs2_lookupi(sdp->sd_jindex, &name, 1); if (!jd->jd_inode || IS_ERR(jd->jd_inode)) { if (!jd->jd_inode) @@ -700,14 +701,13 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) { struct inode *master = sdp->sd_master_dir->d_inode; struct gfs2_holder ji_gh; - struct task_struct *p; struct gfs2_inode *ip; int jindex = 1; int error = 0; if (undo) { jindex = 0; - goto fail_recoverd; + goto fail_jinode_gh; } sdp->sd_jindex = gfs2_lookup_simple(master, "jindex"); @@ -800,18 +800,8 @@ static int init_journal(struct gfs2_sbd *sdp, int undo) gfs2_glock_dq_uninit(&ji_gh); jindex = 0; - p = kthread_run(gfs2_recoverd, sdp, "gfs2_recoverd"); - error = IS_ERR(p); - if (error) { - fs_err(sdp, "can't start recoverd thread: %d\n", error); - goto fail_jinode_gh; - } - sdp->sd_recoverd_process = p; - return 0; -fail_recoverd: - kthread_stop(sdp->sd_recoverd_process); fail_jinode_gh: if (!sdp->sd_args.ar_spectator) gfs2_glock_dq_uninit(&sdp->sd_jinode_gh); @@ -1172,8 +1162,10 @@ static int fill_super(struct super_block *sb, void *data, int silent) goto fail; } - if (sdp->sd_args.ar_spectator) + if (sdp->sd_args.ar_spectator) { sb->s_flags |= MS_RDONLY; + set_bit(SDF_NORECOVERY, &sdp->sd_flags); + } if (sdp->sd_args.ar_posix_acl) sb->s_flags |= MS_POSIXACL; diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index 0677a837856..a3c2272e7ca 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -121,6 +121,12 @@ static int gfs2_make_fs_ro(struct gfs2_sbd *sdp) return error; } +static int gfs2_umount_recovery_wait(void *word) +{ + schedule(); + return 0; +} + /** * gfs2_put_super - Unmount the filesystem * @sb: The VFS superblock @@ -131,6 +137,7 @@ static void gfs2_put_super(struct super_block *sb) { struct gfs2_sbd *sdp = sb->s_fs_info; int error; + struct gfs2_jdesc *jd; /* Unfreeze the filesystem, if we need to */ @@ -139,9 +146,25 @@ static void gfs2_put_super(struct super_block *sb) gfs2_glock_dq_uninit(&sdp->sd_freeze_gh); mutex_unlock(&sdp->sd_freeze_lock); + /* No more recovery requests */ + set_bit(SDF_NORECOVERY, &sdp->sd_flags); + smp_mb(); + + /* Wait on outstanding recovery */ +restart: + spin_lock(&sdp->sd_jindex_spin); + list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { + if (!test_bit(JDF_RECOVERY, &jd->jd_flags)) + continue; + spin_unlock(&sdp->sd_jindex_spin); + wait_on_bit(&jd->jd_flags, JDF_RECOVERY, + gfs2_umount_recovery_wait, TASK_UNINTERRUPTIBLE); + goto restart; + } + spin_unlock(&sdp->sd_jindex_spin); + kthread_stop(sdp->sd_quotad_process); kthread_stop(sdp->sd_logd_process); - kthread_stop(sdp->sd_recoverd_process); if (!(sb->s_flags & MS_RDONLY)) { error = gfs2_make_fs_ro(sdp); diff --git a/fs/gfs2/recovery.c b/fs/gfs2/recovery.c index 247e8f7d6b3..59d2695509d 100644 --- a/fs/gfs2/recovery.c +++ b/fs/gfs2/recovery.c @@ -13,8 +13,7 @@ #include #include #include -#include -#include +#include #include "gfs2.h" #include "incore.h" @@ -441,18 +440,25 @@ static void gfs2_recovery_done(struct gfs2_sbd *sdp, unsigned int jid, kobject_uevent_env(&sdp->sd_kobj, KOBJ_CHANGE, envp); } -/** - * gfs2_recover_journal - recover a given journal - * @jd: the struct gfs2_jdesc describing the journal - * - * Acquire the journal's lock, check to see if the journal is clean, and - * do recovery if necessary. - * - * Returns: errno - */ +static int gfs2_recover_get_ref(struct slow_work *work) +{ + struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); + if (test_and_set_bit(JDF_RECOVERY, &jd->jd_flags)) + return -EBUSY; + return 0; +} -int gfs2_recover_journal(struct gfs2_jdesc *jd) +static void gfs2_recover_put_ref(struct slow_work *work) +{ + struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); + clear_bit(JDF_RECOVERY, &jd->jd_flags); + smp_mb__after_clear_bit(); + wake_up_bit(&jd->jd_flags, JDF_RECOVERY); +} + +static void gfs2_recover_work(struct slow_work *work) { + struct gfs2_jdesc *jd = container_of(work, struct gfs2_jdesc, jd_work); struct gfs2_inode *ip = GFS2_I(jd->jd_inode); struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); struct gfs2_log_header_host head; @@ -569,7 +575,7 @@ int gfs2_recover_journal(struct gfs2_jdesc *jd) gfs2_glock_dq_uninit(&j_gh); fs_info(sdp, "jid=%u: Done\n", jd->jd_jid); - return 0; + return; fail_gunlock_tr: gfs2_glock_dq_uninit(&t_gh); @@ -584,70 +590,28 @@ fail_gunlock_j: fail: gfs2_recovery_done(sdp, jd->jd_jid, LM_RD_GAVEUP); - return error; } -static struct gfs2_jdesc *gfs2_jdesc_find_dirty(struct gfs2_sbd *sdp) -{ - struct gfs2_jdesc *jd; - int found = 0; - - spin_lock(&sdp->sd_jindex_spin); +struct slow_work_ops gfs2_recover_ops = { + .get_ref = gfs2_recover_get_ref, + .put_ref = gfs2_recover_put_ref, + .execute = gfs2_recover_work, +}; - list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { - if (jd->jd_dirty) { - jd->jd_dirty = 0; - found = 1; - break; - } - } - spin_unlock(&sdp->sd_jindex_spin); - - if (!found) - jd = NULL; - return jd; -} - -/** - * gfs2_check_journals - Recover any dirty journals - * @sdp: the filesystem - * - */ - -static void gfs2_check_journals(struct gfs2_sbd *sdp) +static int gfs2_recovery_wait(void *word) { - struct gfs2_jdesc *jd; - - for (;;) { - jd = gfs2_jdesc_find_dirty(sdp); - if (!jd) - break; - - if (jd != sdp->sd_jdesc) - gfs2_recover_journal(jd); - } + schedule(); + return 0; } -/** - * gfs2_recoverd - Recover dead machine's journals - * @sdp: Pointer to GFS2 superblock - * - */ - -int gfs2_recoverd(void *data) +int gfs2_recover_journal(struct gfs2_jdesc *jd) { - struct gfs2_sbd *sdp = data; - unsigned long t; - - while (!kthread_should_stop()) { - gfs2_check_journals(sdp); - t = gfs2_tune_get(sdp, gt_recoverd_secs) * HZ; - if (freezing(current)) - refrigerator(); - schedule_timeout_interruptible(t); - } - + int rv; + rv = slow_work_enqueue(&jd->jd_work); + if (rv) + return rv; + wait_on_bit(&jd->jd_flags, JDF_RECOVERY, gfs2_recovery_wait, TASK_UNINTERRUPTIBLE); return 0; } diff --git a/fs/gfs2/recovery.h b/fs/gfs2/recovery.h index a8218ea15b5..1616ac22569 100644 --- a/fs/gfs2/recovery.h +++ b/fs/gfs2/recovery.h @@ -28,7 +28,7 @@ extern void gfs2_revoke_clean(struct gfs2_sbd *sdp); extern int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head); extern int gfs2_recover_journal(struct gfs2_jdesc *gfs2_jd); -extern int gfs2_recoverd(void *data); +extern struct slow_work_ops gfs2_recover_ops; #endif /* __RECOVERY_DOT_H__ */ diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index 894bf773ec9..9f6d48b75fd 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -356,34 +356,33 @@ static ssize_t first_done_show(struct gfs2_sbd *sdp, char *buf) return sprintf(buf, "%d\n", ls->ls_first_done); } -static ssize_t recover_show(struct gfs2_sbd *sdp, char *buf) -{ - struct lm_lockstruct *ls = &sdp->sd_lockstruct; - return sprintf(buf, "%d\n", ls->ls_recover_jid); -} - -static void gfs2_jdesc_make_dirty(struct gfs2_sbd *sdp, unsigned int jid) +static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len) { + unsigned jid; struct gfs2_jdesc *jd; + int rv; + + rv = sscanf(buf, "%u", &jid); + if (rv != 1) + return -EINVAL; + rv = -ESHUTDOWN; spin_lock(&sdp->sd_jindex_spin); + if (test_bit(SDF_NORECOVERY, &sdp->sd_flags)) + goto out; + rv = -EBUSY; + if (sdp->sd_jdesc->jd_jid == jid) + goto out; + rv = -ENOENT; list_for_each_entry(jd, &sdp->sd_jindex_list, jd_list) { if (jd->jd_jid != jid) continue; - jd->jd_dirty = 1; + rv = slow_work_enqueue(&jd->jd_work); break; } +out: spin_unlock(&sdp->sd_jindex_spin); -} - -static ssize_t recover_store(struct gfs2_sbd *sdp, const char *buf, size_t len) -{ - struct lm_lockstruct *ls = &sdp->sd_lockstruct; - ls->ls_recover_jid = simple_strtol(buf, NULL, 0); - gfs2_jdesc_make_dirty(sdp, ls->ls_recover_jid); - if (sdp->sd_recoverd_process) - wake_up_process(sdp->sd_recoverd_process); - return len; + return rv ? rv : len; } static ssize_t recover_done_show(struct gfs2_sbd *sdp, char *buf) @@ -401,15 +400,15 @@ static ssize_t recover_status_show(struct gfs2_sbd *sdp, char *buf) #define GDLM_ATTR(_name,_mode,_show,_store) \ static struct gfs2_attr gdlm_attr_##_name = __ATTR(_name,_mode,_show,_store) -GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); -GDLM_ATTR(block, 0644, block_show, block_store); -GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); -GDLM_ATTR(id, 0444, lkid_show, NULL); -GDLM_ATTR(first, 0444, lkfirst_show, NULL); -GDLM_ATTR(first_done, 0444, first_done_show, NULL); -GDLM_ATTR(recover, 0644, recover_show, recover_store); -GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); -GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); +GDLM_ATTR(proto_name, 0444, proto_name_show, NULL); +GDLM_ATTR(block, 0644, block_show, block_store); +GDLM_ATTR(withdraw, 0644, withdraw_show, withdraw_store); +GDLM_ATTR(id, 0444, lkid_show, NULL); +GDLM_ATTR(first, 0444, lkfirst_show, NULL); +GDLM_ATTR(first_done, 0444, first_done_show, NULL); +GDLM_ATTR(recover, 0200, NULL, recover_store); +GDLM_ATTR(recover_done, 0444, recover_done_show, NULL); +GDLM_ATTR(recover_status, 0444, recover_status_show, NULL); static struct attribute *lock_module_attrs[] = { &gdlm_attr_proto_name.attr, -- cgit v1.2.3-18-g5258 From e9ccb73ab57ada469602506496c42e5b4468ac3e Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 19 May 2009 10:23:23 +0100 Subject: GFS2: Update docs Update a few things which were out of date, and fix a typo. Signed-off-by: Steven Whitehouse --- Documentation/filesystems/gfs2-glocks.txt | 2 +- Documentation/filesystems/gfs2.txt | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/Documentation/filesystems/gfs2-glocks.txt b/Documentation/filesystems/gfs2-glocks.txt index 4dae9a3840b..0494f78d87e 100644 --- a/Documentation/filesystems/gfs2-glocks.txt +++ b/Documentation/filesystems/gfs2-glocks.txt @@ -60,7 +60,7 @@ go_lock | Called for the first local holder of a lock go_unlock | Called on the final local unlock of a lock go_dump | Called to print content of object for debugfs file, or on | error to dump glock to the log. -go_type; | The type of the glock, LM_TYPE_..... +go_type | The type of the glock, LM_TYPE_..... go_min_hold_time | The minimum hold time The minimum hold time for each lock is the time after a remote lock diff --git a/Documentation/filesystems/gfs2.txt b/Documentation/filesystems/gfs2.txt index 593004b6bba..5e3ab8f3bef 100644 --- a/Documentation/filesystems/gfs2.txt +++ b/Documentation/filesystems/gfs2.txt @@ -11,18 +11,15 @@ their I/O so file system consistency is maintained. One of the nifty features of GFS is perfect consistency -- changes made to the file system on one machine show up immediately on all other machines in the cluster. -GFS uses interchangable inter-node locking mechanisms. Different lock -modules can plug into GFS and each file system selects the appropriate -lock module at mount time. Lock modules include: +GFS uses interchangable inter-node locking mechanisms, the currently +supported mechanisms are: lock_nolock -- allows gfs to be used as a local file system lock_dlm -- uses a distributed lock manager (dlm) for inter-node locking The dlm is found at linux/fs/dlm/ -In addition to interfacing with an external locking manager, a gfs lock -module is responsible for interacting with external cluster management -systems. Lock_dlm depends on user space cluster management systems found +Lock_dlm depends on user space cluster management systems found at the URL above. To use gfs as a local file system, no external clustering systems are @@ -31,13 +28,19 @@ needed, simply: $ mkfs -t gfs2 -p lock_nolock -j 1 /dev/block_device $ mount -t gfs2 /dev/block_device /dir -GFS2 is not on-disk compatible with previous versions of GFS. +If you are using Fedora, you need to install the gfs2-utils package +and, for lock_dlm, you will also need to install the cman package +and write a cluster.conf as per the documentation. + +GFS2 is not on-disk compatible with previous versions of GFS, but it +is pretty close. The following man pages can be found at the URL above: - gfs2_fsck to repair a filesystem + fsck.gfs2 to repair a filesystem gfs2_grow to expand a filesystem online gfs2_jadd to add journals to a filesystem online gfs2_tool to manipulate, examine and tune a filesystem gfs2_quota to examine and change quota values in a filesystem + gfs2_convert to convert a gfs filesystem to gfs2 in-place mount.gfs2 to help mount(8) mount a filesystem mkfs.gfs2 to make a filesystem -- cgit v1.2.3-18-g5258 From ef9e8b14a5c1d0afbaf12b4c3b271188ddfc52a4 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Tue, 19 May 2009 14:25:16 +0100 Subject: GFS2: Don't warn when delete inode fails on ro filesystem If the filesystem is read-only, then we expect that delete inode will fail, so there is no need to warn about it. Signed-off-by: Steven Whitehouse --- fs/gfs2/ops_super.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/gfs2/ops_super.c b/fs/gfs2/ops_super.c index a3c2272e7ca..2fd1dcbcc5b 100644 --- a/fs/gfs2/ops_super.c +++ b/fs/gfs2/ops_super.c @@ -714,7 +714,7 @@ out_unlock: gfs2_glock_dq(&ip->i_iopen_gh); gfs2_holder_uninit(&ip->i_iopen_gh); gfs2_glock_dq_uninit(&gh); - if (error && error != GLR_TRYFAILED) + if (error && error != GLR_TRYFAILED && error != -EROFS) fs_warn(sdp, "gfs2_delete_inode: %d\n", error); out: truncate_inode_pages(&inode->i_data, 0); -- cgit v1.2.3-18-g5258 From 09010978345e8883003bf411bb99753710eb5a3a Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 20 May 2009 10:48:47 +0100 Subject: GFS2: Improve resource group error handling This patch improves the error handling in the case where we discover that the summary information in the resource group doesn't match the bitmap information while in the process of allocating blocks. Originally this resulted in a kernel bug, but this patch changes that so that we return -EIO and print some messages explaining what went wrong, and how to fix it. We also remember locally not to try and allocate from the same rgrp again, so that a subsequent allocation in a different rgrp should succeed. Signed-off-by: Steven Whitehouse --- fs/gfs2/bmap.c | 9 +++++++-- fs/gfs2/dir.c | 11 +++++++++-- fs/gfs2/eattr.c | 14 +++++++++++--- fs/gfs2/glops.c | 20 +------------------ fs/gfs2/incore.h | 7 ++++--- fs/gfs2/rgrp.c | 58 +++++++++++++++++++++++++++++++++++++++++--------------- fs/gfs2/rgrp.h | 47 +++++++++++++++++++++++---------------------- 7 files changed, 99 insertions(+), 67 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 3a5d3f883e1..253e1a39f84 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -136,7 +136,9 @@ int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page) and write it out to disk */ unsigned int n = 1; - block = gfs2_alloc_block(ip, &n); + error = gfs2_alloc_block(ip, &block, &n); + if (error) + goto out_brelse; if (isdir) { gfs2_trans_add_unrevoke(GFS2_SB(&ip->i_inode), block, 1); error = gfs2_dir_get_new_buffer(ip, block, &bh); @@ -476,8 +478,11 @@ static int gfs2_bmap_alloc(struct inode *inode, const sector_t lblock, blks = dblks + iblks; i = sheight; do { + int error; n = blks - alloced; - bn = gfs2_alloc_block(ip, &n); + error = gfs2_alloc_block(ip, &bn, &n); + if (error) + return error; alloced += n; if (state != ALLOC_DATA || gfs2_is_jdata(ip)) gfs2_trans_add_unrevoke(sdp, bn, n); diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index aef4d0c0674..297d7e5ceba 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -803,13 +803,20 @@ static struct gfs2_leaf *new_leaf(struct inode *inode, struct buffer_head **pbh, { struct gfs2_inode *ip = GFS2_I(inode); unsigned int n = 1; - u64 bn = gfs2_alloc_block(ip, &n); - struct buffer_head *bh = gfs2_meta_new(ip->i_gl, bn); + u64 bn; + int error; + struct buffer_head *bh; struct gfs2_leaf *leaf; struct gfs2_dirent *dent; struct qstr name = { .name = "", .len = 0, .hash = 0 }; + + error = gfs2_alloc_block(ip, &bn, &n); + if (error) + return NULL; + bh = gfs2_meta_new(ip->i_gl, bn); if (!bh) return NULL; + gfs2_trans_add_unrevoke(GFS2_SB(inode), bn, 1); gfs2_trans_add_bh(ip->i_gl, bh, 1); gfs2_metatype_set(bh, GFS2_METATYPE_LF, GFS2_FORMAT_LF); diff --git a/fs/gfs2/eattr.c b/fs/gfs2/eattr.c index 899763aed21..07ea9529add 100644 --- a/fs/gfs2/eattr.c +++ b/fs/gfs2/eattr.c @@ -582,8 +582,11 @@ static int ea_alloc_blk(struct gfs2_inode *ip, struct buffer_head **bhp) struct gfs2_ea_header *ea; unsigned int n = 1; u64 block; + int error; - block = gfs2_alloc_block(ip, &n); + error = gfs2_alloc_block(ip, &block, &n); + if (error) + return error; gfs2_trans_add_unrevoke(sdp, block, 1); *bhp = gfs2_meta_new(ip->i_gl, block); gfs2_trans_add_bh(ip->i_gl, *bhp, 1); @@ -617,6 +620,7 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea, struct gfs2_ea_request *er) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); + int error; ea->ea_data_len = cpu_to_be32(er->er_data_len); ea->ea_name_len = er->er_name_len; @@ -642,7 +646,9 @@ static int ea_write(struct gfs2_inode *ip, struct gfs2_ea_header *ea, int mh_size = sizeof(struct gfs2_meta_header); unsigned int n = 1; - block = gfs2_alloc_block(ip, &n); + error = gfs2_alloc_block(ip, &block, &n); + if (error) + return error; gfs2_trans_add_unrevoke(sdp, block, 1); bh = gfs2_meta_new(ip->i_gl, block); gfs2_trans_add_bh(ip->i_gl, bh, 1); @@ -963,7 +969,9 @@ static int ea_set_block(struct gfs2_inode *ip, struct gfs2_ea_request *er, } else { u64 blk; unsigned int n = 1; - blk = gfs2_alloc_block(ip, &n); + error = gfs2_alloc_block(ip, &blk, &n); + if (error) + return error; gfs2_trans_add_unrevoke(sdp, blk, 1); indbh = gfs2_meta_new(ip->i_gl, blk); gfs2_trans_add_bh(ip->i_gl, indbh, 1); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 70f87f43afa..d5e4ab155ca 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -309,24 +309,6 @@ static void rgrp_go_unlock(struct gfs2_holder *gh) gfs2_rgrp_bh_put(gh->gh_gl->gl_object); } -/** - * rgrp_go_dump - print out an rgrp - * @seq: The iterator - * @gl: The glock in question - * - */ - -static int rgrp_go_dump(struct seq_file *seq, const struct gfs2_glock *gl) -{ - const struct gfs2_rgrpd *rgd = gl->gl_object; - if (rgd == NULL) - return 0; - gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n", - (unsigned long long)rgd->rd_addr, rgd->rd_flags, - rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes); - return 0; -} - /** * trans_go_sync - promote/demote the transaction glock * @gl: the glock @@ -410,7 +392,7 @@ const struct gfs2_glock_operations gfs2_rgrp_glops = { .go_demote_ok = rgrp_go_demote_ok, .go_lock = rgrp_go_lock, .go_unlock = rgrp_go_unlock, - .go_dump = rgrp_go_dump, + .go_dump = gfs2_rgrp_dump, .go_type = LM_TYPE_RGRP, .go_min_hold_time = HZ / 5, }; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 0060e9564bb..de50d86fec1 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -92,9 +92,10 @@ struct gfs2_rgrpd { unsigned int rd_bh_count; u32 rd_last_alloc; unsigned char rd_flags; -#define GFS2_RDF_CHECK 0x01 /* Need to check for unlinked inodes */ -#define GFS2_RDF_NOALLOC 0x02 /* rg prohibits allocation */ -#define GFS2_RDF_UPTODATE 0x04 /* rg is up to date */ +#define GFS2_RDF_CHECK 0x10000000 /* check for unlinked inodes */ +#define GFS2_RDF_UPTODATE 0x20000000 /* rg is up to date */ +#define GFS2_RDF_ERROR 0x40000000 /* error in rg */ +#define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ }; enum gfs2_state_bits { diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 565038243fa..fbacf09ee34 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -701,10 +701,7 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) u32 rg_flags; rg_flags = be32_to_cpu(str->rg_flags); - if (rg_flags & GFS2_RGF_NOALLOC) - rgd->rd_flags |= GFS2_RDF_NOALLOC; - else - rgd->rd_flags &= ~GFS2_RDF_NOALLOC; + rg_flags &= ~GFS2_RDF_MASK; rgd->rd_free = be32_to_cpu(str->rg_free); rgd->rd_dinodes = be32_to_cpu(str->rg_dinodes); rgd->rd_igeneration = be64_to_cpu(str->rg_igeneration); @@ -713,11 +710,8 @@ static void gfs2_rgrp_in(struct gfs2_rgrpd *rgd, const void *buf) static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf) { struct gfs2_rgrp *str = buf; - u32 rg_flags = 0; - if (rgd->rd_flags & GFS2_RDF_NOALLOC) - rg_flags |= GFS2_RGF_NOALLOC; - str->rg_flags = cpu_to_be32(rg_flags); + str->rg_flags = cpu_to_be32(rgd->rd_flags & ~GFS2_RDF_MASK); str->rg_free = cpu_to_be32(rgd->rd_free); str->rg_dinodes = cpu_to_be32(rgd->rd_dinodes); str->__pad = cpu_to_be32(0); @@ -942,7 +936,7 @@ static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al) struct gfs2_sbd *sdp = rgd->rd_sbd; int ret = 0; - if (rgd->rd_flags & GFS2_RDF_NOALLOC) + if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) return 0; spin_lock(&sdp->sd_rindex_spin); @@ -1435,13 +1429,33 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart, } /** - * gfs2_alloc_block - Allocate a block + * gfs2_rgrp_dump - print out an rgrp + * @seq: The iterator + * @gl: The glock in question + * + */ + +int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl) +{ + const struct gfs2_rgrpd *rgd = gl->gl_object; + if (rgd == NULL) + return 0; + gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n", + (unsigned long long)rgd->rd_addr, rgd->rd_flags, + rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes); + return 0; +} + +/** + * gfs2_alloc_block - Allocate one or more blocks * @ip: the inode to allocate the block for + * @bn: Used to return the starting block number + * @n: requested number of blocks/extent length (value/result) * - * Returns: the allocated block + * Returns: 0 or error */ -u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) +int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n) { struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); struct buffer_head *dibh; @@ -1457,7 +1471,10 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) goal = rgd->rd_last_alloc; blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED, n); - BUG_ON(blk == BFITNOENT); + + /* Since all blocks are reserved in advance, this shouldn't happen */ + if (blk == BFITNOENT) + goto rgrp_error; rgd->rd_last_alloc = blk; block = rgd->rd_data0 + blk; @@ -1469,7 +1486,9 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_goal); brelse(dibh); } - gfs2_assert_withdraw(sdp, rgd->rd_free >= *n); + if (rgd->rd_free < *n) + goto rgrp_error; + rgd->rd_free -= *n; gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); @@ -1484,7 +1503,16 @@ u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n) rgd->rd_free_clone -= *n; spin_unlock(&sdp->sd_rindex_spin); - return block; + *bn = block; + return 0; + +rgrp_error: + fs_warn(sdp, "rgrp %llu has an error, marking it readonly until umount\n", + (unsigned long long)rgd->rd_addr); + fs_warn(sdp, "umount on all nodes and run fsck.gfs2 to fix the error\n"); + gfs2_rgrp_dump(NULL, rgd->rd_gl); + rgd->rd_flags |= GFS2_RDF_ERROR; + return -EIO; } /** diff --git a/fs/gfs2/rgrp.h b/fs/gfs2/rgrp.h index 3181c7e624b..1e76ff0f3e0 100644 --- a/fs/gfs2/rgrp.h +++ b/fs/gfs2/rgrp.h @@ -14,22 +14,22 @@ struct gfs2_rgrpd; struct gfs2_sbd; struct gfs2_holder; -void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd); +extern void gfs2_rgrp_verify(struct gfs2_rgrpd *rgd); struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk); struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp); struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd); -void gfs2_clear_rgrpd(struct gfs2_sbd *sdp); -int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh); +extern void gfs2_clear_rgrpd(struct gfs2_sbd *sdp); +extern int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh); -int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd); -void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd); -void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd); +extern int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd); +extern void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd); +extern void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd); -void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd); +extern void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd); -struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); +extern struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip); static inline void gfs2_alloc_put(struct gfs2_inode *ip) { BUG_ON(ip->i_alloc == NULL); @@ -37,22 +37,22 @@ static inline void gfs2_alloc_put(struct gfs2_inode *ip) ip->i_alloc = NULL; } -int gfs2_inplace_reserve_i(struct gfs2_inode *ip, - char *file, unsigned int line); +extern int gfs2_inplace_reserve_i(struct gfs2_inode *ip, char *file, + unsigned int line); #define gfs2_inplace_reserve(ip) \ gfs2_inplace_reserve_i((ip), __FILE__, __LINE__) -void gfs2_inplace_release(struct gfs2_inode *ip); +extern void gfs2_inplace_release(struct gfs2_inode *ip); -unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block); +extern unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block); -u64 gfs2_alloc_block(struct gfs2_inode *ip, unsigned int *n); -u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation); +extern int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n); +extern u64 gfs2_alloc_di(struct gfs2_inode *ip, u64 *generation); -void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); -void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); -void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); -void gfs2_unlink_di(struct inode *inode); +extern void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen); +extern void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen); +extern void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip); +extern void gfs2_unlink_di(struct inode *inode); struct gfs2_rgrp_list { unsigned int rl_rgrps; @@ -61,10 +61,11 @@ struct gfs2_rgrp_list { struct gfs2_holder *rl_ghs; }; -void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist, - u64 block); -void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state); -void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); -u64 gfs2_ri_total(struct gfs2_sbd *sdp); +extern void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist, + u64 block); +extern void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state); +extern void gfs2_rlist_free(struct gfs2_rgrp_list *rlist); +extern u64 gfs2_ri_total(struct gfs2_sbd *sdp); +extern int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl); #endif /* __RGRP_DOT_H__ */ -- cgit v1.2.3-18-g5258 From 60a0b8f93664621a07b93273fc8ebc29590c62f5 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Thu, 21 May 2009 12:23:12 +0100 Subject: GFS2: Add a rgrp bitmap full flag During block allocation, it is useful to know if sections of disk are full on a finer grained basis than a single resource group. This can make a performance difference when resource groups have larger numbers of bitmap blocks, since we no longer have to search them all block by block in each individual bitmap. The full flag is set on a per-bitmap basis when it has been searched and found to have no free space. It is then skipped in subsequent searches until the flag is reset. The resetting occurs if we have to drop the glock on the resource group for any reason, or if we deallocate some blocks within that resource group and thus free up some space. Signed-off-by: Steven Whitehouse --- fs/gfs2/incore.h | 3 +++ fs/gfs2/rgrp.c | 77 ++++++++++++++++++++++++++++++++++---------------------- 2 files changed, 50 insertions(+), 30 deletions(-) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index de50d86fec1..dd87379b61e 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -64,9 +64,12 @@ struct gfs2_log_element { const struct gfs2_log_operations *le_ops; }; +#define GBF_FULL 1 + struct gfs2_bitmap { struct buffer_head *bi_bh; char *bi_clone; + unsigned long bi_flags; u32 bi_offset; u32 bi_start; u32 bi_len; diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index fbacf09ee34..23637b9d1c7 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -442,6 +442,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd) for (x = 0; x < length; x++) { bi = rgd->rd_bits + x; + bi->bi_flags = 0; /* small rgrp; bitmap stored completely in header block */ if (length == 1) { bytes = bytes_left; @@ -769,6 +770,8 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) } if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) { + for (x = 0; x < length; x++) + clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags); gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data); rgd->rd_flags |= GFS2_RDF_UPTODATE; } @@ -897,6 +900,7 @@ void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd) continue; if (sdp->sd_args.ar_discard) gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bi); + clear_bit(GBF_FULL, &bi->bi_flags); memcpy(bi->bi_clone + bi->bi_offset, bi->bi_bh->b_data + bi->bi_offset, bi->bi_len); } @@ -1309,30 +1313,37 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, { struct gfs2_bitmap *bi = NULL; const u32 length = rgd->rd_length; - u32 blk = 0; + u32 blk = BFITNOENT; unsigned int buf, x; const unsigned int elen = *n; - const u8 *buffer; + const u8 *buffer = NULL; *n = 0; /* Find bitmap block that contains bits for goal block */ for (buf = 0; buf < length; buf++) { bi = rgd->rd_bits + buf; - if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) - break; + /* Convert scope of "goal" from rgrp-wide to within found bit block */ + if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) { + goal -= bi->bi_start * GFS2_NBBY; + goto do_search; + } } + buf = 0; + goal = 0; - gfs2_assert(rgd->rd_sbd, buf < length); - - /* Convert scope of "goal" from rgrp-wide to within found bit block */ - goal -= bi->bi_start * GFS2_NBBY; - +do_search: /* Search (up to entire) bitmap in this rgrp for allocatable block. "x <= length", instead of "x < length", because we typically start the search in the middle of a bit block, but if we can't find an allocatable block anywhere else, we want to be able wrap around and search in the first part of our first-searched bit block. */ for (x = 0; x <= length; x++) { + bi = rgd->rd_bits + buf; + + if (test_bit(GBF_FULL, &bi->bi_flags) && + (old_state == GFS2_BLKST_FREE)) + goto skip; + /* The GFS2_BLKST_UNLINKED state doesn't apply to the clone bitmaps, so we must search the originals for that. */ buffer = bi->bi_bh->b_data + bi->bi_offset; @@ -1343,33 +1354,39 @@ static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, if (blk != BFITNOENT) break; + if ((goal == 0) && (old_state == GFS2_BLKST_FREE)) + set_bit(GBF_FULL, &bi->bi_flags); + /* Try next bitmap block (wrap back to rgrp header if at end) */ - buf = (buf + 1) % length; - bi = rgd->rd_bits + buf; +skip: + buf++; + buf %= length; goal = 0; } - if (blk != BFITNOENT && old_state != new_state) { - *n = 1; - gfs2_tran