diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-01 08:51:04 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-01 08:51:04 -0700 |
commit | e151960a23cc99e310f90adad7ee592821a77434 (patch) | |
tree | 3bc01871b1548b4e15d89a54b18f8c9213c90acc | |
parent | a0d271cbfed1dd50278c6b06bead3d00ba0a88f9 (diff) | |
parent | 2216db70c95a9610f5279b8da53bec614d98270f (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-nmw
Pull GFS2 updates from Steven Whitehouse:
"The major feature this time is the "rbm" conversion in the resource
group code. The new struct gfs2_rbm specifies the location of an
allocatable block in (resource group, bitmap, offset) form. There are
a number of added helper functions, and later patches then rewrite
some of the resource group code in terms of this new structure. Not
only does this give us a nice code clean up, but it also removes some
of the previous restrictions where extents could not cross bitmap
boundaries, for example.
In addition to that, there are a few bug fixes and clean ups, but the
rbm work is by far the majority of this patch set in terms of number
of changed lines."
* git://git.kernel.org/pub/scm/linux/kernel/git/steve/gfs2-3.0-nmw: (27 commits)
GFS2: Write out dirty inode metadata in delayed deletes
GFS2: fix s_writers.counter imbalance in gfs2_ail_empty_gl
GFS2: Fix infinite loop in rbm_find
GFS2: Consolidate free block searching functions
GFS2: Get rid of I_MUTEX_QUOTA usage
GFS2: Stop block extents at the end of bitmaps
GFS2: Fix unclaimed_blocks() wrapping bug and clean up
GFS2: Improve block reservation tracing
GFS2: Fall back to ignoring reservations, if there are no other blocks left
GFS2: Fix ->show_options() for statfs slow
GFS2: Use rbm for gfs2_setbit()
GFS2: Use rbm for gfs2_testbit()
GFS2: Eliminate unnecessary check for state > 3 in bitfit
GFS2: Eliminate redundant calls to may_grant
GFS2: Combine functions gfs2_glock_dq_wait and wait_on_demote
GFS2: Combine functions gfs2_glock_wait and wait_on_holder
GFS2: inline __gfs2_glock_schedule_for_reclaim
GFS2: change function gfs2_direct_IO to use a normal gfs2_glock_dq
GFS2: rbm code cleanup
GFS2: Fix case where reservation finished at end of rgrp
...
-rw-r--r-- | fs/gfs2/aops.c | 11 | ||||
-rw-r--r-- | fs/gfs2/bmap.c | 2 | ||||
-rw-r--r-- | fs/gfs2/file.c | 4 | ||||
-rw-r--r-- | fs/gfs2/glock.c | 60 | ||||
-rw-r--r-- | fs/gfs2/glops.c | 1 | ||||
-rw-r--r-- | fs/gfs2/incore.h | 30 | ||||
-rw-r--r-- | fs/gfs2/inode.c | 20 | ||||
-rw-r--r-- | fs/gfs2/ops_fstype.c | 8 | ||||
-rw-r--r-- | fs/gfs2/quota.c | 11 | ||||
-rw-r--r-- | fs/gfs2/rgrp.c | 1203 | ||||
-rw-r--r-- | fs/gfs2/rgrp.h | 28 | ||||
-rw-r--r-- | fs/gfs2/super.c | 9 | ||||
-rw-r--r-- | fs/gfs2/trace_gfs2.h | 20 | ||||
-rw-r--r-- | fs/gfs2/trans.h | 7 | ||||
-rw-r--r-- | fs/gfs2/xattr.c | 96 |
15 files changed, 710 insertions, 800 deletions
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index d6526347d38..01c4975da4b 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -612,6 +612,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, struct gfs2_sbd *sdp = GFS2_SB(mapping->host); struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); unsigned int data_blocks = 0, ind_blocks = 0, rblocks; + unsigned requested = 0; int alloc_required; int error = 0; pgoff_t index = pos >> PAGE_CACHE_SHIFT; @@ -641,7 +642,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, if (error) goto out_unlock; - error = gfs2_inplace_reserve(ip, data_blocks + ind_blocks); + requested = data_blocks + ind_blocks; + error = gfs2_inplace_reserve(ip, requested); if (error) goto out_qunlock; } @@ -654,7 +656,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping, if (&ip->i_inode == sdp->sd_rindex) rblocks += 2 * RES_STATFS; if (alloc_required) - rblocks += gfs2_rg_blocks(ip); + rblocks += gfs2_rg_blocks(ip, requested); error = gfs2_trans_begin(sdp, rblocks, PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize); @@ -868,8 +870,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, brelse(dibh); failed: gfs2_trans_end(sdp); - if (gfs2_mb_reserved(ip)) - gfs2_inplace_release(ip); + gfs2_inplace_release(ip); if (ip->i_res->rs_qa_qd_num) gfs2_quota_unlock(ip); if (inode == sdp->sd_rindex) { @@ -1023,7 +1024,7 @@ static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb, offset, nr_segs, gfs2_get_block_direct, NULL, NULL, 0); out: - gfs2_glock_dq_m(1, &gh); + gfs2_glock_dq(&gh); gfs2_holder_uninit(&gh); return rv; } diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 49cd7dd4a9f..1fd3ae237bd 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -786,7 +786,7 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, goto out_rlist; if (gfs2_rs_active(ip->i_res)) /* needs to be done with the rgrp glock held */ - gfs2_rs_deltree(ip->i_res); + gfs2_rs_deltree(ip, ip->i_res); error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE + RES_INDIRECT + RES_STATFS + RES_QUOTA, diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 382000ffac1..30e21997a1a 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -441,7 +441,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) rblocks += data_blocks ? data_blocks : 1; if (ind_blocks || data_blocks) { rblocks += RES_STATFS + RES_QUOTA; - rblocks += gfs2_rg_blocks(ip); + rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks); } ret = gfs2_trans_begin(sdp, rblocks, 0); if (ret) @@ -845,7 +845,7 @@ retry: &max_bytes, &data_blocks, &ind_blocks); rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA + - RES_RG_HDR + gfs2_rg_blocks(ip); + RES_RG_HDR + gfs2_rg_blocks(ip, data_blocks + ind_blocks); if (gfs2_is_jdata(ip)) rblocks += data_blocks ? data_blocks : 1; diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 1ed81f40da0..e6c2fd53cab 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -186,20 +186,6 @@ static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl) } /** - * __gfs2_glock_schedule_for_reclaim - Add a glock to the reclaim list - * @gl: the glock - * - * If the glock is demotable, then we add it (or move it) to the end - * of the glock LRU list. - */ - -static void __gfs2_glock_schedule_for_reclaim(struct gfs2_glock *gl) -{ - if (demote_ok(gl)) - gfs2_glock_add_to_lru(gl); -} - -/** * gfs2_glock_put_nolock() - Decrement reference count on glock * @gl: The glock to put * @@ -883,7 +869,14 @@ static int gfs2_glock_demote_wait(void *word) return 0; } -static void wait_on_holder(struct gfs2_holder *gh) +/** + * gfs2_glock_wait - wait on a glock acquisition + * @gh: the glock holder + * + * Returns: 0 on success + */ + +int gfs2_glock_wait(struct gfs2_holder *gh) { unsigned long time1 = jiffies; @@ -894,12 +887,7 @@ static void wait_on_holder(struct gfs2_holder *gh) gh->gh_gl->gl_hold_time = min(gh->gh_gl->gl_hold_time + GL_GLOCK_HOLD_INCR, GL_GLOCK_MAX_HOLD); -} - -static void wait_on_demote(struct gfs2_glock *gl) -{ - might_sleep(); - wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); + return gh->gh_error; } /** @@ -929,19 +917,6 @@ static void handle_callback(struct gfs2_glock *gl, unsigned int state, trace_gfs2_demote_rq(gl); } -/** - * gfs2_glock_wait - wait on a glock acquisition - * @gh: the glock holder - * - * Returns: 0 on success - */ - -int gfs2_glock_wait(struct gfs2_holder *gh) -{ - wait_on_holder(gh); - return gh->gh_error; -} - void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...) { struct va_format vaf; @@ -979,7 +954,7 @@ __acquires(&gl->gl_spin) struct gfs2_sbd *sdp = gl->gl_sbd; struct list_head *insert_pt = NULL; struct gfs2_holder *gh2; - int try_lock = 0; + int try_futile = 0; BUG_ON(gh->gh_owner_pid == NULL); if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags)) @@ -987,7 +962,7 @@ __acquires(&gl->gl_spin) if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) { if (test_bit(GLF_LOCK, &gl->gl_flags)) - try_lock = 1; + try_futile = !may_grant(gl, gh); if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) goto fail; } @@ -996,9 +971,8 @@ __acquires(&gl->gl_spin) if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid && (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK))) goto trap_recursive; - if (try_lock && - !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) && - !may_grant(gl, gh)) { + if (try_futile && + !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) { fail: gh->gh_error = GLR_TRYFAILED; gfs2_holder_wake(gh); @@ -1121,8 +1095,9 @@ void gfs2_glock_dq(struct gfs2_holder *gh) !test_bit(GLF_DEMOTE, &gl->gl_flags)) fast_path = 1; } - if (!test_bit(GLF_LFLUSH, &gl->gl_flags)) - __gfs2_glock_schedule_for_reclaim(gl); + if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl)) + gfs2_glock_add_to_lru(gl); + trace_gfs2_glock_queue(gh, 0); spin_unlock(&gl->gl_spin); if (likely(fast_path)) @@ -1141,7 +1116,8 @@ void gfs2_glock_dq_wait(struct gfs2_holder *gh) { struct gfs2_glock *gl = gh->gh_gl; gfs2_glock_dq(gh); - wait_on_demote(gl); + might_sleep(); + wait_on_bit(&gl->gl_flags, GLF_DEMOTE, gfs2_glock_demote_wait, TASK_UNINTERRUPTIBLE); } /** diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 4bdcf378418..32cc4fde975 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -94,6 +94,7 @@ static void gfs2_ail_empty_gl(struct gfs2_glock *gl) /* A shortened, inline version of gfs2_trans_begin() */ tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes, sizeof(u64)); tr.tr_ip = (unsigned long)__builtin_return_address(0); + sb_start_intwrite(sdp->sd_vfs); gfs2_log_reserve(sdp, tr.tr_reserved); BUG_ON(current->journal_info); current->journal_info = &tr; diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index aaecc8085fc..3d469d37345 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -99,9 +99,26 @@ struct gfs2_rgrpd { #define GFS2_RDF_MASK 0xf0000000 /* mask for internal flags */ spinlock_t rd_rsspin; /* protects reservation related vars */ struct rb_root rd_rstree; /* multi-block reservation tree */ - u32 rd_rs_cnt; /* count of current reservations */ }; +struct gfs2_rbm { + struct gfs2_rgrpd *rgd; + struct gfs2_bitmap *bi; /* Bitmap must belong to the rgd */ + u32 offset; /* The offset is bitmap relative */ +}; + +static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm) +{ + return rbm->rgd->rd_data0 + (rbm->bi->bi_start * GFS2_NBBY) + rbm->offset; +} + +static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1, + const struct gfs2_rbm *rbm2) +{ + return (rbm1->rgd == rbm2->rgd) && (rbm1->bi == rbm2->bi) && + (rbm1->offset == rbm2->offset); +} + enum gfs2_state_bits { BH_Pinned = BH_PrivateStart, BH_Escaped = BH_PrivateStart + 1, @@ -250,18 +267,11 @@ struct gfs2_blkreserv { /* components used during write (step 1): */ atomic_t rs_sizehint; /* hint of the write size */ - /* components used during inplace_reserve (step 2): */ - u32 rs_requested; /* Filled in by caller of gfs2_inplace_reserve() */ - - /* components used during get_local_rgrp (step 3): */ - struct gfs2_rgrpd *rs_rgd; /* pointer to the gfs2_rgrpd */ struct gfs2_holder rs_rgd_gh; /* Filled in by get_local_rgrp */ struct rb_node rs_node; /* link to other block reservations */ - - /* components used during block searches and assignments (step 4): */ - struct gfs2_bitmap *rs_bi; /* bitmap for the current allocation */ - u32 rs_biblk; /* start block relative to the bi */ + struct gfs2_rbm rs_rbm; /* Start of reservation */ u32 rs_free; /* how many blocks are still free */ + u64 rs_inum; /* Inode number for reservation */ /* ancillary quota stuff */ struct gfs2_quota_data *rs_qa_qd[2 * MAXQUOTAS]; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 753af3d86bb..381893ceefa 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -712,14 +712,9 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, if (error) goto fail_gunlock2; - /* The newly created inode needs a reservation so it can allocate - xattrs. At the same time, we want new blocks allocated to the new - dinode to be as contiguous as possible. Since we allocated the - dinode block under the directory's reservation, we transfer - ownership of that reservation to the new inode. The directory - doesn't need a reservation unless it needs a new allocation. */ - ip->i_res = dip->i_res; - dip->i_res = NULL; + error = gfs2_rs_alloc(ip); + if (error) + goto fail_gunlock2; error = gfs2_acl_create(dip, inode); if (error) @@ -737,10 +732,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, brelse(bh); gfs2_trans_end(sdp); - /* Check if we reserved space in the rgrp. Function link_dinode may - not, depending on whether alloc is required. */ - if (gfs2_mb_reserved(dip)) - gfs2_inplace_release(dip); + gfs2_inplace_release(dip); gfs2_quota_unlock(dip); mark_inode_dirty(inode); gfs2_glock_dq_uninit_m(2, ghs); @@ -897,7 +889,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir, goto out_gunlock_q; error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + - gfs2_rg_blocks(dip) + + gfs2_rg_blocks(dip, sdp->sd_max_dirres) + 2 * RES_DINODE + RES_STATFS + RES_QUOTA, 0); if (error) @@ -1378,7 +1370,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, goto out_gunlock_q; error = gfs2_trans_begin(sdp, sdp->sd_max_dirres + - gfs2_rg_blocks(ndip) + + gfs2_rg_blocks(ndip, sdp->sd_max_dirres) + 4 * RES_DINODE + 4 * RES_LEAF + RES_STATFS + RES_QUOTA + 4, 0); if (error) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index e5af9dc420e..e443966c810 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -19,6 +19,7 @@ #include <linux/mount.h> #include <linux/gfs2_ondisk.h> #include <linux/quotaops.h> +#include <linux/lockdep.h> #include "gfs2.h" #include "incore.h" @@ -766,6 +767,7 @@ fail: return error; } +static struct lock_class_key gfs2_quota_imutex_key; static int init_inodes(struct gfs2_sbd *sdp, int undo) { @@ -803,6 +805,12 @@ static int init_inodes(struct gfs2_sbd *sdp, int undo) fs_err(sdp, "can't get quota file inode: %d\n", error); goto fail_rindex; } + /* + * i_mutex on quota files is special. Since this inode is hidden system + * file, we are safe to define locking ourselves. + */ + lockdep_set_class(&sdp->sd_quota_inode->i_mutex, + &gfs2_quota_imutex_key); error = gfs2_rindex_update(sdp); if (error) diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index a3bde91645c..4021deca61e 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -765,6 +765,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) struct gfs2_holder *ghs, i_gh; unsigned int qx, x; struct gfs2_quota_data *qd; + unsigned reserved; loff_t offset; unsigned int nalloc = 0, blocks; int error; @@ -781,7 +782,7 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) return -ENOMEM; sort(qda, num_qd, sizeof(struct gfs2_quota_data *), sort_qd, NULL); - mutex_lock_nested(&ip->i_inode.i_mutex, I_MUTEX_QUOTA); + mutex_lock(&ip->i_inode.i_mutex); for (qx = 0; qx < num_qd; qx++) { error = gfs2_glock_nq_init(qda[qx]->qd_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, &ghs[qx]); @@ -811,13 +812,13 @@ static int do_sync(unsigned int num_qd, struct gfs2_quota_data **qda) * two blocks need to be updated instead of 1 */ blocks = num_qd * data_blocks + RES_DINODE + num_qd + 3; - error = gfs2_inplace_reserve(ip, 1 + - (nalloc * (data_blocks + ind_blocks))); + reserved = 1 + (nalloc * (data_blocks + ind_blocks)); + error = gfs2_inplace_reserve(ip, reserved); if (error) goto out_alloc; if (nalloc) - blocks += gfs2_rg_blocks(ip) + nalloc * ind_blocks + RES_STATFS; + blocks += gfs2_rg_blocks(ip, reserved) + nalloc * ind_blocks + RES_STATFS; error = gfs2_trans_begin(sdp, blocks, 0); if (error) @@ -1598,7 +1599,7 @@ static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id, error = gfs2_inplace_reserve(ip, blocks); if (error) goto out_i; - blocks += gfs2_rg_blocks(ip); + blocks += gfs2_rg_blocks(ip, blocks); } /* Some quotas span block boundaries and can update two blocks, diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index c9ed814eeb6..3cc402ce6fe 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -35,9 +35,6 @@ #define BFITNOENT ((u32)~0) #define NO_BLOCK ((u64)~0) -#define RSRV_CONTENTION_FACTOR 4 -#define RGRP_RSRV_MAX_CONTENDERS 2 - #if BITS_PER_LONG == 32 #define LBITMASK (0x55555555UL) #define LBITSKIP55 (0x55555555UL) @@ -67,53 +64,48 @@ static const char valid_change[16] = { 1, 0, 0, 0 }; -static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, - unsigned char old_state, - struct gfs2_bitmap **rbi); +static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 minext, + const struct gfs2_inode *ip, bool nowrap); + /** * gfs2_setbit - Set a bit in the bitmaps - * @rgd: the resource group descriptor - * @buf2: the clone buffer that holds the bitmaps - * @bi: the bitmap structure - * @block: the block to set + * @rbm: The position of the bit to set + * @do_clone: Also set the clone bitmap, if it exists * @new_state: the new state of the block * */ -static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2, - struct gfs2_bitmap *bi, u32 block, +static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone, unsigned char new_state) { unsigned char *byte1, *byte2, *end, cur_state; - unsigned int buflen = bi->bi_len; - const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; + unsigned int buflen = rbm->bi->bi_len; + const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; - byte1 = bi->bi_bh->b_data + bi->bi_offset + (block / GFS2_NBBY); - end = bi->bi_bh->b_data + bi->bi_offset + buflen; + byte1 = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); + end = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset + buflen; BUG_ON(byte1 >= end); cur_state = (*byte1 >> bit) & GFS2_BIT_MASK; if (unlikely(!valid_change[new_state * 4 + cur_state])) { - printk(KERN_WARNING "GFS2: buf_blk = 0x%llx old_state=%d, " - "new_state=%d\n", - (unsigned long long)block, cur_state, new_state); - printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%lx\n", - (unsigned long long)rgd->rd_addr, - (unsigned long)bi->bi_start); - printk(KERN_WARNING "GFS2: bi_offset=0x%lx bi_len=0x%lx\n", - (unsigned long)bi->bi_offset, - (unsigned long)bi->bi_len); + printk(KERN_WARNING "GFS2: buf_blk = 0x%x old_state=%d, " + "new_state=%d\n", rbm->offset, cur_state, new_state); + printk(KERN_WARNING "GFS2: rgrp=0x%llx bi_start=0x%x\n", + (unsigned long long)rbm->rgd->rd_addr, + rbm->bi->bi_start); + printk(KERN_WARNING "GFS2: bi_offset=0x%x bi_len=0x%x\n", + rbm->bi->bi_offset, rbm->bi->bi_len); dump_stack(); - gfs2_consist_rgrpd(rgd); + gfs2_consist_rgrpd(rbm->rgd); return; } *byte1 ^= (cur_state ^ new_state) << bit; - if (buf2) { - byte2 = buf2 + bi->bi_offset + (block / GFS2_NBBY); + if (do_clone && rbm->bi->bi_clone) { + byte2 = rbm->bi->bi_clone + rbm->bi->bi_offset + (rbm->offset / GFS2_NBBY); cur_state = (*byte2 >> bit) & GFS2_BIT_MASK; *byte2 ^= (cur_state ^ new_state) << bit; } @@ -121,30 +113,21 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf2, /** * gfs2_testbit - test a bit in the bitmaps - * @rgd: the resource group descriptor - * @buffer: the buffer that holds the bitmaps - * @buflen: the length (in bytes) of the buffer - * @block: the block to read + * @rbm: The bit to test * + * Returns: The two bit block state of the requested bit */ -static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, - const unsigned char *buffer, - unsigned int buflen, u32 block) +static inline u8 gfs2_testbit(const struct gfs2_rbm *rbm) { - const unsigned char *byte, *end; - unsigned char cur_state; + const u8 *buffer = rbm->bi->bi_bh->b_data + rbm->bi->bi_offset; + const u8 *byte; unsigned int bit; - byte = buffer + (block / GFS2_NBBY); - bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; - end = buffer + buflen; - - gfs2_assert(rgd->rd_sbd, byte < end); + byte = buffer + (rbm->offset / GFS2_NBBY); + bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; - cur_state = (*byte >> bit) & GFS2_BIT_MASK; - - return cur_state; + return (*byte >> bit) & GFS2_BIT_MASK; } /** @@ -192,7 +175,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state) */ static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) { - u64 startblk = gfs2_rs_startblk(rs); + u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm); if (blk >= startblk + rs->rs_free) return 1; @@ -202,36 +185,6 @@ static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) } /** - * rs_find - Find a rgrp multi-block reservation that contains a given block - * @rgd: The rgrp - * @rgblk: The block we're looking for, relative to the rgrp - */ -static struct gfs2_blkreserv *rs_find(struct gfs2_rgrpd *rgd, u32 rgblk) -{ - struct rb_node **newn; - int rc; - u64 fsblk = rgblk + rgd->rd_data0; - - spin_lock(&rgd->rd_rsspin); - newn = &rgd->rd_rstree.rb_node; - while (*newn) { - struct gfs2_blkreserv *cur = - rb_entry(*newn, struct gfs2_blkreserv, rs_node); - rc = rs_cmp(fsblk, 1, cur); - if (rc < 0) - newn = &((*newn)->rb_left); - else if (rc > 0) - newn = &((*newn)->rb_right); - else { - spin_unlock(&rgd->rd_rsspin); - return cur; - } - } - spin_unlock(&rgd->rd_rsspin); - return NULL; -} - -/** * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing * a block in a given allocation state. * @buf: the buffer that holds the bitmaps @@ -262,8 +215,6 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, u64 mask = 0x5555555555555555ULL; u32 bit; - BUG_ON(state > 3); - /* Mask off bits we don't care about at the start of the search */ mask <<= spoint; tmp = gfs2_bit_search(ptr, mask, state); @@ -285,6 +236,131 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len, } /** + * gfs2_rbm_from_block - Set the rbm based upon rgd and block number + * @rbm: The rbm with rgd already set correctly + * @block: The block number (filesystem relative) + * + * This sets the bi and offset members of an rbm based on a + * resource group and a filesystem relative block number. The + * resource group must be set in the rbm on entry, the bi and + * offset members will be set by this function. + * + * Returns: 0 on success, or an error code + */ + +static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) +{ + u64 rblock = block - rbm->rgd->rd_data0; + u32 goal = (u32)rblock; + int x; + + if (WARN_ON_ONCE(rblock > UINT_MAX)) + return -EINVAL; + if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) + return -E2BIG; + + for (x = 0; x < rbm->rgd->rd_length; x++) { + rbm->bi = rbm->rgd->rd_bits + x; + if (goal < (rbm->bi->bi_start + rbm->bi->bi_len) * GFS2_NBBY) { + rbm->offset = goal - (rbm->bi->bi_start * GFS2_NBBY); + break; + } + } + + return 0; +} + +/** + * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned + * @rbm: Position to search (value/result) + * @n_unaligned: Number of unaligned blocks to check + * @len: Decremented for each block found (terminate on zero) + * + * Returns: true if a non-free block is encountered + */ + +static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len) +{ + u64 block; + u32 n; + u8 res; + + for (n = 0; n < n_unaligned; n++) { + res = gfs2_testbit(rbm); + if (res != GFS2_BLKST_FREE) + return true; + (*len)--; + if (*len == 0) + return true; + block = gfs2_rbm_to_block(rbm); + if (gfs2_rbm_from_block(rbm, block + 1)) + return true; + } + + return false; +} + +/** + * gfs2_free_extlen - Return extent length of free blocks + * @rbm: Starting position + * @len: Max length to check + * + * Starting at the block specified by the rbm, see how many free blocks + * there are, not reading more than len blocks ahead. This can be done + * using memchr_inv when the blocks are byte aligned, but has to be done + * on a block by block basis in case of unaligned blocks. Also this + * function can cope with bitmap boundaries (although it must stop on + * a resource group boundary) + * + * Returns: Number of free blocks in the extent + */ + +static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) +{ + struct gfs2_rbm rbm = *rrbm; + u32 n_unaligned = rbm.offset & 3; + u32 size = len; + u32 bytes; + u32 chunk_size; + u8 *ptr, *start, *end; + u64 block; + + if (n_unaligned && + gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len)) + goto out; + + n_unaligned = len & 3; + /* Start is now byte aligned */ + while (len > 3) { + start = rbm.bi->bi_bh->b_data; + if (rbm.bi->bi_clone) + start = rbm.bi->bi_clone; + end = start + rbm.bi->bi_bh->b_size; + start += rbm.bi->bi_offset; + BUG_ON(rbm.offset & 3); + start += (rbm.offset / GFS2_NBBY); + bytes = min_t(u32, len / GFS2_NBBY, (end - start)); + ptr = memchr_inv(start, 0, bytes); + chunk_size = ((ptr == NULL) ? bytes : (ptr - start)); + chunk_size *= GFS2_NBBY; + BUG_ON(len < chunk_size); + len -= chunk_size; + block = gfs2_rbm_to_block(&rbm); + gfs2_rbm_from_block(&rbm, block + chunk_size); + n_unaligned = 3; + if (ptr) + break; + n_unaligned = len & 3; + } + + /* Deal with any bits left over at the end */ + if (n_unaligned) + gfs2_unaligned_extlen(&rbm, n_unaligned, &len); +out: + return size - len; +} + +/** * gfs2_bitcount - count the number of bits in a certain state * @rgd: the resource group descriptor * @buffer: the buffer that holds the bitmaps @@ -487,6 +563,8 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) if (!res) error = -ENOMEM; + RB_CLEAR_NODE(&res->rs_node); + down_write(&ip->i_rw_mutex); if (ip->i_res) kmem_cache_free(gfs2_rsrv_cachep, res); @@ -496,11 +574,12 @@ int gfs2_rs_alloc(struct gfs2_inode *ip) return error; } -static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) +static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) { - gfs2_print_dbg(seq, " r: %llu s:%llu b:%u f:%u\n", - rs->rs_rgd->rd_addr, gfs2_rs_startblk(rs), rs->rs_biblk, - rs->rs_free); + gfs2_print_dbg(seq, " B: n:%llu s:%llu b:%u f:%u\n", + (unsigned long long)rs->rs_inum, + (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm), + rs->rs_rbm.offset, rs->rs_free); } /** @@ -508,41 +587,26 @@ static void dump_rs(struct seq_file *seq, struct gfs2_blkreserv *rs) * @rs: The reservation to remove * */ -static void __rs_deltree(struct gfs2_blkreserv *rs) +static void __rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) { struct gfs2_rgrpd *rgd; if (!gfs2_rs_active(rs)) return; - rgd = rs->rs_rgd; - /* We can't do this: The reason is that when the rgrp is invalidated, - it's in the "middle" of acquiring the glock, but the HOLDER bit - isn't set yet: - BUG_ON(!gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl));*/ - trace_gfs2_rs(NULL, rs, TRACE_RS_TREEDEL); - - if (!RB_EMPTY_ROOT(&rgd->rd_rstree)) - rb_erase(&rs->rs_node, &rgd->rd_rstree); - BUG_ON(!rgd->rd_rs_cnt); - rgd->rd_rs_cnt--; + rgd = rs->rs_rbm.rgd; + trace_gfs2_rs(rs, TRACE_RS_TREEDEL); + rb_erase(&rs->rs_node, &rgd->rd_rstree); + RB_CLEAR_NODE(&rs->rs_node); if (rs->rs_free) { /* return reserved blocks to the rgrp and the ip */ - BUG_ON(rs->rs_rgd->rd_reserved < rs->rs_free); - rs->rs_rgd->rd_reserved -= rs->rs_free; + BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free); + rs->rs_rbm.rgd->rd_reserved -= rs->rs_free; rs->rs_free = 0; - clear_bit(GBF_FULL, &rs->rs_bi->bi_flags); + clear_bit(GBF_FULL, &rs->rs_rbm.bi->bi_flags); smp_mb__after_clear_bit(); } - /* We can't change any of the step 1 or step 2 components of the rs. - E.g. We can't set rs_rgd to NULL because the rgd glock is held and - dequeued through this pointer. - Can't: atomic_set(&rs->rs_sizehint, 0); - Can't: rs->rs_requested = 0; - Can't: rs->rs_rgd = NULL;*/ - rs->rs_bi = NULL; - rs->rs_biblk = 0; } /** @@ -550,17 +614,16 @@ static void __rs_deltree(struct gfs2_blkreserv *rs) * @rs: The reservation to remove * */ -void gfs2_rs_deltree(struct gfs2_blkreserv *rs) +void gfs2_rs_deltree(struct gfs2_inode *ip, struct gfs2_blkreserv *rs) { struct gfs2_rgrpd *rgd; - if (!gfs2_rs_active(rs)) - return; - - rgd = rs->rs_rgd; - spin_lock(&rgd->rd_rsspin); - __rs_deltree(rs); - spin_unlock(&rgd->rd_rsspin); + rgd = rs->rs_rbm.rgd; + if (rgd) { + spin_lock(&rgd->rd_rsspin); + __rs_deltree(ip, rs); + spin_unlock(&rgd->rd_rsspin); + } } /** @@ -572,8 +635,7 @@ void gfs2_rs_delete(struct gfs2_inode *ip) { down_write(&ip->i_rw_mutex); if (ip->i_res) { - gfs2_rs_deltree(ip->i_res); - trace_gfs2_rs(ip, ip->i_res, TRACE_RS_DELETE); + gfs2_rs_deltree(ip, ip->i_res); BUG_ON(ip->i_res->rs_free); kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); ip->i_res = NULL; @@ -597,7 +659,7 @@ static void return_all_reservations(struct gfs2_rgrpd *rgd) spin_lock(&rgd->rd_rsspin); while ((n = rb_first(&rgd->rd_rstree))) { rs = rb_entry(n, struct gfs2_blkreserv, rs_node); - __rs_deltree(rs); + __rs_deltree(NULL, rs); } spin_unlock(&rgd->rd_rsspin); } @@ -1270,211 +1332,276 @@ out: /** * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree - * @bi: the bitmap with the blocks * @ip: the inode structure - * @biblk: the 32-bit block number relative to the start of the bitmap - * @amount: the number of blocks to reserve * - * Returns: NULL - reservation was already taken, so not inserted - * pointer to the inserted reservation */ -static struct gfs2_blkreserv *rs_insert(struct gfs2_bitmap *bi, - struct gfs2_inode *ip, u32 biblk, - int amount) +static void rs_insert(struct gfs2_inode *ip) { struct rb_node **newn, *parent = NULL; int rc; struct gfs2_blkreserv *rs = ip->i_res; - struct gfs2_rgrpd *rgd = rs->rs_rgd; - u64 fsblock = gfs2_bi2rgd_blk(bi, biblk) + rgd->rd_data0; + struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; + u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm); + + BUG_ON(gfs2_rs_active(rs)); spin_lock(&rgd->rd_rsspin); newn = &rgd->rd_rstree.rb_node; - BUG_ON(!ip->i_res); - BUG_ON(gfs2_rs_active(rs)); - /* Figure out where to put new node */ - /*BUG_ON(!gfs2_glock_is_locked_by_me(rgd->rd_gl));*/ while (*newn) { struct gfs2_blkreserv *cur = rb_entry(*newn, struct gfs2_blkreserv, rs_node); parent = *newn; - rc = rs_cmp(fsblock, amount, cur); + rc = rs_cmp(fsblock, rs->rs_free, cur); if (rc > 0) newn = &((*newn)->rb_right); else if (rc < 0) newn = &((*newn)->rb_left); else { spin_unlock(&rgd->rd_rsspin); - return NULL; /* reservation already in use */ + WARN_ON(1); + return; } } - /* Do our reservation work */ - rs = ip->i_res; - rs->rs_free = amount; - rs->rs_biblk = biblk; - rs->rs_bi = bi; rb_link_node(&rs->rs_node, parent, newn); rb_insert_color(&rs->rs_node, &rgd->rd_rstree); - /* Do our inode accounting for the reservation */ - /*BUG_ON(!gfs2_glock_is_locked_by_me(ip->i_gl));*/ - /* Do our rgrp accounting for the reservation */ - rgd->rd_reserved += amount; /* blocks reserved */ - rgd->rd_rs_cnt++; /* number of in-tree reservations */ + rgd->rd_reserved += rs->rs_free; /* blocks reserved */ spin_unlock(&a |