Diffstat (limited to 'fs/gfs2/rgrp.c')
| -rw-r--r-- | fs/gfs2/rgrp.c | 2283 | 
1 files changed, 1477 insertions, 806 deletions
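The patch below replaces GFS2's list-based rindex tracking with rb-trees, adds per-rgrp multi-block reservations, and threads a new struct gfs2_rbm (resource group, bitmap index, bit-pair offset) through the allocator. As a minimal standalone sketch of the address arithmetic that gfs2_rbm_from_block() performs in the diff — the header sizes and block size here are illustrative stand-ins, not the real sizeof(struct gfs2_rgrp), sizeof(struct gfs2_meta_header) and sb_bsize values:

```c
/* toy_rbm.c - userspace sketch, not kernel code */
#include <stdint.h>
#include <stdio.h>

#define GFS2_NBBY	4	/* four 2-bit block states per bitmap byte */
#define RGRP_HDR_SZ	128	/* stand-in for sizeof(struct gfs2_rgrp) */
#define META_HDR_SZ	24	/* stand-in for sizeof(struct gfs2_meta_header) */

struct toy_rbm {
	uint64_t rd_data0;	/* first block described by this rgrp */
	uint32_t bsize;		/* filesystem block size in bytes */
	uint32_t bii;		/* out: index of the bitmap block */
	uint32_t offset;	/* out: bit-pair offset within that bitmap */
};

static int toy_rbm_from_block(struct toy_rbm *rbm, uint64_t block)
{
	uint64_t rblock = block - rbm->rd_data0;
	/* The first bitmap shares its block with the big rgrp header... */
	uint32_t first_blocks = (rbm->bsize - RGRP_HDR_SZ) * GFS2_NBBY;
	/* ...while every later bitmap only loses a small meta header. */
	uint32_t per_bitmap = (rbm->bsize - META_HDR_SZ) * GFS2_NBBY;

	if (rblock > UINT32_MAX)
		return -1;

	rbm->bii = 0;
	rbm->offset = (uint32_t)rblock;
	if (rbm->offset < first_blocks)
		return 0;	/* falls inside the (smaller) first bitmap */

	/* Pad by the header-size difference so every bitmap looks the
	   same size; a single divide/modulo then locates the block. */
	rbm->offset += (RGRP_HDR_SZ - META_HDR_SZ) * GFS2_NBBY;
	rbm->bii = rbm->offset / per_bitmap;
	rbm->offset -= rbm->bii * per_bitmap;
	return 0;
}

int main(void)
{
	struct toy_rbm rbm = { .rd_data0 = 17, .bsize = 4096 };

	if (toy_rbm_from_block(&rbm, rbm.rd_data0 + 20000) == 0)
		printf("bii=%u offset=%u\n", rbm.bii, rbm.offset); /* bii=1 offset=4128 */
	return 0;
}
```

The (rgd, bii, offset) triple is what the rest of the patch passes around in place of the old (buffer, buflen, goal) argument bundles.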
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index bef3ab6cf5c..f4cb9c0d6bb 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -7,6 +7,8 @@   * of the GNU General Public License version 2.   */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/slab.h>  #include <linux/spinlock.h>  #include <linux/completion.h> @@ -15,6 +17,8 @@  #include <linux/gfs2_ondisk.h>  #include <linux/prefetch.h>  #include <linux/blkdev.h> +#include <linux/rbtree.h> +#include <linux/random.h>  #include "gfs2.h"  #include "incore.h" @@ -55,6 +59,11 @@   * 3 = Used (metadata)   */ +struct gfs2_extent { +	struct gfs2_rbm rbm; +	u32 len; +}; +  static const char valid_change[16] = {  	        /* current */  	/* n */ 0, 1, 1, 1, @@ -63,42 +72,49 @@ static const char valid_change[16] = {  	        1, 0, 0, 0  }; -static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal, -                        unsigned char old_state, unsigned char new_state, -			unsigned int *n); +static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, +			 const struct gfs2_inode *ip, bool nowrap, +			 const struct gfs2_alloc_parms *ap); +  /**   * gfs2_setbit - Set a bit in the bitmaps - * @buffer: the buffer that holds the bitmaps - * @buflen: the length (in bytes) of the buffer - * @block: the block to set + * @rbm: The position of the bit to set + * @do_clone: Also set the clone bitmap, if it exists   * @new_state: the new state of the block   *   */ -static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1, -			       unsigned char *buf2, unsigned int offset, -			       unsigned int buflen, u32 block, +static inline void gfs2_setbit(const struct gfs2_rbm *rbm, bool do_clone,  			       unsigned char new_state)  {  	unsigned char *byte1, *byte2, *end, cur_state; -	const unsigned int bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; +	struct gfs2_bitmap *bi = rbm_bi(rbm); +	unsigned int buflen = bi->bi_len; +	const unsigned int bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; -	byte1 = buf1 + offset + (block / GFS2_NBBY); -	end = buf1 + offset + buflen; +	byte1 = bi->bi_bh->b_data + bi->bi_offset + (rbm->offset / GFS2_NBBY); +	end = bi->bi_bh->b_data + bi->bi_offset + buflen;  	BUG_ON(byte1 >= end);  	cur_state = (*byte1 >> bit) & GFS2_BIT_MASK;  	if (unlikely(!valid_change[new_state * 4 + cur_state])) { -		gfs2_consist_rgrpd(rgd); +		pr_warn("buf_blk = 0x%x old_state=%d, new_state=%d\n", +			rbm->offset, cur_state, new_state); +		pr_warn("rgrp=0x%llx bi_start=0x%x\n", +			(unsigned long long)rbm->rgd->rd_addr, bi->bi_start); +		pr_warn("bi_offset=0x%x bi_len=0x%x\n", +			bi->bi_offset, bi->bi_len); +		dump_stack(); +		gfs2_consist_rgrpd(rbm->rgd);  		return;  	}  	*byte1 ^= (cur_state ^ new_state) << bit; -	if (buf2) { -		byte2 = buf2 + offset + (block / GFS2_NBBY); +	if (do_clone && bi->bi_clone) { +		byte2 = bi->bi_clone + bi->bi_offset + (rbm->offset / GFS2_NBBY);  		cur_state = (*byte2 >> bit) & GFS2_BIT_MASK;  		*byte2 ^= (cur_state ^ new_state) << bit;  	} @@ -106,29 +122,22 @@ static inline void gfs2_setbit(struct gfs2_rgrpd *rgd, unsigned char *buf1,  /**   * gfs2_testbit - test a bit in the bitmaps - * @buffer: the buffer that holds the bitmaps - * @buflen: the length (in bytes) of the buffer - * @block: the block to read + * @rbm: The bit to test   * + * Returns: The two bit block state of the requested bit   */ -static inline unsigned char gfs2_testbit(struct gfs2_rgrpd *rgd, -					 const unsigned char *buffer, -					 unsigned int buflen, u32 block) +static inline u8 gfs2_testbit(const struct 
gfs2_rbm *rbm)  { -	const unsigned char *byte, *end; -	unsigned char cur_state; +	struct gfs2_bitmap *bi = rbm_bi(rbm); +	const u8 *buffer = bi->bi_bh->b_data + bi->bi_offset; +	const u8 *byte;  	unsigned int bit; -	byte = buffer + (block / GFS2_NBBY); -	bit = (block % GFS2_NBBY) * GFS2_BIT_SIZE; -	end = buffer + buflen; - -	gfs2_assert(rgd->rd_sbd, byte < end); - -	cur_state = (*byte >> bit) & GFS2_BIT_MASK; +	byte = buffer + (rbm->offset / GFS2_NBBY); +	bit = (rbm->offset % GFS2_NBBY) * GFS2_BIT_SIZE; -	return cur_state; +	return (*byte >> bit) & GFS2_BIT_MASK;  }  /** @@ -165,9 +174,30 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)  }  /** + * rs_cmp - multi-block reservation range compare + * @blk: absolute file system block number of the new reservation + * @len: number of blocks in the new reservation + * @rs: existing reservation to compare against + * + * returns: 1 if the block range is beyond the reach of the reservation + *         -1 if the block range is before the start of the reservation + *          0 if the block range overlaps with the reservation + */ +static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs) +{ +	u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm); + +	if (blk >= startblk + rs->rs_free) +		return 1; +	if (blk + len - 1 < startblk) +		return -1; +	return 0; +} + +/**   * gfs2_bitfit - Search an rgrp's bitmap buffer to find a bit-pair representing   *       a block in a given allocation state. - * @buffer: the buffer that holds the bitmaps + * @buf: the buffer that holds the bitmaps   * @len: the length (in bytes) of the buffer   * @goal: start search at this block's bit-pair (within @buffer)   * @state: GFS2_BLKST_XXX the state of the block we're looking for. @@ -195,8 +225,6 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,  	u64 mask = 0x5555555555555555ULL;  	u32 bit; -	BUG_ON(state > 3); -  	/* Mask off bits we don't care about at the start of the search */  	mask <<= spoint;  	tmp = gfs2_bit_search(ptr, mask, state); @@ -218,7 +246,164 @@ static u32 gfs2_bitfit(const u8 *buf, const unsigned int len,  }  /** + * gfs2_rbm_from_block - Set the rbm based upon rgd and block number + * @rbm: The rbm with rgd already set correctly + * @block: The block number (filesystem relative) + * + * This sets the bi and offset members of an rbm based on a + * resource group and a filesystem relative block number. The + * resource group must be set in the rbm on entry, the bi and + * offset members will be set by this function. + * + * Returns: 0 on success, or an error code + */ + +static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block) +{ +	u64 rblock = block - rbm->rgd->rd_data0; + +	if (WARN_ON_ONCE(rblock > UINT_MAX)) +		return -EINVAL; +	if (block >= rbm->rgd->rd_data0 + rbm->rgd->rd_data) +		return -E2BIG; + +	rbm->bii = 0; +	rbm->offset = (u32)(rblock); +	/* Check if the block is within the first block */ +	if (rbm->offset < rbm_bi(rbm)->bi_blocks) +		return 0; + +	/* Adjust for the size diff between gfs2_meta_header and gfs2_rgrp */ +	rbm->offset += (sizeof(struct gfs2_rgrp) - +			sizeof(struct gfs2_meta_header)) * GFS2_NBBY; +	rbm->bii = rbm->offset / rbm->rgd->rd_sbd->sd_blocks_per_bitmap; +	rbm->offset -= rbm->bii * rbm->rgd->rd_sbd->sd_blocks_per_bitmap; +	return 0; +} + +/** + * gfs2_rbm_incr - increment an rbm structure + * @rbm: The rbm with rgd already set correctly + * + * This function takes an existing rbm structure and increments it to the next + * viable block offset. 
+ * + * Returns: If incrementing the offset would cause the rbm to go past the + *          end of the rgrp, true is returned, otherwise false. + * + */ + +static bool gfs2_rbm_incr(struct gfs2_rbm *rbm) +{ +	if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */ +		rbm->offset++; +		return false; +	} +	if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */ +		return true; + +	rbm->offset = 0; +	rbm->bii++; +	return false; +} + +/** + * gfs2_unaligned_extlen - Look for free blocks which are not byte aligned + * @rbm: Position to search (value/result) + * @n_unaligned: Number of unaligned blocks to check + * @len: Decremented for each block found (terminate on zero) + * + * Returns: true if a non-free block is encountered + */ + +static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len) +{ +	u32 n; +	u8 res; + +	for (n = 0; n < n_unaligned; n++) { +		res = gfs2_testbit(rbm); +		if (res != GFS2_BLKST_FREE) +			return true; +		(*len)--; +		if (*len == 0) +			return true; +		if (gfs2_rbm_incr(rbm)) +			return true; +	} + +	return false; +} + +/** + * gfs2_free_extlen - Return extent length of free blocks + * @rrbm: Starting position + * @len: Max length to check + * + * Starting at the block specified by the rbm, see how many free blocks + * there are, not reading more than len blocks ahead. This can be done + * using memchr_inv when the blocks are byte aligned, but has to be done + * on a block by block basis in case of unaligned blocks. Also this + * function can cope with bitmap boundaries (although it must stop on + * a resource group boundary) + * + * Returns: Number of free blocks in the extent + */ + +static u32 gfs2_free_extlen(const struct gfs2_rbm *rrbm, u32 len) +{ +	struct gfs2_rbm rbm = *rrbm; +	u32 n_unaligned = rbm.offset & 3; +	u32 size = len; +	u32 bytes; +	u32 chunk_size; +	u8 *ptr, *start, *end; +	u64 block; +	struct gfs2_bitmap *bi; + +	if (n_unaligned && +	    gfs2_unaligned_extlen(&rbm, 4 - n_unaligned, &len)) +		goto out; + +	n_unaligned = len & 3; +	/* Start is now byte aligned */ +	while (len > 3) { +		bi = rbm_bi(&rbm); +		start = bi->bi_bh->b_data; +		if (bi->bi_clone) +			start = bi->bi_clone; +		end = start + bi->bi_bh->b_size; +		start += bi->bi_offset; +		BUG_ON(rbm.offset & 3); +		start += (rbm.offset / GFS2_NBBY); +		bytes = min_t(u32, len / GFS2_NBBY, (end - start)); +		ptr = memchr_inv(start, 0, bytes); +		chunk_size = ((ptr == NULL) ? 
bytes : (ptr - start)); +		chunk_size *= GFS2_NBBY; +		BUG_ON(len < chunk_size); +		len -= chunk_size; +		block = gfs2_rbm_to_block(&rbm); +		if (gfs2_rbm_from_block(&rbm, block + chunk_size)) { +			n_unaligned = 0; +			break; +		} +		if (ptr) { +			n_unaligned = 3; +			break; +		} +		n_unaligned = len & 3; +	} + +	/* Deal with any bits left over at the end */ +	if (n_unaligned) +		gfs2_unaligned_extlen(&rbm, n_unaligned, &len); +out: +	return size - len; +} + +/**   * gfs2_bitcount - count the number of bits in a certain state + * @rgd: the resource group descriptor   * @buffer: the buffer that holds the bitmaps   * @buflen: the length (in bytes) of the buffer   * @state: the state of the block we're looking for @@ -252,7 +437,6 @@ static u32 gfs2_bitcount(struct gfs2_rgrpd *rgd, const u8 *buffer,  /**   * gfs2_rgrp_verify - Verify that a resource group is consistent - * @sdp: the filesystem   * @rgd: the rgrp   *   */ @@ -310,25 +494,38 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)  /**   * gfs2_blk2rgrpd - Find resource group for a given data/meta block number   * @sdp: The GFS2 superblock - * @n: The data block number + * @blk: The data block number + * @exact: True if this needs to be an exact match   *   * Returns: The resource group, or NULL if not found   */ -struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk) +struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact)  { -	struct gfs2_rgrpd *rgd; +	struct rb_node *n, *next; +	struct gfs2_rgrpd *cur;  	spin_lock(&sdp->sd_rindex_spin); - -	list_for_each_entry(rgd, &sdp->sd_rindex_mru_list, rd_list_mru) { -		if (rgrp_contains_block(rgd, blk)) { -			list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list); +	n = sdp->sd_rindex_tree.rb_node; +	while (n) { +		cur = rb_entry(n, struct gfs2_rgrpd, rd_node); +		next = NULL; +		if (blk < cur->rd_addr) +			next = n->rb_left; +		else if (blk >= cur->rd_data0 + cur->rd_data) +			next = n->rb_right; +		if (next == NULL) {  			spin_unlock(&sdp->sd_rindex_spin); -			return rgd; +			if (exact) { +				if (blk < cur->rd_addr) +					return NULL; +				if (blk >= cur->rd_data0 + cur->rd_data) +					return NULL; +			} +			return cur;  		} +		n = next;  	} -  	spin_unlock(&sdp->sd_rindex_spin);  	return NULL; @@ -343,66 +540,209 @@ struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk)  struct gfs2_rgrpd *gfs2_rgrpd_get_first(struct gfs2_sbd *sdp)  { -	gfs2_assert(sdp, !list_empty(&sdp->sd_rindex_list)); -	return list_entry(sdp->sd_rindex_list.next, struct gfs2_rgrpd, rd_list); +	const struct rb_node *n; +	struct gfs2_rgrpd *rgd; + +	spin_lock(&sdp->sd_rindex_spin); +	n = rb_first(&sdp->sd_rindex_tree); +	rgd = rb_entry(n, struct gfs2_rgrpd, rd_node); +	spin_unlock(&sdp->sd_rindex_spin); + +	return rgd;  }  /**   * gfs2_rgrpd_get_next - get the next RG - * @rgd: A RG + * @rgd: the resource group descriptor   *   * Returns: The next rgrp   */  struct gfs2_rgrpd *gfs2_rgrpd_get_next(struct gfs2_rgrpd *rgd)  { -	if (rgd->rd_list.next == &rgd->rd_sbd->sd_rindex_list) +	struct gfs2_sbd *sdp = rgd->rd_sbd; +	const struct rb_node *n; + +	spin_lock(&sdp->sd_rindex_spin); +	n = rb_next(&rgd->rd_node); +	if (n == NULL) +		n = rb_first(&sdp->sd_rindex_tree); + +	if (unlikely(&rgd->rd_node == n)) { +		spin_unlock(&sdp->sd_rindex_spin);  		return NULL; -	return list_entry(rgd->rd_list.next, struct gfs2_rgrpd, rd_list); +	} +	rgd = rb_entry(n, struct gfs2_rgrpd, rd_node); +	spin_unlock(&sdp->sd_rindex_spin); +	return rgd; +} + +void 
gfs2_free_clones(struct gfs2_rgrpd *rgd) +{ +	int x; + +	for (x = 0; x < rgd->rd_length; x++) { +		struct gfs2_bitmap *bi = rgd->rd_bits + x; +		kfree(bi->bi_clone); +		bi->bi_clone = NULL; +	} +} + +/** + * gfs2_rs_alloc - make sure we have a reservation assigned to the inode + * @ip: the inode for this reservation + */ +int gfs2_rs_alloc(struct gfs2_inode *ip) +{ +	int error = 0; + +	down_write(&ip->i_rw_mutex); +	if (ip->i_res) +		goto out; + +	ip->i_res = kmem_cache_zalloc(gfs2_rsrv_cachep, GFP_NOFS); +	if (!ip->i_res) { +		error = -ENOMEM; +		goto out; +	} + +	RB_CLEAR_NODE(&ip->i_res->rs_node); +out: +	up_write(&ip->i_rw_mutex); +	return error; +} + +static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs) +{ +	gfs2_print_dbg(seq, "  B: n:%llu s:%llu b:%u f:%u\n", +		       (unsigned long long)rs->rs_inum, +		       (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm), +		       rs->rs_rbm.offset, rs->rs_free);  } -static void clear_rgrpdi(struct gfs2_sbd *sdp) +/** + * __rs_deltree - remove a multi-block reservation from the rgd tree + * @rs: The reservation to remove + * + */ +static void __rs_deltree(struct gfs2_blkreserv *rs)  { -	struct list_head *head;  	struct gfs2_rgrpd *rgd; -	struct gfs2_glock *gl; -	spin_lock(&sdp->sd_rindex_spin); -	sdp->sd_rindex_forward = NULL; -	spin_unlock(&sdp->sd_rindex_spin); +	if (!gfs2_rs_active(rs)) +		return; + +	rgd = rs->rs_rbm.rgd; +	trace_gfs2_rs(rs, TRACE_RS_TREEDEL); +	rb_erase(&rs->rs_node, &rgd->rd_rstree); +	RB_CLEAR_NODE(&rs->rs_node); + +	if (rs->rs_free) { +		struct gfs2_bitmap *bi = rbm_bi(&rs->rs_rbm); + +		/* return reserved blocks to the rgrp */ +		BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free); +		rs->rs_rbm.rgd->rd_reserved -= rs->rs_free; +		/* The rgrp extent failure point is likely not to increase; +		   it will only do so if the freed blocks are somehow +		   contiguous with a span of free blocks that follows. Still, +		   it will force the number to be recalculated later. */ +		rgd->rd_extfail_pt += rs->rs_free; +		rs->rs_free = 0; +		clear_bit(GBF_FULL, &bi->bi_flags); +	} +} + +/** + * gfs2_rs_deltree - remove a multi-block reservation from the rgd tree + * @rs: The reservation to remove + * + */ +void gfs2_rs_deltree(struct gfs2_blkreserv *rs) +{ +	struct gfs2_rgrpd *rgd; + +	rgd = rs->rs_rbm.rgd; +	if (rgd) { +		spin_lock(&rgd->rd_rsspin); +		__rs_deltree(rs); +		spin_unlock(&rgd->rd_rsspin); +	} +} + +/** + * gfs2_rs_delete - delete a multi-block reservation + * @ip: The inode for this reservation + * @wcount: The inode's write count, or NULL + * + */ +void gfs2_rs_delete(struct gfs2_inode *ip, atomic_t *wcount) +{ +	down_write(&ip->i_rw_mutex); +	if (ip->i_res && ((wcount == NULL) || (atomic_read(wcount) <= 1))) { +		gfs2_rs_deltree(ip->i_res); +		BUG_ON(ip->i_res->rs_free); +		kmem_cache_free(gfs2_rsrv_cachep, ip->i_res); +		ip->i_res = NULL; +	} +	up_write(&ip->i_rw_mutex); +} + +/** + * return_all_reservations - return all reserved blocks back to the rgrp. + * @rgd: the rgrp that needs its space back + * + * We previously reserved a bunch of blocks for allocation. Now we need to + * give them back. This leaves the reservation structures intact, but removes + * all of their corresponding "no-fly zones". 
+ */ +static void return_all_reservations(struct gfs2_rgrpd *rgd) +{ +	struct rb_node *n; +	struct gfs2_blkreserv *rs; + +	spin_lock(&rgd->rd_rsspin); +	while ((n = rb_first(&rgd->rd_rstree))) { +		rs = rb_entry(n, struct gfs2_blkreserv, rs_node); +		__rs_deltree(rs); +	} +	spin_unlock(&rgd->rd_rsspin); +} -	head = &sdp->sd_rindex_list; -	while (!list_empty(head)) { -		rgd = list_entry(head->next, struct gfs2_rgrpd, rd_list); +void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) +{ +	struct rb_node *n; +	struct gfs2_rgrpd *rgd; +	struct gfs2_glock *gl; + +	while ((n = rb_first(&sdp->sd_rindex_tree))) { +		rgd = rb_entry(n, struct gfs2_rgrpd, rd_node);  		gl = rgd->rd_gl; -		list_del(&rgd->rd_list); -		list_del(&rgd->rd_list_mru); +		rb_erase(n, &sdp->sd_rindex_tree);  		if (gl) { +			spin_lock(&gl->gl_spin);  			gl->gl_object = NULL; +			spin_unlock(&gl->gl_spin); +			gfs2_glock_add_to_lru(gl);  			gfs2_glock_put(gl);  		} +		gfs2_free_clones(rgd);  		kfree(rgd->rd_bits); +		return_all_reservations(rgd);  		kmem_cache_free(gfs2_rgrpd_cachep, rgd);  	}  } -void gfs2_clear_rgrpd(struct gfs2_sbd *sdp) -{ -	mutex_lock(&sdp->sd_rindex_mutex); -	clear_rgrpdi(sdp); -	mutex_unlock(&sdp->sd_rindex_mutex); -} -  static void gfs2_rindex_print(const struct gfs2_rgrpd *rgd)  { -	printk(KERN_INFO "  ri_addr = %llu\n", (unsigned long long)rgd->rd_addr); -	printk(KERN_INFO "  ri_length = %u\n", rgd->rd_length); -	printk(KERN_INFO "  ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0); -	printk(KERN_INFO "  ri_data = %u\n", rgd->rd_data); -	printk(KERN_INFO "  ri_bitbytes = %u\n", rgd->rd_bitbytes); +	pr_info("ri_addr = %llu\n", (unsigned long long)rgd->rd_addr); +	pr_info("ri_length = %u\n", rgd->rd_length); +	pr_info("ri_data0 = %llu\n", (unsigned long long)rgd->rd_data0); +	pr_info("ri_data = %u\n", rgd->rd_data); +	pr_info("ri_bitbytes = %u\n", rgd->rd_bitbytes);  }  /** @@ -441,18 +781,21 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)  			bi->bi_offset = sizeof(struct gfs2_rgrp);  			bi->bi_start = 0;  			bi->bi_len = bytes; +			bi->bi_blocks = bytes * GFS2_NBBY;  		/* header block */  		} else if (x == 0) {  			bytes = sdp->sd_sb.sb_bsize - sizeof(struct gfs2_rgrp);  			bi->bi_offset = sizeof(struct gfs2_rgrp);  			bi->bi_start = 0;  			bi->bi_len = bytes; +			bi->bi_blocks = bytes * GFS2_NBBY;  		/* last block */  		} else if (x + 1 == length) {  			bytes = bytes_left;  			bi->bi_offset = sizeof(struct gfs2_meta_header);  			bi->bi_start = rgd->rd_bitbytes - bytes_left;  			bi->bi_len = bytes; +			bi->bi_blocks = bytes * GFS2_NBBY;  		/* other blocks */  		} else {  			bytes = sdp->sd_sb.sb_bsize - @@ -460,6 +803,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)  			bi->bi_offset = sizeof(struct gfs2_meta_header);  			bi->bi_start = rgd->rd_bitbytes - bytes_left;  			bi->bi_len = bytes; +			bi->bi_blocks = bytes * GFS2_NBBY;  		}  		bytes_left -= bytes; @@ -484,6 +828,7 @@ static int compute_bitstructs(struct gfs2_rgrpd *rgd)  /**   * gfs2_ri_total - Total up the file system space, according to the rindex. 
+ * @sdp: the filesystem   *   */  u64 gfs2_ri_total(struct gfs2_sbd *sdp) @@ -492,87 +837,113 @@ u64 gfs2_ri_total(struct gfs2_sbd *sdp)  	struct inode *inode = sdp->sd_rindex;  	struct gfs2_inode *ip = GFS2_I(inode);  	char buf[sizeof(struct gfs2_rindex)]; -	struct file_ra_state ra_state;  	int error, rgrps; -	mutex_lock(&sdp->sd_rindex_mutex); -	file_ra_state_init(&ra_state, inode->i_mapping);  	for (rgrps = 0;; rgrps++) {  		loff_t pos = rgrps * sizeof(struct gfs2_rindex); -		if (pos + sizeof(struct gfs2_rindex) >= i_size_read(inode)) +		if (pos + sizeof(struct gfs2_rindex) > i_size_read(inode))  			break; -		error = gfs2_internal_read(ip, &ra_state, buf, &pos, +		error = gfs2_internal_read(ip, buf, &pos,  					   sizeof(struct gfs2_rindex));  		if (error != sizeof(struct gfs2_rindex))  			break;  		total_data += be32_to_cpu(((struct gfs2_rindex *)buf)->ri_data);  	} -	mutex_unlock(&sdp->sd_rindex_mutex);  	return total_data;  } -static void gfs2_rindex_in(struct gfs2_rgrpd *rgd, const void *buf) +static int rgd_insert(struct gfs2_rgrpd *rgd)  { -	const struct gfs2_rindex *str = buf; +	struct gfs2_sbd *sdp = rgd->rd_sbd; +	struct rb_node **newn = &sdp->sd_rindex_tree.rb_node, *parent = NULL; + +	/* Figure out where to put new node */ +	while (*newn) { +		struct gfs2_rgrpd *cur = rb_entry(*newn, struct gfs2_rgrpd, +						  rd_node); + +		parent = *newn; +		if (rgd->rd_addr < cur->rd_addr) +			newn = &((*newn)->rb_left); +		else if (rgd->rd_addr > cur->rd_addr) +			newn = &((*newn)->rb_right); +		else +			return -EEXIST; +	} -	rgd->rd_addr = be64_to_cpu(str->ri_addr); -	rgd->rd_length = be32_to_cpu(str->ri_length); -	rgd->rd_data0 = be64_to_cpu(str->ri_data0); -	rgd->rd_data = be32_to_cpu(str->ri_data); -	rgd->rd_bitbytes = be32_to_cpu(str->ri_bitbytes); +	rb_link_node(&rgd->rd_node, parent, newn); +	rb_insert_color(&rgd->rd_node, &sdp->sd_rindex_tree); +	sdp->sd_rgrps++; +	return 0;  }  /**   * read_rindex_entry - Pull in a new resource index entry from the disk - * @gl: The glock covering the rindex inode + * @ip: Pointer to the rindex inode   * - * Returns: 0 on success, error code otherwise + * Returns: 0 on success, > 0 on EOF, error code otherwise   */ -static int read_rindex_entry(struct gfs2_inode *ip, -			     struct file_ra_state *ra_state) +static int read_rindex_entry(struct gfs2_inode *ip)  {  	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); +	const unsigned bsize = sdp->sd_sb.sb_bsize;  	loff_t pos = sdp->sd_rgrps * sizeof(struct gfs2_rindex); -	char buf[sizeof(struct gfs2_rindex)]; +	struct gfs2_rindex buf;  	int error;  	struct gfs2_rgrpd *rgd; -	error = gfs2_internal_read(ip, ra_state, buf, &pos, +	if (pos >= i_size_read(&ip->i_inode)) +		return 1; + +	error = gfs2_internal_read(ip, (char *)&buf, &pos,  				   sizeof(struct gfs2_rindex)); -	if (!error) -		return 0; -	if (error != sizeof(struct gfs2_rindex)) { -		if (error > 0) -			error = -EIO; -		return error; -	} + +	if (error != sizeof(struct gfs2_rindex)) +		return (error == 0) ? 
1 : error;  	rgd = kmem_cache_zalloc(gfs2_rgrpd_cachep, GFP_NOFS);  	error = -ENOMEM;  	if (!rgd)  		return error; -	mutex_init(&rgd->rd_mutex); -	lops_init_le(&rgd->rd_le, &gfs2_rg_lops);  	rgd->rd_sbd = sdp; +	rgd->rd_addr = be64_to_cpu(buf.ri_addr); +	rgd->rd_length = be32_to_cpu(buf.ri_length); +	rgd->rd_data0 = be64_to_cpu(buf.ri_data0); +	rgd->rd_data = be32_to_cpu(buf.ri_data); +	rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes); +	spin_lock_init(&rgd->rd_rsspin); -	list_add_tail(&rgd->rd_list, &sdp->sd_rindex_list); -	list_add_tail(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list); - -	gfs2_rindex_in(rgd, buf);  	error = compute_bitstructs(rgd);  	if (error) -		return error; +		goto fail;  	error = gfs2_glock_get(sdp, rgd->rd_addr,  			       &gfs2_rgrp_glops, CREATE, &rgd->rd_gl);  	if (error) -		return error; +		goto fail;  	rgd->rd_gl->gl_object = rgd; +	rgd->rd_gl->gl_vm.start = rgd->rd_addr * bsize; +	rgd->rd_gl->gl_vm.end = rgd->rd_gl->gl_vm.start + (rgd->rd_length * bsize) - 1; +	rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;  	rgd->rd_flags &= ~GFS2_RDF_UPTODATE; +	if (rgd->rd_data > sdp->sd_max_rg_data) +		sdp->sd_max_rg_data = rgd->rd_data; +	spin_lock(&sdp->sd_rindex_spin); +	error = rgd_insert(rgd); +	spin_unlock(&sdp->sd_rindex_spin); +	if (!error) +		return 0; + +	error = 0; /* someone else read in the rgrp; free it and ignore it */ +	gfs2_glock_put(rgd->rd_gl); + +fail: +	kfree(rgd->rd_bits); +	kmem_cache_free(gfs2_rgrpd_cachep, rgd);  	return error;  } @@ -586,77 +957,22 @@ static int read_rindex_entry(struct gfs2_inode *ip,  static int gfs2_ri_update(struct gfs2_inode *ip)  {  	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); -	struct inode *inode = &ip->i_inode; -	struct file_ra_state ra_state; -	u64 rgrp_count = i_size_read(inode); -	struct gfs2_rgrpd *rgd; -	unsigned int max_data = 0;  	int error; -	do_div(rgrp_count, sizeof(struct gfs2_rindex)); -	clear_rgrpdi(sdp); - -	file_ra_state_init(&ra_state, inode->i_mapping); -	for (sdp->sd_rgrps = 0; sdp->sd_rgrps < rgrp_count; sdp->sd_rgrps++) { -		error = read_rindex_entry(ip, &ra_state); -		if (error) { -			clear_rgrpdi(sdp); -			return error; -		} -	} - -	list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list) -		if (rgd->rd_data > max_data) -			max_data = rgd->rd_data; -	sdp->sd_max_rg_data = max_data; -	sdp->sd_rindex_uptodate = 1; -	return 0; -} - -/** - * gfs2_ri_update_special - Pull in a new resource index from the disk - * - * This is a special version that's safe to call from gfs2_inplace_reserve_i. - * In this case we know that we don't have any resource groups in memory yet. 
- * - * @ip: pointer to the rindex inode - * - * Returns: 0 on successful update, error code otherwise - */ -static int gfs2_ri_update_special(struct gfs2_inode *ip) -{ -	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); -	struct inode *inode = &ip->i_inode; -	struct file_ra_state ra_state; -	struct gfs2_rgrpd *rgd; -	unsigned int max_data = 0; -	int error; +	do { +		error = read_rindex_entry(ip); +	} while (error == 0); -	file_ra_state_init(&ra_state, inode->i_mapping); -	for (sdp->sd_rgrps = 0;; sdp->sd_rgrps++) { -		/* Ignore partials */ -		if ((sdp->sd_rgrps + 1) * sizeof(struct gfs2_rindex) > -		    i_size_read(inode)) -			break; -		error = read_rindex_entry(ip, &ra_state); -		if (error) { -			clear_rgrpdi(sdp); -			return error; -		} -	} -	list_for_each_entry(rgd, &sdp->sd_rindex_list, rd_list) -		if (rgd->rd_data > max_data) -			max_data = rgd->rd_data; -	sdp->sd_max_rg_data = max_data; +	if (error < 0) +		return error;  	sdp->sd_rindex_uptodate = 1;  	return 0;  }  /** - * gfs2_rindex_hold - Grab a lock on the rindex + * gfs2_rindex_update - Update the rindex if required   * @sdp: The GFS2 superblock - * @ri_gh: the glock holder   *   * We grab a lock on the rindex inode to make sure that it doesn't   * change whilst we are performing an operation. We keep this lock @@ -668,28 +984,29 @@ static int gfs2_ri_update_special(struct gfs2_inode *ip)   * special file, which might have been updated if someone expanded the   * filesystem (via gfs2_grow utility), which adds new resource groups.   * - * Returns: 0 on success, error code otherwise + * Returns: 0 on success, error code otherwise -int gfs2_rindex_hold(struct gfs2_sbd *sdp, struct gfs2_holder *ri_gh) +int gfs2_rindex_update(struct gfs2_sbd *sdp)  {  	struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex);  	struct gfs2_glock *gl = ip->i_gl; -	int error; - -	error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, ri_gh); -	if (error) -		return error; +	struct gfs2_holder ri_gh; +	int error = 0; +	int unlock_required = 0;  	/* Read new copy from disk if we don't have the latest */  	if (!sdp->sd_rindex_uptodate) { -		mutex_lock(&sdp->sd_rindex_mutex); -		if (!sdp->sd_rindex_uptodate) { -			error = gfs2_ri_update(ip); +		if (!gfs2_glock_is_locked_by_me(gl)) { +			error = gfs2_glock_nq_init(gl, LM_ST_SHARED, 0, &ri_gh);  			if (error) -				gfs2_glock_dq_uninit(ri_gh); +				return error; +			unlock_required = 1;  		} -		mutex_unlock(&sdp->sd_rindex_mutex); +		if (!sdp->sd_rindex_uptodate) +			error = gfs2_ri_update(ip); +		if (unlock_required) +			gfs2_glock_dq_uninit(&ri_gh);  	}  	return error; @@ -721,6 +1038,62 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)  	memset(&str->rg_reserved, 0, sizeof(str->rg_reserved));  } +static int gfs2_rgrp_lvb_valid(struct gfs2_rgrpd *rgd) +{ +	struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl; +	struct gfs2_rgrp *str = (struct gfs2_rgrp *)rgd->rd_bits[0].bi_bh->b_data; + +	if (rgl->rl_flags != str->rg_flags || rgl->rl_free != str->rg_free || +	    rgl->rl_dinodes != str->rg_dinodes || +	    rgl->rl_igeneration != str->rg_igeneration) +		return 0; +	return 1; +} + +static void gfs2_rgrp_ondisk2lvb(struct gfs2_rgrp_lvb *rgl, const void *buf) +{ +	const struct gfs2_rgrp *str = buf; + +	rgl->rl_magic = cpu_to_be32(GFS2_MAGIC); +	rgl->rl_flags = str->rg_flags; +	rgl->rl_free = str->rg_free; +	rgl->rl_dinodes = str->rg_dinodes; +	rgl->rl_igeneration = str->rg_igeneration; +	rgl->__pad = 0UL; +} + +static void update_rgrp_lvb_unlinked(struct gfs2_rgrpd *rgd, u32 change) +{ +	struct gfs2_rgrp_lvb *rgl = 
rgd->rd_rgl; +	u32 unlinked = be32_to_cpu(rgl->rl_unlinked) + change; +	rgl->rl_unlinked = cpu_to_be32(unlinked); +} + +static u32 count_unlinked(struct gfs2_rgrpd *rgd) +{ +	struct gfs2_bitmap *bi; +	const u32 length = rgd->rd_length; +	const u8 *buffer = NULL; +	u32 i, goal, count = 0; + +	for (i = 0, bi = rgd->rd_bits; i < length; i++, bi++) { +		goal = 0; +		buffer = bi->bi_bh->b_data + bi->bi_offset; +		WARN_ON(!buffer_uptodate(bi->bi_bh)); +		while (goal < bi->bi_len * GFS2_NBBY) { +			goal = gfs2_bitfit(buffer, bi->bi_len, goal, +					   GFS2_BLKST_UNLINKED); +			if (goal == BFITNOENT) +				break; +			count++; +			goal++; +		} +	} + +	return count; +} + +  /**   * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps   * @rgd: the struct gfs2_rgrpd describing the RG to read in @@ -731,7 +1104,7 @@ static void gfs2_rgrp_out(struct gfs2_rgrpd *rgd, void *buf)   * Returns: errno   */ -int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd) +static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)  {  	struct gfs2_sbd *sdp = rgd->rd_sbd;  	struct gfs2_glock *gl = rgd->rd_gl; @@ -740,16 +1113,8 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)  	unsigned int x, y;  	int error; -	mutex_lock(&rgd->rd_mutex); - -	spin_lock(&sdp->sd_rindex_spin); -	if (rgd->rd_bh_count) { -		rgd->rd_bh_count++; -		spin_unlock(&sdp->sd_rindex_spin); -		mutex_unlock(&rgd->rd_mutex); +	if (rgd->rd_bits[0].bi_bh != NULL)  		return 0; -	} -	spin_unlock(&sdp->sd_rindex_spin);  	for (x = 0; x < length; x++) {  		bi = rgd->rd_bits + x; @@ -775,15 +1140,24 @@ int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)  			clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);  		gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);  		rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); +		rgd->rd_free_clone = rgd->rd_free; +		/* max out the rgrp allocation failure point */ +		rgd->rd_extfail_pt = rgd->rd_free; +	} +	if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) { +		rgd->rd_rgl->rl_unlinked = cpu_to_be32(count_unlinked(rgd)); +		gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, +				     rgd->rd_bits[0].bi_bh->b_data); +	} +	else if (sdp->sd_args.ar_rgrplvb) { +		if (!gfs2_rgrp_lvb_valid(rgd)){ +			gfs2_consist_rgrpd(rgd); +			error = -EIO; +			goto fail; +		} +		if (rgd->rd_rgl->rl_unlinked == 0) +			rgd->rd_flags &= ~GFS2_RDF_CHECK;  	} - -	spin_lock(&sdp->sd_rindex_spin); -	rgd->rd_free_clone = rgd->rd_free; -	rgd->rd_bh_count++; -	spin_unlock(&sdp->sd_rindex_spin); - -	mutex_unlock(&rgd->rd_mutex); -  	return 0;  fail: @@ -793,461 +1167,844 @@ fail:  		bi->bi_bh = NULL;  		gfs2_assert_warn(sdp, !bi->bi_clone);  	} -	mutex_unlock(&rgd->rd_mutex);  	return error;  } -void gfs2_rgrp_bh_hold(struct gfs2_rgrpd *rgd) +static int update_rgrp_lvb(struct gfs2_rgrpd *rgd) +{ +	u32 rl_flags; + +	if (rgd->rd_flags & GFS2_RDF_UPTODATE) +		return 0; + +	if (cpu_to_be32(GFS2_MAGIC) != rgd->rd_rgl->rl_magic) +		return gfs2_rgrp_bh_get(rgd); + +	rl_flags = be32_to_cpu(rgd->rd_rgl->rl_flags); +	rl_flags &= ~GFS2_RDF_MASK; +	rgd->rd_flags &= GFS2_RDF_MASK; +	rgd->rd_flags |= (rl_flags | GFS2_RDF_UPTODATE | GFS2_RDF_CHECK); +	if (rgd->rd_rgl->rl_unlinked == 0) +		rgd->rd_flags &= ~GFS2_RDF_CHECK; +	rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free); +	rgd->rd_free_clone = rgd->rd_free; +	rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes); +	rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration); +	return 0; +} + +int gfs2_rgrp_go_lock(struct gfs2_holder *gh)  { +	struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;  	struct gfs2_sbd *sdp = rgd->rd_sbd; -	
spin_lock(&sdp->sd_rindex_spin); -	gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count); -	rgd->rd_bh_count++; -	spin_unlock(&sdp->sd_rindex_spin); +	if (gh->gh_flags & GL_SKIP && sdp->sd_args.ar_rgrplvb) +		return 0; +	return gfs2_rgrp_bh_get(rgd);  }  /** - * gfs2_rgrp_bh_put - Release RG bitmaps read in with gfs2_rgrp_bh_get() - * @rgd: the struct gfs2_rgrpd describing the RG to read in + * gfs2_rgrp_go_unlock - Release RG bitmaps read in with gfs2_rgrp_bh_get() + * @gh: The glock holder for the resource group   *   */ -void gfs2_rgrp_bh_put(struct gfs2_rgrpd *rgd) +void gfs2_rgrp_go_unlock(struct gfs2_holder *gh)  { -	struct gfs2_sbd *sdp = rgd->rd_sbd; +	struct gfs2_rgrpd *rgd = gh->gh_gl->gl_object;  	int x, length = rgd->rd_length; -	spin_lock(&sdp->sd_rindex_spin); -	gfs2_assert_warn(rgd->rd_sbd, rgd->rd_bh_count); -	if (--rgd->rd_bh_count) { -		spin_unlock(&sdp->sd_rindex_spin); -		return; -	} -  	for (x = 0; x < length; x++) {  		struct gfs2_bitmap *bi = rgd->rd_bits + x; -		kfree(bi->bi_clone); -		bi->bi_clone = NULL; -		brelse(bi->bi_bh); -		bi->bi_bh = NULL; +		if (bi->bi_bh) { +			brelse(bi->bi_bh); +			bi->bi_bh = NULL; +		}  	} -	spin_unlock(&sdp->sd_rindex_spin);  } -static void gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, -				    const struct gfs2_bitmap *bi) +int gfs2_rgrp_send_discards(struct gfs2_sbd *sdp, u64 offset, +			     struct buffer_head *bh, +			     const struct gfs2_bitmap *bi, unsigned minlen, u64 *ptrimmed)  {  	struct super_block *sb = sdp->sd_vfs; -	struct block_device *bdev = sb->s_bdev; -	const unsigned int sects_per_blk = sdp->sd_sb.sb_bsize / -					   bdev_logical_block_size(sb->s_bdev);  	u64 blk;  	sector_t start = 0; -	sector_t nr_sects = 0; +	sector_t nr_blks = 0;  	int rv;  	unsigned int x; +	u32 trimmed = 0; +	u8 diff;  	for (x = 0; x < bi->bi_len; x++) { -		const u8 *orig = bi->bi_bh->b_data + bi->bi_offset + x; -		const u8 *clone = bi->bi_clone + bi->bi_offset + x; -		u8 diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1)); +		const u8 *clone = bi->bi_clone ? 
bi->bi_clone : bi->bi_bh->b_data; +		clone += bi->bi_offset; +		clone += x; +		if (bh) { +			const u8 *orig = bh->b_data + bi->bi_offset + x; +			diff = ~(*orig | (*orig >> 1)) & (*clone | (*clone >> 1)); +		} else { +			diff = ~(*clone | (*clone >> 1)); +		}  		diff &= 0x55;  		if (diff == 0)  			continue;  		blk = offset + ((bi->bi_start + x) * GFS2_NBBY); -		blk *= sects_per_blk; /* convert to sectors */  		while(diff) {  			if (diff & 1) { -				if (nr_sects == 0) +				if (nr_blks == 0)  					goto start_new_extent; -				if ((start + nr_sects) != blk) { -					rv = blkdev_issue_discard(bdev, start, -							    nr_sects, GFP_NOFS, -							    0); -					if (rv) -						goto fail; -					nr_sects = 0; +				if ((start + nr_blks) != blk) { +					if (nr_blks >= minlen) { +						rv = sb_issue_discard(sb, +							start, nr_blks, +							GFP_NOFS, 0); +						if (rv) +							goto fail; +						trimmed += nr_blks; +					} +					nr_blks = 0;  start_new_extent:  					start = blk;  				} -				nr_sects += sects_per_blk; +				nr_blks++;  			}  			diff >>= 2; -			blk += sects_per_blk; +			blk++;  		}  	} -	if (nr_sects) { -		rv = blkdev_issue_discard(bdev, start, nr_sects, GFP_NOFS, 0); +	if (nr_blks >= minlen) { +		rv = sb_issue_discard(sb, start, nr_blks, GFP_NOFS, 0);  		if (rv)  			goto fail; +		trimmed += nr_blks;  	} -	return; +	if (ptrimmed) +		*ptrimmed = trimmed; +	return 0; +  fail: -	fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv); +	if (sdp->sd_args.ar_discard) +		fs_warn(sdp, "error %d on discard request, turning discards off for this filesystem", rv);  	sdp->sd_args.ar_discard = 0; +	return -EIO;  } -void gfs2_rgrp_repolish_clones(struct gfs2_rgrpd *rgd) +/** + * gfs2_fitrim - Generate discard requests for unused bits of the filesystem + * @filp: Any file on the filesystem + * @argp: Pointer to the arguments (also used to pass result) + * + * Returns: 0 on success, otherwise error code + */ + +int gfs2_fitrim(struct file *filp, void __user *argp)  { -	struct gfs2_sbd *sdp = rgd->rd_sbd; -	unsigned int length = rgd->rd_length; +	struct inode *inode = file_inode(filp); +	struct gfs2_sbd *sdp = GFS2_SB(inode); +	struct request_queue *q = bdev_get_queue(sdp->sd_vfs->s_bdev); +	struct buffer_head *bh; +	struct gfs2_rgrpd *rgd; +	struct gfs2_rgrpd *rgd_end; +	struct gfs2_holder gh; +	struct fstrim_range r; +	int ret = 0; +	u64 amt; +	u64 trimmed = 0; +	u64 start, end, minlen;  	unsigned int x; +	unsigned bs_shift = sdp->sd_sb.sb_bsize_shift; -	for (x = 0; x < length; x++) { -		struct gfs2_bitmap *bi = rgd->rd_bits + x; -		if (!bi->bi_clone) -			continue; -		if (sdp->sd_args.ar_discard) -			gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bi); -		clear_bit(GBF_FULL, &bi->bi_flags); -		memcpy(bi->bi_clone + bi->bi_offset, -		       bi->bi_bh->b_data + bi->bi_offset, bi->bi_len); +	if (!capable(CAP_SYS_ADMIN)) +		return -EPERM; + +	if (!blk_queue_discard(q)) +		return -EOPNOTSUPP; + +	if (copy_from_user(&r, argp, sizeof(r))) +		return -EFAULT; + +	ret = gfs2_rindex_update(sdp); +	if (ret) +		return ret; + +	start = r.start >> bs_shift; +	end = start + (r.len >> bs_shift); +	minlen = max_t(u64, r.minlen, +		       q->limits.discard_granularity) >> bs_shift; + +	if (end <= start || minlen > sdp->sd_max_rg_data) +		return -EINVAL; + +	rgd = gfs2_blk2rgrpd(sdp, start, 0); +	rgd_end = gfs2_blk2rgrpd(sdp, end, 0); + +	if ((gfs2_rgrpd_get_first(sdp) == gfs2_rgrpd_get_next(rgd_end)) +	    && (start > rgd_end->rd_data0 + rgd_end->rd_data)) +		return -EINVAL; /* start is 
beyond the end of the fs */ + +	while (1) { + +		ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh); +		if (ret) +			goto out; + +		if (!(rgd->rd_flags & GFS2_RGF_TRIMMED)) { +			/* Trim each bitmap in the rgrp */ +			for (x = 0; x < rgd->rd_length; x++) { +				struct gfs2_bitmap *bi = rgd->rd_bits + x; +				ret = gfs2_rgrp_send_discards(sdp, +						rgd->rd_data0, NULL, bi, minlen, +						&amt); +				if (ret) { +					gfs2_glock_dq_uninit(&gh); +					goto out; +				} +				trimmed += amt; +			} + +			/* Mark rgrp as having been trimmed */ +			ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0); +			if (ret == 0) { +				bh = rgd->rd_bits[0].bi_bh; +				rgd->rd_flags |= GFS2_RGF_TRIMMED; +				gfs2_trans_add_meta(rgd->rd_gl, bh); +				gfs2_rgrp_out(rgd, bh->b_data); +				gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, bh->b_data); +				gfs2_trans_end(sdp); +			} +		} +		gfs2_glock_dq_uninit(&gh); + +		if (rgd == rgd_end) +			break; + +		rgd = gfs2_rgrpd_get_next(rgd);  	} -	spin_lock(&sdp->sd_rindex_spin); -	rgd->rd_free_clone = rgd->rd_free; -	spin_unlock(&sdp->sd_rindex_spin); +out: +	r.len = trimmed << bs_shift; +	if (copy_to_user(argp, &r, sizeof(r))) +		return -EFAULT; + +	return ret;  }  /** - * gfs2_alloc_get - get the struct gfs2_alloc structure for an inode - * @ip: the incore GFS2 inode structure + * rs_insert - insert a new multi-block reservation into the rgrp's rb_tree + * @ip: the inode structure   * - * Returns: the struct gfs2_alloc   */ - -struct gfs2_alloc *gfs2_alloc_get(struct gfs2_inode *ip) +static void rs_insert(struct gfs2_inode *ip)  { -	BUG_ON(ip->i_alloc != NULL); -	ip->i_alloc = kzalloc(sizeof(struct gfs2_alloc), GFP_NOFS); -	return ip->i_alloc; +	struct rb_node **newn, *parent = NULL; +	int rc; +	struct gfs2_blkreserv *rs = ip->i_res; +	struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd; +	u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm); + +	BUG_ON(gfs2_rs_active(rs)); + +	spin_lock(&rgd->rd_rsspin); +	newn = &rgd->rd_rstree.rb_node; +	while (*newn) { +		struct gfs2_blkreserv *cur = +			rb_entry(*newn, struct gfs2_blkreserv, rs_node); + +		parent = *newn; +		rc = rs_cmp(fsblock, rs->rs_free, cur); +		if (rc > 0) +			newn = &((*newn)->rb_right); +		else if (rc < 0) +			newn = &((*newn)->rb_left); +		else { +			spin_unlock(&rgd->rd_rsspin); +			WARN_ON(1); +			return; +		} +	} + +	rb_link_node(&rs->rs_node, parent, newn); +	rb_insert_color(&rs->rs_node, &rgd->rd_rstree); + +	/* Do our rgrp accounting for the reservation */ +	rgd->rd_reserved += rs->rs_free; /* blocks reserved */ +	spin_unlock(&rgd->rd_rsspin); +	trace_gfs2_rs(rs, TRACE_RS_INSERT);  }  /** - * try_rgrp_fit - See if a given reservation will fit in a given RG - * @rgd: the RG data - * @al: the struct gfs2_alloc structure describing the reservation - * - * If there's room for the requested blocks to be allocated from the RG: - *   Sets the $al_rgd field in @al. 
+ * rg_mblk_search - find a group of multiple free blocks to form a reservation + * @rgd: the resource group descriptor + * @ip: pointer to the inode for which we're reserving blocks + * @ap: the allocation parameters   * - * Returns: 1 on success (it fits), 0 on failure (it doesn't fit)   */ -static int try_rgrp_fit(struct gfs2_rgrpd *rgd, struct gfs2_alloc *al) +static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip, +			   const struct gfs2_alloc_parms *ap)  { -	struct gfs2_sbd *sdp = rgd->rd_sbd; -	int ret = 0; - -	if (rgd->rd_flags & (GFS2_RGF_NOALLOC | GFS2_RDF_ERROR)) -		return 0; +	struct gfs2_rbm rbm = { .rgd = rgd, }; +	u64 goal; +	struct gfs2_blkreserv *rs = ip->i_res; +	u32 extlen; +	u32 free_blocks = rgd->rd_free_clone - rgd->rd_reserved; +	int ret; +	struct inode *inode = &ip->i_inode; -	spin_lock(&sdp->sd_rindex_spin); -	if (rgd->rd_free_clone >= al->al_requested) { -		al->al_rgd = rgd; -		ret = 1; +	if (S_ISDIR(inode->i_mode)) +		extlen = 1; +	else { +		extlen = max_t(u32, atomic_read(&rs->rs_sizehint), ap->target); +		extlen = clamp(extlen, RGRP_RSRV_MINBLKS, free_blocks);  	} -	spin_unlock(&sdp->sd_rindex_spin); +	if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen)) +		return; -	return ret; +	/* Find bitmap block that contains bits for goal block */ +	if (rgrp_contains_block(rgd, ip->i_goal)) +		goal = ip->i_goal; +	else +		goal = rgd->rd_last_alloc + rgd->rd_data0; + +	if (WARN_ON(gfs2_rbm_from_block(&rbm, goal))) +		return; + +	ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true, ap); +	if (ret == 0) { +		rs->rs_rbm = rbm; +		rs->rs_free = extlen; +		rs->rs_inum = ip->i_no_addr; +		rs_insert(ip); +	} else { +		if (goal == rgd->rd_last_alloc + rgd->rd_data0) +			rgd->rd_last_alloc = 0; +	}  }  /** - * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes - * @rgd: The rgrp + * gfs2_next_unreserved_block - Return next block that is not reserved + * @rgd: The resource group + * @block: The starting block + * @length: The required length + * @ip: Ignore any reservations for this inode   * - * Returns: 0 if no error - *          The inode, if one has been found, in inode. + * If the block does not appear in any reservation, then return the + * block number unchanged. If it does appear in the reservation, then + * keep looking through the tree of reservations in order to find the + * first block number which is not reserved.   */ -static u64 try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, -			   u64 skip) +static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block, +				      u32 length, +				      const struct gfs2_inode *ip)  { -	u32 goal = 0, block; -	u64 no_addr; -	struct gfs2_sbd *sdp = rgd->rd_sbd; -	unsigned int n; - -	for(;;) { -		if (goal >= rgd->rd_data) +	struct gfs2_blkreserv *rs; +	struct rb_node *n; +	int rc; + +	spin_lock(&rgd->rd_rsspin); +	n = rgd->rd_rstree.rb_node; +	while (n) { +		rs = rb_entry(n, struct gfs2_blkreserv, rs_node); +		rc = rs_cmp(block, length, rs); +		if (rc < 0) +			n = n->rb_left; +		else if (rc > 0) +			n = n->rb_right; +		else  			break; -		down_write(&sdp->sd_log_flush_lock); -		n = 1; -		block = rgblk_search(rgd, goal, GFS2_BLKST_UNLINKED, -				     GFS2_BLKST_UNLINKED, &n); -		up_write(&sdp->sd_log_flush_lock); -		if (block == BFITNOENT) -			break; -		/* rgblk_search can return a block < goal, so we need to -		   keep it marching forward. 
*/ -		no_addr = block + rgd->rd_data0; -		goal++; -		if (*last_unlinked != NO_BLOCK && no_addr <= *last_unlinked) -			continue; -		if (no_addr == skip) -			continue; -		*last_unlinked = no_addr; -		return no_addr;  	} -	rgd->rd_flags &= ~GFS2_RDF_CHECK; -	return 0; +	if (n) { +		while ((rs_cmp(block, length, rs) == 0) && (ip->i_res != rs)) { +			block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free; +			n = n->rb_right; +			if (n == NULL) +				break; +			rs = rb_entry(n, struct gfs2_blkreserv, rs_node); +		} +	} + +	spin_unlock(&rgd->rd_rsspin); +	return block;  }  /** - * recent_rgrp_next - get next RG from "recent" list - * @cur_rgd: current rgrp + * gfs2_reservation_check_and_update - Check for reservations during block alloc + * @rbm: The current position in the resource group + * @ip: The inode for which we are searching for blocks + * @minext: The minimum extent length + * @maxext: A pointer to the maximum extent structure + * + * This checks the current position in the rgrp to see whether there is + * a reservation covering this block. If not then this function is a + * no-op. If there is, then the position is moved to the end of the + * contiguous reservation(s) so that we are pointing at the first + * non-reserved block.   * - * Returns: The next rgrp in the recent list + * Returns: 0 if no reservation, 1 if @rbm has changed, otherwise an error   */ -static struct gfs2_rgrpd *recent_rgrp_next(struct gfs2_rgrpd *cur_rgd) +static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm, +					     const struct gfs2_inode *ip, +					     u32 minext, +					     struct gfs2_extent *maxext)  { -	struct gfs2_sbd *sdp = cur_rgd->rd_sbd; -	struct list_head *head; -	struct gfs2_rgrpd *rgd; +	u64 block = gfs2_rbm_to_block(rbm); +	u32 extlen = 1; +	u64 nblock; +	int ret; + +	/* +	 * If we have a minimum extent length, then skip over any extent +	 * which is less than the min extent length in size. +	 */ +	if (minext) { +		extlen = gfs2_free_extlen(rbm, minext); +		if (extlen <= maxext->len) +			goto fail; +	} -	spin_lock(&sdp->sd_rindex_spin); -	head = &sdp->sd_rindex_mru_list; -	if (unlikely(cur_rgd->rd_list_mru.next == head)) { -		spin_unlock(&sdp->sd_rindex_spin); -		return NULL; +	/* +	 * Check the extent which has been found against the reservations +	 * and skip if parts of it are already reserved +	 */ +	nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip); +	if (nblock == block) { +		if (!minext || extlen >= minext) +			return 0; + +		if (extlen > maxext->len) { +			maxext->len = extlen; +			maxext->rbm = *rbm; +		} +fail: +		nblock = block + extlen;  	} -	rgd = list_entry(cur_rgd->rd_list_mru.next, struct gfs2_rgrpd, rd_list_mru); -	spin_unlock(&sdp->sd_rindex_spin); -	return rgd; +	ret = gfs2_rbm_from_block(rbm, nblock); +	if (ret < 0) +		return ret; +	return 1;  }  /** - * forward_rgrp_get - get an rgrp to try next from full list - * @sdp: The GFS2 superblock + * gfs2_rbm_find - Look for blocks of a particular state + * @rbm: Value/result starting position and final position + * @state: The state which we want to find + * @minext: Pointer to the requested extent length (NULL for a single block) + *          This is updated to be the actual reservation size. + * @ip: If set, check for reservations + * @nowrap: Stop looking at the end of the rgrp, rather than wrapping + *          around until we've reached the starting point. 
+ * @ap: the allocation parameters + * + * Side effects: + * - If looking for free blocks, we set GBF_FULL on each bitmap which + *   has no free blocks in it. + * - If looking for free blocks, we set rd_extfail_pt on each rgrp which + *   has come up short on a free block search.   * - * Returns: The rgrp to try next + * Returns: 0 on success, -ENOSPC if there is no block of the requested state   */ -static struct gfs2_rgrpd *forward_rgrp_get(struct gfs2_sbd *sdp) +static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext, +			 const struct gfs2_inode *ip, bool nowrap, +			 const struct gfs2_alloc_parms *ap)  { -	struct gfs2_rgrpd *rgd; -	unsigned int journals = gfs2_jindex_size(sdp); -	unsigned int rg = 0, x; +	struct buffer_head *bh; +	int initial_bii; +	u32 initial_offset; +	int first_bii = rbm->bii; +	u32 first_offset = rbm->offset; +	u32 offset; +	u8 *buffer; +	int n = 0; +	int iters = rbm->rgd->rd_length; +	int ret; +	struct gfs2_bitmap *bi; +	struct gfs2_extent maxext = { .rbm.rgd = rbm->rgd, }; -	spin_lock(&sdp->sd_rindex_spin); +	/* If we are not starting at the beginning of a bitmap, then we +	 * need to add one to the bitmap count to ensure that we search +	 * the starting bitmap twice. +	 */ +	if (rbm->offset != 0) +		iters++; -	rgd = sdp->sd_rindex_forward; -	if (!rgd) { -		if (sdp->sd_rgrps >= journals) -			rg = sdp->sd_rgrps * sdp->sd_jdesc->jd_jid / journals; +	while(1) { +		bi = rbm_bi(rbm); +		if (test_bit(GBF_FULL, &bi->bi_flags) && +		    (state == GFS2_BLKST_FREE)) +			goto next_bitmap; + +		bh = bi->bi_bh; +		buffer = bh->b_data + bi->bi_offset; +		WARN_ON(!buffer_uptodate(bh)); +		if (state != GFS2_BLKST_UNLINKED && bi->bi_clone) +			buffer = bi->bi_clone + bi->bi_offset; +		initial_offset = rbm->offset; +		offset = gfs2_bitfit(buffer, bi->bi_len, rbm->offset, state); +		if (offset == BFITNOENT) +			goto bitmap_full; +		rbm->offset = offset; +		if (ip == NULL) +			return 0; + +		initial_bii = rbm->bii; +		ret = gfs2_reservation_check_and_update(rbm, ip, +							minext ? *minext : 0, +							&maxext); +		if (ret == 0) +			return 0; +		if (ret > 0) { +			n += (rbm->bii - initial_bii); +			goto next_iter; +		} +		if (ret == -E2BIG) { +			rbm->bii = 0; +			rbm->offset = 0; +			n += (rbm->bii - initial_bii); +			goto res_covered_end_of_rgrp; +		} +		return ret; -		for (x = 0, rgd = gfs2_rgrpd_get_first(sdp); x < rg; -		     x++, rgd = gfs2_rgrpd_get_next(rgd)) -			/* Do Nothing */; +bitmap_full:	/* Mark bitmap as full and fall through */ +		if ((state == GFS2_BLKST_FREE) && initial_offset == 0) { +			struct gfs2_bitmap *bi = rbm_bi(rbm); +			set_bit(GBF_FULL, &bi->bi_flags); +		} -		sdp->sd_rindex_forward = rgd; +next_bitmap:	/* Find next bitmap in the rgrp */ +		rbm->offset = 0; +		rbm->bii++; +		if (rbm->bii == rbm->rgd->rd_length) +			rbm->bii = 0; +res_covered_end_of_rgrp: +		if ((rbm->bii == 0) && nowrap) +			break; +		n++; +next_iter: +		if (n >= iters) +			break;  	} -	spin_unlock(&sdp->sd_rindex_spin); +	if (minext == NULL || state != GFS2_BLKST_FREE) +		return -ENOSPC; + +	/* If the extent was too small, and it's smaller than the smallest +	   to have failed before, remember for future reference that it's +	   useless to search this rgrp again for this amount or more. */ +	if ((first_offset == 0) && (first_bii == 0) && +	    (*minext < rbm->rgd->rd_extfail_pt)) +		rbm->rgd->rd_extfail_pt = *minext; + +	/* If the maximum extent we found is big enough to fulfill the +	   minimum requirements, use it anyway. 
*/ +	if (maxext.len) { +		*rbm = maxext.rbm; +		*minext = maxext.len; +		return 0; +	} -	return rgd; +	return -ENOSPC;  }  /** - * forward_rgrp_set - set the forward rgrp pointer - * @sdp: the filesystem - * @rgd: The new forward rgrp + * try_rgrp_unlink - Look for any unlinked, allocated, but unused inodes + * @rgd: The rgrp + * @last_unlinked: block address of the last dinode we unlinked + * @skip: block address we should explicitly not unlink   * + * Returns: 0 if no error + *          The inode, if one has been found, in inode.   */ -static void forward_rgrp_set(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd) +static void try_rgrp_unlink(struct gfs2_rgrpd *rgd, u64 *last_unlinked, u64 skip)  { -	spin_lock(&sdp->sd_rindex_spin); -	sdp->sd_rindex_forward = rgd; -	spin_unlock(&sdp->sd_rindex_spin); -} +	u64 block; +	struct gfs2_sbd *sdp = rgd->rd_sbd; +	struct gfs2_glock *gl; +	struct gfs2_inode *ip; +	int error; +	int found = 0; +	struct gfs2_rbm rbm = { .rgd = rgd, .bii = 0, .offset = 0 }; -/** - * get_local_rgrp - Choose and lock a rgrp for allocation - * @ip: the inode to reserve space for - * @rgp: the chosen and locked rgrp - * - * Try to acquire rgrp in way which avoids contending with others. - * - * Returns: errno - *          unlinked: the block address of an unlinked block to be reclaimed - */ +	while (1) { +		down_write(&sdp->sd_log_flush_lock); +		error = gfs2_rbm_find(&rbm, GFS2_BLKST_UNLINKED, NULL, NULL, +				      true, NULL); +		up_write(&sdp->sd_log_flush_lock); +		if (error == -ENOSPC) +			break; +		if (WARN_ON_ONCE(error)) +			break; -static int get_local_rgrp(struct gfs2_inode *ip, u64 *unlinked, -			  u64 *last_unlinked) -{ -	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); -	struct gfs2_rgrpd *rgd, *begin = NULL; -	struct gfs2_alloc *al = ip->i_alloc; -	int flags = LM_FLAG_TRY; -	int skipped = 0; -	int loops = 0; -	int error, rg_locked; +		block = gfs2_rbm_to_block(&rbm); +		if (gfs2_rbm_from_block(&rbm, block + 1)) +			break; +		if (*last_unlinked != NO_BLOCK && block <= *last_unlinked) +			continue; +		if (block == skip) +			continue; +		*last_unlinked = block; -	*unlinked = 0; -	rgd = gfs2_blk2rgrpd(sdp, ip->i_goal); +		error = gfs2_glock_get(sdp, block, &gfs2_inode_glops, CREATE, &gl); +		if (error) +			continue; -	while (rgd) { -		rg_locked = 0; +		/* If the inode is already in cache, we can ignore it here +		 * because the existing inode disposal code will deal with +		 * it when all refs have gone away. Accessing gl_object like +		 * this is not safe in general. Here it is ok because we do +		 * not dereference the pointer, and we only need an approx +		 * answer to whether it is NULL or not. +		 */ +		ip = gl->gl_object; -		if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) { -			rg_locked = 1; -			error = 0; -		} else { -			error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, -						   LM_FLAG_TRY, &al->al_rgd_gh); -		} -		switch (error) { -		case 0: -			if (try_rgrp_fit(rgd, al)) -				goto out; -			/* If the rg came in already locked, there's no -			   way we can recover from a failed try_rgrp_unlink -			   because that would require an iput which can only -			   happen after the rgrp is unlocked. 
*/ -			if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK) -				*unlinked = try_rgrp_unlink(rgd, last_unlinked, -							   ip->i_no_addr); -			if (!rg_locked) -				gfs2_glock_dq_uninit(&al->al_rgd_gh); -			if (*unlinked) -				return -EAGAIN; -			/* fall through */ -		case GLR_TRYFAILED: -			rgd = recent_rgrp_next(rgd); -			break; +		if (ip || queue_work(gfs2_delete_workqueue, &gl->gl_delete) == 0) +			gfs2_glock_put(gl); +		else +			found++; -		default: -			return error; -		} +		/* Limit reclaim to sensible number of tasks */ +		if (found > NR_CPUS) +			return;  	} -	/* Go through full list of rgrps */ +	rgd->rd_flags &= ~GFS2_RDF_CHECK; +	return; +} -	begin = rgd = forward_rgrp_get(sdp); +/** + * gfs2_rgrp_congested - Use stats to figure out whether an rgrp is congested + * @rgd: The rgrp in question + * @loops: An indication of how picky we can be (0=very, 1=less so) + * + * This function uses the recently added glock statistics in order to + * figure out whether a particular resource group is suffering from + * contention from multiple nodes. This is done purely on the basis + * of timings, since this is the only data we have to work with and + * our aim here is to reject a resource group which is highly contended + * but (very important) not to do this too often in order to ensure that + * we do not land up introducing fragmentation by changing resource + * groups when not actually required. + * + * The calculation is fairly simple: we want to know whether the SRTTB + * (i.e. smoothed round trip time for blocking operations) to acquire + * the lock for this rgrp's glock is significantly greater than the + * time taken for resource groups on average. We introduce a margin in + * the form of the variable @var which is computed as the sum of the two + * respective variances, and multiplied by a factor depending on @loops + * and whether we have a lot of data to base the decision on. This is + * then tested against the square difference of the means in order to + * decide whether the result is statistically significant or not. 
+ * + * Returns: A boolean verdict on the congestion status + */ -	for (;;) { -		rg_locked = 0; +static bool gfs2_rgrp_congested(const struct gfs2_rgrpd *rgd, int loops) +{ +	const struct gfs2_glock *gl = rgd->rd_gl; +	const struct gfs2_sbd *sdp = gl->gl_sbd; +	struct gfs2_lkstats *st; +	s64 r_dcount, l_dcount; +	s64 r_srttb, l_srttb; +	s64 srttb_diff; +	s64 sqr_diff; +	s64 var; + +	preempt_disable(); +	st = &this_cpu_ptr(sdp->sd_lkstats)->lkstats[LM_TYPE_RGRP]; +	r_srttb = st->stats[GFS2_LKS_SRTTB]; +	r_dcount = st->stats[GFS2_LKS_DCOUNT]; +	var = st->stats[GFS2_LKS_SRTTVARB] + +	      gl->gl_stats.stats[GFS2_LKS_SRTTVARB]; +	preempt_enable(); + +	l_srttb = gl->gl_stats.stats[GFS2_LKS_SRTTB]; +	l_dcount = gl->gl_stats.stats[GFS2_LKS_DCOUNT]; + +	if ((l_dcount < 1) || (r_dcount < 1) || (r_srttb == 0)) +		return false; + +	srttb_diff = r_srttb - l_srttb; +	sqr_diff = srttb_diff * srttb_diff; + +	var *= 2; +	if (l_dcount < 8 || r_dcount < 8) +		var *= 2; +	if (loops == 1) +		var *= 2; + +	return ((srttb_diff < 0) && (sqr_diff > var)); +} -		if (gfs2_glock_is_locked_by_me(rgd->rd_gl)) { -			rg_locked = 1; -			error = 0; -		} else { -			error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, flags, -						   &al->al_rgd_gh); -		} -		switch (error) { -		case 0: -			if (try_rgrp_fit(rgd, al)) -				goto out; -			if (!rg_locked && rgd->rd_flags & GFS2_RDF_CHECK) -				*unlinked = try_rgrp_unlink(rgd, last_unlinked, -							    ip->i_no_addr); -			if (!rg_locked) -				gfs2_glock_dq_uninit(&al->al_rgd_gh); -			if (*unlinked) -				return -EAGAIN; -			break; +/** + * gfs2_rgrp_used_recently + * @rs: The block reservation with the rgrp to test + * @msecs: The time limit in milliseconds + * + * Returns: True if the rgrp glock has been used within the time limit + */ +static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs, +				    u64 msecs) +{ +	u64 tdiff; -		case GLR_TRYFAILED: -			skipped++; -			break; +	tdiff = ktime_to_ns(ktime_sub(ktime_get_real(), +                            rs->rs_rbm.rgd->rd_gl->gl_dstamp)); -		default: -			return error; -		} +	return tdiff > (msecs * 1000 * 1000); +} -		rgd = gfs2_rgrpd_get_next(rgd); -		if (!rgd) -			rgd = gfs2_rgrpd_get_first(sdp); - -		if (rgd == begin) { -			if (++loops >= 3) -				return -ENOSPC; -			if (!skipped) -				loops++; -			flags = 0; -			if (loops == 2) -				gfs2_log_flush(sdp, NULL); -		} -	} +static u32 gfs2_orlov_skip(const struct gfs2_inode *ip) +{ +	const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); +	u32 skip; -out: -	if (begin) { -		spin_lock(&sdp->sd_rindex_spin); -		list_move(&rgd->rd_list_mru, &sdp->sd_rindex_mru_list); -		spin_unlock(&sdp->sd_rindex_spin); -		rgd = gfs2_rgrpd_get_next(rgd); -		if (!rgd) -			rgd = gfs2_rgrpd_get_first(sdp); -		forward_rgrp_set(sdp, rgd); -	} +	get_random_bytes(&skip, sizeof(skip)); +	return skip % sdp->sd_rgrps; +} -	return 0; +static bool gfs2_select_rgrp(struct gfs2_rgrpd **pos, const struct gfs2_rgrpd *begin) +{ +	struct gfs2_rgrpd *rgd = *pos; +	struct gfs2_sbd *sdp = rgd->rd_sbd; + +	rgd = gfs2_rgrpd_get_next(rgd); +	if (rgd == NULL) +		rgd = gfs2_rgrpd_get_first(sdp); +	*pos = rgd; +	if (rgd != begin) /* If we didn't wrap */ +		return true; +	return false;  }  /** - * gfs2_inplace_reserve_i - Reserve space in the filesystem + * gfs2_inplace_reserve - Reserve space in the filesystem   * @ip: the inode to reserve space for + * @ap: the allocation parameters   *   * Returns: errno   */ -int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex, -			   char *file, unsigned int 
 /**
- * gfs2_inplace_reserve_i - Reserve space in the filesystem
+ * gfs2_inplace_reserve - Reserve space in the filesystem
  * @ip: the inode to reserve space for
+ * @ap: the allocation parameters
  *
  * Returns: errno
  */
-int gfs2_inplace_reserve_i(struct gfs2_inode *ip, int hold_rindex,
-			   char *file, unsigned int line)
+int gfs2_inplace_reserve(struct gfs2_inode *ip, const struct gfs2_alloc_parms *ap)
 {
 	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct gfs2_alloc *al = ip->i_alloc;
-	int error = 0;
-	u64 last_unlinked = NO_BLOCK, unlinked;
+	struct gfs2_rgrpd *begin = NULL;
+	struct gfs2_blkreserv *rs = ip->i_res;
+	int error = 0, rg_locked, flags = 0;
+	u64 last_unlinked = NO_BLOCK;
+	int loops = 0;
+	u32 skip = 0;
-	if (gfs2_assert_warn(sdp, al->al_requested))
+	if (sdp->sd_args.ar_rgrplvb)
+		flags |= GL_SKIP;
+	if (gfs2_assert_warn(sdp, ap->target))
 		return -EINVAL;
-
-try_again:
-	if (hold_rindex) {
-		/* We need to hold the rindex unless the inode we're using is
-		   the rindex itself, in which case it's already held. */
-		if (ip != GFS2_I(sdp->sd_rindex))
-			error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
-		else if (!sdp->sd_rgrps) /* We may not have the rindex read
-					    in, so: */
-			error = gfs2_ri_update_special(ip);
+	if (gfs2_rs_active(rs)) {
+		begin = rs->rs_rbm.rgd;
+	} else if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, ip->i_goal)) {
+		rs->rs_rbm.rgd = begin = ip->i_rgd;
+	} else {
+		rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
 	}
+	if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV))
+		skip = gfs2_orlov_skip(ip);
+	if (rs->rs_rbm.rgd == NULL)
+		return -EBADSLT;
+
+	while (loops < 3) {
+		rg_locked = 1;
+
+		if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
+			rg_locked = 0;
+			if (skip && skip--)
+				goto next_rgrp;
+			if (!gfs2_rs_active(rs) && (loops < 2) &&
+			     gfs2_rgrp_used_recently(rs, 1000) &&
+			     gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
+				goto next_rgrp;
+			error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
+						   LM_ST_EXCLUSIVE, flags,
+						   &rs->rs_rgd_gh);
+			if (unlikely(error))
+				return error;
+			if (!gfs2_rs_active(rs) && (loops < 2) &&
+			    gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
+				goto skip_rgrp;
+			if (sdp->sd_args.ar_rgrplvb) {
+				error = update_rgrp_lvb(rs->rs_rbm.rgd);
+				if (unlikely(error)) {
+					gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
+					return error;
+				}
+			}
+		}
-	if (error)
-		return error;
+		/* Skip unusable resource groups */
+		if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
+						 GFS2_RDF_ERROR)) ||
+		    (ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
+			goto skip_rgrp;
-	/* Find an rgrp suitable for allocation.  If it encounters any unlinked
-	   dinodes along the way, error will equal -EAGAIN and unlinked will
-	   contains it block address. We then need to look up that inode and
-	   try to free it, and try the allocation again. */
-	error = get_local_rgrp(ip, &unlinked, &last_unlinked);
-	if (error) {
-		if (hold_rindex && ip != GFS2_I(sdp->sd_rindex))
-			gfs2_glock_dq_uninit(&al->al_ri_gh);
-		if (error != -EAGAIN)
-			return error;
-
-		gfs2_process_unlinked_inode(ip->i_inode.i_sb, unlinked);
-		/* regardless of whether or not gfs2_process_unlinked_inode
-		   was successful, we don't want to repeat it again.
 */
-		last_unlinked = unlinked;
-		gfs2_log_flush(sdp, NULL);
-		error = 0;
-
-		goto try_again;
+		if (sdp->sd_args.ar_rgrplvb)
+			gfs2_rgrp_bh_get(rs->rs_rbm.rgd);
+
+		/* Get a reservation if we don't already have one */
+		if (!gfs2_rs_active(rs))
+			rg_mblk_search(rs->rs_rbm.rgd, ip, ap);
+
+		/* Skip rgrps when we can't get a reservation on first pass */
+		if (!gfs2_rs_active(rs) && (loops < 1))
+			goto check_rgrp;
+
+		/* If rgrp has enough free space, use it */
+		if (rs->rs_rbm.rgd->rd_free_clone >= ap->target) {
+			ip->i_rgd = rs->rs_rbm.rgd;
+			return 0;
+		}
+
+check_rgrp:
+		/* Check for unlinked inodes which can be reclaimed */
+		if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
+			try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
+					ip->i_no_addr);
+skip_rgrp:
+		/* Drop the reservation if we couldn't use the reserved rgrp */
+		if (gfs2_rs_active(rs))
+			gfs2_rs_deltree(rs);
+
+		/* Unlock rgrp if required */
+		if (!rg_locked)
+			gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
+next_rgrp:
+		/* Find the next rgrp, and continue looking */
+		if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
+			continue;
+		if (skip)
+			continue;
+
+		/* If we've scanned all the rgrps but found no free blocks,
+		 * check for some less likely conditions before
+		 * trying again.
+		 */
+		loops++;
+		/* Check that fs hasn't grown if writing to rindex */
+		if (ip == GFS2_I(sdp->sd_rindex) && !sdp->sd_rindex_uptodate) {
+			error = gfs2_ri_update(ip);
+			if (error)
+				return error;
+		}
+		/* Flushing the log may release space */
+		if (loops == 2)
+			gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);
 	}
-	/* no error, so we have the rgrp set in the inode's allocation. */
-	al->al_file = file;
-	al->al_line = line;
-	return 0;
+	return -ENOSPC;
 }
 
 /**
@@ -1259,20 +2016,10 @@ try_again:
 
 void gfs2_inplace_release(struct gfs2_inode *ip)
 {
-	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct gfs2_alloc *al = ip->i_alloc;
-
-	if (gfs2_assert_warn(sdp, al->al_alloced <= al->al_requested) == -1)
-		fs_warn(sdp, "al_alloced = %u, al_requested = %u "
-			     "al_file = %s, al_line = %u\n",
-		             al->al_alloced, al->al_requested, al->al_file,
-			     al->al_line);
+	struct gfs2_blkreserv *rs = ip->i_res;
-	al->al_rgd = NULL;
-	if (al->al_rgd_gh.gh_gl)
-		gfs2_glock_dq_uninit(&al->al_rgd_gh);
-	if (ip != GFS2_I(sdp->sd_rindex) && al->al_ri_gh.gh_gl)
-		gfs2_glock_dq_uninit(&al->al_ri_gh);
+	if (rs->rs_rgd_gh.gh_gl)
+		gfs2_glock_dq_uninit(&rs->rs_rgd_gh);
 }
 
 /**
@@ -1285,131 +2032,47 @@ void gfs2_inplace_release(struct gfs2_inode *ip)
 
 static unsigned char gfs2_get_block_type(struct gfs2_rgrpd *rgd, u64 block)
 {
-	struct gfs2_bitmap *bi = NULL;
-	u32 length, rgrp_block, buf_block;
-	unsigned int buf;
-	unsigned char type;
-
-	length = rgd->rd_length;
-	rgrp_block = block - rgd->rd_data0;
-
-	for (buf = 0; buf < length; buf++) {
-		bi = rgd->rd_bits + buf;
-		if (rgrp_block < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
-			break;
-	}
-
-	gfs2_assert(rgd->rd_sbd, buf < length);
-	buf_block = rgrp_block - bi->bi_start * GFS2_NBBY;
+	struct gfs2_rbm rbm = { .rgd = rgd, };
+	int ret;
-	type = gfs2_testbit(rgd, bi->bi_bh->b_data + bi->bi_offset,
-			   bi->bi_len, buf_block);
+	ret = gfs2_rbm_from_block(&rbm, block);
+	WARN_ON_ONCE(ret != 0);
-	return type;
+	return gfs2_testbit(&rbm);
 }
 
+
 /**
- * rgblk_search - find a block in @old_state, change allocation
- *           state to @new_state
- * @rgd: the resource group descriptor
- * @goal: the goal block within the RG (start here to search 
for avail block)
- * @old_state: GFS2_BLKST_XXX the before-allocation state to find
- * @new_state: GFS2_BLKST_XXX the after-allocation block state
- * @n: The extent length
+ * gfs2_alloc_extent - allocate an extent from a given bitmap
+ * @rbm: the position of the first block in the extent
+ * @dinode: TRUE if the first block we allocate is for a dinode
+ * @n: The extent length (value/result)
  *
- * Walk rgrp's bitmap to find bits that represent a block in @old_state.
- * Add the found bitmap buffer to the transaction.
+ * Add the bitmap buffer to the transaction.
- * Set the found bits to @new_state to change block's allocation state.
+ * Set the found bits to their new allocation state.
- *
- * This function never fails, because we wouldn't call it unless we
- * know (from reservation results, etc.) that a block is available.
- *
- * Scope of @goal and returned block is just within rgrp, not the whole
- * filesystem.
- *
- * Returns:  the block number allocated
  */
-
-static u32 rgblk_search(struct gfs2_rgrpd *rgd, u32 goal,
-			unsigned char old_state, unsigned char new_state,
-			unsigned int *n)
+static void gfs2_alloc_extent(const struct gfs2_rbm *rbm, bool dinode,
+			     unsigned int *n)
 {
-	struct gfs2_bitmap *bi = NULL;
-	const u32 length = rgd->rd_length;
-	u32 blk = BFITNOENT;
-	unsigned int buf, x;
+	struct gfs2_rbm pos = { .rgd = rbm->rgd, };
 	const unsigned int elen = *n;
-	const u8 *buffer = NULL;
-
-	*n = 0;
-	/* Find bitmap block that contains bits for goal block */
-	for (buf = 0; buf < length; buf++) {
-		bi = rgd->rd_bits + buf;
-		/* Convert scope of "goal" from rgrp-wide to within found bit block */
-		if (goal < (bi->bi_start + bi->bi_len) * GFS2_NBBY) {
-			goal -= bi->bi_start * GFS2_NBBY;
-			goto do_search;
-		}
-	}
-	buf = 0;
-	goal = 0;
-
-do_search:
-	/* Search (up to entire) bitmap in this rgrp for allocatable block.
-	   "x <= length", instead of "x < length", because we typically start
-	   the search in the middle of a bit block, but if we can't find an
-	   allocatable block anywhere else, we want to be able wrap around and
-	   search in the first part of our first-searched bit block.  */
-	for (x = 0; x <= length; x++) {
-		bi = rgd->rd_bits + buf;
-
-		if (test_bit(GBF_FULL, &bi->bi_flags) &&
-		    (old_state == GFS2_BLKST_FREE))
-			goto skip;
-
-		/* The GFS2_BLKST_UNLINKED state doesn't apply to the clone
-		   bitmaps, so we must search the originals for that. */
-		buffer = bi->bi_bh->b_data + bi->bi_offset;
-		if (old_state != GFS2_BLKST_UNLINKED && bi->bi_clone)
-			buffer = bi->bi_clone + bi->bi_offset;
-
-		blk = gfs2_bitfit(buffer, bi->bi_len, goal, old_state);
-		if (blk != BFITNOENT)
-			break;
-
-		if ((goal == 0) && (old_state == GFS2_BLKST_FREE))
-			set_bit(GBF_FULL, &bi->bi_flags);
-
-		/* Try next bitmap block (wrap back to rgrp header if at end) */
skip:
-		buf++;
-		buf %= length;
-		goal = 0;
-	}
+	u64 block;
+	int ret;
-	if (blk == BFITNOENT)
-		return blk;
 	*n = 1;
-	if (old_state == new_state)
-		goto out;
-
-	gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
-	gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
-		    bi->bi_len, blk, new_state);
-	goal = blk;
+	block = gfs2_rbm_to_block(rbm);
+	gfs2_trans_add_meta(rbm->rgd->rd_gl, rbm_bi(rbm)->bi_bh);
+	gfs2_setbit(rbm, true, dinode ? 
GFS2_BLKST_DINODE : GFS2_BLKST_USED);
+	block++;
 	while (*n < elen) {
-		goal++;
-		if (goal >= (bi->bi_len * GFS2_NBBY))
+		ret = gfs2_rbm_from_block(&pos, block);
+		if (ret || gfs2_testbit(&pos) != GFS2_BLKST_FREE)
 			break;
-		if (gfs2_testbit(rgd, buffer, bi->bi_len, goal) !=
-		    GFS2_BLKST_FREE)
-			break;
-		gfs2_setbit(rgd, bi->bi_bh->b_data, bi->bi_clone, bi->bi_offset,
-			    bi->bi_len, goal, new_state);
+		gfs2_trans_add_meta(pos.rgd->rd_gl, rbm_bi(&pos)->bi_bh);
+		gfs2_setbit(&pos, true, GFS2_BLKST_USED);
 		(*n)++;
+		block++;
 	}
-out:
-	return (bi->bi_start * GFS2_NBBY) + blk;
 }
 
 /**
@@ -1425,47 +2088,31 @@ out:
 
 static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
 				     u32 blen, unsigned char new_state)
 {
-	struct gfs2_rgrpd *rgd;
-	struct gfs2_bitmap *bi = NULL;
-	u32 length, rgrp_blk, buf_blk;
-	unsigned int buf;
+	struct gfs2_rbm rbm;
+	struct gfs2_bitmap *bi;
-	rgd = gfs2_blk2rgrpd(sdp, bstart);
-	if (!rgd) {
+	rbm.rgd = gfs2_blk2rgrpd(sdp, bstart, 1);
+	if (!rbm.rgd) {
 		if (gfs2_consist(sdp))
 			fs_err(sdp, "block = %llu\n", (unsigned long long)bstart);
 		return NULL;
 	}
-	length = rgd->rd_length;
-
-	rgrp_blk = bstart - rgd->rd_data0;
-
 	while (blen--) {
-		for (buf = 0; buf < length; buf++) {
-			bi = rgd->rd_bits + buf;
-			if (rgrp_blk < (bi->bi_start + bi->bi_len) * GFS2_NBBY)
-				break;
-		}
-
-		gfs2_assert(rgd->rd_sbd, buf < length);
-
-		buf_blk = rgrp_blk - bi->bi_start * GFS2_NBBY;
-		rgrp_blk++;
-
+		gfs2_rbm_from_block(&rbm, bstart);
+		bi = rbm_bi(&rbm);
+		bstart++;
 		if (!bi->bi_clone) {
 			bi->bi_clone = kmalloc(bi->bi_bh->b_size,
 					       GFP_NOFS | __GFP_NOFAIL);
 			memcpy(bi->bi_clone + bi->bi_offset,
-			       bi->bi_bh->b_data + bi->bi_offset,
-			       bi->bi_len);
+			       bi->bi_bh->b_data + bi->bi_offset, bi->bi_len);
 		}
-		gfs2_trans_add_bh(rgd->rd_gl, bi->bi_bh, 1);
-		gfs2_setbit(rgd, bi->bi_bh->b_data, NULL, bi->bi_offset,
-			    bi->bi_len, buf_blk, new_state);
+		gfs2_trans_add_meta(rbm.rgd->rd_gl, bi->bi_bh);
+		gfs2_setbit(&rbm, false, new_state);
 	}
-	return rgd;
+	return rbm.rgd;
 }
 
 /**
@@ -1475,15 +2122,24 @@ static struct gfs2_rgrpd *rgblk_free(struct gfs2_sbd *sdp, u64 bstart,
  *
  */
 
-int gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
+void gfs2_rgrp_dump(struct seq_file *seq, const struct gfs2_glock *gl)
 {
-	const struct gfs2_rgrpd *rgd = gl->gl_object;
+	struct gfs2_rgrpd *rgd = gl->gl_object;
+	struct gfs2_blkreserv *trs;
+	const struct rb_node *n;
+
 	if (rgd == NULL)
-		return 0;
-	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u\n",
+		return;
+	gfs2_print_dbg(seq, " R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
 		       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
-		       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes);
-	return 0;
+		       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
+		       rgd->rd_reserved, rgd->rd_extfail_pt);
+	spin_lock(&rgd->rd_rsspin);
+	for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
+		trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
+		dump_rs(seq, trs);
+	}
+	spin_unlock(&rgd->rd_rsspin);
 }
 
 static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
@@ -1497,142 +2153,176 @@ static void gfs2_rgrp_error(struct gfs2_rgrpd *rgd)
 }
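As a companion to gfs2_alloc_extent() above, this standalone C sketch walks a GFS2-style bitmap with four two-bit block states per byte, claiming consecutive blocks while they remain free (the map size, the helpers and main() are illustrative only, not kernel code). Note how a state change is applied as an XOR of the current and new two-bit values, the same trick gfs2_setbit() uses.

#include <stdint.h>
#include <stdio.h>

#define NBBY        4 /* four 2-bit block states per byte, as in GFS2 */
#define BLKST_FREE  0
#define BLKST_USED  1

static unsigned int get_state(const uint8_t *map, uint32_t blk)
{
	unsigned int bit = (blk % NBBY) * 2;

	return (map[blk / NBBY] >> bit) & 3;
}

static void set_state(uint8_t *map, uint32_t blk, unsigned int state)
{
	unsigned int bit = (blk % NBBY) * 2;

	/* XOR in the difference between the old and new state so that
	 * only the two bits belonging to this block change. */
	map[blk / NBBY] ^= (get_state(map, blk) ^ state) << bit;
}

/* Claim up to *n contiguous blocks starting at goal, stopping at the
 * first block that is not free; same shape as gfs2_alloc_extent(). */
static void alloc_extent(uint8_t *map, uint32_t nblocks, uint32_t goal,
			 unsigned int *n)
{
	const unsigned int want = *n;

	*n = 0;
	while (*n < want && goal + *n < nblocks &&
	       get_state(map, goal + *n) == BLKST_FREE) {
		set_state(map, goal + *n, BLKST_USED);
		(*n)++;
	}
}

int main(void)
{
	uint8_t map[4] = { 0 }; /* 16 blocks, all free */
	unsigned int n = 8;

	set_state(map, 5, BLKST_USED);  /* pre-existing obstacle */
	alloc_extent(map, 16, 2, &n);
	printf("allocated %u blocks at goal 2\n", n); /* prints 3 */
	return 0;
}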
 /**
- * gfs2_alloc_block - Allocate one or more blocks
- * @ip: the inode to allocate the block for
- * @bn: Used to return the starting block number
- * @n: requested number of blocks/extent length (value/result)
+ * gfs2_adjust_reservation - Adjust (or remove) a reservation after allocation
+ * @ip: The inode we have just allocated blocks for
+ * @rbm: The start of the allocated blocks
+ * @len: The extent length
  *
- * Returns: 0 or error
+ * Adjusts a reservation after an allocation has taken place. If the
+ * reservation does not match the allocation, or if it is now empty,
+ * then it is removed.
  */
-int gfs2_alloc_block(struct gfs2_inode *ip, u64 *bn, unsigned int *n)
+static void gfs2_adjust_reservation(struct gfs2_inode *ip,
+				    const struct gfs2_rbm *rbm, unsigned len)
 {
-	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-	struct buffer_head *dibh;
-	struct gfs2_alloc *al = ip->i_alloc;
-	struct gfs2_rgrpd *rgd;
-	u32 goal, blk;
+	struct gfs2_blkreserv *rs = ip->i_res;
+	struct gfs2_rgrpd *rgd = rbm->rgd;
+	unsigned rlen;
 	u64 block;
-	int error;
-
-	/* Only happens if there is a bug in gfs2, return something distinctive
-	 * to ensure that it is noticed.
-	 */
-	if (al == NULL)
-		return -ECANCELED;
-
-	rgd = al->al_rgd;
-
-	if (rgrp_contains_block(rgd, ip->i_goal))
-		goal = ip->i_goal - rgd->rd_data0;
-	else
-		goal = rgd->rd_last_alloc;
-
-	blk = rgblk_search(rgd, goal, GFS2_BLKST_FREE, GFS2_BLKST_USED, n);
-
-	/* Since all blocks are reserved in advance, this shouldn't happen */
-	if (blk == BFITNOENT)
-		goto rgrp_error;
-
-	rgd->rd_last_alloc = blk;
-	block = rgd->rd_data0 + blk;
-	ip->i_goal = block;
-	error = gfs2_meta_inode_buffer(ip, &dibh);
-	if (error == 0) {
-		struct gfs2_dinode *di = (struct gfs2_dinode *)dibh->b_data;
-		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-		di->di_goal_meta = di->di_goal_data = cpu_to_be64(ip->i_goal);
-		brelse(dibh);
+	int ret;
+
+	spin_lock(&rgd->rd_rsspin);
+	if (gfs2_rs_active(rs)) {
+		if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) {
+			block = gfs2_rbm_to_block(rbm);
+			ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len);
+			rlen = min(rs->rs_free, len);
+			rs->rs_free -= rlen;
+			rgd->rd_reserved -= rlen;
+			trace_gfs2_rs(rs, TRACE_RS_CLAIM);
+			if (rs->rs_free && !ret)
+				goto out;
+		}
+		__rs_deltree(rs);
 	}
-	if (rgd->rd_free < *n)
-		goto rgrp_error;
-
-	rgd->rd_free -= *n;
+out:
+	spin_unlock(&rgd->rd_rsspin);
+}
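The bookkeeping in gfs2_adjust_reservation() reduces to a simple rule: if the allocation was carved from the front of the reservation, advance the window and shrink it; otherwise, or once the window is empty, drop it. A toy, runnable C version of that rule follows (struct blkres and the numbers are made up for illustration; the locking and rbtree removal are omitted).

#include <stdint.h>
#include <stdio.h>

/* Toy reservation: a window of reserved blocks starting at rs_start. */
struct blkres {
	uint64_t rs_start; /* first reserved block */
	uint32_t rs_free;  /* reserved blocks remaining */
};

static uint32_t min_u32(uint32_t a, uint32_t b)
{
	return a < b ? a : b;
}

/* After allocating len blocks at "start", shrink the reservation if the
 * allocation came from its front; otherwise discard it. Loosely follows
 * gfs2_adjust_reservation(). */
static void adjust_reservation(struct blkres *rs, uint64_t start,
			       uint32_t len)
{
	if (rs->rs_free && start == rs->rs_start) {
		uint32_t rlen = min_u32(rs->rs_free, len);

		rs->rs_start += rlen; /* advance past the claimed extent */
		rs->rs_free -= rlen;
		if (rs->rs_free)
			return; /* still useful: keep it */
	}
	rs->rs_free = 0; /* allocation didn't match, or now empty: drop */
}

int main(void)
{
	struct blkres rs = { .rs_start = 100, .rs_free = 10 };

	adjust_reservation(&rs, 100, 4);
	printf("start=%llu free=%u\n",
	       (unsigned long long)rs.rs_start, rs.rs_free); /* 104, 6 */
	return 0;
}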
-	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1);
-	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+/**
+ * gfs2_set_alloc_start - Set starting point for block allocation
+ * @rbm: The rbm which will be set to the required location
+ * @ip: The gfs2 inode
+ * @dinode: Flag to say if allocation includes a new inode
+ *
+ * This sets the starting point from the reservation if one is active;
+ * otherwise it falls back to guessing a start point based on the
+ * inode's goal block or the last allocation point in the rgrp.
+ */
-	al->al_alloced += *n;
+static void gfs2_set_alloc_start(struct gfs2_rbm *rbm,
+				 const struct gfs2_inode *ip, bool dinode)
+{
+	u64 goal;
-	gfs2_statfs_change(sdp, 0, -(s64)*n, 0);
-	gfs2_quota_change(ip, *n, ip->i_inode.i_uid, ip->i_inode.i_gid);
+	if (gfs2_rs_active(ip->i_res)) {
+		*rbm = ip->i_res->rs_rbm;
+		return;
+	}
-	spin_lock(&sdp->sd_rindex_spin);
-	rgd->rd_free_clone -= *n;
-	spin_unlock(&sdp->sd_rindex_spin);
-	trace_gfs2_block_alloc(ip, block, *n, GFS2_BLKST_USED);
-	*bn = block;
-	return 0;
+	if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal))
+		goal = ip->i_goal;
+	else
+		goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0;
-rgrp_error:
-	gfs2_rgrp_error(rgd);
-	return -EIO;
+	gfs2_rbm_from_block(rbm, goal);
 }
 
 /**
- * gfs2_alloc_di - Allocate a dinode
- * @dip: the directory that the inode is going in
- * @bn: the block number which is allocated
+ * gfs2_alloc_blocks - Allocate one or more blocks of data and/or a dinode
+ * @ip: the inode to allocate the block for
+ * @bn: Used to return the starting block number
+ * @nblocks: requested number of blocks/extent length (value/result)
+ * @dinode: 1 if we're allocating a dinode block, else 0
  * @generation: the generation number of the inode
  *
- * Returns: 0 on success or error
+ * Returns: 0 or error
  */
-int gfs2_alloc_di(struct gfs2_inode *dip, u64 *bn, u64 *generation)
+int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
+		      bool dinode, u64 *generation)
 {
-	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
-	struct gfs2_alloc *al = dip->i_alloc;
-	struct gfs2_rgrpd *rgd = al->al_rgd;
-	u32 blk;
-	u64 block;
-	unsigned int n = 1;
+	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+	struct buffer_head *dibh;
+	struct gfs2_rbm rbm = { .rgd = ip->i_rgd, };
+	unsigned int ndata;
+	u64 block; /* block, within the file system scope */
+	int error;
+
+	gfs2_set_alloc_start(&rbm, ip, dinode);
+	error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false, NULL);
-	blk = rgblk_search(rgd, rgd->rd_last_alloc,
-			   GFS2_BLKST_FREE, GFS2_BLKST_DINODE, &n);
+	if (error == -ENOSPC) {
+		gfs2_set_alloc_start(&rbm, ip, dinode);
+		error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false,
+				      NULL);
+	}
 	/* Since all blocks are reserved in advance, this shouldn't happen */
-	if (blk == BFITNOENT)
+	if (error) {
+		fs_warn(sdp, "inum=%llu error=%d, nblocks=%u, full=%d fail_pt=%d\n",
+			(unsigned long long)ip->i_no_addr, error, *nblocks,
+			test_bit(GBF_FULL, &rbm.rgd->rd_bits->bi_flags),
+			rbm.rgd->rd_extfail_pt);
 		goto rgrp_error;
+	}
-	rgd->rd_last_alloc = blk;
-	block = rgd->rd_data0 + blk;
-	if (rgd->rd_free == 0)
+	gfs2_alloc_extent(&rbm, dinode, nblocks);
+	block = gfs2_rbm_to_block(&rbm);
+	rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
+	if (gfs2_rs_active(ip->i_res))
+		gfs2_adjust_reservation(ip, &rbm, *nblocks);
+	ndata = *nblocks;
+	if (dinode)
+		ndata--;
+
+	if (!dinode) {
+		ip->i_goal = block + ndata - 1;
+		error = gfs2_meta_inode_buffer(ip, &dibh);
+		if (error == 0) {
+			struct gfs2_dinode *di =
+				(struct gfs2_dinode *)dibh->b_data;
+			gfs2_trans_add_meta(ip->i_gl, dibh);
+			di->di_goal_meta = di->di_goal_data =
+				cpu_to_be64(ip->i_goal);
+			brelse(dibh);
+		}
+	}
+	if (rbm.rgd->rd_free < *nblocks) {
+		pr_warn("nblocks=%u\n", *nblocks);
 		goto rgrp_error;
+	}
-	rgd->rd_free--;
-	rgd->rd_dinodes++;
-	*generation = rgd->rd_igeneration++;
-	if (*generation == 0)
-		*generation = rgd->rd_igeneration++;
-	gfs2_trans_add_bh(rgd->rd_gl, 
rgd->rd_bits[0].bi_bh, 1); -	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); +	rbm.rgd->rd_free -= *nblocks; +	if (dinode) { +		rbm.rgd->rd_dinodes++; +		*generation = rbm.rgd->rd_igeneration++; +		if (*generation == 0) +			*generation = rbm.rgd->rd_igeneration++; +	} -	al->al_alloced++; +	gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh); +	gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data); +	gfs2_rgrp_ondisk2lvb(rbm.rgd->rd_rgl, rbm.rgd->rd_bits[0].bi_bh->b_data); -	gfs2_statfs_change(sdp, 0, -1, +1); -	gfs2_trans_add_unrevoke(sdp, block, 1); +	gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0); +	if (dinode) +		gfs2_trans_add_unrevoke(sdp, block, *nblocks); -	spin_lock(&sdp->sd_rindex_spin); -	rgd->rd_free_clone--; -	spin_unlock(&sdp->sd_rindex_spin); -	trace_gfs2_block_alloc(dip, block, 1, GFS2_BLKST_DINODE); +	gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid); + +	rbm.rgd->rd_free_clone -= *nblocks; +	trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks, +			       dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);  	*bn = block;  	return 0;  rgrp_error: -	gfs2_rgrp_error(rgd); +	gfs2_rgrp_error(rbm.rgd);  	return -EIO;  }  /** - * gfs2_free_data - free a contiguous run of data block(s) + * __gfs2_free_blocks - free a contiguous run of block(s)   * @ip: the inode these blocks are being freed from   * @bstart: first block of a run of contiguous blocks   * @blen: the length of the block run + * @meta: 1 if the blocks represent metadata   *   */ -void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen) +void __gfs2_free_blocks(struct gfs2_inode *ip, u64 bstart, u32 blen, int meta)  {  	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);  	struct gfs2_rgrpd *rgd; @@ -1640,16 +2330,16 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)  	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE);  	if (!rgd)  		return; -	trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE); +	trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);  	rgd->rd_free += blen; - -	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); +	rgd->rd_flags &= ~GFS2_RGF_TRIMMED; +	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);  	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); +	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); -	gfs2_trans_add_rg(rgd); - -	gfs2_statfs_change(sdp, 0, +blen, 0); -	gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); +	/* Directories keep their data in the metadata address space */ +	if (meta || ip->i_depth) +		gfs2_meta_wipe(ip, bstart, blen);  }  /** @@ -1663,22 +2353,10 @@ void gfs2_free_data(struct gfs2_inode *ip, u64 bstart, u32 blen)  void gfs2_free_meta(struct gfs2_inode *ip, u64 bstart, u32 blen)  {  	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); -	struct gfs2_rgrpd *rgd; - -	rgd = rgblk_free(sdp, bstart, blen, GFS2_BLKST_FREE); -	if (!rgd) -		return; -	trace_gfs2_block_alloc(ip, bstart, blen, GFS2_BLKST_FREE); -	rgd->rd_free += blen; - -	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); -	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); - -	gfs2_trans_add_rg(rgd); +	__gfs2_free_blocks(ip, bstart, blen, 1);  	gfs2_statfs_change(sdp, 0, +blen, 0);  	gfs2_quota_change(ip, -(s64)blen, ip->i_inode.i_uid, ip->i_inode.i_gid); -	gfs2_meta_wipe(ip, bstart, blen);  }  void gfs2_unlink_di(struct inode *inode) @@ -1691,10 +2369,11 @@ void gfs2_unlink_di(struct inode *inode)  	rgd = rgblk_free(sdp, blkno, 1, GFS2_BLKST_UNLINKED);  	if (!rgd)  		return; -	
trace_gfs2_block_alloc(ip, blkno, 1, GFS2_BLKST_UNLINKED); -	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); +	trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED); +	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);  	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); -	gfs2_trans_add_rg(rgd); +	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); +	update_rgrp_lvb_unlinked(rgd, 1);  }  static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno) @@ -1712,18 +2391,19 @@ static void gfs2_free_uninit_di(struct gfs2_rgrpd *rgd, u64 blkno)  	rgd->rd_dinodes--;  	rgd->rd_free++; -	gfs2_trans_add_bh(rgd->rd_gl, rgd->rd_bits[0].bi_bh, 1); +	gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);  	gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data); +	gfs2_rgrp_ondisk2lvb(rgd->rd_rgl, rgd->rd_bits[0].bi_bh->b_data); +	update_rgrp_lvb_unlinked(rgd, -1);  	gfs2_statfs_change(sdp, 0, +1, -1); -	gfs2_trans_add_rg(rgd);  }  void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)  {  	gfs2_free_uninit_di(rgd, ip->i_no_addr); -	trace_gfs2_block_alloc(ip, ip->i_no_addr, 1, GFS2_BLKST_FREE); +	trace_gfs2_block_alloc(ip, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);  	gfs2_quota_change(ip, -1, ip->i_inode.i_uid, ip->i_inode.i_gid);  	gfs2_meta_wipe(ip, ip->i_no_addr, 1);  } @@ -1742,41 +2422,28 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)  int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)  {  	struct gfs2_rgrpd *rgd; -	struct gfs2_holder ri_gh, rgd_gh; -	struct gfs2_inode *ip = GFS2_I(sdp->sd_rindex); -	int ri_locked = 0; -	int error; - -	if (!gfs2_glock_is_locked_by_me(ip->i_gl)) { -		error = gfs2_rindex_hold(sdp, &ri_gh); -		if (error) -			goto fail; -		ri_locked = 1; -	} +	struct gfs2_holder rgd_gh; +	int error = -EINVAL; -	error = -EINVAL; -	rgd = gfs2_blk2rgrpd(sdp, no_addr); +	rgd = gfs2_blk2rgrpd(sdp, no_addr, 1);  	if (!rgd) -		goto fail_rindex; +		goto fail;  	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_SHARED, 0, &rgd_gh);  	if (error) -		goto fail_rindex; +		goto fail;  	if (gfs2_get_block_type(rgd, no_addr) != type)  		error = -ESTALE;  	gfs2_glock_dq_uninit(&rgd_gh); -fail_rindex: -	if (ri_locked) -		gfs2_glock_dq_uninit(&ri_gh);  fail:  	return error;  }  /**   * gfs2_rlist_add - add a RG to a list of RGs - * @sdp: the filesystem + * @ip: the inode   * @rlist: the list of resource groups   * @block: the block   * @@ -1786,9 +2453,10 @@ fail:   *   */ -void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist, +void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,  		    u64 block)  { +	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);  	struct gfs2_rgrpd *rgd;  	struct gfs2_rgrpd **tmp;  	unsigned int new_space; @@ -1797,12 +2465,15 @@ void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,  	if (gfs2_assert_warn(sdp, !rlist->rl_ghs))  		return; -	rgd = gfs2_blk2rgrpd(sdp, block); +	if (ip->i_rgd && rgrp_contains_block(ip->i_rgd, block)) +		rgd = ip->i_rgd; +	else +		rgd = gfs2_blk2rgrpd(sdp, block, 1);  	if (!rgd) { -		if (gfs2_consist(sdp)) -			fs_err(sdp, "block = %llu\n", (unsigned long long)block); +		fs_err(sdp, "rlist_add: no rgrp for block %llu\n", (unsigned long long)block);  		return;  	} +	ip->i_rgd = rgd;  	for (x = 0; x < rlist->rl_rgrps; x++)  		if (rlist->rl_rgd[x] == rgd) @@ -1832,7 +2503,6 @@ void gfs2_rlist_add(struct gfs2_sbd *sdp, struct gfs2_rgrp_list *rlist,   *      and initialize an array of glock holders for them   * @rlist: 
the list of resource groups
  * @state: the lock state to acquire the RG lock in
- * @flags: the modifier flags for the holder structures
  *
  * FIXME: Don't use NOFAIL
  *
@@ -1852,7 +2522,7 @@ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist, unsigned int state)
 
 /**
  * gfs2_rlist_free - free a resource group list
- * @list: the list of resource groups
+ * @rlist: the list of resource groups
  *
  */
 
@@ -1866,6 +2536,7 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
 		for (x = 0; x < rlist->rl_rgrps; x++)
 			gfs2_holder_uninit(&rlist->rl_ghs[x]);
 		kfree(rlist->rl_ghs);
+		rlist->rl_ghs = NULL;
 	}
 }
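Taken together, gfs2_rlist_add(), gfs2_rlist_alloc() and gfs2_rlist_free() implement a gather-then-act pattern: collect the distinct rgrps touched by a set of blocks into a growable, deduplicated array, acquire all their glocks as a batch, then release everything. The following userspace C toy models just that data-structure pattern (the 100-blocks-per-group mapping stands in for gfs2_blk2rgrpd(), and "locking" is a printf).

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Toy version of struct gfs2_rgrp_list: a deduplicated, growable array
 * of resource-group IDs, filled in one pass, processed as a batch. */
struct rgrp_list {
	unsigned int nr;    /* like rl_rgrps */
	unsigned int space; /* like rl_space */
	uint64_t *ids;      /* like rl_rgd   */
};

static void rlist_add(struct rgrp_list *rl, uint64_t id)
{
	unsigned int x;

	for (x = 0; x < rl->nr; x++)
		if (rl->ids[x] == id)
			return; /* rgrp already on the list */

	if (rl->nr == rl->space) { /* grow in chunks, like rl_space */
		rl->space += 10;
		rl->ids = realloc(rl->ids, rl->space * sizeof(*rl->ids));
		if (!rl->ids)
			abort();
	}
	rl->ids[rl->nr++] = id;
}

int main(void)
{
	/* Toy mapping: blocks 0..99 in group 0, 100..199 in group 1, ... */
	const uint64_t blocks[] = { 5, 42, 150, 160, 305 };
	struct rgrp_list rl = { 0, 0, NULL };
	unsigned int x;

	for (x = 0; x < sizeof(blocks) / sizeof(blocks[0]); x++)
		rlist_add(&rl, blocks[x] / 100); /* stand-in for blk2rgrpd */

	for (x = 0; x < rl.nr; x++) /* "lock" each rgrp exactly once */
		printf("lock group %llu\n", (unsigned long long)rl.ids[x]);

	free(rl.ids);
	return 0;
}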
