diff options
Diffstat (limited to 'fs/gfs2/log.c')
| -rw-r--r-- | fs/gfs2/log.c | 865 | 
1 files changed, 438 insertions, 427 deletions
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index eb01f3575e1..3966fadbceb 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -18,6 +18,9 @@  #include <linux/kthread.h>  #include <linux/freezer.h>  #include <linux/bio.h> +#include <linux/blkdev.h> +#include <linux/writeback.h> +#include <linux/list_sort.h>  #include "gfs2.h"  #include "incore.h" @@ -30,8 +33,6 @@  #include "dir.h"  #include "trace_gfs2.h" -#define PULL 1 -  /**   * gfs2_struct2blk - compute stuff   * @sdp: the filesystem @@ -67,13 +68,13 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct,   * @mapping: The associated mapping (maybe NULL)   * @bd: The gfs2_bufdata to remove   * - * The log lock _must_ be held when calling this function + * The ail lock _must_ be held when calling this function   *   */  void gfs2_remove_from_ail(struct gfs2_bufdata *bd)  { -	bd->bd_ail = NULL; +	bd->bd_tr = NULL;  	list_del_init(&bd->bd_ail_st_list);  	list_del_init(&bd->bd_ail_gl_list);  	atomic_dec(&bd->bd_gl->gl_ail_count); @@ -83,55 +84,100 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)  /**   * gfs2_ail1_start_one - Start I/O on a part of the AIL   * @sdp: the filesystem - * @tr: the part of the AIL + * @wbc: The writeback control structure + * @ai: The ail structure   *   */ -static void gfs2_ail1_start_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) -__releases(&sdp->sd_log_lock) -__acquires(&sdp->sd_log_lock) +static int gfs2_ail1_start_one(struct gfs2_sbd *sdp, +			       struct writeback_control *wbc, +			       struct gfs2_trans *tr) +__releases(&sdp->sd_ail_lock) +__acquires(&sdp->sd_ail_lock)  { +	struct gfs2_glock *gl = NULL; +	struct address_space *mapping;  	struct gfs2_bufdata *bd, *s;  	struct buffer_head *bh; -	int retry; -	do { -		retry = 0; +	list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list, bd_ail_st_list) { +		bh = bd->bd_bh; -		list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, -						 bd_ail_st_list) { -			bh = bd->bd_bh; +		gfs2_assert(sdp, bd->bd_tr == tr); -			gfs2_assert(sdp, bd->bd_ail == ai); +		if (!buffer_busy(bh)) { +			if (!buffer_uptodate(bh)) +				gfs2_io_error_bh(sdp, bh); +			list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list); +			continue; +		} -			if (!buffer_busy(bh)) { -				if (!buffer_uptodate(bh)) -					gfs2_io_error_bh(sdp, bh); -				list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); -				continue; -			} +		if (!buffer_dirty(bh)) +			continue; +		if (gl == bd->bd_gl) +			continue; +		gl = bd->bd_gl; +		list_move(&bd->bd_ail_st_list, &tr->tr_ail1_list); +		mapping = bh->b_page->mapping; +		if (!mapping) +			continue; +		spin_unlock(&sdp->sd_ail_lock); +		generic_writepages(mapping, wbc); +		spin_lock(&sdp->sd_ail_lock); +		if (wbc->nr_to_write <= 0) +			break; +		return 1; +	} -			if (!buffer_dirty(bh)) -				continue; +	return 0; +} -			list_move(&bd->bd_ail_st_list, &ai->ai_ail1_list); -			get_bh(bh); -			gfs2_log_unlock(sdp); -			lock_buffer(bh); -			if (test_clear_buffer_dirty(bh)) { -				bh->b_end_io = end_buffer_write_sync; -				submit_bh(WRITE_SYNC_PLUG, bh); -			} else { -				unlock_buffer(bh); -				brelse(bh); -			} -			gfs2_log_lock(sdp); +/** + * gfs2_ail1_flush - start writeback of some ail1 entries  + * @sdp: The super block + * @wbc: The writeback control structure + * + * Writes back some ail1 entries, according to the limits in the + * writeback control structure + */ + +void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc) +{ +	struct list_head *head = &sdp->sd_ail1_list; +	struct gfs2_trans *tr; +	struct blk_plug plug; -			retry = 1; +	trace_gfs2_ail_flush(sdp, wbc, 1); +	blk_start_plug(&plug); +	spin_lock(&sdp->sd_ail_lock); +restart: +	list_for_each_entry_reverse(tr, head, tr_list) { +		if (wbc->nr_to_write <= 0)  			break; -		} -	} while (retry); +		if (gfs2_ail1_start_one(sdp, wbc, tr)) +			goto restart; +	} +	spin_unlock(&sdp->sd_ail_lock); +	blk_finish_plug(&plug); +	trace_gfs2_ail_flush(sdp, wbc, 0); +} + +/** + * gfs2_ail1_start - start writeback of all ail1 entries + * @sdp: The superblock + */ + +static void gfs2_ail1_start(struct gfs2_sbd *sdp) +{ +	struct writeback_control wbc = { +		.sync_mode = WB_SYNC_NONE, +		.nr_to_write = LONG_MAX, +		.range_start = 0, +		.range_end = LLONG_MAX, +	}; + +	return gfs2_ail1_flush(sdp, &wbc);  }  /** @@ -141,84 +187,72 @@ __acquires(&sdp->sd_log_lock)   *   */ -static int gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai, int flags) +static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr)  {  	struct gfs2_bufdata *bd, *s;  	struct buffer_head *bh; -	list_for_each_entry_safe_reverse(bd, s, &ai->ai_ail1_list, +	list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list,  					 bd_ail_st_list) {  		bh = bd->bd_bh; - -		gfs2_assert(sdp, bd->bd_ail == ai); - -		if (buffer_busy(bh)) { -			if (flags & DIO_ALL) -				continue; -			else -				break; -		} - +		gfs2_assert(sdp, bd->bd_tr == tr); +		if (buffer_busy(bh)) +			continue;  		if (!buffer_uptodate(bh))  			gfs2_io_error_bh(sdp, bh); - -		list_move(&bd->bd_ail_st_list, &ai->ai_ail2_list); +		list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);  	} -	return list_empty(&ai->ai_ail1_list);  } -static void gfs2_ail1_start(struct gfs2_sbd *sdp) -{ -	struct list_head *head; -	u64 sync_gen; -	struct gfs2_ail *ai; -	int done = 0; - -	gfs2_log_lock(sdp); -	head = &sdp->sd_ail1_list; -	if (list_empty(head)) { -		gfs2_log_unlock(sdp); -		return; -	} -	sync_gen = sdp->sd_ail_sync_gen++; - -	while(!done) { -		done = 1; -		list_for_each_entry_reverse(ai, head, ai_list) { -			if (ai->ai_sync_gen >= sync_gen) -				continue; -			ai->ai_sync_gen = sync_gen; -			gfs2_ail1_start_one(sdp, ai); /* This may drop log lock */ -			done = 0; -			break; -		} -	} - -	gfs2_log_unlock(sdp); -} +/** + * gfs2_ail1_empty - Try to empty the ail1 lists + * @sdp: The superblock + * + * Tries to empty the ail1 lists, starting with the oldest first + */ -static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags) +static int gfs2_ail1_empty(struct gfs2_sbd *sdp)  { -	struct gfs2_ail *ai, *s; +	struct gfs2_trans *tr, *s; +	int oldest_tr = 1;  	int ret; -	gfs2_log_lock(sdp); - -	list_for_each_entry_safe_reverse(ai, s, &sdp->sd_ail1_list, ai_list) { -		if (gfs2_ail1_empty_one(sdp, ai, flags)) -			list_move(&ai->ai_list, &sdp->sd_ail2_list); -		else if (!(flags & DIO_ALL)) -			break; +	spin_lock(&sdp->sd_ail_lock); +	list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) { +		gfs2_ail1_empty_one(sdp, tr); +		if (list_empty(&tr->tr_ail1_list) && oldest_tr) +			list_move(&tr->tr_list, &sdp->sd_ail2_list); +		else +			oldest_tr = 0;  	} -  	ret = list_empty(&sdp->sd_ail1_list); - -	gfs2_log_unlock(sdp); +	spin_unlock(&sdp->sd_ail_lock);  	return ret;  } +static void gfs2_ail1_wait(struct gfs2_sbd *sdp) +{ +	struct gfs2_trans *tr; +	struct gfs2_bufdata *bd; +	struct buffer_head *bh; + +	spin_lock(&sdp->sd_ail_lock); +	list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) { +		list_for_each_entry(bd, &tr->tr_ail1_list, bd_ail_st_list) { +			bh = bd->bd_bh; +			if (!buffer_locked(bh)) +				continue; +			get_bh(bh); +			spin_unlock(&sdp->sd_ail_lock); +			wait_on_buffer(bh); +			brelse(bh); +			return; +		} +	} +	spin_unlock(&sdp->sd_ail_lock); +}  /**   * gfs2_ail2_empty_one - Check whether or not a trans in the AIL has been synced @@ -227,43 +261,60 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int flags)   *   */ -static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_ail *ai) +static void gfs2_ail2_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr)  { -	struct list_head *head = &ai->ai_ail2_list; +	struct list_head *head = &tr->tr_ail2_list;  	struct gfs2_bufdata *bd;  	while (!list_empty(head)) {  		bd = list_entry(head->prev, struct gfs2_bufdata,  				bd_ail_st_list); -		gfs2_assert(sdp, bd->bd_ail == ai); +		gfs2_assert(sdp, bd->bd_tr == tr);  		gfs2_remove_from_ail(bd);  	}  }  static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)  { -	struct gfs2_ail *ai, *safe; +	struct gfs2_trans *tr, *safe;  	unsigned int old_tail = sdp->sd_log_tail;  	int wrap = (new_tail < old_tail);  	int a, b, rm; -	gfs2_log_lock(sdp); +	spin_lock(&sdp->sd_ail_lock); -	list_for_each_entry_safe(ai, safe, &sdp->sd_ail2_list, ai_list) { -		a = (old_tail <= ai->ai_first); -		b = (ai->ai_first < new_tail); +	list_for_each_entry_safe(tr, safe, &sdp->sd_ail2_list, tr_list) { +		a = (old_tail <= tr->tr_first); +		b = (tr->tr_first < new_tail);  		rm = (wrap) ? (a || b) : (a && b);  		if (!rm)  			continue; -		gfs2_ail2_empty_one(sdp, ai); -		list_del(&ai->ai_list); -		gfs2_assert_warn(sdp, list_empty(&ai->ai_ail1_list)); -		gfs2_assert_warn(sdp, list_empty(&ai->ai_ail2_list)); -		kfree(ai); +		gfs2_ail2_empty_one(sdp, tr); +		list_del(&tr->tr_list); +		gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list)); +		gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list)); +		kfree(tr);  	} -	gfs2_log_unlock(sdp); +	spin_unlock(&sdp->sd_ail_lock); +} + +/** + * gfs2_log_release - Release a given number of log blocks + * @sdp: The GFS2 superblock + * @blks: The number of blocks + * + */ + +void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks) +{ + +	atomic_add(blks, &sdp->sd_log_blks_free); +	trace_gfs2_log_blocks(sdp, blks); +	gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= +				  sdp->sd_jdesc->jd_blocks); +	up_read(&sdp->sd_log_flush_lock);  }  /** @@ -288,7 +339,7 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)  int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)  { -	unsigned reserved_blks = 6 * (4096 / sdp->sd_vfs->s_blocksize); +	unsigned reserved_blks = 7 * (4096 / sdp->sd_vfs->s_blocksize);  	unsigned wanted = blks + reserved_blks;  	DEFINE_WAIT(wait);  	int did_wait = 0; @@ -324,20 +375,11 @@ retry:  		wake_up(&sdp->sd_log_waitq);  	down_read(&sdp->sd_log_flush_lock); - -	return 0; -} - -static u64 log_bmap(struct gfs2_sbd *sdp, unsigned int lbn) -{ -	struct gfs2_journal_extent *je; - -	list_for_each_entry(je, &sdp->sd_jdesc->extent_list, extent_list) { -		if (lbn >= je->lblock && lbn < je->lblock + je->blocks) -			return je->dblock + lbn - je->lblock; +	if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) { +		gfs2_log_release(sdp, blks); +		return -EROFS;  	} - -	return -1; +	return 0;  }  /** @@ -392,24 +434,22 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer  static unsigned int calc_reserved(struct gfs2_sbd *sdp)  {  	unsigned int reserved = 0; -	unsigned int mbuf_limit, metabufhdrs_needed; -	unsigned int dbuf_limit, databufhdrs_needed; -	unsigned int revokes = 0; - -	mbuf_limit = buf_limit(sdp); -	metabufhdrs_needed = (sdp->sd_log_commited_buf + -			      (mbuf_limit - 1)) / mbuf_limit; -	dbuf_limit = databuf_limit(sdp); -	databufhdrs_needed = (sdp->sd_log_commited_databuf + -			      (dbuf_limit - 1)) / dbuf_limit; +	unsigned int mbuf; +	unsigned int dbuf; +	struct gfs2_trans *tr = sdp->sd_log_tr; + +	if (tr) { +		mbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm; +		dbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm; +		reserved = mbuf + dbuf; +		/* Account for header blocks */ +		reserved += DIV_ROUND_UP(mbuf, buf_limit(sdp)); +		reserved += DIV_ROUND_UP(dbuf, databuf_limit(sdp)); +	}  	if (sdp->sd_log_commited_revoke > 0) -		revokes = gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke, +		reserved += gfs2_struct2blk(sdp, sdp->sd_log_commited_revoke,  					  sizeof(u64)); - -	reserved = sdp->sd_log_commited_buf + metabufhdrs_needed + -		sdp->sd_log_commited_databuf + databufhdrs_needed + -		revokes;  	/* One for the overall header */  	if (reserved)  		reserved++; @@ -418,139 +458,184 @@ static unsigned int calc_reserved(struct gfs2_sbd *sdp)  static unsigned int current_tail(struct gfs2_sbd *sdp)  { -	struct gfs2_ail *ai; +	struct gfs2_trans *tr;  	unsigned int tail; -	gfs2_log_lock(sdp); +	spin_lock(&sdp->sd_ail_lock);  	if (list_empty(&sdp->sd_ail1_list)) {  		tail = sdp->sd_log_head;  	} else { -		ai = list_entry(sdp->sd_ail1_list.prev, struct gfs2_ail, ai_list); -		tail = ai->ai_first; +		tr = list_entry(sdp->sd_ail1_list.prev, struct gfs2_trans, +				tr_list); +		tail = tr->tr_first;  	} -	gfs2_log_unlock(sdp); +	spin_unlock(&sdp->sd_ail_lock);  	return tail;  } -void gfs2_log_incr_head(struct gfs2_sbd *sdp) +static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)  { -	if (sdp->sd_log_flush_head == sdp->sd_log_tail) -		BUG_ON(sdp->sd_log_flush_head != sdp->sd_log_head); +	unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail); -	if (++sdp->sd_log_flush_head == sdp->sd_jdesc->jd_blocks) { -		sdp->sd_log_flush_head = 0; -		sdp->sd_log_flush_wrapped = 1; -	} +	ail2_empty(sdp, new_tail); + +	atomic_add(dist, &sdp->sd_log_blks_free); +	trace_gfs2_log_blocks(sdp, dist); +	gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= +			     sdp->sd_jdesc->jd_blocks); + +	sdp->sd_log_tail = new_tail;  } -/** - * gfs2_log_write_endio - End of I/O for a log buffer - * @bh: The buffer head - * @uptodate: I/O Status - * - */ -static void gfs2_log_write_endio(struct buffer_head *bh, int uptodate) +static void log_flush_wait(struct gfs2_sbd *sdp)  { -	struct gfs2_sbd *sdp = bh->b_private; -	bh->b_private = NULL; +	DEFINE_WAIT(wait); -	end_buffer_write_sync(bh, uptodate); -	if (atomic_dec_and_test(&sdp->sd_log_in_flight)) -		wake_up(&sdp->sd_log_flush_wait); +	if (atomic_read(&sdp->sd_log_in_flight)) { +		do { +			prepare_to_wait(&sdp->sd_log_flush_wait, &wait, +					TASK_UNINTERRUPTIBLE); +			if (atomic_read(&sdp->sd_log_in_flight)) +				io_schedule(); +		} while(atomic_read(&sdp->sd_log_in_flight)); +		finish_wait(&sdp->sd_log_flush_wait, &wait); +	}  } -/** - * gfs2_log_get_buf - Get and initialize a buffer to use for log control data - * @sdp: The GFS2 superblock - * - * Returns: the buffer_head - */ - -struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp) +static int ip_cmp(void *priv, struct list_head *a, struct list_head *b)  { -	u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head); -	struct buffer_head *bh; +	struct gfs2_inode *ipa, *ipb; -	bh = sb_getblk(sdp->sd_vfs, blkno); -	lock_buffer(bh); -	memset(bh->b_data, 0, bh->b_size); -	set_buffer_uptodate(bh); -	clear_buffer_dirty(bh); -	gfs2_log_incr_head(sdp); -	atomic_inc(&sdp->sd_log_in_flight); -	bh->b_private = sdp; -	bh->b_end_io = gfs2_log_write_endio; +	ipa = list_entry(a, struct gfs2_inode, i_ordered); +	ipb = list_entry(b, struct gfs2_inode, i_ordered); -	return bh; +	if (ipa->i_no_addr < ipb->i_no_addr) +		return -1; +	if (ipa->i_no_addr > ipb->i_no_addr) +		return 1; +	return 0;  } -/** - * gfs2_fake_write_endio -  - * @bh: The buffer head - * @uptodate: The I/O Status - * - */ - -static void gfs2_fake_write_endio(struct buffer_head *bh, int uptodate) +static void gfs2_ordered_write(struct gfs2_sbd *sdp)  { -	struct buffer_head *real_bh = bh->b_private; -	struct gfs2_bufdata *bd = real_bh->b_private; -	struct gfs2_sbd *sdp = bd->bd_gl->gl_sbd; +	struct gfs2_inode *ip; +	LIST_HEAD(written); -	end_buffer_write_sync(bh, uptodate); -	free_buffer_head(bh); -	unlock_buffer(real_bh); -	brelse(real_bh); -	if (atomic_dec_and_test(&sdp->sd_log_in_flight)) -		wake_up(&sdp->sd_log_flush_wait); +	spin_lock(&sdp->sd_ordered_lock); +	list_sort(NULL, &sdp->sd_log_le_ordered, &ip_cmp); +	while (!list_empty(&sdp->sd_log_le_ordered)) { +		ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered); +		list_move(&ip->i_ordered, &written); +		if (ip->i_inode.i_mapping->nrpages == 0) +			continue; +		spin_unlock(&sdp->sd_ordered_lock); +		filemap_fdatawrite(ip->i_inode.i_mapping); +		spin_lock(&sdp->sd_ordered_lock); +	} +	list_splice(&written, &sdp->sd_log_le_ordered); +	spin_unlock(&sdp->sd_ordered_lock);  } -/** - * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log - * @sdp: the filesystem - * @data: the data the buffer_head should point to - * - * Returns: the log buffer descriptor - */ - -struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp, -				      struct buffer_head *real) +static void gfs2_ordered_wait(struct gfs2_sbd *sdp)  { -	u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head); -	struct buffer_head *bh; +	struct gfs2_inode *ip; -	bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL); -	atomic_set(&bh->b_count, 1); -	bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock); -	set_bh_page(bh, real->b_page, bh_offset(real)); -	bh->b_blocknr = blkno; -	bh->b_size = sdp->sd_sb.sb_bsize; -	bh->b_bdev = sdp->sd_vfs->s_bdev; -	bh->b_private = real; -	bh->b_end_io = gfs2_fake_write_endio; +	spin_lock(&sdp->sd_ordered_lock); +	while (!list_empty(&sdp->sd_log_le_ordered)) { +		ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, i_ordered); +		list_del(&ip->i_ordered); +		WARN_ON(!test_and_clear_bit(GIF_ORDERED, &ip->i_flags)); +		if (ip->i_inode.i_mapping->nrpages == 0) +			continue; +		spin_unlock(&sdp->sd_ordered_lock); +		filemap_fdatawait(ip->i_inode.i_mapping); +		spin_lock(&sdp->sd_ordered_lock); +	} +	spin_unlock(&sdp->sd_ordered_lock); +} -	gfs2_log_incr_head(sdp); -	atomic_inc(&sdp->sd_log_in_flight); +void gfs2_ordered_del_inode(struct gfs2_inode *ip) +{ +	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); -	return bh; +	spin_lock(&sdp->sd_ordered_lock); +	if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags)) +		list_del(&ip->i_ordered); +	spin_unlock(&sdp->sd_ordered_lock);  } -static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail) +void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)  { -	unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail); +	struct buffer_head *bh = bd->bd_bh; +	struct gfs2_glock *gl = bd->bd_gl; -	ail2_empty(sdp, new_tail); - -	atomic_add(dist, &sdp->sd_log_blks_free); -	trace_gfs2_log_blocks(sdp, dist); -	gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= -			     sdp->sd_jdesc->jd_blocks); +	bh->b_private = NULL; +	bd->bd_blkno = bh->b_blocknr; +	gfs2_remove_from_ail(bd); /* drops ref on bh */ +	bd->bd_bh = NULL; +	bd->bd_ops = &gfs2_revoke_lops; +	sdp->sd_log_num_revoke++; +	atomic_inc(&gl->gl_revokes); +	set_bit(GLF_LFLUSH, &gl->gl_flags); +	list_add(&bd->bd_list, &sdp->sd_log_le_revoke); +} + +void gfs2_write_revokes(struct gfs2_sbd *sdp) +{ +	struct gfs2_trans *tr; +	struct gfs2_bufdata *bd, *tmp; +	int have_revokes = 0; +	int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64); + +	gfs2_ail1_empty(sdp); +	spin_lock(&sdp->sd_ail_lock); +	list_for_each_entry(tr, &sdp->sd_ail1_list, tr_list) { +		list_for_each_entry(bd, &tr->tr_ail2_list, bd_ail_st_list) { +			if (list_empty(&bd->bd_list)) { +				have_revokes = 1; +				goto done; +			} +		} +	} +done: +	spin_unlock(&sdp->sd_ail_lock); +	if (have_revokes == 0) +		return; +	while (sdp->sd_log_num_revoke > max_revokes) +		max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64); +	max_revokes -= sdp->sd_log_num_revoke; +	if (!sdp->sd_log_num_revoke) { +		atomic_dec(&sdp->sd_log_blks_free); +		/* If no blocks have been reserved, we need to also +		 * reserve a block for the header */ +		if (!sdp->sd_log_blks_reserved) +			atomic_dec(&sdp->sd_log_blks_free); +	} +	gfs2_log_lock(sdp); +	spin_lock(&sdp->sd_ail_lock); +	list_for_each_entry(tr, &sdp->sd_ail1_list, tr_list) { +		list_for_each_entry_safe(bd, tmp, &tr->tr_ail2_list, bd_ail_st_list) { +			if (max_revokes == 0) +				goto out_of_blocks; +			if (!list_empty(&bd->bd_list)) +				continue; +			gfs2_add_revoke(sdp, bd); +			max_revokes--; +		} +	} +out_of_blocks: +	spin_unlock(&sdp->sd_ail_lock); +	gfs2_log_unlock(sdp); -	sdp->sd_log_tail = new_tail; +	if (!sdp->sd_log_num_revoke) { +		atomic_inc(&sdp->sd_log_blks_free); +		if (!sdp->sd_log_blks_reserved) +			atomic_inc(&sdp->sd_log_blks_free); +	}  }  /** @@ -560,25 +645,18 @@ static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)   * Returns: the initialized log buffer descriptor   */ -static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull) +static void log_write_header(struct gfs2_sbd *sdp, u32 flags)  { -	u64 blkno = log_bmap(sdp, sdp->sd_log_flush_head); -	struct buffer_head *bh;  	struct gfs2_log_header *lh;  	unsigned int tail;  	u32 hash; +	int rw = WRITE_FLUSH_FUA | REQ_META; +	struct page *page = mempool_alloc(gfs2_page_pool, GFP_NOIO); +	lh = page_address(page); +	clear_page(lh); -	bh = sb_getblk(sdp->sd_vfs, blkno); -	lock_buffer(bh); -	memset(bh->b_data, 0, bh->b_size); -	set_buffer_uptodate(bh); -	clear_buffer_dirty(bh); - -	gfs2_ail1_empty(sdp, 0);  	tail = current_tail(sdp); -	lh = (struct gfs2_log_header *)bh->b_data; -	memset(lh, 0, sizeof(struct gfs2_log_header));  	lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);  	lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);  	lh->lh_header.__pad0 = cpu_to_be64(0); @@ -588,96 +666,22 @@ static void log_write_header(struct gfs2_sbd *sdp, u32 flags, int pull)  	lh->lh_flags = cpu_to_be32(flags);  	lh->lh_tail = cpu_to_be32(tail);  	lh->lh_blkno = cpu_to_be32(sdp->sd_log_flush_head); -	hash = gfs2_disk_hash(bh->b_data, sizeof(struct gfs2_log_header)); +	hash = gfs2_disk_hash(page_address(page), sizeof(struct gfs2_log_header));  	lh->lh_hash = cpu_to_be32(hash); -	bh->b_end_io = end_buffer_write_sync; -	get_bh(bh); -	if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) -		submit_bh(WRITE_SYNC | REQ_META, bh); -	else -		submit_bh(WRITE_FLUSH_FUA | REQ_META, bh); -	wait_on_buffer(bh); +	if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) { +		gfs2_ordered_wait(sdp); +		log_flush_wait(sdp); +		rw = WRITE_SYNC | REQ_META | REQ_PRIO; +	} -	if (!buffer_uptodate(bh)) -		gfs2_io_error_bh(sdp, bh); -	brelse(bh); +	sdp->sd_log_idle = (tail == sdp->sd_log_flush_head); +	gfs2_log_write_page(sdp, page); +	gfs2_log_flush_bio(sdp, rw); +	log_flush_wait(sdp);  	if (sdp->sd_log_tail != tail)  		log_pull_tail(sdp, tail); -	else -		gfs2_assert_withdraw(sdp, !pull); - -	sdp->sd_log_idle = (tail == sdp->sd_log_flush_head); -	gfs2_log_incr_head(sdp); -} - -static void log_flush_commit(struct gfs2_sbd *sdp) -{ -	DEFINE_WAIT(wait); - -	if (atomic_read(&sdp->sd_log_in_flight)) { -		do { -			prepare_to_wait(&sdp->sd_log_flush_wait, &wait, -					TASK_UNINTERRUPTIBLE); -			if (atomic_read(&sdp->sd_log_in_flight)) -				io_schedule(); -		} while(atomic_read(&sdp->sd_log_in_flight)); -		finish_wait(&sdp->sd_log_flush_wait, &wait); -	} - -	log_write_header(sdp, 0, 0); -} - -static void gfs2_ordered_write(struct gfs2_sbd *sdp) -{ -	struct gfs2_bufdata *bd; -	struct buffer_head *bh; -	LIST_HEAD(written); - -	gfs2_log_lock(sdp); -	while (!list_empty(&sdp->sd_log_le_ordered)) { -		bd = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_bufdata, bd_le.le_list); -		list_move(&bd->bd_le.le_list, &written); -		bh = bd->bd_bh; -		if (!buffer_dirty(bh)) -			continue; -		get_bh(bh); -		gfs2_log_unlock(sdp); -		lock_buffer(bh); -		if (buffer_mapped(bh) && test_clear_buffer_dirty(bh)) { -			bh->b_end_io = end_buffer_write_sync; -			submit_bh(WRITE_SYNC_PLUG, bh); -		} else { -			unlock_buffer(bh); -			brelse(bh); -		} -		gfs2_log_lock(sdp); -	} -	list_splice(&written, &sdp->sd_log_le_ordered); -	gfs2_log_unlock(sdp); -} - -static void gfs2_ordered_wait(struct gfs2_sbd *sdp) -{ -	struct gfs2_bufdata *bd; -	struct buffer_head *bh; - -	gfs2_log_lock(sdp); -	while (!list_empty(&sdp->sd_log_le_ordered)) { -		bd = list_entry(sdp->sd_log_le_ordered.prev, struct gfs2_bufdata, bd_le.le_list); -		bh = bd->bd_bh; -		if (buffer_locked(bh)) { -			get_bh(bh); -			gfs2_log_unlock(sdp); -			wait_on_buffer(bh); -			brelse(bh); -			gfs2_log_lock(sdp); -			continue; -		} -		list_del_init(&bd->bd_le.le_list); -	} -	gfs2_log_unlock(sdp);  }  /** @@ -687,9 +691,10 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)   *   */ -void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl) +void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, +		    enum gfs2_flush_type type)  { -	struct gfs2_ail *ai; +	struct gfs2_trans *tr;  	down_write(&sdp->sd_log_flush_lock); @@ -700,76 +705,129 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl)  	}  	trace_gfs2_log_flush(sdp, 1); -	ai = kzalloc(sizeof(struct gfs2_ail), GFP_NOFS | __GFP_NOFAIL); -	INIT_LIST_HEAD(&ai->ai_ail1_list); -	INIT_LIST_HEAD(&ai->ai_ail2_list); - -	if (sdp->sd_log_num_buf != sdp->sd_log_commited_buf) { -		printk(KERN_INFO "GFS2: log buf %u %u\n", sdp->sd_log_num_buf, -		       sdp->sd_log_commited_buf); -		gfs2_assert_withdraw(sdp, 0); -	} -	if (sdp->sd_log_num_databuf != sdp->sd_log_commited_databuf) { -		printk(KERN_INFO "GFS2: log databuf %u %u\n", -		       sdp->sd_log_num_databuf, sdp->sd_log_commited_databuf); -		gfs2_assert_withdraw(sdp, 0); +	sdp->sd_log_flush_head = sdp->sd_log_head; +	sdp->sd_log_flush_wrapped = 0; +	tr = sdp->sd_log_tr; +	if (tr) { +		sdp->sd_log_tr = NULL; +		INIT_LIST_HEAD(&tr->tr_ail1_list); +		INIT_LIST_HEAD(&tr->tr_ail2_list); +		tr->tr_first = sdp->sd_log_flush_head;  	} +  	gfs2_assert_withdraw(sdp,  			sdp->sd_log_num_revoke == sdp->sd_log_commited_revoke); -	sdp->sd_log_flush_head = sdp->sd_log_head; -	sdp->sd_log_flush_wrapped = 0; -	ai->ai_first = sdp->sd_log_flush_head; -  	gfs2_ordered_write(sdp); -	lops_before_commit(sdp); -	gfs2_ordered_wait(sdp); +	lops_before_commit(sdp, tr); +	gfs2_log_flush_bio(sdp, WRITE); -	if (sdp->sd_log_head != sdp->sd_log_flush_head) -		log_flush_commit(sdp); -	else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){ -		gfs2_log_lock(sdp); +	if (sdp->sd_log_head != sdp->sd_log_flush_head) { +		log_flush_wait(sdp); +		log_write_header(sdp, 0); +	} else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){  		atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */  		trace_gfs2_log_blocks(sdp, -1); -		gfs2_log_unlock(sdp); -		log_write_header(sdp, 0, PULL); +		log_write_header(sdp, 0);  	} -	lops_after_commit(sdp, ai); +	lops_after_commit(sdp, tr);  	gfs2_log_lock(sdp);  	sdp->sd_log_head = sdp->sd_log_flush_head;  	sdp->sd_log_blks_reserved = 0; -	sdp->sd_log_commited_buf = 0; -	sdp->sd_log_commited_databuf = 0;  	sdp->sd_log_commited_revoke = 0; -	if (!list_empty(&ai->ai_ail1_list)) { -		list_add(&ai->ai_list, &sdp->sd_ail1_list); -		ai = NULL; +	spin_lock(&sdp->sd_ail_lock); +	if (tr && !list_empty(&tr->tr_ail1_list)) { +		list_add(&tr->tr_list, &sdp->sd_ail1_list); +		tr = NULL;  	} +	spin_unlock(&sdp->sd_ail_lock);  	gfs2_log_unlock(sdp); + +	if (atomic_read(&sdp->sd_log_freeze)) +		type = FREEZE_FLUSH; +	if (type != NORMAL_FLUSH) { +		if (!sdp->sd_log_idle) { +			for (;;) { +				gfs2_ail1_start(sdp); +				gfs2_ail1_wait(sdp); +				if (gfs2_ail1_empty(sdp)) +					break; +			} +			atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */ +			trace_gfs2_log_blocks(sdp, -1); +			sdp->sd_log_flush_wrapped = 0; +			log_write_header(sdp, 0); +			sdp->sd_log_head = sdp->sd_log_flush_head; +		} +		if (type == SHUTDOWN_FLUSH || type == FREEZE_FLUSH) +			gfs2_log_shutdown(sdp); +		if (type == FREEZE_FLUSH) { +			int error; + +			atomic_set(&sdp->sd_log_freeze, 0); +			wake_up(&sdp->sd_log_frozen_wait); +			error = gfs2_glock_nq_init(sdp->sd_freeze_gl, +						   LM_ST_SHARED, 0, +						   &sdp->sd_thaw_gh); +			if (error) { +				printk(KERN_INFO "GFS2: couln't get freeze lock : %d\n", error); +				gfs2_assert_withdraw(sdp, 0); +			} +			else +				gfs2_glock_dq_uninit(&sdp->sd_thaw_gh); +		} +	} +  	trace_gfs2_log_flush(sdp, 0);  	up_write(&sdp->sd_log_flush_lock); -	kfree(ai); +	kfree(tr); +} + +/** + * gfs2_merge_trans - Merge a new transaction into a cached transaction + * @old: Original transaction to be expanded + * @new: New transaction to be merged + */ + +static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new) +{ +	WARN_ON_ONCE(old->tr_attached != 1); + +	old->tr_num_buf_new	+= new->tr_num_buf_new; +	old->tr_num_databuf_new	+= new->tr_num_databuf_new; +	old->tr_num_buf_rm	+= new->tr_num_buf_rm; +	old->tr_num_databuf_rm	+= new->tr_num_databuf_rm; +	old->tr_num_revoke	+= new->tr_num_revoke; +	old->tr_num_revoke_rm	+= new->tr_num_revoke_rm; + +	list_splice_tail_init(&new->tr_databuf, &old->tr_databuf); +	list_splice_tail_init(&new->tr_buf, &old->tr_buf);  }  static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)  {  	unsigned int reserved;  	unsigned int unused; +	unsigned int maxres;  	gfs2_log_lock(sdp); -	sdp->sd_log_commited_buf += tr->tr_num_buf_new - tr->tr_num_buf_rm; -	sdp->sd_log_commited_databuf += tr->tr_num_databuf_new - -		tr->tr_num_databuf_rm; -	gfs2_assert_withdraw(sdp, (((int)sdp->sd_log_commited_buf) >= 0) || -			     (((int)sdp->sd_log_commited_databuf) >= 0)); +	if (sdp->sd_log_tr) { +		gfs2_merge_trans(sdp->sd_log_tr, tr); +	} else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) { +		gfs2_assert_withdraw(sdp, tr->tr_alloced); +		sdp->sd_log_tr = tr; +		tr->tr_attached = 1; +	} +  	sdp->sd_log_commited_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;  	reserved = calc_reserved(sdp); -	gfs2_assert_withdraw(sdp, sdp->sd_log_blks_reserved + tr->tr_reserved >= reserved); -	unused = sdp->sd_log_blks_reserved - reserved + tr->tr_reserved; +	maxres = sdp->sd_log_blks_reserved + tr->tr_reserved; +	gfs2_assert_withdraw(sdp, maxres >= reserved); +	unused = maxres - reserved;  	atomic_add(unused, &sdp->sd_log_blks_free);  	trace_gfs2_log_blocks(sdp, unused);  	gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <= @@ -779,21 +837,6 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)  	gfs2_log_unlock(sdp);  } -static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) -{ -	struct list_head *head = &tr->tr_list_buf; -	struct gfs2_bufdata *bd; - -	gfs2_log_lock(sdp); -	while (!list_empty(head)) { -		bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr); -		list_del_init(&bd->bd_list_tr); -		tr->tr_num_buf--; -	} -	gfs2_log_unlock(sdp); -	gfs2_assert_warn(sdp, !tr->tr_num_buf); -} -  /**   * gfs2_log_commit - Commit a transaction to the log   * @sdp: the filesystem @@ -812,9 +855,6 @@ static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)  void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)  {  	log_refund(sdp, tr); -	buf_lo_incore_commit(sdp, tr); - -	up_read(&sdp->sd_log_flush_lock);  	if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) ||  	    ((sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free)) > @@ -830,52 +870,25 @@ void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)  void gfs2_log_shutdown(struct gfs2_sbd *sdp)  { -	down_write(&sdp->sd_log_flush_lock); -  	gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved); -	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_buf);  	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); -	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_rg); -	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_databuf);  	gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));  	sdp->sd_log_flush_head = sdp->sd_log_head;  	sdp->sd_log_flush_wrapped = 0; -	log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT, -			 (sdp->sd_log_tail == current_tail(sdp)) ? 0 : PULL); +	log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT); -	gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks);  	gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);  	gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list));  	sdp->sd_log_head = sdp->sd_log_flush_head;  	sdp->sd_log_tail = sdp->sd_log_head; - -	up_write(&sdp->sd_log_flush_lock); -} - - -/** - * gfs2_meta_syncfs - sync all the buffers in a filesystem - * @sdp: the filesystem - * - */ - -void gfs2_meta_syncfs(struct gfs2_sbd *sdp) -{ -	gfs2_log_flush(sdp, NULL); -	for (;;) { -		gfs2_ail1_start(sdp); -		if (gfs2_ail1_empty(sdp, DIO_ALL)) -			break; -		msleep(10); -	}  }  static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)  { -	return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1)); +	return (atomic_read(&sdp->sd_log_pinned) >= atomic_read(&sdp->sd_log_thresh1) || atomic_read(&sdp->sd_log_freeze));  }  static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp) @@ -897,29 +910,27 @@ int gfs2_logd(void *data)  	struct gfs2_sbd *sdp = data;  	unsigned long t = 1;  	DEFINE_WAIT(wait); -	unsigned preflush;  	while (!kthread_should_stop()) { -		preflush = atomic_read(&sdp->sd_log_pinned);  		if (gfs2_jrnl_flush_reqd(sdp) || t == 0) { -			gfs2_ail1_empty(sdp, DIO_ALL); -			gfs2_log_flush(sdp, NULL); -			gfs2_ail1_empty(sdp, DIO_ALL); +			gfs2_ail1_empty(sdp); +			gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);  		}  		if (gfs2_ail_flush_reqd(sdp)) {  			gfs2_ail1_start(sdp); -			io_schedule(); -			gfs2_ail1_empty(sdp, 0); -			gfs2_log_flush(sdp, NULL); -			gfs2_ail1_empty(sdp, DIO_ALL); +			gfs2_ail1_wait(sdp); +			gfs2_ail1_empty(sdp); +			gfs2_log_flush(sdp, NULL, NORMAL_FLUSH);  		} -		wake_up(&sdp->sd_log_waitq); +		if (!gfs2_ail_flush_reqd(sdp)) +			wake_up(&sdp->sd_log_waitq); +  		t = gfs2_tune_get(sdp, gt_logd_secs) * HZ; -		if (freezing(current)) -			refrigerator(); + +		try_to_freeze();  		do {  			prepare_to_wait(&sdp->sd_logd_waitq, &wait,  | 
