diff options
Diffstat (limited to 'fs/ext4/fsync.c')
| -rw-r--r-- | fs/ext4/fsync.c | 180 | 
1 files changed, 56 insertions, 124 deletions
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index c1a7bc923cf..a8bc47f75fa 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -34,89 +34,6 @@  #include <trace/events/ext4.h> -static void dump_completed_IO(struct inode * inode) -{ -#ifdef	EXT4_DEBUG -	struct list_head *cur, *before, *after; -	ext4_io_end_t *io, *io0, *io1; -	unsigned long flags; - -	if (list_empty(&EXT4_I(inode)->i_completed_io_list)){ -		ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino); -		return; -	} - -	ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino); -	spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); -	list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){ -		cur = &io->list; -		before = cur->prev; -		io0 = container_of(before, ext4_io_end_t, list); -		after = cur->next; -		io1 = container_of(after, ext4_io_end_t, list); - -		ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n", -			    io, inode->i_ino, io0, io1); -	} -	spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags); -#endif -} - -/* - * This function is called from ext4_sync_file(). - * - * When IO is completed, the work to convert unwritten extents to - * written is queued on workqueue but may not get immediately - * scheduled. When fsync is called, we need to ensure the - * conversion is complete before fsync returns. - * The inode keeps track of a list of pending/completed IO that - * might needs to do the conversion. This function walks through - * the list and convert the related unwritten extents for completed IO - * to written. - * The function return the number of pending IOs on success. - */ -static int flush_completed_IO(struct inode *inode) -{ -	ext4_io_end_t *io; -	struct ext4_inode_info *ei = EXT4_I(inode); -	unsigned long flags; -	int ret = 0; -	int ret2 = 0; - -	if (list_empty(&ei->i_completed_io_list)) -		return ret; - -	dump_completed_IO(inode); -	spin_lock_irqsave(&ei->i_completed_io_lock, flags); -	while (!list_empty(&ei->i_completed_io_list)){ -		io = list_entry(ei->i_completed_io_list.next, -				ext4_io_end_t, list); -		/* -		 * Calling ext4_end_io_nolock() to convert completed -		 * IO to written. -		 * -		 * When ext4_sync_file() is called, run_queue() may already -		 * about to flush the work corresponding to this io structure. -		 * It will be upset if it founds the io structure related -		 * to the work-to-be schedule is freed. -		 * -		 * Thus we need to keep the io structure still valid here after -		 * convertion finished. The io structure has a flag to -		 * avoid double converting from both fsync and background work -		 * queue work. -		 */ -		spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); -		ret = ext4_end_io_nolock(io); -		spin_lock_irqsave(&ei->i_completed_io_lock, flags); -		if (ret < 0) -			ret2 = ret; -		else -			list_del_init(&io->list); -	} -	spin_unlock_irqrestore(&ei->i_completed_io_lock, flags); -	return (ret2 < 0) ? ret2 : 0; -} -  /*   * If we're not journaling and this is a just-created file, we have to   * sync our parent directory (if it was freshly created) since @@ -125,19 +42,35 @@ static int flush_completed_IO(struct inode *inode)   * the parent directory's parent as well, and so on recursively, if   * they are also freshly created.   */ -static void ext4_sync_parent(struct inode *inode) +static int ext4_sync_parent(struct inode *inode)  {  	struct dentry *dentry = NULL; +	struct inode *next; +	int ret = 0; -	while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) { +	if (!ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) +		return 0; +	inode = igrab(inode); +	while (ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {  		ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY); -		dentry = list_entry(inode->i_dentry.next, -				    struct dentry, d_alias); -		if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode) +		dentry = d_find_any_alias(inode); +		if (!dentry) +			break; +		next = igrab(dentry->d_parent->d_inode); +		dput(dentry); +		if (!next) +			break; +		iput(inode); +		inode = next; +		ret = sync_mapping_buffers(inode->i_mapping); +		if (ret) +			break; +		ret = sync_inode_metadata(inode, 1); +		if (ret)  			break; -		inode = dentry->d_parent->d_inode; -		sync_mapping_buffers(inode->i_mapping);  	} +	iput(inode); +	return ret;  }  /* @@ -150,36 +83,39 @@ static void ext4_sync_parent(struct inode *inode)   *   * What we do is just kick off a commit and wait on it.  This will snapshot the   * inode to disk. - * - * i_mutex lock is held when entering and exiting this function   */ -int ext4_sync_file(struct file *file, int datasync) +int ext4_sync_file(struct file *file, loff_t start, loff_t end, int datasync)  {  	struct inode *inode = file->f_mapping->host;  	struct ext4_inode_info *ei = EXT4_I(inode);  	journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; -	int ret; +	int ret = 0, err;  	tid_t commit_tid; +	bool needs_barrier = false;  	J_ASSERT(ext4_journal_current_handle() == NULL); -	trace_ext4_sync_file(file, datasync); +	trace_ext4_sync_file_enter(file, datasync); -	if (inode->i_sb->s_flags & MS_RDONLY) -		return 0; - -	ret = flush_completed_IO(inode); -	if (ret < 0) -		return ret; +	if (inode->i_sb->s_flags & MS_RDONLY) { +		/* Make sure that we read updated s_mount_flags value */ +		smp_rmb(); +		if (EXT4_SB(inode->i_sb)->s_mount_flags & EXT4_MF_FS_ABORTED) +			ret = -EROFS; +		goto out; +	}  	if (!journal) { -		ret = generic_file_fsync(file, datasync); -		if (!ret && !list_empty(&inode->i_dentry)) -			ext4_sync_parent(inode); -		return ret; +		ret = generic_file_fsync(file, start, end, datasync); +		if (!ret && !hlist_empty(&inode->i_dentry)) +			ret = ext4_sync_parent(inode); +		goto out;  	} +	ret = filemap_write_and_wait_range(inode->i_mapping, start, end); +	if (ret) +		return ret;  	/*  	 * data=writeback,ordered:  	 *  The caller's filemap_fdatawrite()/wait will sync the data. @@ -194,26 +130,22 @@ int ext4_sync_file(struct file *file, int datasync)  	 *  (they were dirtied by commit).  But that's OK - the blocks are  	 *  safe in-journal, which is all fsync() needs to ensure.  	 */ -	if (ext4_should_journal_data(inode)) -		return ext4_force_commit(inode->i_sb); +	if (ext4_should_journal_data(inode)) { +		ret = ext4_force_commit(inode->i_sb); +		goto out; +	}  	commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; -	if (jbd2_log_start_commit(journal, commit_tid)) { -		/* -		 * When the journal is on a different device than the -		 * fs data disk, we need to issue the barrier in -		 * writeback mode.  (In ordered mode, the jbd2 layer -		 * will take care of issuing the barrier.  In -		 * data=journal, all of the data blocks are written to -		 * the journal device.) -		 */ -		if (ext4_should_writeback_data(inode) && -		    (journal->j_fs_dev != journal->j_dev) && -		    (journal->j_flags & JBD2_BARRIER)) -			blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, -					NULL); -		ret = jbd2_log_wait_commit(journal, commit_tid); -	} else if (journal->j_flags & JBD2_BARRIER) -		blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); +	if (journal->j_flags & JBD2_BARRIER && +	    !jbd2_trans_will_send_data_barrier(journal, commit_tid)) +		needs_barrier = true; +	ret = jbd2_complete_transaction(journal, commit_tid); +	if (needs_barrier) { +		err = blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL); +		if (!ret) +			ret = err; +	} +out: +	trace_ext4_sync_file_exit(inode, ret);  	return ret;  }  | 
