diff options
author | Josef Bacik <josef@redhat.com> | 2011-07-16 20:44:56 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2011-07-20 20:47:59 -0400 |
commit | 02c24a82187d5a628c68edfe71ae60dc135cd178 (patch) | |
tree | c8dbaba4d82e2b20ed4335910a564a1f7d90fcf6 | |
parent | 22735068d53c7115e384bc88dea95b17e76a6839 (diff) |
fs: push i_mutex and filemap_write_and_wait down into ->fsync() handlers
Btrfs needs to be able to control how filemap_write_and_wait_range() is called
in fsync to make it less of a painful operation, so push down taking i_mutex and
the calling of filemap_write_and_wait() down into the ->fsync() handlers. Some
file systems can drop taking the i_mutex altogether it seems, like ext3 and
ocfs2. For correctness sake I just pushed everything down in all cases to make
sure that we keep the current behavior the same for everybody, and then each
individual fs maintainer can make up their mind about what to do from there.
Thanks,
Acked-by: Jan Kara <jack@suse.cz>
Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
71 files changed, 462 insertions, 164 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 9b6ed7c9f34..ca7e2529254 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -412,7 +412,7 @@ prototypes: int (*open) (struct inode *, struct file *); int (*flush) (struct file *); int (*release) (struct inode *, struct file *); - int (*fsync) (struct file *, int datasync); + int (*fsync) (struct file *, loff_t start, loff_t end, int datasync); int (*aio_fsync) (struct kiocb *, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); @@ -438,9 +438,7 @@ prototypes: locking rules: All may block except for ->setlease. - No VFS locks held on entry except for ->fsync and ->setlease. - -->fsync() has i_mutex on inode. + No VFS locks held on entry except for ->setlease. ->setlease has the file_list_lock held and must not sleep. diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 6b96773e27c..7f8861d341e 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -421,3 +421,10 @@ data and there is a virtual hole at the end of the file. So if the provided offset is less than i_size and SEEK_DATA is specified, return the same offset. If the above is true for the offset and you are given SEEK_HOLE, return the end of the file. If the offset is i_size or greater return -ENXIO in either case. + +[mandatory] + If you have your own ->fsync() you must make sure to call +filemap_write_and_wait_range() so that all dirty pages are synced out properly. +You must also keep in mind that ->fsync() is not called with i_mutex held +anymore, so if you require i_mutex locking you must make sure to take it and +release it yourself. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 6bf85b78cfe..eff6617c9a0 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -777,7 +777,7 @@ struct file_operations { int (*open) (struct inode *, struct file *); int (*flush) (struct file *); int (*release) (struct inode *, struct file *); - int (*fsync) (struct file *, int datasync); + int (*fsync) (struct file *, loff_t, loff_t, int datasync); int (*aio_fsync) (struct kiocb *, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 3c7c3f82d84..fb59c46e9e9 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -1850,9 +1850,16 @@ out: return ret; } -static int spufs_mfc_fsync(struct file *file, int datasync) +static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - return spufs_mfc_flush(file, NULL); + struct inode *inode = file->f_path.dentry->d_inode; + int err = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (!err) { + mutex_lock(&inode->i_mutex); + err = spufs_mfc_flush(file, NULL); + mutex_unlock(&inode->i_mutex); + } + return err; } static int spufs_mfc_fasync(int fd, struct file *file, int on) diff --git a/drivers/char/ps3flash.c b/drivers/char/ps3flash.c index 5a06787e5be..d0c57c2e290 100644 --- a/drivers/char/ps3flash.c +++ b/drivers/char/ps3flash.c @@ -309,9 +309,14 @@ static int ps3flash_flush(struct file *file, fl_owner_t id) return ps3flash_writeback(ps3flash_dev); } -static int ps3flash_fsync(struct file *file, int datasync) +static int ps3flash_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - return ps3flash_writeback(ps3flash_dev); + struct inode *inode = file->f_path.dentry->d_inode; + int err; + mutex_lock(&inode->i_mutex); + err = ps3flash_writeback(ps3flash_dev); + mutex_unlock(&inode->i_mutex); + return err; } static irqreturn_t ps3flash_interrupt(int irq, void *data) diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c index 191f3bb3c41..3320a50ba4f 100644 --- a/drivers/mtd/ubi/cdev.c +++ b/drivers/mtd/ubi/cdev.c @@ -189,12 +189,16 @@ static loff_t vol_cdev_llseek(struct file *file, loff_t offset, int origin) return new_offset; } -static int vol_cdev_fsync(struct file *file, int datasync) +static int vol_cdev_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct ubi_volume_desc *desc = file->private_data; struct ubi_device *ubi = desc->vol->ubi; - - return ubi_sync(ubi->ubi_num); + struct inode *inode = file->f_path.dentry->d_inode; + int err; + mutex_lock(&inode->i_mutex); + err = ubi_sync(ubi->ubi_num); + mutex_unlock(&inode->i_mutex); + return err; } diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c index c0f0ac7c1cd..f3c6060c96b 100644 --- a/drivers/staging/pohmelfs/inode.c +++ b/drivers/staging/pohmelfs/inode.c @@ -887,11 +887,16 @@ static struct inode *pohmelfs_alloc_inode(struct super_block *sb) /* * We want fsync() to work on POHMELFS. */ -static int pohmelfs_fsync(struct file *file, int datasync) +static int pohmelfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; - - return sync_inode_metadata(inode, 1); + int err = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (!err) { + mutex_lock(&inode->i_mutex); + err = sync_inode_metadata(inode, 1); + mutex_unlock(&inode->i_mutex); + } + return err; } ssize_t pohmelfs_write(struct file *file, const char __user *buf, diff --git a/drivers/usb/gadget/printer.c b/drivers/usb/gadget/printer.c index 271ef94668e..978e6a101bf 100644 --- a/drivers/usb/gadget/printer.c +++ b/drivers/usb/gadget/printer.c @@ -795,12 +795,14 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr) } static int -printer_fsync(struct file *fd, int datasync) +printer_fsync(struct file *fd, loff_t start, loff_t end, int datasync) { struct printer_dev *dev = fd->private_data; + struct inode *inode = fd->f_path.dentry->d_inode; unsigned long flags; int tx_list_empty; + mutex_lock(&inode->i_mutex); spin_lock_irqsave(&dev->lock, flags); tx_list_empty = (likely(list_empty(&dev->tx_reqs))); spin_unlock_irqrestore(&dev->lock, flags); @@ -810,6 +812,7 @@ printer_fsync(struct file *fd, int datasync) wait_event_interruptible(dev->tx_flush_wait, (likely(list_empty(&dev->tx_reqs_active)))); } + mutex_unlock(&inode->i_mutex); return 0; } diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c index 804000183c5..32814e8800e 100644 --- a/drivers/video/fb_defio.c +++ b/drivers/video/fb_defio.c @@ -66,19 +66,26 @@ static int fb_deferred_io_fault(struct vm_area_struct *vma, return 0; } -int fb_deferred_io_fsync(struct file *file, int datasync) +int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct fb_info *info = file->private_data; + struct inode *inode = file->f_path.dentry->d_inode; + int err = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (err) + return err; /* Skip if deferred io is compiled-in but disabled on this fbdev */ if (!info->fbdefio) return 0; + mutex_lock(&inode->i_mutex); /* Kill off the delayed work */ cancel_delayed_work_sync(&info->deferred_work); /* Run it immediately */ - return schedule_delayed_work(&info->deferred_work, 0); + err = schedule_delayed_work(&info->deferred_work, 0); + mutex_unlock(&inode->i_mutex); + return err; } EXPORT_SYMBOL_GPL(fb_deferred_io_fsync); diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index 4014160903a..46ce357ca1a 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -70,7 +70,8 @@ ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64); ssize_t v9fs_fid_readn(struct p9_fid *, char *, char __user *, u32, u64); void v9fs_blank_wstat(struct p9_wstat *wstat); int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *); -int v9fs_file_fsync_dotl(struct file *filp, int datasync); +int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end, + int datasync); ssize_t v9fs_file_write_internal(struct inode *, struct p9_fid *, const char __user *, size_t, loff_t *, int); int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode); diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index ffed55817f0..3c173fcc2c5 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -519,32 +519,50 @@ out: } -static int v9fs_file_fsync(struct file *filp, int datasync) +static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end, + int datasync) { struct p9_fid *fid; + struct inode *inode = filp->f_mapping->host; struct p9_wstat wstat; int retval; + retval = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (retval) + return retval; + + mutex_lock(&inode->i_mutex); P9_DPRINTK(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync); fid = filp->private_data; v9fs_blank_wstat(&wstat); retval = p9_client_wstat(fid, &wstat); + mutex_unlock(&inode->i_mutex); + return retval; } -int v9fs_file_fsync_dotl(struct file *filp, int datasync) +int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end, + int datasync) { struct p9_fid *fid; + struct inode *inode = filp->f_mapping->host; int retval; + retval = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (retval) + return retval; + + mutex_lock(&inode->i_mutex); P9_DPRINTK(P9_DEBUG_VFS, "v9fs_file_fsync_dotl: filp %p datasync %x\n", filp, datasync); fid = filp->private_data; retval = p9_client_fsync(fid, datasync); + mutex_unlock(&inode->i_mutex); + return retval; } diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 0e95f73a702..c2b9c79eb64 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -182,7 +182,7 @@ extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dent void affs_free_prealloc(struct inode *inode); extern void affs_truncate(struct inode *); -int affs_file_fsync(struct file *, int); +int affs_file_fsync(struct file *, loff_t, loff_t, int); /* dir.c */ diff --git a/fs/affs/file.c b/fs/affs/file.c index acf321b70fc..2f4c935cb32 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -923,14 +923,20 @@ affs_truncate(struct inode *inode) affs_free_prealloc(inode); } -int affs_file_fsync(struct file *filp, int datasync) +int affs_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync) { struct inode *inode = filp->f_mapping->host; int ret, err; + err = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (err) + return err; + + mutex_lock(&inode->i_mutex); ret = write_inode_now(inode, 0); err = sync_blockdev(inode->i_sb->s_bdev); if (!ret) ret = err; + mutex_unlock(&inode->i_mutex); return ret; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index f396d337b81..d2b0888126d 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -750,7 +750,7 @@ extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); extern ssize_t afs_file_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern int afs_writeback_all(struct afs_vnode *); -extern int afs_fsync(struct file *, int); +extern int afs_fsync(struct file *, loff_t, loff_t, int); /*****************************************************************************/ diff --git a/fs/afs/write.c b/fs/afs/write.c index b806285ff85..9aa52d93c73 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -681,9 +681,10 @@ int afs_writeback_all(struct afs_vnode *vnode) * - the return status from this call provides a reliable indication of * whether any write errors occurred for this process. */ -int afs_fsync(struct file *file, int datasync) +int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct dentry *dentry = file->f_path.dentry; + struct inode *inode = file->f_mapping->host; struct afs_writeback *wb, *xwb; struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode); int ret; @@ -692,12 +693,19 @@ int afs_fsync(struct file *file, int datasync) vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name, datasync); + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret) + return ret; + mutex_lock(&inode->i_mutex); + /* use a writeback record as a marker in the queue - when this reaches * the front of the queue, all the outstanding writes are either * completed or rejected */ wb = kzalloc(sizeof(*wb), GFP_KERNEL); - if (!wb) - return -ENOMEM; + if (!wb) { + ret = -ENOMEM; + goto out; + } wb->vnode = vnode; wb->first = 0; wb->last = -1; @@ -720,7 +728,7 @@ int afs_fsync(struct file *file, int datasync) if (ret < 0) { afs_put_writeback(wb); _leave(" = %d [wb]", ret); - return ret; + goto out; } /* wait for the preceding writes to actually complete */ @@ -729,6 +737,8 @@ int afs_fsync(struct file *file, int datasync) vnode->writebacks.next == &wb->link); afs_put_writeback(wb); _leave(" = %d", ret); +out: + mutex_unlock(&inode->i_mutex); return ret; } diff --git a/fs/bad_inode.c b/fs/bad_inode.c index f024d8aadde..9205cf25f1c 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -87,7 +87,8 @@ static int bad_file_release(struct inode *inode, struct file *filp) return -EIO; } -static int bad_file_fsync(struct file *file, int datasync) +static int bad_file_fsync(struct file *file, loff_t start, loff_t end, + int datasync) { return -EIO; } diff --git a/fs/block_dev.c b/fs/block_dev.c index 966617a422d..9fb0b15331d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -378,7 +378,7 @@ out: return retval; } -int blkdev_fsync(struct file *filp, int datasync) +int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync) { struct inode *bd_inode = filp->f_mapping->host; struct block_device *bdev = I_BDEV(bd_inode); @@ -389,14 +389,10 @@ int blkdev_fsync(struct file *filp, int datasync) * i_mutex and doing so causes performance issues with concurrent * O_SYNC writers to a block device. */ - mutex_unlock(&bd_inode->i_mutex); - error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL); if (error == -EOPNOTSUPP) error = 0; - mutex_lock(&bd_inode->i_mutex); - return error; } EXPORT_SYMBOL(blkdev_fsync); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f1ff62bff1b..82be74efbb2 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2605,7 +2605,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, struct inode *inode); int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); -int btrfs_sync_file(struct file *file, int datasync); +int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, int skip_pinned); extern const struct file_operations btrfs_file_operations; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index bd4d061c6e4..59cbdb120ad 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1452,7 +1452,7 @@ int btrfs_release_file(struct inode *inode, struct file *filp) * important optimization for directories because holding the mutex prevents * new operations on the dir while we write to disk. */ -int btrfs_sync_file(struct file *file, int datasync) +int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; @@ -1462,9 +1462,13 @@ int btrfs_sync_file(struct file *file, int datasync) trace_btrfs_sync_file(file, datasync); + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret) + return ret; + mutex_lock(&inode->i_mutex); + /* we wait first, since the writeback may change the inode */ root->log_batch++; - /* the VFS called filemap_fdatawrite for us */ btrfs_wait_ordered_range(inode, 0, (u64)-1); root->log_batch++; @@ -1472,8 +1476,10 @@ int btrfs_sync_file(struct file *file, int datasync) * check the transaction that last modified this inode * and see if its already been committed */ - if (!BTRFS_I(inode)->last_trans) + if (!BTRFS_I(inode)->last_trans) { + mutex_unlock(&inode->i_mutex); goto out; + } /* * if the last transaction that changed this file was before @@ -1484,6 +1490,7 @@ int btrfs_sync_file(struct file *file, int datasync) if ( |