diff options
71 files changed, 462 insertions, 164 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 9b6ed7c9f34..ca7e2529254 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -412,7 +412,7 @@ prototypes: int (*open) (struct inode *, struct file *); int (*flush) (struct file *); int (*release) (struct inode *, struct file *); - int (*fsync) (struct file *, int datasync); + int (*fsync) (struct file *, loff_t start, loff_t end, int datasync); int (*aio_fsync) (struct kiocb *, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); @@ -438,9 +438,7 @@ prototypes: locking rules: All may block except for ->setlease. - No VFS locks held on entry except for ->fsync and ->setlease. - -->fsync() has i_mutex on inode. + No VFS locks held on entry except for ->setlease. ->setlease has the file_list_lock held and must not sleep. diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting index 6b96773e27c..7f8861d341e 100644 --- a/Documentation/filesystems/porting +++ b/Documentation/filesystems/porting @@ -421,3 +421,10 @@ data and there is a virtual hole at the end of the file. So if the provided offset is less than i_size and SEEK_DATA is specified, return the same offset. If the above is true for the offset and you are given SEEK_HOLE, return the end of the file. If the offset is i_size or greater return -ENXIO in either case. + +[mandatory] + If you have your own ->fsync() you must make sure to call +filemap_write_and_wait_range() so that all dirty pages are synced out properly. +You must also keep in mind that ->fsync() is not called with i_mutex held +anymore, so if you require i_mutex locking you must make sure to take it and +release it yourself. diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 6bf85b78cfe..eff6617c9a0 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -777,7 +777,7 @@ struct file_operations { int (*open) (struct inode *, struct file *); int (*flush) (struct file *); int (*release) (struct inode *, struct file *); - int (*fsync) (struct file *, int datasync); + int (*fsync) (struct file *, loff_t, loff_t, int datasync); int (*aio_fsync) (struct kiocb *, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index 3c7c3f82d84..fb59c46e9e9 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -1850,9 +1850,16 @@ out: return ret; } -static int spufs_mfc_fsync(struct file *file, int datasync) +static int spufs_mfc_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - return spufs_mfc_flush(file, NULL); + struct inode *inode = file->f_path.dentry->d_inode; + int err = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (!err) { + mutex_lock(&inode->i_mutex); + err = spufs_mfc_flush(file, NULL); + mutex_unlock(&inode->i_mutex); + } + return err; } static int spufs_mfc_fasync(int fd, struct file *file, int on) diff --git a/drivers/char/ps3flash.c b/drivers/char/ps3flash.c index 5a06787e5be..d0c57c2e290 100644 --- a/drivers/char/ps3flash.c +++ b/drivers/char/ps3flash.c @@ -309,9 +309,14 @@ static int ps3flash_flush(struct file *file, fl_owner_t id) return ps3flash_writeback(ps3flash_dev); } -static int ps3flash_fsync(struct file *file, int datasync) +static int ps3flash_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - return ps3flash_writeback(ps3flash_dev); + struct inode *inode = file->f_path.dentry->d_inode; + int err; + mutex_lock(&inode->i_mutex); + err = ps3flash_writeback(ps3flash_dev); + mutex_unlock(&inode->i_mutex); + return err; } static irqreturn_t ps3flash_interrupt(int irq, void *data) diff --git a/drivers/mtd/ubi/cdev.c b/drivers/mtd/ubi/cdev.c index 191f3bb3c41..3320a50ba4f 100644 --- a/drivers/mtd/ubi/cdev.c +++ b/drivers/mtd/ubi/cdev.c @@ -189,12 +189,16 @@ static loff_t vol_cdev_llseek(struct file *file, loff_t offset, int origin) return new_offset; } -static int vol_cdev_fsync(struct file *file, int datasync) +static int vol_cdev_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct ubi_volume_desc *desc = file->private_data; struct ubi_device *ubi = desc->vol->ubi; - - return ubi_sync(ubi->ubi_num); + struct inode *inode = file->f_path.dentry->d_inode; + int err; + mutex_lock(&inode->i_mutex); + err = ubi_sync(ubi->ubi_num); + mutex_unlock(&inode->i_mutex); + return err; } diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c index c0f0ac7c1cd..f3c6060c96b 100644 --- a/drivers/staging/pohmelfs/inode.c +++ b/drivers/staging/pohmelfs/inode.c @@ -887,11 +887,16 @@ static struct inode *pohmelfs_alloc_inode(struct super_block *sb) /* * We want fsync() to work on POHMELFS. */ -static int pohmelfs_fsync(struct file *file, int datasync) +static int pohmelfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct inode *inode = file->f_mapping->host; - - return sync_inode_metadata(inode, 1); + int err = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (!err) { + mutex_lock(&inode->i_mutex); + err = sync_inode_metadata(inode, 1); + mutex_unlock(&inode->i_mutex); + } + return err; } ssize_t pohmelfs_write(struct file *file, const char __user *buf, diff --git a/drivers/usb/gadget/printer.c b/drivers/usb/gadget/printer.c index 271ef94668e..978e6a101bf 100644 --- a/drivers/usb/gadget/printer.c +++ b/drivers/usb/gadget/printer.c @@ -795,12 +795,14 @@ printer_write(struct file *fd, const char __user *buf, size_t len, loff_t *ptr) } static int -printer_fsync(struct file *fd, int datasync) +printer_fsync(struct file *fd, loff_t start, loff_t end, int datasync) { struct printer_dev *dev = fd->private_data; + struct inode *inode = fd->f_path.dentry->d_inode; unsigned long flags; int tx_list_empty; + mutex_lock(&inode->i_mutex); spin_lock_irqsave(&dev->lock, flags); tx_list_empty = (likely(list_empty(&dev->tx_reqs))); spin_unlock_irqrestore(&dev->lock, flags); @@ -810,6 +812,7 @@ printer_fsync(struct file *fd, int datasync) wait_event_interruptible(dev->tx_flush_wait, (likely(list_empty(&dev->tx_reqs_active)))); } + mutex_unlock(&inode->i_mutex); return 0; } diff --git a/drivers/video/fb_defio.c b/drivers/video/fb_defio.c index 804000183c5..32814e8800e 100644 --- a/drivers/video/fb_defio.c +++ b/drivers/video/fb_defio.c @@ -66,19 +66,26 @@ static int fb_deferred_io_fault(struct vm_area_struct *vma, return 0; } -int fb_deferred_io_fsync(struct file *file, int datasync) +int fb_deferred_io_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct fb_info *info = file->private_data; + struct inode *inode = file->f_path.dentry->d_inode; + int err = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (err) + return err; /* Skip if deferred io is compiled-in but disabled on this fbdev */ if (!info->fbdefio) return 0; + mutex_lock(&inode->i_mutex); /* Kill off the delayed work */ cancel_delayed_work_sync(&info->deferred_work); /* Run it immediately */ - return schedule_delayed_work(&info->deferred_work, 0); + err = schedule_delayed_work(&info->deferred_work, 0); + mutex_unlock(&inode->i_mutex); + return err; } EXPORT_SYMBOL_GPL(fb_deferred_io_fsync); diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index 4014160903a..46ce357ca1a 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -70,7 +70,8 @@ ssize_t v9fs_file_readn(struct file *, char *, char __user *, u32, u64); ssize_t v9fs_fid_readn(struct p9_fid *, char *, char __user *, u32, u64); void v9fs_blank_wstat(struct p9_wstat *wstat); int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *); -int v9fs_file_fsync_dotl(struct file *filp, int datasync); +int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end, + int datasync); ssize_t v9fs_file_write_internal(struct inode *, struct p9_fid *, const char __user *, size_t, loff_t *, int); int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode); diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index ffed55817f0..3c173fcc2c5 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -519,32 +519,50 @@ out: } -static int v9fs_file_fsync(struct file *filp, int datasync) +static int v9fs_file_fsync(struct file *filp, loff_t start, loff_t end, + int datasync) { struct p9_fid *fid; + struct inode *inode = filp->f_mapping->host; struct p9_wstat wstat; int retval; + retval = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (retval) + return retval; + + mutex_lock(&inode->i_mutex); P9_DPRINTK(P9_DEBUG_VFS, "filp %p datasync %x\n", filp, datasync); fid = filp->private_data; v9fs_blank_wstat(&wstat); retval = p9_client_wstat(fid, &wstat); + mutex_unlock(&inode->i_mutex); + return retval; } -int v9fs_file_fsync_dotl(struct file *filp, int datasync) +int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end, + int datasync) { struct p9_fid *fid; + struct inode *inode = filp->f_mapping->host; int retval; + retval = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (retval) + return retval; + + mutex_lock(&inode->i_mutex); P9_DPRINTK(P9_DEBUG_VFS, "v9fs_file_fsync_dotl: filp %p datasync %x\n", filp, datasync); fid = filp->private_data; retval = p9_client_fsync(fid, datasync); + mutex_unlock(&inode->i_mutex); + return retval; } diff --git a/fs/affs/affs.h b/fs/affs/affs.h index 0e95f73a702..c2b9c79eb64 100644 --- a/fs/affs/affs.h +++ b/fs/affs/affs.h @@ -182,7 +182,7 @@ extern int affs_add_entry(struct inode *dir, struct inode *inode, struct dent void affs_free_prealloc(struct inode *inode); extern void affs_truncate(struct inode *); -int affs_file_fsync(struct file *, int); +int affs_file_fsync(struct file *, loff_t, loff_t, int); /* dir.c */ diff --git a/fs/affs/file.c b/fs/affs/file.c index acf321b70fc..2f4c935cb32 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -923,14 +923,20 @@ affs_truncate(struct inode *inode) affs_free_prealloc(inode); } -int affs_file_fsync(struct file *filp, int datasync) +int affs_file_fsync(struct file *filp, loff_t start, loff_t end, int datasync) { struct inode *inode = filp->f_mapping->host; int ret, err; + err = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (err) + return err; + + mutex_lock(&inode->i_mutex); ret = write_inode_now(inode, 0); err = sync_blockdev(inode->i_sb->s_bdev); if (!ret) ret = err; + mutex_unlock(&inode->i_mutex); return ret; } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index f396d337b81..d2b0888126d 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -750,7 +750,7 @@ extern void afs_pages_written_back(struct afs_vnode *, struct afs_call *); extern ssize_t afs_file_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern int afs_writeback_all(struct afs_vnode *); -extern int afs_fsync(struct file *, int); +extern int afs_fsync(struct file *, loff_t, loff_t, int); /*****************************************************************************/ diff --git a/fs/afs/write.c b/fs/afs/write.c index b806285ff85..9aa52d93c73 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -681,9 +681,10 @@ int afs_writeback_all(struct afs_vnode *vnode) * - the return status from this call provides a reliable indication of * whether any write errors occurred for this process. */ -int afs_fsync(struct file *file, int datasync) +int afs_fsync(struct file *file, loff_t start, loff_t end, int datasync) { struct dentry *dentry = file->f_path.dentry; + struct inode *inode = file->f_mapping->host; struct afs_writeback *wb, *xwb; struct afs_vnode *vnode = AFS_FS_I(dentry->d_inode); int ret; @@ -692,12 +693,19 @@ int afs_fsync(struct file *file, int datasync) vnode->fid.vid, vnode->fid.vnode, dentry->d_name.name, datasync); + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret) + return ret; + mutex_lock(&inode->i_mutex); + /* use a writeback record as a marker in the queue - when this reaches * the front of the queue, all the outstanding writes are either * completed or rejected */ wb = kzalloc(sizeof(*wb), GFP_KERNEL); - if (!wb) - return -ENOMEM; + if (!wb) { + ret = -ENOMEM; + goto out; + } wb->vnode = vnode; wb->first = 0; wb->last = -1; @@ -720,7 +728,7 @@ int afs_fsync(struct file *file, int datasync) if (ret < 0) { afs_put_writeback(wb); _leave(" = %d [wb]", ret); - return ret; + goto out; } /* wait for the preceding writes to actually complete */ @@ -729,6 +737,8 @@ int afs_fsync(struct file *file, int datasync) vnode->writebacks.next == &wb->link); afs_put_writeback(wb); _leave(" = %d", ret); +out: + mutex_unlock(&inode->i_mutex); return ret; } diff --git a/fs/bad_inode.c b/fs/bad_inode.c index f024d8aadde..9205cf25f1c 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -87,7 +87,8 @@ static int bad_file_release(struct inode *inode, struct file *filp) return -EIO; } -static int bad_file_fsync(struct file *file, int datasync) +static int bad_file_fsync(struct file *file, loff_t start, loff_t end, + int datasync) { return -EIO; } diff --git a/fs/block_dev.c b/fs/block_dev.c index 966617a422d..9fb0b15331d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -378,7 +378,7 @@ out: return retval; } -int blkdev_fsync(struct file *filp, int datasync) +int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync) { struct inode *bd_inode = filp->f_mapping->host; struct block_device *bdev = I_BDEV(bd_inode); @@ -389,14 +389,10 @@ int blkdev_fsync(struct file *filp, int datasync) * i_mutex and doing so causes performance issues with concurrent * O_SYNC writers to a block device. */ - mutex_unlock(&bd_inode->i_mutex); - error = blkdev_issue_flush(bdev, GFP_KERNEL, NULL); if (error == -EOPNOTSUPP) error = 0; - mutex_lock(&bd_inode->i_mutex); - return error; } EXPORT_SYMBOL(blkdev_fsync); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index f1ff62bff1b..82be74efbb2 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -2605,7 +2605,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file, int btrfs_add_inode_defrag(struct btrfs_trans_handle *trans, struct inode *inode); int btrfs_run_defrag_inodes(struct btrfs_fs_info *fs_info); -int btrfs_sync_file(struct file *file, int datasync); +int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end, int skip_pinned); extern const struct file_operations btrfs_file_operations; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index bd4d061c6e4..59cbdb120ad 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1452,7 +1452,7 @@ int btrfs_release_file(struct inode *inode, struct file *filp) * important optimization for directories because holding the mutex prevents * new operations on the dir while we write to disk. */ -int btrfs_sync_file(struct file *file, int datasync) +int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { struct dentry *dentry = file->f_path.dentry; struct inode *inode = dentry->d_inode; @@ -1462,9 +1462,13 @@ int btrfs_sync_file(struct file *file, int datasync) trace_btrfs_sync_file(file, datasync); + ret = filemap_write_and_wait_range(inode->i_mapping, start, end); + if (ret) + return ret; + mutex_lock(&inode->i_mutex); + /* we wait first, since the writeback may change the inode */ root->log_batch++; - /* the VFS called filemap_fdatawrite for us */ btrfs_wait_ordered_range(inode, 0, (u64)-1); root->log_batch++; @@ -1472,8 +1476,10 @@ int btrfs_sync_file(struct file *file, int datasync) * check the transaction that last modified this inode * and see if its already been committed */ - if (!BTRFS_I(inode)->last_trans) + if (!BTRFS_I(inode)->last_trans) { + mutex_unlock(&inode->i_mutex); goto out; + } /* * if the last transaction that changed this file was before @@ -1484,6 +1490,7 @@ int btrfs_sync_file(struct file *file, int datasync) if (BTRFS_I(inode)->last_trans <= root->fs_info->last_trans_committed) { BTRFS_I(inode)->last_trans = 0; + mutex_unlock(&inode->i_mutex); goto out; } @@ -1496,12 +1503,15 @@ int btrfs_sync_file(struct file *file, int datasync) trans = btrfs_start_transaction(root, 0); if (IS_ERR(trans)) { ret = PTR_ERR(trans); + mutex_unlock(&inode->i_mutex); goto out; } ret = btrfs_log_dentry_safe(trans, root, dentry); - if (ret < 0) + if (ret < 0) { + mutex_unlock(&inode->i_mutex); goto out; + } /* we've logged all the items and now have a consistent * version of the file in the log. It is possible that @@ -1513,7 +1523,7 @@ int btrfs_sync_file(struct file *file, int datasync) * file again, but that will end up using the synchronization * inside btrfs_sync_log to keep things safe. */ - mutex_unlock(&dentry->d_inode->i_mutex); + mutex_unlock(&inode->i_mutex); if (ret != BTRFS_NO_LOG_SYNC) { if (ret > 0) { @@ -1528,7 +1538,6 @@ int btrfs_sync_file(struct file *file, int datasync) } else { ret = btrfs_end_transaction(trans, ro |