aboutsummaryrefslogtreecommitdiff
path: root/fs/block_dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/block_dev.c')
-rw-r--r--fs/block_dev.c302
1 files changed, 180 insertions, 122 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 0e575d1304b..6d7274619bf 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -16,6 +16,7 @@
#include <linux/blkdev.h>
#include <linux/module.h>
#include <linux/blkpg.h>
+#include <linux/magic.h>
#include <linux/buffer_head.h>
#include <linux/swap.h>
#include <linux/pagevec.h>
@@ -26,6 +27,7 @@
#include <linux/namei.h>
#include <linux/log2.h>
#include <linux/cleancache.h>
+#include <linux/aio.h>
#include <asm/uaccess.h>
#include "internal.h"
@@ -56,30 +58,24 @@ static void bdev_inode_switch_bdi(struct inode *inode,
struct backing_dev_info *dst)
{
struct backing_dev_info *old = inode->i_data.backing_dev_info;
+ bool wakeup_bdi = false;
if (unlikely(dst == old)) /* deadlock avoidance */
return;
bdi_lock_two(&old->wb, &dst->wb);
spin_lock(&inode->i_lock);
inode->i_data.backing_dev_info = dst;
- if (inode->i_state & I_DIRTY)
+ if (inode->i_state & I_DIRTY) {
+ if (bdi_cap_writeback_dirty(dst) && !wb_has_dirty_io(&dst->wb))
+ wakeup_bdi = true;
list_move(&inode->i_wb_list, &dst->wb.b_dirty);
+ }
spin_unlock(&inode->i_lock);
spin_unlock(&old->wb.list_lock);
spin_unlock(&dst->wb.list_lock);
-}
-static sector_t max_block(struct block_device *bdev)
-{
- sector_t retval = ~((sector_t)0);
- loff_t sz = i_size_read(bdev->bd_inode);
-
- if (sz) {
- unsigned int size = block_size(bdev);
- unsigned int sizebits = blksize_bits(size);
- retval = (sz >> sizebits);
- }
- return retval;
+ if (wakeup_bdi)
+ bdi_wakeup_thread_delayed(dst);
}
/* Kill _all_ buffers and pagecache , dirty or not.. */
@@ -87,7 +83,7 @@ void kill_bdev(struct block_device *bdev)
{
struct address_space *mapping = bdev->bd_inode->i_mapping;
- if (mapping->nrpages == 0)
+ if (mapping->nrpages == 0 && mapping->nrshadows == 0)
return;
invalidate_bh_lrus();
@@ -109,7 +105,7 @@ void invalidate_bdev(struct block_device *bdev)
/* 99% of the time, we don't need to flush the cleancache on the bdev.
* But, for the strange corners, lets be cautious
*/
- cleancache_flush_inode(mapping);
+ cleancache_invalidate_inode(mapping);
}
EXPORT_SYMBOL(invalidate_bdev);
@@ -162,61 +158,22 @@ static int
blkdev_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create)
{
- if (iblock >= max_block(I_BDEV(inode))) {
- if (create)
- return -EIO;
-
- /*
- * for reads, we're just trying to fill a partial page.
- * return a hole, they will have to call get_block again
- * before they can fill it, and they will get -EIO at that
- * time
- */
- return 0;
- }
bh->b_bdev = I_BDEV(inode);
bh->b_blocknr = iblock;
set_buffer_mapped(bh);
return 0;
}
-static int
-blkdev_get_blocks(struct inode *inode, sector_t iblock,
- struct buffer_head *bh, int create)
-{
- sector_t end_block = max_block(I_BDEV(inode));
- unsigned long max_blocks = bh->b_size >> inode->i_blkbits;
-
- if ((iblock + max_blocks) > end_block) {
- max_blocks = end_block - iblock;
- if ((long)max_blocks <= 0) {
- if (create)
- return -EIO; /* write fully beyond EOF */
- /*
- * It is a read which is fully beyond EOF. We return
- * a !buffer_mapped buffer
- */
- max_blocks = 0;
- }
- }
-
- bh->b_bdev = I_BDEV(inode);
- bh->b_blocknr = iblock;
- bh->b_size = max_blocks << inode->i_blkbits;
- if (max_blocks)
- set_buffer_mapped(bh);
- return 0;
-}
-
static ssize_t
-blkdev_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
- loff_t offset, unsigned long nr_segs)
+blkdev_direct_IO(int rw, struct kiocb *iocb, struct iov_iter *iter,
+ loff_t offset)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file->f_mapping->host;
- return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iov, offset,
- nr_segs, blkdev_get_blocks, NULL, NULL, 0);
+ return __blockdev_direct_IO(rw, iocb, inode, I_BDEV(inode), iter,
+ offset, blkdev_get_block,
+ NULL, NULL, 0);
}
int __sync_blockdev(struct block_device *bdev, int wait)
@@ -370,37 +327,16 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping,
/*
* private llseek:
- * for a block special file file->f_path.dentry->d_inode->i_size is zero
+ * for a block special file file_inode(file)->i_size is zero
* so we compute the size by hand (just as in block_read/write above)
*/
-static loff_t block_llseek(struct file *file, loff_t offset, int origin)
+static loff_t block_llseek(struct file *file, loff_t offset, int whence)
{
struct inode *bd_inode = file->f_mapping->host;
- loff_t size;
loff_t retval;
mutex_lock(&bd_inode->i_mutex);
- size = i_size_read(bd_inode);
-
- retval = -EINVAL;
- switch (origin) {
- case SEEK_END:
- offset += size;
- break;
- case SEEK_CUR:
- offset += file->f_pos;
- case SEEK_SET:
- break;
- default:
- goto out;
- }
- if (offset >= 0 && offset <= size) {
- if (offset != file->f_pos) {
- file->f_pos = offset;
- }
- retval = offset;
- }
-out:
+ retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
mutex_unlock(&bd_inode->i_mutex);
return retval;
}
@@ -428,6 +364,69 @@ int blkdev_fsync(struct file *filp, loff_t start, loff_t end, int datasync)
}
EXPORT_SYMBOL(blkdev_fsync);
+/**
+ * bdev_read_page() - Start reading a page from a block device
+ * @bdev: The device to read the page from
+ * @sector: The offset on the device to read the page to (need not be aligned)
+ * @page: The page to read
+ *
+ * On entry, the page should be locked. It will be unlocked when the page
+ * has been read. If the block driver implements rw_page synchronously,
+ * that will be true on exit from this function, but it need not be.
+ *
+ * Errors returned by this function are usually "soft", eg out of memory, or
+ * queue full; callers should try a different route to read this page rather
+ * than propagate an error back up the stack.
+ *
+ * Return: negative errno if an error occurs, 0 if submission was successful.
+ */
+int bdev_read_page(struct block_device *bdev, sector_t sector,
+ struct page *page)
+{
+ const struct block_device_operations *ops = bdev->bd_disk->fops;
+ if (!ops->rw_page)
+ return -EOPNOTSUPP;
+ return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ);
+}
+EXPORT_SYMBOL_GPL(bdev_read_page);
+
+/**
+ * bdev_write_page() - Start writing a page to a block device
+ * @bdev: The device to write the page to
+ * @sector: The offset on the device to write the page to (need not be aligned)
+ * @page: The page to write
+ * @wbc: The writeback_control for the write
+ *
+ * On entry, the page should be locked and not currently under writeback.
+ * On exit, if the write started successfully, the page will be unlocked and
+ * under writeback. If the write failed already (eg the driver failed to
+ * queue the page to the device), the page will still be locked. If the
+ * caller is a ->writepage implementation, it will need to unlock the page.
+ *
+ * Errors returned by this function are usually "soft", eg out of memory, or
+ * queue full; callers should try a different route to write this page rather
+ * than propagate an error back up the stack.
+ *
+ * Return: negative errno if an error occurs, 0 if submission was successful.
+ */
+int bdev_write_page(struct block_device *bdev, sector_t sector,
+ struct page *page, struct writeback_control *wbc)
+{
+ int result;
+ int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE;
+ const struct block_device_operations *ops = bdev->bd_disk->fops;
+ if (!ops->rw_page)
+ return -EOPNOTSUPP;
+ set_page_writeback(page);
+ result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw);
+ if (result)
+ end_page_writeback(page);
+ else
+ unlock_page(page);
+ return result;
+}
+EXPORT_SYMBOL_GPL(bdev_write_page);
+
/*
* pseudo-fs
*/
@@ -484,9 +483,9 @@ static void bdev_evict_inode(struct inode *inode)
{
struct block_device *bdev = &BDEV_I(inode)->bdev;
struct list_head *p;
- truncate_inode_pages(&inode->i_data, 0);
+ truncate_inode_pages_final(&inode->i_data);
invalidate_inode_buffers(inode); /* is it needed here? */
- end_writeback(inode);
+ clear_inode(inode);
spin_lock(&bdev_lock);
while ( (p = bdev->bd_inodes.next) != &bdev->bd_inodes ) {
__bd_forget(list_entry(p, struct inode, i_devices));
@@ -506,7 +505,7 @@ static const struct super_operations bdev_sops = {
static struct dentry *bd_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, 0x62646576);
+ return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
}
static struct file_system_type bd_type = {
@@ -603,6 +602,7 @@ struct block_device *bdgrab(struct block_device *bdev)
ihold(bdev->bd_inode);
return bdev;
}
+EXPORT_SYMBOL(bdgrab);
long nr_blockdev_pages(void)
{
@@ -656,7 +656,7 @@ static struct block_device *bd_acquire(struct inode *inode)
return bdev;
}
-static inline int sb_is_blkdev_sb(struct super_block *sb)
+int sb_is_blkdev_sb(struct super_block *sb)
{
return sb == blockdev_superblock;
}
@@ -668,11 +668,9 @@ void bd_forget(struct inode *inode)
struct block_device *bdev = NULL;
spin_lock(&bdev_lock);
- if (inode->i_bdev) {
- if (!sb_is_blkdev_sb(inode->i_sb))
- bdev = inode->i_bdev;
- __bd_forget(inode);
- }
+ if (!sb_is_blkdev_sb(inode->i_sb))
+ bdev = inode->i_bdev;
+ __bd_forget(inode);
spin_unlock(&bdev_lock);
if (bdev)
@@ -1046,6 +1044,7 @@ int revalidate_disk(struct gendisk *disk)
mutex_lock(&bdev->bd_mutex);
check_disk_size_change(disk, bdev);
+ bdev->bd_invalidated = 0;
mutex_unlock(&bdev->bd_mutex);
bdput(bdev);
return ret;
@@ -1084,7 +1083,9 @@ void bd_set_size(struct block_device *bdev, loff_t size)
{
unsigned bsize = bdev_logical_block_size(bdev);
- bdev->bd_inode->i_size = size;
+ mutex_lock(&bdev->bd_inode->i_mutex);
+ i_size_write(bdev->bd_inode, size);
+ mutex_unlock(&bdev->bd_inode->i_mutex);
while (bsize < PAGE_CACHE_SIZE) {
if (size & bsize)
break;
@@ -1095,7 +1096,7 @@ void bd_set_size(struct block_device *bdev, loff_t size)
}
EXPORT_SYMBOL(bd_set_size);
-static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
+static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
/*
* bd_mutex locking:
@@ -1169,7 +1170,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
}
}
- if (!ret && !bdev->bd_openers) {
+ if (!ret) {
bd_set_size(bdev,(loff_t)get_capacity(disk)<<9);
bdi = blk_get_backing_dev_info(bdev);
if (bdi == NULL)
@@ -1183,8 +1184,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
* The latter is necessary to prevent ghost
* partitions on a removed medium.
*/
- if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
- rescan_partitions(disk, bdev);
+ if (bdev->bd_invalidated) {
+ if (!ret)
+ rescan_partitions(disk, bdev);
+ else if (ret == -ENOMEDIUM)
+ invalidate_partitions(disk, bdev);
+ }
if (ret)
goto out_clear;
} else {
@@ -1214,8 +1219,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
if (bdev->bd_disk->fops->open)
ret = bdev->bd_disk->fops->open(bdev, mode);
/* the same as first opener case, read comment there */
- if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM))
- rescan_partitions(bdev->bd_disk, bdev);
+ if (bdev->bd_invalidated) {
+ if (!ret)
+ rescan_partitions(bdev->bd_disk, bdev);
+ else if (ret == -ENOMEDIUM)
+ invalidate_partitions(bdev->bd_disk, bdev);
+ }
if (ret)
goto out_unlock_bdev;
}
@@ -1442,9 +1451,8 @@ static int blkdev_open(struct inode * inode, struct file * filp)
return blkdev_get(bdev, filp->f_mode, filp);
}
-static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
+static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
{
- int ret = 0;
struct gendisk *disk = bdev->bd_disk;
struct block_device *victim = NULL;
@@ -1464,7 +1472,7 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
}
if (bdev->bd_contains == bdev) {
if (disk->fops->release)
- ret = disk->fops->release(disk, mode);
+ disk->fops->release(disk, mode);
}
if (!bdev->bd_openers) {
struct module *owner = disk->fops->owner;
@@ -1483,10 +1491,9 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
bdput(bdev);
if (victim)
__blkdev_put(victim, mode, 1);
- return ret;
}
-int blkdev_put(struct block_device *bdev, fmode_t mode)
+void blkdev_put(struct block_device *bdev, fmode_t mode)
{
mutex_lock(&bdev->bd_mutex);
@@ -1530,15 +1537,15 @@ int blkdev_put(struct block_device *bdev, fmode_t mode)
mutex_unlock(&bdev->bd_mutex);
- return __blkdev_put(bdev, mode, 0);
+ __blkdev_put(bdev, mode, 0);
}
EXPORT_SYMBOL(blkdev_put);
static int blkdev_close(struct inode * inode, struct file * filp)
{
struct block_device *bdev = I_BDEV(filp->f_mapping->host);
-
- return blkdev_put(bdev, filp->f_mode);
+ blkdev_put(bdev, filp->f_mode);
+ return 0;
}
static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
@@ -1565,25 +1572,39 @@ static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
* Does not take i_mutex for the write and thus is not for general purpose
* use.
*/
-ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov,
- unsigned long nr_segs, loff_t pos)
+ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
+ struct blk_plug plug;
ssize_t ret;
- BUG_ON(iocb->ki_pos != pos);
-
- ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
- if (ret > 0 || ret == -EIOCBQUEUED) {
+ blk_start_plug(&plug);
+ ret = __generic_file_write_iter(iocb, from);
+ if (ret > 0) {
ssize_t err;
-
- err = generic_write_sync(file, pos, ret);
- if (err < 0 && ret > 0)
+ err = generic_write_sync(file, iocb->ki_pos - ret, ret);
+ if (err < 0)
ret = err;
}
+ blk_finish_plug(&plug);
return ret;
}
-EXPORT_SYMBOL_GPL(blkdev_aio_write);
+EXPORT_SYMBOL_GPL(blkdev_write_iter);
+
+static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *bd_inode = file->f_mapping->host;
+ loff_t size = i_size_read(bd_inode);
+ loff_t pos = iocb->ki_pos;
+
+ if (pos >= size)
+ return 0;
+
+ size -= pos;
+ iov_iter_truncate(to, size);
+ return generic_file_read_iter(iocb, to);
+}
/*
* Try to release a page associated with block device when the system
@@ -1607,16 +1628,17 @@ static const struct address_space_operations def_blk_aops = {
.writepages = generic_writepages,
.releasepage = blkdev_releasepage,
.direct_IO = blkdev_direct_IO,
+ .is_dirty_writeback = buffer_check_dirty_writeback,
};
const struct file_operations def_blk_fops = {
.open = blkdev_open,
.release = blkdev_close,
.llseek = block_llseek,
- .read = do_sync_read,
- .write = do_sync_write,
- .aio_read = generic_file_aio_read,
- .aio_write = blkdev_aio_write,
+ .read = new_sync_read,
+ .write = new_sync_write,
+ .read_iter = blkdev_read_iter,
+ .write_iter = blkdev_write_iter,
.mmap = generic_file_mmap,
.fsync = blkdev_fsync,
.unlocked_ioctl = block_ioctl,
@@ -1624,7 +1646,7 @@ const struct file_operations def_blk_fops = {
.compat_ioctl = compat_blkdev_ioctl,
#endif
.splice_read = generic_file_splice_read,
- .splice_write = generic_file_splice_write,
+ .splice_write = iter_file_splice_write,
};
int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg)
@@ -1701,3 +1723,39 @@ int __invalidate_device(struct block_device *bdev, bool kill_dirty)
return res;
}
EXPORT_SYMBOL(__invalidate_device);
+
+void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
+{
+ struct inode *inode, *old_inode = NULL;
+
+ spin_lock(&inode_sb_list_lock);
+ list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
+ struct address_space *mapping = inode->i_mapping;
+
+ spin_lock(&inode->i_lock);
+ if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
+ mapping->nrpages == 0) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
+ __iget(inode);
+ spin_unlock(&inode->i_lock);
+ spin_unlock(&inode_sb_list_lock);
+ /*
+ * We hold a reference to 'inode' so it couldn't have been
+ * removed from s_inodes list while we dropped the
+ * inode_sb_list_lock. We cannot iput the inode now as we can
+ * be holding the last reference and we cannot iput it under
+ * inode_sb_list_lock. So we keep the reference and iput it
+ * later.
+ */
+ iput(old_inode);
+ old_inode = inode;
+
+ func(I_BDEV(inode), arg);
+
+ spin_lock(&inode_sb_list_lock);
+ }
+ spin_unlock(&inode_sb_list_lock);
+ iput(old_inode);
+}