aboutsummaryrefslogtreecommitdiff
path: root/fs/direct-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/direct-io.c')
-rw-r--r--fs/direct-io.c46
1 files changed, 37 insertions, 9 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 389863f59cd..4a588dbd11b 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -36,6 +36,7 @@
#include <linux/rwsem.h>
#include <linux/uio.h>
#include <linux/atomic.h>
+#include <linux/prefetch.h>
/*
* How many user pages to map in one call to get_user_pages(). This determines
@@ -1087,8 +1088,8 @@ static inline int drop_refcount(struct dio *dio)
* individual fields and will generate much worse code. This is important
* for the whole file.
*/
-ssize_t
-__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
+static inline ssize_t
+do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
struct block_device *bdev, const struct iovec *iov, loff_t offset,
unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
dio_submit_t submit_io, int flags)
@@ -1097,7 +1098,6 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
size_t size;
unsigned long addr;
unsigned blkbits = inode->i_blkbits;
- unsigned bdev_blkbits = 0;
unsigned blocksize_mask = (1 << blkbits) - 1;
ssize_t retval = -EINVAL;
loff_t end = offset;
@@ -1110,12 +1110,14 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
if (rw & WRITE)
rw = WRITE_ODIRECT;
- if (bdev)
- bdev_blkbits = blksize_bits(bdev_logical_block_size(bdev));
+ /*
+ * Avoid references to bdev if not absolutely needed to give
+ * the early prefetch in the caller enough time.
+ */
if (offset & blocksize_mask) {
if (bdev)
- blkbits = bdev_blkbits;
+ blkbits = blksize_bits(bdev_logical_block_size(bdev));
blocksize_mask = (1 << blkbits) - 1;
if (offset & blocksize_mask)
goto out;
@@ -1126,11 +1128,13 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
addr = (unsigned long)iov[seg].iov_base;
size = iov[seg].iov_len;
end += size;
- if ((addr & blocksize_mask) || (size & blocksize_mask)) {
+ if (unlikely((addr & blocksize_mask) ||
+ (size & blocksize_mask))) {
if (bdev)
- blkbits = bdev_blkbits;
+ blkbits = blksize_bits(
+ bdev_logical_block_size(bdev));
blocksize_mask = (1 << blkbits) - 1;
- if ((addr & blocksize_mask) || (size & blocksize_mask))
+ if ((addr & blocksize_mask) || (size & blocksize_mask))
goto out;
}
}
@@ -1313,6 +1317,30 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
out:
return retval;
}
+
+ssize_t
+__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
+ struct block_device *bdev, const struct iovec *iov, loff_t offset,
+ unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
+ dio_submit_t submit_io, int flags)
+{
+ /*
+ * The block device state is needed in the end to finally
+ * submit everything. Since it's likely to be cache cold
+ * prefetch it here as first thing to hide some of the
+ * latency.
+ *
+ * Attempt to prefetch the pieces we likely need later.
+ */
+ prefetch(&bdev->bd_disk->part_tbl);
+ prefetch(bdev->bd_queue);
+ prefetch((char *)bdev->bd_queue + SMP_CACHE_BYTES);
+
+ return do_blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
+ nr_segs, get_block, end_io,
+ submit_io, flags);
+}
+
EXPORT_SYMBOL(__blockdev_direct_IO);
static __init int dio_init(void)