aboutsummaryrefslogtreecommitdiff
path: root/fs/ext4/page-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/page-io.c')
-rw-r--r--fs/ext4/page-io.c560
1 files changed, 313 insertions, 247 deletions
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 7f5451cd1d3..b24a2541a9b 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -6,7 +6,6 @@
* Written by Theodore Ts'o, 2010.
*/
-#include <linux/module.h>
#include <linux/fs.h>
#include <linux/time.h>
#include <linux/jbd2.h>
@@ -19,258 +18,323 @@
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/namei.h>
+#include <linux/aio.h>
#include <linux/uio.h>
#include <linux/bio.h>
#include <linux/workqueue.h>
#include <linux/kernel.h>
#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/ratelimit.h>
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"
-#include "ext4_extents.h"
-static struct kmem_cache *io_page_cachep, *io_end_cachep;
-
-#define WQ_HASH_SZ 37
-#define to_ioend_wq(v) (&ioend_wq[((unsigned long)v) % WQ_HASH_SZ])
-static wait_queue_head_t ioend_wq[WQ_HASH_SZ];
+static struct kmem_cache *io_end_cachep;
int __init ext4_init_pageio(void)
{
- int i;
-
- io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
- if (io_page_cachep == NULL)
- return -ENOMEM;
io_end_cachep = KMEM_CACHE(ext4_io_end, SLAB_RECLAIM_ACCOUNT);
- if (io_page_cachep == NULL) {
- kmem_cache_destroy(io_page_cachep);
+ if (io_end_cachep == NULL)
return -ENOMEM;
- }
- for (i = 0; i < WQ_HASH_SZ; i++)
- init_waitqueue_head(&ioend_wq[i]);
-
return 0;
}
void ext4_exit_pageio(void)
{
kmem_cache_destroy(io_end_cachep);
- kmem_cache_destroy(io_page_cachep);
}
-void ext4_ioend_wait(struct inode *inode)
+/*
+ * Print an buffer I/O error compatible with the fs/buffer.c. This
+ * provides compatibility with dmesg scrapers that look for a specific
+ * buffer I/O error message. We really need a unified error reporting
+ * structure to userspace ala Digital Unix's uerf system, but it's
+ * probably not going to happen in my lifetime, due to LKML politics...
+ */
+static void buffer_io_error(struct buffer_head *bh)
+{
+ char b[BDEVNAME_SIZE];
+ printk_ratelimited(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n",
+ bdevname(bh->b_bdev, b),
+ (unsigned long long)bh->b_blocknr);
+}
+
+static void ext4_finish_bio(struct bio *bio)
{
- wait_queue_head_t *wq = to_ioend_wq(inode);
+ int i;
+ int error = !test_bit(BIO_UPTODATE, &bio->bi_flags);
+ struct bio_vec *bvec;
+
+ bio_for_each_segment_all(bvec, bio, i) {
+ struct page *page = bvec->bv_page;
+ struct buffer_head *bh, *head;
+ unsigned bio_start = bvec->bv_offset;
+ unsigned bio_end = bio_start + bvec->bv_len;
+ unsigned under_io = 0;
+ unsigned long flags;
- wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
+ if (!page)
+ continue;
+
+ if (error) {
+ SetPageError(page);
+ set_bit(AS_EIO, &page->mapping->flags);
+ }
+ bh = head = page_buffers(page);
+ /*
+ * We check all buffers in the page under BH_Uptodate_Lock
+ * to avoid races with other end io clearing async_write flags
+ */
+ local_irq_save(flags);
+ bit_spin_lock(BH_Uptodate_Lock, &head->b_state);
+ do {
+ if (bh_offset(bh) < bio_start ||
+ bh_offset(bh) + bh->b_size > bio_end) {
+ if (buffer_async_write(bh))
+ under_io++;
+ continue;
+ }
+ clear_buffer_async_write(bh);
+ if (error)
+ buffer_io_error(bh);
+ } while ((bh = bh->b_this_page) != head);
+ bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
+ local_irq_restore(flags);
+ if (!under_io)
+ end_page_writeback(page);
+ }
}
-static void put_io_page(struct ext4_io_page *io_page)
+static void ext4_release_io_end(ext4_io_end_t *io_end)
{
- if (atomic_dec_and_test(&io_page->p_count)) {
- end_page_writeback(io_page->p_page);
- put_page(io_page->p_page);
- kmem_cache_free(io_page_cachep, io_page);
+ struct bio *bio, *next_bio;
+
+ BUG_ON(!list_empty(&io_end->list));
+ BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
+ WARN_ON(io_end->handle);
+
+ if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
+ wake_up_all(ext4_ioend_wq(io_end->inode));
+
+ for (bio = io_end->bio; bio; bio = next_bio) {
+ next_bio = bio->bi_private;
+ ext4_finish_bio(bio);
+ bio_put(bio);
}
+ kmem_cache_free(io_end_cachep, io_end);
}
-void ext4_free_io_end(ext4_io_end_t *io)
+static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
{
- int i;
- wait_queue_head_t *wq;
-
- BUG_ON(!io);
- if (io->page)
- put_page(io->page);
- for (i = 0; i < io->num_io_pages; i++)
- put_io_page(io->pages[i]);
- io->num_io_pages = 0;
- wq = to_ioend_wq(io->inode);
- if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) &&
- waitqueue_active(wq))
- wake_up_all(wq);
- kmem_cache_free(io_end_cachep, io);
+ struct inode *inode = io_end->inode;
+
+ io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
+ /* Wake up anyone waiting on unwritten extent conversion */
+ if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
+ wake_up_all(ext4_ioend_wq(inode));
}
/*
- * check a range of space and convert unwritten extents to written.
+ * Check a range of space and convert unwritten extents to written. Note that
+ * we are protected from truncate touching same part of extent tree by the
+ * fact that truncate code waits for all DIO to finish (thus exclusion from
+ * direct IO is achieved) and also waits for PageWriteback bits. Thus we
+ * cannot get to ext4_ext_truncate() before all IOs overlapping that range are
+ * completed (happens from ext4_free_ioend()).
*/
-int ext4_end_io_nolock(ext4_io_end_t *io)
+static int ext4_end_io(ext4_io_end_t *io)
{
struct inode *inode = io->inode;
loff_t offset = io->offset;
ssize_t size = io->size;
+ handle_t *handle = io->handle;
int ret = 0;
ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
"list->prev 0x%p\n",
io, inode->i_ino, io->list.next, io->list.prev);
- if (list_empty(&io->list))
- return ret;
-
- if (!(io->flag & EXT4_IO_END_UNWRITTEN))
- return ret;
-
- ret = ext4_convert_unwritten_extents(inode, offset, size);
+ io->handle = NULL; /* Following call will use up the handle */
+ ret = ext4_convert_unwritten_extents(handle, inode, offset, size);
if (ret < 0) {
- printk(KERN_EMERG "%s: failed to convert unwritten "
- "extents to written extents, error is %d "
- "io is still on inode %lu aio dio list\n",
- __func__, ret, inode->i_ino);
- return ret;
+ ext4_msg(inode->i_sb, KERN_EMERG,
+ "failed to convert unwritten extents to written "
+ "extents -- potential data loss! "
+ "(inode %lu, offset %llu, size %zd, error %d)",
+ inode->i_ino, offset, size, ret);
}
-
- if (io->iocb)
- aio_complete(io->iocb, io->result, 0);
- /* clear the DIO AIO unwritten flag */
- io->flag &= ~EXT4_IO_END_UNWRITTEN;
+ ext4_clear_io_unwritten_flag(io);
+ ext4_release_io_end(io);
return ret;
}
-/*
- * work on completed aio dio IO, to convert unwritten extents to extents
- */
-static void ext4_end_io_work(struct work_struct *work)
+static void dump_completed_IO(struct inode *inode, struct list_head *head)
{
- ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
- struct inode *inode = io->inode;
- struct ext4_inode_info *ei = EXT4_I(inode);
- unsigned long flags;
- int ret;
-
- mutex_lock(&inode->i_mutex);
- ret = ext4_end_io_nolock(io);
- if (ret < 0) {
- mutex_unlock(&inode->i_mutex);
+#ifdef EXT4FS_DEBUG
+ struct list_head *cur, *before, *after;
+ ext4_io_end_t *io, *io0, *io1;
+
+ if (list_empty(head))
return;
+
+ ext4_debug("Dump inode %lu completed io list\n", inode->i_ino);
+ list_for_each_entry(io, head, list) {
+ cur = &io->list;
+ before = cur->prev;
+ io0 = container_of(before, ext4_io_end_t, list);
+ after = cur->next;
+ io1 = container_of(after, ext4_io_end_t, list);
+
+ ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
+ io, inode->i_ino, io0, io1);
}
+#endif
+}
+/* Add the io_end to per-inode completed end_io list. */
+static void ext4_add_complete_io(ext4_io_end_t *io_end)
+{
+ struct ext4_inode_info *ei = EXT4_I(io_end->inode);
+ struct ext4_sb_info *sbi = EXT4_SB(io_end->inode->i_sb);
+ struct workqueue_struct *wq;
+ unsigned long flags;
+
+ /* Only reserved conversions from writeback should enter here */
+ WARN_ON(!(io_end->flag & EXT4_IO_END_UNWRITTEN));
+ WARN_ON(!io_end->handle && sbi->s_journal);
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
- if (!list_empty(&io->list))
- list_del_init(&io->list);
+ wq = sbi->rsv_conversion_wq;
+ if (list_empty(&ei->i_rsv_conversion_list))
+ queue_work(wq, &ei->i_rsv_conversion_work);
+ list_add_tail(&io_end->list, &ei->i_rsv_conversion_list);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
- mutex_unlock(&inode->i_mutex);
- ext4_free_io_end(io);
}
-ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
+static int ext4_do_flush_completed_IO(struct inode *inode,
+ struct list_head *head)
{
- ext4_io_end_t *io = NULL;
+ ext4_io_end_t *io;
+ struct list_head unwritten;
+ unsigned long flags;
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ int err, ret = 0;
- io = kmem_cache_alloc(io_end_cachep, flags);
+ spin_lock_irqsave(&ei->i_completed_io_lock, flags);
+ dump_completed_IO(inode, head);
+ list_replace_init(head, &unwritten);
+ spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+
+ while (!list_empty(&unwritten)) {
+ io = list_entry(unwritten.next, ext4_io_end_t, list);
+ BUG_ON(!(io->flag & EXT4_IO_END_UNWRITTEN));
+ list_del_init(&io->list);
+
+ err = ext4_end_io(io);
+ if (unlikely(!ret && err))
+ ret = err;
+ }
+ return ret;
+}
+
+/*
+ * work on completed IO, to convert unwritten extents to extents
+ */
+void ext4_end_io_rsv_work(struct work_struct *work)
+{
+ struct ext4_inode_info *ei = container_of(work, struct ext4_inode_info,
+ i_rsv_conversion_work);
+ ext4_do_flush_completed_IO(&ei->vfs_inode, &ei->i_rsv_conversion_list);
+}
+
+ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
+{
+ ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags);
if (io) {
- memset(io, 0, sizeof(*io));
atomic_inc(&EXT4_I(inode)->i_ioend_count);
io->inode = inode;
- INIT_WORK(&io->work, ext4_end_io_work);
INIT_LIST_HEAD(&io->list);
+ atomic_set(&io->count, 1);
}
return io;
}
-/*
- * Print an buffer I/O error compatible with the fs/buffer.c. This
- * provides compatibility with dmesg scrapers that look for a specific
- * buffer I/O error message. We really need a unified error reporting
- * structure to userspace ala Digital Unix's uerf system, but it's
- * probably not going to happen in my lifetime, due to LKML politics...
- */
-static void buffer_io_error(struct buffer_head *bh)
+void ext4_put_io_end_defer(ext4_io_end_t *io_end)
{
- char b[BDEVNAME_SIZE];
- printk(KERN_ERR "Buffer I/O error on device %s, logical block %llu\n",
- bdevname(bh->b_bdev, b),
- (unsigned long long)bh->b_blocknr);
+ if (atomic_dec_and_test(&io_end->count)) {
+ if (!(io_end->flag & EXT4_IO_END_UNWRITTEN) || !io_end->size) {
+ ext4_release_io_end(io_end);
+ return;
+ }
+ ext4_add_complete_io(io_end);
+ }
+}
+
+int ext4_put_io_end(ext4_io_end_t *io_end)
+{
+ int err = 0;
+
+ if (atomic_dec_and_test(&io_end->count)) {
+ if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
+ err = ext4_convert_unwritten_extents(io_end->handle,
+ io_end->inode, io_end->offset,
+ io_end->size);
+ io_end->handle = NULL;
+ ext4_clear_io_unwritten_flag(io_end);
+ }
+ ext4_release_io_end(io_end);
+ }
+ return err;
+}
+
+ext4_io_end_t *ext4_get_io_end(ext4_io_end_t *io_end)
+{
+ atomic_inc(&io_end->count);
+ return io_end;
}
+/* BIO completion function for page writeback */
static void ext4_end_bio(struct bio *bio, int error)
{
ext4_io_end_t *io_end = bio->bi_private;
- struct workqueue_struct *wq;
- struct inode *inode;
- unsigned long flags;
- int i;
+ sector_t bi_sector = bio->bi_iter.bi_sector;
BUG_ON(!io_end);
- bio->bi_private = NULL;
bio->bi_end_io = NULL;
if (test_bit(BIO_UPTODATE, &bio->bi_flags))
error = 0;
- bio_put(bio);
-
- for (i = 0; i < io_end->num_io_pages; i++) {
- struct page *page = io_end->pages[i]->p_page;
- struct buffer_head *bh, *head;
- int partial_write = 0;
-
- head = page_buffers(page);
- if (error)
- SetPageError(page);
- BUG_ON(!head);
- if (head->b_size == PAGE_CACHE_SIZE)
- clear_buffer_dirty(head);
- else {
- loff_t offset;
- loff_t io_end_offset = io_end->offset + io_end->size;
-
- offset = (sector_t) page->index << PAGE_CACHE_SHIFT;
- bh = head;
- do {
- if ((offset >= io_end->offset) &&
- (offset+bh->b_size <= io_end_offset)) {
- if (error)
- buffer_io_error(bh);
-
- clear_buffer_dirty(bh);
- }
- if (buffer_delay(bh))
- partial_write = 1;
- else if (!buffer_mapped(bh))
- clear_buffer_dirty(bh);
- else if (buffer_dirty(bh))
- partial_write = 1;
- offset += bh->b_size;
- bh = bh->b_this_page;
- } while (bh != head);
- }
-
- put_io_page(io_end->pages[i]);
-
- /*
- * If this is a partial write which happened to make
- * all buffers uptodate then we can optimize away a
- * bogus readpage() for the next read(). Here we
- * 'discover' whether the page went uptodate as a
- * result of this (potentially partial) write.
- */
- if (!partial_write)
- SetPageUptodate(page);
- }
- io_end->num_io_pages = 0;
- inode = io_end->inode;
if (error) {
- io_end->flag |= EXT4_IO_END_ERROR;
- ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
+ struct inode *inode = io_end->inode;
+
+ ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
"(offset %llu size %ld starting block %llu)",
- inode->i_ino,
+ error, inode->i_ino,
(unsigned long long) io_end->offset,
(long) io_end->size,
(unsigned long long)
- bio->bi_sector >> (inode->i_blkbits - 9));
+ bi_sector >> (inode->i_blkbits - 9));
+ mapping_set_error(inode->i_mapping, error);
}
- /* Add the io_end to per-inode completed io list*/
- spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
- list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list);
- spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
-
- wq = EXT4_SB(inode->i_sb)->dio_unwritten_wq;
- /* queue the work to convert unwritten extents to written */
- queue_work(wq, &io_end->work);
+ if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
+ /*
+ * Link bio into list hanging from io_end. We have to do it
+ * atomically as bio completions can be racing against each
+ * other.
+ */
+ bio->bi_private = xchg(&io_end->bio, bio);
+ ext4_put_io_end_defer(io_end);
+ } else {
+ /*
+ * Drop io_end reference early. Inode can get freed once
+ * we finish the bio.
+ */
+ ext4_put_io_end_defer(io_end);
+ ext4_finish_bio(bio);
+ bio_put(bio);
+ }
}
void ext4_io_submit(struct ext4_io_submit *io)
@@ -283,149 +347,151 @@ void ext4_io_submit(struct ext4_io_submit *io)
BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP));
bio_put(io->io_bio);
}
- io->io_bio = 0;
- io->io_op = 0;
- io->io_end = 0;
+ io->io_bio = NULL;
}
-static int io_submit_init(struct ext4_io_submit *io,
- struct inode *inode,
- struct writeback_control *wbc,
- struct buffer_head *bh)
+void ext4_io_submit_init(struct ext4_io_submit *io,
+ struct writeback_control *wbc)
+{
+ io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE);
+ io->io_bio = NULL;
+ io->io_end = NULL;
+}
+
+static int io_submit_init_bio(struct ext4_io_submit *io,
+ struct buffer_head *bh)
{
- ext4_io_end_t *io_end;
- struct page *page = bh->b_page;
int nvecs = bio_get_nr_vecs(bh->b_bdev);
struct bio *bio;
- io_end = ext4_init_io_end(inode, GFP_NOFS);
- if (!io_end)
+ bio = bio_alloc(GFP_NOIO, min(nvecs, BIO_MAX_PAGES));
+ if (!bio)
return -ENOMEM;
- do {
- bio = bio_alloc(GFP_NOIO, nvecs);
- nvecs >>= 1;
- } while (bio == NULL);
-
- bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+ bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio->bi_bdev = bh->b_bdev;
- bio->bi_private = io->io_end = io_end;
bio->bi_end_io = ext4_end_bio;
-
- io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
-
+ bio->bi_private = ext4_get_io_end(io->io_end);
io->io_bio = bio;
- io->io_op = (wbc->sync_mode == WB_SYNC_ALL ?
- WRITE_SYNC_PLUG : WRITE);
io->io_next_block = bh->b_blocknr;
return 0;
}
static int io_submit_add_bh(struct ext4_io_submit *io,
- struct ext4_io_page *io_page,
struct inode *inode,
- struct writeback_control *wbc,
struct buffer_head *bh)
{
- ext4_io_end_t *io_end;
int ret;
- if (buffer_new(bh)) {
- clear_buffer_new(bh);
- unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
- }
-
- if (!buffer_mapped(bh) || buffer_delay(bh)) {
- if (!buffer_mapped(bh))
- clear_buffer_dirty(bh);
- if (io->io_bio)
- ext4_io_submit(io);
- return 0;
- }
-
if (io->io_bio && bh->b_blocknr != io->io_next_block) {
submit_and_retry:
ext4_io_submit(io);
}
if (io->io_bio == NULL) {
- ret = io_submit_init(io, inode, wbc, bh);
+ ret = io_submit_init_bio(io, bh);
if (ret)
return ret;
}
- io_end = io->io_end;
- if ((io_end->num_io_pages >= MAX_IO_PAGES) &&
- (io_end->pages[io_end->num_io_pages-1] != io_page))
- goto submit_and_retry;
- if (buffer_uninit(bh))
- io->io_end->flag |= EXT4_IO_END_UNWRITTEN;
- io->io_end->size += bh->b_size;
- io->io_next_block++;
ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
if (ret != bh->b_size)
goto submit_and_retry;
- if ((io_end->num_io_pages == 0) ||
- (io_end->pages[io_end->num_io_pages-1] != io_page)) {
- io_end->pages[io_end->num_io_pages++] = io_page;
- atomic_inc(&io_page->p_count);
- }
+ io->io_next_block++;
return 0;
}
int ext4_bio_write_page(struct ext4_io_submit *io,
struct page *page,
int len,
- struct writeback_control *wbc)
+ struct writeback_control *wbc,
+ bool keep_towrite)
{
struct inode *inode = page->mapping->host;
- unsigned block_start, block_end, blocksize;
- struct ext4_io_page *io_page;
+ unsigned block_start, blocksize;
struct buffer_head *bh, *head;
int ret = 0;
+ int nr_submitted = 0;
blocksize = 1 << inode->i_blkbits;
+ BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
- set_page_writeback(page);
+
+ if (keep_towrite)
+ set_page_writeback_keepwrite(page);
+ else
+ set_page_writeback(page);
ClearPageError(page);
- io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS);
- if (!io_page) {
- set_page_dirty(page);
- unlock_page(page);
- return -ENOMEM;
- }
- io_page->p_page = page;
- atomic_set(&io_page->p_count, 1);
- get_page(page);
-
- for (bh = head = page_buffers(page), block_start = 0;
- bh != head || !block_start;
- block_start = block_end, bh = bh->b_this_page) {
- block_end = block_start + blocksize;
+ /*
+ * Comments copied from block_write_full_page:
+ *
+ * The page straddles i_size. It must be zeroed out on each and every
+ * writepage invocation because it may be mmapped. "A file is mapped
+ * in multiples of the page size. For a file that is not a multiple of
+ * the page size, the remaining memory is zeroed when mapped, and
+ * writes to that region are not written out to the file."
+ */
+ if (len < PAGE_CACHE_SIZE)
+ zero_user_segment(page, len, PAGE_CACHE_SIZE);
+ /*
+ * In the first loop we prepare and mark buffers to submit. We have to
+ * mark all buffers in the page before submitting so that
+ * end_page_writeback() cannot be called from ext4_bio_end_io() when IO
+ * on the first buffer finishes and we are still working on submitting
+ * the second buffer.
+ */
+ bh = head = page_buffers(page);
+ do {
+ block_start = bh_offset(bh);
if (block_start >= len) {
clear_buffer_dirty(bh);
set_buffer_uptodate(bh);
continue;
}
- ret = io_submit_add_bh(io, io_page, inode, wbc, bh);
+ if (!buffer_dirty(bh) || buffer_delay(bh) ||
+ !buffer_mapped(bh) || buffer_unwritten(bh)) {
+ /* A hole? We can safely clear the dirty bit */
+ if (!buffer_mapped(bh))
+ clear_buffer_dirty(bh);
+ if (io->io_bio)
+ ext4_io_submit(io);
+ continue;
+ }
+ if (buffer_new(bh)) {
+ clear_buffer_new(bh);
+ unmap_underlying_metadata(bh->b_bdev, bh->b_blocknr);
+ }
+ set_buffer_async_write(bh);
+ } while ((bh = bh->b_this_page) != head);
+
+ /* Now submit buffers to write */
+ bh = head = page_buffers(page);
+ do {
+ if (!buffer_async_write(bh))
+ continue;
+ ret = io_submit_add_bh(io, inode, bh);
if (ret) {
/*
* We only get here on ENOMEM. Not much else
* we can do but mark the page as dirty, and
* better luck next time.
*/
- set_page_dirty(page);
+ redirty_page_for_writepage(wbc, page);
break;
}
+ nr_submitted++;
+ clear_buffer_dirty(bh);
+ } while ((bh = bh->b_this_page) != head);
+
+ /* Error stopped previous loop? Clean up buffers... */
+ if (ret) {
+ do {
+ clear_buffer_async_write(bh);
+ bh = bh->b_this_page;
+ } while (bh != head);
}
unlock_page(page);
- /*
- * If the page was truncated before we could do the writeback,
- * or we had a memory allocation error while trying to write
- * the first buffer head, we won't have submitted any pages for
- * I/O. In that case we need to make sure we've cleared the
- * PageWriteback bit from the page to prevent the system from
- * wedging later on.
- */
- put_io_page(io_page);
+ /* Nothing submitted - we have to end page writeback */
+ if (!nr_submitted)
+ end_page_writeback(page);
return ret;
}