aboutsummaryrefslogtreecommitdiff
path: root/fs/buffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/buffer.c')
-rw-r--r--fs/buffer.c249
1 files changed, 73 insertions, 176 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index d54812b198e..3e7dca279d1 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -770,11 +770,12 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
spin_unlock(lock);
/*
* Ensure any pending I/O completes so that
- * ll_rw_block() actually writes the current
- * contents - it is a noop if I/O is still in
- * flight on potentially older contents.
+ * write_dirty_buffer() actually writes the
+ * current contents - it is a noop if I/O is
+ * still in flight on potentially older
+ * contents.
*/
- ll_rw_block(SWRITE_SYNC_PLUG, 1, &bh);
+ write_dirty_buffer(bh, WRITE_SYNC_PLUG);
/*
* Kick off IO for the previous mapping. Note
@@ -1833,9 +1834,10 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
}
EXPORT_SYMBOL(page_zero_new_buffers);
-static int __block_prepare_write(struct inode *inode, struct page *page,
- unsigned from, unsigned to, get_block_t *get_block)
+int block_prepare_write(struct page *page, unsigned from, unsigned to,
+ get_block_t *get_block)
{
+ struct inode *inode = page->mapping->host;
unsigned block_start, block_end;
sector_t block;
int err = 0;
@@ -1908,10 +1910,13 @@ static int __block_prepare_write(struct inode *inode, struct page *page,
if (!buffer_uptodate(*wait_bh))
err = -EIO;
}
- if (unlikely(err))
+ if (unlikely(err)) {
page_zero_new_buffers(page, from, to);
+ ClearPageUptodate(page);
+ }
return err;
}
+EXPORT_SYMBOL(block_prepare_write);
static int __block_commit_write(struct inode *inode, struct page *page,
unsigned from, unsigned to)
@@ -1948,90 +1953,41 @@ static int __block_commit_write(struct inode *inode, struct page *page,
return 0;
}
-/*
- * Filesystems implementing the new truncate sequence should use the
- * _newtrunc postfix variant which won't incorrectly call vmtruncate.
- * The filesystem needs to handle block truncation upon failure.
- */
-int block_write_begin_newtrunc(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata,
- get_block_t *get_block)
+int __block_write_begin(struct page *page, loff_t pos, unsigned len,
+ get_block_t *get_block)
{
- struct inode *inode = mapping->host;
- int status = 0;
- struct page *page;
- pgoff_t index;
- unsigned start, end;
- int ownpage = 0;
+ unsigned start = pos & (PAGE_CACHE_SIZE - 1);
- index = pos >> PAGE_CACHE_SHIFT;
- start = pos & (PAGE_CACHE_SIZE - 1);
- end = start + len;
-
- page = *pagep;
- if (page == NULL) {
- ownpage = 1;
- page = grab_cache_page_write_begin(mapping, index, flags);
- if (!page) {
- status = -ENOMEM;
- goto out;
- }
- *pagep = page;
- } else
- BUG_ON(!PageLocked(page));
-
- status = __block_prepare_write(inode, page, start, end, get_block);
- if (unlikely(status)) {
- ClearPageUptodate(page);
-
- if (ownpage) {
- unlock_page(page);
- page_cache_release(page);
- *pagep = NULL;
- }
- }
-
-out:
- return status;
+ return block_prepare_write(page, start, start + len, get_block);
}
-EXPORT_SYMBOL(block_write_begin_newtrunc);
+EXPORT_SYMBOL(__block_write_begin);
/*
* block_write_begin takes care of the basic task of block allocation and
* bringing partial write blocks uptodate first.
*
- * If *pagep is not NULL, then block_write_begin uses the locked page
- * at *pagep rather than allocating its own. In this case, the page will
- * not be unlocked or deallocated on failure.
+ * The filesystem needs to handle block truncation upon failure.
*/
-int block_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata,
- get_block_t *get_block)
+int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
+ unsigned flags, struct page **pagep, get_block_t *get_block)
{
- int ret;
+ pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ struct page *page;
+ int status;
- ret = block_write_begin_newtrunc(file, mapping, pos, len, flags,
- pagep, fsdata, get_block);
+ page = grab_cache_page_write_begin(mapping, index, flags);
+ if (!page)
+ return -ENOMEM;
- /*
- * prepare_write() may have instantiated a few blocks
- * outside i_size. Trim these off again. Don't need
- * i_size_read because we hold i_mutex.
- *
- * Filesystems which pass down their own page also cannot
- * call into vmtruncate here because it would lead to lock
- * inversion problems (*pagep is locked). This is a further
- * example of where the old truncate sequence is inadequate.
- */
- if (unlikely(ret) && *pagep == NULL) {
- loff_t isize = mapping->host->i_size;
- if (pos + len > isize)
- vmtruncate(mapping->host, isize);
+ status = __block_write_begin(page, pos, len, get_block);
+ if (unlikely(status)) {
+ unlock_page(page);
+ page_cache_release(page);
+ page = NULL;
}
- return ret;
+ *pagep = page;
+ return status;
}
EXPORT_SYMBOL(block_write_begin);
@@ -2351,7 +2307,7 @@ out:
* For moronic filesystems that do not allow holes in file.
* We may have to extend the file.
*/
-int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
+int cont_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
get_block_t *get_block, loff_t *bytes)
@@ -2363,7 +2319,7 @@ int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
err = cont_expand_zero(file, mapping, pos, bytes);
if (err)
- goto out;
+ return err;
zerofrom = *bytes & ~PAGE_CACHE_MASK;
if (pos+len > *bytes && zerofrom & (blocksize-1)) {
@@ -2371,44 +2327,10 @@ int cont_write_begin_newtrunc(struct file *file, struct address_space *mapping,
(*bytes)++;
}
- *pagep = NULL;
- err = block_write_begin_newtrunc(file, mapping, pos, len,
- flags, pagep, fsdata, get_block);
-out:
- return err;
-}
-EXPORT_SYMBOL(cont_write_begin_newtrunc);
-
-int cont_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata,
- get_block_t *get_block, loff_t *bytes)
-{
- int ret;
-
- ret = cont_write_begin_newtrunc(file, mapping, pos, len, flags,
- pagep, fsdata, get_block, bytes);
- if (unlikely(ret)) {
- loff_t isize = mapping->host->i_size;
- if (pos + len > isize)
- vmtruncate(mapping->host, isize);
- }
-
- return ret;
+ return block_write_begin(mapping, pos, len, flags, pagep, get_block);
}
EXPORT_SYMBOL(cont_write_begin);
-int block_prepare_write(struct page *page, unsigned from, unsigned to,
- get_block_t *get_block)
-{
- struct inode *inode = page->mapping->host;
- int err = __block_prepare_write(inode, page, from, to, get_block);
- if (err)
- ClearPageUptodate(page);
- return err;
-}
-EXPORT_SYMBOL(block_prepare_write);
-
int block_commit_write(struct page *page, unsigned from, unsigned to)
{
struct inode *inode = page->mapping->host;
@@ -2510,11 +2432,11 @@ static void attach_nobh_buffers(struct page *page, struct buffer_head *head)
}
/*
- * Filesystems implementing the new truncate sequence should use the
- * _newtrunc postfix variant which won't incorrectly call vmtruncate.
+ * On entry, the page is fully not uptodate.
+ * On exit the page is fully uptodate in the areas outside (from,to)
* The filesystem needs to handle block truncation upon failure.
*/
-int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping,
+int nobh_write_begin(struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata,
get_block_t *get_block)
@@ -2547,8 +2469,8 @@ int nobh_write_begin_newtrunc(struct file *file, struct address_space *mapping,
unlock_page(page);
page_cache_release(page);
*pagep = NULL;
- return block_write_begin_newtrunc(file, mapping, pos, len,
- flags, pagep, fsdata, get_block);
+ return block_write_begin(mapping, pos, len, flags, pagep,
+ get_block);
}
if (PageMappedToDisk(page))
@@ -2654,35 +2576,6 @@ out_release:
return ret;
}
-EXPORT_SYMBOL(nobh_write_begin_newtrunc);
-
-/*
- * On entry, the page is fully not uptodate.
- * On exit the page is fully uptodate in the areas outside (from,to)
- */
-int nobh_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata,
- get_block_t *get_block)
-{
- int ret;
-
- ret = nobh_write_begin_newtrunc(file, mapping, pos, len, flags,
- pagep, fsdata, get_block);
-
- /*
- * prepare_write() may have instantiated a few blocks
- * outside i_size. Trim these off again. Don't need
- * i_size_read because we hold i_mutex.
- */
- if (unlikely(ret)) {
- loff_t isize = mapping->host->i_size;
- if (pos + len > isize)
- vmtruncate(mapping->host, isize);
- }
-
- return ret;
-}
EXPORT_SYMBOL(nobh_write_begin);
int nobh_write_end(struct file *file, struct address_space *mapping,
@@ -3020,13 +2913,6 @@ int submit_bh(int rw, struct buffer_head * bh)
BUG_ON(buffer_unwritten(bh));
/*
- * Mask in barrier bit for a write (could be either a WRITE or a
- * WRITE_SYNC
- */
- if (buffer_ordered(bh) && (rw & WRITE))
- rw |= WRITE_BARRIER;
-
- /*
* Only clear out a write error when rewriting
*/
if (test_set_buffer_req(bh) && (rw & WRITE))
@@ -3064,22 +2950,21 @@ EXPORT_SYMBOL(submit_bh);
/**
* ll_rw_block: low-level access to block devices (DEPRECATED)
- * @rw: whether to %READ or %WRITE or %SWRITE or maybe %READA (readahead)
+ * @rw: whether to %READ or %WRITE or maybe %READA (readahead)
* @nr: number of &struct buffer_heads in the array
* @bhs: array of pointers to &struct buffer_head
*
* ll_rw_block() takes an array of pointers to &struct buffer_heads, and
* requests an I/O operation on them, either a %READ or a %WRITE. The third
- * %SWRITE is like %WRITE only we make sure that the *current* data in buffers
- * are sent to disk. The fourth %READA option is described in the documentation
- * for generic_make_request() which ll_rw_block() calls.
+ * %READA option is described in the documentation for generic_make_request()
+ * which ll_rw_block() calls.
*
* This function drops any buffer that it cannot get a lock on (with the
- * BH_Lock state bit) unless SWRITE is required, any buffer that appears to be
- * clean when doing a write request, and any buffer that appears to be
- * up-to-date when doing read request. Further it marks as clean buffers that
- * are processed for writing (the buffer cache won't assume that they are
- * actually clean until the buffer gets unlocked).
+ * BH_Lock state bit), any buffer that appears to be clean when doing a write
+ * request, and any buffer that appears to be up-to-date when doing read
+ * request. Further it marks as clean buffers that are processed for
+ * writing (the buffer cache won't assume that they are actually clean
+ * until the buffer gets unlocked).
*
* ll_rw_block sets b_end_io to simple completion handler that marks
* the buffer up-to-date (if approriate), unlocks the buffer and wakes
@@ -3095,20 +2980,13 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
for (i = 0; i < nr; i++) {
struct buffer_head *bh = bhs[i];
- if (rw == SWRITE || rw == SWRITE_SYNC || rw == SWRITE_SYNC_PLUG)
- lock_buffer(bh);
- else if (!trylock_buffer(bh))
+ if (!trylock_buffer(bh))
continue;
-
- if (rw == WRITE || rw == SWRITE || rw == SWRITE_SYNC ||
- rw == SWRITE_SYNC_PLUG) {
+ if (rw == WRITE) {
if (test_clear_buffer_dirty(bh)) {
bh->b_end_io = end_buffer_write_sync;
get_bh(bh);
- if (rw == SWRITE_SYNC)
- submit_bh(WRITE_SYNC, bh);
- else
- submit_bh(WRITE, bh);
+ submit_bh(WRITE, bh);
continue;
}
} else {
@@ -3124,12 +3002,25 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
}
EXPORT_SYMBOL(ll_rw_block);
+void write_dirty_buffer(struct buffer_head *bh, int rw)
+{
+ lock_buffer(bh);
+ if (!test_clear_buffer_dirty(bh)) {
+ unlock_buffer(bh);
+ return;
+ }
+ bh->b_end_io = end_buffer_write_sync;
+ get_bh(bh);
+ submit_bh(rw, bh);
+}
+EXPORT_SYMBOL(write_dirty_buffer);
+
/*
* For a data-integrity writeout, we need to wait upon any in-progress I/O
* and then start new I/O and then wait upon it. The caller must have a ref on
* the buffer_head.
*/
-int sync_dirty_buffer(struct buffer_head *bh)
+int __sync_dirty_buffer(struct buffer_head *bh, int rw)
{
int ret = 0;
@@ -3138,7 +3029,7 @@ int sync_dirty_buffer(struct buffer_head *bh)
if (test_clear_buffer_dirty(bh)) {
get_bh(bh);
bh->b_end_io = end_buffer_write_sync;
- ret = submit_bh(WRITE_SYNC, bh);
+ ret = submit_bh(rw, bh);
wait_on_buffer(bh);
if (buffer_eopnotsupp(bh)) {
clear_buffer_eopnotsupp(bh);
@@ -3151,6 +3042,12 @@ int sync_dirty_buffer(struct buffer_head *bh)
}
return ret;
}
+EXPORT_SYMBOL(__sync_dirty_buffer);
+
+int sync_dirty_buffer(struct buffer_head *bh)
+{
+ return __sync_dirty_buffer(bh, WRITE_SYNC);
+}
EXPORT_SYMBOL(sync_dirty_buffer);
/*