aboutsummaryrefslogtreecommitdiff
path: root/fs/buffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/buffer.c')
-rw-r--r--fs/buffer.c185
1 files changed, 133 insertions, 52 deletions
diff --git a/fs/buffer.c b/fs/buffer.c
index 7a75c3e0fd5..eba6e4f621c 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -41,6 +41,7 @@
#include <linux/bitops.h>
#include <linux/mpage.h>
#include <linux/bit_spinlock.h>
+#include <trace/events/block.h>
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list);
@@ -53,6 +54,13 @@ void init_buffer(struct buffer_head *bh, bh_end_io_t *handler, void *private)
}
EXPORT_SYMBOL(init_buffer);
+inline void touch_buffer(struct buffer_head *bh)
+{
+ trace_block_touch_buffer(bh);
+ mark_page_accessed(bh->b_page);
+}
+EXPORT_SYMBOL(touch_buffer);
+
static int sleep_on_buffer(void *word)
{
io_schedule();
@@ -69,12 +77,46 @@ EXPORT_SYMBOL(__lock_buffer);
void unlock_buffer(struct buffer_head *bh)
{
clear_bit_unlock(BH_Lock, &bh->b_state);
- smp_mb__after_clear_bit();
+ smp_mb__after_atomic();
wake_up_bit(&bh->b_state, BH_Lock);
}
EXPORT_SYMBOL(unlock_buffer);
/*
+ * Returns if the page has dirty or writeback buffers. If all the buffers
+ * are unlocked and clean then the PageDirty information is stale. If
+ * any of the pages are locked, it is assumed they are locked for IO.
+ */
+void buffer_check_dirty_writeback(struct page *page,
+ bool *dirty, bool *writeback)
+{
+ struct buffer_head *head, *bh;
+ *dirty = false;
+ *writeback = false;
+
+ BUG_ON(!PageLocked(page));
+
+ if (!page_has_buffers(page))
+ return;
+
+ if (PageWriteback(page))
+ *writeback = true;
+
+ head = page_buffers(page);
+ bh = head;
+ do {
+ if (buffer_locked(bh))
+ *writeback = true;
+
+ if (buffer_dirty(bh))
+ *dirty = true;
+
+ bh = bh->b_this_page;
+ } while (bh != head);
+}
+EXPORT_SYMBOL(buffer_check_dirty_writeback);
+
+/*
* Block until a buffer comes unlocked. This doesn't stop it
* from becoming locked again - you have to lock it yourself
* if you want to preserve its state.
@@ -185,7 +227,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
int all_mapped = 1;
index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
- page = find_get_page(bd_mapping, index);
+ page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
if (!page)
goto out;
@@ -612,14 +654,16 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
static void __set_page_dirty(struct page *page,
struct address_space *mapping, int warn)
{
- spin_lock_irq(&mapping->tree_lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&mapping->tree_lock, flags);
if (page->mapping) { /* Race with truncate? */
WARN_ON_ONCE(warn && !PageUptodate(page));
account_page_dirtied(page, mapping);
radix_tree_tag_set(&mapping->page_tree,
page_index(page), PAGECACHE_TAG_DIRTY);
}
- spin_unlock_irq(&mapping->tree_lock);
+ spin_unlock_irqrestore(&mapping->tree_lock, flags);
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
}
@@ -857,8 +901,6 @@ try_again:
/* Link the buffer to its page */
set_bh_page(bh, page, offset);
-
- init_buffer(bh, NULL, NULL);
}
return head;
/*
@@ -965,9 +1007,19 @@ grow_dev_page(struct block_device *bdev, sector_t block,
struct buffer_head *bh;
sector_t end_block;
int ret = 0; /* Will call free_more_memory() */
+ gfp_t gfp_mask;
+
+ gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS;
+ gfp_mask |= __GFP_MOVABLE;
+ /*
+ * XXX: __getblk_slow() can not really deal with failure and
+ * will endlessly loop on improvised global reclaim. Prefer
+ * looping in the allocator rather than here, at least that
+ * code knows what it's doing.
+ */
+ gfp_mask |= __GFP_NOFAIL;
- page = find_or_create_page(inode->i_mapping, index,
- (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS)|__GFP_MOVABLE);
+ page = find_or_create_page(inode->i_mapping, index, gfp_mask);
if (!page)
return ret;
@@ -1113,6 +1165,8 @@ void mark_buffer_dirty(struct buffer_head *bh)
{
WARN_ON_ONCE(!buffer_uptodate(bh));
+ trace_block_dirty_buffer(bh);
+
/*
* Very *carefully* optimize the it-is-already-dirty case.
*
@@ -1260,7 +1314,7 @@ static void bh_lru_install(struct buffer_head *bh)
}
while (out < BH_LRU_SIZE)
bhs[out++] = NULL;
- memcpy(__this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
+ memcpy(this_cpu_ptr(&bh_lrus.bhs), bhs, sizeof(bhs));
}
bh_lru_unlock();
@@ -1312,12 +1366,13 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size)
struct buffer_head *bh = lookup_bh_lru(bdev, block, size);
if (bh == NULL) {
+ /* __find_get_block_slow will mark the page accessed */
bh = __find_get_block_slow(bdev, block);
if (bh)
bh_lru_install(bh);
- }
- if (bh)
+ } else
touch_buffer(bh);
+
return bh;
}
EXPORT_SYMBOL(__find_get_block);
@@ -1429,16 +1484,27 @@ EXPORT_SYMBOL(set_bh_page);
/*
* Called when truncating a buffer on a page completely.
*/
+
+/* Bits that are cleared during an invalidate */
+#define BUFFER_FLAGS_DISCARD \
+ (1 << BH_Mapped | 1 << BH_New | 1 << BH_Req | \
+ 1 << BH_Delay | 1 << BH_Unwritten)
+
static void discard_buffer(struct buffer_head * bh)
{
+ unsigned long b_state, b_state_old;
+
lock_buffer(bh);
clear_buffer_dirty(bh);
bh->b_bdev = NULL;
- clear_buffer_mapped(bh);
- clear_buffer_req(bh);
- clear_buffer_new(bh);
- clear_buffer_delay(bh);
- clear_buffer_unwritten(bh);
+ b_state = bh->b_state;
+ for (;;) {
+ b_state_old = cmpxchg(&bh->b_state, b_state,
+ (b_state & ~BUFFER_FLAGS_DISCARD));
+ if (b_state_old == b_state)
+ break;
+ b_state = b_state_old;
+ }
unlock_buffer(bh);
}
@@ -1446,7 +1512,8 @@ static void discard_buffer(struct buffer_head * bh)
* block_invalidatepage - invalidate part or all of a buffer-backed page
*
* @page: the page which is affected
- * @offset: the index of the truncation point
+ * @offset: start of the range to invalidate
+ * @length: length of the range to invalidate
*
* block_invalidatepage() is called when all or part of the page has become
* invalidated by a truncate operation.
@@ -1457,15 +1524,22 @@ static void discard_buffer(struct buffer_head * bh)
* point. Because the caller is about to free (and possibly reuse) those
* blocks on-disk.
*/
-void block_invalidatepage(struct page *page, unsigned long offset)
+void block_invalidatepage(struct page *page, unsigned int offset,
+ unsigned int length)
{
struct buffer_head *head, *bh, *next;
unsigned int curr_off = 0;
+ unsigned int stop = length + offset;
BUG_ON(!PageLocked(page));
if (!page_has_buffers(page))
goto out;
+ /*
+ * Check for overflow
+ */
+ BUG_ON(stop > PAGE_CACHE_SIZE || stop < length);
+
head = page_buffers(page);
bh = head;
do {
@@ -1473,6 +1547,12 @@ void block_invalidatepage(struct page *page, unsigned long offset)
next = bh->b_this_page;
/*
+ * Are we still fully in range ?
+ */
+ if (next_off > stop)
+ goto out;
+
+ /*
* is this block fully invalidated?
*/
if (offset <= curr_off)
@@ -1493,6 +1573,7 @@ out:
}
EXPORT_SYMBOL(block_invalidatepage);
+
/*
* We attach and possibly dirty the buffers atomically wrt
* __set_page_dirty_buffers() via private_lock. try_to_free_buffers
@@ -2045,8 +2126,8 @@ EXPORT_SYMBOL(generic_write_end);
* Returns true if all buffers which correspond to a file portion
* we want to read are uptodate.
*/
-int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
- unsigned long from)
+int block_is_partially_uptodate(struct page *page, unsigned long from,
+ unsigned long count)
{
unsigned block_start, block_end, blocksize;
unsigned to;
@@ -2058,7 +2139,7 @@ int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc,
head = page_buffers(page);
blocksize = head->b_size;
- to = min_t(unsigned, PAGE_CACHE_SIZE - from, desc->count);
+ to = min_t(unsigned, PAGE_CACHE_SIZE - from, count);
to = from + to;
if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
return 0;
@@ -2332,7 +2413,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
get_block_t get_block)
{
struct page *page = vmf->page;
- struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
+ struct inode *inode = file_inode(vma->vm_file);
unsigned long end;
loff_t size;
int ret;
@@ -2359,7 +2440,7 @@ int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
if (unlikely(ret < 0))
goto out_unlock;
set_page_dirty(page);
- wait_on_page_writeback(page);
+ wait_for_stable_page(page);
return 0;
out_unlock:
unlock_page(page);
@@ -2371,7 +2452,7 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
get_block_t get_block)
{
int ret;
- struct super_block *sb = vma->vm_file->f_path.dentry->d_inode->i_sb;
+ struct super_block *sb = file_inode(vma->vm_file)->i_sb;
sb_start_pagefault(sb);
@@ -2810,10 +2891,9 @@ EXPORT_SYMBOL(block_truncate_page);
/*
* The generic ->writepage function for buffer-backed address_spaces
- * this form passes in the end_io handler used to finish the IO.
*/
-int block_write_full_page_endio(struct page *page, get_block_t *get_block,
- struct writeback_control *wbc, bh_end_io_t *handler)
+int block_write_full_page(struct page *page, get_block_t *get_block,
+ struct writeback_control *wbc)
{
struct inode * const inode = page->mapping->host;
loff_t i_size = i_size_read(inode);
@@ -2823,7 +2903,7 @@ int block_write_full_page_endio(struct page *page, get_block_t *get_block,
/* Is the page fully inside i_size? */
if (page->index < end_index)
return __block_write_full_page(inode, page, get_block, wbc,
- handler);
+ end_buffer_async_write);
/* Is the page fully outside i_size? (truncate in progress) */
offset = i_size & (PAGE_CACHE_SIZE-1);
@@ -2833,7 +2913,7 @@ int block_write_full_page_endio(struct page *page, get_block_t *get_block,
* they may have been added in ext3_writepage(). Make them
* freeable here, so the page does not leak.
*/
- do_invalidatepage(page, 0);
+ do_invalidatepage(page, 0, PAGE_CACHE_SIZE);
unlock_page(page);
return 0; /* don't care */
}
@@ -2846,18 +2926,8 @@ int block_write_full_page_endio(struct page *page, get_block_t *get_block,
* writes to that region are not written out to the file."
*/
zero_user_segment(page, offset, PAGE_CACHE_SIZE);
- return __block_write_full_page(inode, page, get_block, wbc, handler);
-}
-EXPORT_SYMBOL(block_write_full_page_endio);
-
-/*
- * The generic ->writepage function for buffer-backed address_spaces
- */
-int block_write_full_page(struct page *page, get_block_t *get_block,
- struct writeback_control *wbc)
-{
- return block_write_full_page_endio(page, get_block, wbc,
- end_buffer_async_write);
+ return __block_write_full_page(inode, page, get_block, wbc,
+ end_buffer_async_write);
}
EXPORT_SYMBOL(block_write_full_page);
@@ -2915,11 +2985,11 @@ static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
* let it through, and the IO layer will turn it into
* an EIO.
*/
- if (unlikely(bio->bi_sector >= maxsector))
+ if (unlikely(bio->bi_iter.bi_sector >= maxsector))
return;
- maxsector -= bio->bi_sector;
- bytes = bio->bi_size;
+ maxsector -= bio->bi_iter.bi_sector;
+ bytes = bio->bi_iter.bi_size;
if (likely((bytes >> 9) <= maxsector))
return;
@@ -2927,7 +2997,7 @@ static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
bytes = maxsector << 9;
/* Truncate the bio.. */
- bio->bi_size = bytes;
+ bio->bi_iter.bi_size = bytes;
bio->bi_io_vec[0].bv_len = bytes;
/* ..and clear the end of the buffer for reads */
@@ -2939,7 +3009,7 @@ static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
}
}
-int submit_bh(int rw, struct buffer_head * bh)
+int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
{
struct bio *bio;
int ret = 0;
@@ -2962,22 +3032,27 @@ int submit_bh(int rw, struct buffer_head * bh)
*/
bio = bio_alloc(GFP_NOIO, 1);
- bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
+ bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
bio->bi_bdev = bh->b_bdev;
bio->bi_io_vec[0].bv_page = bh->b_page;
bio->bi_io_vec[0].bv_len = bh->b_size;
bio->bi_io_vec[0].bv_offset = bh_offset(bh);
bio->bi_vcnt = 1;
- bio->bi_idx = 0;
- bio->bi_size = bh->b_size;
+ bio->bi_iter.bi_size = bh->b_size;
bio->bi_end_io = end_bio_bh_io_sync;
bio->bi_private = bh;
+ bio->bi_flags |= bio_flags;
/* Take care of bh's that straddle the end of the device */
guard_bh_eod(rw, bio, bh);
+ if (buffer_meta(bh))
+ rw |= REQ_META;
+ if (buffer_prio(bh))
+ rw |= REQ_PRIO;
+
bio_get(bio);
submit_bio(rw, bio);
@@ -2987,6 +3062,12 @@ int submit_bh(int rw, struct buffer_head * bh)
bio_put(bio);
return ret;
}
+EXPORT_SYMBOL_GPL(_submit_bh);
+
+int submit_bh(int rw, struct buffer_head *bh)
+{
+ return _submit_bh(rw, bh, 0);
+}
EXPORT_SYMBOL(submit_bh);
/**
@@ -3008,7 +3089,7 @@ EXPORT_SYMBOL(submit_bh);
* until the buffer gets unlocked).
*
* ll_rw_block sets b_end_io to simple completion handler that marks
- * the buffer up-to-date (if approriate), unlocks the buffer and wakes
+ * the buffer up-to-date (if appropriate), unlocks the buffer and wakes
* any waiters.
*
* All of the buffers must be for the same device, and must also be a
@@ -3227,7 +3308,7 @@ static struct kmem_cache *bh_cachep __read_mostly;
* Once the number of bh's in the machine exceeds this level, we start
* stripping them in writeback.
*/
-static int max_buffer_heads;
+static unsigned long max_buffer_heads;
int buffer_heads_over_limit;
@@ -3343,7 +3424,7 @@ EXPORT_SYMBOL(bh_submit_read);
void __init buffer_init(void)
{
- int nrpages;
+ unsigned long nrpages;
bh_cachep = kmem_cache_create("buffer_head",
sizeof(struct buffer_head), 0,