From f4a8e6563ea5366f563cb741a27fe90c5fa7f0fc Mon Sep 17 00:00:00 2001 From: Jan Schmidt Date: Thu, 1 Dec 2011 09:30:36 -0500 Subject: Btrfs: fix meta data raid-repair merge problem Commit 4a54c8c16 introduced raid-repair, killing the individual readpage_io_failed_hook entries from inode.c and disk-io.c. Commit 4bb31e92 introduced new readahead code, adding a readpage_io_failed_hook to disk-io.c. The raid-repair commit had logic to disable raid-repair, if readpage_io_failed_hook is set. Thus, the readahead commit effectively disabled raid-repair for meta data. This commit changes the logic to always attempt raid-repair when needed and call the readpage_io_failed_hook in case raid-repair fails. This is much more straight forward and should have been like that from the beginning. Signed-off-by: Jan Schmidt Reported-by: Stefan Behrens Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 9472d3de5e5..be1bf627a14 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2287,14 +2287,20 @@ static void end_bio_extent_readpage(struct bio *bio, int err) if (!uptodate) { int failed_mirror; failed_mirror = (int)(unsigned long)bio->bi_bdev; - if (tree->ops && tree->ops->readpage_io_failed_hook) - ret = tree->ops->readpage_io_failed_hook( - bio, page, start, end, - failed_mirror, state); - else - ret = bio_readpage_error(bio, page, start, end, - failed_mirror, NULL); + /* + * The generic bio_readpage_error handles errors the + * following way: If possible, new read requests are + * created and submitted and will end up in + * end_bio_extent_readpage as well (if we're lucky, not + * in the !uptodate case). In that case it returns 0 and + * we just go on with the next page in our bio. If it + * can't handle the error it will return -EIO and we + * remain responsible for that page. + */ + ret = bio_readpage_error(bio, page, start, end, + failed_mirror, NULL); if (ret == 0) { +error_handled: uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); if (err) @@ -2302,6 +2308,13 @@ static void end_bio_extent_readpage(struct bio *bio, int err) uncache_state(&cached); continue; } + if (tree->ops && tree->ops->readpage_io_failed_hook) { + ret = tree->ops->readpage_io_failed_hook( + bio, page, start, end, + failed_mirror, state); + if (ret == 0) + goto error_handled; + } } if (uptodate) { -- cgit v1.2.3-18-g5258 From 1cf4ffdb3289624a6462c94f2ce05545b32ef736 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Wed, 7 Dec 2011 20:08:40 -0500 Subject: Btrfs: drop spin lock when memory alloc fails Drop spin lock in convert_extent_bit() when memory alloc fails, otherwise, it will be a deadlock. Signed-off-by: Liu Bo Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index be1bf627a14..49f3c9dc09f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -935,8 +935,10 @@ again: node = tree_search(tree, start); if (!node) { prealloc = alloc_extent_state_atomic(prealloc); - if (!prealloc) - return -ENOMEM; + if (!prealloc) { + err = -ENOMEM; + goto out; + } err = insert_state(tree, prealloc, start, end, &bits); prealloc = NULL; BUG_ON(err == -EEXIST); @@ -992,8 +994,10 @@ hit_next: */ if (state->start < start) { prealloc = alloc_extent_state_atomic(prealloc); - if (!prealloc) - return -ENOMEM; + if (!prealloc) { + err = -ENOMEM; + goto out; + } err = split_state(tree, state, prealloc, start); BUG_ON(err == -EEXIST); prealloc = NULL; @@ -1024,8 +1028,10 @@ hit_next: this_end = last_start - 1; prealloc = alloc_extent_state_atomic(prealloc); - if (!prealloc) - return -ENOMEM; + if (!prealloc) { + err = -ENOMEM; + goto out; + } /* * Avoid to free 'prealloc' if it can be merged with @@ -1051,8 +1057,10 @@ hit_next: */ if (state->start <= end && state->end > end) { prealloc = alloc_extent_state_atomic(prealloc); - if (!prealloc) - return -ENOMEM; + if (!prealloc) { + err = -ENOMEM; + goto out; + } err = split_state(tree, state, prealloc, end + 1); BUG_ON(err == -EEXIST); -- cgit v1.2.3-18-g5258 From 21adbd5cbb5344a3fca6bb7ddb2ab6cb03c44546 Mon Sep 17 00:00:00 2001 From: Stefan Behrens Date: Wed, 9 Nov 2011 13:44:05 +0100 Subject: Btrfs: integrate integrity check module into btrfs This is the last part of the patch series. It modifies the btrfs code to use the integrity check module if configured to do so with the define BTRFS_FS_CHECK_INTEGRITY. If this define is not set, the only effective change is that code is added that handles the mount option to activate the integrity check. If the mount option is set and the define BTRFS_FS_CHECK_INTEGRITY is not set, that code complains in the log and the mount fails with EINVAL. Add the mount option to activate the usage of the integrity check code. Add invocation of btrfs integrity check code init and cleanup function on mount and umount, respectively. Add hook to call btrfs integrity check code version of submit_bh/submit_bio. Signed-off-by: Stefan Behrens --- fs/btrfs/extent_io.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 49f3c9dc09f..246669296e0 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -18,6 +18,7 @@ #include "ctree.h" #include "btrfs_inode.h" #include "volumes.h" +#include "check-integrity.h" static struct kmem_cache *extent_state_cache; static struct kmem_cache *extent_buffer_cache; @@ -1895,7 +1896,7 @@ int repair_io_failure(struct btrfs_mapping_tree *map_tree, u64 start, } bio->bi_bdev = dev->bdev; bio_add_page(bio, page, length, start-page_offset(page)); - submit_bio(WRITE_SYNC, bio); + btrfsic_submit_bio(WRITE_SYNC, bio); wait_for_completion(&compl); if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { @@ -2393,7 +2394,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num, ret = tree->ops->submit_bio_hook(page->mapping->host, rw, bio, mirror_num, bio_flags, start); else - submit_bio(rw, bio); + btrfsic_submit_bio(rw, bio); if (bio_flagged(bio, BIO_EOPNOTSUPP)) ret = -EOPNOTSUPP; -- cgit v1.2.3-18-g5258 From 5b25f70f4200766355cdabda604e131d2fb6010d Mon Sep 17 00:00:00 2001 From: Arne Jansen Date: Tue, 13 Sep 2011 10:55:48 +0200 Subject: Btrfs: add nested locking mode for paths This patch adds the possibilty to read-lock an extent even if it is already write-locked from the same thread. btrfs_find_all_roots() needs this capability. Signed-off-by: Arne Jansen Signed-off-by: Jan Schmidt --- fs/btrfs/extent_io.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index be1bf627a14..dd8d140eb27 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3571,6 +3571,7 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, atomic_set(&eb->blocking_writers, 0); atomic_set(&eb->spinning_readers, 0); atomic_set(&eb->spinning_writers, 0); + eb->lock_nested = 0; init_waitqueue_head(&eb->write_lock_wq); init_waitqueue_head(&eb->read_lock_wq); -- cgit v1.2.3-18-g5258 From 8bedd51b6121c4607784d75f852828d25d119c52 Mon Sep 17 00:00:00 2001 From: Mitch Harder Date: Thu, 26 Jan 2012 15:01:11 -0500 Subject: Btrfs: Check for NULL page in extent_range_uptodate A user has encountered a NULL pointer kernel oops in btrfs when encountering media errors. The problem has been identified as an unhandled NULL pointer returned from find_get_page(). This modification simply checks for a NULL page, and returns with an error if found (the extent_range_uptodate() function returns 1 on errors). After testing this patch, the user reported that the error with the NULL pointer oops was solved. However, there is still a remaining problem with a thread becoming stuck in wait_on_page_locked(page) in the read_extent_buffer_pages(...) function in extent_io.c for (i = start_i; i < num_pages; i++) { page = extent_buffer_page(eb, i); wait_on_page_locked(page); if (!PageUptodate(page)) ret = -EIO; } This patch leaves the issue with the locked page yet to be resolved. Signed-off-by: Mitch Harder Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 9d09a4f8187..fcf77e1ded4 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3909,6 +3909,8 @@ int extent_range_uptodate(struct extent_io_tree *tree, while (start <= end) { index = start >> PAGE_CACHE_SHIFT; page = find_get_page(tree->mapping, index); + if (!page) + return 1; uptodate = PageUptodate(page); page_cache_release(page); if (!uptodate) { -- cgit v1.2.3-18-g5258 From 87826df0ec36fc28884b4ddbb3f3af41c4c2008f Mon Sep 17 00:00:00 2001 From: Jeff Mahoney Date: Wed, 15 Feb 2012 16:23:57 +0100 Subject: btrfs: delalloc for page dirtied out-of-band in fixup worker We encountered an issue that was easily observable on s/390 systems but could really happen anywhere. The timing just seemed to hit reliably on s/390 with limited memory. The gist is that when an unexpected set_page_dirty() happened, we'd run into the BUG() in btrfs_writepage_fixup_worker since it wasn't properly set up for delalloc. This patch does the following: - Performs the missing delalloc in the fixup worker - Allow the start hook to return -EBUSY which informs __extent_writepage that it should mark the page skipped and not to redirty it. This is required since the fixup worker can fail with -ENOSPC and the page will have already been redirtied. That causes an Oops in drop_outstanding_extents later. Retrying the fixup worker could lead to an infinite loop. Deferring the page redirty also saves us some cycles since the page would be stuck in a resubmit-redirty loop until the fixup worker completes. It's not harmful, just wasteful. - If the fixup worker fails, we mark the page and mapping as errored, and end the writeback, similar to what we would do had the page actually been submitted to writeback. Signed-off-by: Jeff Mahoney --- fs/btrfs/extent_io.c | 65 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 25 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index fcf77e1ded4..89ba79fb945 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2161,6 +2161,38 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, /* lots and lots of room for performance fixes in the end_bio funcs */ +int end_extent_writepage(struct page *page, int err, u64 start, u64 end) +{ + int uptodate = (err == 0); + struct extent_io_tree *tree; + int ret; + + tree = &BTRFS_I(page->mapping->host)->io_tree; + + if (tree->ops && tree->ops->writepage_end_io_hook) { + ret = tree->ops->writepage_end_io_hook(page, start, + end, NULL, uptodate); + if (ret) + uptodate = 0; + } + + if (!uptodate && tree->ops && + tree->ops->writepage_io_failed_hook) { + ret = tree->ops->writepage_io_failed_hook(NULL, page, + start, end, NULL); + /* Writeback already completed */ + if (ret == 0) + return 1; + } + + if (!uptodate) { + clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS); + ClearPageUptodate(page); + SetPageError(page); + } + return 0; +} + /* * after a writepage IO is done, we need to: * clear the uptodate bits on error @@ -2172,13 +2204,11 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, */ static void end_bio_extent_writepage(struct bio *bio, int err) { - int uptodate = err == 0; struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1; struct extent_io_tree *tree; u64 start; u64 end; int whole_page; - int ret; do { struct page *page = bvec->bv_page; @@ -2195,28 +2225,9 @@ static void end_bio_extent_writepage(struct bio *bio, int err) if (--bvec >= bio->bi_io_vec) prefetchw(&bvec->bv_page->flags); - if (tree->ops && tree->ops->writepage_end_io_hook) { - ret = tree->ops->writepage_end_io_hook(page, start, - end, NULL, uptodate); - if (ret) - uptodate = 0; - } - - if (!uptodate && tree->ops && - tree->ops->writepage_io_failed_hook) { - ret = tree->ops->writepage_io_failed_hook(bio, page, - start, end, NULL); - if (ret == 0) { - uptodate = (err == 0); - continue; - } - } - if (!uptodate) { - clear_extent_uptodate(tree, start, end, NULL, GFP_NOFS); - ClearPageUptodate(page); - SetPageError(page); - } + if (end_extent_writepage(page, err, start, end)) + continue; if (whole_page) end_page_writeback(page); @@ -2818,8 +2829,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, if (tree->ops && tree->ops->writepage_start_hook) { ret = tree->ops->writepage_start_hook(page, start, page_end); - if (ret == -EAGAIN) { - redirty_page_for_writepage(wbc, page); + if (ret) { + /* Fixup worker will requeue */ + if (ret == -EBUSY) + wbc->pages_skipped++; + else + redirty_page_for_writepage(wbc, page); update_nr_written(page, wbc, nr_written); unlock_page(page); ret = 0; -- cgit v1.2.3-18-g5258 From 013bd4c336ad0d30e9e41f9cff0dbc1858934e75 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Thu, 16 Feb 2012 10:11:40 +0900 Subject: Btrfs: fix return value check of extent_io_ops This patch adds the check on the return value of extent_io_ops. Signed-off-by: Tsutomu Itoh --- fs/btrfs/extent_io.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 89ba79fb945..b05d35a7c0f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2154,9 +2154,10 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, "this_mirror=%d, num_copies=%d, in_validation=%d\n", read_mode, failrec->this_mirror, num_copies, failrec->in_validation); - tree->ops->submit_bio_hook(inode, read_mode, bio, failrec->this_mirror, - failrec->bio_flags, 0); - return 0; + ret = tree->ops->submit_bio_hook(inode, read_mode, bio, + failrec->this_mirror, + failrec->bio_flags, 0); + return ret; } /* lots and lots of room for performance fixes in the end_bio funcs */ @@ -2790,9 +2791,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc, delalloc_start = delalloc_end + 1; continue; } - tree->ops->fill_delalloc(inode, page, delalloc_start, - delalloc_end, &page_started, - &nr_written); + ret = tree->ops->fill_delalloc(inode, page, + delalloc_start, + delalloc_end, + &page_started, + &nr_written); + BUG_ON(ret); /* * delalloc_end is already one less than the total * length, so we don't subtract one from -- cgit v1.2.3-18-g5258 From 285190d99fef696ec8b0041787387f013cb71d67 Mon Sep 17 00:00:00 2001 From: Tsutomu Itoh Date: Thu, 16 Feb 2012 16:23:58 +0900 Subject: Btrfs: check return value of lookup_extent_mapping() correctly This patch corrects error checking of lookup_extent_mapping(). Signed-off-by: Tsutomu Itoh --- fs/btrfs/extent_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index b05d35a7c0f..8d6f55fbd28 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3308,7 +3308,7 @@ int try_release_extent_mapping(struct extent_map_tree *map, len = end - start + 1; write_lock(&map->lock); em = lookup_extent_mapping(map, start, len); - if (IS_ERR_OR_NULL(em)) { + if (!em) { write_unlock(&map->lock); break; } -- cgit v1.2.3-18-g5258 From 0449314a9cc5a7fb0bd42e2175a3c46f68f3a2b0 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 16 Feb 2012 18:34:37 +0800 Subject: Btrfs: skip states when they does not contain bits to clear Clearing a range's bits is different with setting them, since we don't need to touch them when states do not contain bits we want. Signed-off-by: Liu Bo --- fs/btrfs/extent_io.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8d6f55fbd28..fe14285b53f 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -513,6 +513,15 @@ hit_next: WARN_ON(state->end < start); last_end = state->end; + if (state->end < end && !need_resched()) + next_node = rb_next(&state->rb_node); + else + next_node = NULL; + + /* the state doesn't have the wanted bits, go ahead */ + if (!(state->state & bits)) + goto next; + /* * | ---- desired range ---- | * | state | or @@ -565,12 +574,8 @@ hit_next: goto out; } - if (state->end < end && prealloc && !need_resched()) - next_node = rb_next(&state->rb_node); - else - next_node = NULL; - set |= clear_state_bit(tree, state, &bits, wake); +next: if (last_end == (u64)-1) goto out; start = last_end + 1; -- cgit v1.2.3-18-g5258 From 9d47c7671dc555e198c7347a173ed37316e0c4c1 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 16 Feb 2012 18:34:38 +0800 Subject: Btrfs: kick out redundant stuff in convert_extent_bit clear_state_bit will do merge_state for us, so kick out the redundant one. Signed-off-by: Liu Bo --- fs/btrfs/extent_io.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index fe14285b53f..37259ff5cd7 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -966,8 +966,6 @@ hit_next: set_state_bits(tree, state, &bits); clear_state_bit(tree, state, &clear_bits, 0); - - merge_state(tree, state); if (last_end == (u64)-1) goto out; @@ -1012,7 +1010,6 @@ hit_next: if (state->end <= end) { set_state_bits(tree, state, &bits); clear_state_bit(tree, state, &clear_bits, 0); - merge_state(tree, state); if (last_end == (u64)-1) goto out; start = last_end + 1; @@ -1073,8 +1070,6 @@ hit_next: set_state_bits(tree, prealloc, &bits); clear_state_bit(tree, prealloc, &clear_bits, 0); - - merge_state(tree, prealloc); prealloc = NULL; goto out; } -- cgit v1.2.3-18-g5258 From 692e5759a43b916f0b66bcb39b2957499992381e Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Thu, 16 Feb 2012 18:34:36 +0800 Subject: Btrfs: be less strict on finding next node in clear_extent_bit In clear_extent_bit, it is enough that next node is adjacent in tree level. Signed-off-by: Liu Bo --- fs/btrfs/extent_io.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 37259ff5cd7..45ca8f9b0d6 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -582,8 +582,7 @@ next: if (start <= end && next_node) { state = rb_entry(next_node, struct extent_state, rb_node); - if (state->start == start) - goto hit_next; + goto hit_next; } goto search_again; -- cgit v1.2.3-18-g5258 From 506531905296d6aee84480c879b25ea98c3f9db6 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Wed, 22 Feb 2012 12:36:24 -0500 Subject: Btrfs: clear the extent uptodate bits during parent transid failures If btrfs reads a block and finds a parent transid mismatch, it clears the uptodate flags on the extent buffer, and the pages inside it. But we only clear the uptodate bits in the state tree if the block straddles more than one page. This is from an old optimization from to reduce contention on the extent state tree. But it is buggy because the code that retries a read from a different copy of the block is going to find the uptodate state bits set and skip the IO. The end result of the bug is that we'll never actually read the good copy (if there is one). The fix here is to always clear the uptodate state bits, which is safe because this code is only called when the parent transid fails. Signed-off-by: Chris Mason --- fs/btrfs/extent_io.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/extent_io.c') diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 45ca8f9b0d6..a55fbe6252d 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3871,10 +3871,9 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, num_pages = num_extent_pages(eb->start, eb->len); clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); - if (eb_straddles_pages(eb)) { - clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, - cached_state, GFP_NOFS); - } + clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, + cached_state, GFP_NOFS); + for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); if (page) -- cgit v1.2.3-18-g5258