aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNeilBrown <neilb@suse.de>2011-07-28 11:39:25 +1000
committerNeilBrown <neilb@suse.de>2011-07-28 11:39:25 +1000
commit58c54fcca3bac5bf9290cfed31c76e4c4bfbabaf (patch)
tree25f663873429468c3b582bc7544f983759b7592e
parent5e5702898e93eee7d69b6efde109609a89a61001 (diff)
md/raid10: handle further errors during fix_read_error better.
If we find more read/write errors we should record a bad block before failing the device. Signed-off-by: NeilBrown <neilb@suse.de>
-rw-r--r--drivers/md/raid10.c59
1 files changed, 44 insertions, 15 deletions
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index fc9ebbab3f6..8b29cd4f01c 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1749,6 +1749,26 @@ static void check_decay_read_errors(mddev_t *mddev, mdk_rdev_t *rdev)
atomic_set(&rdev->read_errors, read_errors >> hours_since_last);
}
+static int r10_sync_page_io(mdk_rdev_t *rdev, sector_t sector,
+ int sectors, struct page *page, int rw)
+{
+ sector_t first_bad;
+ int bad_sectors;
+
+ if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors)
+ && (rw == READ || test_bit(WriteErrorSeen, &rdev->flags)))
+ return -1;
+ if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
+ /* success */
+ return 1;
+ if (rw == WRITE)
+ set_bit(WriteErrorSeen, &rdev->flags);
+ /* need to record an error - either for the block or the device */
+ if (!rdev_set_badblocks(rdev, sector, sectors, 0))
+ md_error(rdev->mddev, rdev);
+ return 0;
+}
+
/*
* This is a kernel thread which:
*
@@ -1832,9 +1852,19 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
rcu_read_unlock();
if (!success) {
- /* Cannot read from anywhere -- bye bye array */
+ /* Cannot read from anywhere, just mark the block
+ * as bad on the first device to discourage future
+ * reads.
+ */
int dn = r10_bio->devs[r10_bio->read_slot].devnum;
- md_error(mddev, conf->mirrors[dn].rdev);
+ rdev = conf->mirrors[dn].rdev;
+
+ if (!rdev_set_badblocks(
+ rdev,
+ r10_bio->devs[r10_bio->read_slot].addr
+ + sect,
+ s, 0))
+ md_error(mddev, rdev);
break;
}
@@ -1855,10 +1885,10 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
- if (sync_page_io(rdev,
- r10_bio->devs[sl].addr +
- sect,
- s<<9, conf->tmppage, WRITE, false)
+ if (r10_sync_page_io(rdev,
+ r10_bio->devs[sl].addr +
+ sect,
+ s<<9, conf->tmppage, WRITE)
== 0) {
/* Well, this device is dead */
printk(KERN_NOTICE
@@ -1873,7 +1903,6 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
"drive\n",
mdname(mddev),
bdevname(rdev->bdev, b));
- md_error(mddev, rdev);
}
rdev_dec_pending(rdev, mddev);
rcu_read_lock();
@@ -1893,11 +1922,12 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
atomic_inc(&rdev->nr_pending);
rcu_read_unlock();
- if (sync_page_io(rdev,
- r10_bio->devs[sl].addr +
- sect,
- s<<9, conf->tmppage,
- READ, false) == 0) {
+ switch (r10_sync_page_io(rdev,
+ r10_bio->devs[sl].addr +
+ sect,
+ s<<9, conf->tmppage,
+ READ)) {
+ case 0:
/* Well, this device is dead */
printk(KERN_NOTICE
"md/raid10:%s: unable to read back "
@@ -1911,9 +1941,8 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio)
"drive\n",
mdname(mddev),
bdevname(rdev->bdev, b));
-
- md_error(mddev, rdev);
- } else {
+ break;
+ case 1:
printk(KERN_INFO
"md/raid10:%s: read error corrected"
" (%d sectors at %llu on %s)\n",