aboutsummaryrefslogtreecommitdiff
path: root/drivers/md/raid1.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r--drivers/md/raid1.c185
1 files changed, 141 insertions, 44 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index ede2461e79c..a368db2431a 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -135,7 +135,7 @@ out_free_pages:
put_page(r1_bio->bios[j]->bi_io_vec[i].bv_page);
j = -1;
out_free_bio:
- while ( ++j < pi->raid_disks )
+ while (++j < pi->raid_disks)
bio_put(r1_bio->bios[j]);
r1bio_pool_free(r1_bio, data);
return NULL;
@@ -164,7 +164,7 @@ static void put_all_bios(struct r1conf *conf, struct r1bio *r1_bio)
{
int i;
- for (i = 0; i < conf->raid_disks; i++) {
+ for (i = 0; i < conf->raid_disks * 2; i++) {
struct bio **bio = r1_bio->bios + i;
if (!BIO_SPECIAL(*bio))
bio_put(*bio);
@@ -185,7 +185,7 @@ static void put_buf(struct r1bio *r1_bio)
struct r1conf *conf = r1_bio->mddev->private;
int i;
- for (i=0; i<conf->raid_disks; i++) {
+ for (i = 0; i < conf->raid_disks * 2; i++) {
struct bio *bio = r1_bio->bios[i];
if (bio->bi_end_io)
rdev_dec_pending(conf->mirrors[i].rdev, r1_bio->mddev);
@@ -277,13 +277,14 @@ static inline void update_head_pos(int disk, struct r1bio *r1_bio)
static int find_bio_disk(struct r1bio *r1_bio, struct bio *bio)
{
int mirror;
- int raid_disks = r1_bio->mddev->raid_disks;
+ struct r1conf *conf = r1_bio->mddev->private;
+ int raid_disks = conf->raid_disks;
- for (mirror = 0; mirror < raid_disks; mirror++)
+ for (mirror = 0; mirror < raid_disks * 2; mirror++)
if (r1_bio->bios[mirror] == bio)
break;
- BUG_ON(mirror == raid_disks);
+ BUG_ON(mirror == raid_disks * 2);
update_head_pos(mirror, r1_bio);
return mirror;
@@ -390,6 +391,11 @@ static void raid1_end_write_request(struct bio *bio, int error)
if (!uptodate) {
set_bit(WriteErrorSeen,
&conf->mirrors[mirror].rdev->flags);
+ if (!test_and_set_bit(WantReplacement,
+ &conf->mirrors[mirror].rdev->flags))
+ set_bit(MD_RECOVERY_NEEDED, &
+ conf->mddev->recovery);
+
set_bit(R1BIO_WriteError, &r1_bio->state);
} else {
/*
@@ -505,7 +511,7 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
start_disk = conf->last_used;
}
- for (i = 0 ; i < conf->raid_disks ; i++) {
+ for (i = 0 ; i < conf->raid_disks * 2 ; i++) {
sector_t dist;
sector_t first_bad;
int bad_sectors;
@@ -525,8 +531,17 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
if (test_bit(WriteMostly, &rdev->flags)) {
/* Don't balance among write-mostly, just
* use the first as a last resort */
- if (best_disk < 0)
+ if (best_disk < 0) {
+ if (is_badblock(rdev, this_sector, sectors,
+ &first_bad, &bad_sectors)) {
+ if (first_bad < this_sector)
+ /* Cannot use this */
+ continue;
+ best_good_sectors = first_bad - this_sector;
+ } else
+ best_good_sectors = sectors;
best_disk = disk;
+ }
continue;
}
/* This is a reasonable device to use. It might
@@ -609,7 +624,7 @@ int md_raid1_congested(struct mddev *mddev, int bits)
return 1;
rcu_read_lock();
- for (i = 0; i < mddev->raid_disks; i++) {
+ for (i = 0; i < conf->raid_disks; i++) {
struct md_rdev *rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev && !test_bit(Faulty, &rdev->flags)) {
struct request_queue *q = bdev_get_queue(rdev->bdev);
@@ -974,7 +989,7 @@ read_again:
*/
plugged = mddev_check_plugged(mddev);
- disks = conf->raid_disks;
+ disks = conf->raid_disks * 2;
retry_write:
blocked_rdev = NULL;
rcu_read_lock();
@@ -988,7 +1003,8 @@ read_again:
}
r1_bio->bios[i] = NULL;
if (!rdev || test_bit(Faulty, &rdev->flags)) {
- set_bit(R1BIO_Degraded, &r1_bio->state);
+ if (i < conf->raid_disks)
+ set_bit(R1BIO_Degraded, &r1_bio->state);
continue;
}
@@ -1263,6 +1279,25 @@ static int raid1_spare_active(struct mddev *mddev)
*/
for (i = 0; i < conf->raid_disks; i++) {
struct md_rdev *rdev = conf->mirrors[i].rdev;
+ struct md_rdev *repl = conf->mirrors[conf->raid_disks + i].rdev;
+ if (repl
+ && repl->recovery_offset == MaxSector
+ && !test_bit(Faulty, &repl->flags)
+ && !test_and_set_bit(In_sync, &repl->flags)) {
+ /* replacement has just become active */
+ if (!rdev ||
+ !test_and_clear_bit(In_sync, &rdev->flags))
+ count++;
+ if (rdev) {
+ /* Replaced device not technically
+ * faulty, but we need to be sure
+ * it gets removed and never re-added
+ */
+ set_bit(Faulty, &rdev->flags);
+ sysfs_notify_dirent_safe(
+ rdev->sysfs_state);
+ }
+ }
if (rdev
&& !test_bit(Faulty, &rdev->flags)
&& !test_and_set_bit(In_sync, &rdev->flags)) {
@@ -1286,7 +1321,7 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
int mirror = 0;
struct mirror_info *p;
int first = 0;
- int last = mddev->raid_disks - 1;
+ int last = conf->raid_disks - 1;
if (mddev->recovery_disabled == conf->recovery_disabled)
return -EBUSY;
@@ -1294,8 +1329,9 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
if (rdev->raid_disk >= 0)
first = last = rdev->raid_disk;
- for (mirror = first; mirror <= last; mirror++)
- if ( !(p=conf->mirrors+mirror)->rdev) {
+ for (mirror = first; mirror <= last; mirror++) {
+ p = conf->mirrors+mirror;
+ if (!p->rdev) {
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
@@ -1322,21 +1358,35 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
rcu_assign_pointer(p->rdev, rdev);
break;
}
+ if (test_bit(WantReplacement, &p->rdev->flags) &&
+ p[conf->raid_disks].rdev == NULL) {
+ /* Add this device as a replacement */
+ clear_bit(In_sync, &rdev->flags);
+ set_bit(Replacement, &rdev->flags);
+ rdev->raid_disk = mirror;
+ err = 0;
+ conf->fullsync = 1;
+ rcu_assign_pointer(p[conf->raid_disks].rdev, rdev);
+ break;
+ }
+ }
md_integrity_add_rdev(rdev, mddev);
print_conf(conf);
return err;
}
-static int raid1_remove_disk(struct mddev *mddev, int number)
+static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
{
struct r1conf *conf = mddev->private;
int err = 0;
- struct md_rdev *rdev;
+ int number = rdev->raid_disk;
struct mirror_info *p = conf->mirrors+ number;
+ if (rdev != p->rdev)
+ p = conf->mirrors + conf->raid_disks + number;
+
print_conf(conf);
- rdev = p->rdev;
- if (rdev) {
+ if (rdev == p->rdev) {
if (test_bit(In_sync, &rdev->flags) ||
atomic_read(&rdev->nr_pending)) {
err = -EBUSY;
@@ -1358,7 +1408,21 @@ static int raid1_remove_disk(struct mddev *mddev, int number)
err = -EBUSY;
p->rdev = rdev;
goto abort;
- }
+ } else if (conf->mirrors[conf->raid_disks + number].rdev) {
+ /* We just removed a device that is being replaced.
+ * Move down the replacement. We drain all IO before
+ * doing this to avoid confusion.
+ */
+ struct md_rdev *repl =
+ conf->mirrors[conf->raid_disks + number].rdev;
+ raise_barrier(conf);
+ clear_bit(Replacement, &repl->flags);
+ p->rdev = repl;
+ conf->mirrors[conf->raid_disks + number].rdev = NULL;
+ lower_barrier(conf);
+ clear_bit(WantReplacement, &rdev->flags);
+ } else
+ clear_bit(WantReplacement, &rdev->flags);
err = md_integrity_register(mddev);
}
abort:
@@ -1411,6 +1475,10 @@ static void end_sync_write(struct bio *bio, int error)
} while (sectors_to_go > 0);
set_bit(WriteErrorSeen,
&conf->mirrors[mirror].rdev->flags);
+ if (!test_and_set_bit(WantReplacement,
+ &conf->mirrors[mirror].rdev->flags))
+ set_bit(MD_RECOVERY_NEEDED, &
+ mddev->recovery);
set_bit(R1BIO_WriteError, &r1_bio->state);
} else if (is_badblock(conf->mirrors[mirror].rdev,
r1_bio->sector,
@@ -1441,8 +1509,13 @@ static int r1_sync_page_io(struct md_rdev *rdev, sector_t sector,
if (sync_page_io(rdev, sector, sectors << 9, page, rw, false))
/* success */
return 1;
- if (rw == WRITE)
+ if (rw == WRITE) {
set_bit(WriteErrorSeen, &rdev->flags);
+ if (!test_and_set_bit(WantReplacement,
+ &rdev->flags))
+ set_bit(MD_RECOVERY_NEEDED, &
+ rdev->mddev->recovery);
+ }
/* need to record an error - either for the block or the device */
if (!rdev_set_badblocks(rdev, sector, sectors, 0))
md_error(rdev->mddev, rdev);
@@ -1493,7 +1566,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
}
}
d++;
- if (d == conf->raid_disks)
+ if (d == conf->raid_disks * 2)
d = 0;
} while (!success && d != r1_bio->read_disk);
@@ -1510,7 +1583,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
mdname(mddev),
bdevname(bio->bi_bdev, b),
(unsigned long long)r1_bio->sector);
- for (d = 0; d < conf->raid_disks; d++) {
+ for (d = 0; d < conf->raid_disks * 2; d++) {
rdev = conf->mirrors[d].rdev;
if (!rdev || test_bit(Faulty, &rdev->flags))
continue;
@@ -1536,7 +1609,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
/* write it back and re-read */
while (d != r1_bio->read_disk) {
if (d == 0)
- d = conf->raid_disks;
+ d = conf->raid_disks * 2;
d--;
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
continue;
@@ -1551,7 +1624,7 @@ static int fix_sync_read_error(struct r1bio *r1_bio)
d = start;
while (d != r1_bio->read_disk) {
if (d == 0)
- d = conf->raid_disks;
+ d = conf->raid_disks * 2;
d--;
if (r1_bio->bios[d]->bi_end_io != end_sync_read)
continue;
@@ -1584,7 +1657,7 @@ static int process_checks(struct r1bio *r1_bio)
int primary;
int i;
- for (primary = 0; primary < conf->raid_disks; primary++)
+ for (primary = 0; primary < conf->raid_disks * 2; primary++)
if (r1_bio->bios[primary]->bi_end_io == end_sync_read &&
test_bit(BIO_UPTODATE, &r1_bio->bios[primary]->bi_flags)) {
r1_bio->bios[primary]->bi_end_io = NULL;
@@ -1592,7 +1665,7 @@ static int process_checks(struct r1bio *r1_bio)
break;
}
r1_bio->read_disk = primary;
- for (i = 0; i < conf->raid_disks; i++) {
+ for (i = 0; i < conf->raid_disks * 2; i++) {
int j;
int vcnt = r1_bio->sectors >> (PAGE_SHIFT- 9);
struct bio *pbio = r1_bio->bios[primary];
@@ -1656,7 +1729,7 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
{
struct r1conf *conf = mddev->private;
int i;
- int disks = conf->raid_disks;
+ int disks = conf->raid_disks * 2;
struct bio *bio, *wbio;
bio = r1_bio->bios[r1_bio->read_disk];
@@ -1737,7 +1810,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
success = 1;
else {
d++;
- if (d == conf->raid_disks)
+ if (d == conf->raid_disks * 2)
d = 0;
}
} while (!success && d != read_disk);
@@ -1753,7 +1826,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
start = d;
while (d != read_disk) {
if (d==0)
- d = conf->raid_disks;
+ d = conf->raid_disks * 2;
d--;
rdev = conf->mirrors[d].rdev;
if (rdev &&
@@ -1765,7 +1838,7 @@ static void fix_read_error(struct r1conf *conf, int read_disk,
while (d != read_disk) {
char b[BDEVNAME_SIZE];
if (d==0)
- d = conf->raid_disks;
+ d = conf->raid_disks * 2;
d--;
rdev = conf->mirrors[d].rdev;
if (rdev &&
@@ -1887,7 +1960,7 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
{
int m;
int s = r1_bio->sectors;
- for (m = 0; m < conf->raid_disks ; m++) {
+ for (m = 0; m < conf->raid_disks * 2 ; m++) {
struct md_rdev *rdev = conf->mirrors[m].rdev;
struct bio *bio = r1_bio->bios[m];
if (bio->bi_end_io == NULL)
@@ -1909,7 +1982,7 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio
static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
{
int m;
- for (m = 0; m < conf->raid_disks ; m++)
+ for (m = 0; m < conf->raid_disks * 2 ; m++)
if (r1_bio->bios[m] == IO_MADE_GOOD) {
struct md_rdev *rdev = conf->mirrors[m].rdev;
rdev_clear_badblocks(rdev,
@@ -2184,7 +2257,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
r1_bio->state = 0;
set_bit(R1BIO_IsSync, &r1_bio->state);
- for (i=0; i < conf->raid_disks; i++) {
+ for (i = 0; i < conf->raid_disks * 2; i++) {
struct md_rdev *rdev;
bio = r1_bio->bios[i];
@@ -2203,7 +2276,8 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
rdev = rcu_dereference(conf->mirrors[i].rdev);
if (rdev == NULL ||
test_bit(Faulty, &rdev->flags)) {
- still_degraded = 1;
+ if (i < conf->raid_disks)
+ still_degraded = 1;
} else if (!test_bit(In_sync, &rdev->flags)) {
bio->bi_rw = WRITE;
bio->bi_end_io = end_sync_write;
@@ -2254,7 +2328,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
* need to mark them bad on all write targets
*/
int ok = 1;
- for (i = 0 ; i < conf->raid_disks ; i++)
+ for (i = 0 ; i < conf->raid_disks * 2 ; i++)
if (r1_bio->bios[i]->bi_end_io == end_sync_write) {
struct md_rdev *rdev =
rcu_dereference(conf->mirrors[i].rdev);
@@ -2323,7 +2397,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
len = sync_blocks<<9;
}
- for (i=0 ; i < conf->raid_disks; i++) {
+ for (i = 0 ; i < conf->raid_disks * 2; i++) {
bio = r1_bio->bios[i];
if (bio->bi_end_io) {
page = bio->bi_io_vec[bio->bi_vcnt].bv_page;
@@ -2356,7 +2430,7 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
*/
if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
atomic_set(&r1_bio->remaining, read_targets);
- for (i=0; i<conf->raid_disks; i++) {
+ for (i = 0; i < conf->raid_disks * 2; i++) {
bio = r1_bio->bios[i];
if (bio->bi_end_io == end_sync_read) {
md_sync_acct(bio->bi_bdev, nr_sectors);
@@ -2393,7 +2467,8 @@ static struct r1conf *setup_conf(struct mddev *mddev)
if (!conf)
goto abort;
- conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks,
+ conf->mirrors = kzalloc(sizeof(struct mirror_info)
+ * mddev->raid_disks * 2,
GFP_KERNEL);
if (!conf->mirrors)
goto abort;
@@ -2405,7 +2480,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
conf->poolinfo = kzalloc(sizeof(*conf->poolinfo), GFP_KERNEL);
if (!conf->poolinfo)
goto abort;
- conf->poolinfo->raid_disks = mddev->raid_disks;
+ conf->poolinfo->raid_disks = mddev->raid_disks * 2;
conf->r1bio_pool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
r1bio_pool_free,
conf->poolinfo);
@@ -2414,14 +2489,20 @@ static struct r1conf *setup_conf(struct mddev *mddev)
conf->poolinfo->mddev = mddev;
+ err = -EINVAL;
spin_lock_init(&conf->device_lock);
list_for_each_entry(rdev, &mddev->disks, same_set) {
int disk_idx = rdev->raid_disk;
if (disk_idx >= mddev->raid_disks
|| disk_idx < 0)
continue;
- disk = conf->mirrors + disk_idx;
+ if (test_bit(Replacement, &rdev->flags))
+ disk = conf->mirrors + conf->raid_disks + disk_idx;
+ else
+ disk = conf->mirrors + disk_idx;
+ if (disk->rdev)
+ goto abort;
disk->rdev = rdev;
disk->head_position = 0;
@@ -2437,11 +2518,27 @@ static struct r1conf *setup_conf(struct mddev *mddev)
conf->pending_count = 0;
conf->recovery_disabled = mddev->recovery_disabled - 1;
+ err = -EIO;
conf->last_used = -1;
- for (i = 0; i < conf->raid_disks; i++) {
+ for (i = 0; i < conf->raid_disks * 2; i++) {
disk = conf->mirrors + i;
+ if (i < conf->raid_disks &&
+ disk[conf->raid_disks].rdev) {
+ /* This slot has a replacement. */
+ if (!disk->rdev) {
+ /* No original, just make the replacement
+ * a recovering spare
+ */
+ disk->rdev =
+ disk[conf->raid_disks].rdev;
+ disk[conf->raid_disks].rdev = NULL;
+ } else if (!test_bit(In_sync, &disk->rdev->flags))
+ /* Original is not in_sync - bad */
+ goto abort;
+ }
+
if (!disk->rdev ||
!test_bit(In_sync, &disk->rdev->flags)) {
disk->head_position = 0;
@@ -2455,7 +2552,6 @@ static struct r1conf *setup_conf(struct mddev *mddev)
conf->last_used = i;
}
- err = -EIO;
if (conf->last_used < 0) {
printk(KERN_ERR "md/raid1:%s: no operational mirrors\n",
mdname(mddev));
@@ -2665,7 +2761,7 @@ static int raid1_reshape(struct mddev *mddev)
if (!newpoolinfo)
return -ENOMEM;
newpoolinfo->mddev = mddev;
- newpoolinfo->raid_disks = raid_disks;
+ newpoolinfo->raid_disks = raid_disks * 2;
newpool = mempool_create(NR_RAID1_BIOS, r1bio_pool_alloc,
r1bio_pool_free, newpoolinfo);
@@ -2673,7 +2769,8 @@ static int raid1_reshape(struct mddev *mddev)
kfree(newpoolinfo);
return -ENOMEM;
}
- newmirrors = kzalloc(sizeof(struct mirror_info) * raid_disks, GFP_KERNEL);
+ newmirrors = kzalloc(sizeof(struct mirror_info) * raid_disks * 2,
+ GFP_KERNEL);
if (!newmirrors) {
kfree(newpoolinfo);
mempool_destroy(newpool);