From de171cb9a52598cc023adceafc6c166112401386 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 31 Jan 2011 11:57:42 +1100 Subject: md: revert change to raid_disks on failure. If we try to update_raid_disks and it fails, we should put 'delta_disks' back to zero. This is important because some code, such as slot_store, assumes that delta_disks has been validated. Signed-off-by: NeilBrown --- drivers/md/md.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index b76cfc89e1b..e636e404e9a 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5578,6 +5578,8 @@ static int update_raid_disks(mddev_t *mddev, int raid_disks) mddev->delta_disks = raid_disks - mddev->raid_disks; rv = mddev->pers->check_reshape(mddev); + if (rv < 0) + mddev->delta_disks = 0; return rv; } -- cgit v1.2.3-18-g5258 From f21e9ff7f77d41ceca4e1e5ee5a4efa5ad7a5e40 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 31 Jan 2011 12:10:09 +1100 Subject: md: Remove the AllReserved flag for component devices. This flag is not needed and is used badly. Devices that are included in a native-metadata array are reserved exclusively for that array - and currently have AllReserved set. They all are bd_claimed for the rdev and so cannot be shared. Devices that are included in external-metadata arrays can be shared among multiple arrays - providing there is no overlap. These are bd_claimed for md in general - not for a particular rdev. When changing the amount of a device that is used in an array we need to check for overlap. This currently includes a check on AllReserved So even without overlap, sharing with an AllReserved device is not allowed. However the bd_claim usage already precludes sharing with these devices, so the test on AllReserved is not needed. And in fact it is wrong. As this is the only use of AllReserved, simply remove all usage and definition of AllReserved. Signed-off-by: NeilBrown --- drivers/md/md.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index e636e404e9a..f539b587ca7 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1947,8 +1947,6 @@ static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared) __bdevname(dev, b)); return PTR_ERR(bdev); } - if (!shared) - set_bit(AllReserved, &rdev->flags); rdev->bdev = bdev; return err; } @@ -2610,12 +2608,11 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) mddev_lock(mddev); list_for_each_entry(rdev2, &mddev->disks, same_set) - if (test_bit(AllReserved, &rdev2->flags) || - (rdev->bdev == rdev2->bdev && - rdev != rdev2 && - overlaps(rdev->data_offset, rdev->sectors, - rdev2->data_offset, - rdev2->sectors))) { + if (rdev->bdev == rdev2->bdev && + rdev != rdev2 && + overlaps(rdev->data_offset, rdev->sectors, + rdev2->data_offset, + rdev2->sectors)) { overlap = 1; break; } -- cgit v1.2.3-18-g5258 From a8c42c7f476b5bb39bb3a5b32d5473b9a46cadb9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 31 Jan 2011 13:47:13 +1100 Subject: md: Don't use remove_and_add_spares to remove failed devices from a read-only array remove_and_add_spares is called in two places where the needs really are very different. remove_and_add_spares should not be called on an array which is about to be reshaped as some extra devices might have been manually added and that would remove them. However if the array is 'read-auto', that will currently happen, which is bad. So in the 'ro != 0' case don't call remove_and_add_spares but simply remove the failed devices as the comment suggests is needed. Signed-off-by: NeilBrown --- drivers/md/md.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index f539b587ca7..5b93829f3d4 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -7027,7 +7027,7 @@ static int remove_and_add_spares(mddev_t *mddev) } } - if (mddev->degraded && ! mddev->ro && !mddev->recovery_disabled) { + if (mddev->degraded && !mddev->recovery_disabled) { list_for_each_entry(rdev, &mddev->disks, same_set) { if (rdev->raid_disk >= 0 && !test_bit(In_sync, &rdev->flags) && @@ -7150,7 +7150,20 @@ void md_check_recovery(mddev_t *mddev) /* Only thing we do on a ro array is remove * failed devices. */ - remove_and_add_spares(mddev); + mdk_rdev_t *rdev; + list_for_each_entry(rdev, &mddev->disks, same_set) + if (rdev->raid_disk >= 0 && + !test_bit(Blocked, &rdev->flags) && + test_bit(Faulty, &rdev->flags) && + atomic_read(&rdev->nr_pending)==0) { + if (mddev->pers->hot_remove_disk( + mddev, rdev->raid_disk)==0) { + char nm[20]; + sprintf(nm,"rd%d", rdev->raid_disk); + sysfs_remove_link(&mddev->kobj, nm); + rdev->raid_disk = -1; + } + } clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); goto unlock; } -- cgit v1.2.3-18-g5258 From 7281f8129c362436237b82c8c026494dd36479dc Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 31 Jan 2011 14:30:27 +1100 Subject: md: don't clear curr_resync_completed at end of resync. There is no need to set this to zero at this point. It will be set to zero by remove_and_add_spares or at the start of md_do_sync at the latest. And setting it to zero before MD_RECOVERY_RUNNING is cleared can make a 'zero' appear briefly in the 'sync_completed' sysfs attribute just as resync is finishing. So simply remove this setting to zero. Signed-off-by: NeilBrown --- drivers/md/md.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 5b93829f3d4..f2d5628d51c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6984,9 +6984,6 @@ void md_do_sync(mddev_t *mddev) } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) mddev->resync_min = mddev->curr_resync_completed; mddev->curr_resync = 0; - if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) - mddev->curr_resync_completed = 0; - sysfs_notify(&mddev->kobj, NULL, "sync_completed"); wake_up(&resync_wait); set_bit(MD_RECOVERY_DONE, &mddev->recovery); md_wakeup_thread(mddev->thread); -- cgit v1.2.3-18-g5258 From c6751b2bde477f56ceef67aa1d298ce44e8e2e23 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 2 Feb 2011 11:57:13 +1100 Subject: md: Don't allow slot_store while resync/recovery is happening. Activating a spare in an array while resync/recovery is already happening can lead the that spare being marked in-sync when it isn't really. So don't allow the 'slot' to be set (this activating the device) while resync/recovery is happening. Signed-off-by: NeilBrown --- drivers/md/md.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index f2d5628d51c..1138d1053e9 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2463,6 +2463,9 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) if (rdev->raid_disk != -1) return -EBUSY; + if (test_bit(MD_RECOVERY_RUNNING, &rdev->mddev->recovery)) + return -EBUSY; + if (rdev->mddev->pers->hot_add_disk == NULL) return -EINVAL; -- cgit v1.2.3-18-g5258 From e91ece5590b3c728624ab57043fc7a05069c604a Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Mon, 7 Feb 2011 19:21:48 -0500 Subject: md_make_request: don't touch the bio after calling make_request md_make_request was calling bio_sectors() for part_stat_add after it was calling the make_request function. This is bad because the make_request function can free the bio and because the bi_size field can change around. The fix here was suggested by Jens Axboe. It saves the sector count before the make_request call. I hit this with CONFIG_DEBUG_PAGEALLOC turned on while trying to break his pretty fusionio card. Cc: Signed-off-by: Chris Mason Signed-off-by: NeilBrown --- drivers/md/md.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 1138d1053e9..0cc30ecda4c 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -287,6 +287,7 @@ static int md_make_request(struct request_queue *q, struct bio *bio) mddev_t *mddev = q->queuedata; int rv; int cpu; + unsigned int sectors; if (mddev == NULL || mddev->pers == NULL || !mddev->ready) { @@ -311,12 +312,16 @@ static int md_make_request(struct request_queue *q, struct bio *bio) atomic_inc(&mddev->active_io); rcu_read_unlock(); + /* + * save the sectors now since our bio can + * go away inside make_request + */ + sectors = bio_sectors(bio); rv = mddev->pers->make_request(mddev, bio); cpu = part_stat_lock(); part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]); - part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], - bio_sectors(bio)); + part_stat_add(cpu, &mddev->gendisk->part0, sectors[rw], sectors); part_stat_unlock(); if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended) -- cgit v1.2.3-18-g5258