diff options
author | Dan Williams <dan.j.williams@intel.com> | 2008-05-13 19:10:11 +0000 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@suse.de> | 2008-05-15 07:50:08 -0700 |
commit | 15c2419876173c417a119b5665ecc62520e50a6c (patch) | |
tree | 424907ed1186be3c20ad852b8ceeb0eb4c83aac3 | |
parent | 69ad365afb26fa2ac1bdb4b166b6cc231251edd8 (diff) |
md: fix raid5 'repair' operations
commit c8894419acf5e56851de9741c5047bebd78acd1f upstream
Date: Mon, 12 May 2008 14:02:12 -0700
Subject: md: fix raid5 'repair' operations
commit bd2ab67030e9116f1e4aae1289220255412b37fd "md: close a livelock window
in handle_parity_checks5" introduced a bug in handling 'repair' operations.
After a repair operation completes we clear the state bits tracking this
operation. However, they are cleared too early and this results in the code
deciding to re-run the parity check operation. Since we have done the repair
in memory the second check does not find a mismatch and thus does not do a
writeback.
Test results:
$ echo repair > /sys/block/md0/md/sync_action
$ cat /sys/block/md0/md/mismatch_cnt
51072
$ echo repair > /sys/block/md0/md/sync_action
$ cat /sys/block/md0/md/mismatch_cnt
0
(also fix incorrect indentation)
Tested-by: George Spelvin <linux@horizon.com>
Acked-by: NeilBrown <neilb@suse.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
-rw-r--r-- | drivers/md/raid5.c | 25 |
1 files changed, 13 insertions, 12 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b162b839a66..18a1379ed04 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -2354,8 +2354,8 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, /* complete a check operation */ if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) { - clear_bit(STRIPE_OP_CHECK, &sh->ops.ack); - clear_bit(STRIPE_OP_CHECK, &sh->ops.pending); + clear_bit(STRIPE_OP_CHECK, &sh->ops.ack); + clear_bit(STRIPE_OP_CHECK, &sh->ops.pending); if (s->failed == 0) { if (sh->ops.zero_sum_result == 0) /* parity is correct (on disc, @@ -2385,16 +2385,6 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, canceled_check = 1; /* STRIPE_INSYNC is not set */ } - /* check if we can clear a parity disk reconstruct */ - if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) && - test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { - - clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending); - clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); - clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack); - clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); - } - /* start a new check operation if there are no failures, the stripe is * not insync, and a repair is not in flight */ @@ -2409,6 +2399,17 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, } } + /* check if we can clear a parity disk reconstruct */ + if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) && + test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { + + clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending); + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack); + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); + } + + /* Wait for check parity and compute block operations to complete * before write-back. If a failure occurred while the check operation * was in flight we need to cycle this stripe through handle_stripe |