diff options
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 150 |
1 files changed, 114 insertions, 36 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 84acfe7d10e..ce88755baf4 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -36,8 +36,7 @@ #include <linux/blkdev.h> #include <linux/sysctl.h> #include <linux/seq_file.h> -#include <linux/mutex.h> -#include <linux/buffer_head.h> /* for invalidate_bdev */ +#include <linux/fs.h> #include <linux/poll.h> #include <linux/ctype.h> #include <linux/string.h> @@ -570,7 +569,7 @@ static void mddev_put(struct mddev *mddev) mddev->ctime == 0 && !mddev->hold_active) { /* Array is not configured at all, and not held active, * so destroy it */ - list_del(&mddev->all_mddevs); + list_del_init(&mddev->all_mddevs); bs = mddev->bio_set; mddev->bio_set = NULL; if (mddev->gendisk) { @@ -1714,6 +1713,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) } if (sb->devflags & WriteMostly1) set_bit(WriteMostly, &rdev->flags); + if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT) + set_bit(Replacement, &rdev->flags); } else /* MULTIPATH are always insync */ set_bit(In_sync, &rdev->flags); @@ -1767,6 +1768,9 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev) sb->recovery_offset = cpu_to_le64(rdev->recovery_offset); } + if (test_bit(Replacement, &rdev->flags)) + sb->feature_map |= + cpu_to_le32(MD_FEATURE_REPLACEMENT); if (mddev->reshape_position != MaxSector) { sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE); @@ -2546,7 +2550,8 @@ state_show(struct md_rdev *rdev, char *page) sep = ","; } if (test_bit(Blocked, &rdev->flags) || - rdev->badblocks.unacked_exist) { + (rdev->badblocks.unacked_exist + && !test_bit(Faulty, &rdev->flags))) { len += sprintf(page+len, "%sblocked", sep); sep = ","; } @@ -2559,6 +2564,15 @@ state_show(struct md_rdev *rdev, char *page) len += sprintf(page+len, "%swrite_error", sep); sep = ","; } + if (test_bit(WantReplacement, &rdev->flags)) { + len += sprintf(page+len, "%swant_replacement", sep); + sep = ","; + } + if (test_bit(Replacement, &rdev->flags)) { + len += sprintf(page+len, "%sreplacement", sep); + sep = ","; + } + return len+sprintf(page+len, "\n"); } @@ -2627,6 +2641,42 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) } else if (cmd_match(buf, "-write_error")) { clear_bit(WriteErrorSeen, &rdev->flags); err = 0; + } else if (cmd_match(buf, "want_replacement")) { + /* Any non-spare device that is not a replacement can + * become want_replacement at any time, but we then need to + * check if recovery is needed. + */ + if (rdev->raid_disk >= 0 && + !test_bit(Replacement, &rdev->flags)) + set_bit(WantReplacement, &rdev->flags); + set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); + md_wakeup_thread(rdev->mddev->thread); + err = 0; + } else if (cmd_match(buf, "-want_replacement")) { + /* Clearing 'want_replacement' is always allowed. + * Once replacements starts it is too late though. + */ + err = 0; + clear_bit(WantReplacement, &rdev->flags); + } else if (cmd_match(buf, "replacement")) { + /* Can only set a device as a replacement when array has not + * yet been started. Once running, replacement is automatic + * from spares, or by assigning 'slot'. + */ + if (rdev->mddev->pers) + err = -EBUSY; + else { + set_bit(Replacement, &rdev->flags); + err = 0; + } + } else if (cmd_match(buf, "-replacement")) { + /* Similarly, can only clear Replacement before start */ + if (rdev->mddev->pers) + err = -EBUSY; + else { + clear_bit(Replacement, &rdev->flags); + err = 0; + } } if (!err) sysfs_notify_dirent_safe(rdev->sysfs_state); @@ -2688,7 +2738,7 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len) if (rdev->mddev->pers->hot_remove_disk == NULL) return -EINVAL; err = rdev->mddev->pers-> - hot_remove_disk(rdev->mddev, rdev->raid_disk); + hot_remove_disk(rdev->mddev, rdev); if (err) return err; sysfs_unlink_rdev(rdev->mddev, rdev); @@ -2696,7 +2746,6 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len) set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); md_wakeup_thread(rdev->mddev->thread); } else if (rdev->mddev->pers) { - struct md_rdev *rdev2; /* Activating a spare .. or possibly reactivating * if we ever get bitmaps working here. */ @@ -2710,10 +2759,6 @@ slot_store(struct md_rdev *rdev, const char *buf, size_t len) if (rdev->mddev->pers->hot_add_disk == NULL) return -EINVAL; - list_for_each_entry(rdev2, &rdev->mddev->disks, same_set) - if (rdev2->raid_disk == slot) - return -EEXIST; - if (slot >= rdev->mddev->raid_disks && slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks) return -ENOSPC; @@ -3788,6 +3833,8 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len) if (err) return err; else { + if (mddev->hold_active == UNTIL_IOCTL) + mddev->hold_active = 0; sysfs_notify_dirent_safe(mddev->sysfs_state); return len; } @@ -4487,11 +4534,20 @@ md_attr_show(struct kobject *kobj, struct attribute *attr, char *page) if (!entry->show) return -EIO; + spin_lock(&all_mddevs_lock); + if (list_empty(&mddev->all_mddevs)) { + spin_unlock(&all_mddevs_lock); + return -EBUSY; + } + mddev_get(mddev); + spin_unlock(&all_mddevs_lock); + rv = mddev_lock(mddev); if (!rv) { rv = entry->show(mddev, page); mddev_unlock(mddev); } + mddev_put(mddev); return rv; } @@ -4507,13 +4563,19 @@ md_attr_store(struct kobject *kobj, struct attribute *attr, return -EIO; if (!capable(CAP_SYS_ADMIN)) return -EACCES; + spin_lock(&all_mddevs_lock); + if (list_empty(&mddev->all_mddevs)) { + spin_unlock(&all_mddevs_lock); + return -EBUSY; + } + mddev_get(mddev); + spin_unlock(&all_mddevs_lock); rv = mddev_lock(mddev); - if (mddev->hold_active == UNTIL_IOCTL) - mddev->hold_active = 0; if (!rv) { rv = entry->store(mddev, page, length); mddev_unlock(mddev); } + mddev_put(mddev); return rv; } @@ -4604,6 +4666,7 @@ static int md_alloc(dev_t dev, char *name) mddev->queue->queuedata = mddev; blk_queue_make_request(mddev->queue, md_make_request); + blk_set_stacking_limits(&mddev->queue->limits); disk = alloc_disk(1 << shift); if (!disk) { @@ -6036,8 +6099,15 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, struct mddev *mddev = NULL; int ro; - if (!capable(CAP_SYS_ADMIN)) - return -EACCES; + switch (cmd) { + case RAID_VERSION: + case GET_ARRAY_INFO: + case GET_DISK_INFO: + break; + default: + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + } /* * Commands dealing with the RAID driver but not any @@ -6697,8 +6767,11 @@ static int md_seq_show(struct seq_file *seq, void *v) if (test_bit(Faulty, &rdev->flags)) { seq_printf(seq, "(F)"); continue; - } else if (rdev->raid_disk < 0) + } + if (rdev->raid_disk < 0) seq_printf(seq, "(S)"); /* spare */ + if (test_bit(Replacement, &rdev->flags)) + seq_printf(seq, "(R)"); sectors += rdev->sectors; } @@ -7260,7 +7333,8 @@ void md_do_sync(struct mddev *mddev) printk(KERN_INFO "md: checkpointing %s of %s.\n", desc, mdname(mddev)); - mddev->recovery_cp = mddev->curr_resync; + mddev->recovery_cp = + mddev->curr_resync_completed; } } else mddev->recovery_cp = MaxSector; @@ -7278,9 +7352,9 @@ void md_do_sync(struct mddev *mddev) rcu_read_unlock(); } } + skip: set_bit(MD_CHANGE_DEVS, &mddev->flags); - skip: if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { /* We completed so min/max setting can be forgotten if used. */ if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) @@ -7310,6 +7384,7 @@ static int remove_and_add_spares(struct mddev *mddev) { struct md_rdev *rdev; int spares = 0; + int removed = 0; mddev->curr_resync_completed = 0; @@ -7320,30 +7395,32 @@ static int remove_and_add_spares(struct mddev *mddev) ! test_bit(In_sync, &rdev->flags)) && atomic_read(&rdev->nr_pending)==0) { if (mddev->pers->hot_remove_disk( - mddev, rdev->raid_disk)==0) { + mddev, rdev) == 0) { sysfs_unlink_rdev(mddev, rdev); rdev->raid_disk = -1; + removed++; } } + if (removed) + sysfs_notify(&mddev->kobj, NULL, + "degraded"); - if (mddev->degraded) { - list_for_each_entry(rdev, &mddev->disks, same_set) { - if (rdev->raid_disk >= 0 && - !test_bit(In_sync, &rdev->flags) && - !test_bit(Faulty, &rdev->flags)) + + list_for_each_entry(rdev, &mddev->disks, same_set) { + if (rdev->raid_disk >= 0 && + !test_bit(In_sync, &rdev->flags) && + !test_bit(Faulty, &rdev->flags)) + spares++; + if (rdev->raid_disk < 0 + && !test_bit(Faulty, &rdev->flags)) { + rdev->recovery_offset = 0; + if (mddev->pers-> + hot_add_disk(mddev, rdev) == 0) { + if (sysfs_link_rdev(mddev, rdev)) + /* failure here is OK */; spares++; - if (rdev->raid_disk < 0 - && !test_bit(Faulty, &rdev->flags)) { - rdev->recovery_offset = 0; - if (mddev->pers-> - hot_add_disk(mddev, rdev) == 0) { - if (sysfs_link_rdev(mddev, rdev)) - /* failure here is OK */; - spares++; - md_new_event(mddev); - set_bit(MD_CHANGE_DEVS, &mddev->flags); - } else - break; + md_new_event(mddev); + set_bit(MD_CHANGE_DEVS, &mddev->flags); } } } @@ -7458,7 +7535,7 @@ void md_check_recovery(struct mddev *mddev) test_bit(Faulty, &rdev->flags) && atomic_read(&rdev->nr_pending)==0) { if (mddev->pers->hot_remove_disk( - mddev, rdev->raid_disk)==0) { + mddev, rdev) == 0) { sysfs_unlink_rdev(mddev, rdev); rdev->raid_disk = -1; } @@ -7840,6 +7917,7 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors, s + rdev->data_offset, sectors, acknowledged); if (rv) { /* Make sure they get written out promptly */ + sysfs_notify_dirent_safe(rdev->sysfs_state); set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags); md_wakeup_thread(rdev->mddev->thread); } |