aboutsummaryrefslogtreecommitdiff
path: root/drivers/md/md.c
diff options
context:
space:
mode:
authorDavid Vrabel <david.vrabel@csr.com>2009-01-22 19:12:32 +0000
committerDavid Vrabel <david.vrabel@csr.com>2009-01-22 19:12:32 +0000
commitdff243f7cb3a2ebbb09093066c1bc4a90ff5b3a4 (patch)
tree1c63e7c2f879cd322ca785671b74b4ff796dd24c /drivers/md/md.c
parenta5e6ced58d423cb09c4fc0087dcfdb0b5deb5e1c (diff)
parentf3b8436ad9a8ad36b3c9fa1fe030c7f38e5d3d0b (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into for-upstream
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--drivers/md/md.c416
1 files changed, 293 insertions, 123 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1b1d32694f6..41e2509bf89 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -214,20 +214,33 @@ static inline mddev_t *mddev_get(mddev_t *mddev)
return mddev;
}
+static void mddev_delayed_delete(struct work_struct *ws)
+{
+ mddev_t *mddev = container_of(ws, mddev_t, del_work);
+ kobject_del(&mddev->kobj);
+ kobject_put(&mddev->kobj);
+}
+
static void mddev_put(mddev_t *mddev)
{
if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
return;
- if (!mddev->raid_disks && list_empty(&mddev->disks)) {
+ if (!mddev->raid_disks && list_empty(&mddev->disks) &&
+ !mddev->hold_active) {
list_del(&mddev->all_mddevs);
- spin_unlock(&all_mddevs_lock);
- blk_cleanup_queue(mddev->queue);
- if (mddev->sysfs_state)
- sysfs_put(mddev->sysfs_state);
- mddev->sysfs_state = NULL;
- kobject_put(&mddev->kobj);
- } else
- spin_unlock(&all_mddevs_lock);
+ if (mddev->gendisk) {
+ /* we did a probe so need to clean up.
+ * Call schedule_work inside the spinlock
+ * so that flush_scheduled_work() after
+ * mddev_find will succeed in waiting for the
+ * work to be done.
+ */
+ INIT_WORK(&mddev->del_work, mddev_delayed_delete);
+ schedule_work(&mddev->del_work);
+ } else
+ kfree(mddev);
+ }
+ spin_unlock(&all_mddevs_lock);
}
static mddev_t * mddev_find(dev_t unit)
@@ -236,15 +249,50 @@ static mddev_t * mddev_find(dev_t unit)
retry:
spin_lock(&all_mddevs_lock);
- list_for_each_entry(mddev, &all_mddevs, all_mddevs)
- if (mddev->unit == unit) {
- mddev_get(mddev);
+
+ if (unit) {
+ list_for_each_entry(mddev, &all_mddevs, all_mddevs)
+ if (mddev->unit == unit) {
+ mddev_get(mddev);
+ spin_unlock(&all_mddevs_lock);
+ kfree(new);
+ return mddev;
+ }
+
+ if (new) {
+ list_add(&new->all_mddevs, &all_mddevs);
spin_unlock(&all_mddevs_lock);
- kfree(new);
- return mddev;
+ new->hold_active = UNTIL_IOCTL;
+ return new;
}
-
- if (new) {
+ } else if (new) {
+ /* find an unused unit number */
+ static int next_minor = 512;
+ int start = next_minor;
+ int is_free = 0;
+ int dev = 0;
+ while (!is_free) {
+ dev = MKDEV(MD_MAJOR, next_minor);
+ next_minor++;
+ if (next_minor > MINORMASK)
+ next_minor = 0;
+ if (next_minor == start) {
+ /* Oh dear, all in use. */
+ spin_unlock(&all_mddevs_lock);
+ kfree(new);
+ return NULL;
+ }
+
+ is_free = 1;
+ list_for_each_entry(mddev, &all_mddevs, all_mddevs)
+ if (mddev->unit == dev) {
+ is_free = 0;
+ break;
+ }
+ }
+ new->unit = dev;
+ new->md_minor = MINOR(dev);
+ new->hold_active = UNTIL_STOP;
list_add(&new->all_mddevs, &all_mddevs);
spin_unlock(&all_mddevs_lock);
return new;
@@ -275,16 +323,6 @@ static mddev_t * mddev_find(dev_t unit)
new->resync_max = MaxSector;
new->level = LEVEL_NONE;
- new->queue = blk_alloc_queue(GFP_KERNEL);
- if (!new->queue) {
- kfree(new);
- return NULL;
- }
- /* Can be unlocked because the queue is new: no concurrency */
- queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, new->queue);
-
- blk_queue_make_request(new->queue, md_fail_request);
-
goto retry;
}
@@ -307,25 +345,23 @@ static inline void mddev_unlock(mddev_t * mddev)
static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
{
- mdk_rdev_t * rdev;
- struct list_head *tmp;
+ mdk_rdev_t *rdev;
- rdev_for_each(rdev, tmp, mddev) {
+ list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->desc_nr == nr)
return rdev;
- }
+
return NULL;
}
static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev)
{
- struct list_head *tmp;
mdk_rdev_t *rdev;
- rdev_for_each(rdev, tmp, mddev) {
+ list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->bdev->bd_dev == dev)
return rdev;
- }
+
return NULL;
}
@@ -861,7 +897,6 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
{
mdp_super_t *sb;
- struct list_head *tmp;
mdk_rdev_t *rdev2;
int next_spare = mddev->raid_disks;
@@ -933,7 +968,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
sb->state |= (1<<MD_SB_BITMAP_PRESENT);
sb->disks[0].state = (1<<MD_DISK_REMOVED);
- rdev_for_each(rdev2, tmp, mddev) {
+ list_for_each_entry(rdev2, &mddev->disks, same_set) {
mdp_disk_t *d;
int desc_nr;
if (rdev2->raid_disk >= 0 && test_bit(In_sync, &rdev2->flags)
@@ -1259,7 +1294,6 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
{
struct mdp_superblock_1 *sb;
- struct list_head *tmp;
mdk_rdev_t *rdev2;
int max_dev, i;
/* make rdev->sb match mddev and rdev data. */
@@ -1307,7 +1341,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
}
max_dev = 0;
- rdev_for_each(rdev2, tmp, mddev)
+ list_for_each_entry(rdev2, &mddev->disks, same_set)
if (rdev2->desc_nr+1 > max_dev)
max_dev = rdev2->desc_nr+1;
@@ -1316,7 +1350,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
for (i=0; i<max_dev;i++)
sb->dev_roles[i] = cpu_to_le16(0xfffe);
- rdev_for_each(rdev2, tmp, mddev) {
+ list_for_each_entry(rdev2, &mddev->disks, same_set) {
i = rdev2->desc_nr;
if (test_bit(Faulty, &rdev2->flags))
sb->dev_roles[i] = cpu_to_le16(0xfffe);
@@ -1466,6 +1500,9 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
list_add_rcu(&rdev->same_set, &mddev->disks);
bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
+
+ /* May as well allow recovery to be retried once */
+ mddev->recovery_disabled = 0;
return 0;
fail:
@@ -1571,8 +1608,7 @@ static void kick_rdev_from_array(mdk_rdev_t * rdev)
static void export_array(mddev_t *mddev)
{
- struct list_head *tmp;
- mdk_rdev_t *rdev;
+ mdk_rdev_t *rdev, *tmp;
rdev_for_each(rdev, tmp, mddev) {
if (!rdev->mddev) {
@@ -1593,7 +1629,7 @@ static void print_desc(mdp_disk_t *desc)
desc->major,desc->minor,desc->raid_disk,desc->state);
}
-static void print_sb(mdp_super_t *sb)
+static void print_sb_90(mdp_super_t *sb)
{
int i;
@@ -1624,10 +1660,57 @@ static void print_sb(mdp_super_t *sb)
}
printk(KERN_INFO "md: THIS: ");
print_desc(&sb->this_disk);
-
}
-static void print_rdev(mdk_rdev_t *rdev)
+static void print_sb_1(struct mdp_superblock_1 *sb)
+{
+ __u8 *uuid;
+
+ uuid = sb->set_uuid;
+ printk(KERN_INFO "md: SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x"
+ ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n"
+ KERN_INFO "md: Name: \"%s\" CT:%llu\n",
+ le32_to_cpu(sb->major_version),
+ le32_to_cpu(sb->feature_map),
+ uuid[0], uuid[1], uuid[2], uuid[3],
+ uuid[4], uuid[5], uuid[6], uuid[7],
+ uuid[8], uuid[9], uuid[10], uuid[11],
+ uuid[12], uuid[13], uuid[14], uuid[15],
+ sb->set_name,
+ (unsigned long long)le64_to_cpu(sb->ctime)
+ & MD_SUPERBLOCK_1_TIME_SEC_MASK);
+
+ uuid = sb->device_uuid;
+ printk(KERN_INFO "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
+ " RO:%llu\n"
+ KERN_INFO "md: Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x"
+ ":%02x%02x%02x%02x%02x%02x\n"
+ KERN_INFO "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
+ KERN_INFO "md: (MaxDev:%u) \n",
+ le32_to_cpu(sb->level),
+ (unsigned long long)le64_to_cpu(sb->size),
+ le32_to_cpu(sb->raid_disks),
+ le32_to_cpu(sb->layout),
+ le32_to_cpu(sb->chunksize),
+ (unsigned long long)le64_to_cpu(sb->data_offset),
+ (unsigned long long)le64_to_cpu(sb->data_size),
+ (unsigned long long)le64_to_cpu(sb->super_offset),
+ (unsigned long long)le64_to_cpu(sb->recovery_offset),
+ le32_to_cpu(sb->dev_number),
+ uuid[0], uuid[1], uuid[2], uuid[3],
+ uuid[4], uuid[5], uuid[6], uuid[7],
+ uuid[8], uuid[9], uuid[10], uuid[11],
+ uuid[12], uuid[13], uuid[14], uuid[15],
+ sb->devflags,
+ (unsigned long long)le64_to_cpu(sb->utime) & MD_SUPERBLOCK_1_TIME_SEC_MASK,
+ (unsigned long long)le64_to_cpu(sb->events),
+ (unsigned long long)le64_to_cpu(sb->resync_offset),
+ le32_to_cpu(sb->sb_csum),
+ le32_to_cpu(sb->max_dev)
+ );
+}
+
+static void print_rdev(mdk_rdev_t *rdev, int major_version)
{
char b[BDEVNAME_SIZE];
printk(KERN_INFO "md: rdev %s, SZ:%08llu F:%d S:%d DN:%u\n",
@@ -1635,15 +1718,22 @@ static void print_rdev(mdk_rdev_t *rdev)
test_bit(Faulty, &rdev->flags), test_bit(In_sync, &rdev->flags),
rdev->desc_nr);
if (rdev->sb_loaded) {
- printk(KERN_INFO "md: rdev superblock:\n");
- print_sb((mdp_super_t*)page_address(rdev->sb_page));
+ printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version);
+ switch (major_version) {
+ case 0:
+ print_sb_90((mdp_super_t*)page_address(rdev->sb_page));
+ break;
+ case 1:
+ print_sb_1((struct mdp_superblock_1 *)page_address(rdev->sb_page));
+ break;
+ }
} else
printk(KERN_INFO "md: no rdev superblock!\n");
}
static void md_print_devices(void)
{
- struct list_head *tmp, *tmp2;
+ struct list_head *tmp;
mdk_rdev_t *rdev;
mddev_t *mddev;
char b[BDEVNAME_SIZE];
@@ -1658,12 +1748,12 @@ static void md_print_devices(void)
bitmap_print_sb(mddev->bitmap);
else
printk("%s: ", mdname(mddev));
- rdev_for_each(rdev, tmp2, mddev)
+ list_for_each_entry(rdev, &mddev->disks, same_set)
printk("<%s>", bdevname(rdev->bdev,b));
printk("\n");
- rdev_for_each(rdev, tmp2, mddev)
- print_rdev(rdev);
+ list_for_each_entry(rdev, &mddev->disks, same_set)
+ print_rdev(rdev, mddev->major_version);
}
printk("md: **********************************\n");
printk("\n");
@@ -1679,9 +1769,8 @@ static void sync_sbs(mddev_t * mddev, int nospares)
* with the rest of the array)
*/
mdk_rdev_t *rdev;
- struct list_head *tmp;
- rdev_for_each(rdev, tmp, mddev) {
+ list_for_each_entry(rdev, &mddev->disks, same_set) {
if (rdev->sb_events == mddev->events ||
(nospares &&
rdev->raid_disk < 0 &&
@@ -1699,7 +1788,6 @@ static void sync_sbs(mddev_t * mddev, int nospares)
static void md_update_sb(mddev_t * mddev, int force_change)
{
- struct list_head *tmp;
mdk_rdev_t *rdev;
int sync_req;
int nospares = 0;
@@ -1790,7 +1878,7 @@ repeat:
mdname(mddev),mddev->in_sync);
bitmap_update_sb(mddev->bitmap);
- rdev_for_each(rdev, tmp, mddev) {
+ list_for_each_entry(rdev, &mddev->disks, same_set) {
char b[BDEVNAME_SIZE];
dprintk(KERN_INFO "md: ");
if (rdev->sb_loaded != 1)
@@ -1999,7 +2087,6 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
md_wakeup_thread(rdev->mddev->thread);
} else if (rdev->mddev->pers) {
mdk_rdev_t *rdev2;
- struct list_head *tmp;
/* Activating a spare .. or possibly reactivating
* if we every get bitmaps working here.
*/
@@ -2010,7 +2097,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
if (rdev->mddev->pers->hot_add_disk == NULL)
return -EINVAL;
- rdev_for_each(rdev2, tmp, rdev->mddev)
+ list_for_each_entry(rdev2, &rdev->mddev->disks, same_set)
if (rdev2->raid_disk == slot)
return -EEXIST;
@@ -2125,14 +2212,14 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
*/
mddev_t *mddev;
int overlap = 0;
- struct list_head *tmp, *tmp2;
+ struct list_head *tmp;
mddev_unlock(my_mddev);
for_each_mddev(mddev, tmp) {
mdk_rdev_t *rdev2;
mddev_lock(mddev);
- rdev_for_each(rdev2, tmp2, mddev)
+ list_for_each_entry(rdev2, &mddev->disks, same_set)
if (test_bit(AllReserved, &rdev2->flags) ||
(rdev->bdev == rdev2->bdev &&
rdev != rdev2 &&
@@ -2328,8 +2415,7 @@ abort_free:
static void analyze_sbs(mddev_t * mddev)
{
int i;
- struct list_head *tmp;
- mdk_rdev_t *rdev, *freshest;
+ mdk_rdev_t *rdev, *freshest, *tmp;
char b[BDEVNAME_SIZE];
freshest = NULL;
@@ -3046,7 +3132,7 @@ action_store(mddev_t *mddev, const char *page, size_t len)
}
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
- sysfs_notify(&mddev->kobj, NULL, "sync_action");
+ sysfs_notify_dirent(mddev->sysfs_action);
return len;
}
@@ -3404,6 +3490,8 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
rv = mddev_lock(mddev);
+ if (mddev->hold_active == UNTIL_IOCTL)
+ mddev->hold_active = 0;
if (!rv) {
rv = entry->store(mddev, page, length);
mddev_unlock(mddev);
@@ -3414,6 +3502,17 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
static void md_free(struct kobject *ko)
{
mddev_t *mddev = container_of(ko, mddev_t, kobj);
+
+ if (mddev->sysfs_state)
+ sysfs_put(mddev->sysfs_state);
+
+ if (mddev->gendisk) {
+ del_gendisk(mddev->gendisk);
+ put_disk(mddev->gendisk);
+ }
+ if (mddev->queue)
+ blk_cleanup_queue(mddev->queue);
+
kfree(mddev);
}
@@ -3429,34 +3528,74 @@ static struct kobj_type md_ktype = {
int mdp_major = 0;
-static struct kobject *md_probe(dev_t dev, int *part, void *data)
+static int md_alloc(dev_t dev, char *name)
{
static DEFINE_MUTEX(disks_mutex);
mddev_t *mddev = mddev_find(dev);
struct gendisk *disk;
- int partitioned = (MAJOR(dev) != MD_MAJOR);
- int shift = partitioned ? MdpMinorShift : 0;
- int unit = MINOR(dev) >> shift;
+ int partitioned;
+ int shift;
+ int unit;
int error;
if (!mddev)
- return NULL;
+ return -ENODEV;
+
+ partitioned = (MAJOR(mddev->unit) != MD_MAJOR);
+ shift = partitioned ? MdpMinorShift : 0;
+ unit = MINOR(mddev->unit) >> shift;
+
+ /* wait for any previous instance if this device
+ * to be completed removed (mddev_delayed_delete).
+ */
+ flush_scheduled_work();
mutex_lock(&disks_mutex);
if (mddev->gendisk) {
mutex_unlock(&disks_mutex);
mddev_put(mddev);
- return NULL;
+ return -EEXIST;
+ }
+
+ if (name) {
+ /* Need to ensure that 'name' is not a duplicate.
+ */
+ mddev_t *mddev2;
+ spin_lock(&all_mddevs_lock);
+
+ list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
+ if (mddev2->gendisk &&
+ strcmp(mddev2->gendisk->disk_name, name) == 0) {
+ spin_unlock(&all_mddevs_lock);
+ return -EEXIST;
+ }
+ spin_unlock(&all_mddevs_lock);
+ }
+
+ mddev->queue = blk_alloc_queue(GFP_KERNEL);
+ if (!mddev->queue) {
+ mutex_unlock(&disks_mutex);
+ mddev_put(mddev);
+ return -ENOMEM;
}
+ /* Can be unlocked because the queue is new: no concurrency */
+ queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, mddev->queue);
+
+ blk_queue_make_request(mddev->queue, md_fail_request);
+
disk = alloc_disk(1 << shift);
if (!disk) {
mutex_unlock(&disks_mutex);
+ blk_cleanup_queue(mddev->queue);
+ mddev->queue = NULL;
mddev_put(mddev);
- return NULL;
+ return -ENOMEM;
}
- disk->major = MAJOR(dev);
+ disk->major = MAJOR(mddev->unit);
disk->first_minor = unit << shift;
- if (partitioned)
+ if (name)
+ strcpy(disk->disk_name, name);
+ else if (partitioned)
sprintf(disk->disk_name, "md_d%d", unit);
else
sprintf(disk->disk_name, "md%d", unit);
@@ -3464,7 +3603,7 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
disk->private_data = mddev;
disk->queue = mddev->queue;
/* Allow extended partitions. This makes the
- * 'mdp' device redundant, but we can really
+ * 'mdp' device redundant, but we can't really
* remove it now.
*/
disk->flags |= GENHD_FL_EXT_DEVT;
@@ -3480,9 +3619,35 @@ static struct kobject *md_probe(dev_t dev, int *part, void *data)
kobject_uevent(&mddev->kobj, KOBJ_ADD);
mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state");
}
+ mddev_put(mddev);
+ return 0;
+}
+
+static struct kobject *md_probe(dev_t dev, int *part, void *data)
+{
+ md_alloc(dev, NULL);
return NULL;
}
+static int add_named_array(const char *val, struct kernel_param *kp)
+{
+ /* val must be "md_*" where * is not all digits.
+ * We allocate an array with a large free minor number, and
+ * set the name to val. val must not already be an active name.
+ */
+ int len = strlen(val);
+ char buf[DISK_NAME_LEN];
+
+ while (len && val[len-1] == '\n')
+ len--;
+ if (len >= DISK_NAME_LEN)
+ return -E2BIG;
+ strlcpy(buf, val, len+1);
+ if (strncmp(buf, "md_", 3) != 0)
+ return -EINVAL;
+ return md_alloc(0, buf);
+}
+
static void md_safemode_timeout(unsigned long data)
{
mddev_t *mddev = (mddev_t *) data;
@@ -3501,7 +3666,6 @@ static int do_md_run(mddev_t * mddev)
{
int err;
int chunk_size;
- struct list_head *tmp;
mdk_rdev_t *rdev;
struct gendisk *disk;
struct mdk_personality *pers;
@@ -3540,7 +3704,7 @@ static int do_md_run(mddev_t * mddev)
}
/* devices must have minimum size of one chunk */
- rdev_for_each(rdev, tmp, mddev) {
+ list_for_each_entry(rdev, &mddev->disks, same_set) {
if (test_bit(Faulty, &rdev->flags))
continue;
if (rdev->size < chunk_size / 1024) {
@@ -3565,7 +3729,7 @@ static int do_md_run(mddev_t * mddev)
* the only valid external interface is through the md
* device.
*/
- rdev_for_each(rdev, tmp, mddev) {
+ list_for_each_entry(rdev, &mddev->disks, same_set) {
if (test_bit(Faulty, &rdev->flags))
continue;
sync_blockdev(rdev->bdev);
@@ -3630,10 +3794,10 @@ static int do_md_run(mddev_t * mddev)
*/
char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
mdk_rdev_t *rdev2;
- struct list_head *tmp2;
int warned = 0;
- rdev_for_each(rdev, tmp, mddev) {
- rdev_for_each(rdev2, tmp2, mddev) {
+
+ list_for_each_entry(rdev, &mddev->disks, same_set)
+ list_for_each_entry(rdev2, &mddev->disks, same_set) {
if (rdev < rdev2 &&
rdev->bdev->bd_contains ==
rdev2->bdev->bd_contains) {
@@ -3647,7 +3811,7 @@ static int do_md_run(mddev_t * mddev)
warned = 1;
}
}
- }
+
if (warned)
printk(KERN_WARNING
"True protection against single-disk"
@@ -3684,6 +3848,7 @@ static int do_md_run(mddev_t * mddev)
printk(KERN_WARNING
"md: cannot register extra attributes for %s\n",
mdname(mddev));
+ mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
} else if (mddev->ro == 2) /* auto-readonly not meaningful */
mddev->ro = 0;
@@ -3694,7 +3859,7 @@ static int do_md_run(mddev_t * mddev)
mddev->safemode_delay = (200 * HZ)/1000 +1; /* 200 msec delay */
mddev->in_sync = 1;
- rdev_for_each(rdev, tmp, mddev)
+ list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0) {
char nm[20];
sprintf(nm, "rd%d", rdev->raid_disk);
@@ -3725,9 +3890,8 @@ static int do_md_run(mddev_t * mddev)
* it will remove the drives and not do the right thing
*/
if (mddev->degraded && !mddev->sync_thread) {
- struct list_head *rtmp;
int spares = 0;
- rdev_for_each(rdev, rtmp, mddev)
+ list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0 &&
!test_bit(In_sync, &rdev->flags) &&
!test_bit(Faulty, &rdev->flags))
@@ -3754,7 +3918,8 @@ static int do_md_run(mddev_t * mddev)
mddev->changed = 1;
md_new_event(mddev);
sysfs_notify_dirent(mddev->sysfs_state);
- sysfs_notify(&mddev->kobj, NULL, "sync_action");
+ if (mddev->sysfs_action)
+ sysfs_notify_dirent(mddev->sysfs_action);
sysfs_notify(&mddev->kobj, NULL, "degraded");
kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
return 0;
@@ -3854,9 +4019,12 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
mddev->queue->merge_bvec_fn = NULL;
mddev->queue->unplug_fn = NULL;
mddev->queue->backing_dev_info.congested_fn = NULL;
- if (mddev->pers->sync_request)
+ if (mddev->pers->sync_request) {
sysfs_remove_group(&mddev->kobj, &md_redundancy_group);
-
+ if (mddev->sysfs_action)
+ sysfs_put(mddev->sysfs_action);
+ mddev->sysfs_action = NULL;
+ }
module_put(mddev->pers->owner);
mddev->pers = NULL;
/* tell userspace to handle 'inactive' */
@@ -3883,7 +4051,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
*/
if (mode == 0) {
mdk_rdev_t *rdev;
- struct list_head *tmp;
printk(KERN_INFO "md: %s stopped.\n", mdname(mddev));
@@ -3895,7 +4062,7 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
}
mddev->bitmap_offset = 0;
- rdev_for_each(rdev, tmp, mddev)
+ list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0) {
char nm[20];
sprintf(nm, "rd%d", rdev->raid_disk);
@@ -3941,6 +4108,8 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
mddev->barriers_work = 0;
mddev->safemode = 0;
kobject_uevent(&disk_to_dev(mddev->gendisk)->kobj, KOBJ_CHANGE);
+ if (mddev->hold_active == UNTIL_STOP)
+ mddev->hold_active = 0;
} else if (mddev->pers)
printk(KERN_INFO "md: %s switched to read-only mode.\n",
@@ -3956,7 +4125,6 @@ out:
static void autorun_array(mddev_t *mddev)
{
mdk_rdev_t *rdev;
- struct list_head *tmp;
int err;
if (list_empty(&mddev->disks))
@@ -3964,7 +4132,7 @@ static void autorun_array(mddev_t *mddev)
printk(KERN_INFO "md: running: ");
- rdev_for_each(rdev, tmp, mddev) {
+ list_for_each_entry(rdev, &mddev->disks, same_set) {
char b[BDEVNAME_SIZE];
printk("<%s>", bdevname(rdev->bdev,b));
}
@@ -3991,8 +4159,7 @@ static void autorun_array(mddev_t *mddev)
*/
static void autorun_devices(int part)
{
- struct list_head *tmp;
- mdk_rdev_t *rdev0, *rdev;
+ mdk_rdev_t *rdev0, *rdev, *tmp;
mddev_t *mddev;
char b[BDEVNAME_SIZE];
@@ -4007,7 +4174,7 @@ static void autorun_devices(int part)
printk(KERN_INFO "md: considering %s ...\n",
bdevname(rdev0->bdev,b));
INIT_LIST_HEAD(&candidates);
- rdev_for_each_list(rdev, tmp, pending_raid_disks)
+ rdev_for_each_list(rdev, tmp, &pending_raid_disks)
if (super_90_load(rdev, rdev0, 0) >= 0) {
printk(KERN_INFO "md: adding %s ...\n",
bdevname(rdev->bdev,b));
@@ -4053,7 +4220,7 @@ static void autorun_devices(int part)
} else {
printk(KERN_INFO "md: created %s\n", mdname(mddev));
mddev->persistent = 1;
- rdev_for_each_list(rdev, tmp, candidates) {
+ rdev_for_each_list(rdev, tmp, &candidates) {
list_del_init(&rdev->same_set);
if (bind_rdev_to_array(rdev, mddev))
export_rdev(rdev);
@@ -4064,7 +4231,7 @@ static void autorun_devices(int part)
/* on success, candidates will be empty, on error
* it won't...
*/
- rdev_for_each_list(rdev, tmp, candidates) {
+ rdev_for_each_list(rdev, tmp, &candidates) {
list_del_init(&rdev->same_set);
export_rdev(rdev);
}
@@ -4093,10 +4260,9 @@ static int get_array_info(mddev_t * mddev, void __user * arg)
mdu_array_info_t info;
int nr,working,active,failed,spare;
mdk_rdev_t *rdev;
- struct list_head *tmp;
nr=working=active=failed=spare=0;
- rdev_for_each(rdev, tmp, mddev) {
+ list_for_each_entry(rdev, &mddev->disks, same_set) {
nr++;
if (test_bit(Faulty, &rdev->flags))
failed++;
@@ -4614,9 +4780,8 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
static int update_size(mddev_t *mddev, sector_t num_sectors)
{
- mdk_rdev_t * rdev;
+ mdk_rdev_t *rdev;
int rv;
- struct list_head *tmp;
int fit = (num_sectors == 0);
if (mddev->pers->resize == NULL)
@@ -4638,7 +4803,7 @@ static int update_size(mddev_t *mddev, sector_t num_sectors)
* grow, and re-add.
*/
return -EBUSY;
- rdev_for_each(rdev, tmp, mddev) {
+ list_for_each_entry(rdev, &mddev->disks, same_set) {
sector_t avail;
avail = rdev->size * 2;
@@ -5000,6 +5165,9 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
done_unlock:
abort_unlock:
+ if (mddev->hold_active == UNTIL_IOCTL &&
+ err != -EINVAL)
+ mddev->hold_active = 0;
mddev_unlock(mddev);
return err;
@@ -5016,14 +5184,25 @@ static int md_open(struct block_device *bdev, fmode_t mode)
* Succeed if we can lock the mddev, which confirms that
* it isn't being stopped right now.
*/
- mddev_t *mddev = bdev->bd_disk->private_data;
+ mddev_t *mddev = mddev_find(bdev->bd_dev);
int err;
+ if (mddev->gendisk != bdev->bd_disk) {
+ /* we are racing with mddev_put which is discarding this
+ * bd_disk.
+ */
+ mddev_put(mddev);
+ /* Wait until bdev->bd_disk is definitely gone */
+ flush_scheduled_work();
+ /* Then retry the open from the top */
+ return -ERESTARTSYS;
+ }
+ BUG_ON(mddev != bdev->bd_disk->private_data);
+
if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1)))
goto out;
err = 0;
- mddev_get(mddev);
atomic_inc(&mddev->openers);
mddev_unlock(mddev);
@@ -5187,11 +5366,10 @@ static void status_unused(struct seq_file *seq)
{
int i = 0;
mdk_rdev_t *rdev;
- struct list_head *tmp;
seq_printf(seq, "unused devices: ");
- rdev_for_each_list(rdev, tmp, pending_raid_disks) {
+ list_for_each_entry(rdev, &pending_raid_disks, same_set) {
char b[BDEVNAME_SIZE];
i++;
seq_printf(seq, "%s ",
@@ -5350,7 +5528,6 @@ static int md_seq_show(struct seq_file *seq, void *v)
{
mddev_t *mddev = v;
sector_t size;
- struct list_head *tmp2;
mdk_rdev_t *rdev;
struct mdstat_info *mi = seq->private;
struct bitmap *bitmap;
@@ -5387,7 +5564,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
}
size = 0;
- rdev_for_each(rdev, tmp2, mddev) {
+ list_for_each_entry(rdev, &mddev->disks, same_set) {
char b[BDEVNAME_SIZE];
seq_printf(seq, " %s[%d]",
bdevname(rdev->bdev,b), rdev->desc_nr);
@@ -5694,7 +5871,6 @@ void md_do_sync(mddev_t *mddev)
struct list_head *tmp;
sector_t last_check;
int skipped = 0;
- struct list_head *rtmp;
mdk_rdev_t *rdev;
char *desc;
@@ -5799,7 +5975,7 @@ void md_do_sync(mddev_t *mddev)
/* recovery follows the physical size of devices */
max_sectors = mddev->size << 1;
j = MaxSector;
- rdev_for_each(rdev, rtmp, mddev)
+ list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0 &&
!test_bit(Faulty, &rdev->flags) &&
!test_bit(In_sync, &rdev->flags) &&
@@ -5949,7 +6125,7 @@ void md_do_sync(mddev_t *mddev)
} else {
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
mddev->curr_resync = MaxSector;
- rdev_for_each(rdev, rtmp, mddev)
+ list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0 &&
!test_bit(Faulty, &rdev->flags) &&
!test_bit(In_sync, &rdev->flags) &&
@@ -5985,10 +6161,9 @@ EXPORT_SYMBOL_GPL(md_do_sync);
static int remove_and_add_spares(mddev_t *mddev)
{
mdk_rdev_t *rdev;
- struct list_head *rtmp;
int spares = 0;
- rdev_for_each(rdev, rtmp, mddev)
+ list_for_each_entry(rdev, &mddev->disks, same_set)
if (rdev->raid_disk >= 0 &&
!test_bit(Blocked, &rdev->flags) &&
(test_bit(Faulty, &rdev->flags) ||
@@ -6003,8 +6178,8 @@ static int remove_and_add_spares(mddev_t *mddev)
}
}
- if (mddev->degraded && ! mddev->ro) {
- rdev_for_each(rdev, rtmp, mddev) {
+ if (mddev->degraded && ! mddev->ro && !mddev->recovery_disabled) {
+ list_for_each_entry(rdev, &mddev->disks, same_set) {
if (rdev->raid_disk >= 0 &&
!test_bit(In_sync, &rdev->flags) &&
!test_bit(Blocked, &rdev->flags))
@@ -6056,7 +6231,6 @@ static int remove_and_add_spares(mddev_t *mddev)
void md_check_recovery(mddev_t *mddev)
{
mdk_rdev_t *rdev;
- struct list_head *rtmp;
if (mddev->bitmap)
@@ -6120,7 +6294,7 @@ void md_check_recovery(mddev_t *mddev)
if (mddev->flags)
md_update_sb(mddev, 0);
- rdev_for_each(rdev, rtmp, mddev)
+ list_for_each_entry(rdev, &mddev->disks, same_set)
if (test_and_clear_bit(StateChanged, &rdev->flags))
sysfs_notify_dirent(rdev->sysfs_state);
@@ -6149,13 +6323,13 @@ void md_check_recovery(mddev_t *mddev)
* information must be scrapped
*/
if (!mddev->degraded)
- rdev_for_each(rdev, rtmp, mddev)
+ list_for_each_entry(rdev, &mddev->disks, same_set)
rdev->saved_raid_disk = -1;
mddev->recovery = 0;
/* flag recovery needed just to double check */
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- sysfs_notify(&mddev->kobj, NULL, "sync_action");
+ sysfs_notify_dirent(mddev->sysfs_action);
md_new_event(mddev);
goto unlock;
}
@@ -6216,7 +6390,7 @@ void md_check_recovery(mddev_t *mddev)
mddev->recovery = 0;
} else
md_wakeup_thread(mddev->sync_thread);
- sysfs_notify(&mddev->kobj, NULL, "sync_action");
+ sysfs_notify_dirent(mddev->sysfs_action);
md_new_event(mddev);
}
unlock:
@@ -6224,7 +6398,8 @@ void md_check_recovery(mddev_t *mddev)
clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
if (test_and_clear_bit(MD_RECOVERY_RECOVER,
&mddev->recovery))
- sysfs_notify(&mddev->kobj, NULL, "sync_action");
+ if (mddev->sysfs_action)
+ sysfs_notify_dirent(mddev->sysfs_action);
}
mddev_unlock(mddev);
}
@@ -6386,14 +6561,8 @@ static __exit void md_exit(void)
unregister_sysctl_table(raid_table_header);
remove_proc_entry("mdstat", NULL);
for_each_mddev(mddev, tmp) {
- struct gendisk *disk = mddev->gendisk;
- if (!disk)
- continue;
export_array(mddev);
- del_gendisk(disk);
- put_disk(disk);
- mddev->gendisk = NULL;
- mddev_put(mddev);
+ mddev->hold_active = 0;
}
}
@@ -6418,6 +6587,7 @@ static int set_ro(const char *val, struct kernel_param *kp)
module_param_call(start_ro, set_ro, get_ro, NULL, S_IRUSR|S_IWUSR);
module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
+module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
EXPORT_SYMBOL(register_md_personality);
EXPORT_SYMBOL(unregister_md_personality);