From 4b80991c6cb9efa607bc4fd6f3ecdf5511c31bb0 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 21 Jul 2008 17:05:25 +1000 Subject: md: Protect access to mddev->disks list using RCU All modifications and most access to the mddev->disks list are made under the reconfig_mutex lock. However there are three places where the list is walked without any locking. If a reconfig happens at this time, havoc (and oops) can ensue. So use RCU to protect these accesses: - wrap them in rcu_read_{,un}lock() - use list_for_each_entry_rcu - add to the list with list_add_rcu - delete from the list with list_del_rcu - delay the 'free' with call_rcu rather than schedule_work Note that export_rdev did a list_del_init on this list. In almost all cases the entry was not in the list anymore so it was a no-op and so safe. It is no longer safe as after list_del_rcu we may not touch the list_head. An audit shows that export_rdev is called: - after unbind_rdev_from_array, in which case the delete has already been done, - after bind_rdev_to_array fails, in which case the delete isn't needed. - before the device has been put on a list at all (e.g. in add_new_disk where reading the superblock fails). - and in autorun devices after a failure when the device is on a different list. So remove the list_del_init call from export_rdev, and add it back immediately before the called to export_rdev for that last case. Note also that ->same_set is sometimes used for lists other than mddev->list (e.g. candidates). In these cases rcu is not needed. Signed-off-by: NeilBrown --- drivers/md/md.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'drivers/md/md.c') diff --git a/drivers/md/md.c b/drivers/md/md.c index 450f30b6edf..c2ff77ccec5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1395,15 +1395,17 @@ static struct super_type super_types[] = { static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2) { - struct list_head *tmp, *tmp2; mdk_rdev_t *rdev, *rdev2; - rdev_for_each(rdev, tmp, mddev1) - rdev_for_each(rdev2, tmp2, mddev2) + rcu_read_lock(); + rdev_for_each_rcu(rdev, mddev1) + rdev_for_each_rcu(rdev2, mddev2) if (rdev->bdev->bd_contains == - rdev2->bdev->bd_contains) + rdev2->bdev->bd_contains) { + rcu_read_unlock(); return 1; - + } + rcu_read_unlock(); return 0; } @@ -1470,7 +1472,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) kobject_del(&rdev->kobj); goto fail; } - list_add(&rdev->same_set, &mddev->disks); + list_add_rcu(&rdev->same_set, &mddev->disks); bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); return 0; @@ -1495,14 +1497,16 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) return; } bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk); - list_del_init(&rdev->same_set); + list_del_rcu(&rdev->same_set); printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); rdev->mddev = NULL; sysfs_remove_link(&rdev->kobj, "block"); /* We need to delay this, otherwise we can deadlock when - * writing to 'remove' to "dev/state" + * writing to 'remove' to "dev/state". We also need + * to delay it due to rcu usage. */ + synchronize_rcu(); INIT_WORK(&rdev->del_work, md_delayed_delete); kobject_get(&rdev->kobj); schedule_work(&rdev->del_work); @@ -1558,7 +1562,6 @@ static void export_rdev(mdk_rdev_t * rdev) if (rdev->mddev) MD_BUG(); free_disk_sb(rdev); - list_del_init(&rdev->same_set); #ifndef MODULE if (test_bit(AutoDetected, &rdev->flags)) md_autodetect_dev(rdev->bdev->bd_dev); @@ -4062,8 +4065,10 @@ static void autorun_devices(int part) /* on success, candidates will be empty, on error * it won't... */ - rdev_for_each_list(rdev, tmp, candidates) + rdev_for_each_list(rdev, tmp, candidates) { + list_del_init(&rdev->same_set); export_rdev(rdev); + } mddev_put(mddev); } printk(KERN_INFO "md: ... autorun DONE.\n"); @@ -5529,12 +5534,12 @@ int unregister_md_personality(struct mdk_personality *p) static int is_mddev_idle(mddev_t *mddev) { mdk_rdev_t * rdev; - struct list_head *tmp; int idle; long curr_events; idle = 1; - rdev_for_each(rdev, tmp, mddev) { + rcu_read_lock(); + rdev_for_each_rcu(rdev, mddev) { struct gendisk *disk = rdev->bdev->bd_contains->bd_disk; curr_events = disk_stat_read(disk, sectors[0]) + disk_stat_read(disk, sectors[1]) - @@ -5566,6 +5571,7 @@ static int is_mddev_idle(mddev_t *mddev) idle = 0; } } + rcu_read_unlock(); return idle; } -- cgit v1.2.3-18-g5258