diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-21 10:29:12 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-07-21 10:29:12 -0700 |
commit | 8a392625b665c676a77c62f8608d10ff430bcb83 (patch) | |
tree | 4000a65d61baed73200e47f91dea5263ed16edd0 /drivers/md | |
parent | 519f0141f1c42e2b8b59c7dea005cbf6095358e8 (diff) | |
parent | 4b80991c6cb9efa607bc4fd6f3ecdf5511c31bb0 (diff) |
Merge branch 'for-linus' of git://neil.brown.name/md
* 'for-linus' of git://neil.brown.name/md: (52 commits)
md: Protect access to mddev->disks list using RCU
md: only count actual openers as access which prevent a 'stop'
md: linear: Make array_size sector-based and rename it to array_sectors.
md: Make mddev->array_size sector-based.
md: Make super_type->rdev_size_change() take sector-based sizes.
md: Fix check for overlapping devices.
md: Tidy up rdev_size_store a bit:
md: Remove some unused macros.
md: Turn rdev->sb_offset into a sector-based quantity.
md: Make calc_dev_sboffset() return a sector count.
md: Replace calc_dev_size() by calc_num_sectors().
md: Make update_size() take the number of sectors.
md: Better control of when do_md_stop is allowed to stop the array.
md: get_disk_info(): Don't convert between signed and unsigned and back.
md: Simplify restart_array().
md: alloc_disk_sb(): Return proper error value.
md: Simplify sb_equal().
md: Simplify uuid_equal().
md: sb_equal(): Fix misleading printk.
md: Fix a typo in the comment to cmd_match().
...
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bitmap.c | 54 | ||||
-rw-r--r-- | drivers/md/faulty.c | 2 | ||||
-rw-r--r-- | drivers/md/linear.c | 20 | ||||
-rw-r--r-- | drivers/md/md.c | 615 | ||||
-rw-r--r-- | drivers/md/multipath.c | 17 | ||||
-rw-r--r-- | drivers/md/raid0.c | 8 | ||||
-rw-r--r-- | drivers/md/raid1.c | 30 | ||||
-rw-r--r-- | drivers/md/raid10.c | 22 | ||||
-rw-r--r-- | drivers/md/raid5.c | 745 |
9 files changed, 752 insertions, 761 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index b26927ce889..621a272a2c7 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -225,7 +225,7 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long inde || test_bit(Faulty, &rdev->flags)) continue; - target = (rdev->sb_offset << 1) + offset + index * (PAGE_SIZE/512); + target = rdev->sb_start + offset + index * (PAGE_SIZE/512); if (sync_page_io(rdev->bdev, target, PAGE_SIZE, page, READ)) { page->index = index; @@ -241,10 +241,10 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long inde static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) { mdk_rdev_t *rdev; - struct list_head *tmp; mddev_t *mddev = bitmap->mddev; - rdev_for_each(rdev, tmp, mddev) + rcu_read_lock(); + rdev_for_each_rcu(rdev, mddev) if (test_bit(In_sync, &rdev->flags) && !test_bit(Faulty, &rdev->flags)) { int size = PAGE_SIZE; @@ -260,32 +260,37 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait) + (long)(page->index * (PAGE_SIZE/512)) + size/512 > 0) /* bitmap runs in to metadata */ - return -EINVAL; + goto bad_alignment; if (rdev->data_offset + mddev->size*2 - > rdev->sb_offset*2 + bitmap->offset) + > rdev->sb_start + bitmap->offset) /* data runs in to bitmap */ - return -EINVAL; - } else if (rdev->sb_offset*2 < rdev->data_offset) { + goto bad_alignment; + } else if (rdev->sb_start < rdev->data_offset) { /* METADATA BITMAP DATA */ - if (rdev->sb_offset*2 + if (rdev->sb_start + bitmap->offset + page->index*(PAGE_SIZE/512) + size/512 > rdev->data_offset) /* bitmap runs in to data */ - return -EINVAL; + goto bad_alignment; } else { /* DATA METADATA BITMAP - no problems */ } md_super_write(mddev, rdev, - (rdev->sb_offset<<1) + bitmap->offset + rdev->sb_start + bitmap->offset + page->index * (PAGE_SIZE/512), size, page); } + rcu_read_unlock(); if (wait) md_super_wait(mddev); return 0; + + bad_alignment: + rcu_read_unlock(); + return -EINVAL; } static void bitmap_file_kick(struct bitmap *bitmap); @@ -454,8 +459,11 @@ void bitmap_update_sb(struct bitmap *bitmap) spin_unlock_irqrestore(&bitmap->lock, flags); sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0); sb->events = cpu_to_le64(bitmap->mddev->events); - if (!bitmap->mddev->degraded) - sb->events_cleared = cpu_to_le64(bitmap->mddev->events); + if (bitmap->mddev->events < bitmap->events_cleared) { + /* rocking back to read-only */ + bitmap->events_cleared = bitmap->mddev->events; + sb->events_cleared = cpu_to_le64(bitmap->events_cleared); + } kunmap_atomic(sb, KM_USER0); write_page(bitmap, bitmap->sb_page, 1); } @@ -1085,9 +1093,19 @@ void bitmap_daemon_work(struct bitmap *bitmap) } else spin_unlock_irqrestore(&bitmap->lock, flags); lastpage = page; -/* - printk("bitmap clean at page %lu\n", j); -*/ + + /* We are possibly going to clear some bits, so make + * sure that events_cleared is up-to-date. + */ + if (bitmap->need_sync) { + bitmap_super_t *sb; + bitmap->need_sync = 0; + sb = kmap_atomic(bitmap->sb_page, KM_USER0); + sb->events_cleared = + cpu_to_le64(bitmap->events_cleared); + kunmap_atomic(sb, KM_USER0); + write_page(bitmap, bitmap->sb_page, 1); + } spin_lock_irqsave(&bitmap->lock, flags); clear_page_attr(bitmap, page, BITMAP_PAGE_CLEAN); } @@ -1257,6 +1275,12 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto return; } + if (success && + bitmap->events_cleared < bitmap->mddev->events) { + bitmap->events_cleared = bitmap->mddev->events; + bitmap->need_sync = 1; + } + if (!success && ! (*bmc & NEEDED_MASK)) *bmc |= NEEDED_MASK; diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c index d107ddceefc..268547dbfbd 100644 --- a/drivers/md/faulty.c +++ b/drivers/md/faulty.c @@ -297,7 +297,7 @@ static int run(mddev_t *mddev) rdev_for_each(rdev, tmp, mddev) conf->rdev = rdev; - mddev->array_size = mddev->size; + mddev->array_sectors = mddev->size * 2; mddev->private = conf; reconfig(mddev, mddev->layout, -1); diff --git a/drivers/md/linear.c b/drivers/md/linear.c index 6a866d7c8ae..b1eebf88c20 100644 --- a/drivers/md/linear.c +++ b/drivers/md/linear.c @@ -122,13 +122,13 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) return NULL; cnt = 0; - conf->array_size = 0; + conf->array_sectors = 0; rdev_for_each(rdev, tmp, mddev) { int j = rdev->raid_disk; dev_info_t *disk = conf->disks + j; - if (j < 0 || j > raid_disks || disk->rdev) { + if (j < 0 || j >= raid_disks || disk->rdev) { printk("linear: disk numbering problem. Aborting!\n"); goto out; } @@ -146,7 +146,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9); disk->size = rdev->size; - conf->array_size += rdev->size; + conf->array_sectors += rdev->size * 2; cnt++; } @@ -155,7 +155,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) goto out; } - min_spacing = conf->array_size; + min_spacing = conf->array_sectors / 2; sector_div(min_spacing, PAGE_SIZE/sizeof(struct dev_info *)); /* min_spacing is the minimum spacing that will fit the hash @@ -164,7 +164,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) * that is larger than min_spacing as use the size of that as * the actual spacing */ - conf->hash_spacing = conf->array_size; + conf->hash_spacing = conf->array_sectors / 2; for (i=0; i < cnt-1 ; i++) { sector_t sz = 0; int j; @@ -194,7 +194,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) unsigned round; unsigned long base; - sz = conf->array_size >> conf->preshift; + sz = conf->array_sectors >> (conf->preshift + 1); sz += 1; /* force round-up */ base = conf->hash_spacing >> conf->preshift; round = sector_div(sz, base); @@ -221,7 +221,7 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks) curr_offset = 0; i = 0; for (curr_offset = 0; - curr_offset < conf->array_size; + curr_offset < conf->array_sectors / 2; curr_offset += conf->hash_spacing) { while (i < raid_disks-1 && @@ -258,7 +258,7 @@ static int linear_run (mddev_t *mddev) if (!conf) return 1; mddev->private = conf; - mddev->array_size = conf->array_size; + mddev->array_sectors = conf->array_sectors; blk_queue_merge_bvec(mddev->queue, linear_mergeable_bvec); mddev->queue->unplug_fn = linear_unplug; @@ -292,8 +292,8 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev) newconf->prev = mddev_to_conf(mddev); mddev->private = newconf; mddev->raid_disks++; - mddev->array_size = newconf->array_size; - set_capacity(mddev->gendisk, mddev->array_size << 1); + mddev->array_sectors = newconf->array_sectors; + set_capacity(mddev->gendisk, mddev->array_sectors); return 0; } diff --git a/drivers/md/md.c b/drivers/md/md.c index 2580ac1b9b0..c2ff77ccec5 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -169,7 +169,6 @@ void md_new_event(mddev_t *mddev) { atomic_inc(&md_event_count); wake_up(&md_event_waiters); - sysfs_notify(&mddev->kobj, NULL, "sync_action"); } EXPORT_SYMBOL_GPL(md_new_event); @@ -274,10 +273,12 @@ static mddev_t * mddev_find(dev_t unit) INIT_LIST_HEAD(&new->all_mddevs); init_timer(&new->safemode_timer); atomic_set(&new->active, 1); + atomic_set(&new->openers, 0); spin_lock_init(&new->write_lock); init_waitqueue_head(&new->sb_wait); init_waitqueue_head(&new->recovery_wait); new->reshape_position = MaxSector; + new->resync_min = 0; new->resync_max = MaxSector; new->level = LEVEL_NONE; @@ -347,21 +348,20 @@ static struct mdk_personality *find_pers(int level, char *clevel) return NULL; } +/* return the offset of the super block in 512byte sectors */ static inline sector_t calc_dev_sboffset(struct block_device *bdev) { - sector_t size = bdev->bd_inode->i_size >> BLOCK_SIZE_BITS; - return MD_NEW_SIZE_BLOCKS(size); + sector_t num_sectors = bdev->bd_inode->i_size / 512; + return MD_NEW_SIZE_SECTORS(num_sectors); } -static sector_t calc_dev_size(mdk_rdev_t *rdev, unsigned chunk_size) +static sector_t calc_num_sectors(mdk_rdev_t *rdev, unsigned chunk_size) { - sector_t size; - - size = rdev->sb_offset; + sector_t num_sectors = rdev->sb_start; if (chunk_size) - size &= ~((sector_t)chunk_size/1024 - 1); - return size; + num_sectors &= ~((sector_t)chunk_size/512 - 1); + return num_sectors; } static int alloc_disk_sb(mdk_rdev_t * rdev) @@ -372,7 +372,7 @@ static int alloc_disk_sb(mdk_rdev_t * rdev) rdev->sb_page = alloc_page(GFP_KERNEL); if (!rdev->sb_page) { printk(KERN_ALERT "md: out of memory.\n"); - return -EINVAL; + return -ENOMEM; } return 0; @@ -384,7 +384,7 @@ static void free_disk_sb(mdk_rdev_t * rdev) put_page(rdev->sb_page); rdev->sb_loaded = 0; rdev->sb_page = NULL; - rdev->sb_offset = 0; + rdev->sb_start = 0; rdev->size = 0; } } @@ -530,7 +530,7 @@ static int read_disk_sb(mdk_rdev_t * rdev, int size) return 0; - if (!sync_page_io(rdev->bdev, rdev->sb_offset<<1, size, rdev->sb_page, READ)) + if (!sync_page_io(rdev->bdev, rdev->sb_start, size, rdev->sb_page, READ)) goto fail; rdev->sb_loaded = 1; return 0; @@ -543,17 +543,12 @@ fail: static int uuid_equal(mdp_super_t *sb1, mdp_super_t *sb2) { - if ( (sb1->set_uuid0 == sb2->set_uuid0) && - (sb1->set_uuid1 == sb2->set_uuid1) && - (sb1->set_uuid2 == sb2->set_uuid2) && - (sb1->set_uuid3 == sb2->set_uuid3)) - - return 1; - - return 0; + return sb1->set_uuid0 == sb2->set_uuid0 && + sb1->set_uuid1 == sb2->set_uuid1 && + sb1->set_uuid2 == sb2->set_uuid2 && + sb1->set_uuid3 == sb2->set_uuid3; } - static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2) { int ret; @@ -564,7 +559,7 @@ static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2) if (!tmp1 || !tmp2) { ret = 0; - printk(KERN_INFO "md.c: sb1 is not equal to sb2!\n"); + printk(KERN_INFO "md.c sb_equal(): failed to allocate memory!\n"); goto abort; } @@ -577,11 +572,7 @@ static int sb_equal(mdp_super_t *sb1, mdp_super_t *sb2) tmp1->nr_disks = 0; tmp2->nr_disks = 0; - if (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4)) - ret = 0; - else - ret = 1; - + ret = (memcmp(tmp1, tmp2, MD_SB_GENERIC_CONSTANT_WORDS * 4) == 0); abort: kfree(tmp1); kfree(tmp2); @@ -658,11 +649,14 @@ static unsigned int calc_sb_csum(mdp_super_t * sb) */ struct super_type { - char *name; - struct module *owner; - int (*load_super)(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version); - int (*validate_super)(mddev_t *mddev, mdk_rdev_t *rdev); - void (*sync_super)(mddev_t *mddev, mdk_rdev_t *rdev); + char *name; + struct module *owner; + int (*load_super)(mdk_rdev_t *rdev, mdk_rdev_t *refdev, + int minor_version); + int (*validate_super)(mddev_t *mddev, mdk_rdev_t *rdev); + void (*sync_super)(mddev_t *mddev, mdk_rdev_t *rdev); + unsigned long long (*rdev_size_change)(mdk_rdev_t *rdev, + sector_t num_sectors); }; /* @@ -673,16 +667,14 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; mdp_super_t *sb; int ret; - sector_t sb_offset; /* - * Calculate the position of the superblock, + * Calculate the position of the superblock (512byte sectors), * it's at the end of the disk. * * It also happens to be a multiple of 4Kb. */ - sb_offset = calc_dev_sboffset(rdev->bdev); - rdev->sb_offset = sb_offset; + rdev->sb_start = calc_dev_sboffset(rdev->bdev); ret = read_disk_sb(rdev, MD_SB_BYTES); if (ret) return ret; @@ -759,7 +751,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version else ret = 0; } - rdev->size = calc_dev_size(rdev, sb->chunk_size); + rdev->size = calc_num_sectors(rdev, sb->chunk_size) / 2; if (rdev->size < sb->size && sb->level > 1) /* "this cannot possibly happen" ... */ @@ -1004,6 +996,26 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) } /* + * rdev_size_change for 0.90.0 + */ +static unsigned long long +super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors) +{ + if (num_sectors && num_sectors < rdev->mddev->size * 2) + return 0; /* component must fit device */ + if (rdev->mddev->bitmap_offset) + return 0; /* can't move bitmap */ + rdev->sb_start = calc_dev_sboffset(rdev->bdev); + if (!num_sectors || num_sectors > rdev->sb_start) + num_sectors = rdev->sb_start; + md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, + rdev->sb_page); + md_super_wait(rdev->mddev); + return num_sectors / 2; /* kB for sysfs */ +} + + +/* * version 1 superblock */ @@ -1034,12 +1046,12 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) { struct mdp_superblock_1 *sb; int ret; - sector_t sb_offset; + sector_t sb_start; char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; int bmask; /* - * Calculate the position of the superblock. + * Calculate the position of the superblock in 512byte sectors. * It is always aligned to a 4K boundary and * depeding on minor_version, it can be: * 0: At least 8K, but less than 12K, from end of device @@ -1048,22 +1060,20 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) */ switch(minor_version) { case 0: - sb_offset = rdev->bdev->bd_inode->i_size >> 9; - sb_offset -= 8*2; - sb_offset &= ~(sector_t)(4*2-1); - /* convert from sectors to K */ - sb_offset /= 2; + sb_start = rdev->bdev->bd_inode->i_size >> 9; + sb_start -= 8*2; + sb_start &= ~(sector_t)(4*2-1); break; case 1: - sb_offset = 0; + sb_start = 0; break; case 2: - sb_offset = 4; + sb_start = 8; break; default: return -EINVAL; } - rdev->sb_offset = sb_offset; + rdev->sb_start = sb_start; /* superblock is rarely larger than 1K, but it can be larger, * and it is safe to read 4k, so we do that @@ -1077,7 +1087,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) if (sb->magic != cpu_to_le32(MD_SB_MAGIC) || sb->major_version != cpu_to_le32(1) || le32_to_cpu(sb->max_dev) > (4096-256)/2 || - le64_to_cpu(sb->super_offset) != (rdev->sb_offset<<1) || + le64_to_cpu(sb->super_offset) != rdev->sb_start || (le32_to_cpu(sb->feature_map) & ~MD_FEATURE_ALL) != 0) return -EINVAL; @@ -1113,7 +1123,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) rdev->sb_size = (rdev->sb_size | bmask) + 1; if (minor_version - && rdev->data_offset < sb_offset + (rdev->sb_size/512)) + && rdev->data_offset < sb_start + (rdev->sb_size/512)) return -EINVAL; if (sb->level == cpu_to_le32(LEVEL_MULTIPATH)) @@ -1149,7 +1159,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) if (minor_version) rdev->size = ((rdev->bdev->bd_inode->i_size>>9) - le64_to_cpu(sb->data_offset)) / 2; else - rdev->size = rdev->sb_offset; + rdev->size = rdev->sb_start / 2; if (rdev->size < le64_to_cpu(sb->data_size)/2) return -EINVAL; rdev->size = le64_to_cpu(sb->data_size)/2; @@ -1328,35 +1338,74 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->sb_csum = calc_sb_1_csum(sb); } +static unsigned long long +super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors) +{ + struct mdp_superblock_1 *sb; + sector_t max_sectors; + if (num_sectors && num_sectors < rdev->mddev->size * 2) + return 0; /* component must fit device */ + if (rdev->sb_start < rdev->data_offset) { + /* minor versions 1 and 2; superblock before data */ + max_sectors = rdev->bdev->bd_inode->i_size >> 9; + max_sectors -= rdev->data_offset; + if (!num_sectors || num_sectors > max_sectors) + num_sectors = max_sectors; + } else if (rdev->mddev->bitmap_offset) { + /* minor version 0 with bitmap we can't move */ + return 0; + } else { + /* minor version 0; superblock after data */ + sector_t sb_start; + sb_start = (rdev->bdev->bd_inode->i_size >> 9) - 8*2; + sb_start &= ~(sector_t)(4*2 - 1); + max_sectors = rdev->size * 2 + sb_start - rdev->sb_start; + if (!num_sectors || num_sectors > max_sectors) + num_sectors = max_sectors; + rdev->sb_start = sb_start; + } + sb = (struct mdp_superblock_1 *) page_address(rdev->sb_page); + sb->data_size = cpu_to_le64(num_sectors); + sb->super_offset = rdev->sb_start; + sb->sb_csum = calc_sb_1_csum(sb); + md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size, + rdev->sb_page); + md_super_wait(rdev->mddev); + return num_sectors / 2; /* kB for sysfs */ +} static struct super_type super_types[] = { [0] = { .name = "0.90.0", .owner = THIS_MODULE, - .load_super = super_90_load, - .validate_super = super_90_validate, - .sync_super = super_90_sync, + .load_super = super_90_load, + .validate_super = super_90_validate, + .sync_super = super_90_sync, + .rdev_size_change = super_90_rdev_size_change, }, [1] = { .name = "md-1", .owner = THIS_MODULE, - .load_super = super_1_load, - .validate_super = super_1_validate, - .sync_super = super_1_sync, + .load_super = super_1_load, + .validate_super = super_1_validate, + .sync_super = super_1_sync, + .rdev_size_change = super_1_rdev_size_change, }, }; static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2) { - struct list_head *tmp, *tmp2; mdk_rdev_t *rdev, *rdev2; - rdev_for_each(rdev, tmp, mddev1) - rdev_for_each(rdev2, tmp2, mddev2) + rcu_read_lock(); + rdev_for_each_rcu(rdev, mddev1) + rdev_for_each_rcu(rdev2, mddev2) if (rdev->bdev->bd_contains == - rdev2->bdev->bd_contains) + rdev2->bdev->bd_contains) { + rcu_read_unlock(); return 1; - + } + rcu_read_unlock(); return 0; } @@ -1423,7 +1472,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) kobject_del(&rdev->kobj); goto fail; } - list_add(&rdev->same_set, &mddev->disks); + list_add_rcu(&rdev->same_set, &mddev->disks); bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk); return 0; @@ -1448,14 +1497,16 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) return; } bd_release_from_disk(rdev->bdev, rdev->mddev->gendisk); - list_del_init(&rdev->same_set); + list_del_rcu(&rdev->same_set); printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b)); rdev->mddev = NULL; sysfs_remove_link(&rdev->kobj, "block"); /* We need to delay this, otherwise we can deadlock when - * writing to 'remove' to "dev/state" + * writing to 'remove' to "dev/state". We also need + * to delay it due to rcu usage. */ + synchronize_rcu(); INIT_WORK(&rdev->del_work, md_delayed_delete); kobject_get(&rdev->kobj); schedule_work(&rdev->del_work); @@ -1511,7 +1562,6 @@ static void export_rdev(mdk_rdev_t * rdev) if (rdev->mddev) MD_BUG(); free_disk_sb(rdev); - list_del_init(&rdev->same_set); #ifndef MODULE if (test_bit(AutoDetected, &rdev->flags)) md_autodetect_dev(rdev->bdev->bd_dev); @@ -1758,11 +1808,11 @@ repeat: dprintk("%s ", bdevname(rdev->bdev,b)); if (!test_bit(Faulty, &rdev->flags)) { md_super_write(mddev,rdev, - rdev->sb_offset<<1, rdev->sb_size, + rdev->sb_start, rdev->sb_size, rdev->sb_page); dprintk(KERN_INFO "(write) %s's sb offset: %llu\n", bdevname(rdev->bdev,b), - (unsigned long long)rdev->sb_offset); + (unsigned long long)rdev->sb_start); rdev->sb_events = mddev->events; } else @@ -1787,7 +1837,7 @@ repeat: } -/* words written to sysfs files may, or my not, be \n terminated. +/* words written to sysfs files may, or may not, be \n terminated. * We want to accept with case. For this we use cmd_match. */ static int cmd_match(const char *cmd, const char *str) @@ -1886,6 +1936,8 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) err = 0; } + if (!err) + sysfs_notify(&rdev->kobj, NULL, "state"); return err ? err : len; } static struct rdev_sysfs_entry rdev_state = @@ -1931,7 +1983,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) slot = -1; else if (e==buf || (*e && *e!= '\n')) return -EINVAL; - if (rdev->mddev->pers) { + if (rdev->mddev->pers && slot == -1) { /* Setting 'slot' on an active array requires also * updating the 'rd%d' link, and communicating * with the personality with ->hot_*_disk. @@ -1939,8 +1991,6 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) * failed/spare devices. This normally happens automatically, * but not when the metadata is externally managed. */ - if (slot != -1) - return -EBUSY; if (rdev->raid_disk == -1) return -EEXIST; /* personality does all needed checks */ @@ -1954,6 +2004,43 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) sysfs_remove_link(&rdev->mddev->kobj, nm); set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); md_wakeup_thread(rdev->mddev->thread); + } else if (rdev->mddev->pers) { + mdk_rdev_t *rdev2; + struct list_head *tmp; + /* Activating a spare .. or possibly reactivating + * if we every get bitmaps working here. + */ + + if (rdev->raid_disk != -1) + return -EBUSY; + + if (rdev->mddev->pers->hot_add_disk == NULL) + return -EINVAL; + + rdev_for_each(rdev2, tmp, rdev->mddev) + if (rdev2->raid_disk == slot) + return -EEXIST; + + rdev->raid_disk = slot; + if (test_bit(In_sync, &rdev->flags)) + rdev->saved_raid_disk = slot; + else + rdev->saved_raid_disk = -1; + err = rdev->mddev->pers-> + hot_add_disk(rdev->mddev, rdev); + if (err) { + rdev->raid_disk = -1; + return err; + } else + sysfs_notify(&rdev->kobj, NULL, "state"); + sprintf(nm, "rd%d", rdev->raid_disk); + if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm)) + printk(KERN_WARNING + "md: cannot register " + "%s for %s\n", + nm, mdname(rdev->mddev)); + + /* don't wakeup anyone, leave that to userspace. */ } else { if (slot >= rdev->mddev->raid_disks) return -ENOSPC; @@ -1962,6 +2049,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) clear_bit(Faulty, &rdev->flags); clear_bit(WriteMostly, &rdev->flags); set_bit(In_sync, &rdev->flags); + sysfs_notify(&rdev->kobj, NULL, "state"); } return len; } @@ -1983,7 +2071,7 @@ offset_store(mdk_rdev_t *rdev, const char *buf, size_t len) unsigned long long offset = simple_strtoull(buf, &e, 10); if (e==buf || (*e && *e != '\n')) return -EINVAL; - if (rdev->mddev->pers) + if (rdev->mddev->pers && rdev->raid_disk >= 0) return -EBUSY; if (rdev->size && rdev->mddev->external) /* Must set offset before size, so overlap checks @@ -2015,17 +2103,30 @@ static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2) static ssize_t rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) { - char *e; - unsigned long long size = simple_strtoull(buf, &e, 10); + unsigned long long size; unsigned long long oldsize = rdev->size; mddev_t *my_mddev = rdev->mddev; - if (e==buf || (*e && *e != '\n')) + if (strict_strtoull(buf, 10, &size) < 0) return -EINVAL; - if (my_mddev->pers) - return -EBUSY; + if (size < my_mddev->size) + return -EINVAL; + if (my_mddev->pers && rdev->raid_disk >= 0) { + if (my_mddev->persistent) { + size = super_types[my_mddev->major_version]. + rdev_size_change(rdev, size * 2); + if (!size) + return -EBUSY; + } else if (!size) { + size = (rdev->bdev->bd_inode->i_size >> 10); + size -= rdev->data_offset/2; + } + if (size < my_mddev->size) + return -EINVAL; /* component must fit device */ + } + rdev->size = size; - if (size > oldsize && rdev->mddev->external) { + if (size > oldsize && my_mddev->external) { /* need to check that all other rdevs with the same ->bdev * do not overlap. We need to unlock the mddev to avoid * a deadlock. We have already changed rdev->size, and if @@ -2044,8 +2145,9 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) if (test_bit(AllReserved, &rdev2->flags) || (rdev->bdev == rdev2->bdev && rdev != rdev2 && - overlaps(rdev->data_offset, rdev->size, - rdev2->data_offset, rdev2->size))) { + overlaps(rdev->data_offset, rdev->size * 2, + rdev2->data_offset, + rdev2->size * 2))) { overlap = 1; break; } @@ -2067,8 +2169,6 @@ rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len) return -EBUSY; } } - if (size < my_mddev->size || my_mddev->size == 0) - my_mddev->size = size; return len; } @@ -2512,7 +2612,7 @@ __ATTR(resync_start, S_IRUGO|S_IWUSR, resync_start_show, resync_start_store); * When written, doesn't tear down array, but just stops it * suspended (not supported yet) * All IO requests will block. The array can be reconfigured. - * Writing this, if accepted, will block until array is quiessent + * Writing this, if accepted, will block until array is quiescent * readonly * no resync can happen. no superblocks get written. * write requests fail @@ -2585,7 +2685,7 @@ array_state_show(mddev_t *mddev, char *page) return sprintf(page, "%s\n", array_states[st]); } -static int do_md_stop(mddev_t * mddev, int ro); +static int do_md_stop(mddev_t * mddev, int ro, int is_open); static int do_md_run(mddev_t * mddev); static int restart_array(mddev_t *mddev); @@ -2599,16 +2699,16 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) break; case clear: /* stopping an active array */ - if (atomic_read(&mddev->active) > 1) + if (atomic_read(&mddev->openers) > 0) return -EBUSY; - err = do_md_stop(mddev, 0); + err = do_md_stop(mddev, 0, 0); break; case inactive: /* stopping an active array */ if (mddev->pers) { - if (atomic_read(&mddev->active) > 1) + if (atomic_read(&mddev->openers) > 0) return -EBUSY; - err = do_md_stop(mddev, 2); + err = do_md_stop(mddev, 2, 0); } else err = 0; /* already inactive */ break; @@ -2616,7 +2716,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) break; /* not supported yet */ case readonly: if (mddev->pers) - err = do_md_stop(mddev, 1); + err = do_md_stop(mddev, 1, 0); else { mddev->ro = 1; set_disk_ro(mddev->gendisk, 1); @@ -2626,7 +2726,7 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) case read_auto: if (mddev->pers) { if (mddev->ro != 1) - err = do_md_stop(mddev, 1); + err = do_md_stop(mddev, 1, 0); else err = restart_array(mddev); if (err == 0) { @@ -2681,8 +2781,10 @@ array_state_store(mddev_t *mddev, const char *buf, size_t len) } if (err) return err; - else + else { + sysfs_notify(&mddev->kobj, NULL, "array_state"); return len; + } } static struct md_sysfs_entry md_array_state = __ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store); @@ -2785,7 +2887,7 @@ size_show(mddev_t *mddev, char *page) return sprintf(page, "%llu\n", (unsigned long long)mddev->size); } -static int update_size(mddev_t *mddev, unsigned long size); +static int update_size(mddev_t *mddev, sector_t num_sectors); static ssize_t size_store(mddev_t *mddev, const char *buf, size_t len) @@ -2802,7 +2904,7 @@ size_store(mddev_t *mddev, const char *buf, size_t len) return -EINVAL; if (mddev->pers) { - err = update_size(mddev, size); + err = update_size(mddev, size * 2); md_update_sb(mddev, 1); } else { if (mddev->size == 0 || @@ -2899,7 +3001,7 @@ action_show(mddev_t *mddev, char *page) type = "check"; else type = "repair"; - } else + } else if (test_bit(MD_RECOVERY_RECOVER, &mddev->recovery)) type = "recover"; } return sprintf(page, "%s\n", type); @@ -2921,15 +3023,19 @@ action_store(mddev_t *mddev, const char *page, size_t len) } else if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || test_bit(MD_RECOVERY_NEEDED, &mddev->recovery)) return -EBUSY; - else if (cmd_match(page, "resync") || cmd_match(page, "recover")) + else if (cmd_match(page, "resync")) + set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + else if (cmd_match(page, "recover")) { + set_bit(MD_RECOVERY_RECOVER, &mddev->recovery); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); - else if (cmd_match(page, "reshape")) { + } else if (cmd_match(page, "reshape")) { int err; if (mddev->pers->start_reshape == NULL) return -EINVAL; err = mddev->pers->start_reshape(mddev); if (err) return err; + sysfs_notify(&mddev->kobj, NULL, "degraded"); } else { if (cmd_match(page, "check")) set_bit(MD_RECOVERY_CHECK, &mddev->recovery); @@ -2940,6 +3046,7 @@ action_store(mddev_t *mddev, const char *page, size_t len) } set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); + sysfs_notify(&mddev->kobj, NULL, "sync_action"); return len; } @@ -3049,11 +3156,11 @@ static ssize_t sync_speed_show(mddev_t *mddev, char *page) { unsigned long resync, dt, db; - resync = (mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active)); - dt = ((jiffies - mddev->resync_mark) / HZ); + resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active); + dt = (jiffies - mddev->resync_mark) / HZ; if (!dt) dt++; - db = resync - (mddev->resync_mark_cnt); - return sprintf(page, "%ld\n", db/dt/2); /* K/sec */ + db = resync - mddev->resync_mark_cnt; + return sprintf(page, "%lu\n", db/dt/2); /* K/sec */ } static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed); @@ -3075,6 +3182,36 @@ sync_completed_show(mddev_t *mddev, char *page) static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed); static ssize_t +min_sync_show(mddev_t *mddev, char *page) +{ + return sprintf(page, "%llu\n", + (unsigned long long)mddev->resync_min); +} +static ssize_t +min_sync_store(mddev_t *mddev, const char *buf, size_t len) +{ + unsigned long long min; + if (strict_strtoull(buf, 10, &min)) + return -EINVAL; + if (min > mddev->resync_max) + return -EINVAL; + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) + return -EBUSY; + + /* Must be a multiple of chunk_size */ + if (mddev->chunk_size) { + if (min & (sector_t)((mddev->chunk_size>>9)-1)) + return -EINVAL; + } + mddev->resync_min = min; + + return len; +} + +static struct md_sysfs_entry md_min_sync = +__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store); + +static ssize_t max_ |