aboutsummaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
authorDan Williams <dan.j.williams@intel.com>2009-09-08 17:55:54 -0700
committerDan Williams <dan.j.williams@intel.com>2009-09-08 17:55:54 -0700
commit9134d02bc0af4a8747d448d1f811ec5f8eb96df6 (patch)
tree704c3e5dcc10f360815c4868a74711f82fb62e27 /drivers/md
parentbbb20089a3275a19e475dbc21320c3742e3ca423 (diff)
parent80ffb3cceaefa405f2ecd46d66500ed8d53efe74 (diff)
Merge commit 'md/for-linus' into async-tx-next
Conflicts: drivers/md/raid5.c
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/dm-crypt.c4
-rw-r--r--drivers/md/dm-delay.c4
-rw-r--r--drivers/md/dm-exception-store.c9
-rw-r--r--drivers/md/dm-linear.c2
-rw-r--r--drivers/md/dm-mpath.c2
-rw-r--r--drivers/md/dm-raid1.c3
-rw-r--r--drivers/md/dm-stripe.c7
-rw-r--r--drivers/md/dm-table.c17
-rw-r--r--drivers/md/dm.c14
-rw-r--r--drivers/md/dm.h1
-rw-r--r--drivers/md/linear.c6
-rw-r--r--drivers/md/md.c251
-rw-r--r--drivers/md/md.h12
-rw-r--r--drivers/md/multipath.c12
-rw-r--r--drivers/md/raid0.c10
-rw-r--r--drivers/md/raid1.c16
-rw-r--r--drivers/md/raid10.c23
-rw-r--r--drivers/md/raid5.c87
18 files changed, 297 insertions, 183 deletions
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 9933eb861c7..ed103816401 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -776,7 +776,7 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
* But don't wait if split was due to the io size restriction
*/
if (unlikely(out_of_pages))
- congestion_wait(WRITE, HZ/100);
+ congestion_wait(BLK_RW_ASYNC, HZ/100);
/*
* With async crypto it is unsafe to share the crypto context
@@ -1318,7 +1318,7 @@ static int crypt_iterate_devices(struct dm_target *ti,
{
struct crypt_config *cc = ti->private;
- return fn(ti, cc->dev, cc->start, data);
+ return fn(ti, cc->dev, cc->start, ti->len, data);
}
static struct target_type crypt_target = {
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index 4e5b843cd4d..ebe7381f47c 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -324,12 +324,12 @@ static int delay_iterate_devices(struct dm_target *ti,
struct delay_c *dc = ti->private;
int ret = 0;
- ret = fn(ti, dc->dev_read, dc->start_read, data);
+ ret = fn(ti, dc->dev_read, dc->start_read, ti->len, data);
if (ret)
goto out;
if (dc->dev_write)
- ret = fn(ti, dc->dev_write, dc->start_write, data);
+ ret = fn(ti, dc->dev_write, dc->start_write, ti->len, data);
out:
return ret;
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index c3ae51584b1..3710ff88fc1 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -195,7 +195,7 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
struct dm_exception_store **store)
{
int r = 0;
- struct dm_exception_store_type *type;
+ struct dm_exception_store_type *type = NULL;
struct dm_exception_store *tmp_store;
char persistent;
@@ -211,12 +211,15 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
}
persistent = toupper(*argv[1]);
- if (persistent != 'P' && persistent != 'N') {
+ if (persistent == 'P')
+ type = get_type("P");
+ else if (persistent == 'N')
+ type = get_type("N");
+ else {
ti->error = "Persistent flag is not P or N";
return -EINVAL;
}
- type = get_type(&persistent);
if (!type) {
ti->error = "Exception store type not recognised";
r = -EINVAL;
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 9184b6deb86..82f7d6e6b1e 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -139,7 +139,7 @@ static int linear_iterate_devices(struct dm_target *ti,
{
struct linear_c *lc = ti->private;
- return fn(ti, lc->dev, lc->start, data);
+ return fn(ti, lc->dev, lc->start, ti->len, data);
}
static struct target_type linear_target = {
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index c70604a2089..6f0d90d4a54 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1453,7 +1453,7 @@ static int multipath_iterate_devices(struct dm_target *ti,
list_for_each_entry(pg, &m->priority_groups, list) {
list_for_each_entry(p, &pg->pgpaths, list) {
- ret = fn(ti, p->path.dev, ti->begin, data);
+ ret = fn(ti, p->path.dev, ti->begin, ti->len, data);
if (ret)
goto out;
}
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index ce8868c768c..9726577cde4 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -638,6 +638,7 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
spin_lock_irq(&ms->lock);
bio_list_merge(&ms->writes, &requeue);
spin_unlock_irq(&ms->lock);
+ delayed_wake(ms);
}
/*
@@ -1292,7 +1293,7 @@ static int mirror_iterate_devices(struct dm_target *ti,
for (i = 0; !ret && i < ms->nr_mirrors; i++)
ret = fn(ti, ms->mirror[i].dev,
- ms->mirror[i].offset, data);
+ ms->mirror[i].offset, ti->len, data);
return ret;
}
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index b240e85ae39..4e0e5937e42 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -320,10 +320,11 @@ static int stripe_iterate_devices(struct dm_target *ti,
int ret = 0;
unsigned i = 0;
- do
+ do {
ret = fn(ti, sc->stripe[i].dev,
- sc->stripe[i].physical_start, data);
- while (!ret && ++i < sc->stripes);
+ sc->stripe[i].physical_start,
+ sc->stripe_width, data);
+ } while (!ret && ++i < sc->stripes);
return ret;
}
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 4899ebe767c..d952b344191 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -346,7 +346,7 @@ static void close_dev(struct dm_dev_internal *d, struct mapped_device *md)
* If possible, this checks an area of a destination device is valid.
*/
static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
- sector_t start, void *data)
+ sector_t start, sector_t len, void *data)
{
struct queue_limits *limits = data;
struct block_device *bdev = dev->bdev;
@@ -359,7 +359,7 @@ static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
if (!dev_size)
return 1;
- if ((start >= dev_size) || (start + ti->len > dev_size)) {
+ if ((start >= dev_size) || (start + len > dev_size)) {
DMWARN("%s: %s too small for target",
dm_device_name(ti->table->md), bdevname(bdev, b));
return 0;
@@ -377,11 +377,11 @@ static int device_area_is_valid(struct dm_target *ti, struct dm_dev *dev,
return 0;
}
- if (ti->len & (logical_block_size_sectors - 1)) {
+ if (len & (logical_block_size_sectors - 1)) {
DMWARN("%s: len=%llu not aligned to h/w "
"logical block size %hu of %s",
dm_device_name(ti->table->md),
- (unsigned long long)ti->len,
+ (unsigned long long)len,
limits->logical_block_size, bdevname(bdev, b));
return 0;
}
@@ -482,7 +482,7 @@ static int __table_get_device(struct dm_table *t, struct dm_target *ti,
#define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r))
int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
- sector_t start, void *data)
+ sector_t start, sector_t len, void *data)
{
struct queue_limits *limits = data;
struct block_device *bdev = dev->bdev;
@@ -495,7 +495,7 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
return 0;
}
- if (blk_stack_limits(limits, &q->limits, start) < 0)
+ if (blk_stack_limits(limits, &q->limits, start << 9) < 0)
DMWARN("%s: target device %s is misaligned",
dm_device_name(ti->table->md), bdevname(bdev, b));
@@ -830,11 +830,6 @@ unsigned dm_table_get_type(struct dm_table *t)
return t->type;
}
-bool dm_table_bio_based(struct dm_table *t)
-{
- return dm_table_get_type(t) == DM_TYPE_BIO_BASED;
-}
-
bool dm_table_request_based(struct dm_table *t)
{
return dm_table_get_type(t) == DM_TYPE_REQUEST_BASED;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 3c6d4ee8921..8a311ea0d44 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1017,7 +1017,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector,
clone->bi_flags |= 1 << BIO_CLONED;
if (bio_integrity(bio)) {
- bio_integrity_clone(clone, bio, GFP_NOIO);
+ bio_integrity_clone(clone, bio, GFP_NOIO, bs);
bio_integrity_trim(clone,
bio_sector_offset(bio, idx, offset), len);
}
@@ -1045,7 +1045,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector,
clone->bi_flags &= ~(1 << BIO_SEG_VALID);
if (bio_integrity(bio)) {
- bio_integrity_clone(clone, bio, GFP_NOIO);
+ bio_integrity_clone(clone, bio, GFP_NOIO, bs);
if (idx != bio->bi_idx || clone->bi_size < bio->bi_size)
bio_integrity_trim(clone,
@@ -2203,16 +2203,6 @@ int dm_swap_table(struct mapped_device *md, struct dm_table *table)
goto out;
}
- /*
- * It is enought that blk_queue_ordered() is called only once when
- * the first bio-based table is bound.
- *
- * This setting should be moved to alloc_dev() when request-based dm
- * supports barrier.
- */
- if (!md->map && dm_table_bio_based(table))
- blk_queue_ordered(md->queue, QUEUE_ORDERED_DRAIN, NULL);
-
__unbind(md);
r = __bind(md, table, &limits);
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index 23278ae80f0..a7663eba17e 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -61,7 +61,6 @@ int dm_table_any_congested(struct dm_table *t, int bdi_bits);
int dm_table_any_busy_target(struct dm_table *t);
int dm_table_set_type(struct dm_table *t);
unsigned dm_table_get_type(struct dm_table *t);
-bool dm_table_bio_based(struct dm_table *t);
bool dm_table_request_based(struct dm_table *t);
int dm_table_alloc_md_mempools(struct dm_table *t);
void dm_table_free_md_mempools(struct dm_table *t);
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 15c8b7b25a9..5fe39c2a3d2 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -166,8 +166,8 @@ static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
rdev->sectors = sectors * mddev->chunk_sectors;
}
- blk_queue_stack_limits(mddev->queue,
- rdev->bdev->bd_disk->queue);
+ disk_stack_limits(mddev->gendisk, rdev->bdev,
+ rdev->data_offset << 9);
/* as we don't honour merge_bvec_fn, we must never risk
* violating it, so limit ->max_sector to one PAGE, as
* a one page request is never in violation.
@@ -220,6 +220,7 @@ static int linear_run (mddev_t *mddev)
mddev->queue->unplug_fn = linear_unplug;
mddev->queue->backing_dev_info.congested_fn = linear_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
+ md_integrity_register(mddev);
return 0;
}
@@ -256,6 +257,7 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
rcu_assign_pointer(mddev->private, newconf);
md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
set_capacity(mddev->gendisk, mddev->array_sectors);
+ revalidate_disk(mddev->gendisk);
call_rcu(&oldconf->rcu, free_conf);
return 0;
}
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 09be637d52c..9dd872000ce 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -359,6 +359,7 @@ static mddev_t * mddev_find(dev_t unit)
else
new->md_minor = MINOR(unit) >> MdpMinorShift;
+ mutex_init(&new->open_mutex);
mutex_init(&new->reconfig_mutex);
INIT_LIST_HEAD(&new->disks);
INIT_LIST_HEAD(&new->all_mddevs);
@@ -1308,7 +1309,12 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
}
if (mddev->level != LEVEL_MULTIPATH) {
int role;
- role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
+ if (rdev->desc_nr < 0 ||
+ rdev->desc_nr >= le32_to_cpu(sb->max_dev)) {
+ role = 0xffff;
+ rdev->desc_nr = -1;
+ } else
+ role = le16_to_cpu(sb->dev_roles[rdev->desc_nr]);
switch(role) {
case 0xffff: /* spare */
break;
@@ -1394,8 +1400,14 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
if (rdev2->desc_nr+1 > max_dev)
max_dev = rdev2->desc_nr+1;
- if (max_dev > le32_to_cpu(sb->max_dev))
+ if (max_dev > le32_to_cpu(sb->max_dev)) {
+ int bmask;
sb->max_dev = cpu_to_le32(max_dev);
+ rdev->sb_size = max_dev * 2 + 256;
+ bmask = queue_logical_block_size(rdev->bdev->bd_disk->queue)-1;
+ if (rdev->sb_size & bmask)
+ rdev->sb_size = (rdev->sb_size | bmask) + 1;
+ }
for (i=0; i<max_dev;i++)
sb->dev_roles[i] = cpu_to_le16(0xfffe);
@@ -1487,37 +1499,76 @@ static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
static LIST_HEAD(pending_raid_disks);
-static void md_integrity_check(mdk_rdev_t *rdev, mddev_t *mddev)
+/*
+ * Try to register data integrity profile for an mddev
+ *
+ * This is called when an array is started and after a disk has been kicked
+ * from the array. It only succeeds if all working and active component devices
+ * are integrity capable with matching profiles.
+ */
+int md_integrity_register(mddev_t *mddev)
+{
+ mdk_rdev_t *rdev, *reference = NULL;
+
+ if (list_empty(&mddev->disks))
+ return 0; /* nothing to do */
+ if (blk_get_integrity(mddev->gendisk))
+ return 0; /* already registered */
+ list_for_each_entry(rdev, &mddev->disks, same_set) {
+ /* skip spares and non-functional disks */
+ if (test_bit(Faulty, &rdev->flags))
+ continue;
+ if (rdev->raid_disk < 0)
+ continue;
+ /*
+ * If at least one rdev is not integrity capable, we can not
+ * enable data integrity for the md device.
+ */
+ if (!bdev_get_integrity(rdev->bdev))
+ return -EINVAL;
+ if (!reference) {
+ /* Use the first rdev as the reference */
+ reference = rdev;
+ continue;
+ }
+ /* does this rdev's profile match the reference profile? */
+ if (blk_integrity_compare(reference->bdev->bd_disk,
+ rdev->bdev->bd_disk) < 0)
+ return -EINVAL;
+ }
+ /*
+ * All component devices are integrity capable and have matching
+ * profiles, register the common profile for the md device.
+ */
+ if (blk_integrity_register(mddev->gendisk,
+ bdev_get_integrity(reference->bdev)) != 0) {
+ printk(KERN_ERR "md: failed to register integrity for %s\n",
+ mdname(mddev));
+ return -EINVAL;
+ }
+ printk(KERN_NOTICE "md: data integrity on %s enabled\n",
+ mdname(mddev));
+ return 0;
+}
+EXPORT_SYMBOL(md_integrity_register);
+
+/* Disable data integrity if non-capable/non-matching disk is being added */
+void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
{
- struct mdk_personality *pers = mddev->pers;
- struct gendisk *disk = mddev->gendisk;
struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev);
- struct blk_integrity *bi_mddev = blk_get_integrity(disk);
+ struct blk_integrity *bi_mddev = blk_get_integrity(mddev->gendisk);
- /* Data integrity passthrough not supported on RAID 4, 5 and 6 */
- if (pers && pers->level >= 4 && pers->level <= 6)
+ if (!bi_mddev) /* nothing to do */
return;
-
- /* If rdev is integrity capable, register profile for mddev */
- if (!bi_mddev && bi_rdev) {
- if (blk_integrity_register(disk, bi_rdev))
- printk(KERN_ERR "%s: %s Could not register integrity!\n",
- __func__, disk->disk_name);
- else
- printk(KERN_NOTICE "Enabling data integrity on %s\n",
- disk->disk_name);
+ if (rdev->raid_disk < 0) /* skip spares */
return;
- }
-
- /* Check that mddev and rdev have matching profiles */
- if (blk_integrity_compare(disk, rdev->bdev->bd_disk) < 0) {
- printk(KERN_ERR "%s: %s/%s integrity mismatch!\n", __func__,
- disk->disk_name, rdev->bdev->bd_disk->disk_name);
- printk(KERN_NOTICE "Disabling data integrity on %s\n",
- disk->disk_name);
- blk_integrity_unregister(disk);
- }
+ if (bi_rdev && blk_integrity_compare(mddev->gendisk,
+ rdev->bdev->bd_disk) >= 0)
+ return;
+ printk(KERN_NOTICE "disabling data integrity on %s\n", mdname(mddev));
+ blk_integrity_unregister(mddev->gendisk);
}
+EXPORT_SYMBOL(md_integrity_add_rdev);
static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
{
@@ -1591,7 +1642,6 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
/* May as well allow recovery to be retried once */
mddev->recovery_disabled = 0;
- md_integrity_check(rdev, mddev);
return 0;
fail:
@@ -1756,9 +1806,10 @@ static void print_sb_1(struct mdp_superblock_1 *sb)
__u8 *uuid;
uuid = sb->set_uuid;
- printk(KERN_INFO "md: SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x"
- ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n"
- KERN_INFO "md: Name: \"%s\" CT:%llu\n",
+ printk(KERN_INFO
+ "md: SB: (V:%u) (F:0x%08x) Array-ID:<%02x%02x%02x%02x"
+ ":%02x%02x:%02x%02x:%02x%02x:%02x%02x%02x%02x%02x%02x>\n"
+ "md: Name: \"%s\" CT:%llu\n",
le32_to_cpu(sb->major_version),
le32_to_cpu(sb->feature_map),
uuid[0], uuid[1], uuid[2], uuid[3],
@@ -1770,12 +1821,13 @@ static void print_sb_1(struct mdp_superblock_1 *sb)
& MD_SUPERBLOCK_1_TIME_SEC_MASK);
uuid = sb->device_uuid;
- printk(KERN_INFO "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
+ printk(KERN_INFO
+ "md: L%u SZ%llu RD:%u LO:%u CS:%u DO:%llu DS:%llu SO:%llu"
" RO:%llu\n"
- KERN_INFO "md: Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x"
- ":%02x%02x%02x%02x%02x%02x\n"
- KERN_INFO "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
- KERN_INFO "md: (MaxDev:%u) \n",
+ "md: Dev:%08x UUID: %02x%02x%02x%02x:%02x%02x:%02x%02x:%02x%02x"
+ ":%02x%02x%02x%02x%02x%02x\n"
+ "md: (F:0x%08x) UT:%llu Events:%llu ResyncOffset:%llu CSUM:0x%08x\n"
+ "md: (MaxDev:%u) \n",
le32_to_cpu(sb->level),
(unsigned long long)le64_to_cpu(sb->size),
le32_to_cpu(sb->raid_disks),
@@ -1923,17 +1975,14 @@ repeat:
/* otherwise we have to go forward and ... */
mddev->events ++;
if (!mddev->in_sync || mddev->recovery_cp != MaxSector) { /* not clean */
- /* .. if the array isn't clean, insist on an odd 'events' */
- if ((mddev->events&1)==0) {
- mddev->events++;
+ /* .. if the array isn't clean, an 'even' event must also go
+ * to spares. */
+ if ((mddev->events&1)==0)
nospares = 0;
- }
} else {
- /* otherwise insist on an even 'events' (for clean states) */
- if ((mddev->events&1)) {
- mddev->events++;
+ /* otherwise an 'odd' event must go to spares */
+ if ((mddev->events&1))
nospares = 0;
- }
}
}
@@ -2655,6 +2704,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
ssize_t rv = len;
struct mdk_personality *pers;
void *priv;
+ mdk_rdev_t *rdev;
if (mddev->pers == NULL) {
if (len == 0)
@@ -2734,6 +2784,12 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
mddev_suspend(mddev);
mddev->pers->stop(mddev);
module_put(mddev->pers->owner);
+ /* Invalidate devices that are now superfluous */
+ list_for_each_entry(rdev, &mddev->disks, same_set)
+ if (rdev->raid_disk >= mddev->raid_disks) {
+ rdev->raid_disk = -1;
+ clear_bit(In_sync, &rdev->flags);
+ }
mddev->pers = pers;
mddev->private = priv;
strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
@@ -3543,6 +3599,7 @@ max_sync_store(mddev_t *mddev, const char *buf, size_t len)
if (max < mddev->resync_min)
return -EINVAL;
if (max < mddev->resync_max &&
+ mddev->ro == 0 &&
test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
return -EBUSY;
@@ -3573,7 +3630,8 @@ suspend_lo_store(mddev_t *mddev, const char *buf, size_t len)
char *e;
unsigned long long new = simple_strtoull(buf, &e, 10);
- if (mddev->pers->quiesce == NULL)
+ if (mddev->pers == NULL ||
+ mddev->pers->quiesce == NULL)
return -EINVAL;
if (buf == e || (*e && *e != '\n'))
return -EINVAL;
@@ -3601,7 +3659,8 @@ suspend_hi_store(mddev_t *mddev, const char *buf, size_t len)
char *e;
unsigned long long new = simple_strtoull(buf, &e, 10);
- if (mddev->pers->quiesce == NULL)
+ if (mddev->pers == NULL ||
+ mddev->pers->quiesce == NULL)
return -EINVAL;
if (buf == e || (*e && *e != '\n'))
return -EINVAL;
@@ -3681,17 +3740,8 @@ array_size_store(mddev_t *mddev, const char *buf, size_t len)
mddev->array_sectors = sectors;
set_capacity(mddev->gendisk, mddev->array_sectors);
- if (mddev->pers) {
- struct block_device *bdev = bdget_disk(mddev->gendisk, 0);
-
- if (bdev) {
- mutex_lock(&bdev->bd_inode->i_mutex);
- i_size_write(bdev->bd_inode,
- (loff_t)mddev->array_sectors << 9);
- mutex_unlock(&bdev->bd_inode->i_mutex);
- bdput(bdev);
- }
- }
+ if (mddev->pers)
+ revalidate_disk(mddev->gendisk);
return len;
}
@@ -3844,11 +3894,9 @@ static int md_alloc(dev_t dev, char *name)
flush_scheduled_work();
mutex_lock(&disks_mutex);
- if (mddev->gendisk) {
- mutex_unlock(&disks_mutex);
- mddev_put(mddev);
- return -EEXIST;
- }
+ error = -EEXIST;
+ if (mddev->gendisk)
+ goto abort;
if (name) {
/* Need to ensure that 'name' is not a duplicate.
@@ -3860,17 +3908,15 @@ static int md_alloc(dev_t dev, char *name)
if (mddev2->gendisk &&
strcmp(mddev2->gendisk->disk_name, name) == 0) {
spin_unlock(&all_mddevs_lock);
- return -EEXIST;
+ goto abort;
}
spin_unlock(&all_mddevs_lock);
}
+ error = -ENOMEM;
mddev->queue = blk_alloc_queue(GFP_KERNEL);
- if (!mddev->queue) {
- mutex_unlock(&disks_mutex);
- mddev_put(mddev);
- return -ENOMEM;
- }
+ if (!mddev->queue)
+ goto abort;
mddev->queue->queuedata = mddev;
/* Can be unlocked because the queue is new: no concurrency */
@@ -3880,11 +3926,9 @@ static int md_alloc(dev_t dev, char *name)
disk = alloc_disk(1 << shift);
if (!disk) {
- mutex_unlock(&disks_mutex);
blk_cleanup_queue(mddev->queue);
mddev->queue = NULL;
- mddev_put(mddev);
- return -ENOMEM;
+ goto abort;
}
disk->major = MAJOR(mddev->unit);
disk->first_minor = unit << shift;
@@ -3906,16 +3950,22 @@ static int md_alloc(dev_t dev, char *name)
mddev->gendisk = disk;
error = kobject_init_and_add(&mddev->kobj, &md_ktype,
&disk_to_dev(disk)->kobj, "%s", "md");
- mutex_unlock(&disks_mutex);
- if (error)
+ if (error) {
+ /* This isn't possible, but as kobject_init_and_add is marked
+ * __must_check, we must do something with the result
+ */
printk(KERN_WARNING "md: cannot register %s/md - name in use\n",
disk->disk_name);
- else {
+ error = 0;
+ }
+ abort:
+ mutex_unlock(&disks_mutex);
+ if (!error) {
kobject_uevent(&mddev->kobj, KOBJ_ADD);
mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state");
}
mddev_put(mddev);
- return 0;
+ return error;
}
static struct kobject *md_probe(dev_t dev, int *part, void *data)
@@ -4044,10 +4094,6 @@ static int do_md_run(mddev_t * mddev)
}
strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
- if (pers->level >= 4 && pers->level <= 6)
- /* Cannot support integrity (yet) */
- blk_integrity_unregister(mddev->gendisk);
-
if (mddev->reshape_position != MaxSector &&
pers->start_reshape == NULL) {
/* This personality cannot handle reshaping... */
@@ -4185,6 +4231,7 @@ static int do_md_run(mddev_t * mddev)
md_wakeup_thread(mddev->thread);
md_wakeup_thread(mddev->sync_thread); /* possibly kick off a reshape */
+ revalidate_disk(mddev->gendisk);
mddev->changed = 1;
md_new_event(mddev);
sysfs_notify_dirent(mddev->sysfs_state);
@@ -4256,12 +4303,11 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
struct gendisk *disk = mddev->gendisk;
mdk_rdev_t *rdev;
+ mutex_lock(&mddev->open_mutex);
if (atomic_read(&mddev->openers) > is_open) {
printk("md: %s still in use.\n",mdname(mddev));
- return -EBUSY;
- }
-
- if (mddev->pers) {
+ err = -EBUSY;
+ } else if (mddev->pers) {
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
@@ -4318,8 +4364,12 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
if (mode == 1)
set_disk_ro(disk, 1);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
+ err = 0;
}
-
+out:
+ mutex_unlock(&mddev->open_mutex);
+ if (err)
+ return err;
/*
* Free resources if final stop
*/
@@ -4385,7 +4435,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
blk_integrity_unregister(disk);
md_new_event(mddev);
sysfs_notify_dirent(mddev->sysfs_state);
-out:
return err;
}
@@ -5083,18 +5132,8 @@ static int update_size(mddev_t *mddev, sector_t num_sectors)
return -ENOSPC;
}
rv = mddev->pers->resize(mddev, num_sectors);
- if (!rv) {
- struct block_device *bdev;
-
- bdev = bdget_disk(mddev->gendisk, 0);
- if (bdev) {
- mutex_lock(&bdev->bd_inode->i_mutex);
- i_size_write(bdev->bd_inode,
- (loff_t)mddev->array_sectors << 9);
- mutex_unlock(&bdev->bd_inode->i_mutex);
- bdput(bdev);
- }
- }
+ if (!rv)
+ revalidate_disk(mddev->gendisk);
return rv;
}
@@ -5480,12 +5519,12 @@ static int md_open(struct block_device *bdev, fmode_t mode)
}
BUG_ON(mddev != bdev->bd_disk->private_data);
- if ((err = mutex_lock_interruptible_nested(&mddev->reconfig_mutex, 1)))
+ if ((err = mutex_lock_interruptible(&mddev->open_mutex)))
goto out;
err = 0;
atomic_inc(&mddev->openers);
- mddev_unlock(mddev);
+ mutex_unlock(&mddev->open_mutex);
check_disk_change(bdev);
out:
@@ -6334,10 +6373,16 @@ void md_do_sync(mddev_t *mddev)
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
}
- if (j >= mddev->resync_max)
- wait_event(mddev->recovery_wait,
- mddev->resync_max > j
- || kthread_should_stop());
+ while (j >= mddev->resync_max && !kthread_should_stop()) {
+ /* As this condition is controlled by user-space,
+ * we can block indefinitely, so use '_interruptible'
+ * to avoid triggering warnings.
+ */
+ flush_signals(current); /* just in case */
+ wait_event_interruptible(mddev->recovery_wait,
+ mddev->resync_max > j
+ || kthread_should_stop());
+ }
if (kthread_should_stop())
goto interrupted;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 9430a110db9..f8fc188bc76 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -223,6 +223,16 @@ struct mddev_s
* so we don't loop trying */
int in_sync; /* know to not need resync */
+ /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so
+ * that we are never stopping an array while it is open.
+ * 'reconfig_mutex' protects all other reconfiguration.
+ * These locks are separate due to conflicting interactions
+ * with bdev->bd_mutex.
+ * Lock ordering is:
+ * reconfig_mutex -> bd_mutex : e.g. do_md_run -> revalidate_disk
+ * bd_mutex -> open_mutex: e.g. __blkdev_get -> md_open
+ */
+ struct mutex open_mutex;
struct mutex reconfig_mutex;
atomic_t active; /* general refcount */
atomic_t openers; /* number of active opens */
@@ -431,5 +441,7 @@ extern int md_allow_write(mddev_t *mddev);
extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors);
extern int md_check_no_bitmap(mddev_t *mddev);
+extern int md_integrity_register(mddev_t *mddev);
+void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev);
#endif /* _MD_MD_H */
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index cbe368fa659..7140909f666 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -294,7 +294,8 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
for (path = first; path <= last; path++)
if ((p=conf->multipaths+path)->rdev == NULL) {
q = rdev->bdev->bd_disk->queue;
- blk_queue_stack_limits(mddev->queue, q);
+ disk_stack_limits(mddev->gendisk, rdev->bdev,
+ rdev->data_offset << 9);
/* as we don't honour merge_bvec_fn, we must never risk
* violating it, so limit ->max_sector to one PAGE, as
@@ -312,6 +313,7 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
set_bit(In_sync, &rdev->flags);
rcu_assign_pointer(p->rdev, rdev);
err = 0;
+ md_integrity_add_rdev(rdev, mddev);
break;
}
@@ -344,7 +346,9 @@ static int multipath_remove_disk(mddev_t *mddev, int number)
/* lost the race, try later */
err = -EBUSY;
p->rdev = rdev;
+ goto abort;
}
+ md_integrity_register(mddev);
}
abort:
@@ -463,9 +467,9 @@ static int multipath_run (mddev_t *mddev)
disk = conf->multipaths + disk_idx;
disk->rdev = rdev;
+ disk_stack_limits(mddev->gendisk, rdev->bdev,
+ rdev->data_offset << 9);
- blk_queue_stack_limits(mddev->queue,
- rdev->bdev->bd_disk->queue);
/* as we don't honour merge_bvec_fn, we must never risk
* violating it, not that we ever expect a device with
* a merge_bvec_fn to be involved in multipath */
@@ -518,7 +522,7 @@ static int multipath_run (mddev_t *mddev)
mddev->queue->unplug_fn = multipath_unplug;
mddev->queue->backing_dev_info.congested_fn = multipath_congested;
mddev->queue->backing_dev_info.congested_data = mddev;
-
+ md_integrity_register(mddev);
return 0;
out_free_conf:
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index ab4a489d869..898e2bdfee4 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -170,8 +170,8 @@ static int create_strip_zones(mddev_t *mddev)
}
dev[j] = rdev1;
- blk_queue_stack_limits(mddev->queue,
- rdev1->bdev->bd_disk->queue);
+ disk_stack_limits(mddev->gendisk, rdev1->bdev,
+ rdev1->data_offset << 9);
/* as we don't honour merge_bvec_fn, we must never risk
* violating it, so limit ->max_sector to one PAGE, as
* a one page request is never in violation.
@@ -250,6 +250,11 @@ static int create_strip_zones(mddev_t *mddev)
mddev->chunk_sectors << 9);
goto abort;
}
+
+ blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9);
+ blk_queue_io_opt(mddev->queue,
+ (mddev->chunk_sectors << 9) * mddev->raid_disks);
+
printk(KERN_INFO "raid0: done.\n");
mddev->private = conf;
return 0;
@@ -346,6 +351,7 @@