aboutsummaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/Kconfig26
-rw-r--r--drivers/md/dm-crypt.c11
-rw-r--r--drivers/md/dm-ioctl.c100
-rw-r--r--drivers/md/dm-linear.c8
-rw-r--r--drivers/md/dm-raid1.c29
-rw-r--r--drivers/md/dm-snap.c409
-rw-r--r--drivers/md/dm-stripe.c13
-rw-r--r--drivers/md/dm-table.c53
-rw-r--r--drivers/md/dm.c88
-rw-r--r--drivers/md/dm.h23
-rw-r--r--drivers/md/kcopyd.c28
-rw-r--r--drivers/md/md.c235
-rw-r--r--drivers/md/raid1.c27
-rw-r--r--drivers/md/raid5.c719
-rw-r--r--drivers/md/raid6main.c14
15 files changed, 1423 insertions, 360 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index ac43f98062f..fd2aae150cc 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -127,6 +127,32 @@ config MD_RAID5
If unsure, say Y.
+config MD_RAID5_RESHAPE
+ bool "Support adding drives to a raid-5 array (experimental)"
+ depends on MD_RAID5 && EXPERIMENTAL
+ ---help---
+ A RAID-5 set can be expanded by adding extra drives. This
+ requires "restriping" the array which means (almost) every
+ block must be written to a different place.
+
+ This option allows such restriping to be done while the array
+ is online. However it is still EXPERIMENTAL code. It should
+ work, but please be sure that you have backups.
+
+ You will need a version of mdadm newer than 2.3.1. During the
+ early stage of reshape there is a critical section where live data
+ is being over-written. A crash during this time needs extra care
+ for recovery. The newer mdadm takes a copy of the data in the
+ critical section and will restore it, if necessary, after a crash.
+
+ The mdadm usage is e.g.
+ mdadm --grow /dev/md1 --raid-disks=6
+ to grow '/dev/md1' to having 6 disks.
+
+ Note: The array can only be expanded, not contracted.
+ There should be enough spares already present to make the new
+ array workable.
+
config MD_RAID6
tristate "RAID-6 mode"
depends on BLK_DEV_MD
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 259e86f2654..61a590bb624 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -518,6 +518,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
char *ivopts;
unsigned int crypto_flags;
unsigned int key_size;
+ unsigned long long tmpll;
if (argc != 5) {
ti->error = PFX "Not enough arguments";
@@ -633,15 +634,17 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad5;
}
- if (sscanf(argv[2], SECTOR_FORMAT, &cc->iv_offset) != 1) {
+ if (sscanf(argv[2], "%llu", &tmpll) != 1) {
ti->error = PFX "Invalid iv_offset sector";
goto bad5;
}
+ cc->iv_offset = tmpll;
- if (sscanf(argv[4], SECTOR_FORMAT, &cc->start) != 1) {
+ if (sscanf(argv[4], "%llu", &tmpll) != 1) {
ti->error = PFX "Invalid device sector";
goto bad5;
}
+ cc->start = tmpll;
if (dm_get_device(ti, argv[3], cc->start, ti->len,
dm_table_get_mode(ti->table), &cc->dev)) {
@@ -885,8 +888,8 @@ static int crypt_status(struct dm_target *ti, status_type_t type,
result[sz++] = '-';
}
- DMEMIT(" " SECTOR_FORMAT " %s " SECTOR_FORMAT,
- cc->iv_offset, cc->dev->name, cc->start);
+ DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset,
+ cc->dev->name, (unsigned long long)cc->start);
break;
}
return 0;
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 442e2be6052..8edd6435414 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -15,6 +15,7 @@
#include <linux/slab.h>
#include <linux/devfs_fs_kernel.h>
#include <linux/dm-ioctl.h>
+#include <linux/hdreg.h>
#include <asm/uaccess.h>
@@ -244,9 +245,9 @@ static void __hash_remove(struct hash_cell *hc)
dm_table_put(table);
}
- dm_put(hc->md);
if (hc->new_map)
dm_table_put(hc->new_map);
+ dm_put(hc->md);
free_cell(hc);
}
@@ -600,12 +601,22 @@ static int dev_create(struct dm_ioctl *param, size_t param_size)
*/
static struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param)
{
+ struct mapped_device *md;
+ void *mdptr = NULL;
+
if (*param->uuid)
return __get_uuid_cell(param->uuid);
- else if (*param->name)
+
+ if (*param->name)
return __get_name_cell(param->name);
- else
- return dm_get_mdptr(huge_decode_dev(param->dev));
+
+ md = dm_get_md(huge_decode_dev(param->dev));
+ if (md) {
+ mdptr = dm_get_mdptr(md);
+ dm_put(md);
+ }
+
+ return mdptr;
}
static struct mapped_device *find_device(struct dm_ioctl *param)
@@ -690,6 +701,54 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size)
return dm_hash_rename(param->name, new_name);
}
+static int dev_set_geometry(struct dm_ioctl *param, size_t param_size)
+{
+ int r = -EINVAL, x;
+ struct mapped_device *md;
+ struct hd_geometry geometry;
+ unsigned long indata[4];
+ char *geostr = (char *) param + param->data_start;
+
+ md = find_device(param);
+ if (!md)
+ return -ENXIO;
+
+ if (geostr < (char *) (param + 1) ||
+ invalid_str(geostr, (void *) param + param_size)) {
+ DMWARN("Invalid geometry supplied.");
+ goto out;
+ }
+
+ x = sscanf(geostr, "%lu %lu %lu %lu", indata,
+ indata + 1, indata + 2, indata + 3);
+
+ if (x != 4) {
+ DMWARN("Unable to interpret geometry settings.");
+ goto out;
+ }
+
+ if (indata[0] > 65535 || indata[1] > 255 ||
+ indata[2] > 255 || indata[3] > ULONG_MAX) {
+ DMWARN("Geometry exceeds range limits.");
+ goto out;
+ }
+
+ geometry.cylinders = indata[0];
+ geometry.heads = indata[1];
+ geometry.sectors = indata[2];
+ geometry.start = indata[3];
+
+ r = dm_set_geometry(md, &geometry);
+ if (!r)
+ r = __dev_status(md, param);
+
+ param->data_size = 0;
+
+out:
+ dm_put(md);
+ return r;
+}
+
static int do_suspend(struct dm_ioctl *param)
{
int r = 0;
@@ -975,33 +1034,43 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
int r;
struct hash_cell *hc;
struct dm_table *t;
+ struct mapped_device *md;
- r = dm_table_create(&t, get_mode(param), param->target_count);
+ md = find_device(param);
+ if (!md)
+ return -ENXIO;
+
+ r = dm_table_create(&t, get_mode(param), param->target_count, md);
if (r)
- return r;
+ goto out;
r = populate_table(t, param, param_size);
if (r) {
dm_table_put(t);
- return r;
+ goto out;
}
down_write(&_hash_lock);
- hc = __find_device_hash_cell(param);
- if (!hc) {
- DMWARN("device doesn't appear to be in the dev hash table.");
- up_write(&_hash_lock);
+ hc = dm_get_mdptr(md);
+ if (!hc || hc->md != md) {
+ DMWARN("device has been removed from the dev hash table.");
dm_table_put(t);
- return -ENXIO;
+ up_write(&_hash_lock);
+ r = -ENXIO;
+ goto out;
}
if (hc->new_map)
dm_table_put(hc->new_map);
hc->new_map = t;
+ up_write(&_hash_lock);
+
param->flags |= DM_INACTIVE_PRESENT_FLAG;
+ r = __dev_status(md, param);
+
+out:
+ dm_put(md);
- r = __dev_status(hc->md, param);
- up_write(&_hash_lock);
return r;
}
@@ -1214,7 +1283,8 @@ static ioctl_fn lookup_ioctl(unsigned int cmd)
{DM_LIST_VERSIONS_CMD, list_versions},
- {DM_TARGET_MSG_CMD, target_message}
+ {DM_TARGET_MSG_CMD, target_message},
+ {DM_DEV_SET_GEOMETRY_CMD, dev_set_geometry}
};
return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn;
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 6a2cd5dc8a6..daf586c0898 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -26,6 +26,7 @@ struct linear_c {
static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
struct linear_c *lc;
+ unsigned long long tmp;
if (argc != 2) {
ti->error = "dm-linear: Invalid argument count";
@@ -38,10 +39,11 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv)
return -ENOMEM;
}
- if (sscanf(argv[1], SECTOR_FORMAT, &lc->start) != 1) {
+ if (sscanf(argv[1], "%llu", &tmp) != 1) {
ti->error = "dm-linear: Invalid device sector";
goto bad;
}
+ lc->start = tmp;
if (dm_get_device(ti, argv[0], lc->start, ti->len,
dm_table_get_mode(ti->table), &lc->dev)) {
@@ -87,8 +89,8 @@ static int linear_status(struct dm_target *ti, status_type_t type,
break;
case STATUSTYPE_TABLE:
- snprintf(result, maxlen, "%s " SECTOR_FORMAT, lc->dev->name,
- lc->start);
+ snprintf(result, maxlen, "%s %llu", lc->dev->name,
+ (unsigned long long)lc->start);
break;
}
return 0;
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 4e90f231fbf..d12cf3e5e07 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -402,9 +402,21 @@ static void rh_dec(struct region_hash *rh, region_t region)
spin_lock_irqsave(&rh->region_lock, flags);
if (atomic_dec_and_test(&reg->pending)) {
+ /*
+ * There is no pending I/O for this region.
+ * We can move the region to corresponding list for next action.
+ * At this point, the region is not yet connected to any list.
+ *
+ * If the state is RH_NOSYNC, the region should be kept off
+ * from clean list.
+ * The hash entry for RH_NOSYNC will remain in memory
+ * until the region is recovered or the map is reloaded.
+ */
+
+ /* do nothing for RH_NOSYNC */
if (reg->state == RH_RECOVERING) {
list_add_tail(&reg->list, &rh->quiesced_regions);
- } else {
+ } else if (reg->state == RH_DIRTY) {
reg->state = RH_CLEAN;
list_add(&reg->list, &rh->clean_regions);
}
@@ -922,9 +934,9 @@ static inline int _check_region_size(struct dm_target *ti, uint32_t size)
static int get_mirror(struct mirror_set *ms, struct dm_target *ti,
unsigned int mirror, char **argv)
{
- sector_t offset;
+ unsigned long long offset;
- if (sscanf(argv[1], SECTOR_FORMAT, &offset) != 1) {
+ if (sscanf(argv[1], "%llu", &offset) != 1) {
ti->error = "dm-mirror: Invalid offset";
return -EINVAL;
}
@@ -1191,16 +1203,17 @@ static int mirror_status(struct dm_target *ti, status_type_t type,
for (m = 0; m < ms->nr_mirrors; m++)
DMEMIT("%s ", ms->mirror[m].dev->name);
- DMEMIT(SECTOR_FORMAT "/" SECTOR_FORMAT,
- ms->rh.log->type->get_sync_count(ms->rh.log),
- ms->nr_regions);
+ DMEMIT("%llu/%llu",
+ (unsigned long long)ms->rh.log->type->
+ get_sync_count(ms->rh.log),
+ (unsigned long long)ms->nr_regions);
break;
case STATUSTYPE_TABLE:
DMEMIT("%d ", ms->nr_mirrors);
for (m = 0; m < ms->nr_mirrors; m++)
- DMEMIT("%s " SECTOR_FORMAT " ",
- ms->mirror[m].dev->name, ms->mirror[m].offset);
+ DMEMIT("%s %llu ", ms->mirror[m].dev->name,
+ (unsigned long long)ms->mirror[m].offset);
}
return 0;
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 7401540086d..08312b46463 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -49,11 +49,26 @@ struct pending_exception {
struct bio_list snapshot_bios;
/*
- * Other pending_exceptions that are processing this
- * chunk. When this list is empty, we know we can
- * complete the origins.
+ * Short-term queue of pending exceptions prior to submission.
*/
- struct list_head siblings;
+ struct list_head list;
+
+ /*
+ * The primary pending_exception is the one that holds
+ * the sibling_count and the list of origin_bios for a
+ * group of pending_exceptions. It is always last to get freed.
+ * These fields get set up when writing to the origin.
+ */
+ struct pending_exception *primary_pe;
+
+ /*
+ * Number of pending_exceptions processing this chunk.
+ * When this drops to zero we must complete the origin bios.
+ * If incrementing or decrementing this, hold pe->snap->lock for
+ * the sibling concerned and not pe->primary_pe->snap->lock unless
+ * they are the same.
+ */
+ atomic_t sibling_count;
/* Pointer back to snapshot context */
struct dm_snapshot *snap;
@@ -377,6 +392,8 @@ static void read_snapshot_metadata(struct dm_snapshot *s)
down_write(&s->lock);
s->valid = 0;
up_write(&s->lock);
+
+ dm_table_event(s->table);
}
}
@@ -542,8 +559,12 @@ static void snapshot_dtr(struct dm_target *ti)
{
struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
+ /* Prevent further origin writes from using this snapshot. */
+ /* After this returns there can be no new kcopyd jobs. */
unregister_snapshot(s);
+ kcopyd_client_destroy(s->kcopyd_client);
+
exit_exception_table(&s->pending, pending_cache);
exit_exception_table(&s->complete, exception_cache);
@@ -552,7 +573,7 @@ static void snapshot_dtr(struct dm_target *ti)
dm_put_device(ti, s->origin);
dm_put_device(ti, s->cow);
- kcopyd_client_destroy(s->kcopyd_client);
+
kfree(s);
}
@@ -586,78 +607,117 @@ static void error_bios(struct bio *bio)
}
}
+static inline void error_snapshot_bios(struct pending_exception *pe)
+{
+ error_bios(bio_list_get(&pe->snapshot_bios));
+}
+
static struct bio *__flush_bios(struct pending_exception *pe)
{
- struct pending_exception *sibling;
+ /*
+ * If this pe is involved in a write to the origin and
+ * it is the last sibling to complete then release
+ * the bios for the original write to the origin.
+ */
+
+ if (pe->primary_pe &&
+ atomic_dec_and_test(&pe->primary_pe->sibling_count))
+ return bio_list_get(&pe->primary_pe->origin_bios);
+
+ return NULL;
+}
+
+static void __invalidate_snapshot(struct dm_snapshot *s,
+ struct pending_exception *pe, int err)
+{
+ if (!s->valid)
+ return;
- if (list_empty(&pe->siblings))
- return bio_list_get(&pe->origin_bios);
+ if (err == -EIO)
+ DMERR("Invalidating snapshot: Error reading/writing.");
+ else if (err == -ENOMEM)
+ DMERR("Invalidating snapshot: Unable to allocate exception.");
- sibling = list_entry(pe->siblings.next,
- struct pending_exception, siblings);
+ if (pe)
+ remove_exception(&pe->e);
- list_del(&pe->siblings);
+ if (s->store.drop_snapshot)
+ s->store.drop_snapshot(&s->store);
- /* This is fine as long as kcopyd is single-threaded. If kcopyd
- * becomes multi-threaded, we'll need some locking here.
- */
- bio_list_merge(&sibling->origin_bios, &pe->origin_bios);
+ s->valid = 0;
- return NULL;
+ dm_table_event(s->table);
}
static void pending_complete(struct pending_exception *pe, int success)
{
struct exception *e;
+ struct pending_exception *primary_pe;
struct dm_snapshot *s = pe->snap;
struct bio *flush = NULL;
- if (success) {
- e = alloc_exception();
- if (!e) {
- DMWARN("Unable to allocate exception.");
- down_write(&s->lock);
- s->store.drop_snapshot(&s->store);
- s->valid = 0;
- flush = __flush_bios(pe);
- up_write(&s->lock);
-
- error_bios(bio_list_get(&pe->snapshot_bios));
- goto out;
- }
- *e = pe->e;
-
- /*
- * Add a proper exception, and remove the
- * in-flight exception from the list.
- */
+ if (!success) {
+ /* Read/write error - snapshot is unusable */
down_write(&s->lock);
- insert_exception(&s->complete, e);
- remove_exception(&pe->e);
+ __invalidate_snapshot(s, pe, -EIO);
flush = __flush_bios(pe);
-
- /* Submit any pending write bios */
up_write(&s->lock);
- flush_bios(bio_list_get(&pe->snapshot_bios));
- } else {
- /* Read/write error - snapshot is unusable */
+ error_snapshot_bios(pe);
+ goto out;
+ }
+
+ e = alloc_exception();
+ if (!e) {
down_write(&s->lock);
- if (s->valid)
- DMERR("Error reading/writing snapshot");
- s->store.drop_snapshot(&s->store);
- s->valid = 0;
- remove_exception(&pe->e);
+ __invalidate_snapshot(s, pe, -ENOMEM);
flush = __flush_bios(pe);
up_write(&s->lock);
- error_bios(bio_list_get(&pe->snapshot_bios));
+ error_snapshot_bios(pe);
+ goto out;
+ }
+ *e = pe->e;
- dm_table_event(s->table);
+ /*
+ * Add a proper exception, and remove the
+ * in-flight exception from the list.
+ */
+ down_write(&s->lock);
+ if (!s->valid) {
+ flush = __flush_bios(pe);
+ up_write(&s->lock);
+
+ free_exception(e);
+
+ error_snapshot_bios(pe);
+ goto out;
}
+ insert_exception(&s->complete, e);
+ remove_exception(&pe->e);
+ flush = __flush_bios(pe);
+
+ up_write(&s->lock);
+
+ /* Submit any pending write bios */
+ flush_bios(bio_list_get(&pe->snapshot_bios));
+
out:
- free_pending_exception(pe);
+ primary_pe = pe->primary_pe;
+
+ /*
+ * Free the pe if it's not linked to an origin write or if
+ * it's not itself a primary pe.
+ */
+ if (!primary_pe || primary_pe != pe)
+ free_pending_exception(pe);
+
+ /*
+ * Free the primary pe if nothing references it.
+ */
+ if (primary_pe && !atomic_read(&primary_pe->sibling_count))
+ free_pending_exception(primary_pe);
if (flush)
flush_bios(flush);
@@ -734,38 +794,45 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio)
if (e) {
/* cast the exception to a pending exception */
pe = container_of(e, struct pending_exception, e);
+ goto out;
+ }
- } else {
- /*
- * Create a new pending exception, we don't want
- * to hold the lock while we do this.
- */
- up_write(&s->lock);
- pe = alloc_pending_exception();
- down_write(&s->lock);
+ /*
+ * Create a new pending exception, we don't want
+ * to hold the lock while we do this.
+ */
+ up_write(&s->lock);
+ pe = alloc_pending_exception();
+ down_write(&s->lock);
- e = lookup_exception(&s->pending, chunk);
- if (e) {
- free_pending_exception(pe);
- pe = container_of(e, struct pending_exception, e);
- } else {
- pe->e.old_chunk = chunk;
- bio_list_init(&pe->origin_bios);
- bio_list_init(&pe->snapshot_bios);
- INIT_LIST_HEAD(&pe->siblings);
- pe->snap = s;
- pe->started = 0;
-
- if (s->store.prepare_exception(&s->store, &pe->e)) {
- free_pending_exception(pe);
- s->valid = 0;
- return NULL;
- }
+ if (!s->valid) {
+ free_pending_exception(pe);
+ return NULL;
+ }
- insert_exception(&s->pending, &pe->e);
- }
+ e = lookup_exception(&s->pending, chunk);
+ if (e) {
+ free_pending_exception(pe);
+ pe = container_of(e, struct pending_exception, e);
+ goto out;
+ }
+
+ pe->e.old_chunk = chunk;
+ bio_list_init(&pe->origin_bios);
+ bio_list_init(&pe->snapshot_bios);
+ pe->primary_pe = NULL;
+ atomic_set(&pe->sibling_count, 1);
+ pe->snap = s;
+ pe->started = 0;
+
+ if (s->store.prepare_exception(&s->store, &pe->e)) {
+ free_pending_exception(pe);
+ return NULL;
}
+ insert_exception(&s->pending, &pe->e);
+
+ out:
return pe;
}
@@ -782,13 +849,15 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
{
struct exception *e;
struct dm_snapshot *s = (struct dm_snapshot *) ti->private;
+ int copy_needed = 0;
int r = 1;
chunk_t chunk;
- struct pending_exception *pe;
+ struct pending_exception *pe = NULL;
chunk = sector_to_chunk(s, bio->bi_sector);
/* Full snapshots are not usable */
+ /* To get here the table must be live so s->active is always set. */
if (!s->valid)
return -EIO;
@@ -806,36 +875,41 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
* to copy an exception */
down_write(&s->lock);
+ if (!s->valid) {
+ r = -EIO;
+ goto out_unlock;
+ }
+
/* If the block is already remapped - use that, else remap it */
e = lookup_exception(&s->complete, chunk);
if (e) {
remap_exception(s, e, bio);
- up_write(&s->lock);
-
- } else {
- pe = __find_pending_exception(s, bio);
-
- if (!pe) {
- if (s->store.drop_snapshot)
- s->store.drop_snapshot(&s->store);
- s->valid = 0;
- r = -EIO;
- up_write(&s->lock);
- } else {
- remap_exception(s, &pe->e, bio);
- bio_list_add(&pe->snapshot_bios, bio);
-
- if (!pe->started) {
- /* this is protected by snap->lock */
- pe->started = 1;
- up_write(&s->lock);
- start_copy(pe);
- } else
- up_write(&s->lock);
- r = 0;
- }
+ goto out_unlock;
+ }
+
+ pe = __find_pending_exception(s, bio);
+ if (!pe) {
+ __invalidate_snapshot(s, pe, -ENOMEM);
+ r = -EIO;
+ goto out_unlock;
+ }
+
+ remap_exception(s, &pe->e, bio);
+ bio_list_add(&pe->snapshot_bios, bio);
+
+ if (!pe->started) {
+ /* this is protected by snap->lock */
+ pe->started = 1;
+ copy_needed = 1;
}
+ r = 0;
+
+ out_unlock:
+ up_write(&s->lock);
+
+ if (copy_needed)
+ start_copy(pe);
} else {
/*
* FIXME: this read path scares me because we
@@ -847,6 +921,11 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio,
/* Do reads */
down_read(&s->lock);
+ if (!s->valid) {
+ up_read(&s->lock);
+ return -EIO;
+ }
+
/* See if it it has been remapped */
e = lookup_exception(&s->complete, chunk);
if (e)
@@ -884,9 +963,9 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
snap->store.fraction_full(&snap->store,
&numerator,
&denominator);
- snprintf(result, maxlen,
- SECTOR_FORMAT "/" SECTOR_FORMAT,
- numerator, denominator);
+ snprintf(result, maxlen, "%llu/%llu",
+ (unsigned long long)numerator,
+ (unsigned long long)denominator);
}
else
snprintf(result, maxlen, "Unknown");
@@ -899,9 +978,10 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
* to make private copies if the output is to
* make sense.
*/
- snprintf(result, maxlen, "%s %s %c " SECTOR_FORMAT,
+ snprintf(result, maxlen, "%s %s %c %llu",
snap->origin->name, snap->cow->name,
- snap->type, snap->chunk_size);
+ snap->type,
+ (unsigned long long)snap->chunk_size);
break;
}
@@ -911,40 +991,27 @@ static int snapshot_status(struct dm_target *ti, status_type_t type,
/*-----------------------------------------------------------------
* Origin methods
*---------------------------------------------------------------*/
-static void list_merge(struct list_head *l1, struct list_head *l2)
-{
- struct list_head *l1_n, *l2_p;
-
- l1_n = l1->next;
- l2_p = l2->prev;
-
- l1->next = l2;
- l2->prev = l1;
-
- l2_p->next = l1_n;
- l1_n->prev = l2_p;
-}
-
static int __origin_write(struct list_head *snapshots, struct bio *bio)
{
- int r = 1, first = 1;
+ int r = 1, first = 0;
struct dm_snapshot *snap;
struct exception *e;
- struct pending_exception *pe, *last = NULL;
+ struct pending_exception *pe, *next_pe, *primary_pe = NULL;
chunk_t chunk;
+ LIST_HEAD(pe_queue);
/* Do all the snapshots on this origin */
list_for_each_entry (snap, snapshots, list) {
+ down_write(&snap->lock);
+
/* Only deal with valid and active snapshots */
if (!snap->valid || !snap->active)
- continue;
+ goto next_snapshot;
/* Nothing to do if writing beyond end of snapshot */
if (bio->bi_sector >= dm_table_get_size(snap->table))
- continue;
-
- down_write(&snap->lock);
+ goto next_snapshot;
/*
* Remember, different snapshots can have
@@ -956,49 +1023,75 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio)
* Check exception table to see if block
* is already remapped in this snapshot
* and trigger an exception if not.
+ *
+ * sibling_count is initialised to 1 so pending_complete()
+ * won't destroy the primary_pe while we're inside this loop.
*/
e = lookup_exception(&snap->complete, chunk);
- if (!e) {
- pe = __find_pending_exception(snap, bio);
- if (!pe) {
- snap->store.drop_snapshot(&snap->store);
- snap->valid = 0;
-
- } else {
- if (last)
- list_merge(&pe->siblings,
- &last->siblings);
-
- last = pe;
- r = 0;
+ if (e)
+ goto next_snapshot;
+
+ pe = __find_pending_exception(snap, bio);
+ if (!pe) {
+ __invalidate_snapshot(snap, pe, ENOMEM);
+ goto next_snapshot;
+ }
+
+ if (!primary_pe) {
+ /*
+ * Either every pe here has same
+ * primary_pe or none has one yet.
+ */
+ if (pe->primary_pe)
+ primary_pe = pe->primary_pe;
+ else {
+ primary_pe = pe;
+ first = 1;
}
+
+ bio_list_add(&primary_pe->origin_bios, bio);
+
+ r = 0;
+ }
+
+ if (!pe->primary_pe) {
+ atomic_inc(&primary_pe->sibling_count);
+ pe->primary_pe = primary_pe;
+ }
+
+ if (!pe->started) {
+ pe->started = 1;
+ list_add_tail(&pe->list, &pe_queue);
}
+ next_snapshot:
up_write(&snap->lock);
}
+ if (!primary_pe)
+ goto out;
+
/*
- * Now that we have a complete pe list we can start the copying.
+ * If this is the first time we're processing this chunk and
+ * sibling_count is now 1 it means all the pending exceptions
+ * got completed while we were in the loop above, so it falls to
+ * us here to remove the primary_pe and submit any origin_bios.
*/
- if (last) {
- pe = last;
- do {
- down_write(&pe->snap->lock);
- if (first)
- bio_list_add(&pe->origin_bios, bio);
- if (!pe->started) {
- pe->started = 1;
- up_write(&pe->snap->lock);
- start_copy(pe);
- } else
- up_write(&pe->snap->lock);
- first = 0;
- pe = list_entry(pe->siblings.next,
- struct pending_exception, siblings);
-
- } while (pe != last);
+
+ if (first && atomic_dec_and_test(&primary_pe->sibling_count)) {
+ flush_bios(bio_list_get(&primary_pe->origin_bios));
+ free_pending_exception(primary_pe);
+ /* If we got here, pe_queue is necessarily empty. */
+ goto out;
}
+ /*
+ * Now that we have a complete pe list we can start the copying.
+ */
+ list_for_each_entry_safe(pe, next_pe, &pe_queue, list)
+ start_copy(pe);
+
+ out:
return r;
}
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 697aacafb02..08328a8f5a3 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -49,9 +49,9 @@ static inline struct stripe_c *alloc_context(unsigned int stripes)
static int get_stripe(struct dm_target *ti, struct stripe_c *sc,
unsigned int stripe, char **argv)
{
- sector_t start;
+ unsigned long long start;
- if (sscanf(argv[1], SECTOR_FORMAT, &start) != 1)
+ if (sscanf(argv[1], "%llu", &start) != 1)
return -EINVAL;
if (dm_get_device(ti, argv[0], start, sc->stripe_width,
@@ -103,7 +103,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv)
return -EINVAL;
}
- if (((uint32_t)ti->len) & (chunk_size - 1)) {
+ if (ti->len & (chunk_size - 1)) {
ti->error = "dm-stripe: Target length not divisible by "
"chunk size";
return -EINVAL;
@@ -201,10 +201,11 @@ static int stripe_status(struct dm_target *ti,
break;
case STATUSTYPE_TABLE:
- DMEMIT("%d " SECTOR_FORMAT, sc->stripes, sc->chunk_mask + 1);
+ DMEMIT("%d %llu", sc->stripes,
+ (unsigned long long)sc->chunk_mask + 1);
for (i = 0; i < sc->stripes; i++)
- DMEMIT(" %s " SECTOR_FORMAT, sc->stripe[i].dev->name,
- sc->stripe[i].physical_start);
+ DMEMIT(" %s %llu", sc->stripe[i].dev->name,
+ (unsigned long long)sc->stripe[i].physical_start);
break;
}
return 0;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 907b08ddb78..8f56a54cf0c 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -14,6 +14,7 @@
#include <linux/ctype.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
+#include <linux/mutex.h>
#include <asm/atomic.h>
#define MAX_DEPTH 16
@@ -22,6 +23,7 @@
#define CHILDREN_PER_NODE (KEYS_PER_NODE + 1)
struct dm_table {
+ struct mapped_device *md;
atomic_t holders;
/* btree table */
@@ -97,6 +99,8 @@ static void combine_restrictions_low(struct io_restrictions *lhs,
lhs->seg_boundary_mask =
min_not_zero(lhs->seg_boundary_mask, rhs->seg_boundary_mask);
+
+ lhs->no_cluster |= rhs->no_cluster;
}
/*
@@ -204,7 +208,8 @@ static int alloc_targets(struct dm_table *t, unsigned int num)
return 0;
}
-int dm_table_create(struct dm_table **result, int mode, unsigned num_targets)
+int dm_table_create(struct dm_table **result, int mode,
+ unsigned num_targets, struct mapped_device *md)
{
struct dm_table *t = kmalloc(sizeof(*t), GFP_KERNEL);
@@ -227,6 +232,7 @@ int dm_table_create(struct dm_table **result, int mode, unsigned num_targets)
}
t->mode = mode;
+ t->md = md;
*result = t;
return 0;
}
@@ -345,7 +351,7 @@ static struct dm_dev *find_device(struct list_head *l, dev_t dev)
/*
* Open a device so we can use it as a map destination.
*/
-static int open_dev(struct dm_dev *d, dev_t dev)
+static int open_dev(struct dm_dev *d, dev_t dev, struct mapped_device *md)
{
static char *_claim_ptr = "I belong to device-mapper";
struct block_device *bdev;
@@ -357,7 +363,7 @@ static int open_dev(struct dm_dev *d, dev_t dev)
bdev = open_by_devnum(dev, d->mode);
if (IS_ERR(bdev))
return PTR_ERR(bdev);
- r = bd_claim(bdev, _claim_ptr);
+ r = bd_claim_by_disk(bdev, _claim_ptr, dm_disk(md));
if (r)
blkdev_put(bdev);
else
@@ -368,12 +374,12 @@ static int open_dev(struct dm_dev *d, dev_t dev)
/*
* Close a device that we've been using.
*/