diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/Kconfig | 26 | ||||
-rw-r--r-- | drivers/md/dm-crypt.c | 11 | ||||
-rw-r--r-- | drivers/md/dm-ioctl.c | 100 | ||||
-rw-r--r-- | drivers/md/dm-linear.c | 8 | ||||
-rw-r--r-- | drivers/md/dm-raid1.c | 29 | ||||
-rw-r--r-- | drivers/md/dm-snap.c | 409 | ||||
-rw-r--r-- | drivers/md/dm-stripe.c | 13 | ||||
-rw-r--r-- | drivers/md/dm-table.c | 53 | ||||
-rw-r--r-- | drivers/md/dm.c | 88 | ||||
-rw-r--r-- | drivers/md/dm.h | 23 | ||||
-rw-r--r-- | drivers/md/kcopyd.c | 28 | ||||
-rw-r--r-- | drivers/md/md.c | 235 | ||||
-rw-r--r-- | drivers/md/raid1.c | 27 | ||||
-rw-r--r-- | drivers/md/raid5.c | 719 | ||||
-rw-r--r-- | drivers/md/raid6main.c | 14 |
15 files changed, 1423 insertions, 360 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index ac43f98062f..fd2aae150cc 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -127,6 +127,32 @@ config MD_RAID5 If unsure, say Y. +config MD_RAID5_RESHAPE + bool "Support adding drives to a raid-5 array (experimental)" + depends on MD_RAID5 && EXPERIMENTAL + ---help--- + A RAID-5 set can be expanded by adding extra drives. This + requires "restriping" the array which means (almost) every + block must be written to a different place. + + This option allows such restriping to be done while the array + is online. However it is still EXPERIMENTAL code. It should + work, but please be sure that you have backups. + + You will need a version of mdadm newer than 2.3.1. During the + early stage of reshape there is a critical section where live data + is being over-written. A crash during this time needs extra care + for recovery. The newer mdadm takes a copy of the data in the + critical section and will restore it, if necessary, after a crash. + + The mdadm usage is e.g. + mdadm --grow /dev/md1 --raid-disks=6 + to grow '/dev/md1' to having 6 disks. + + Note: The array can only be expanded, not contracted. + There should be enough spares already present to make the new + array workable. + config MD_RAID6 tristate "RAID-6 mode" depends on BLK_DEV_MD diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 259e86f2654..61a590bb624 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -518,6 +518,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) char *ivopts; unsigned int crypto_flags; unsigned int key_size; + unsigned long long tmpll; if (argc != 5) { ti->error = PFX "Not enough arguments"; @@ -633,15 +634,17 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad5; } - if (sscanf(argv[2], SECTOR_FORMAT, &cc->iv_offset) != 1) { + if (sscanf(argv[2], "%llu", &tmpll) != 1) { ti->error = PFX "Invalid iv_offset sector"; goto bad5; } + cc->iv_offset = tmpll; - if (sscanf(argv[4], SECTOR_FORMAT, &cc->start) != 1) { + if (sscanf(argv[4], "%llu", &tmpll) != 1) { ti->error = PFX "Invalid device sector"; goto bad5; } + cc->start = tmpll; if (dm_get_device(ti, argv[3], cc->start, ti->len, dm_table_get_mode(ti->table), &cc->dev)) { @@ -885,8 +888,8 @@ static int crypt_status(struct dm_target *ti, status_type_t type, result[sz++] = '-'; } - DMEMIT(" " SECTOR_FORMAT " %s " SECTOR_FORMAT, - cc->iv_offset, cc->dev->name, cc->start); + DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset, + cc->dev->name, (unsigned long long)cc->start); break; } return 0; diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 442e2be6052..8edd6435414 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -15,6 +15,7 @@ #include <linux/slab.h> #include <linux/devfs_fs_kernel.h> #include <linux/dm-ioctl.h> +#include <linux/hdreg.h> #include <asm/uaccess.h> @@ -244,9 +245,9 @@ static void __hash_remove(struct hash_cell *hc) dm_table_put(table); } - dm_put(hc->md); if (hc->new_map) dm_table_put(hc->new_map); + dm_put(hc->md); free_cell(hc); } @@ -600,12 +601,22 @@ static int dev_create(struct dm_ioctl *param, size_t param_size) */ static struct hash_cell *__find_device_hash_cell(struct dm_ioctl *param) { + struct mapped_device *md; + void *mdptr = NULL; + if (*param->uuid) return __get_uuid_cell(param->uuid); - else if (*param->name) + + if (*param->name) return __get_name_cell(param->name); - else - return dm_get_mdptr(huge_decode_dev(param->dev)); + + md = dm_get_md(huge_decode_dev(param->dev)); + if (md) { + mdptr = dm_get_mdptr(md); + dm_put(md); + } + + return mdptr; } static struct mapped_device *find_device(struct dm_ioctl *param) @@ -690,6 +701,54 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size) return dm_hash_rename(param->name, new_name); } +static int dev_set_geometry(struct dm_ioctl *param, size_t param_size) +{ + int r = -EINVAL, x; + struct mapped_device *md; + struct hd_geometry geometry; + unsigned long indata[4]; + char *geostr = (char *) param + param->data_start; + + md = find_device(param); + if (!md) + return -ENXIO; + + if (geostr < (char *) (param + 1) || + invalid_str(geostr, (void *) param + param_size)) { + DMWARN("Invalid geometry supplied."); + goto out; + } + + x = sscanf(geostr, "%lu %lu %lu %lu", indata, + indata + 1, indata + 2, indata + 3); + + if (x != 4) { + DMWARN("Unable to interpret geometry settings."); + goto out; + } + + if (indata[0] > 65535 || indata[1] > 255 || + indata[2] > 255 || indata[3] > ULONG_MAX) { + DMWARN("Geometry exceeds range limits."); + goto out; + } + + geometry.cylinders = indata[0]; + geometry.heads = indata[1]; + geometry.sectors = indata[2]; + geometry.start = indata[3]; + + r = dm_set_geometry(md, &geometry); + if (!r) + r = __dev_status(md, param); + + param->data_size = 0; + +out: + dm_put(md); + return r; +} + static int do_suspend(struct dm_ioctl *param) { int r = 0; @@ -975,33 +1034,43 @@ static int table_load(struct dm_ioctl *param, size_t param_size) int r; struct hash_cell *hc; struct dm_table *t; + struct mapped_device *md; - r = dm_table_create(&t, get_mode(param), param->target_count); + md = find_device(param); + if (!md) + return -ENXIO; + + r = dm_table_create(&t, get_mode(param), param->target_count, md); if (r) - return r; + goto out; r = populate_table(t, param, param_size); if (r) { dm_table_put(t); - return r; + goto out; } down_write(&_hash_lock); - hc = __find_device_hash_cell(param); - if (!hc) { - DMWARN("device doesn't appear to be in the dev hash table."); - up_write(&_hash_lock); + hc = dm_get_mdptr(md); + if (!hc || hc->md != md) { + DMWARN("device has been removed from the dev hash table."); dm_table_put(t); - return -ENXIO; + up_write(&_hash_lock); + r = -ENXIO; + goto out; } if (hc->new_map) dm_table_put(hc->new_map); hc->new_map = t; + up_write(&_hash_lock); + param->flags |= DM_INACTIVE_PRESENT_FLAG; + r = __dev_status(md, param); + +out: + dm_put(md); - r = __dev_status(hc->md, param); - up_write(&_hash_lock); return r; } @@ -1214,7 +1283,8 @@ static ioctl_fn lookup_ioctl(unsigned int cmd) {DM_LIST_VERSIONS_CMD, list_versions}, - {DM_TARGET_MSG_CMD, target_message} + {DM_TARGET_MSG_CMD, target_message}, + {DM_DEV_SET_GEOMETRY_CMD, dev_set_geometry} }; return (cmd >= ARRAY_SIZE(_ioctls)) ? NULL : _ioctls[cmd].fn; diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 6a2cd5dc8a6..daf586c0898 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -26,6 +26,7 @@ struct linear_c { static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct linear_c *lc; + unsigned long long tmp; if (argc != 2) { ti->error = "dm-linear: Invalid argument count"; @@ -38,10 +39,11 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) return -ENOMEM; } - if (sscanf(argv[1], SECTOR_FORMAT, &lc->start) != 1) { + if (sscanf(argv[1], "%llu", &tmp) != 1) { ti->error = "dm-linear: Invalid device sector"; goto bad; } + lc->start = tmp; if (dm_get_device(ti, argv[0], lc->start, ti->len, dm_table_get_mode(ti->table), &lc->dev)) { @@ -87,8 +89,8 @@ static int linear_status(struct dm_target *ti, status_type_t type, break; case STATUSTYPE_TABLE: - snprintf(result, maxlen, "%s " SECTOR_FORMAT, lc->dev->name, - lc->start); + snprintf(result, maxlen, "%s %llu", lc->dev->name, + (unsigned long long)lc->start); break; } return 0; diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 4e90f231fbf..d12cf3e5e07 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -402,9 +402,21 @@ static void rh_dec(struct region_hash *rh, region_t region) spin_lock_irqsave(&rh->region_lock, flags); if (atomic_dec_and_test(®->pending)) { + /* + * There is no pending I/O for this region. + * We can move the region to corresponding list for next action. + * At this point, the region is not yet connected to any list. + * + * If the state is RH_NOSYNC, the region should be kept off + * from clean list. + * The hash entry for RH_NOSYNC will remain in memory + * until the region is recovered or the map is reloaded. + */ + + /* do nothing for RH_NOSYNC */ if (reg->state == RH_RECOVERING) { list_add_tail(®->list, &rh->quiesced_regions); - } else { + } else if (reg->state == RH_DIRTY) { reg->state = RH_CLEAN; list_add(®->list, &rh->clean_regions); } @@ -922,9 +934,9 @@ static inline int _check_region_size(struct dm_target *ti, uint32_t size) static int get_mirror(struct mirror_set *ms, struct dm_target *ti, unsigned int mirror, char **argv) { - sector_t offset; + unsigned long long offset; - if (sscanf(argv[1], SECTOR_FORMAT, &offset) != 1) { + if (sscanf(argv[1], "%llu", &offset) != 1) { ti->error = "dm-mirror: Invalid offset"; return -EINVAL; } @@ -1191,16 +1203,17 @@ static int mirror_status(struct dm_target *ti, status_type_t type, for (m = 0; m < ms->nr_mirrors; m++) DMEMIT("%s ", ms->mirror[m].dev->name); - DMEMIT(SECTOR_FORMAT "/" SECTOR_FORMAT, - ms->rh.log->type->get_sync_count(ms->rh.log), - ms->nr_regions); + DMEMIT("%llu/%llu", + (unsigned long long)ms->rh.log->type-> + get_sync_count(ms->rh.log), + (unsigned long long)ms->nr_regions); break; case STATUSTYPE_TABLE: DMEMIT("%d ", ms->nr_mirrors); for (m = 0; m < ms->nr_mirrors; m++) - DMEMIT("%s " SECTOR_FORMAT " ", - ms->mirror[m].dev->name, ms->mirror[m].offset); + DMEMIT("%s %llu ", ms->mirror[m].dev->name, + (unsigned long long)ms->mirror[m].offset); } return 0; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 7401540086d..08312b46463 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -49,11 +49,26 @@ struct pending_exception { struct bio_list snapshot_bios; /* - * Other pending_exceptions that are processing this - * chunk. When this list is empty, we know we can - * complete the origins. + * Short-term queue of pending exceptions prior to submission. */ - struct list_head siblings; + struct list_head list; + + /* + * The primary pending_exception is the one that holds + * the sibling_count and the list of origin_bios for a + * group of pending_exceptions. It is always last to get freed. + * These fields get set up when writing to the origin. + */ + struct pending_exception *primary_pe; + + /* + * Number of pending_exceptions processing this chunk. + * When this drops to zero we must complete the origin bios. + * If incrementing or decrementing this, hold pe->snap->lock for + * the sibling concerned and not pe->primary_pe->snap->lock unless + * they are the same. + */ + atomic_t sibling_count; /* Pointer back to snapshot context */ struct dm_snapshot *snap; @@ -377,6 +392,8 @@ static void read_snapshot_metadata(struct dm_snapshot *s) down_write(&s->lock); s->valid = 0; up_write(&s->lock); + + dm_table_event(s->table); } } @@ -542,8 +559,12 @@ static void snapshot_dtr(struct dm_target *ti) { struct dm_snapshot *s = (struct dm_snapshot *) ti->private; + /* Prevent further origin writes from using this snapshot. */ + /* After this returns there can be no new kcopyd jobs. */ unregister_snapshot(s); + kcopyd_client_destroy(s->kcopyd_client); + exit_exception_table(&s->pending, pending_cache); exit_exception_table(&s->complete, exception_cache); @@ -552,7 +573,7 @@ static void snapshot_dtr(struct dm_target *ti) dm_put_device(ti, s->origin); dm_put_device(ti, s->cow); - kcopyd_client_destroy(s->kcopyd_client); + kfree(s); } @@ -586,78 +607,117 @@ static void error_bios(struct bio *bio) } } +static inline void error_snapshot_bios(struct pending_exception *pe) +{ + error_bios(bio_list_get(&pe->snapshot_bios)); +} + static struct bio *__flush_bios(struct pending_exception *pe) { - struct pending_exception *sibling; + /* + * If this pe is involved in a write to the origin and + * it is the last sibling to complete then release + * the bios for the original write to the origin. + */ + + if (pe->primary_pe && + atomic_dec_and_test(&pe->primary_pe->sibling_count)) + return bio_list_get(&pe->primary_pe->origin_bios); + + return NULL; +} + +static void __invalidate_snapshot(struct dm_snapshot *s, + struct pending_exception *pe, int err) +{ + if (!s->valid) + return; - if (list_empty(&pe->siblings)) - return bio_list_get(&pe->origin_bios); + if (err == -EIO) + DMERR("Invalidating snapshot: Error reading/writing."); + else if (err == -ENOMEM) + DMERR("Invalidating snapshot: Unable to allocate exception."); - sibling = list_entry(pe->siblings.next, - struct pending_exception, siblings); + if (pe) + remove_exception(&pe->e); - list_del(&pe->siblings); + if (s->store.drop_snapshot) + s->store.drop_snapshot(&s->store); - /* This is fine as long as kcopyd is single-threaded. If kcopyd - * becomes multi-threaded, we'll need some locking here. - */ - bio_list_merge(&sibling->origin_bios, &pe->origin_bios); + s->valid = 0; - return NULL; + dm_table_event(s->table); } static void pending_complete(struct pending_exception *pe, int success) { struct exception *e; + struct pending_exception *primary_pe; struct dm_snapshot *s = pe->snap; struct bio *flush = NULL; - if (success) { - e = alloc_exception(); - if (!e) { - DMWARN("Unable to allocate exception."); - down_write(&s->lock); - s->store.drop_snapshot(&s->store); - s->valid = 0; - flush = __flush_bios(pe); - up_write(&s->lock); - - error_bios(bio_list_get(&pe->snapshot_bios)); - goto out; - } - *e = pe->e; - - /* - * Add a proper exception, and remove the - * in-flight exception from the list. - */ + if (!success) { + /* Read/write error - snapshot is unusable */ down_write(&s->lock); - insert_exception(&s->complete, e); - remove_exception(&pe->e); + __invalidate_snapshot(s, pe, -EIO); flush = __flush_bios(pe); - - /* Submit any pending write bios */ up_write(&s->lock); - flush_bios(bio_list_get(&pe->snapshot_bios)); - } else { - /* Read/write error - snapshot is unusable */ + error_snapshot_bios(pe); + goto out; + } + + e = alloc_exception(); + if (!e) { down_write(&s->lock); - if (s->valid) - DMERR("Error reading/writing snapshot"); - s->store.drop_snapshot(&s->store); - s->valid = 0; - remove_exception(&pe->e); + __invalidate_snapshot(s, pe, -ENOMEM); flush = __flush_bios(pe); up_write(&s->lock); - error_bios(bio_list_get(&pe->snapshot_bios)); + error_snapshot_bios(pe); + goto out; + } + *e = pe->e; - dm_table_event(s->table); + /* + * Add a proper exception, and remove the + * in-flight exception from the list. + */ + down_write(&s->lock); + if (!s->valid) { + flush = __flush_bios(pe); + up_write(&s->lock); + + free_exception(e); + + error_snapshot_bios(pe); + goto out; } + insert_exception(&s->complete, e); + remove_exception(&pe->e); + flush = __flush_bios(pe); + + up_write(&s->lock); + + /* Submit any pending write bios */ + flush_bios(bio_list_get(&pe->snapshot_bios)); + out: - free_pending_exception(pe); + primary_pe = pe->primary_pe; + + /* + * Free the pe if it's not linked to an origin write or if + * it's not itself a primary pe. + */ + if (!primary_pe || primary_pe != pe) + free_pending_exception(pe); + + /* + * Free the primary pe if nothing references it. + */ + if (primary_pe && !atomic_read(&primary_pe->sibling_count)) + free_pending_exception(primary_pe); if (flush) flush_bios(flush); @@ -734,38 +794,45 @@ __find_pending_exception(struct dm_snapshot *s, struct bio *bio) if (e) { /* cast the exception to a pending exception */ pe = container_of(e, struct pending_exception, e); + goto out; + } - } else { - /* - * Create a new pending exception, we don't want - * to hold the lock while we do this. - */ - up_write(&s->lock); - pe = alloc_pending_exception(); - down_write(&s->lock); + /* + * Create a new pending exception, we don't want + * to hold the lock while we do this. + */ + up_write(&s->lock); + pe = alloc_pending_exception(); + down_write(&s->lock); - e = lookup_exception(&s->pending, chunk); - if (e) { - free_pending_exception(pe); - pe = container_of(e, struct pending_exception, e); - } else { - pe->e.old_chunk = chunk; - bio_list_init(&pe->origin_bios); - bio_list_init(&pe->snapshot_bios); - INIT_LIST_HEAD(&pe->siblings); - pe->snap = s; - pe->started = 0; - - if (s->store.prepare_exception(&s->store, &pe->e)) { - free_pending_exception(pe); - s->valid = 0; - return NULL; - } + if (!s->valid) { + free_pending_exception(pe); + return NULL; + } - insert_exception(&s->pending, &pe->e); - } + e = lookup_exception(&s->pending, chunk); + if (e) { + free_pending_exception(pe); + pe = container_of(e, struct pending_exception, e); + goto out; + } + + pe->e.old_chunk = chunk; + bio_list_init(&pe->origin_bios); + bio_list_init(&pe->snapshot_bios); + pe->primary_pe = NULL; + atomic_set(&pe->sibling_count, 1); + pe->snap = s; + pe->started = 0; + + if (s->store.prepare_exception(&s->store, &pe->e)) { + free_pending_exception(pe); + return NULL; } + insert_exception(&s->pending, &pe->e); + + out: return pe; } @@ -782,13 +849,15 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, { struct exception *e; struct dm_snapshot *s = (struct dm_snapshot *) ti->private; + int copy_needed = 0; int r = 1; chunk_t chunk; - struct pending_exception *pe; + struct pending_exception *pe = NULL; chunk = sector_to_chunk(s, bio->bi_sector); /* Full snapshots are not usable */ + /* To get here the table must be live so s->active is always set. */ if (!s->valid) return -EIO; @@ -806,36 +875,41 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, * to copy an exception */ down_write(&s->lock); + if (!s->valid) { + r = -EIO; + goto out_unlock; + } + /* If the block is already remapped - use that, else remap it */ e = lookup_exception(&s->complete, chunk); if (e) { remap_exception(s, e, bio); - up_write(&s->lock); - - } else { - pe = __find_pending_exception(s, bio); - - if (!pe) { - if (s->store.drop_snapshot) - s->store.drop_snapshot(&s->store); - s->valid = 0; - r = -EIO; - up_write(&s->lock); - } else { - remap_exception(s, &pe->e, bio); - bio_list_add(&pe->snapshot_bios, bio); - - if (!pe->started) { - /* this is protected by snap->lock */ - pe->started = 1; - up_write(&s->lock); - start_copy(pe); - } else - up_write(&s->lock); - r = 0; - } + goto out_unlock; + } + + pe = __find_pending_exception(s, bio); + if (!pe) { + __invalidate_snapshot(s, pe, -ENOMEM); + r = -EIO; + goto out_unlock; + } + + remap_exception(s, &pe->e, bio); + bio_list_add(&pe->snapshot_bios, bio); + + if (!pe->started) { + /* this is protected by snap->lock */ + pe->started = 1; + copy_needed = 1; } + r = 0; + + out_unlock: + up_write(&s->lock); + + if (copy_needed) + start_copy(pe); } else { /* * FIXME: this read path scares me because we @@ -847,6 +921,11 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio, /* Do reads */ down_read(&s->lock); + if (!s->valid) { + up_read(&s->lock); + return -EIO; + } + /* See if it it has been remapped */ e = lookup_exception(&s->complete, chunk); if (e) @@ -884,9 +963,9 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, snap->store.fraction_full(&snap->store, &numerator, &denominator); - snprintf(result, maxlen, - SECTOR_FORMAT "/" SECTOR_FORMAT, - numerator, denominator); + snprintf(result, maxlen, "%llu/%llu", + (unsigned long long)numerator, + (unsigned long long)denominator); } else snprintf(result, maxlen, "Unknown"); @@ -899,9 +978,10 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, * to make private copies if the output is to * make sense. */ - snprintf(result, maxlen, "%s %s %c " SECTOR_FORMAT, + snprintf(result, maxlen, "%s %s %c %llu", snap->origin->name, snap->cow->name, - snap->type, snap->chunk_size); + snap->type, + (unsigned long long)snap->chunk_size); break; } @@ -911,40 +991,27 @@ static int snapshot_status(struct dm_target *ti, status_type_t type, /*----------------------------------------------------------------- * Origin methods *---------------------------------------------------------------*/ -static void list_merge(struct list_head *l1, struct list_head *l2) -{ - struct list_head *l1_n, *l2_p; - - l1_n = l1->next; - l2_p = l2->prev; - - l1->next = l2; - l2->prev = l1; - - l2_p->next = l1_n; - l1_n->prev = l2_p; -} - static int __origin_write(struct list_head *snapshots, struct bio *bio) { - int r = 1, first = 1; + int r = 1, first = 0; struct dm_snapshot *snap; struct exception *e; - struct pending_exception *pe, *last = NULL; + struct pending_exception *pe, *next_pe, *primary_pe = NULL; chunk_t chunk; + LIST_HEAD(pe_queue); /* Do all the snapshots on this origin */ list_for_each_entry (snap, snapshots, list) { + down_write(&snap->lock); + /* Only deal with valid and active snapshots */ if (!snap->valid || !snap->active) - continue; + goto next_snapshot; /* Nothing to do if writing beyond end of snapshot */ if (bio->bi_sector >= dm_table_get_size(snap->table)) - continue; - - down_write(&snap->lock); + goto next_snapshot; /* * Remember, different snapshots can have @@ -956,49 +1023,75 @@ static int __origin_write(struct list_head *snapshots, struct bio *bio) * Check exception table to see if block * is already remapped in this snapshot * and trigger an exception if not. + * + * sibling_count is initialised to 1 so pending_complete() + * won't destroy the primary_pe while we're inside this loop. */ e = lookup_exception(&snap->complete, chunk); - if (!e) { - pe = __find_pending_exception(snap, bio); - if (!pe) { - snap->store.drop_snapshot(&snap->store); - snap->valid = 0; - - } else { - if (last) - list_merge(&pe->siblings, - &last->siblings); - - last = pe; - r = 0; + if (e) + goto next_snapshot; + + pe = __find_pending_exception(snap, bio); + if (!pe) { + __invalidate_snapshot(snap, pe, ENOMEM); + goto next_snapshot; + } + + if (!primary_pe) { + /* + * Either every pe here has same + * primary_pe or none has one yet. + */ + if (pe->primary_pe) + primary_pe = pe->primary_pe; + else { + primary_pe = pe; + first = 1; } + + bio_list_add(&primary_pe->origin_bios, bio); + + r = 0; + } + + if (!pe->primary_pe) { + atomic_inc(&primary_pe->sibling_count); + pe->primary_pe = primary_pe; + } + + if (!pe->started) { + pe->started = 1; + list_add_tail(&pe->list, &pe_queue); } + next_snapshot: up_write(&snap->lock); } + if (!primary_pe) + goto out; + /* - * Now that we have a complete pe list we can start the copying. + * If this is the first time we're processing this chunk and + * sibling_count is now 1 it means all the pending exceptions + * got completed while we were in the loop above, so it falls to + * us here to remove the primary_pe and submit any origin_bios. */ - if (last) { - pe = last; - do { - down_write(&pe->snap->lock); - if (first) - bio_list_add(&pe->origin_bios, bio); - if (!pe->started) { - pe->started = 1; - up_write(&pe->snap->lock); - start_copy(pe); - } else - up_write(&pe->snap->lock); - first = 0; - pe = list_entry(pe->siblings.next, - struct pending_exception, siblings); - - } while (pe != last); + + if (first && atomic_dec_and_test(&primary_pe->sibling_count)) { + flush_bios(bio_list_get(&primary_pe->origin_bios)); + free_pending_exception(primary_pe); + /* If we got here, pe_queue is necessarily empty. */ + goto out; } + /* + * Now that we have a complete pe list we can start the copying. + */ + list_for_each_entry_safe(pe, next_pe, &pe_queue, list) + start_copy(pe); + + out: return r; } diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 697aacafb02..08328a8f5a3 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -49,9 +49,9 @@ static inline struct stripe_c *alloc_context(unsigned int stripes) static int get_stripe(struct dm_target *ti, struct stripe_c *sc, unsigned int stripe, char **argv) { - sector_t start; + unsigned long long start; - if (sscanf(argv[1], SECTOR_FORMAT, &start) != 1) + if (sscanf(argv[1], "%llu", &start) != 1) return -EINVAL; if (dm_get_device(ti, argv[0], start, sc->stripe_width, @@ -103,7 +103,7 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) return -EINVAL; } - if (((uint32_t)ti->len) & (chunk_size - 1)) { + if (ti->len & (chunk_size - 1)) { ti->error = "dm-stripe: Target length not divisible by " "chunk size"; return -EINVAL; @@ -201,10 +201,11 @@ static int stripe_status(struct dm_target *ti, break; case STATUSTYPE_TABLE: - DMEMIT("%d " SECTOR_FORMAT, sc->stripes, sc->chunk_mask + 1); + DMEMIT("%d %llu", sc->stripes, + (unsigned long long)sc->chunk_mask + 1); for (i = 0; i < sc->stripes; i++) - DMEMIT(" %s " SECTOR_FORMAT, sc->stripe[i].dev->name, - sc->stripe[i].physical_start); + DMEMIT(" %s %llu", sc->stripe[i].dev->name, + (unsigned long long)sc->stripe[i].physical_start); break; } return 0; diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 907b08ddb78..8f56a54cf0c 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -14,6 +14,7 @@ #include <linux/ctype.h> #include <linux/slab.h> #include <linux/interrupt.h> +#include <linux/mutex.h> #include <asm/atomic.h> #define MAX_DEPTH 16 @@ -22,6 +23,7 @@ #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1) struct dm_table { + struct mapped_device *md; atomic_t holders; /* btree table */ @@ -97,6 +99,8 @@ static void combine_restrictions_low(struct io_restrictions *lhs, lhs->seg_boundary_mask = min_not_zero(lhs->seg_boundary_mask, rhs->seg_boundary_mask); + + lhs->no_cluster |= rhs->no_cluster; } /* @@ -204,7 +208,8 @@ static int alloc_targets(struct dm_table *t, unsigned int num) return 0; } -int dm_table_create(struct dm_table **result, int mode, unsigned num_targets) +int dm_table_create(struct dm_table **result, int mode, + unsigned num_targets, struct mapped_device *md) { struct dm_table *t = kmalloc(sizeof(*t), GFP_KERNEL); @@ -227,6 +232,7 @@ int dm_table_create(struct dm_table **result, int mode, unsigned num_targets) } t->mode = mode; + t->md = md; *result = t; return 0; } @@ -345,7 +351,7 @@ static struct dm_dev *find_device(struct list_head *l, dev_t dev) /* * Open a device so we can use it as a map destination. */ -static int open_dev(struct dm_dev *d, dev_t dev) +static int open_dev(struct dm_dev *d, dev_t dev, struct mapped_device *md) { static char *_claim_ptr = "I belong to device-mapper"; struct block_device *bdev; @@ -357,7 +363,7 @@ static int open_dev(struct dm_dev *d, dev_t dev) bdev = open_by_devnum(dev, d->mode); if (IS_ERR(bdev)) return PTR_ERR(bdev); - r = bd_claim(bdev, _claim_ptr); + r = bd_claim_by_disk(bdev, _claim_ptr, dm_disk(md)); if (r) blkdev_put(bdev); else @@ -368,12 +374,12 @@ static int open_dev(struct dm_dev *d, dev_t dev) /* * Close a device that we've been using. */ |