diff options
Diffstat (limited to 'drivers/md')
| -rw-r--r-- | drivers/md/dm-bio-prison.c | 70 | ||||
| -rw-r--r-- | drivers/md/dm-bio-prison.h | 2 | ||||
| -rw-r--r-- | drivers/md/dm-bufio.c | 2 | ||||
| -rw-r--r-- | drivers/md/dm-cache-metadata.c | 9 | ||||
| -rw-r--r-- | drivers/md/dm-cache-target.c | 13 | ||||
| -rw-r--r-- | drivers/md/dm-crypt.c | 4 | ||||
| -rw-r--r-- | drivers/md/dm-era-target.c | 3 | ||||
| -rw-r--r-- | drivers/md/dm-io.c | 22 | ||||
| -rw-r--r-- | drivers/md/dm-mpath.c | 16 | ||||
| -rw-r--r-- | drivers/md/dm-snap.c | 67 | ||||
| -rw-r--r-- | drivers/md/dm-table.c | 5 | ||||
| -rw-r--r-- | drivers/md/dm-thin-metadata.c | 9 | ||||
| -rw-r--r-- | drivers/md/dm-thin.c | 93 | ||||
| -rw-r--r-- | drivers/md/dm-zero.c | 4 | ||||
| -rw-r--r-- | drivers/md/dm.c | 101 | ||||
| -rw-r--r-- | drivers/md/md.c | 15 |
16 files changed, 294 insertions, 141 deletions
diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c index 85f0b707425..f752d12081f 100644 --- a/drivers/md/dm-bio-prison.c +++ b/drivers/md/dm-bio-prison.c @@ -14,13 +14,17 @@ /*----------------------------------------------------------------*/ -struct dm_bio_prison { +struct bucket { spinlock_t lock; + struct hlist_head cells; +}; + +struct dm_bio_prison { mempool_t *cell_pool; unsigned nr_buckets; unsigned hash_mask; - struct hlist_head *cells; + struct bucket *buckets; }; /*----------------------------------------------------------------*/ @@ -40,6 +44,12 @@ static uint32_t calc_nr_buckets(unsigned nr_cells) static struct kmem_cache *_cell_cache; +static void init_bucket(struct bucket *b) +{ + spin_lock_init(&b->lock); + INIT_HLIST_HEAD(&b->cells); +} + /* * @nr_cells should be the number of cells you want in use _concurrently_. * Don't confuse it with the number of distinct keys. @@ -49,13 +59,12 @@ struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells) unsigned i; uint32_t nr_buckets = calc_nr_buckets(nr_cells); size_t len = sizeof(struct dm_bio_prison) + - (sizeof(struct hlist_head) * nr_buckets); + (sizeof(struct bucket) * nr_buckets); struct dm_bio_prison *prison = kmalloc(len, GFP_KERNEL); if (!prison) return NULL; - spin_lock_init(&prison->lock); prison->cell_pool = mempool_create_slab_pool(nr_cells, _cell_cache); if (!prison->cell_pool) { kfree(prison); @@ -64,9 +73,9 @@ struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells) prison->nr_buckets = nr_buckets; prison->hash_mask = nr_buckets - 1; - prison->cells = (struct hlist_head *) (prison + 1); + prison->buckets = (struct bucket *) (prison + 1); for (i = 0; i < nr_buckets; i++) - INIT_HLIST_HEAD(prison->cells + i); + init_bucket(prison->buckets + i); return prison; } @@ -107,40 +116,44 @@ static int keys_equal(struct dm_cell_key *lhs, struct dm_cell_key *rhs) (lhs->block == rhs->block); } -static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket, +static struct bucket *get_bucket(struct dm_bio_prison *prison, + struct dm_cell_key *key) +{ + return prison->buckets + hash_key(prison, key); +} + +static struct dm_bio_prison_cell *__search_bucket(struct bucket *b, struct dm_cell_key *key) { struct dm_bio_prison_cell *cell; - hlist_for_each_entry(cell, bucket, list) + hlist_for_each_entry(cell, &b->cells, list) if (keys_equal(&cell->key, key)) return cell; return NULL; } -static void __setup_new_cell(struct dm_bio_prison *prison, +static void __setup_new_cell(struct bucket *b, struct dm_cell_key *key, struct bio *holder, - uint32_t hash, struct dm_bio_prison_cell *cell) { memcpy(&cell->key, key, sizeof(cell->key)); cell->holder = holder; bio_list_init(&cell->bios); - hlist_add_head(&cell->list, prison->cells + hash); + hlist_add_head(&cell->list, &b->cells); } -static int __bio_detain(struct dm_bio_prison *prison, +static int __bio_detain(struct bucket *b, struct dm_cell_key *key, struct bio *inmate, struct dm_bio_prison_cell *cell_prealloc, struct dm_bio_prison_cell **cell_result) { - uint32_t hash = hash_key(prison, key); struct dm_bio_prison_cell *cell; - cell = __search_bucket(prison->cells + hash, key); + cell = __search_bucket(b, key); if (cell) { if (inmate) bio_list_add(&cell->bios, inmate); @@ -148,7 +161,7 @@ static int __bio_detain(struct dm_bio_prison *prison, return 1; } - __setup_new_cell(prison, key, inmate, hash, cell_prealloc); + __setup_new_cell(b, key, inmate, cell_prealloc); *cell_result = cell_prealloc; return 0; } @@ -161,10 +174,11 @@ static int bio_detain(struct dm_bio_prison *prison, { int r; unsigned long flags; + struct bucket *b = get_bucket(prison, key); - spin_lock_irqsave(&prison->lock, flags); - r = __bio_detain(prison, key, inmate, cell_prealloc, cell_result); - spin_unlock_irqrestore(&prison->lock, flags); + spin_lock_irqsave(&b->lock, flags); + r = __bio_detain(b, key, inmate, cell_prealloc, cell_result); + spin_unlock_irqrestore(&b->lock, flags); return r; } @@ -208,10 +222,11 @@ void dm_cell_release(struct dm_bio_prison *prison, struct bio_list *bios) { unsigned long flags; + struct bucket *b = get_bucket(prison, &cell->key); - spin_lock_irqsave(&prison->lock, flags); + spin_lock_irqsave(&b->lock, flags); __cell_release(cell, bios); - spin_unlock_irqrestore(&prison->lock, flags); + spin_unlock_irqrestore(&b->lock, flags); } EXPORT_SYMBOL_GPL(dm_cell_release); @@ -230,28 +245,25 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison, struct bio_list *inmates) { unsigned long flags; + struct bucket *b = get_bucket(prison, &cell->key); - spin_lock_irqsave(&prison->lock, flags); + spin_lock_irqsave(&b->lock, flags); __cell_release_no_holder(cell, inmates); - spin_unlock_irqrestore(&prison->lock, flags); + spin_unlock_irqrestore(&b->lock, flags); } EXPORT_SYMBOL_GPL(dm_cell_release_no_holder); void dm_cell_error(struct dm_bio_prison *prison, - struct dm_bio_prison_cell *cell) + struct dm_bio_prison_cell *cell, int error) { struct bio_list bios; struct bio *bio; - unsigned long flags; bio_list_init(&bios); - - spin_lock_irqsave(&prison->lock, flags); - __cell_release(cell, &bios); - spin_unlock_irqrestore(&prison->lock, flags); + dm_cell_release(prison, cell, &bios); while ((bio = bio_list_pop(&bios))) - bio_io_error(bio); + bio_endio(bio, error); } EXPORT_SYMBOL_GPL(dm_cell_error); diff --git a/drivers/md/dm-bio-prison.h b/drivers/md/dm-bio-prison.h index 3f833190ead..6805a142b75 100644 --- a/drivers/md/dm-bio-prison.h +++ b/drivers/md/dm-bio-prison.h @@ -85,7 +85,7 @@ void dm_cell_release_no_holder(struct dm_bio_prison *prison, struct dm_bio_prison_cell *cell, struct bio_list *inmates); void dm_cell_error(struct dm_bio_prison *prison, - struct dm_bio_prison_cell *cell); + struct dm_bio_prison_cell *cell, int error); /*----------------------------------------------------------------*/ diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 4e84095833d..d724459860d 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1541,7 +1541,7 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign BUG_ON(block_size < 1 << SECTOR_SHIFT || (block_size & (block_size - 1))); - c = kmalloc(sizeof(*c), GFP_KERNEL); + c = kzalloc(sizeof(*c), GFP_KERNEL); if (!c) { r = -ENOMEM; goto bad_client; diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c index 4ead4ba6065..d2899e7eb3a 100644 --- a/drivers/md/dm-cache-metadata.c +++ b/drivers/md/dm-cache-metadata.c @@ -425,6 +425,15 @@ static int __open_metadata(struct dm_cache_metadata *cmd) disk_super = dm_block_data(sblock); + /* Verify the data block size hasn't changed */ + if (le32_to_cpu(disk_super->data_block_size) != cmd->data_block_size) { + DMERR("changing the data block size (from %u to %llu) is not supported", + le32_to_cpu(disk_super->data_block_size), + (unsigned long long)cmd->data_block_size); + r = -EINVAL; + goto bad; + } + r = __check_incompat_features(disk_super, cmd); if (r < 0) goto bad; diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 5f054c44b48..2c63326638b 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -231,7 +231,7 @@ struct cache { /* * cache_size entries, dirty if set */ - dm_cblock_t nr_dirty; + atomic_t nr_dirty; unsigned long *dirty_bitset; /* @@ -492,7 +492,7 @@ static bool is_dirty(struct cache *cache, dm_cblock_t b) static void set_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cblock) { if (!test_and_set_bit(from_cblock(cblock), cache->dirty_bitset)) { - cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) + 1); + atomic_inc(&cache->nr_dirty); policy_set_dirty(cache->policy, oblock); } } @@ -501,8 +501,7 @@ static void clear_dirty(struct cache *cache, dm_oblock_t oblock, dm_cblock_t cbl { if (test_and_clear_bit(from_cblock(cblock), cache->dirty_bitset)) { policy_clear_dirty(cache->policy, oblock); - cache->nr_dirty = to_cblock(from_cblock(cache->nr_dirty) - 1); - if (!from_cblock(cache->nr_dirty)) + if (atomic_dec_return(&cache->nr_dirty) == 0) dm_table_event(cache->ti->table); } } @@ -2269,7 +2268,7 @@ static int cache_create(struct cache_args *ca, struct cache **result) atomic_set(&cache->quiescing_ack, 0); r = -ENOMEM; - cache->nr_dirty = 0; + atomic_set(&cache->nr_dirty, 0); cache->dirty_bitset = alloc_bitset(from_cblock(cache->cache_size)); if (!cache->dirty_bitset) { *error = "could not allocate dirty bitset"; @@ -2808,7 +2807,7 @@ static void cache_status(struct dm_target *ti, status_type_t type, residency = policy_residency(cache->policy); - DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %llu ", + DMEMIT("%u %llu/%llu %u %llu/%llu %u %u %u %u %u %u %lu ", (unsigned)(DM_CACHE_METADATA_BLOCK_SIZE >> SECTOR_SHIFT), (unsigned long long)(nr_blocks_metadata - nr_free_blocks_metadata), (unsigned long long)nr_blocks_metadata, @@ -2821,7 +2820,7 @@ static void cache_status(struct dm_target *ti, status_type_t type, (unsigned) atomic_read(&cache->stats.write_miss), (unsigned) atomic_read(&cache->stats.demotion), (unsigned) atomic_read(&cache->stats.promotion), - (unsigned long long) from_cblock(cache->nr_dirty)); + (unsigned long) atomic_read(&cache->nr_dirty)); if (writethrough_mode(&cache->features)) DMEMIT("1 writethrough "); diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 53b213226c0..4cba2d808af 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2003 Christophe Saout <christophe@saout.de> + * Copyright (C) 2003 Jana Saout <jana@saout.de> * Copyright (C) 2004 Clemens Fruhwirth <clemens@endorphin.org> * Copyright (C) 2006-2009 Red Hat, Inc. All rights reserved. * Copyright (C) 2013 Milan Broz <gmazyland@gmail.com> @@ -1996,6 +1996,6 @@ static void __exit dm_crypt_exit(void) module_init(dm_crypt_init); module_exit(dm_crypt_exit); -MODULE_AUTHOR("Christophe Saout <christophe@saout.de>"); +MODULE_AUTHOR("Jana Saout <jana@saout.de>"); MODULE_DESCRIPTION(DM_NAME " target for transparent encryption / decryption"); MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c index 414dad4cb49..ad913cd4ade 100644 --- a/drivers/md/dm-era-target.c +++ b/drivers/md/dm-era-target.c @@ -1391,7 +1391,8 @@ static int era_is_congested(struct dm_target_callbacks *cb, int bdi_bits) static void era_destroy(struct era *era) { - metadata_close(era->md); + if (era->md) + metadata_close(era->md); if (era->wq) destroy_workqueue(era->wq); diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c index 3842ac738f9..db404a0f7e2 100644 --- a/drivers/md/dm-io.c +++ b/drivers/md/dm-io.c @@ -10,6 +10,7 @@ #include <linux/device-mapper.h> #include <linux/bio.h> +#include <linux/completion.h> #include <linux/mempool.h> #include <linux/module.h> #include <linux/sched.h> @@ -32,7 +33,7 @@ struct dm_io_client { struct io { unsigned long error_bits; atomic_t count; - struct task_struct *sleeper; + struct completion *wait; struct dm_io_client *client; io_notify_fn callback; void *context; @@ -121,8 +122,8 @@ static void dec_count(struct io *io, unsigned int region, int error) invalidate_kernel_vmap_range(io->vma_invalidate_address, io->vma_invalidate_size); - if (io->sleeper) - wake_up_process(io->sleeper); + if (io->wait) + complete(io->wait); else { unsigned long r = io->error_bits; @@ -387,6 +388,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, */ volatile char io_[sizeof(struct io) + __alignof__(struct io) - 1]; struct io *io = (struct io *)PTR_ALIGN(&io_, __alignof__(struct io)); + DECLARE_COMPLETION_ONSTACK(wait); if (num_regions > 1 && (rw & RW_MASK) != WRITE) { WARN_ON(1); @@ -395,7 +397,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, io->error_bits = 0; atomic_set(&io->count, 1); /* see dispatch_io() */ - io->sleeper = current; + io->wait = &wait; io->client = client; io->vma_invalidate_address = dp->vma_invalidate_address; @@ -403,15 +405,7 @@ static int sync_io(struct dm_io_client *client, unsigned int num_regions, dispatch_io(rw, num_regions, where, dp, io, 1); - while (1) { - set_current_state(TASK_UNINTERRUPTIBLE); - - if (!atomic_read(&io->count)) - break; - - io_schedule(); - } - set_current_state(TASK_RUNNING); + wait_for_completion_io(&wait); if (error_bits) *error_bits = io->error_bits; @@ -434,7 +428,7 @@ static int async_io(struct dm_io_client *client, unsigned int num_regions, io = mempool_alloc(client->pool, GFP_NOIO); io->error_bits = 0; atomic_set(&io->count, 1); /* see dispatch_io() */ - io->sleeper = NULL; + io->wait = NULL; io->client = client; io->callback = fn; io->context = context; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index ebfa411d1a7..f4167b013d9 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1242,17 +1242,8 @@ static int do_end_io(struct multipath *m, struct request *clone, if (!error && !clone->errors) return 0; /* I/O complete */ - if (noretry_error(error)) { - if ((clone->cmd_flags & REQ_WRITE_SAME) && - !clone->q->limits.max_write_same_sectors) { - struct queue_limits *limits; - - /* device doesn't really support WRITE SAME, disable it */ - limits = dm_get_queue_limits(dm_table_get_md(m->ti->table)); - limits->max_write_same_sectors = 0; - } + if (noretry_error(error)) return error; - } if (mpio->pgpath) fail_path(mpio->pgpath); @@ -1620,8 +1611,9 @@ static int multipath_busy(struct dm_target *ti) spin_lock_irqsave(&m->lock, flags); - /* pg_init in progress, requeue until done */ - if (!pg_ready(m)) { + /* pg_init in progress or no paths available */ + if (m->pg_init_in_progress || + (!m->nr_valid_paths && m->queue_if_no_path)) { busy = 1; goto out; } diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 8e0caed0bf7..5bd2290cfb1 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -2141,6 +2141,11 @@ static int origin_write_extent(struct dm_snapshot *merging_snap, * Origin: maps a linear range of a device, with hooks for snapshotting. */ +struct dm_origin { + struct dm_dev *dev; + unsigned split_boundary; +}; + /* * Construct an origin mapping: <dev_path> * The context for an origin is merely a 'struct dm_dev *' @@ -2149,41 +2154,65 @@ static int origin_write_extent(struct dm_snapshot *merging_snap, static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv) { int r; - struct dm_dev *dev; + struct dm_origin *o; if (argc != 1) { ti->error = "origin: incorrect number of arguments"; return -EINVAL; } - r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &dev); + o = kmalloc(sizeof(struct dm_origin), GFP_KERNEL); + if (!o) { + ti->error = "Cannot allocate private origin structure"; + r = -ENOMEM; + goto bad_alloc; + } + + r = dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &o->dev); if (r) { ti->error = "Cannot get target device"; - return r; + goto bad_open; } - ti->private = dev; + ti->private = o; ti->num_flush_bios = 1; return 0; + +bad_open: + kfree(o); +bad_alloc: + return r; } static void origin_dtr(struct dm_target *ti) { - struct dm_dev *dev = ti->private; - dm_put_device(ti, dev); + struct dm_origin *o = ti->private; + dm_put_device(ti, o->dev); + kfree(o); } static int origin_map(struct dm_target *ti, struct bio *bio) { - struct dm_dev *dev = ti->private; - bio->bi_bdev = dev->bdev; + struct dm_origin *o = ti->private; + unsigned available_sectors; - if (bio->bi_rw & REQ_FLUSH) + bio->bi_bdev = o->dev->bdev; + + if (unlikely(bio->bi_rw & REQ_FLUSH)) return DM_MAPIO_REMAPPED; + if (bio_rw(bio) != WRITE) + return DM_MAPIO_REMAPPED; + + available_sectors = o->split_boundary - + ((unsigned)bio->bi_iter.bi_sector & (o->split_boundary - 1)); + + if (bio_sectors(bio) > available_sectors) + dm_accept_partial_bio(bio, available_sectors); + /* Only tell snapshots if this is a write */ - return (bio_rw(bio) == WRITE) ? do_origin(dev, bio) : DM_MAPIO_REMAPPED; + return do_origin(o->dev, bio); } /* @@ -2192,15 +2221,15 @@ static int origin_map(struct dm_target *ti, struct bio *bio) */ static void origin_resume(struct dm_target *ti) { - struct dm_dev *dev = ti->private; + struct dm_origin *o = ti->private; - ti->max_io_len = get_origin_minimum_chunksize(dev->bdev); + o->split_boundary = get_origin_minimum_chunksize(o->dev->bdev); } static void origin_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) { - struct dm_dev *dev = ti->private; + struct dm_origin *o = ti->private; switch (type) { case STATUSTYPE_INFO: @@ -2208,7 +2237,7 @@ static void origin_status(struct dm_target *ti, status_type_t type, break; case STATUSTYPE_TABLE: - snprintf(result, maxlen, "%s", dev->name); + snprintf(result, maxlen, "%s", o->dev->name); break; } } @@ -2216,13 +2245,13 @@ static void origin_status(struct dm_target *ti, status_type_t type, static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm, struct bio_vec *biovec, int max_size) { - struct dm_dev *dev = ti->private; - struct request_queue *q = bdev_get_queue(dev->bdev); + struct dm_origin *o = ti->private; + struct request_queue *q = bdev_get_queue(o->dev->bdev); if (!q->merge_bvec_fn) return max_size; - bvm->bi_bdev = dev->bdev; + bvm->bi_bdev = o->dev->bdev; return min(max_size, q->merge_bvec_fn(q, bvm, biovec)); } @@ -2230,9 +2259,9 @@ static int origin_merge(struct dm_target *ti, struct bvec_merge_data *bvm, static int origin_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) { - struct dm_dev *dev = ti->private; + struct dm_origin *o = ti->private; - return fn(ti, dev, 0, ti->len, data); + return fn(ti, o->dev, 0, ti->len, data); } static struct target_type origin_target = { diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 50601ec7017..5f59f1e3e5b 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -465,8 +465,8 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode, } EXPORT_SYMBOL(dm_get_device); -int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, - sector_t start, sector_t len, void *data) +static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) { struct queue_limits *limits = data; struct block_device *bdev = dev->bdev; @@ -499,7 +499,6 @@ int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, (unsigned int) (PAGE_SIZE >> 9)); return 0; } -EXPORT_SYMBOL_GPL(dm_set_device_limits); /* * Decrement a device's use count and remove it if necessary. diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c index b086a945edc..e9d33ad59df 100644 --- a/drivers/md/dm-thin-metadata.c +++ b/drivers/md/dm-thin-metadata.c @@ -613,6 +613,15 @@ static int __open_metadata(struct dm_pool_metadata *pmd) disk_super = dm_block_data(sblock); + /* Verify the data block size hasn't changed */ + if (le32_to_cpu(disk_super->data_block_size) != pmd->data_block_size) { + DMERR("changing the data block size (from %u to %llu) is not supported", + le32_to_cpu(disk_super->data_block_size), + (unsigned long long)pmd->data_block_size); + r = -EINVAL; + goto bad_unlock_sblock; + } + r = __check_incompat_features(disk_super, pmd); if (r < 0) goto bad_unlock_sblock; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 242ac2ea5f2..fc9c848a60c 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -310,13 +310,18 @@ static void cell_defer_no_holder_no_free(struct thin_c *tc, wake_worker(pool); } -static void cell_error(struct pool *pool, - struct dm_bio_prison_cell *cell) +static void cell_error_with_code(struct pool *pool, + struct dm_bio_prison_cell *cell, int error_code) { - dm_cell_error(pool->prison, cell); + dm_cell_error(pool->prison, cell, error_code); dm_bio_prison_free_cell(pool->prison, cell); } +static void cell_error(struct pool *pool, struct dm_bio_prison_cell *cell) +{ + cell_error_with_code(pool, cell, -EIO); +} + /*----------------------------------------------------------------*/ /* @@ -1027,7 +1032,7 @@ static void retry_on_resume(struct bio *bio) spin_unlock_irqrestore(&tc->lock, flags); } -static bool should_error_unserviceable_bio(struct pool *pool) +static int should_error_unserviceable_bio(struct pool *pool) { enum pool_mode m = get_pool_mode(pool); @@ -1035,25 +1040,27 @@ static bool should_error_unserviceable_bio(struct pool *pool) case PM_WRITE: /* Shouldn't get here */ DMERR_LIMIT("bio unserviceable, yet pool is in PM_WRITE mode"); - return true; + return -EIO; case PM_OUT_OF_DATA_SPACE: - return pool->pf.error_if_no_space; + return pool->pf.error_if_no_space ? -ENOSPC : 0; case PM_READ_ONLY: case PM_FAIL: - return true; + return -EIO; default: /* Shouldn't get here */ DMERR_LIMIT("bio unserviceable, yet pool has an unknown mode"); - return true; + return -EIO; } } static void handle_unserviceable_bio(struct pool *pool, struct bio *bio) { - if (should_error_unserviceable_bio(pool)) - bio_io_error(bio); + int error = should_error_unserviceable_bio(pool); + + if (error) + bio_endio(bio, error); else retry_on_resume(bio); } @@ -1062,18 +1069,21 @@ static void retry_bios_on_resume(struct pool *pool, struct dm_bio_prison_cell *c { struct bio *bio; struct bio_list bios; + int error; - if (should_error_unserviceable_bio(pool)) { - cell_error(pool, cell); + error = should_error_unserviceable_bio(pool); + if (error) { + cell_error_with_code(pool, cell, error); return; } bio_list_init(&bios); cell_release(pool, cell, &bios); - if (should_error_unserviceable_bio(pool)) + error = should_error_unserviceable_bio(pool); + if (error) while ((bio = bio_list_pop(&bios))) - bio_io_error(bio); + bio_endio(bio, error); else while ((bio = bio_list_pop(&bios))) retry_on_resume(bio); @@ -1610,47 +1620,63 @@ static void do_no_space_timeout(struct work_struct *ws) /*----------------------------------------------------------------*/ -struct noflush_work { +struct pool_work { struct work_struct worker; - struct thin_c *tc; + struct completion complete; +}; + +static struct pool_work *to_pool_work(struct work_struct *ws) +{ + return container_of(ws, struct pool_work, worker); +} - atomic_t complete; - wait_queue_head_t wait; +static void pool_work_complete(struct pool_work *pw) +{ + complete(&pw->complete); +} + +static void pool_work_wait(struct pool_work *pw, struct pool *pool, + void (*fn)(struct work_struct *)) +{ + INIT_WORK_ONSTACK(&pw->worker, fn); + init_completion(&pw->complete); + queue_work(pool->wq, &pw->worker); + wait_for_completion(&pw->complete); +} + +/*----------------------------------------------------------------*/ + +struct noflush_work { + struct pool_work pw; + struct thin_c *tc; }; -static void complete_noflush_work(struct noflush_work *w) +static struct noflush_work *to_noflush(struct work_struct *ws) { - atomic_set(&w->complete, 1); - wake_up(&w->wait); + return container_of(to_pool_work(ws), struct noflush_work, pw); } static void do_noflush_start(struct work_struct *ws) { - struct noflush_work *w = container_of(ws, struct noflush_work, worker); + struct noflush_work *w = to_noflush(ws); w->tc->requeue_mode = true; requeue_io(w->tc); - complete_noflush_work(w); + pool_work_complete(&w->pw); } static void do_noflush_stop(struct work_struct *ws) { - struct noflush_work *w = container_of(ws, struct noflush_work, worker); + struct noflush_work *w = to_noflush(ws); w->tc->requeue_mode = false; - complete_noflush_work(w); + pool_work_complete(&w->pw); } static void noflush_work(struct thin_c *tc, void (*fn)(struct work_struct *)) { struct noflush_work w; - INIT_WORK_ONSTACK(&w.worker, fn); w.tc = tc; - atomic_set(&w.complete, 0); - init_waitqueue_head(&w.wait); - - queue_work(tc->pool->wq, &w.worker); - - wait_event(w.wait, atomic_read(&w.complete)); + pool_work_wait(&w.pw, tc->pool, fn); } /*----------------------------------------------------------------*/ @@ -3068,7 +3094,8 @@ static void set_discard_limits(struct pool_c *pt, struct queue_limits *limits) */ if (pt->adjusted_pf.discard_passdown) { data_limits = &bdev_get_queue(pt->data_dev->bdev)->limits; - limits->discard_granularity = data_limits->discard_granularity; + limits->discard_granularity = max(data_limits->discard_granularity, + pool->sectors_per_block << SECTOR_SHIFT); } else limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT; } diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index c99003e0d47..b9a64bbce30 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2003 Christophe Saout <christophe@saout.de> + * Copyright (C) 2003 Jana Saout <jana@saout.de> * * This file is released under the GPL. */ @@ -79,6 +79,6 @@ static void __exit dm_zero_exit(void) module_init(dm_zero_init) module_exit(dm_zero_exit) -MODULE_AUTHOR("Christophe Saout <christophe@saout.de>"); +MODULE_AUTHOR("Jana Saout <jana@saout.de>"); MODULE_DESCRIPTION(DM_NAME " dummy target returning zeros"); MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index aa9e093343d..32b958dbc49 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -54,6 +54,8 @@ static void do_deferred_remove(struct work_struct *w); static DECLARE_WORK(deferred_remove_work, do_deferred_remove); +static struct workqueue_struct *deferred_remove_workqueue; + /* * For bio-based dm. * One of these is allocated per bio. @@ -276,16 +278,24 @@ static int __init local_init(void) if (r) goto out_free_rq_tio_cache; + deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1); + if (!deferred_remove_workqueue) { + r = -ENOMEM; + goto out_uevent_exit; + } + _major = major; r = register_blkdev(_major, _name); if (r < 0) - goto out_uevent_exit; + goto out_free_workqueue; if (!_major) _major = r; return 0; +out_free_workqueue: + destroy_workqueue(deferred_remove_workqueue); out_uevent_exit: dm_uevent_exit(); out_free_rq_tio_cache: @@ -299,6 +309,7 @@ out_free_io_cache: static void local_exit(void) { flush_scheduled_work(); + destroy_workqueue(deferred_remove_workqueue); kmem_cache_destroy(_rq_tio_cache); kmem_cache_destroy(_io_cache); @@ -407,7 +418,7 @@ static void dm_blk_close(struct gendisk *disk, fmode_t mode) if (atomic_dec_and_test(&md->open_count) && (test_bit(DMF_DEFERRED_REMOVE, &md->flags))) - schedule_work(&deferred_remove_work); + queue_work(deferred_remove_workqueue, &deferred_remove_work); dm_put(md); @@ -755,6 +766,14 @@ static void dec_pending(struct dm_io *io, int error) } } +static void disable_write_same(struct mapped_device *md) +{ + struct queue_limits *limits = dm_get_queue_limits(md); + + /* device doesn't really support WRITE SAME, disable it */ + limits->max_write_same_sectors = 0; +} + static void clone_endio(struct bio *bio, int error) { int r = 0; @@ -783,6 +802,10 @@ static void clone_endio(struct bio *bio, int error) } } + if (unlikely(r == -EREMOTEIO && (bio->bi_rw & REQ_WRITE_SAME) && + !bdev_get_queue(bio->bi_bdev)->limits.max_write_same_sectors)) + disable_write_same(md); + free_tio(md, tio); dec_pending(io, error); } @@ -977,6 +1000,10 @@ static void dm_done(struct request *clone, int error, bool mapped) r = rq_end_io(tio->ti, clone, error, &tio->info); } + if (unlikely(r == -EREMOTEIO && (clone->cmd_flags & REQ_WRITE_SAME) && + !clone->q->limits.max_write_same_sectors)) + disable_write_same(tio->md); + if (r <= 0) /* The target wants to complete the I/O */ dm_end_request(clone, r); @@ -1110,6 +1137,46 @@ int dm_set_target_max_io_len(struct dm_target *ti, sector_t len) } EXPORT_SYMBOL_GPL(dm_set_target_max_io_len); +/* + * A target may call dm_accept_partial_bio only from the map routine. It is + * allowed for all bio types except REQ_FLUSH. + * + * dm_accept_partial_bio informs the dm that the target only wants to process + * additional n_sectors sectors of the bio and the rest of the data should be + * sent in a next bio. + * + * A diagram that explains the arithmetics: + * +--------------------+---------------+-------+ + * | 1 | 2 | 3 | + * +--------------------+---------------+-------+ + * + * <-------------- *tio->len_ptr ---------------> + * <------- bi_size -------> + * <-- n_sectors --> + * + * Region 1 was already iterated over with bio_advance or similar function. + * (it may be empty if the target doesn't use bio_advance) + * Region 2 is the remaining bio size that the target wants to process. + * (it may be empty if region 1 is non-empty, although there is no reason + * to make it empty) + * The target requires that region 3 is to be sent in the next bio. + * + * If the target wants to receive multiple copies of the bio (via num_*bios, etc), + * the partially processed part (the sum of regions 1+2) must be the same for all + * copies of the bio. + */ +void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) +{ + struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); + unsigned bi_size = bio->bi_iter.bi_size >> SECTOR_SHIFT; + BUG_ON(bio->bi_rw & REQ_FLUSH); + BUG_ON(bi_size > *tio->len_ptr); + BUG_ON(n_sectors > bi_size); + *tio->len_ptr -= bi_size - n_sectors; + bio->bi_iter.bi_size = n_sectors << SECTOR_SHIFT; +} +EXPORT_SYMBOL_GPL(dm_accept_partial_bio); + static void __map_bio(struct dm_target_io *tio) { int r; @@ -1152,10 +1219,10 @@ struct clone_info { struct bio *bio; struct dm_io *io; sector_t sector; - sector_t sector_count; + unsigned sector_count; }; -static void bio_setup_sector(struct bio *bio, sector_t sector, sector_t len) +static void bio_setup_sector(struct bio *bio, sector_t sector, unsigned len) { bio->bi_iter.bi_sector = sector; bio->bi_iter.bi_size = to_bytes(len); @@ -1200,11 +1267,13 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, static void __clone_and_map_simple_bio(struct clone_info *ci, struct dm_target *ti, - unsigned target_bio_nr, sector_t len) + unsigned target_bio_nr, unsigned *len) { struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs, target_bio_nr); struct bio *clone = &tio->clone; + tio->len_ptr = len; + /* * Discard requests require the bio's inline iovecs be initialized. * ci->bio->bi_max_vecs is BIO_INLINE_VECS anyway, for both flush @@ -1212,13 +1281,13 @@ static void __clone_and_map_simple_bio(struct clone_info *ci, */ __bio_clone_fast(clone, ci->bio); if (len) - bio_setup_sector(clone, ci->sector, len); + bio_setup_sector(clone, ci->sector, *len); __map_bio(tio); } static void __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti, - unsigned num_bios, sector_t len) + unsigned num_bios, unsigned *len) { unsigned target_bio_nr; @@ -1233,13 +1302,13 @@ static int __send_empty_flush(struct clone_info *ci) BUG_ON(bio_has_data(ci->bio)); while ((ti = dm_table_get_target(ci->map, target_nr++))) - __send_duplicate_bios(ci, ti, ti->num_flush_bios, 0); + __send_duplicate_bios(ci, ti, ti->num_flush_bios, NULL); return 0; } static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti, - sector_t sector, unsigned len) + sector_t sector, unsigned *len) { struct bio *bio = ci->bio; struct dm_target_io *tio; @@ -1254,7 +1323,8 @@ static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) { tio = alloc_tio(ci, ti, 0, target_bio_nr); - clone_bio(tio, bio, sector, len); + tio->len_ptr = len; + clone_bio(tio, bio, sector, *len); __map_bio(tio); } } @@ -1283,7 +1353,7 @@ static int __send_changing_extent_only(struct clone_info *ci, is_split_required_fn is_split_required) { struct dm_target *ti; - sector_t len; + unsigned len; unsigned num_bios; do { @@ -1302,11 +1372,11 @@ static int __send_changing_extent_only(struct clone_info *ci, return -EOPNOTSUPP; if (is_split_required && !is_split_required(ti)) - len = min(ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); + len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti)); else - len = min(ci->sector_count, max_io_len(ci->sector, ti)); + len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti)); - __send_duplicate_bios(ci, ti, num_bios, len); + __send_duplicate_bios(ci, ti, num_bios, &len); ci->sector += len; } while (ci->sector_count -= len); @@ -1345,7 +1415,7 @@ static int __split_and_process_non_flush(struct clone_info *ci) len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count); - __clone_and_map_data_bio(ci, ti, ci->sector, len); + __clone_and_map_data_bio(ci, ti, ci->sector, &len); ci->sector += len; ci->sector_count -= len; @@ -1439,7 +1509,6 @@ static int dm_merge_bvec(struct request_queue *q, * just one page. */ else if (queue_max_hw_sectors(q) <= PAGE_SIZE >> 9) - max_size = 0; out: diff --git a/drivers/md/md.c b/drivers/md/md.c index 34846856dbc..32fc19c540d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5599,7 +5599,7 @@ static int get_array_info(struct mddev * mddev, void __user * arg) if (mddev->in_sync) info.state = (1<<MD_SB_CLEAN); if (mddev->bitmap && mddev->bitmap_info.offset) - info.state = (1<<MD_SB_BITMAP_PRESENT); + info.state |= (1<<MD_SB_BITMAP_PRESENT); info.active_disks = insync; info.working_disks = working; info.failed_disks = failed; @@ -7501,6 +7501,19 @@ void md_do_sync(struct md_thread *thread) rdev->recovery_offset < j) j = rdev->recovery_offset; rcu_read_unlock(); + + /* If there is a bitmap, we need to make sure all + * writes that started before we added a spare + * complete before we start doing a recovery. + * Otherwise the write might complete and (via + * bitmap_endwrite) set a bit in the bitmap after the + * recovery has checked that bit and skipped that + * region. + */ + if (mddev->bitmap) { + mddev->pers->quiesce(mddev, 1); + mddev->pers->quiesce(mddev, 0); + } } printk(KERN_INFO "md: %s of RAID array %s\n", desc, mdname(mddev)); |
