diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-13 10:57:57 +0900 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-10-13 10:57:57 +0900 |
commit | 79c63eeb805d086f52e5efda9c8d321beeed0b2b (patch) | |
tree | 8efd4619d384a7296f5a7f08149dc7d468f63aa2 /drivers | |
parent | 6a5a3d6a4adde0c66f3be29bbd7c0d6ffb7e1a40 (diff) | |
parent | dba141601d1327146c84b575bd581ea8730e901c (diff) |
Merge tag 'dm-3.7-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm
Pull device-mapper changes from Alasdair G Kergon:
"Remove the power-of-2 block size constraint on discards in dm thin
provisioning and factor the bio_prison code out into a separate module
(for sharing with the forthcoming cache target).
Use struct bio's front_pad to eliminate the use of one separate
mempool by bio-based devices.
A few other tiny clean-ups."
* tag 'dm-3.7-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/agk/linux-dm:
dm: store dm_target_io in bio front_pad
dm thin: move bio_prison code to separate module
dm thin: prepare to separate bio_prison code
dm thin: support discard with non power of two block size
dm persistent data: convert to use le32_add_cpu
dm: use ACCESS_ONCE for sysfs values
dm bufio: use list_move
dm mpath: fix check for null mpio in end_io fn
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/md/Kconfig | 8 | ||||
-rw-r--r-- | drivers/md/Makefile | 1 | ||||
-rw-r--r-- | drivers/md/dm-bio-prison.c | 415 | ||||
-rw-r--r-- | drivers/md/dm-bio-prison.h | 72 | ||||
-rw-r--r-- | drivers/md/dm-bufio.c | 13 | ||||
-rw-r--r-- | drivers/md/dm-mpath.c | 3 | ||||
-rw-r--r-- | drivers/md/dm-thin.c | 521 | ||||
-rw-r--r-- | drivers/md/dm-verity.c | 2 | ||||
-rw-r--r-- | drivers/md/dm.c | 108 | ||||
-rw-r--r-- | drivers/md/persistent-data/dm-space-map-common.c | 4 |
10 files changed, 636 insertions, 511 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index d949b781f6f..91a02eeeb31 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -216,6 +216,13 @@ config DM_BUFIO as a cache, holding recently-read blocks in memory and performing delayed writes. +config DM_BIO_PRISON + tristate + depends on BLK_DEV_DM && EXPERIMENTAL + ---help--- + Some bio locking schemes used by other device-mapper targets + including thin provisioning. + source "drivers/md/persistent-data/Kconfig" config DM_CRYPT @@ -247,6 +254,7 @@ config DM_THIN_PROVISIONING tristate "Thin provisioning target (EXPERIMENTAL)" depends on BLK_DEV_DM && EXPERIMENTAL select DM_PERSISTENT_DATA + select DM_BIO_PRISON ---help--- Provides thin provisioning and snapshots that share a data store. diff --git a/drivers/md/Makefile b/drivers/md/Makefile index 8b2e0dffe82..94dce8b4932 100644 --- a/drivers/md/Makefile +++ b/drivers/md/Makefile @@ -29,6 +29,7 @@ obj-$(CONFIG_MD_FAULTY) += faulty.o obj-$(CONFIG_BLK_DEV_MD) += md-mod.o obj-$(CONFIG_BLK_DEV_DM) += dm-mod.o obj-$(CONFIG_DM_BUFIO) += dm-bufio.o +obj-$(CONFIG_DM_BIO_PRISON) += dm-bio-prison.o obj-$(CONFIG_DM_CRYPT) += dm-crypt.o obj-$(CONFIG_DM_DELAY) += dm-delay.o obj-$(CONFIG_DM_FLAKEY) += dm-flakey.o diff --git a/drivers/md/dm-bio-prison.c b/drivers/md/dm-bio-prison.c new file mode 100644 index 00000000000..e4e84156745 --- /dev/null +++ b/drivers/md/dm-bio-prison.c @@ -0,0 +1,415 @@ +/* + * Copyright (C) 2012 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#include "dm.h" +#include "dm-bio-prison.h" + +#include <linux/spinlock.h> +#include <linux/mempool.h> +#include <linux/module.h> +#include <linux/slab.h> + +/*----------------------------------------------------------------*/ + +struct dm_bio_prison_cell { + struct hlist_node list; + struct dm_bio_prison *prison; + struct dm_cell_key key; + struct bio *holder; + struct bio_list bios; +}; + +struct dm_bio_prison { + spinlock_t lock; + mempool_t *cell_pool; + + unsigned nr_buckets; + unsigned hash_mask; + struct hlist_head *cells; +}; + +/*----------------------------------------------------------------*/ + +static uint32_t calc_nr_buckets(unsigned nr_cells) +{ + uint32_t n = 128; + + nr_cells /= 4; + nr_cells = min(nr_cells, 8192u); + + while (n < nr_cells) + n <<= 1; + + return n; +} + +static struct kmem_cache *_cell_cache; + +/* + * @nr_cells should be the number of cells you want in use _concurrently_. + * Don't confuse it with the number of distinct keys. + */ +struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells) +{ + unsigned i; + uint32_t nr_buckets = calc_nr_buckets(nr_cells); + size_t len = sizeof(struct dm_bio_prison) + + (sizeof(struct hlist_head) * nr_buckets); + struct dm_bio_prison *prison = kmalloc(len, GFP_KERNEL); + + if (!prison) + return NULL; + + spin_lock_init(&prison->lock); + prison->cell_pool = mempool_create_slab_pool(nr_cells, _cell_cache); + if (!prison->cell_pool) { + kfree(prison); + return NULL; + } + + prison->nr_buckets = nr_buckets; + prison->hash_mask = nr_buckets - 1; + prison->cells = (struct hlist_head *) (prison + 1); + for (i = 0; i < nr_buckets; i++) + INIT_HLIST_HEAD(prison->cells + i); + + return prison; +} +EXPORT_SYMBOL_GPL(dm_bio_prison_create); + +void dm_bio_prison_destroy(struct dm_bio_prison *prison) +{ + mempool_destroy(prison->cell_pool); + kfree(prison); +} +EXPORT_SYMBOL_GPL(dm_bio_prison_destroy); + +static uint32_t hash_key(struct dm_bio_prison *prison, struct dm_cell_key *key) +{ + const unsigned long BIG_PRIME = 4294967291UL; + uint64_t hash = key->block * BIG_PRIME; + + return (uint32_t) (hash & prison->hash_mask); +} + +static int keys_equal(struct dm_cell_key *lhs, struct dm_cell_key *rhs) +{ + return (lhs->virtual == rhs->virtual) && + (lhs->dev == rhs->dev) && + (lhs->block == rhs->block); +} + +static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket, + struct dm_cell_key *key) +{ + struct dm_bio_prison_cell *cell; + struct hlist_node *tmp; + + hlist_for_each_entry(cell, tmp, bucket, list) + if (keys_equal(&cell->key, key)) + return cell; + + return NULL; +} + +/* + * This may block if a new cell needs allocating. You must ensure that + * cells will be unlocked even if the calling thread is blocked. + * + * Returns 1 if the cell was already held, 0 if @inmate is the new holder. + */ +int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key, + struct bio *inmate, struct dm_bio_prison_cell **ref) +{ + int r = 1; + unsigned long flags; + uint32_t hash = hash_key(prison, key); + struct dm_bio_prison_cell *cell, *cell2; + + BUG_ON(hash > prison->nr_buckets); + + spin_lock_irqsave(&prison->lock, flags); + + cell = __search_bucket(prison->cells + hash, key); + if (cell) { + bio_list_add(&cell->bios, inmate); + goto out; + } + + /* + * Allocate a new cell + */ + spin_unlock_irqrestore(&prison->lock, flags); + cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO); + spin_lock_irqsave(&prison->lock, flags); + + /* + * We've been unlocked, so we have to double check that + * nobody else has inserted this cell in the meantime. + */ + cell = __search_bucket(prison->cells + hash, key); + if (cell) { + mempool_free(cell2, prison->cell_pool); + bio_list_add(&cell->bios, inmate); + goto out; + } + + /* + * Use new cell. + */ + cell = cell2; + + cell->prison = prison; + memcpy(&cell->key, key, sizeof(cell->key)); + cell->holder = inmate; + bio_list_init(&cell->bios); + hlist_add_head(&cell->list, prison->cells + hash); + + r = 0; + +out: + spin_unlock_irqrestore(&prison->lock, flags); + + *ref = cell; + + return r; +} +EXPORT_SYMBOL_GPL(dm_bio_detain); + +/* + * @inmates must have been initialised prior to this call + */ +static void __cell_release(struct dm_bio_prison_cell *cell, struct bio_list *inmates) +{ + struct dm_bio_prison *prison = cell->prison; + + hlist_del(&cell->list); + + if (inmates) { + bio_list_add(inmates, cell->holder); + bio_list_merge(inmates, &cell->bios); + } + + mempool_free(cell, prison->cell_pool); +} + +void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios) +{ + unsigned long flags; + struct dm_bio_prison *prison = cell->prison; + + spin_lock_irqsave(&prison->lock, flags); + __cell_release(cell, bios); + spin_unlock_irqrestore(&prison->lock, flags); +} +EXPORT_SYMBOL_GPL(dm_cell_release); + +/* + * There are a couple of places where we put a bio into a cell briefly + * before taking it out again. In these situations we know that no other + * bio may be in the cell. This function releases the cell, and also does + * a sanity check. + */ +static void __cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio) +{ + BUG_ON(cell->holder != bio); + BUG_ON(!bio_list_empty(&cell->bios)); + + __cell_release(cell, NULL); +} + +void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio) +{ + unsigned long flags; + struct dm_bio_prison *prison = cell->prison; + + spin_lock_irqsave(&prison->lock, flags); + __cell_release_singleton(cell, bio); + spin_unlock_irqrestore(&prison->lock, flags); +} +EXPORT_SYMBOL_GPL(dm_cell_release_singleton); + +/* + * Sometimes we don't want the holder, just the additional bios. + */ +static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates) +{ + struct dm_bio_prison *prison = cell->prison; + + hlist_del(&cell->list); + bio_list_merge(inmates, &cell->bios); + + mempool_free(cell, prison->cell_pool); +} + +void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates) +{ + unsigned long flags; + struct dm_bio_prison *prison = cell->prison; + + spin_lock_irqsave(&prison->lock, flags); + __cell_release_no_holder(cell, inmates); + spin_unlock_irqrestore(&prison->lock, flags); +} +EXPORT_SYMBOL_GPL(dm_cell_release_no_holder); + +void dm_cell_error(struct dm_bio_prison_cell *cell) +{ + struct dm_bio_prison *prison = cell->prison; + struct bio_list bios; + struct bio *bio; + unsigned long flags; + + bio_list_init(&bios); + + spin_lock_irqsave(&prison->lock, flags); + __cell_release(cell, &bios); + spin_unlock_irqrestore(&prison->lock, flags); + + while ((bio = bio_list_pop(&bios))) + bio_io_error(bio); +} +EXPORT_SYMBOL_GPL(dm_cell_error); + +/*----------------------------------------------------------------*/ + +#define DEFERRED_SET_SIZE 64 + +struct dm_deferred_entry { + struct dm_deferred_set *ds; + unsigned count; + struct list_head work_items; +}; + +struct dm_deferred_set { + spinlock_t lock; + unsigned current_entry; + unsigned sweeper; + struct dm_deferred_entry entries[DEFERRED_SET_SIZE]; +}; + +struct dm_deferred_set *dm_deferred_set_create(void) +{ + int i; + struct dm_deferred_set *ds; + + ds = kmalloc(sizeof(*ds), GFP_KERNEL); + if (!ds) + return NULL; + + spin_lock_init(&ds->lock); + ds->current_entry = 0; + ds->sweeper = 0; + for (i = 0; i < DEFERRED_SET_SIZE; i++) { + ds->entries[i].ds = ds; + ds->entries[i].count = 0; + INIT_LIST_HEAD(&ds->entries[i].work_items); + } + + return ds; +} +EXPORT_SYMBOL_GPL(dm_deferred_set_create); + +void dm_deferred_set_destroy(struct dm_deferred_set *ds) +{ + kfree(ds); +} +EXPORT_SYMBOL_GPL(dm_deferred_set_destroy); + +struct dm_deferred_entry *dm_deferred_entry_inc(struct dm_deferred_set *ds) +{ + unsigned long flags; + struct dm_deferred_entry *entry; + + spin_lock_irqsave(&ds->lock, flags); + entry = ds->entries + ds->current_entry; + entry->count++; + spin_unlock_irqrestore(&ds->lock, flags); + + return entry; +} +EXPORT_SYMBOL_GPL(dm_deferred_entry_inc); + +static unsigned ds_next(unsigned index) +{ + return (index + 1) % DEFERRED_SET_SIZE; +} + +static void __sweep(struct dm_deferred_set *ds, struct list_head *head) +{ + while ((ds->sweeper != ds->current_entry) && + !ds->entries[ds->sweeper].count) { + list_splice_init(&ds->entries[ds->sweeper].work_items, head); + ds->sweeper = ds_next(ds->sweeper); + } + + if ((ds->sweeper == ds->current_entry) && !ds->entries[ds->sweeper].count) + list_splice_init(&ds->entries[ds->sweeper].work_items, head); +} + +void dm_deferred_entry_dec(struct dm_deferred_entry *entry, struct list_head *head) +{ + unsigned long flags; + + spin_lock_irqsave(&entry->ds->lock, flags); + BUG_ON(!entry->count); + --entry->count; + __sweep(entry->ds, head); + spin_unlock_irqrestore(&entry->ds->lock, flags); +} +EXPORT_SYMBOL_GPL(dm_deferred_entry_dec); + +/* + * Returns 1 if deferred or 0 if no pending items to delay job. + */ +int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work) +{ + int r = 1; + unsigned long flags; + unsigned next_entry; + + spin_lock_irqsave(&ds->lock, flags); + if ((ds->sweeper == ds->current_entry) && + !ds->entries[ds->current_entry].count) + r = 0; + else { + list_add(work, &ds->entries[ds->current_entry].work_items); + next_entry = ds_next(ds->current_entry); + if (!ds->entries[next_entry].count) + ds->current_entry = next_entry; + } + spin_unlock_irqrestore(&ds->lock, flags); + + return r; +} +EXPORT_SYMBOL_GPL(dm_deferred_set_add_work); + +/*----------------------------------------------------------------*/ + +static int __init dm_bio_prison_init(void) +{ + _cell_cache = KMEM_CACHE(dm_bio_prison_cell, 0); + if (!_cell_cache) + return -ENOMEM; + + return 0; +} + +static void __exit dm_bio_prison_exit(void) +{ + kmem_cache_destroy(_cell_cache); + _cell_cache = NULL; +} + +/* + * module hooks + */ +module_init(dm_bio_prison_init); +module_exit(dm_bio_prison_exit); + +MODULE_DESCRIPTION(DM_NAME " bio prison"); +MODULE_AUTHOR("Joe Thornber <dm-devel@redhat.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/md/dm-bio-prison.h b/drivers/md/dm-bio-prison.h new file mode 100644 index 00000000000..4e0ac376700 --- /dev/null +++ b/drivers/md/dm-bio-prison.h @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2011-2012 Red Hat, Inc. + * + * This file is released under the GPL. + */ + +#ifndef DM_BIO_PRISON_H +#define DM_BIO_PRISON_H + +#include "persistent-data/dm-block-manager.h" /* FIXME: for dm_block_t */ +#include "dm-thin-metadata.h" /* FIXME: for dm_thin_id */ + +#include <linux/list.h> +#include <linux/bio.h> + +/*----------------------------------------------------------------*/ + +/* + * Sometimes we can't deal with a bio straight away. We put them in prison + * where they can't cause any mischief. Bios are put in a cell identified + * by a key, multiple bios can be in the same cell. When the cell is + * subsequently unlocked the bios become available. + */ +struct dm_bio_prison; +struct dm_bio_prison_cell; + +/* FIXME: this needs to be more abstract */ +struct dm_cell_key { + int virtual; + dm_thin_id dev; + dm_block_t block; +}; + +struct dm_bio_prison *dm_bio_prison_create(unsigned nr_cells); +void dm_bio_prison_destroy(struct dm_bio_prison *prison); + +/* + * This may block if a new cell needs allocating. You must ensure that + * cells will be unlocked even if the calling thread is blocked. + * + * Returns 1 if the cell was already held, 0 if @inmate is the new holder. + */ +int dm_bio_detain(struct dm_bio_prison *prison, struct dm_cell_key *key, + struct bio *inmate, struct dm_bio_prison_cell **ref); + +void dm_cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios); +void dm_cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio); // FIXME: bio arg not needed +void dm_cell_release_no_holder(struct dm_bio_prison_cell *cell, struct bio_list *inmates); +void dm_cell_error(struct dm_bio_prison_cell *cell); + +/*----------------------------------------------------------------*/ + +/* + * We use the deferred set to keep track of pending reads to shared blocks. + * We do this to ensure the new mapping caused by a write isn't performed + * until these prior reads have completed. Otherwise the insertion of the + * new mapping could free the old block that the read bios are mapped to. + */ + +struct dm_deferred_set; +struct dm_deferred_entry; + +struct dm_deferred_set *dm_deferred_set_create(void); +void dm_deferred_set_destroy(struct dm_deferred_set *ds); + +struct dm_deferred_entry *dm_deferred_entry_inc(struct dm_deferred_set *ds); +void dm_deferred_entry_dec(struct dm_deferred_entry *entry, struct list_head *head); +int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work); + +/*----------------------------------------------------------------*/ + +#endif diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index cc06a1e5242..651ca79881d 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -280,9 +280,7 @@ static void __cache_size_refresh(void) BUG_ON(!mutex_is_locked(&dm_bufio_clients_lock)); BUG_ON(dm_bufio_client_count < 0); - dm_bufio_cache_size_latch = dm_bufio_cache_size; - - barrier(); + dm_bufio_cache_size_latch = ACCESS_ONCE(dm_bufio_cache_size); /* * Use default if set to 0 and report the actual cache size used. @@ -441,8 +439,7 @@ static void __relink_lru(struct dm_buffer *b, int dirty) c->n_buffers[b->list_mode]--; c->n_buffers[dirty]++; b->list_mode = dirty; - list_del(&b->lru_list); - list_add(&b->lru_list, &c->lru[dirty]); + list_move(&b->lru_list, &c->lru[dirty]); } /*---------------------------------------------------------------- @@ -813,7 +810,7 @@ static void __get_memory_limit(struct dm_bufio_client *c, { unsigned long buffers; - if (dm_bufio_cache_size != dm_bufio_cache_size_latch) { + if (ACCESS_ONCE(dm_bufio_cache_size) != dm_bufio_cache_size_latch) { mutex_lock(&dm_bufio_clients_lock); __cache_size_refresh(); mutex_unlock(&dm_bufio_clients_lock); @@ -1591,11 +1588,9 @@ EXPORT_SYMBOL_GPL(dm_bufio_client_destroy); static void cleanup_old_buffers(void) { - unsigned long max_age = dm_bufio_max_age; + unsigned long max_age = ACCESS_ONCE(dm_bufio_max_age); struct dm_bufio_client *c; - barrier(); - if (max_age > ULONG_MAX / HZ) max_age = ULONG_MAX / HZ; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index d778563a4ff..573bd04591b 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -1309,13 +1309,14 @@ static int multipath_end_io(struct dm_target *ti, struct request *clone, { struct multipath *m = ti->private; struct dm_mpath_io *mpio = map_context->ptr; - struct pgpath *pgpath = mpio->pgpath; + struct pgpath *pgpath; struct path_selector *ps; int r; BUG_ON(!mpio); r = do_end_io(m, clone, error, mpio); + pgpath = mpio->pgpath; if (pgpath) { ps = &pgpath->pg->ps; if (ps->type->end_io) diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index c29410af1e2..058acf3a5ba 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -5,6 +5,7 @@ */ #include "dm-thin-metadata.h" +#include "dm-bio-prison.h" #include "dm.h" #include <linux/device-mapper.h> @@ -21,7 +22,6 @@ * Tunable constants */ #define ENDIO_HOOK_POOL_SIZE 1024 -#define DEFERRED_SET_SIZE 64 #define MAPPING_POOL_SIZE 1024 #define PRISON_CELLS 1024 #define COMMIT_PERIOD HZ @@ -58,7 +58,7 @@ * i) plug io further to this physical block. (see bio_prison code). * * ii) quiesce any read io to that shared data block. Obviously - * including all devices that share this block. (see deferred_set code) + * including all devices that share this block. (see dm_deferred_set code) * * iii) copy the data block to a newly allocate block. This step can be * missed out if the io covers the block. (schedule_copy). @@ -99,381 +99,10 @@ /*----------------------------------------------------------------*/ /* - * Sometimes we can't deal with a bio straight away. We put them in prison - * where they can't cause any mischief. Bios are put in a cell identified - * by a key, multiple bios can be in the same cell. When the cell is - * subsequently unlocked the bios become available. - */ -struct bio_prison; - -struct cell_key { - int virtual; - dm_thin_id dev; - dm_block_t block; -}; - -struct dm_bio_prison_cell { - struct hlist_node list; - struct bio_prison *prison; - struct cell_key key; - struct bio *holder; - struct bio_list bios; -}; - -struct bio_prison { - spinlock_t lock; - mempool_t *cell_pool; - - unsigned nr_buckets; - unsigned hash_mask; - struct hlist_head *cells; -}; - -static uint32_t calc_nr_buckets(unsigned nr_cells) -{ - uint32_t n = 128; - - nr_cells /= 4; - nr_cells = min(nr_cells, 8192u); - - while (n < nr_cells) - n <<= 1; - - return n; -} - -static struct kmem_cache *_cell_cache; - -/* - * @nr_cells should be the number of cells you want in use _concurrently_. - * Don't confuse it with the number of distinct keys. - */ -static struct bio_prison *prison_create(unsigned nr_cells) -{ - unsigned i; - uint32_t nr_buckets = calc_nr_buckets(nr_cells); - size_t len = sizeof(struct bio_prison) + - (sizeof(struct hlist_head) * nr_buckets); - struct bio_prison *prison = kmalloc(len, GFP_KERNEL); - - if (!prison) - return NULL; - - spin_lock_init(&prison->lock); - prison->cell_pool = mempool_create_slab_pool(nr_cells, _cell_cache); - if (!prison->cell_pool) { - kfree(prison); - return NULL; - } - - prison->nr_buckets = nr_buckets; - prison->hash_mask = nr_buckets - 1; - prison->cells = (struct hlist_head *) (prison + 1); - for (i = 0; i < nr_buckets; i++) - INIT_HLIST_HEAD(prison->cells + i); - - return prison; -} - -static void prison_destroy(struct bio_prison *prison) -{ - mempool_destroy(prison->cell_pool); - kfree(prison); -} - -static uint32_t hash_key(struct bio_prison *prison, struct cell_key *key) -{ - const unsigned long BIG_PRIME = 4294967291UL; - uint64_t hash = key->block * BIG_PRIME; - - return (uint32_t) (hash & prison->hash_mask); -} - -static int keys_equal(struct cell_key *lhs, struct cell_key *rhs) -{ - return (lhs->virtual == rhs->virtual) && - (lhs->dev == rhs->dev) && - (lhs->block == rhs->block); -} - -static struct dm_bio_prison_cell *__search_bucket(struct hlist_head *bucket, - struct cell_key *key) -{ - struct dm_bio_prison_cell *cell; - struct hlist_node *tmp; - - hlist_for_each_entry(cell, tmp, bucket, list) - if (keys_equal(&cell->key, key)) - return cell; - - return NULL; -} - -/* - * This may block if a new cell needs allocating. You must ensure that - * cells will be unlocked even if the calling thread is blocked. - * - * Returns 1 if the cell was already held, 0 if @inmate is the new holder. - */ -static int bio_detain(struct bio_prison *prison, struct cell_key *key, - struct bio *inmate, struct dm_bio_prison_cell **ref) -{ - int r = 1; - unsigned long flags; - uint32_t hash = hash_key(prison, key); - struct dm_bio_prison_cell *cell, *cell2; - - BUG_ON(hash > prison->nr_buckets); - - spin_lock_irqsave(&prison->lock, flags); - - cell = __search_bucket(prison->cells + hash, key); - if (cell) { - bio_list_add(&cell->bios, inmate); - goto out; - } - - /* - * Allocate a new cell - */ - spin_unlock_irqrestore(&prison->lock, flags); - cell2 = mempool_alloc(prison->cell_pool, GFP_NOIO); - spin_lock_irqsave(&prison->lock, flags); - - /* - * We've been unlocked, so we have to double check that - * nobody else has inserted this cell in the meantime. - */ - cell = __search_bucket(prison->cells + hash, key); - if (cell) { - mempool_free(cell2, prison->cell_pool); - bio_list_add(&cell->bios, inmate); - goto out; - } - - /* - * Use new cell. - */ - cell = cell2; - - cell->prison = prison; - memcpy(&cell->key, key, sizeof(cell->key)); - cell->holder = inmate; - bio_list_init(&cell->bios); - hlist_add_head(&cell->list, prison->cells + hash); - - r = 0; - -out: - spin_unlock_irqrestore(&prison->lock, flags); - - *ref = cell; - - return r; -} - -/* - * @inmates must have been initialised prior to this call - */ -static void __cell_release(struct dm_bio_prison_cell *cell, struct bio_list *inmates) -{ - struct bio_prison *prison = cell->prison; - - hlist_del(&cell->list); - - if (inmates) { - bio_list_add(inmates, cell->holder); - bio_list_merge(inmates, &cell->bios); - } - - mempool_free(cell, prison->cell_pool); -} - -static void cell_release(struct dm_bio_prison_cell *cell, struct bio_list *bios) -{ - unsigned long flags; - struct bio_prison *prison = cell->prison; - - spin_lock_irqsave(&prison->lock, flags); - __cell_release(cell, bios); - spin_unlock_irqrestore(&prison->lock, flags); -} - -/* - * There are a couple of places where we put a bio into a cell briefly - * before taking it out again. In these situations we know that no other - * bio may be in the cell. This function releases the cell, and also does - * a sanity check. - */ -static void __cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio) -{ - BUG_ON(cell->holder != bio); - BUG_ON(!bio_list_empty(&cell->bios)); - - __cell_release(cell, NULL); -} - -static void cell_release_singleton(struct dm_bio_prison_cell *cell, struct bio *bio) -{ - unsigned long flags; - struct bio_prison *prison = cell->prison; - - spin_lock_irqsave(&prison->lock, flags); - __cell_release_singleton(cell, bio); - spin_unlock_irqrestore(&prison->lock, flags); -} - -/* - * Sometimes we don't want the holder, just the additional bios. - */ -static void __cell_release_no_holder(struct dm_bio_prison_cell *cell, - struct bio_list *inmates) -{ - struct bio_prison *prison = cell->prison; - - hlist_del(&cell->list); - bio_list_merge(inmates, &cell->bios); - - mempool_free(cell, prison->cell_pool); -} - -static void cell_release_no_holder(struct dm_bio_prison_cell *cell, - struct bio_list *inmates) -{ - unsigned long flags; - struct bio_prison *prison = cell->prison; - - spin_lock_irqsave(&prison->lock, flags); - __cell_release_no_holder(cell, inmates); - spin_unlock_irqrestore(&prison->lock, flags); -} - -static void cell_error(struct dm_bio_prison_cell *cell) -{ - struct bio_prison *prison = cell->prison; - struct bio_list bios; - struct bio *bio; - unsigned long flags; - - bio_list_init(&bios); - - spin_lock_irqsave(&prison->lock, flags); - __cell_release(cell, &bios); - spin_unlock_irqrestore(&prison->lock, flags); - - while ((bio = bio_list_pop(&bios))) - bio_io_error(bio); -} - -/*----------------------------------------------------------------*/ - -/* - * We use the deferred set to keep track of pending reads to shared blocks. - * We do this to ensure the new mapping caused by a write isn't performed - * until these prior reads have completed. Otherwise the insertion of the - * new mapping could free the old block that the read bios are mapped to. - */ - -struct deferred_set; -struct deferred_entry { - struct deferred_set *ds; - unsigned count; - struct list_head work_items; -}; - -struct deferred_set { - spinlock_t lock; - unsigned current_entry; - unsigned sweeper; - struct deferred_entry entries[DEFERRED_SET_SIZE]; -}; - -static void ds_init(struct deferred_set *ds) -{ - int i; - - spin_lock_init(&ds->lock); - ds->current_entry = 0; - ds->sweeper = 0; - for (i = 0; i < DEFERRED_SET_SIZE; i++) { - ds->entries[i].ds = ds; - ds->entries[i].count = 0; - INIT_LIST_HEAD(&ds->entries[i].work_items); - } -} - -static struct deferred_entry *ds_inc(struct deferred_set *ds) -{ - unsigned long flags; - struct deferred_entry *entry; - - spin_lock_irqsave(&ds->lock, flags); - entry = ds->entries + ds->current_entry; - entry->count++; - spin_unlock_irqrestore(&ds->lock, flags); - - return entry; -} - -static unsigned ds_next(unsigned index) -{ - return (index + 1) % DEFERRED_SET_SIZE; -} - -static void __sweep(struct deferred_set *ds, struct list_head *head) -{ - while ((ds->sweeper != ds->current_entry) && - !ds->entries[ds->sweeper].count) { - list_splice_init(&ds->entries[ds->sweeper].work_items, head); - ds->sweeper = ds_next(ds->sweeper); - } - - if ((ds->sweeper == ds->current_entry) && !ds->entries[ds->sweeper].count) - list_splice_init(&ds->entries[ds->sweeper].work_items, head); -} - -static void ds_dec(struct deferred_entry *entry, struct list_head *head) -{ - unsigned long flags; - - spin_lock_irqsave(&entry->ds->lock, flags); - BUG_ON(!entry->count); - --entry->count; - __sweep(entry->ds, head); - spin_unlock_irqrestore(&entry->ds->lock, flags); -} - -/* - * Returns 1 if deferred or 0 if no pending items to delay job. - */ -static int ds_add_work(struct deferred_set *ds, struct list_head *work) -{ - int r = 1; - unsigned long flags; - unsigned next_entry; - - spin_lock_irqsave(&ds->lock, flags); - if ((ds->sweeper == ds->current_entry) && - !ds->entries[ds->current_entry].count) - r = 0; - else { - list_add(work, &ds->entries[ds->current_entry].work_items); - next_entry = ds_next(ds->current_entry); - if (!ds->entries[next_entry].count) - ds->current_entry = next_entry; - } - spin_unlock_irqrestore(&ds->lock, flags); - - return r; -} - -/*----------------------------------------------------------------*/ - -/* * Key building. */ static void build_data_key(struct dm_thin_device *td, - dm_block_t b, struct cell_key *key) + dm_block_t b, struct dm_cell_key *key) { key->virtual = 0; key->dev = dm_thin_dev_id(td); @@ -481,7 +110,7 @@ static void build_data_key(struct dm_thin_device *td, } static void build_virtual_key(struct dm_thin_device *td, dm_block_t b, - struct cell_key *key) + struct dm_cell_key *key) { key->virtual = 1; key->dev = dm_thin_dev_id(td); @@ -534,7 +163,7 @@ struct pool { unsigned low_water_triggered:1; /* A dm event has been sent */ unsigned no_free_space:1; /* A -ENOSPC warning has been issued */ - struct bio_prison *prison; + struct dm_bio_prison *prison; struct dm_kcopyd_client *copier; struct workqueue_struct *wq; @@ -552,8 +181,8 @@ struct pool { struct bio_list retry_on_resume_list; - struct deferred_set shared_read_ds; - struct deferred_set all_io_ds; + struct dm_deferred_set *shared_read_ds; + struct dm_deferred_set *all_io_ds; struct dm_thin_new_mapping *next_mapping; mempool_t *mapping_pool; @@ -660,8 +289,8 @@ static struct pool *__pool_table_lookup_metadata_dev(struct block_device *md_dev struct dm_thin_endio_hook { struct thin_c *tc; - struct deferred_entry *shared_read_entry; - struct deferred_entry *all_io_entry; + struct dm_deferred_entry *shared_read_entry; + struct dm_deferred_entry *all_io_entry; struct dm_thin_new_mapping *overwrite_mapping; }; @@ -877,7 +506,7 @@ static void cell_defer(struct thin_c *tc, struct dm_bio_prison_cell *cell, unsigned long flags; spin_lock_irqsave(&pool->lock, flags); - cell_release(cell, &pool->deferred_bios); + dm_cell_release(cell, &pool->deferred_bios); spin_unlock_irqrestore(&tc->pool->lock, flags); wake_worker(pool); @@ -896,7 +525,7 @@ static void cell_defer_except(struct thin_c *tc, struct dm_bio_prison_cell *cell bio_list_init(&bios); spin_lock_irqsave(&pool-&g |